# Train

## Train Parsing List

In [None]:
# Each `!` line runs in its own throwaway subshell, so a standalone `! cd ...` has no
# effect on the lines after it. Do the directory change and the `find` in one subshell
# and redirect from the outside, so parsing.txt lands in the notebook's working
# directory (where the next cell reads it) and is not listed by `find` itself.
# NOTE(review): assumes the notebook runs from src/utility/dataset/LV-MHP-v2/list — confirm.
! (cd ../../../../../data/library/LV-MHP-v2/train/parsing_annos/ && find ./ -type f) > parsing.txt

In [None]:
def process_parsing_file(input_file, output_file):
    """
    Sort the filenames in the parsing file numerically by Ra_Rb_Rc.png and remove the leading `./`.

    Blank lines are dropped. Names that do not start with a number followed by
    an underscore are kept and placed after all numbered names, in their
    original relative order.

    Any exception raised while reading, sorting or writing is caught and
    reported with a printed message; nothing is re-raised and nothing is
    returned.

    Args:
        input_file (str): Path to the input parsing file.
        output_file (str): Path to the output file where sorted filenames will be saved.
    """
    import re

    unknown = float('inf')  # sorts after every real number

    def strip_dot_slash(name):
        # Remove exactly one leading "./". str.lstrip('./') would instead strip
        # any run of '.' and '/' characters (e.g. "./.hidden" -> "hidden").
        return name[2:] if name.startswith('./') else name

    def sorting_key(filename):
        # Full Ra_Rb_Rc prefix: compare all three parts as integers.
        full = re.match(r"(\d+)_(\d+)_(\d+)", filename)
        if full:
            return tuple(int(part) for part in full.groups())
        # Only a leading Ra: still group with that Ra, after the complete names.
        partial = re.match(r"(\d+)_", filename)
        if partial:
            return (int(partial.group(1)), unknown, unknown)
        # No numeric prefix at all: push to the very end.
        return (unknown, unknown, unknown)

    try:
        # Read lines from the input file
        with open(input_file, 'r') as f:
            lines = f.readlines()

        # Strip whitespace, remove the leading `./`, and skip empty entries
        cleaned_lines = []
        for line in lines:
            name = strip_dot_slash(line.strip())
            if name:
                cleaned_lines.append(name)

        # sorted() is stable, so names with equal keys keep their input order
        sorted_lines = sorted(cleaned_lines, key=sorting_key)

        # Write sorted lines to the output file
        with open(output_file, 'w') as f:
            f.writelines(name + '\n' for name in sorted_lines)

        print(f"Successfully processed and saved to {output_file}")

    except Exception as e:
        print(f"An error occurred: {e}")

In [None]:
# Turn the raw `find` listing (parsing.txt) into the cleaned, sorted list (parse.txt).
input_file, output_file = 'parsing.txt', 'parse.txt'
process_parsing_file(input_file, output_file)

In [None]:
# Delete the intermediate parsing.txt listing; the preceding cell wrote its processed form to parse.txt.
! rm parsing.txt

## Train CSV generation

In [1]:
import re
import csv

def generate_csv_from_parse(input_file, csv_file):
    """
    Create a CSV file with UID, NumOfHuman, Path4IMG, Path4PARSE, and Path4POSE
    from a sorted parsing file.

    Each input line is expected to look like `Ra_Rb_Rc.png` (optionally with a
    leading `./`). One row is written per distinct Ra, in ascending numeric
    order of Ra, built from the first line seen for that Ra:

        UID        = int(Ra)
        NumOfHuman = Rb, exactly as it appears in the filename
        Path4IMG   = "<Ra>.jpg"
        Path4PARSE = "<Ra>_<Rb>_*.png"
        Path4POSE  = "<Ra>.mat"

    Lines that are not exactly of the `Ra_Rb_Rc.png` form are ignored.

    Any exception raised while reading or writing is caught and reported with
    a printed message; nothing is re-raised and nothing is returned.

    Args:
        input_file (str): Path to the input parsing file.
        csv_file (str): Path to the CSV file to be created.
    """
    fieldnames = ["UID", "NumOfHuman", "Path4IMG", "Path4PARSE", "Path4POSE"]

    try:
        # Read lines from the input file
        with open(input_file, 'r') as f:
            lines = f.readlines()

        # Create the CSV data structure, keyed by the integer UID
        csv_data = {}

        for line in lines:
            name = line.strip()
            # Remove exactly one leading "./". str.lstrip('./') would strip any
            # run of '.' and '/' characters and could mangle the name.
            if name.startswith('./'):
                name = name[2:]

            # fullmatch + escaped dot: the whole name must be Ra_Rb_Rc.png, so
            # e.g. "1_02_01xpng" or "1_02_01.png.bak" are rejected.
            match = re.fullmatch(r"(\d+)_(\d+)_(\d+)\.png", name)
            if not match:
                continue

            Ra, Rb, _ = match.groups()
            UID = int(Ra)

            # Keep only the first entry seen for each UID
            if UID not in csv_data:
                csv_data[UID] = {
                    "UID": UID,
                    "NumOfHuman": Rb,
                    "Path4IMG": f"{Ra}.jpg",
                    "Path4PARSE": f"{Ra}_{Rb}_*.png",
                    "Path4POSE": f"{Ra}.mat"
                }

        # Write the CSV file, rows ordered by numeric UID
        with open(csv_file, 'w', newline='') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

            writer.writeheader()
            for UID in sorted(csv_data):
                writer.writerow(csv_data[UID])

        print(f"Successfully processed and saved to {csv_file}")

    except Exception as e:
        print(f"An error occurred: {e}")

In [2]:
# Build train.csv from the sorted parsing list in parse.txt.
input_file, csv_file = 'parse.txt', 'train.csv'
generate_csv_from_parse(input_file, csv_file)

Successfully processed and saved to train.csv
