# In [None]:
import os
import re
import pandas as pd

def merge_csv_files(root_dir):
    """Merge every ``<base>_part<N>.csv`` found under *root_dir* into one CSV per base.

    The tree below ``root_dir`` is walked with ``os.walk``. Files are grouped
    by the text in front of ``_part<N>.csv`` in the file name alone (the
    containing folder is not part of the key), so equally named parts from
    different sub-folders end up in the same group. Each group is read in
    ascending part-number order, concatenated with a fresh row index, and
    written to ``<root_dir>/<base>.csv`` without an index column. One
    confirmation line is printed per written file.

    Args:
        root_dir: Directory to search; also where merged files are written.

    Returns:
        None. Results are the files written to disk.
    """
    # Captures (base name, part number) from names like "cells_part12.csv".
    part_name = re.compile(r'(.+)_part(\d+)\.csv$')

    # base name -> list of (part number, full path)
    groups = {}
    for folder, _subdirs, names in os.walk(root_dir):
        for name in names:
            found = part_name.search(name)
            if found is None:
                continue
            stem, number = found.groups()
            entry = (int(number), os.path.join(folder, name))
            groups.setdefault(stem, []).append(entry)

    for stem in groups:
        # Tuples sort by part number first (numerically, so part10 follows
        # part2), then by path when part numbers are equal.
        ordered = sorted(groups[stem])

        frames = []
        for _number, csv_path in ordered:
            frames.append(pd.read_csv(csv_path))
        combined = pd.concat(frames, ignore_index=True)

        output_filename = os.path.join(root_dir, f"{stem}.csv")
        combined.to_csv(output_filename, index=False)
        print(f"Merged file saved as: {output_filename}")

# Usage: merge the part files of each processed dataset in turn.
# Change these to your directory paths.
for root_directory in (
    './dataset/processed/PRISM2D_MouseEmbryo',
    './dataset/processed/PRISM2D_HCC',
):
    merge_csv_files(root_directory)