In [25]:
import os
import re
import pandas as pd
from pprint import pprint
from tqdm import tqdm

def merge_csv_files(root_dir):
    """Merge split ``<base>_part<N>.csv`` files into one ``<base>.csv`` per group.

    Walks ``root_dir`` recursively and groups every file whose name ends in
    ``_part<N>.csv`` by (containing directory, base name). Each group is read
    with pandas, concatenated in ascending numeric part order with a fresh
    row index, and written next to its parts as ``<base>.csv``. The part
    files are not removed. The list of discovered groups is pretty-printed
    before merging starts, and a tqdm progress bar tracks the merge.

    Args:
        root_dir: Directory to search recursively for part files.

    Returns:
        None. The merged CSV files are written to disk as a side effect.
    """
    # Group 1 is the base name, group 2 is the numeric part index.
    pattern = re.compile(r'(.+)_part(\d+)\.csv$')

    # Maps (directory, base name) -> list of (part number, full file path).
    file_dict = {}
    for dirpath, _, filenames in os.walk(root_dir):
        for file in filenames:
            match = pattern.search(file)
            if not match:
                continue
            key = (dirpath, match.group(1))
            file_dict.setdefault(key, []).append(
                (int(match.group(2)), os.path.join(dirpath, file))
            )

    # Build the display paths with os.path.join rather than a hard-coded
    # backslash so the summary uses the platform's separator.
    pprint([os.path.join(dirpath, base_name) for dirpath, base_name in file_dict])

    # Each (directory, base name) group is merged independently.
    for (dirpath, base_name), files in tqdm(file_dict.items(), desc='merging'):
        # The part number is the first tuple element and is an int, so the
        # order is numeric (part10 after part2), not lexicographic.
        files.sort()
        df_list = [pd.read_csv(file_path) for _, file_path in files]
        merged_df = pd.concat(df_list, ignore_index=True)
        # The merged file sits beside its parts, named after the base name.
        output_filename = os.path.join(dirpath, f"{base_name}.csv")
        merged_df.to_csv(output_filename, index=False)

# Usage: merge the split CSV parts of each processed dataset in turn.
# Change the entries below to your own directory paths.
for root_directory in (
    './dataset/processed/PRISM2D_MouseEmbryo',
    './dataset/processed/PRISM2D_HCC',
):
    merge_csv_files(root_directory)

['./dataset/processed/PRISM2D_MouseEmbryo\\E13.5\\1-1\\visualization\\rna_labeled',
 './dataset/processed/PRISM2D_MouseEmbryo\\E13.5\\1-16\\visualization\\rna_labeled',
 './dataset/processed/PRISM2D_MouseEmbryo\\E13.5\\1-21\\visualization\\rna_labeled',
 './dataset/processed/PRISM2D_MouseEmbryo\\E13.5\\1-6\\visualization\\rna_labeled',
 './dataset/processed/PRISM2D_MouseEmbryo\\E13.5\\2-11\\visualization\\rna_labeled',
 './dataset/processed/PRISM2D_MouseEmbryo\\E13.5\\2-16\\visualization\\rna_labeled',
 './dataset/processed/PRISM2D_MouseEmbryo\\E13.5\\2-6\\visualization\\rna_labeled',
 './dataset/processed/PRISM2D_MouseEmbryo\\E14.5\\1-17\\visualization\\rna_labeled',
 './dataset/processed/PRISM2D_MouseEmbryo\\E14.5\\1-18\\visualization\\rna_labeled',
 './dataset/processed/PRISM2D_MouseEmbryo\\E14.5\\1-22\\visualization\\rna_labeled']


merging: 100%|██████████| 10/10 [00:50<00:00,  5.02s/it]


['./dataset/processed/PRISM2D_HCC\\readout\\intensity']


merging: 100%|██████████| 1/1 [00:08<00:00,  8.12s/it]
