# In [None]:
import os
import pandas as pd
import shutil

# Lookup tables and filesystem locations used by this renaming run.

# Source label -> two-digit code. Codes follow the listing order, starting at
# "01"; append new labels at the end so the existing codes stay unchanged.
source_mapping = {
    label: f"{code:02d}"
    for code, label in enumerate(
        ('JMI', 'MAIDS', 'BHU', 'AIIMS', 'RIMS', 'TCIA'), start=1
    )
}

# Class label (lower-case) -> single-digit code. Codes follow the listing
# order, starting at "0"; append new labels at the end for the same reason.
class_mapping = {
    label: str(code)
    for code, label in enumerate(
        ('normal', 'osmf', 'wd', 'md', 'pd', 'osmf-oscc')
    )
}

# Input: metadata sheet and the directory tree holding the original slides.
metadata_path = '/media/microcrispr8/DATA 2/researchxwsi/Dataframes/input-metadata-osmf-oscc.csv'
input_directory = '/media/microcrispr8/DATA 2/researchxwsi/old-name-wsi/OSMF-OSCC-AIIMS-WSI-Oct-24'

# Output: root for the renamed copies and the CSV log of old -> new names.
output_base_directory = '/media/microcrispr8/DATA 2/researchxwsi/rename-data/OSMF-OSCC'
rename_log_path = '/media/microcrispr8/DATA 2/researchxwsi/Dataframes/rename_log_osmf-oscc.csv'

# Load the wide-format metadata sheet.
metadata_df = pd.read_csv(metadata_path)

# Reshape it into one record per listed file: filename, class, source.
# The sheet is consumed in groups of three columns: the first column of a
# group holds filenames (the first word of its header is taken as the class
# name), the second holds the matching source labels, and the third column
# is not read here.
columns = list(metadata_df.columns)
data = []

# Stopping at len(columns) - 1 guarantees columns[i + 1] exists, so a trailing
# filename column with no source column beside it is skipped instead of
# raising IndexError.
for i in range(0, len(columns) - 1, 3):
    filename_col = columns[i]
    source_col = columns[i + 1]  # Source column is right next to filename column
    class_name = filename_col.split()[0]

    # Only rows that actually list a filename in this group produce a record.
    has_file = metadata_df[filename_col].notna()
    data.extend(
        {'filename': filename, 'class': class_name, 'source': source}
        for filename, source in zip(
            metadata_df.loc[has_file, filename_col],
            metadata_df.loc[has_file, source_col],
        )
    )

# Naming the columns keeps the frame's schema intact even when no records
# were collected.
structured_df = pd.DataFrame(data, columns=['filename', 'class', 'source'])

# Ensure output directory exists
os.makedirs(output_base_directory, exist_ok=True)

# Accumulates one {'Old Filename', 'New Filename'} record per copied file.
filename_changes = []
# Sequential ID embedded in each new filename; incremented once per copied file.
patient_counter = 1

# Map each listed filename to its (source code, class code) pair.
# Labels are stripped of surrounding whitespace before lookup (and the class
# is lower-cased) so stray padding in the sheet does not break the lookup.
# A label that is still unknown raises KeyError naming the offending file,
# rather than a bare KeyError with no context.
file_to_metadata = {}
for _, row in structured_df.iterrows():
    source_label = str(row['source']).strip()
    class_label = str(row['class']).strip().lower()

    if source_label not in source_mapping:
        raise KeyError(
            f"Unknown source {row['source']!r} for file {row['filename']!r}; "
            f"expected one of {sorted(source_mapping)}"
        )
    if class_label not in class_mapping:
        raise KeyError(
            f"Unknown class {row['class']!r} for file {row['filename']!r}; "
            f"expected one of {sorted(class_mapping)}"
        )

    # If a filename is listed more than once, the last listing wins.
    file_to_metadata[row['filename']] = (
        source_mapping[source_label],
        class_mapping[class_label],
    )

# Walk the input tree, copy every listed .ndpi file into a mirrored output
# tree under its new coded name, and record each old -> new pair.
for root, dirs, files in os.walk(input_directory):
    # os.walk yields entries in arbitrary filesystem order. Sorting the
    # sub-directories in place (honoured by the top-down walk) and the files
    # makes the sequential patient IDs reproducible from run to run.
    dirs.sort()
    for file in sorted(files):
        # Only slides that are listed in the metadata are copied.
        if not (file.endswith('.ndpi') and file in file_to_metadata):
            continue

        source_id, class_id = file_to_metadata[file]
        patient_id = f"{patient_counter:03d}"
        new_filename = f"o-{class_id}-{source_id}-{patient_id}.ndpi"
        patient_counter += 1

        # Generate the path in the output directory mirroring the input structure
        relative_path = os.path.relpath(root, input_directory)
        output_directory = os.path.join(output_base_directory, relative_path)
        os.makedirs(output_directory, exist_ok=True)

        source_path = os.path.join(root, file)
        destination_path = os.path.join(output_directory, new_filename)

        # Copy (never move) so the originals stay untouched.
        shutil.copy(source_path, destination_path)
        print(f"Copied and renamed {file} to {new_filename}")

        # Log the change
        filename_changes.append({'Old Filename': file, 'New Filename': new_filename})

# Save the rename log to a CSV. Naming the columns ensures the header row is
# written even when no file matched, so the log can always be read back.
rename_log_df = pd.DataFrame(filename_changes, columns=['Old Filename', 'New Filename'])
rename_log_df.to_csv(rename_log_path, index=False)
print(f"Filename change log saved to {rename_log_path}")
