In [None]:
# Map each ID in the arranged spreadsheet to its matching file name from the metadata CSV.
import pandas as pd
import os # Import the os module

# Define file paths
# All inputs and the output live in one folder; change desktop_path to relocate them.
output_file_name_base = 'Combined_Document_With_Filenames.csv'

desktop_path = 'C:/Users/mohak/Desktop'

# Construct the full output file path for your desktop
output_file_name = os.path.join(desktop_path, output_file_name_base)

# Keep the input paths as plain strings so that log and error messages can name
# the files even when loading fails (the DataFrames do not exist at that point).
metadata_path = os.path.join(desktop_path, 'metadata.cart.2025-06-07.csv')
arranged_path = os.path.join(desktop_path, 'Arranged.xlsx')


def build_file_name_lookup(metadata, key_col, value_col):
    """Return a two-column lookup table with one row per non-null key.

    Args:
        metadata: DataFrame containing at least ``key_col`` and ``value_col``.
        key_col: Name of the column holding the identifier to match on.
        value_col: Name of the column holding the value to attach.

    Returns:
        A new DataFrame with only ``key_col`` and ``value_col``, where rows
        with a missing key are removed and, for repeated keys, only the first
        row is kept. The input frame is not modified.

    Raises:
        KeyError: If either column is absent from ``metadata``.
    """
    lookup = metadata[[key_col, value_col]].copy()

    # pandas.merge matches null keys against each other, so a metadata row
    # without a key would be attached to every arranged row that has a blank
    # ID. Such rows carry no usable mapping and are dropped.
    missing_key = lookup[key_col].isna()
    if missing_key.any():
        print(f"Warning: Rows with a missing '{key_col}' found in metadata. Ignoring them.")
        lookup = lookup[~missing_key]

    # Handle potential duplicates in the key column (e.g., multiple files per case_id);
    # a left merge against duplicated keys would multiply rows in the output.
    if lookup[key_col].duplicated().any():
        print(f"Warning: Duplicate values found in '{key_col}' in metadata. Keeping the first associated '{value_col}'.")
        lookup = lookup.drop_duplicates(subset=[key_col], keep='first')

    return lookup


try:
    # 1. Load the metadata file
    df_metadata = pd.read_csv(metadata_path)
    print(f"Successfully loaded '{metadata_path}'.")

    # Verify column names for metadata
    metadata_match_col = 'associated_entities/0/case_id'
    metadata_value_col = 'file_name'

    if metadata_match_col not in df_metadata.columns or metadata_value_col not in df_metadata.columns:
        raise ValueError(f"Required columns '{metadata_match_col}' or '{metadata_value_col}' not found in '{metadata_path}'.")

    # 2. Load the arranged file (using the previously generated combined file)
    df_arranged = pd.read_excel(arranged_path)
    print(f"Successfully loaded '{arranged_path}' (as the base Arranged file).")

    # Verify column name for arranged file
    arranged_match_col = 'ID'

    if arranged_match_col not in df_arranged.columns:
        raise ValueError(f"Required column '{arranged_match_col}' (AQ) not found in '{arranged_path}'. This column was created in the previous step.")

    # Reduce the metadata to a clean key -> file name lookup before merging
    df_metadata_subset = build_file_name_lookup(df_metadata, metadata_match_col, metadata_value_col)

    # 3. Merge the two dataframes; a left join keeps every arranged row, and
    #    rows without a match get a missing file name.
    df_combined_new = pd.merge(
        df_arranged,
        df_metadata_subset,
        left_on=arranged_match_col,
        right_on=metadata_match_col,
        how='left'
    )

    # Drop the redundant matching column from the right DataFrame
    df_combined_new = df_combined_new.drop(columns=[metadata_match_col])

    # 4. Rename the new column to 'File_name' (AR-titled)
    df_combined_new = df_combined_new.rename(columns={metadata_value_col: 'File_name'})

    # 5. Save the new combined file to the specified desktop path
    df_combined_new.to_csv(output_file_name, index=False)
    print(f"\nAttempted to save new combined document to '{output_file_name}'")
    print("\nIf you ran this code on your local machine, check your desktop for the file.")


except FileNotFoundError as e:
    # Only the path strings are referenced here: the DataFrames may not have
    # been created yet when this handler runs.
    print(f"Error: One of the input files was not found. Please ensure '{metadata_path}' and '{arranged_path}' exist and are accessible. Error: {e}")
except ValueError as e:
    print(f"Error in data processing: {e}")
except Exception as e:
    # Top-level boundary of the script: report anything unexpected instead of a traceback.
    print(f"An unexpected error occurred: {e}")