In [7]:
# pandas for data manipulation
import pandas as pd
# os for interacting with file system
import os

# Path to the binary-classes CSV: one row per subject, holding the subject ID and its dementia class.
binary_csv_path = r"C:\Users\gjkku\OneDrive\Documenten\CSAI year 3\Thesis\csv_binary\csv_binary.csv"  
# Root directory of the extracted MRI scans; it is searched for per-disc sub-folders further below.
mri_dir = r"C:\Users\gjkku\OneDrive\Documenten\CSAI year 3\Thesis\Dataset(extracted)"  

# Load the binary classification subset into a dataframe.
binary_df = pd.read_csv(binary_csv_path, sep='\t') # the file is tab-separated despite its .csv extension

# Function to find the MRI scan path for a subject
def get_mri_path(subject_id, mri_base_dir, verbose=True):
    """Return the path of a subject's T88_111 ``.img`` scan, or None.

    Every sub-directory of ``mri_base_dir`` is treated as a disc folder and
    probed for ``<disc>/<subject_id>/PROCESSED/MPRAGE/T88_111``. Inside the
    first such folder that exists, the first ``.img`` file whose name
    contains ``subject_id`` is returned.

    Args:
        subject_id: Subject identifier; also the name of the subject folder.
        mri_base_dir: Directory that contains the disc folders.
        verbose: When True (the default), print every folder that is probed.

    Returns:
        Full path of the matching ``.img`` file, or None when nothing matches
        or ``subject_id`` is not a non-empty string.
    """
    # a missing ID (e.g. NaN coming from pandas) or an empty string cannot
    # name a folder; an empty string would also match every .img file
    if not isinstance(subject_id, str) or not subject_id:
        return None
    # os.listdir returns entries in arbitrary order; sorting makes the chosen
    # disc (and file, below) the same on every run and every platform
    for disc_name in sorted(os.listdir(mri_base_dir)):
        disc_path = os.path.join(mri_base_dir, disc_name)
        # skip non-directory files
        if not os.path.isdir(disc_path):
            continue
        # path to the MRI scan -> fixed layout below each subject folder
        mri_path = os.path.join(disc_path, subject_id, "PROCESSED", "MPRAGE", "T88_111")
        if verbose:
            print(f"Looking for MRI in: {mri_path}")
        # isdir (not exists): a plain file at this location cannot be listed
        if not os.path.isdir(mri_path):
            continue
        # look for .img files in the directory that contain the subject ID
        for file_name in sorted(os.listdir(mri_path)):
            if file_name.endswith(".img") and subject_id in file_name:
                # return full path of the .img file
                return os.path.join(mri_path, file_name)
    # return None if MRI scan is not found
    return None

# Resolve the scan location of every subject and store it in a new column;
# the base directory is forwarded to get_mri_path as an extra positional argument.
binary_df['MRI_Path'] = binary_df['ID'].apply(get_mri_path, args=(mri_dir,))

# Subjects whose lookup returned None appear as missing values in the new column.
missing_mri = binary_df.loc[binary_df['MRI_Path'].isna()]
if missing_mri.empty:
    # every subject has a scan path
    print("All MRI scans found successfully.")
else:
    # list the ID and class of each subject without a scan path
    print("MRI scans not found for the following subjects:")
    print(missing_mri.loc[:, ['ID', 'Class']])

# Destination of the extended table (original columns plus MRI_Path).
mri_paths_csv = r"C:\Users\gjkku\OneDrive\Documenten\CSAI year 3\Thesis\binary_with_mri_paths.csv"
# Write it tab-separated, like the input file, and without the row index.
binary_df.to_csv(mri_paths_csv, sep='\t', index=False)
print(f"MRI paths saved to {mri_paths_csv}")

Looking for MRI in: C:\Users\gjkku\OneDrive\Documenten\CSAI year 3\Thesis\Dataset(extracted)\disc1\OAS1_0001_MR1\PROCESSED\MPRAGE\T88_111
Looking for MRI in: C:\Users\gjkku\OneDrive\Documenten\CSAI year 3\Thesis\Dataset(extracted)\disc1\OAS1_0002_MR1\PROCESSED\MPRAGE\T88_111
Looking for MRI in: C:\Users\gjkku\OneDrive\Documenten\CSAI year 3\Thesis\Dataset(extracted)\disc1\OAS1_0003_MR1\PROCESSED\MPRAGE\T88_111
Looking for MRI in: C:\Users\gjkku\OneDrive\Documenten\CSAI year 3\Thesis\Dataset(extracted)\disc1\OAS1_0004_MR1\PROCESSED\MPRAGE\T88_111
Looking for MRI in: C:\Users\gjkku\OneDrive\Documenten\CSAI year 3\Thesis\Dataset(extracted)\disc1\OAS1_0005_MR1\PROCESSED\MPRAGE\T88_111
Looking for MRI in: C:\Users\gjkku\OneDrive\Documenten\CSAI year 3\Thesis\Dataset(extracted)\disc1\OAS1_0006_MR1\PROCESSED\MPRAGE\T88_111
Looking for MRI in: C:\Users\gjkku\OneDrive\Documenten\CSAI year 3\Thesis\Dataset(extracted)\disc1\OAS1_0007_MR1\PROCESSED\MPRAGE\T88_111
Looking for MRI in: C:\Users\gjkku