In [2]:
import os
import pandas as pd

def organize_symlinks(original_directory, target_directory, excel_file):
    """Group per-patient folders by diagnosis using symbolic links.

    Reads ``excel_file`` with pandas, creates one sub-folder of
    ``target_directory`` for every unique value of the ``Diagnosis`` column,
    and, for every row, links ``original_directory/<PTID>`` into the folder
    of that row's diagnosis.

    Args:
        original_directory: Directory holding one folder per PTID.
        target_directory: Directory that receives the per-diagnosis folders.
        excel_file: Spreadsheet with ``PTID`` and ``Diagnosis`` columns.

    Returns:
        None. A message is printed for each PTID whose folder is missing or
        whose link path is already taken.
    """
    df = pd.read_excel(excel_file)

    # A symlink stores its target verbatim, and a relative target is resolved
    # relative to the directory containing the link, not the working
    # directory. Anchor relative inputs so the created links are not broken.
    if not os.path.isabs(original_directory):
        original_directory = os.path.join(os.getcwd(), original_directory)

    # Create one folder per unique diagnosis.
    for diagnosis in df['Diagnosis'].unique():
        os.makedirs(os.path.join(target_directory, str(diagnosis)), exist_ok=True)

    # Link every patient folder into the folder of its diagnosis.
    for raw_ptid, raw_diagnosis in zip(df['PTID'], df['Diagnosis']):
        ptid = str(raw_ptid)
        diagnosis_folder = os.path.join(target_directory, str(raw_diagnosis))
        ptid_folder = os.path.join(original_directory, ptid)

        if not os.path.isdir(ptid_folder):
            print(f"Folder for PTID {ptid} not found.")
            continue

        symlink_path = os.path.join(diagnosis_folder, ptid)
        # lexists() is also True for a dangling symlink; exists() is not,
        # which would let os.symlink() fail with FileExistsError.
        if os.path.lexists(symlink_path):
            print(f"Symlink for PTID {ptid} already exists in {diagnosis_folder}.")
        else:
            os.symlink(ptid_folder, symlink_path)

# Usage: link every ADNI patient folder into a folder named after the
# diagnosis listed for it in the spreadsheet.
original_dir = '/Volumes/Expansion/datasets/adni/neuroimaging/all_patients'
target_dir = '/Volumes/Expansion/datasets/adni/neuroimaging/all_patients_by_diagnosis_symlink'
excel_file = '/Volumes/Expansion/datasets/adni/metadata/raw_information/ADNI_1_atrophy_grades.xlsx'
organize_symlinks(
    original_directory=original_dir,
    target_directory=target_dir,
    excel_file=excel_file,
)


Symlink for PTID 128_S_1430 already exists in /Volumes/Expansion/datasets/adni/neuroimaging/all_patients_by_diagnosis_symlink/Alzheimer.


In [9]:
import glob
import os 
# Placeholders kept from the original cell; `all_patients` is reassigned below.
all_patients = []
unfinished_patients = []

base_dir = '/Volumes/Expansion/datasets/adni/neuroimaging/all_patients'
sym_dir = '/Volumes/Expansion/datasets/adni/neuroimaging/all_patients_by_diagnosis_symlink'

# Entries already present under each diagnosis folder.
sym_patients_mci = os.listdir(f"{sym_dir}/MCI")
sym_patients_ad = os.listdir(f"{sym_dir}/Alzheimer")
sym_patients_normal = os.listdir(f"{sym_dir}/Normal")

# One flat list of every linked entry, in MCI, Alzheimer, Normal order.
sympts = [*sym_patients_mci, *sym_patients_ad, *sym_patients_normal]

all_patients = os.listdir(base_dir)

# Entries of base_dir that also appear in one of the diagnosis folders.
# NOTE(review): this is an intersection, i.e. patients that ARE already
# linked; the name `unfinished` suggests a set difference may have been
# intended — confirm before relying on it.
unfinished = set(all_patients) & set(sympts)
print(unfinished)
    

{'002_S_1268', '099_S_0352', '062_S_1182', '941_S_1295', '033_S_0516', '041_S_0721', '141_S_1137', '073_S_1357', '127_S_1032', '027_S_0307', '002_S_0782', '041_S_0549', '011_S_0002', '011_S_0021', '067_S_0076', '070_S_4708', '029_S_0836', '130_S_1201', '126_S_1340', '067_S_0290', '128_S_0227', '024_S_1063', '007_S_1222', '130_S_0449', '006_S_4192', '016_S_1149', '094_S_1164', '022_S_1394', '027_S_0116', '002_S_0955', '141_S_0810', '033_S_1279', '041_S_1260', '020_S_1288', '041_S_0898', '131_S_0497', '137_S_0366', '022_S_0129', '141_S_0696', '127_S_0259', '027_S_0179', '094_S_0692', '126_S_0891', '005_S_0814', '033_S_0906', '114_S_0979', '002_S_1018', '127_S_0431', '041_S_1412', '137_S_0443', '011_S_0003', '002_S_1280', '009_S_1334', '052_S_1352', '005_S_1341', '007_S_0070', '052_S_1250', '036_S_1001', '020_S_0899', '099_S_0958', '022_S_0044', '003_S_0981', '128_S_0528', '033_S_1283', '041_S_0679', '037_S_0377', '002_S_0685', '141_S_0982', '131_S_0441', '003_S_0908', '021_S_0642', '123_

In [11]:
# Link every patient in `unfinished` into the 'Additional' folder.
smdir = '/Volumes/Expansion/datasets/adni/neuroimaging/all_patients_by_diagnosis_symlink/Additional'
# Nothing else in this cell creates the destination folder, and os.symlink()
# fails with FileNotFoundError when the link's parent directory is missing.
os.makedirs(smdir, exist_ok=True)
for pt in unfinished:
    pt_folder = os.path.join(base_dir, pt)
    sym_folder = os.path.join(smdir, pt)
    # Only real patient directories are linked; stray files are reported.
    if os.path.isdir(pt_folder):
        # lexists() also detects a dangling symlink, which exists() reports
        # as absent and which would make os.symlink() raise FileExistsError.
        if not os.path.lexists(sym_folder):
            os.symlink(pt_folder, sym_folder)
        else:
            # Report the directory holding the link, not the source folder.
            print(f"Symlink for PTID {pt} already exists in {smdir}.")
    else:
        print(f"Folder for PTID {pt} not found.")

In [12]:
# Number of patient IDs collected in `unfinished`.
print(len(unfinished))

761
