In [None]:
import os
import shutil

import pandas as pd

## Just checking the amount of PD and control subjects

In [None]:
# Count how many subject folders each class directory of the original dataset holds.
data_path = "/home/Data/Datasets/Parkinson/radiological/PPMI/spect-mri/original"
clases = os.listdir(data_path)

for clase in clases:
    # Subjects of a class live under its nested "PPMI" directory.
    n_subjects = len(os.listdir(os.path.join(data_path, clase, "PPMI")))
    print("clase: ", clase)
    print("subjects: ", n_subjects)

# Reading the corresponding patients information

**Regarding particular patients of interest**

In [None]:
# Gather the case folders of every split/group pair and convert their names to ints.
gen_path = "../../../../../../Datasets/Parkinson/radiological/PPMI/spect-mri/filtered"
splits = ["test", "train"]
groups = ["control", "parkinson"]
train_control_cases, train_parkinson_cases, test_control_cases, test_parkinson_cases = [], [], [], []

# (split, group) -> raw folder names, filled in the same order as the nested loops.
listed = {}
for split in splits:
    for group in groups:
        cases = "/".join([gen_path, split, group, "parcellation/preprocessed/full_rois/mri_png"])
        listed[(split, group)] = os.listdir(cases)

# Folder names are cast to int so they can be matched against the PATNO column later.
train_control_cases = [int(name) for name in listed[("train", "control")]]
test_control_cases = [int(name) for name in listed[("test", "control")]]
train_parkinson_cases = [int(name) for name in listed[("train", "parkinson")]]
test_parkinson_cases = [int(name) for name in listed[("test", "parkinson")]]

In [None]:
# Show the case IDs of each group, train list first, then test list.
# The separator was the literal text "/n"; it is now a real newline.
print("====== control ======")
print(train_control_cases)
print("\n")
print(test_control_cases)
print("====== parkinson ======")
print(train_parkinson_cases)
print("\n")
print(test_parkinson_cases)

## General information

In [None]:
# Load the MDS-UPDRS Part III table and preview its first rows.
general_path = "../../../../../../Datasets/Parkinson/radiological/PPMI/spect-mri/"
file_path = os.path.join(general_path, "MDS-UPDRS_Part_III_14Feb2024.csv")
updrs3_df = pd.read_csv(file_path)
updrs3_df.head()

In [None]:
# List the columns available in the MDS-UPDRS Part III table.
print(updrs3_df.columns)

## UPDRS information

In [None]:
general_path = "../../../../../../Datasets/Parkinson/radiological/PPMI/spect-mri/"

# CSV locations of MDS-UPDRS parts I, II and IV.
file_path1 = os.path.join(general_path, "MDS-UPDRS_Part_I_Patient_Questionnaire_29Jan2024.csv")
file_path2 = os.path.join(general_path, "MDS_UPDRS_Part_II__Patient_Questionnaire_29Jan2024.csv")
file_path4 = os.path.join(general_path, "MDS-UPDRS_Part_IV__Motor_Complications_29Jan2024.csv")

# Load each part in turn and print its column names.
print("UPDRS questionnarie PART I")
updrs1_df = pd.read_csv(file_path1)
print(updrs1_df.columns)

print("UPDRS questionnarie PART II")
updrs2_df = pd.read_csv(file_path2)
print(updrs2_df.columns)

print("UPDRS questionnarie PART IV")
updrs4_df = pd.read_csv(file_path4)
print(updrs4_df.columns)

## Patients information

In [None]:
# Keep only the Part III rows whose PATNO belongs to each split/group case list.
patno = updrs3_df["PATNO"]
ctrl_test_df = updrs3_df.loc[patno.isin(test_control_cases)]
ctrl_train_df = updrs3_df.loc[patno.isin(train_control_cases)]
pd_test_df = updrs3_df.loc[patno.isin(test_parkinson_cases)]
pd_train_df = updrs3_df.loc[patno.isin(train_parkinson_cases)]

### UPDRS and H&Y scales

In [None]:
# Number of non-null entries per column for every test-control patient.
rows_by_patient = ctrl_test_df.groupby("PATNO")
filtered_ctrl_test_df = rows_by_patient.count()
filtered_ctrl_test_df.head()

In [None]:
# Per-patient count of non-null NHY entries (taken from the grouped count table).
filtered_ctrl_test_df["NHY"]

**Angel estadio**

In [None]:
# Load estadio.csv and count the non-null entries per patient.
estadio_csv = os.path.join(general_path, "estadio.csv")
df = pd.read_csv(estadio_csv)
df.groupby("PATNO").count()

### Demographic info

In [None]:
def _enroll_age_stats(frame):
    """Return (mean, std) of ENROLL_AGE using one row per patient.

    The frames passed in are row subsets of updrs3_df selected by PATNO, so a
    patient may appear on several rows. De-duplicating on PATNO stops patients
    with more rows from weighing more in the statistics.
    """
    ages = frame.drop_duplicates(subset="PATNO")["ENROLL_AGE"]
    return ages.mean(), ages.std()


# Enrollment-age mean and standard deviation for each split and group.
print("========== Train population ==========")
train_ctrl_age_mean, train_ctrl_age_std = _enroll_age_stats(ctrl_train_df)
train_pd_age_mean, train_pd_age_std = _enroll_age_stats(pd_train_df)

print("control age: ", train_ctrl_age_mean, train_ctrl_age_std)
print("parkinson age: ", train_pd_age_mean, train_pd_age_std)

print("========== Test population ==========")
test_ctrl_age_mean, test_ctrl_age_std = _enroll_age_stats(ctrl_test_df)
test_pd_age_mean, test_pd_age_std = _enroll_age_stats(pd_test_df)

print("control age: ", test_ctrl_age_mean, test_ctrl_age_std)
print("parkinson age: ", test_pd_age_mean, test_pd_age_std)

**This notebook is for many brain parcellations translation purposes** 

# CycleGan data moving

We have 58 subjects in each of the control and Parkinson groups. We want to train:
* **========== experiment 1: ==========** 
* Control to parkinson translation
* Domain A: control
* Domain B: parkinson
To train this network we need (from experience) around 1800 frames per domain. Each subject has 182 slices, so we keep only the following slice interval around the central slice (91):

* low_rate = 91-15
* up_rate = 91+15

This yields 1798 slices per domain (58 subjects × 31 slices), with each subject represented by its 31 central slices.

* **========== experiment 2: ==========** 
* MRI to SPECT domain translation
In this case we want about 900 MRI images for the control subjects and the same amount for the Parkinson subjects. Since we have 58 subjects in each group, we need the following:

900/58 ≈ 15.5, rounded up to 16 images per subject for both the MRI control and Parkinson groups. So, in the end (16 × 58 = 928) we will have

* Domain A and B (MRI, Spect, respectively): 928 mri and spect images for control and the same amount for parkinson in each domain.

In [None]:
# Original EMBC cases: PD training cases of the preprocessed full-ROI SPECT set.
root_path = "/home/Data/Datasets/Parkinson/radiological/PPMI/spect-mri/filtered/"
original_embc_cases = root_path + "train/parkinson/parcellation/preprocessed/full_rois/spect_png/"
embc_cases = os.listdir(original_embc_cases)
embc_cases.sort()
print("amount of original cases: ", len(embc_cases))

# Every PD case found in the extension folder.
extension_embc_cases = root_path + "train/parkinson/extension/spect_png/"
augmented_cases = os.listdir(extension_embc_cases)
augmented_cases.sort()
print("amount of augmented_cases: ", len(augmented_cases))

# Cases present only in the extension folder.
extra_cases = list(set(augmented_cases).difference(embc_cases))
print("amount of extra cases: ", len(extra_cases))

In [None]:
# Selection of the data to copy for the CycleGAN experiments.
split = "train"
group = "parkinson"
modality = "spect_png"
experiment = "full_rois"
technique = "preprocessed"

source_path = "../../../../../../Datasets/Parkinson/radiological/PPMI/spect-mri/filtered/"
current_root_path = os.path.join(source_path, split, group, "parcellation", technique, experiment, "preprocessed2", modality)
cases = os.listdir(current_root_path)
cases.sort()
print("Number of cases: ", len(cases))


# Destination folder is named "<split>_<modality prefix>", e.g. "train_spect".
target_folder = split + "_" + modality.partition("_")[0]
if experiment == "full_rois":
    save_path = "/".join(["../data", technique, experiment, "mri_to_spect/preprocessed2", target_folder])
else:
    save_path = "/".join(["../data", technique, experiment, "mri_to_spect", target_folder])

print("Saving to: ", save_path)

In [None]:
# Copy a window of slices around the central frame of every EMBC case into save_path.
# The window depends only on the modality, so it is computed once before the loop.
if modality == "mri_png":
    half_frame = 145
    low_rate = half_frame - 8
    up_rate = half_frame + 8
else:
    half_frame = 45
    low_rate = half_frame - 10
    up_rate = half_frame + 5

os.makedirs(save_path, exist_ok=True)

for case in embc_cases:
    print("Case: ", case)
    case_path = os.path.join(current_root_path, case)
    files = sorted(os.listdir(case_path))
    can_files = len(files)
    print("Number of files: ", can_files)

    # low_rate is used as a 1-based position, hence the -1 in the slice start.
    for file in files[low_rate-1:up_rate]:
        file_path = os.path.join(case_path, file)
        # shutil.copy replaces the shell "cp": paths with spaces work and failures raise.
        shutil.copy(file_path, save_path)

print("Done!")

In [None]:
# Show the central frame index that the copy loop used for the current modality.
half_frame

# Classifier data setting
In this part we will create the csv file for the T1 and spect modalities
* **For T1 images:**

In [None]:
import os
import pandas as pd

In [None]:
# Alternative root kept for reference:
#root_path = "../../../../../../Datasets/Parkinson/radiological/PPMI/spect-mri/filtered/"
root_path = "/home/Data/franklin/Doctorado/parkinson/projects/T1-SPECT-PD-translation/imgs_results/"
path = "full_rois/preprocessed/mri_to_spect/prodromal_mri_filtered_slices/"
classes_dir = os.path.join(root_path, path)
clases = os.listdir(classes_dir)
print(len(clases))

In [None]:
# Write one "<image_path>,<class>" row per image of every class and case.
# NOTE(review): `split`, `experiment` and `modality` are not set in this cell; they
# keep whatever value the last executed cell gave them - confirm before running.
# The `with` block closes the file even if a directory listing fails midway.
with open(root_path + "/raw_control_pd_MRI_fullRois_TRAIN.csv", "w") as file_csv2:
    for clase in clases:
        cases_path = os.path.join(root_path, split, clase, "parcellation/raw", experiment, modality)
        cases = sorted(os.listdir(cases_path))
        for case in cases:
            case_path = os.path.join(cases_path, case)
            images = sorted(os.listdir(case_path))
            for image in images:
                image_path = os.path.join(case_path, image)
                col_name = ',' + clase + "\n"
                file_csv2.write(image_path + col_name)

In [None]:
# Load a headerless "<path>,<label>" CSV and count the images per label.
labels_csv = root_path + "/raw_control_pd_SPECT_fullRois_TEST.csv"
df = pd.read_csv(labels_csv, header=None)
df.columns = ["path", "label"]
df.groupby("label")[["path"]].count()

**For synthetic images**

In [None]:
root_path = "../imgs_results/full_rois/preprocessed/mri_to_spect/mri_filtered_slices/"
#modality = "test_mri"

# Only directories are cases. The CSV below is written into root_path itself, so on
# a re-run it would otherwise be picked up as a case and break os.listdir.
cases = [entry for entry in os.listdir(root_path)
         if os.path.isdir(os.path.join(root_path, entry))]

# One "<image_path>,<label>" row per image; the label is the file-name prefix
# before the first underscore. The `with` block guarantees the file is closed.
with open(root_path + "mri_to_spect_filtered.csv", "w") as file_csv2:
    for case in cases:
        case_path = os.path.join(root_path, case)
        imgs = sorted(os.listdir(case_path))
        for img in imgs:
            image_path = os.path.join(case_path, img)
            clase = img.split("_")[0]
            col_name = ',' + clase + "\n"
            file_csv2.write(image_path + col_name)

**Full train and test synthetic versions**

In [None]:
import os
import pandas as pd

root_path = "../imgs_results/full_rois/preprocessed/mri_to_spect/"
split = "train"
folder_path = os.path.join(root_path, "full_"+split+"_mri_png")

imgs = os.listdir(folder_path)

# One "<image_path>,<label>" row per image; the label is the file-name prefix
# before the first underscore. The `with` block closes the file even on error.
with open(root_path + "full_"+split + "_mriSpectFullRois.csv", "w") as file_csv2:
    for img in imgs:
        image_path = os.path.join(folder_path, img)
        clase = img.split("_")[0]
        col_name = ',' + clase + "\n"
        file_csv2.write(image_path + col_name)

In [None]:
# Count the images per label in the full synthetic CSV of the current split.
labels_csv = root_path + "full_" + split + "_mriSpectFullRois.csv"
df = pd.read_csv(labels_csv, header=None)
df.columns = ["path", "label"]
df.groupby("label")[["path"]].count()

## For prodromal subjects

In [None]:
import os
import pandas as pd

# Locate the prodromal slice folder and list its cases in sorted order.
root_path = "/home/Data/franklin/Doctorado/parkinson/projects/T1-SPECT-PD-translation/imgs_results/full_rois/preprocessed/mri_to_spect/"
split = "prodromal_mri_filtered_slices"
current_path = os.path.join(root_path, split)
cases = os.listdir(current_path)
cases.sort()
print(len(cases))

In [None]:
# Write one "<image_path>,prodromal" row per image of every prodromal case.
# The `with` block closes the file even if a directory listing fails midway.
with open(root_path + "/prodromal_synthetic_spect_fullRois_TRAIN.csv", "w") as file_csv:
    for case in cases:
        case_path = os.path.join(current_path, case)
        images = sorted(os.listdir(case_path))
        for image in images:
            image_path = os.path.join(case_path, image)
            col_name = ','+ "prodromal\n"
            file_csv.write(image_path + col_name)

In [None]:
# Count the images per label in the prodromal extension CSV.
labels_csv = root_path + "embc_extension/extension_prodromal_MRI_fullRois_TRAIN.csv"
df = pd.read_csv(labels_csv, header=None)
df.columns = ["path", "label"]
df.groupby("label")[["path"]].count()

## For SWEDD subjects

In [None]:
# Select the SWEDD extension folder and list its cases in sorted order.
root_path = "../../../../../../Datasets/Parkinson/radiological/PPMI/spect-mri/filtered/"
split = "swedd"
experiment = "extension"
modality = "spect_png"#<----- change this to mri_png when needed
swedd_dir = os.path.join(root_path, split, experiment, modality)
cases = os.listdir(swedd_dir)
cases.sort()
print(len(cases))

In [None]:
# Write one "<image_path>,swedd" row per image of every SWEDD case.
# The `with` block closes the file even if a directory listing fails midway.
with open(root_path + "/swedd_SPECT_preprocessed2_TEST.csv", "w") as file_csv:
    for case in cases:
        case_path = os.path.join(root_path, split, experiment, modality, case)
        images = sorted(os.listdir(case_path))
        for image in images:
            image_path = os.path.join(case_path, image)
            col_name = ','+ "swedd\n"
            file_csv.write(image_path + col_name)

In [None]:
# Count the images per label in the SWEDD CSV.
labels_csv = root_path + "/swedd_SPECT_preprocessed2_TEST.csv"
df = pd.read_csv(labels_csv, header=None)
df.columns = ["path", "label"]
df.groupby("label")[["path"]].count()

### **For cycleGan**
#### Test_control mri

In [None]:
path = "../data/preprocessed/full_rois/mri_to_spect/"
# Named `subset` rather than `set`: rebinding the built-in `set` to a string makes
# every later `set(...)` call in the notebook fail with a TypeError.
subset = "test"
group = "mri"#"control"
folder_name = subset + "_" + group

images = os.listdir(path + folder_name)

# One "<image_path>,<label>" row per image; the label is the file-name prefix
# before the first underscore. The `with` block closes the file even on error.
with open(path + folder_name + ".csv", "w") as file_csv2:
    for image in images:
        image_path = os.path.join(path, folder_name, image)
        clase = image.split("_")[0]
        col_name = ',' + clase + "\n"
        file_csv2.write(image_path + col_name)

#### MRI filtered slices

In [None]:
path = "../imgs_results/full_rois/mri_to_spect/mri_filtered_slices/"
# Only directories are groups. The CSV below is written into `path` itself, so on
# a re-run it would otherwise be listed as a group and break os.listdir.
groups = [entry for entry in os.listdir(path) if os.path.isdir(os.path.join(path, entry))]

# One "<image_path>,<label>" row per image; the label is the file-name prefix
# before the first underscore. The `with` block closes the file even on error.
with open(path + "mri_filtered_slices.csv", "w") as file_csv2:
    for group in groups:
        images = os.listdir(path + group)

        for image in images:
            image_path = os.path.join(path, group, image)
            clase = image.split("_")[0]
            col_name = ',' + clase + "\n"
            file_csv2.write(image_path + col_name)

In [None]:
import pandas as pd

# Count the images per label in the filtered-slices CSV.
labels_csv = path + "mri_filtered_slices.csv"
df = pd.read_csv(labels_csv, header=None)
df.columns = ["path", "label"]
df.groupby("label")[["path"]].count()

## Moving preprocessed files 
The aim of this section is to move the outputs of all the preprocessing steps into a single folder.

In [None]:
# Point at the MRI folder of the chosen split/group and list its cases in sorted order.
root_path = "../../../../../../Datasets/Parkinson/radiological/PPMI/spect-mri/filtered"
split = "test"
group = "control"
modality = "mri"
current_root_path = os.path.join(root_path, split, group, modality)
cases = os.listdir(current_root_path)
cases.sort()

In [None]:
# Inspect the .nii files in the "preprocessed" folder of the first two cases.
for case in cases[:2]:
    preprocessed_path = os.path.join(current_root_path, case, "preprocessed")
    files = os.listdir(preprocessed_path)
    files.sort()
    nii_files = list(filter(lambda name: name.endswith(".nii"), files))
    print("nii files: ", nii_files)
    print("amount of nii: ", len(nii_files))

**Until here**

# Making the csv file for preprocessed SPECT files in version 2

In [None]:
root_path = "/home/Data/Datasets/Parkinson/radiological/PPMI/spect-mri/filtered/"
split = "train"
groups = ["control", "parkinson"]

# Write one "<image_path>,<group>" row per SPECT image of every case in each group.
# NOTE(review): the CSV name says "Preprocessed2" but images are read from
# "parcellation/raw" - confirm this is the intended source folder.
# The `with` block closes the file even if a directory listing fails midway.
with open(root_path + "pdControlSpectPreprocessed2TRAIN.csv", "w") as file_csv2:
    for group in groups:
        cases_path = os.path.join(root_path, split, group, "parcellation/raw/full_rois/spect_png/")
        cases = os.listdir(cases_path)
        for case in cases:
            case_path = os.path.join(cases_path, case)
            images = os.listdir(case_path)
            for image in images:
                image_path = os.path.join(case_path, image)
                col_name = ',' + group + "\n"
                file_csv2.write(image_path + col_name)

# Know the PD distribution regarding the H&Y scale

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import shutil
import pandas as pd
import os

### From here for Prodromal and SWEDD subjects

In [None]:
# Load the SWEDD stage table and preview its first rows.
root_path = "/home/Data/Datasets/Parkinson/radiological/PPMI/spect-mri/filtered"
csv_file = "".join([root_path, "/sweddStagesEmbcExtension.csv"])
extra_df = pd.read_csv(csv_file)
extra_df.head()

In [None]:
# Save the patient ID and its H&Y stage as a tab-separated txt file.
columns = ["Pat_id", "H & Y Stage"]
swedd_txt = root_path + "/sweddStages.txt"
extra_df.to_csv(swedd_txt, columns=columns, sep="\t", index=False)

### Until here

In [None]:
# Load the EMBC baseline and extension tables and report their sizes.
filtered_dir = "/home/Data/Datasets/Parkinson/radiological/PPMI/spect-mri/filtered/"
baseline_csv_file = filtered_dir + "embcBaselinev2.csv"
extended_csv = filtered_dir + "embcExtensionv2.csv"

bases_line_df, extended_df = pd.read_csv(baseline_csv_file), pd.read_csv(extended_csv)
print("len of base: ", len(bases_line_df))
print("len extended: ", len(extended_df))

### From here we get the extra PD subjects

In [None]:
base_patients = bases_line_df["Pat_id"].unique()
# Keep only the extension rows whose patient is not part of the baseline.
is_new_patient = ~extended_df["Pat_id"].isin(base_patients)
extended_patients_df = extended_df[is_new_patient]
print("len extended: ", len(extended_patients_df))

### Until here

In [None]:
# Display the baseline patient table.
bases_line_df

In [None]:
# Display the rows of patients that appear only in the extension table.
extended_patients_df

### Data split regarding the H&Y scale

In [None]:
# Histogram of the H&Y stage of the extension-only patients.
hy_stage_values = extended_patients_df["H & Y Stage"].values
plt.hist(hy_stage_values)

Creating the train/test sets for the base line approach regarding the H&Y scale

In [None]:
# Split the baseline patients by H&Y stage and report the size of each stage.
hy_stage = bases_line_df["H & Y Stage"]
stage_1 = bases_line_df[hy_stage == 1.0]
stage_2 = bases_line_df[hy_stage == 2.0]
stage_3 = bases_line_df[hy_stage == 3.0]
stage_4 = bases_line_df[hy_stage == 4.0]
stage_5 = bases_line_df[hy_stage == 5.0]

stage_sizes = [len(stage_1), len(stage_2), len(stage_3), len(stage_4), len(stage_5)]
for stage_number, stage_size in enumerate(stage_sizes, start=1):
    print("on stage {}: ".format(stage_number), stage_size)
print("total: ", sum(stage_sizes))

In [None]:
# 80% of each stage: the number of subjects a train split would take from it.
for stage_number, stage_frame in enumerate((stage_1, stage_2, stage_3, stage_4, stage_5), start=1):
    print("on stage {}: ".format(stage_number), len(stage_frame)*0.8)

In [None]:
# With the 80/20 train/test setting the PD distribution over the H&Y scale must be
# preserved; these are the training counts for stages 1 to 4 of the rating scale.
sum((8, 47, 1, 2))

In [None]:
# Randomly choose PD subjects for the train/test sets, stratified by H&Y stage.
random_seed = 14

# Number of training subjects drawn from each stage (stages 1 to 4 only).
stg_1_sample = 8
stg_2_sample = 47
stg_3_sample = 1
stg_4_sample = 2
#stg_5_sample = 1

# Train: a seeded random sample from each stage.
stg_1_train = stage_1.sample(stg_1_sample, random_state=random_seed)
stg_2_train = stage_2.sample(stg_2_sample, random_state=random_seed)
stg_3_train = stage_3.sample(stg_3_sample, random_state=random_seed)
stg_4_train = stage_4.sample(stg_4_sample, random_state=random_seed)

stg_1_patients_train = list(stg_1_train["Pat_id"].values)
stg_2_patients_train = list(stg_2_train["Pat_id"].values)
stg_3_patients_train = list(stg_3_train["Pat_id"].values)
stg_4_patients_train = list(stg_4_train["Pat_id"].values)

train_df = pd.concat([stg_1_train, stg_2_train, stg_3_train, stg_4_train])
print("for train: ", len(train_df))

# Test: every patient of a stage that was not sampled for training.
stg_1_test = stage_1[~stage_1["Pat_id"].isin(stg_1_patients_train)]
stg_2_test = stage_2[~stage_2["Pat_id"].isin(stg_2_patients_train)]
stg_3_test = stage_3[~stage_3["Pat_id"].isin(stg_3_patients_train)]
stg_4_test = stage_4[~stage_4["Pat_id"].isin(stg_4_patients_train)]

stg_1_patients_test = list(stg_1_test["Pat_id"].values)
stg_2_patients_test = list(stg_2_test["Pat_id"].values)
stg_3_patients_test = list(stg_3_test["Pat_id"].values)
stg_4_patients_test = list(stg_4_test["Pat_id"].values)

test_df = pd.concat([stg_1_test, stg_2_test, stg_3_test, stg_4_test])
# This line reports the test-set size; it was previously labelled "for train".
print("for test: ", len(test_df))

In [None]:
# Patient IDs of each split as arrays.
train_cases, test_cases = (frame["Pat_id"].values for frame in (train_df, test_df))

In [None]:
# Display the rows assigned to the test split.
test_df

#### **Creating the txt files** 
* For both train/test in the EMBC baseline approach

In [None]:
# Save the patient ID and its H&Y stage of each split as a tab-separated txt file.
columns = ["Pat_id", "H & Y Stage"]
for split_frame, txt_name in ((train_df, "trainPdStagesV2.txt"), (test_df, "testPdStagesV2.txt")):
    split_frame[columns].to_csv(txt_name, sep="\t", index=False)

**From extended version only get the original embc cases**

* For the additional PD subjects in the EMBC extension

In [None]:
gen_path = '/home/Data/Datasets/Parkinson/radiological/PPMI/spect-mri/filtered/'
csv_train = os.path.join(gen_path, 'pdControlSpectPreprocessed2TRAIN.csv')
csv_test = os.path.join(gen_path, 'pdControlSpectPreprocessed2TEST.csv')

# The TRAIN CSV is written by this notebook without a header row, so it is read with
# header=None; otherwise the first image row is consumed as the column names.
# NOTE(review): the TEST CSV is assumed to share that headerless layout - confirm.
extended_train_df = pd.read_csv(csv_train, header=None)
extended_test_df = pd.read_csv(csv_test, header=None)
columns = ["path", "label"]
extended_train_df.columns = columns
extended_test_df.columns = columns

In [None]:
# Original EMBC cases: PD training cases of the preprocessed full-ROI SPECT set.
root_path = "/home/Data/Datasets/Parkinson/radiological/PPMI/spect-mri/filtered/"
original_embc_cases = root_path + "train/parkinson/parcellation/preprocessed/full_rois/spect_png/"
embc_cases = sorted(os.listdir(original_embc_cases))
print("amount of original cases: ", len(embc_cases))

# Every PD case found in the extension folder.
extension_embc_cases = root_path + "train/parkinson/extension/spect_png/"
augmented_cases = sorted(os.listdir(extension_embc_cases))
print("amount of augmented_cases: ", len(augmented_cases))

# Cases present only in the extension folder. Set comprehensions are used instead of
# calling set(): an earlier cell rebinds the name `set` to a string, which would make
# set(...) raise a TypeError when the notebook is run top to bottom.
extra_cases = list({case for case in augmented_cases} - {case for case in embc_cases})
print("amount of extra cases: ", len(extra_cases))

**Getting the dataframe for only the extended patients**

In [None]:
# Number of distinct patients that appear only in the extension table.
extra_1 = pd.unique(extended_patients_df["Pat_id"])
print(len(extra_1))

**Until here**

In [None]:
# Save the patient ID and its H&Y stage of the extension-only patients as a txt file.
gen_path = "/home/Data/Datasets/Parkinson/radiological/PPMI/spect-mri/filtered/"
columns = ["Pat_id", "H & Y Stage"]
extension_txt = gen_path + "extensionPdStagesTest.txt"
extended_patients_df.to_csv(extension_txt, columns=columns, sep="\t", index=False)

### Reading the new train/test sets and move to the save folder.

In [None]:
# Copy the image folders of the selected cases into the stratified-by-stage layout.
root_path = "/home/Data/Datasets/Parkinson/radiological/PPMI/spect-mri/filtered"
save_path = root_path + "/stratifying_stages/"

sets = ["test", "train"]
modalities = ["mri_png", "spect_png"]

# Selector: set to "test_cases" to copy the test split, anything else copies train.
total_cases = "train_cases"

if total_cases == "test_cases":
    print("over test cases")
    total_cases = test_cases
    split = "test"
else:
    print("over train cases")
    total_cases = train_cases
    split = "train"

for case in total_cases:
    print("case: ", case)

    for modality in modalities:
        print("modality: ", modality)
        modality_path1 = root_path + "/" + "train" + "/parkinson/parcellation/raw/full_rois/" + modality + "/" + str(case)
        modality_path2 = root_path + "/" + "test" + "/parkinson/parcellation/raw/full_rois/" + modality + "/" + str(case)

        # Resolve the source folder afresh for every case and modality. The "test"
        # location wins when both exist, as before. If neither exists we stop: the
        # path resolved for the previous case must never be reused, because that
        # would copy another patient's images under this patient's ID.
        if os.path.exists(modality_path2):
            current_modality_path = modality_path2
        elif os.path.exists(modality_path1):
            current_modality_path = modality_path1
        else:
            raise FileNotFoundError(
                "no source folder for case " + str(case) + " (" + modality + "): tried "
                + modality_path1 + " and " + modality_path2
            )

        source_path = current_modality_path
        current_save_path = save_path + split + "/parkinson/" + modality + "/" + str(case)
        print("copying from: ", source_path)
        print("to: ", current_save_path)
        # copytree raises if current_save_path already exists (e.g. on a re-run).
        shutil.copytree(source_path, current_save_path)
    

In [None]:
root_path = "/home/Data/Datasets/Parkinson/radiological/PPMI/spect-mri/filtered/stratifying_stages"
split = "train"
groups = ["control", "parkinson"]
modality = "spect_png"

# Write one "<image_path>,<group>" row per image of every case in each group.
# The `with` block closes the file even if a directory listing fails midway.
with open(root_path + "/pdControlSpectTRAIN.csv", "w") as file_csv2:
    for group in groups:
        print("group: ", group)
        # Control cases keep the original parcellation layout; parkinson cases sit
        # directly under <split>/<group>/<modality>.
        if group == "control":
            modality_path = os.path.join(root_path, split, group, "parcellation/raw/full_rois/", modality)
        else:
            modality_path = os.path.join(root_path, split, group, modality)
        # Both groups are sorted so the row order of the CSV is reproducible.
        cases = sorted(os.listdir(modality_path))

        for case in cases:
            case_path = os.path.join(modality_path, case)
            images = sorted(os.listdir(case_path))
            for image in images:
                image_path = os.path.join(case_path, image)
                col_name = ',' + group + "\n"
                #print(image_path + col_name)
                file_csv2.write(image_path + col_name)
        
    

### Creating the csv file for the paper baseline

In [None]:
path = "/home/Data/Datasets/Parkinson/radiological/spect_paper"
split = "all_2d_val"
groups = ["HC", "PD"]

# Write one "<image_path>,<group>" row per image found in each group folder.
# The `with` block closes the file even if a directory listing fails midway.
with open(path + "/pdControlSpectTEST.csv", "w") as file_csv2:
    for group in groups:
        print("group: ", group)
        group_path = path + "/" + split + "/" + group
        images = os.listdir(group_path)
        for image in images:
            img_path = group_path + "/" + image
            col_name = ',' + group + "\n"
            file_csv2.write(img_path + col_name)

# Unzip MRI-DTI data

In [None]:
from zipfile import ZipFile
from tqdm import tqdm

In [None]:
# List the entries of the MRI-DTI download folder and display them.
gen_path = "/home/Data/Datasets/Parkinson/radiological/PPMI/mri-dti/original/pd_mri_dti"
files = [entry for entry in os.listdir(gen_path)]
files

In [None]:
from zipfile import is_zipfile

# Unzip every archive into gen_path.
# Archives are extracted into the same folder that was listed, so on a re-run
# `files` also contains extracted files and folders; those are skipped instead of
# making ZipFile fail.
for file in tqdm(files):
    file_path = gen_path + "/" + file
    if not is_zipfile(file_path):
        continue
    with ZipFile(file_path, mode='r') as zip_ref:
        zip_ref.extractall(gen_path)