In [1]:
import os
import pandas as pd
import numpy as np
from datetime import datetime

# Utils

In [2]:
ctrl_mri_spect_df = pd.read_csv("../../../../../../Datasets/Parkinson/radiological/PPMI/spect-mri/controlMri-Spect12_01_2023.csv")
ctrl_mri_spect_df['Acq Date'] = pd.to_datetime(ctrl_mri_spect_df['Acq Date'])

pd_mri_spect_df = pd.read_csv("../../../../../../Datasets/Parkinson/radiological/PPMI/spect-mri/pdMriSpect_12_01_2023.csv")
pd_mri_spect_df['Acq Date'] = pd.to_datetime(pd_mri_spect_df['Acq Date'])

print(len(ctrl_mri_spect_df))
print(len(pd_mri_spect_df))

698
4229


In [3]:
def count_t1_types(df):
    print("T2 in corrected EPI: ", len(df[df["Description"].str.startswith("T2 in corrected EPI")]))
    print("T2 in T1: ", len(df[df["Description"].str.startswith("T2 in T1")]))
    print("T1-anatomical: ", len(df[df["Description"].str.startswith("T1-anatomical")]))

In [4]:
def count_t1_types_with_spect(df):

    #================= to know subjects with T1-anatomical and spect modality =================
    filtered_mri = df[(df['Modality'] == 'MRI') & df['Description'].str.startswith('T1-anatomical')]
    filtered_spect = df[(df['Modality'] == 'SPECT') & df['Description'].str.contains('DaTSCAN')]

    mri_subjects = sorted(filtered_mri['Subject'].tolist())
    spect_subjects = sorted(filtered_spect['Subject'].tolist())

    print("total t1-anatomical subjects: ", len(mri_subjects))
    print("total spect subjects: ", len(spect_subjects))

    #sujetos con T1-anatomical y DaTSCAN
    t1_anat_spect_subjects = sorted(list(set(mri_subjects) & set(spect_subjects)))
    print("total subjects with T1-anatomical and DaTSCAN: ", len(t1_anat_spect_subjects))

    #================= to know subjects with T2-in T1 and spect modality =================
    filtered_mri = df[(df['Modality'] == 'MRI') & df['Description'].str.startswith('T2 in T1')]

    mri_subjects_t2_in_t1 = sorted(filtered_mri['Subject'].tolist())

    print("total t2 in t1 subjects: ", len(mri_subjects_t2_in_t1))
    print("total spect subjects: ", len(spect_subjects))

    #sujetos con T2 in T1 y DaTSCAN
    t2_in_t1_spect_subjects = sorted(list(set(mri_subjects_t2_in_t1) & set(spect_subjects)))
    print("total subjects with T1-anatomical and DaTSCAN: ", len(t2_in_t1_spect_subjects))

    #================= to know subjects with T2 in corrected EPI and spect modality =================
    filtered_mri = df[(df['Modality'] == 'MRI') & df['Description'].str.startswith('T2 in corrected EPI')]

    mri_subjects_t2_in_epi = sorted(filtered_mri['Subject'].tolist())

    print("total t2 in epi subjects: ", len(mri_subjects_t2_in_epi))
    print("total spect subjects: ", len(spect_subjects))

    #sujetos con T2 in corrected EPI y DaTSCAN
    t2_in_epi_spect_subjects = sorted(list(set(mri_subjects_t2_in_epi) & set(spect_subjects)))
    print("total subjects with T2 in corrected EPI and DaTSCAN: ", len(t2_in_epi_spect_subjects))

    return t1_anat_spect_subjects

In [5]:
def cases_t1_spect(df, t1_anat_spect_subjects):
    #================= to know subjects with T1 and spect modality =================
    filtered_df = df[df['Description'].str.contains('DaTSCAN|^T1-anatomical')]

    filtered_t1_spect_df = filtered_df[filtered_df['Subject'].isin(t1_anat_spect_subjects)]
    filtered_t1_spect_df = filtered_t1_spect_df.reset_index(drop=True)
    return filtered_t1_spect_df


In [6]:
def cases_with_more_than_one_t1_spect(df):

    # Count the occurrences of 't1-anatomical' for each subject
    t1_counts = df[df['Description'] == 'T1-anatomical'].groupby('Subject').size()

    # Filter subjects with more than two 't1-anatomical' values
    subjects_with_more_than_one_t1 = t1_counts[t1_counts > 1].index.tolist()

    print("Subjects with more than one 't1-anatomical' values:", subjects_with_more_than_one_t1)
    print("amount of subjects with more than one t1-anatomical: ", len(subjects_with_more_than_one_t1))

    #=================================================================================================

    # Filter subjects with more than two 'Reconstructed DaTSCAN	' values
    spect_counts = df[df['Description'] == 'Reconstructed DaTSCAN'].groupby('Subject').size()

    # Filter subjects with more than two 't1-anatomical' values
    subjects_with_more_than_one_spects = spect_counts[spect_counts > 1].index.tolist()

    print("Subjects with more than one 'Reconstructed DaTSCAN:", subjects_with_more_than_one_spects)
    print("amount of subjects with more than one Reconstructed DaTSCAN: ", len(subjects_with_more_than_one_spects))
    
    return subjects_with_more_than_one_t1, subjects_with_more_than_one_spects

In [7]:
def filter_single_t1_records(df):

    # Identify subjects with more than two 't1-anatomical' values
    t1_counts = df[df['Description'] == 'T1-anatomical'].groupby('Subject').size()
    subjects_with_more_than_one_t1 = t1_counts[t1_counts > 1].index.tolist()

    # Filter and keep only the 't1-anatomical' records closer to the DaTSCAN date
    result_df = pd.DataFrame()

    for subject in subjects_with_more_than_one_t1:
        subject_df = df[df['Subject'] == subject]
        
        t1_anatomical_df = subject_df[subject_df['Description'] == 'T1-anatomical']
        dat_scan_df = subject_df[subject_df['Description'] == 'Reconstructed DaTSCAN']
        
        if not t1_anatomical_df.empty and not dat_scan_df.empty:
            #getting the closest t1-anatomical to the dat_scan date
            closest_t1_anatomical = t1_anatomical_df.loc[(t1_anatomical_df['Acq Date'] - dat_scan_df['Acq Date'].iloc[0]).abs().idxmin()]
            closest_t1_anatomical_df = closest_t1_anatomical.to_frame().T
            closest_t1_anatomical_df.columns = df.columns
            result_df = pd.concat([result_df, closest_t1_anatomical_df])

    return result_df

In [8]:
def filter_single_spect_records(df):
    #filtering spect cases with more than one record
    spect_counts = df[df['Description'] == 'Reconstructed DaTSCAN'].groupby('Subject').size()
    subjects_with_more_than_one_spect = spect_counts[spect_counts > 1].index.tolist()

    # Filter and keep only the 't1-anatomical' records closer to the DaTSCAN date
    result_df = pd.DataFrame()

    for subject in subjects_with_more_than_one_spect:
        subject_df = df[df['Subject'] == subject]
        
        dat_scan_df = subject_df[subject_df['Description'] == 'Reconstructed DaTSCAN']
        dat_scan_df = dat_scan_df.sort_values(by=['Acq Date'], ascending=False).head(1)

        result_df = pd.concat([result_df, dat_scan_df])

    return result_df

In [9]:
def index_to_drop(result_df, filtered_t1_spect_df):

    #getting the index of the T1 to be saved (over subjects with more than one T1)
    t1_save_index = result_df.index
    t1_save_index = sorted(t1_save_index.tolist())
    print(len(t1_save_index))
    print("t1_save_index: ", t1_save_index)


    # getting the index of the T1 to be saved (over subjects with just one T1)
    single_t1_index = []
    t1_counts = filtered_t1_spect_df[filtered_t1_spect_df['Description'] == 'T1-anatomical'].groupby('Subject').size()
    subjects_with_just_one_t1 = t1_counts[t1_counts == 1].index.tolist()
    for subject in subjects_with_just_one_t1:
        idx = filtered_t1_spect_df[filtered_t1_spect_df['Subject'] == subject].index.tolist()
        single_t1_index.extend(idx)
    print(len(single_t1_index))
    print("single_t1_index: ", single_t1_index)


    #getting the index of the spect to be saved
    spect_save_index = filtered_t1_spect_df[filtered_t1_spect_df["Description"]=="Reconstructed DaTSCAN"].index
    spect_save_index = sorted(spect_save_index.tolist())
    print(len(spect_save_index))
    print("spect_save_index: ", spect_save_index)


    #getting all the index from the filtered_t1_spect_df
    total_indexes = filtered_t1_spect_df.index
    total_indexes = sorted(total_indexes.tolist())
    print("total_indexes: ", len(total_indexes))
    print(total_indexes)

    #getting the difference between the total_t1_index and the single_t1_index
    spect_and_2t1_index = list(set(total_indexes) - set(single_t1_index))

    #spect_and_2t1_index difference with spect_save_index
    _2t1_index= list(set(spect_and_2t1_index) - set(spect_save_index))

    #the current drop indexes (_2t1_index - t1_save_index)
    current_delete_index = sorted(list(set(_2t1_index) - set(t1_save_index)))

    return current_delete_index

## Control analysis

In [None]:
count_t1_types(ctrl_mri_spect_df)
print("=============================================")
t1_anat_spect_subjects = count_t1_types_with_spect(ctrl_mri_spect_df)
print("================== cases with T1 and spect =========================")
filtered_t1_spect_df = cases_t1_spect(ctrl_mri_spect_df, t1_anat_spect_subjects)
filtered_t1_spect_df

**Identitying more than one case per modality**

In [None]:
cases_with_more_than_one_t1_spect(filtered_t1_spect_df)

### Getting the T1 cases that we really need

In [None]:
result_df = filter_single_t1_records(filtered_t1_spect_df)
result_df.head()

### Getting the indexes to be dropped

In [None]:
current_delete_index = index_to_drop(result_df, filtered_t1_spect_df)
print("current_delete_index: ", current_delete_index)

**Just for checking**

In [None]:
filtered_t1_spect_df.iloc[6]

In [None]:
filtered_t1_spect_df[filtered_t1_spect_df["Subject"]==4032]

**Dropping the index that we do not need**

In [None]:
final_ctrl_mri_spect_df = filtered_t1_spect_df.drop(current_delete_index)
final_ctrl_mri_spect_df = final_ctrl_mri_spect_df.reset_index(drop=True)
final_ctrl_mri_spect_df

In [None]:
final_ctrl_mri_spect_df.groupby('Subject').value_counts()

In [None]:
# Sort DataFrame by 'Subject_ID' and 'Date'
df = final_ctrl_mri_spect_df.sort_values(by=['Subject', 'Acq Date'])

# Group DataFrame by 'Subject_ID'
grouped = final_ctrl_mri_spect_df.groupby('Subject')

# Calculate date difference within each group
df['Date_Difference'] = grouped['Acq Date'].diff().dt.days

# Calculate average days difference
average_days_difference = df['Date_Difference'].mean()

print("Average days difference between subjects:", average_days_difference)

## Parkinson analysis

In [10]:
count_t1_types(pd_mri_spect_df)
print("=============================================")
t1_anat_spect_subjects = count_t1_types_with_spect(pd_mri_spect_df)
print("================== cases with T1 and spect =========================")
filtered_t1_spect_df = cases_t1_spect(pd_mri_spect_df, t1_anat_spect_subjects)
print("amount of cases with T1 and spect: ", len(filtered_t1_spect_df))
filtered_t1_spect_df

T2 in corrected EPI:  617
T2 in T1:  544
T1-anatomical:  544
total t1-anatomical subjects:  544
total spect subjects:  2524
total subjects with T1-anatomical and DaTSCAN:  187
total t2 in t1 subjects:  544
total spect subjects:  2524
total subjects with T1-anatomical and DaTSCAN:  187
total t2 in epi subjects:  617
total spect subjects:  2524
total subjects with T2 in corrected EPI and DaTSCAN:  187
amount of cases with T1 and spect:  1114


Unnamed: 0,Image Data ID,Subject,Group,Sex,Age,Visit,Modality,Description,Type,Acq Date,Format,Downloaded
0,I994024,57037,PD,F,54,BL,MRI,T1-anatomical,Processed,2017-02-22,NiFTI,11/30/2023
1,I846226,57037,PD,F,54,SC,SPECT,Reconstructed DaTSCAN,Processed,2017-02-22,DCM,11/30/2023
2,I993964,55395,PD,F,67,BL,MRI,T1-anatomical,Processed,2017-01-05,NiFTI,11/30/2023
3,I1298216,55395,PD,F,69,V06,SPECT,Reconstructed DaTSCAN,Processed,2019-01-31,DCM,11/30/2023
4,I846221,55395,PD,F,67,SC,SPECT,Reconstructed DaTSCAN,Processed,2016-12-01,DCM,11/30/2023
...,...,...,...,...,...,...,...,...,...,...,...,...
1109,I406748,3102,PD,M,66,V06,MRI,T1-anatomical,Processed,2013-02-06,NiFTI,11/30/2023
1110,I418336,3102,PD,M,65,V04,SPECT,Reconstructed DaTSCAN,Processed,2012-05-23,DCM,11/30/2023
1111,I333354,3102,PD,M,64,BL,MRI,T1-anatomical,Processed,2010-11-23,NiFTI,11/30/2023
1112,I504836,3102,PD,M,68,V10,SPECT,Reconstructed DaTSCAN,Processed,2014-11-11,DCM,11/30/2023


In [11]:
filtered = sorted(filtered_t1_spect_df["Subject"].unique())
print("amount of subjects with T1 and spect: ", len(filtered))

amount of subjects with T1 and spect:  187


**Identitying more than one case per modality**

In [12]:
many_t1_cases, many_spect_cases = cases_with_more_than_one_t1_spect(filtered_t1_spect_df)

Subjects with more than one 't1-anatomical' values: [3102, 3105, 3107, 3108, 3111, 3113, 3116, 3118, 3119, 3120, 3123, 3124, 3125, 3126, 3128, 3130, 3131, 3132, 3134, 3150, 3154, 3166, 3168, 3173, 3174, 3175, 3176, 3178, 3179, 3181, 3182, 3184, 3185, 3190, 3305, 3307, 3308, 3309, 3314, 3321, 3323, 3325, 3327, 3328, 3352, 3354, 3359, 3360, 3364, 3365, 3366, 3367, 3371, 3372, 3373, 3374, 3375, 3377, 3378, 3380, 3383, 3385, 3386, 3387, 3392, 3552, 3556, 3557, 3558, 3559, 3564, 3567, 3574, 3575, 3577, 3584, 3585, 3586, 3587, 3588, 3591, 3592, 3593, 3752, 3757, 3758, 3760, 3762, 3763, 3770, 3771, 3775, 3776, 3777, 3778, 3780, 3781, 3787, 3788, 3789, 3800, 3802, 3808, 3814, 3815, 3818, 3819, 3822, 3823, 3824, 3825, 3826, 3828, 3829, 3830, 3831, 3832, 3834, 3835, 3838, 3863, 3866, 3868, 4001, 4005, 4011, 4012, 4013, 4019, 4020, 4021, 4022, 4024, 4025, 4026, 4027, 4029, 4030, 4034, 4035, 4037, 4038, 4136, 40366, 41486, 41488, 41664, 50901]
amount of subjects with more than one t1-anatomical:  

**Due to there are many subjects with more than one spect record** it is need to stablish one of them as reference date for further T1-anatomical selection

In [13]:
filtered_single_spect_records_df = filter_single_spect_records(filtered_t1_spect_df)
filtered_single_spect = sorted(filtered_single_spect_records_df["Subject"].unique())
print("amount of single spect records: ", len(filtered_single_spect_records_df))

amount of single spect records:  161


**Getting subjects with single spect record originally**

In [14]:
original_single_spect = list(set(filtered) - set(filtered_single_spect))
print(sorted(original_single_spect))
print("amount of subjects with single T1 and spect (originally): ", len(original_single_spect))

[3129, 3167, 3311, 3314, 3322, 3332, 3376, 3764, 3800, 3814, 3827, 3833, 3837, 3867, 4006, 4069, 4136, 40800, 40806, 40882, 41289, 41488, 41664, 50485, 53060, 57037]
amount of subjects with single T1 and spect (originally):  26


In [15]:
# getting the index of the spect to be saved (over subjects with just one spect)
original_single_spect_df = pd.DataFrame()

for subject in original_single_spect:
    subject_df = filtered_t1_spect_df[filtered_t1_spect_df["Subject"]==subject]    
    dat_scan_df = subject_df[subject_df["Description"]=="Reconstructed DaTSCAN"]
    original_single_spect_df = pd.concat([original_single_spect_df, dat_scan_df])
    

original_single_spect_index = sorted(original_single_spect_df.index)  
print("original_single_spect_index: ", original_single_spect_index)

original_single_spect_index:  [1, 6, 22, 32, 35, 43, 51, 60, 61, 64, 68, 203, 231, 249, 265, 305, 368, 389, 467, 707, 803, 830, 836, 839, 953, 1002]


In [16]:
original_single_spect_df.iloc[22]

Image Data ID                  I389261
Subject                           3827
Group                               PD
Sex                                  F
Age                                 75
Visit                               SC
Modality                         SPECT
Description      Reconstructed DaTSCAN
Type                         Processed
Acq Date           2012-08-07 00:00:00
Format                             DCM
Downloaded                  11/30/2023
Name: 305, dtype: object

In [17]:
filtered_t1_spect_df[filtered_t1_spect_df["Subject"]==3827]

Unnamed: 0,Image Data ID,Subject,Group,Sex,Age,Visit,Modality,Description,Type,Acq Date,Format,Downloaded
305,I389261,3827,PD,F,75,SC,SPECT,Reconstructed DaTSCAN,Processed,2012-08-07,DCM,11/30/2023
306,I371044,3827,PD,F,75,BL,MRI,T1-anatomical,Processed,2012-08-07,NiFTI,11/30/2023


**Now that we have fixed the DaTSCAN as reference** now we can get the single T1 records closer to DaTSCAN date (including original single spect and filtered versions)

In [18]:
#merging original spect records with the filtered version
final_spect_df = pd.concat([filtered_single_spect_records_df, original_single_spect_df])
final_spect_df.groupby('Subject').value_counts()   

Subject  Image Data ID  Group  Sex  Age  Visit  Modality  Description            Type       Acq Date    Format  Downloaded
3102     I504836        PD     M    68   V10    SPECT     Reconstructed DaTSCAN  Processed  2014-11-11  DCM     11/30/2023    1
3105     I504837        PD     M    73   V10    SPECT     Reconstructed DaTSCAN  Processed  2015-03-31  DCM     11/30/2023    1
3107     I770243        PD     M    75   U02    SPECT     Reconstructed DaTSCAN  Processed  2016-04-05  DCM     11/30/2023    1
3108     I688484        PD     F    54   V10    SPECT     Reconstructed DaTSCAN  Processed  2015-05-07  DCM     11/30/2023    1
3111     I419852        PD     M    67   V06    SPECT     Reconstructed DaTSCAN  Processed  2013-05-21  DCM     11/30/2023    1
                                                                                                                             ..
51632    I1144611       PD     F    65   V06    SPECT     Reconstructed DaTSCAN  Processed  2017-11-29  DCM  

In [19]:
def single_t1_records_wrt_spect(filtered_t1_spect_df, single_spect_records_df):
    # Identify subjects with more than two 't1-anatomical' values
    t1_counts = filtered_t1_spect_df[filtered_t1_spect_df['Description'] == 'T1-anatomical'].groupby('Subject').size()
    subjects_with_more_than_one_t1 = t1_counts[t1_counts > 1].index.tolist()

    # Filter and keep only the 't1-anatomical' records closer to the DaTSCAN date
    result_df = pd.DataFrame()

    for subject in subjects_with_more_than_one_t1:
        subject_df = filtered_t1_spect_df[filtered_t1_spect_df['Subject'] == subject]
        
        t1_anatomical_df = subject_df[subject_df['Description'] == 'T1-anatomical']
        if subject in single_spect_records_df['Subject'].unique():
            dat_scan_df = single_spect_records_df[single_spect_records_df['Subject'] == subject]
        else:
            dat_scan_df = subject_df[subject_df['Description'] == 'Reconstructed DaTSCAN']
        
        if not t1_anatomical_df.empty and not dat_scan_df.empty:
            #getting the closest t1-anatomical to the dat_scan date
            closest_t1_anatomical = t1_anatomical_df.loc[(t1_anatomical_df['Acq Date'] - dat_scan_df['Acq Date'].iloc[0]).abs().idxmin()]
            closest_t1_anatomical_df = closest_t1_anatomical.to_frame().T
            closest_t1_anatomical_df.columns = filtered_t1_spect_df.columns
            result_df = pd.concat([result_df, closest_t1_anatomical_df])
    
    return result_df   

In [20]:
filtered_single_t1_df = single_t1_records_wrt_spect(filtered_t1_spect_df, final_spect_df)
filtered_single_t1_df.head()

Unnamed: 0,Image Data ID,Subject,Group,Sex,Age,Visit,Modality,Description,Type,Acq Date,Format,Downloaded
1109,I406748,3102,PD,M,66,V06,MRI,T1-anatomical,Processed,2013-02-06,NiFTI,11/30/2023
1104,I406752,3105,PD,M,71,V06,MRI,T1-anatomical,Processed,2013-04-18,NiFTI,11/30/2023
1097,I693531,3107,PD,M,74,V10,MRI,T1-anatomical,Processed,2015-05-08,NiFTI,11/30/2023
1091,I407111,3108,PD,F,52,V06,MRI,T1-anatomical,Processed,2013-04-24,NiFTI,11/30/2023
1083,I333364,3111,PD,M,66,V04,MRI,T1-anatomical,Processed,2012-05-23,NiFTI,11/30/2023


**Getting subjects with single T1 record originally**

In [21]:
# Identify subjects with more than two 't1-anatomical' values
t1_counts = filtered_t1_spect_df[filtered_t1_spect_df['Description'] == 'T1-anatomical'].groupby('Subject').size()
subjects_with_just_one_t1 = t1_counts[t1_counts == 1].index.tolist()

# Filter and keep only the 't1-anatomical' records closer to the DaTSCAN date
original_single_t1_df = pd.DataFrame()

for subject in subjects_with_just_one_t1:
    subject_df = filtered_t1_spect_df[filtered_t1_spect_df['Subject'] == subject]
    
    t1_anatomical_df = subject_df[subject_df['Description'] == 'T1-anatomical']    
    original_single_t1_df = pd.concat([original_single_t1_df, t1_anatomical_df])
    
original_single_t1_df.head()   

Unnamed: 0,Image Data ID,Subject,Group,Sex,Age,Visit,Modality,Description,Type,Acq Date,Format,Downloaded
1044,I486666,3122,PD,M,63,V04,MRI,T1-anatomical,Processed,2013-05-22,NiFTI,11/30/2023
1013,I405975,3127,PD,F,49,BL,MRI,T1-anatomical,Processed,2012-09-13,NiFTI,11/30/2023
1001,I407257,3129,PD,M,56,BL,MRI,T1-anatomical,Processed,2013-02-05,NiFTI,11/30/2023
954,I330868,3167,PD,F,59,BL,MRI,T1-anatomical,Processed,2011-08-12,NiFTI,11/30/2023
840,I331048,3311,PD,M,75,BL,MRI,T1-anatomical,Processed,2012-01-31,NiFTI,11/30/2023


**Bulding the final PD dataframe with one record to T1 and Spect**

In [22]:
#indexes that we really need: 
# single spect record: original_single_spect_index
# single t1 record: original_single_t1_index
# filtered spect records (cases originally >2 records): filtered_single_spect_records_df
# from above get the t1 single record (cases originally >2 records): filtered_single_t1_df

final_pd_mri_spect_df = pd.concat([original_single_spect_df, filtered_single_spect_records_df,
                         original_single_t1_df, filtered_single_t1_df])

final_pd_mri_spect_df.groupby('Subject').value_counts()

Subject  Image Data ID  Group  Sex  Age  Visit  Modality  Description            Type       Acq Date    Format  Downloaded
3102     I406748        PD     M    66   V06    MRI       T1-anatomical          Processed  2013-02-06  NiFTI   11/30/2023    1
         I504836        PD     M    68   V10    SPECT     Reconstructed DaTSCAN  Processed  2014-11-11  DCM     11/30/2023    1
3105     I504837        PD     M    73   V10    SPECT     Reconstructed DaTSCAN  Processed  2015-03-31  DCM     11/30/2023    1
         I406752        PD     M    71   V06    MRI       T1-anatomical          Processed  2013-04-18  NiFTI   11/30/2023    1
3107     I770243        PD     M    75   U02    SPECT     Reconstructed DaTSCAN  Processed  2016-04-05  DCM     11/30/2023    1
                                                                                                                             ..
53060    I1474417       PD     M    73   V10    SPECT     Reconstructed DaTSCAN  Processed  2021-04-21  DCM  

In [27]:
final_pd_mri_spect_df[final_pd_mri_spect_df["Subject"]==3108]

Unnamed: 0,Image Data ID,Subject,Group,Sex,Age,Visit,Modality,Description,Type,Acq Date,Format,Downloaded
1089,I688484,3108,PD,F,54,V10,SPECT,Reconstructed DaTSCAN,Processed,2015-05-07,DCM,11/30/2023
1091,I407111,3108,PD,F,52,V06,MRI,T1-anatomical,Processed,2013-04-24,NiFTI,11/30/2023


**Knowing the data adquisition date**

In [49]:
subject_ids_list = [3108,
3127,
3178,
3311,
3327,
3366,
3380,
3753,
3764,
3815,
4030,
4034,
4069,
50028,
51731]

In [50]:
# Filter the DataFrame based on the description value of "Reconstructed DaTSCAN"
filtered_df = final_pd_mri_spect_df[final_pd_mri_spect_df['Description'] == 'Reconstructed DaTSCAN']

# Filter the filtered DataFrame based on the subject IDs list
result = filtered_df[filtered_df['Subject'].isin(subject_ids_list)]

# Retrieve the subject_id and acq date from the filtered DataFrame
result = result[['Subject', 'Acq Date']]
# Sort the result based on the subject_id
result_sorted = result.sort_values(by='Subject')
# Save the sorted result as a text file
result_sorted.to_csv('/home/Data/Datasets/Parkinson/radiological/PPMI/spect-mri/temporal.txt', sep='\t', index=False)

In [46]:
result_sorted

Unnamed: 0,Subject,Acq Date
1112,3102,2014-11-11
1103,3105,2015-03-31
1092,3107,2016-04-05
1080,3111,2013-05-21
1075,3113,2013-07-12
...,...,...
15,50983,2017-04-19
11,51632,2017-11-29
6,53060,2021-04-21
3,55395,2019-01-31


**Know the difference time between T1 and SPECT acquisition**

In [24]:
# Sort DataFrame by 'Subject_ID' and 'Date'
df = final_pd_mri_spect_df.sort_values(by=['Subject', 'Acq Date'])

# Group DataFrame by 'Subject_ID'
grouped = final_pd_mri_spect_df.groupby('Subject')

# Calculate date difference within each group
df['Date_Difference'] = grouped['Acq Date'].diff().dt.days

# Calculate average days difference
average_days_difference = df['Date_Difference'].mean()

print("Average days difference between subjects:", average_days_difference)

Average days difference between subjects: -254.71657754010695


# Identifying the PD that were not used in the EMBC approach

In [None]:
pd_embc_subjects = []
filtered_path = "../../../../../../Datasets/Parkinson/radiological/PPMI/spect-mri/filtered"
splits = ["test", "train"]

for split in splits:
    split_path = os.path.join(filtered_path, split)
    spect_pd_path = os.path.join(split_path, "parkinson", "spect")
    cases = os.listdir(spect_pd_path)
    pd_embc_subjects.extend(cases)
    
pd_embc_subjects = np.asarray(pd_embc_subjects)
pd_embc_subjects = pd_embc_subjects.astype(int)
print("total pd embc subjects: ", len(pd_embc_subjects))
total_pd_mri_spect = final_pd_mri_spect_df["Subject"].unique()
print("total pd with mri and spect: ", len(total_pd_mri_spect))

In [None]:
print("subjects to be taken into account:")
pd_to_take = list(set(total_pd_mri_spect) - set(pd_embc_subjects))
print(pd_to_take)
print(len(pd_to_take))

# Data moving

In [None]:
# in summary we have for control and pd subjects: final_ctrl_mri_spect_df and final_pd_mri_spect_df
original_path = "../../../../../../Datasets/Parkinson/radiological/PPMI/spect-mri/original"
save_path = "../../../../../../Datasets/Parkinson/radiological/PPMI/spect-mri/filtered/"

ctrl_original = original_path + "/controlMri-Spect/PPMI/"
pd_original = original_path + "/pdMri-Spect/PPMI/"

In [None]:
ctrl_mri_spect_df[ctrl_mri_spect_df["Subject"]==3104]

In [None]:
#for control set from the final_ctrl_mri_spect_df dataframe
ctrl_subjects = sorted(final_ctrl_mri_spect_df["Subject"].unique())

for subject in ctrl_subjects:
    print("subject: ", subject)
    ctrl_subject_df = final_ctrl_mri_spect_df[final_ctrl_mri_spect_df["Subject"]==subject]
    
    #getting the Spect and T1 modalities from the original path
    ctrl_original_t1 = ctrl_original + str(subject) + "/T1-anatomical/"
    t1_files = os.listdir(ctrl_original_t1)
    
    for t1 in t1_files:
        original_date = t1.split("_")
        ctrl_subject_df[]
        
    
    ctrl_original_spect = ctrl_original + str(subject) + "/Reconstructed_DaTSCAN/"    
    
    print("related files: ", os.listdir(ctrl_original_subject))



## For extra PD patients that were not taken into account in the EMBC approach

In [None]:
# in summary we have for control and pd subjects: final_ctrl_mri_spect_df and final_pd_mri_spect_df
original_path = "../../../../../../Datasets/Parkinson/radiological/PPMI/spect-mri/original"
save_path = "../../../../../../Datasets/Parkinson/radiological/PPMI/spect-mri/filtered/test2/parkinson"

ctrl_original = original_path + "/controlMri-Spect/PPMI/"
pd_original = original_path + "/pdMri-Spect/PPMI/"

In [None]:
def find_nii_file(path):
    """
    Recursively search for a file with .nii extension in the given path.
    
    Args:
    - path (str): The path to search in.
    
    Returns:
    - str or None: The path of the first .nii file found, or None if no such file is found.
    """
    for root, dirs, files in os.walk(path):
        for file in files:
            if file.endswith('.nii') or file.endswith('.dcm'):
                return os.path.join(root, file)
    return None

In [None]:
import shutil

#for control set from the final_ctrl_mri_spect_df dataframe
pd_subjects = sorted(pd_to_take)

modality = "SPECT"

for subject in pd_subjects:
    print("subject: ", subject)
    subject_df = final_pd_mri_spect_df[final_pd_mri_spect_df["Subject"]==subject]
    
    #getting the Spect and T1 modalities from the original path
    if modality == "MRI":
        reference_date = subject_df[subject_df["Modality"]=="MRI"]["Acq Date"].values[0]
        source = pd_original + str(subject) + "/T1-anatomical/"
        current_save_path = save_path + "/mri/" + str(subject)
    else:
        reference_date = subject_df[subject_df["Modality"]=="SPECT"]["Acq Date"].values[0]
        source = pd_original + str(subject) + "/Reconstructed_DaTSCAN/"
        current_save_path = save_path + "/spect/" + str(subject) + "/"
    
    if not os.path.exists(current_save_path):
        os.makedirs(current_save_path)
    
    reference_date = reference_date.astype('datetime64[D]')  
    print("reference date: ", reference_date) 
    print(type(reference_date))  
    files = os.listdir(source)
    
    for file in files:
        
        original_date = file.split("_")[0]  
        original_date = np.datetime64(original_date)   
       
        if original_date == reference_date:
            print("original date: ", original_date)
            current_source = source + file
            current_file = find_nii_file(current_source)
            print("original file: ", current_file)
            print("moving to: ", current_save_path)
            # Copy file to destination
            shutil.copy2(current_file, current_save_path)
            
        else:
            None

## Moving the nii.gz converted files from the .dcm SPECT files

In [None]:
file1 = "PPMI_3116_NM_Reconstructed_DaTSCAN_Br_20170503135214533_1_S561084_spect.nii"
string = "3116"

if string in file1:
    print("yes")
else:
    print("no")

In [None]:
root_path = "../../../../../../Datasets/Parkinson/radiological/PPMI/spect-mri/filtered/test2/parkinson/spect/"
files = os.listdir(root_path)

#for control set from the final_ctrl_mri_spect_df dataframe
pd_subjects = sorted(pd_to_take)

modality = "SPECT"

for subject in pd_subjects:
    print("subject: ", subject)
    save_path = root_path + str(subject) + "/"
    
    for file in files:
        file_path = root_path + file
        if os.path.isdir(file_path):
            None
        else:
            if str(subject) in file:
                current_file_path = root_path + file
                print("current file path: ", current_file_path)
                print("moving to: ", save_path)
                shutil.move(current_file_path, save_path)
            else:
                None           

## Checking the SPECT preprocessing missing cases

In [None]:
base_path = "../../../../../../Datasets/Parkinson/radiological/PPMI/spect-mri/filtered/test2/parkinson/spect"
cases = os.listdir(base_path)
ref_can = 4
ref_can2 = 1
cases_to_resampled = []
cases_to_registered = []
other_cases = []

for case in cases:
    preprocessed_case_path = os.path.join(base_path, case, "preprocessed")
    can = len(os.listdir(preprocessed_case_path))
    if can != ref_can:
        
        if can == 0:
            #if no files were found, the the resampling method needs to be executed
            cases_to_resampled.append(case)
        
        #if only there is a one file maybe the registration process must be done
        elif can == 1:
            cases_to_registered.append(case)
        
        else:
            other_cases.append(case)
        
print("cases that need resampled: ", len(cases_to_resampled))
print("cases that need registration: ", len(cases_to_registered))
print("cases that need others: ", len(other_cases))

In [None]:
sorted(cases_to_resampled)

In [None]:
new_pd_patients = sorted(os.listdir(base_path))
print(new_pd_patients)

In [None]:
# open file
with open('newPdPatients.txt', 'w+') as f:
	
	# write elements of list
	for items in new_pd_patients:
		f.write('%s\n' %items)
	
	print("File written successfully")
# close the file
f.close()