In [1]:
import pandas as pd

In [27]:
# Read the CSV into `df`; the path is relative to the notebook's working directory.
csv_path = '../ADNI1_Complete_3Yr_1.5T_4_09_2024.csv'
df = pd.read_csv(csv_path)

In [28]:
# Number of rows in the table as loaded.
df.shape[0]

2182

In [29]:
# Keep only rows whose Description is one of the two wanted values and whose
# Visit is one of the six wanted codes, then renumber the index from 0.
wanted_descriptions = [
    "MPR-R; GradWarp; B1 Correction; N3; Scaled",
    "MPR; GradWarp; B1 Correction; N3; Scaled",
]
wanted_visits = ["sc", "m06", "m12", "m18", "m24", "m36"]

keep_rows = df['Description'].isin(wanted_descriptions) & df['Visit'].isin(wanted_visits)
df = df[keep_rows].reset_index(drop=True)

In [30]:
# Number of rows left after the Description/Visit filter.
df.shape[0]

1534

In [31]:
# Show the filtered table; the rendered output elides the middle rows.
df

Unnamed: 0,Image Data ID,Subject,Group,Sex,Age,Visit,Modality,Description,Type,Acq Date,Format,Downloaded
0,I122954,027_S_0644,MCI,M,77,m24,MRI,MPR; GradWarp; B1 Correction; N3; Scaled,Processed,6/25/2008,NiFTI,1/05/2024
1,I36979,036_S_0813,CN,M,73,sc,MRI,MPR; GradWarp; B1 Correction; N3; Scaled,Processed,8/25/2006,NiFTI,3/06/2024
2,I74591,005_S_0814,AD,F,71,sc,MRI,MPR; GradWarp; B1 Correction; N3; Scaled,Processed,8/30/2006,NiFTI,3/06/2024
3,I200471,011_S_1282,MCI,F,80,m36,MRI,MPR; GradWarp; B1 Correction; N3; Scaled,Processed,2/24/2010,NiFTI,1/05/2024
4,I67668,005_S_0602,CN,M,72,m06,MRI,MPR; GradWarp; B1 Correction; N3; Scaled,Processed,1/17/2007,NiFTI,3/06/2024
...,...,...,...,...,...,...,...,...,...,...,...,...
1529,I73037,006_S_1130,MCI,F,72,m06,MRI,MPR-R; GradWarp; B1 Correction; N3; Scaled,Processed,7/18/2007,NiFTI,3/06/2024
1530,I47036,027_S_0116,MCI,M,80,m12,MRI,MPR-R; GradWarp; B1 Correction; N3; Scaled,Processed,2/07/2007,NiFTI,3/06/2024
1531,I73642,041_S_1418,MCI,M,82,sc,MRI,MPR-R; GradWarp; B1 Correction; N3; Scaled,Processed,7/27/2007,NiFTI,3/06/2024
1532,I104489,005_S_0221,AD,M,70,m24,MRI,MPR-R; GradWarp; B1 Correction; N3; Scaled,Processed,4/24/2008,NiFTI,


# Datasets

In [7]:
from sklearn.model_selection import train_test_split

## CNN - pretrain

In [57]:
# One row per subject (the first occurrence wins), then pull the subject IDs
# and their group labels out as two parallel arrays.
subject_table = df[['Subject', 'Group']].drop_duplicates(subset='Subject')
Subjects = subject_table['Subject'].to_numpy()
Groups = subject_table['Group'].to_numpy()

In [69]:
# Partition subjects (not images) so that all scans of one subject stay on the
# same side; 10% of subjects are held out, stratified by group label.
train_subjects, test_subjects = train_test_split(
    Subjects,
    test_size=0.1,
    random_state=22,
    stratify=Groups,
)

In [83]:
# Image IDs and labels of every scan belonging to a training subject.
in_train = df['Subject'].isin(train_subjects)
train_df = df.loc[in_train, ['Image Data ID', 'Group']]

In [84]:
# Parallel arrays: image ID and group label for each training scan.
train_images = train_df['Image Data ID'].to_numpy()
train_labels = train_df['Group'].to_numpy()

In [85]:
# Image IDs and labels of every scan belonging to a held-out subject.
in_test = df['Subject'].isin(test_subjects)
test_df = df.loc[in_test, ['Image Data ID', 'Group']]

In [86]:
# Parallel arrays: image ID and group label for each held-out scan.
test_images = test_df['Image Data ID'].to_numpy()
test_labels = test_df['Group'].to_numpy()

In [89]:
# Combined size of both splits; every row of df falls in exactly one of them.
sum(len(split) for split in (test_images, train_images))

1534

## CNN - finetune

In [197]:
# How many images are available for each visit code.
visit_codes = df['Visit']
visit_codes.value_counts()

sc     302
m06    301
m12    301
m24    299
m36    215
m18    116
Name: Visit, dtype: int64

In [198]:
# Visit code whose images make up the fine-tuning dataset.
time = 'sc'

# Rows of df recorded at that visit only.
at_selected_visit = df['Visit'].eq(time)
time_df = df.loc[at_selected_visit]

In [199]:
# Partition subjects using the FULL cohort in `df`, not the visit subset.
# Splitting `time_df` on its own hands train_test_split a different subject
# array, so even with the same random_state the partition is not guaranteed to
# match the one built from `df` for the pretrain and whole-network datasets.
# A subject held out there could then be trained on here (leakage between
# stages). Same inputs + same seed => same train/test subjects everywhere.
Subjects, Groups = df[['Subject', 'Group']].drop_duplicates(['Subject']).T.to_numpy()

train_subjects, test_subjects = train_test_split(Subjects, test_size=0.1, random_state=22, stratify=Groups)

# Restrict each side of the partition to the images of the selected visit.
# The held-out share within this visit is therefore only approximately 10%.
train_df = time_df[time_df['Subject'].isin(train_subjects)][['Image Data ID', 'Group']]
train_images, train_labels = train_df.T.to_numpy()

test_df = time_df[time_df['Subject'].isin(test_subjects)][['Image Data ID', 'Group']]
test_images, test_labels = test_df.T.to_numpy()

# time_df is a subset of df, so each of its rows lands on exactly one side
# and this total equals len(time_df).
len(test_images) + len(train_images)

302

## Whole Network

In [201]:
# One row per subject (the first occurrence wins), split into parallel arrays
# of subject IDs and group labels.
subject_table = df[['Subject', 'Group']].drop_duplicates(subset='Subject')
Subjects = subject_table['Subject'].to_numpy()
Groups = subject_table['Group'].to_numpy()

In [202]:
# Subject-level partition: 10% of subjects held out, stratified by group label,
# with a fixed seed so the split is reproducible.
train_subjects, test_subjects = train_test_split(
    Subjects,
    test_size=0.1,
    random_state=22,
    stratify=Groups,
)

In [203]:
def reorder_images(group):
    """Lay out one subject's image IDs in fixed visit order.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows for a single subject; must provide the columns ``'Visit'`` and
        ``'Image Data ID'``.

    Returns
    -------
    list
        One slot per visit in the order sc, m06, m12, m18, m24, m36. A slot
        holds the image ID recorded for that visit, or ``None`` when the
        group has no row for it. If several rows share a visit, the last
        row wins.

    Raises
    ------
    KeyError
        If a row carries a visit code outside the six listed above.
    """
    visit_order = ('sc', 'm06', 'm12', 'm18', 'm24', 'm36')
    slot_of = {visit: slot for slot, visit in enumerate(visit_order)}

    # Length follows the visit list, so adding a visit cannot desynchronise it.
    result = [None] * len(visit_order)

    # Walk the two needed columns directly instead of building a Series per
    # row with iterrows(); the values seen are the same.
    for visit, image_id in zip(group['Visit'], group['Image Data ID']):
        result[slot_of[visit]] = image_id
    return result

In [212]:
# For each training subject, build the visit-ordered list of image IDs;
# the result is a Series indexed by Subject and named 'ImageIDs'.
train_imagelist = (
    df.loc[df['Subject'].isin(train_subjects)]
    .groupby('Subject')[['Visit', 'Image Data ID']]
    .apply(reorder_images)
    .rename('ImageIDs')
)

In [213]:
# Attach each training subject's group label to its list of image IDs.
# merge() defaults to an inner join, so only subjects present in
# train_imagelist survive.
subject_groups = df[['Subject', 'Group']].drop_duplicates(subset='Subject')
train_merged = subject_groups.merge(train_imagelist, left_on='Subject', right_index=True)
train_df = train_merged[['ImageIDs', 'Group']]

In [214]:
# Parallel arrays: per-subject list of image IDs and the subject's label.
# Note that train_imagelist is rebound here from a Series to an array.
train_imagelist = train_df['ImageIDs'].to_numpy()
train_labels = train_df['Group'].to_numpy()

In [216]:
# Visit-ordered image-ID list for every held-out subject (Series indexed by
# Subject, named 'ImageIDs').
test_visit_lists = (
    df.loc[df['Subject'].isin(test_subjects)]
    .groupby('Subject')[['Visit', 'Image Data ID']]
    .apply(reorder_images)
    .rename('ImageIDs')
)

# Pair each list with the subject's group label; merge() defaults to an inner
# join, so only held-out subjects remain.
test_subject_groups = df[['Subject', 'Group']].drop_duplicates(subset='Subject')
test_merged = test_subject_groups.merge(test_visit_lists, left_on='Subject', right_index=True)
test_df = test_merged[['ImageIDs', 'Group']]

# Parallel arrays: per-subject list of image IDs and the subject's label.
test_imagelist = test_df['ImageIDs'].to_numpy()
test_labels = test_df['Group'].to_numpy()

In [217]:
# Number of subjects across both splits (one entry per subject on each side).
sum(len(split) for split in (train_imagelist, test_imagelist))

303