In [132]:
# Import Pandas Library for Data Preprocessing
import pandas as pd

In [133]:
# Load the OASIS cross-sectional patient records into a DataFrame
oasis_csv = './oasis_cross-sectional.csv'
patients = pd.read_csv(oasis_csv)

In [134]:
# Summarise every column across all patients: non-null count, minimum and maximum.
# The statistics are passed as a list rather than a set so that the rows of the
# resulting table always appear in this order (a set has no defined ordering).
patients.agg(["count", "min", "max"])

Unnamed: 0,ID,M/F,Hand,Age,Educ,SES,MMSE,CDR,eTIV,nWBV,ASF,Delay
min,OAS1_0001_MR1,F,R,18,1.0,1.0,14.0,0.0,1123,0.644,0.881,1.0
max,OAS1_0457_MR1,M,R,96,5.0,5.0,30.0,2.0,1992,0.893,1.563,89.0
count,436,436,436,436,235.0,216.0,235.0,235.0,436,436.0,436.0,20.0


In [135]:
# Remove the Gender (M/F), Hand and Delay columns, which this analysis does not use.
# In the summary of all patients, Hand is always "R" and Delay is populated for
# only 20 of the 436 rows.
# Reassigning (instead of dropping in place) with errors="ignore" keeps this cell
# safe to re-run: a second execution is a no-op rather than a KeyError.
patients = patients.drop(columns=["M/F", "Hand", "Delay"], errors="ignore")

In [136]:
# Select the patients diagnosed with Alzheimer's Disease (AD): rows whose CDR equals 1.
# NOTE(review): rows with other non-zero CDR values are excluded by this filter
# (the full table has CDR values up to 2) - confirm this is intended.
ad = patients[patients['CDR'] == 1]

# Get the count, minimum, and maximum of each column for AD patients.
# A list (not a set) keeps the row order of the summary fixed.
ad.agg(["count", "min", "max"])

Unnamed: 0,ID,Age,Educ,SES,MMSE,CDR,eTIV,nWBV,ASF
min,OAS1_0028_MR1,65,1.0,1.0,15.0,1.0,1274,0.655,1.013
max,OAS1_0452_MR1,96,5.0,5.0,29.0,1.0,1732,0.763,1.377
count,28,28,28.0,24.0,28.0,28.0,28,28.0,28.0


In [137]:
# Select the patients labelled Typical Development (TD): rows whose CDR equals 0
td = patients[patients['CDR'] == 0]

# Get the count, minimum, and maximum of each column for TD patients.
# A list (not a set) keeps the row order of the summary fixed.
td.agg(["count", "min", "max"])

Unnamed: 0,ID,Age,Educ,SES,MMSE,CDR,eTIV,nWBV,ASF
min,OAS1_0001_MR1,33,1.0,1.0,25.0,0.0,1123,0.645,0.965
max,OAS1_0457_MR1,94,5.0,5.0,30.0,0.0,1818,0.847,1.563
count,135,135,135.0,133.0,135.0,135.0,135,135.0,135.0


In [138]:
# Narrow the TD group with three successive threshold filters.

# Keep TD patients aged 65 or older; 65 is the minimum age in the AD summary.
td = td[td['Age'] >= 65]

# Keep TD patients with an eTIV of at least 1480.
# NOTE(review): 1480 is not the AD minimum eTIV (the AD summary shows 1274);
# the cut-off looks hand-picked to shrink the TD group - confirm the rationale.
td = td[td['eTIV'] >= 1480]

# Keep TD patients with an ASF of at least 1.
# NOTE(review): the AD summary's minimum ASF is 1.013, so 1 is a rounded
# cut-off rather than the exact AD minimum - confirm.
td = td[td['ASF'] >= 1]

# Get the count, minimum, and maximum of each column for the filtered TD patients.
# A list (not a set) keeps the row order of the summary fixed.
td.agg(["count", "min", "max"])

Unnamed: 0,ID,Age,Educ,SES,MMSE,CDR,eTIV,nWBV,ASF
min,OAS1_0010_MR1,67,1.0,1.0,26.0,0.0,1483,0.676,1.003
max,OAS1_0428_MR1,91,5.0,4.0,30.0,0.0,1750,0.805,1.183
count,28,28,28.0,27.0,28.0,28.0,28,28.0,28.0


The AD and TD groups now contain the same number of patients: 28 each.

In [140]:
# Stack the AD and TD patients into a single dataframe (AD rows first, then TD)
filtered_patients = pd.concat([ad, td])

# IDs of the patients that will be used for network analysis
filtered_patients_ids = filtered_patients['ID']

# Write the IDs to a one-column CSV with an "ID" header row and no index column.
# header=True is passed explicitly because the default for Series.to_csv has
# differed between pandas releases; the previous columns=['ID'] argument was
# redundant, since a Series has only one column to write.
filtered_patients_ids.to_csv('patient_ids.csv', index=False, header=True)