# Cognitive Age Prediction with Structural and Functional MRI

## Structural MRI

### Read Data

In [80]:
import pandas as pd

# read information about subjects
subjects_data = pd.read_csv('/home/okozyn/Projects/inria/camcan_analysis/data/participant_data.csv', index_col=0)

subjects_data.head()

Unnamed: 0_level_0,age,hand,gender_text,gender_code,tiv_cubicmm,cbuid700,cbuid280_sess1,cbuid280_sess2,cbuid280_sess3,cbuid280_sess4,profilecode,profilecode_numeric
Observations,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
CC110033,24,90.0,MALE,1,1413746.0,CBU120746,,,,,,
CC110037,18,89.0,MALE,1,1386581.0,CBU110544,,,,,,
CC110045,24,100.0,FEMALE,2,1385355.0,CBU111116,,,,,,
CC110056,22,100.0,FEMALE,2,1286849.0,CBU110547,,CBU140153,CBU140224,,D,1.0
CC110062,20,58.0,MALE,1,1582719.0,CBU110468,,,,,,


In [81]:
# read features

# there is a repetition in features, because I didn't handle the exceptions correctly
# save log for the next time

area_data = pd.read_pickle('/home/okozyn/Projects/inria/camcan_analysis/data/structural/area_data.gzip', 
                           compression='gzip')
thickness_data = pd.read_pickle('/home/okozyn/Projects/inria/camcan_analysis/data/structural/thickness_data.gzip',
                               compression='gzip')
volume_data = pd.read_pickle('/home/okozyn/Projects/inria/camcan_analysis/data/structural/volume_data.gzip',
                            compression='gzip')

volume_data.head()

Unnamed: 0_level_0,Left-Lateral-Ventricle,Left-Inf-Lat-Vent,Left-Cerebellum-White-Matter,Left-Cerebellum-Cortex,Left-Thalamus-Proper,Left-Caudate,Left-Putamen,Left-Pallidum,3rd-Ventricle,4th-Ventricle,...,SupraTentorialVol,SupraTentorialVolNotVent,SupraTentorialVolNotVentVox,MaskVol,BrainSegVol-to-eTIV,MaskVol-to-eTIV,lhSurfaceHoles,rhSurfaceHoles,SurfaceHoles,EstimatedTotalIntraCranialVol
Measure:volume,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
CC310256,16443.3,215.0,14808.5,56803.4,7378.4,3956.5,5018.7,2464.6,1684.8,2822.3,...,1194657.0,1160384.0,1158424.0,1847752.0,0.718849,0.988205,16.0,7.0,23.0,1869806.0
CC110033,6018.2,145.2,13938.6,56055.7,8523.9,3604.6,4557.6,2008.8,971.2,1426.7,...,1030259.0,1014403.0,1011785.0,1596489.0,0.757157,1.029559,41.0,23.0,64.0,1550654.0
CC110037,5150.4,226.8,14923.5,59652.3,7732.1,3223.5,4647.4,2154.4,773.3,1478.8,...,1032711.0,1022294.0,1019858.0,1618468.0,0.782415,1.072228,19.0,30.0,49.0,1509444.0
CC110098,6299.5,269.5,14468.0,59900.4,7368.8,3976.3,5239.7,1884.4,880.3,1790.9,...,1091133.0,1079604.0,1076145.0,1732368.0,0.740368,1.031667,12.0,28.0,40.0,1679193.0
CC110126,2711.6,352.6,16216.5,57535.4,7946.3,3431.7,4875.8,1994.7,726.2,1233.2,...,1094727.0,1088829.0,1086463.0,1628306.0,0.769026,1.009764,21.0,23.0,44.0,1612561.0


In [82]:
# check whether we have any NaN/None values.
thickness_data[thickness_data.isnull().any(axis=1)]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,5114,5115,5116,5117,5118,5119,5120,5121,5122,5123


### Training and Prediction

In [83]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import RidgeCV

#### Area

In [84]:
# prepare data, subjects age
area_data_rnd = area_data.sample(frac=1)
y = subjects_data.loc[area_data_rnd.index.values].age.values
X = area_data_rnd.values

scaler = StandardScaler()
scaler.fit(X)
X = scaler.transform(X)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

alphas = [1e-3, 5e-3, 1e-2, 5e-2, 1e-1, 5e-1, 1, 5, 10]

clf = RidgeCV(alphas=alphas, fit_intercept=True, normalize=False, cv=5).fit(X_train, y_train)
print('Score: ', clf.score(X_train, y_train))
print('MSE: ', np.sum((clf.predict(X_test) - y_test)**2) / len(y_test))

# Metrics to add
# negative mean abs error, it should show distribution across different features
# variance
# to do cross-validation in a different way
# Monte Carlo shuffle split is a scheme for cross-validation
# learning curve function
# number of steps for cross validation should be around 10
# cross_val_predict - cross validated predictions, cross_val_score - cross validated scores
# scatter plots: predicted vs true age, mse scores (sklearn metrics)

Score:  0.9996164656450213
MSE:  134.36721599445966


#### Thickness

In [85]:
# prepare data, subjects age
thickness_data_rnd = thickness_data.sample(frac=1)
y = subjects_data.loc[thickness_data_rnd.index.values].age.values
X = thickness_data_rnd.values

scaler = StandardScaler()
scaler.fit(X)
X = scaler.transform(X)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

alphas = [1e-3, 5e-3, 1e-2, 5e-2, 1e-1, 5e-1, 1, 5, 10]

clf = RidgeCV(alphas=alphas, fit_intercept=True, normalize=False, cv=5).fit(X_train, y_train)
print('Score: ', clf.score(X_train, y_train))
print('MSE: ', np.sum((clf.predict(X_test) - y_test)**2) / len(y_test))

Score:  0.9998560993618588
MSE:  71.679412759749


#### Volume

In [86]:
# prepare data, subjects age
# shuffle data
volume_data_rnd = volume_data.sample(frac=1)
y = subjects_data.loc[volume_data_rnd.index.values].age.values
X = volume_data_rnd.values

scaler = StandardScaler()
scaler.fit(X)
X = scaler.transform(X)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

alphas = [1e-3, 5e-3, 1e-2, 5e-2, 1e-1, 5e-1, 1, 5, 10]

clf = RidgeCV(alphas=alphas, fit_intercept=True, normalize=False, cv=5).fit(X_train, y_train)
print('Score: ', clf.score(X, y))
print('MSE: ', np.sum((clf.predict(X_test) - y_test)**2) / len(y_test))

Score:  0.8009914080558235
MSE:  92.32038053994306


## Functional MRI