[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/sdiciotti/Age-Prediction-Demo/blob/master/Age_prediction.ipynb)

In [9]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, KFold, GridSearchCV, cross_validate
from sklearn.svm import SVR, SVC
from sklearn.metrics import mean_absolute_error, accuracy_score

In [3]:
# Read the dataset from a CSV file located next to the notebook.
DATA_FILE = 'NKI2_data.csv'
df = pd.read_csv(DATA_FILE)

In [13]:
# Report how many rows the raw table has before rows with missing values are
# dropped. (A bare `df` placed before the print was removed: only the last
# expression of a cell is displayed, so it produced no output.)
print("Dataframe shape before NaN removal:", df.shape[0])

Dataframe shape before NaN removal: 73


In [14]:
# Drop every row that contains at least one missing value.
# `thresh` is deliberately not passed: pandas documents it as not combinable
# with `how`, and recent releases raise TypeError when both are supplied.
# The result is rebound to `df` instead of using inplace=True, so the cell can
# be re-run safely and the following cells keep working with the same name.
df = df.dropna(axis=0, how='any')
print("Dataframe shape after NaN removal:", df.shape[0])

Dataframe shape after NaN removal: 72


In [15]:
# --- Regression task: configuration and data selection ---
print('***Regression task')

# Settings used by the nested cross-validation cell.
SEED = 42            # random_state for the K-fold shufflers
outer_n_folds = 5    # number of splits in the outer (evaluation) loop
inner_n_folds = 5    # number of splits in the inner (tuning) loop
C = [0.1, 1, 10]     # candidate values for the SVR parameter C

# Predictors: every column from position 3 onward. Target: the 'Age' column.
# NOTE(review): confirm 'Age' is not among the columns from position 3 onward.
X = df.iloc[:, 3:]
y = df['Age']

n_subjects = df.shape[0]
n_features = X.shape[1]
print('The whole dataset contains ' + str(n_subjects) + ' subjects')
print('The age prediction will be performed using ' + str(n_features) + ' MRI-derived features')
print()

***Regression task
The whole dataset contains 72 subjects
The age prediction will be performed using 33 MRI-derived features



In [16]:
# Nested cross-validation with scikit-learn: an inner grid search selects C,
# and an outer K-fold loop scores the tuned model.
outer_cv = KFold(n_splits=outer_n_folds, shuffle=True, random_state=SEED)
inner_cv = KFold(n_splits=inner_n_folds, shuffle=True, random_state=SEED)

# RBF-kernel support vector regressor; C is the parameter searched over below.
clf = SVR(
    kernel='rbf',
    degree=3,
    gamma='scale',
    coef0=0.0,
    tol=0.001,
    C=0.1,
    epsilon=0.1,
    shrinking=True,
    cache_size=200,
    verbose=0,
    max_iter=-1,
)
p_grid = [{'C': C}]

# Convert predictors and target to plain NumPy arrays before fitting.
X = np.asarray(X)
y = np.asarray(y)

# Inner loop: grid search over p_grid, scored by negated mean absolute error.
clf_gs = GridSearchCV(
    clf,
    param_grid=p_grid,
    cv=inner_cv,
    refit='neg_mean_absolute_error',
    scoring='neg_mean_absolute_error',
    n_jobs=1,
    verbose=4,
)

# Outer loop: evaluate the grid-searched estimator on each outer fold, keeping
# the train scores and the fitted estimators.
nested_score = cross_validate(
    clf_gs,
    X=X,
    y=y,
    cv=outer_cv,
    return_train_score=True,
    return_estimator=True,
    scoring='neg_mean_absolute_error',
    n_jobs=1,
)

# Scores are negated MAE, so the absolute value of the mean is reported.
# Per-fold values are available in nested_score['train_score'] and
# nested_score['test_score'].
print("Average MAE train:", np.abs(nested_score['train_score'].mean()))
print("Average MAE test:", np.abs(nested_score['test_score'].mean()))

Fitting 5 folds for each of 3 candidates, totalling 15 fits
[CV 1/5] END ............................C=0.1;, score=-2.315 total time=   0.0s
[CV 2/5] END ............................C=0.1;, score=-1.942 total time=   0.0s
[CV 3/5] END ............................C=0.1;, score=-2.458 total time=   0.0s
[CV 4/5] END ............................C=0.1;, score=-3.115 total time=   0.0s
[CV 5/5] END ............................C=0.1;, score=-3.355 total time=   0.0s
[CV 1/5] END ..............................C=1;, score=-1.614 total time=   0.0s
[CV 2/5] END ..............................C=1;, score=-1.297 total time=   0.0s
[CV 3/5] END ..............................C=1;, score=-1.571 total time=   0.0s
[CV 4/5] END ..............................C=1;, score=-2.337 total time=   0.0s
[CV 5/5] END ..............................C=1;, score=-2.101 total time=   0.0s
[CV 1/5] END .............................C=10;, score=-1.444 total time=   0.0s
[CV 2/5] END .............................C=10;, 