In [1]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt

import pandas as pd # read csv and manip tables 
import scipy.io #read .mat files

from sklearn.model_selection import KFold

from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC,SVR
from sklearn import preprocessing

In [2]:
# Load the subject table and build the regression target (y) and feature matrix (X).
demog = pd.read_csv('adni2_weights_vbm_rs_model.csv',index_col=0)
print(demog.columns)

# Keep only the subjects whose cognitive score (MMSE) is present.
# Despite its name, nan_cog is True where MMSE is NOT NaN.
nan_cog = ~np.isnan(demog['MMSE'].values)
print('nan_cog shape:',nan_cog.shape)
y = demog['MMSE'].values[nan_cog]
print('y shape:',y.shape)
print(y)

# Standardize the target; StandardScaler expects a 2-D input, hence the
# reshape to a single column and the [:,0] to get back a 1-D vector.
# NOTE(review): the scaler is fit on every retained subject here, i.e. before
# the cross-validation split performed later in the notebook.
scl = preprocessing.StandardScaler()
y = scl.fit_transform(y.reshape(-1, 1))[:,0]

# X is every column from position 55 onwards (the 56th column to the last),
# restricted to the same subjects as y.
# .iloc replaces the removed .ix indexer; the column labels are strings, so the
# integer slice was already being interpreted positionally.
X = demog.iloc[:,55:].values[nan_cog]
print('X shape:',X.shape)
print(X)

Index(['RID', 'gender', 'age', 'diagnosis', 'pt_group', 'civetqc', 'mean_ct',
       'excluded', 'ctrlvsmci', 'ctrlvsad', 'mcivsad', 'mtladni2sites',
       'adni2', 'mean_gm', 'TIV', 'fd', 'adni2site2', 'adni2site6',
       'adni2site10', 'adni2site13', 'adni2site18', 'adni2site19',
       'adni2site31', 'adni2site53', 'adni2site100', 'adni2site129',
       'adni2site130', 'adni2site131', 'adni2site136', 'FDG', 'AV45', 'CDRSB',
       'ADAS11', 'ADAS13', 'MMSE', 'RAVLT_immediate', 'RAVLT_learning',
       'RAVLT_forgetting', 'RAVLT_perc_forgetting', 'FAQ', 'MOCA', 'EcogPtMem',
       'EcogPtLang', 'EcogPtVisspat', 'EcogPtPlan', 'EcogPtOrgan',
       'EcogPtDivatt', 'EcogPtTotal', 'EcogSPMem', 'EcogSPLang',
       'EcogSPVisspat', 'EcogSPPlan', 'EcogSPOrgan', 'EcogSPDivatt',
       'EcogSPTotal', 'rs1_sub1', 'rs1_sub2', 'rs1_sub3', 'rs2_sub1',
       'rs2_sub2', 'rs2_sub3', 'rs3_sub1', 'rs3_sub2', 'rs3_sub3', 'rs4_sub1',
       'rs4_sub2', 'rs4_sub3', 'rs5_sub1', 'rs5_sub2', 'rs5_sub3'

In [3]:
# Flag the rows (subjects) of X that contain at least one NaN feature.

# element-wise NaN indicator, same shape as X
nan_X = np.isnan(X)
print('nan_X:',nan_X.shape)
print(nan_X)

# number of NaN entries in each row
nan_per_row = np.sum(nan_X, axis=1)
print(nan_per_row)

# boolean mask: True for every row holding one or more NaNs
mask_nan = np.not_equal(nan_per_row, 0)
print(mask_nan)
print('mask_nan shape:',mask_nan.shape)

nan_X: (167, 24)
[[False False False ..., False False False]
 [False False False ..., False False False]
 [False False False ..., False False False]
 ..., 
 [False False False ..., False False False]
 [False False False ..., False False False]
 [False False False ..., False False False]]
[ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0 24  0  0  0  0  3  3  0  0  0  3  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  3  0  0
  0  0  0  0  0  0  3  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
[False False False False False False False False False False False False
 False False False False False False False False False False False False
 False False False False False  True False False False F

In [4]:
# Drop the subjects flagged in mask_nan from both the target and the features.
# NOTE: y and X are overwritten, so this cell is meant to run exactly once
# after the cells that build y, X and mask_nan.
keep_rows = np.logical_not(mask_nan)

y = y[keep_rows]
print(y)
print('y shape:',y.shape)

X = X[keep_rows]
print(X)
print('X shape:',X.shape)


[ 0.24169364 -0.11236634  0.24169364  0.9498136  -0.11236634  0.9498136
  0.24169364 -0.11236634  0.24169364  0.24169364  0.9498136   0.24169364
  0.9498136   0.9498136   0.59575362  0.59575362  0.9498136   0.59575362
  0.9498136  -0.11236634  0.9498136   0.9498136   0.59575362  0.59575362
  0.59575362 -0.46642632  0.24169364  0.59575362  0.59575362 -0.8204863
 -0.11236634  0.9498136  -0.46642632 -1.17454628  0.24169364 -1.88266624
  0.24169364 -2.94484617  0.9498136   0.9498136   0.24169364  0.59575362
  0.9498136   0.9498136   0.24169364  0.59575362  0.59575362  0.59575362
  0.9498136  -1.88266624  0.59575362  0.59575362  0.59575362  0.9498136
  0.24169364  0.59575362  0.59575362 -0.46642632  0.24169364 -0.8204863
  0.9498136   0.9498136   0.59575362  0.59575362  0.59575362  0.9498136
 -0.46642632  0.59575362  0.24169364 -0.46642632  0.24169364  0.9498136
  0.59575362  0.9498136   0.24169364  0.9498136   0.9498136   0.9498136
 -0.11236634  0.24169364  0.59575362  0.59575362 -0.112366

In [5]:
# Outer 3-fold cross-validation of an RBF-kernel SVR predicting the
# standardized target y from X. Inside every outer training fold, C and gamma
# are selected by a 10-fold grid search, so the outer test fold is only used
# for scoring.
scores = []
skf = KFold(n_splits=3)
for train_index, test_index in skf.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # grid search over C and gamma; the C/gamma given to SVR here are only
    # starting values and are overridden by the grid
    clf = GridSearchCV(
        SVR(kernel='rbf', C=1e1, gamma=0.1),
        cv=10,
        param_grid={"C": [1e0, 1e1, 1e2, 1e3], "gamma": np.logspace(-2, 2, 5)},
    )
    clf.fit(X_train, y_train)

    # score on the held-out fold
    scores.append(clf.score(X_test, y_test))

    # Map the predictions back to the original target units. The scaler was
    # fit on a single-column 2-D array and current scikit-learn rejects 1-D
    # input, so reshape to a column and take that column back out.
    y_pred = clf.predict(X_test)
    print(scl.inverse_transform(y_pred.reshape(-1, 1))[:, 0])
    print(scores[-1])

print('Average R2: ', np.mean(scores))

[ 28.41796292  27.87929822  26.83114578  27.45883428  27.61063083
  27.14548945  28.31658093  26.88242507  27.24614227  27.71452817
  27.76708686  27.31310759  26.67434945  26.77880625  29.37007586
  29.35856747  27.32972601  27.69432956  28.06388591  28.12358034
  27.79838971  27.90703681  28.3777012   27.89541889  28.60707115
  27.18896661  27.86408426  26.63759095  26.43291499  27.17962021
  27.57280566  27.03521531  26.23423786  26.92145897  27.79510157
  25.84760562  27.73364007  27.31387457  27.91650179  25.92942575
  28.02249754  27.74579901  27.84854147  27.77690051  27.30056907
  28.71113567  28.14826091  28.74407876  28.09377947  26.56539871
  28.08647844  28.088771    27.79460991  28.29380457]
0.0524846987952
[ 28.37047982  29.29283598  29.73586448  26.54533344  27.17115023
  29.62663172  27.53824728  29.63125059  28.75781058  29.18351869
  28.04667338  27.66776438  28.30756968  28.81856669  27.47909888
  27.90402359  27.43787025  28.65749417  30.12726157  28.77705634
  25.6