In [None]:
# %load /Users/Jonathan/.je.py
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import glob
from os.path import join, isfile
# -------------------------------------------------------------------------------------
from nilearn import plotting, image;
from nilearn.masking import compute_epi_mask
import nibabel as nib
from nilearn.input_data import NiftiMasker
import time
# -------------------------------------------------------------------------------------
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.cross_validation import LeaveOneLabelOut, cross_val_score
from sklearn.feature_selection import SelectPercentile, f_classif
# -------------------------------------------------------------------------------------
# Default size (width, height in inches) for every figure drawn in this notebook.
plt.rcParams['figure.figsize'] = (12, 6)

In [None]:
# Analysis configuration.
# For a quick single-participant run, temporarily set SUBJECTS = ['S08'].
SUBJECTS = list(map('S{:02d}'.format, range(1, 13)))  # 'S01' ... 'S12'
RUNS = range(1, 7)                                    # run numbers 1-6
PATH = '/imaging/ourlab/jerez/memcon/'                # project root on the imaging server
PERCENTILE = 5                                        # passed to SelectPercentile below
CV = 5                                                # NOTE(review): not referenced in the cells shown here

In [None]:
from helper_functions import apply_niftiMasker, load_scanning_session_matrix, get_onsets
from classification import compute_classification

In [None]:
# Stretch the notebook container to the full browser width.
# `display`/`HTML` are imported from the public `IPython.display` module:
# importing `display` from `IPython.core.display` is deprecated and fails on
# recent IPython releases.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [None]:
# compute_mask(SUBJECTS)
# masker = compute_mask(SUBJECTS)

In [None]:
# Read the saved 12-subject mask image from the working directory and show it.
mask = nib.load('12_subject_mask.nii')
plotting.plot_img(mask)

# Masker built on that image; signals are standardized and intermediate
# results are cached under ./nilearn_cache.
masker = NiftiMasker(
    mask_img=mask,
    standardize=True,
    memory="nilearn_cache",
    memory_level=1,
)
type(masker)

In [None]:
# Sanity check: show the class of the masker object created in the mask-loading cell.
type(masker)

##-------##-------##-------##-------##-------##-------##-------##-------##-------##-------##-------##-------##-------

## Leave-one-subject-out cross-validation

In [None]:
from sklearn.metrics import accuracy_score

# Participants included in the leave-one-subject-out analysis ('S01' ... 'S12').
SUBJECTS = list(map('S{:02d}'.format, range(1, 13)))

# Empty results table: one row per subject, true-label vs. shuffled-label accuracy.
df = pd.DataFrame(
    columns=['Subject', 'classification_accuracy', 'classification_accuracy_reshuffled']
)

# Classifier: univariate ANOVA feature selection (top PERCENTILE % of features)
# followed by a linear-kernel support vector machine.
feature_selection = SelectPercentile(f_classif, percentile=PERCENTILE)
svc = SVC(kernel='linear')
anova_svc = Pipeline(steps=[('anova', feature_selection), ('svc', svc)])

##### Construct one large matrix of shape [96 movies × 12 subjects, brain_voxels]

In [None]:
# Build the multi-subject frame: one row per onset, columns =
# ['subject', 'category', <one column per entry of the subject's Z matrix>].
# Per-subject frames are collected in a list and concatenated once at the end
# instead of re-copying the growing frame on every iteration.
subject_frames = []
for s in SUBJECTS:
    # S11 is processed with runs 1-5 only; every other subject uses runs 1-6.
    # A loop-local name is used so the global RUNS constant is not overwritten.
    subject_runs = range(1, 6) if s == 'S11' else range(1, 7)
    print(s)

    if not isfile(join(PATH, 'data/Z_files', s + '_Z_GM.npy')):  # Z file from Group Mask
        # The helper was never imported at the top of the notebook, so a cache
        # miss used to raise NameError. Import it lazily, only when needed.
        # NOTE(review): assumes helper_functions exposes apply_niftiMasker_GM - confirm.
        from helper_functions import apply_niftiMasker_GM
        apply_niftiMasker_GM(s, subject_runs, masker)

    single_subject_Z = load_scanning_session_matrix(s, GM=True)
    single_subject_onsets = get_onsets(s, subject_runs)

    # The three pieces are glued together column-wise, so they must have the
    # same number of rows; otherwise pandas silently pads with NaN.
    n_rows = single_subject_onsets.shape[0]
    if single_subject_Z.shape[0] != n_rows:
        raise ValueError(
            '%s: %d onsets but %d rows in the Z matrix'
            % (s, n_rows, single_subject_Z.shape[0])
        )

    df_single_subject = pd.concat(
        [
            # Named up front so it does not collide with feature column 0.
            pd.Series([s] * n_rows, name='subject'),
            # Positional index so rows align with the other two pieces.
            single_subject_onsets['category'].reset_index(drop=True),
            pd.DataFrame(single_subject_Z),
        ],
        axis=1,
    )
    subject_frames.append(df_single_subject)

# pd.concat refuses an empty list, so fall back to an empty frame in that case.
df_multi_subject = pd.concat(subject_frames, axis=0) if subject_frames else pd.DataFrame()

In [None]:
# Shape of the matrix loaded for the last subject processed by the loop above.
single_subject_Z.shape

In [None]:
# Shape of the stacked multi-subject frame (rows from all subjects, label + feature columns).
df_multi_subject.shape

In [None]:
# Label the leading column 'subject'; every other column name is kept unchanged.
df_multi_subject.columns = ['subject'] + df_multi_subject.columns.tolist()[1:]

In [None]:
from sklearn.base import clone
from sklearn.model_selection import LeaveOneGroupOut
from sklearn.externals import joblib
from sklearn.utils import shuffle

# Leave-one-subject-out cross-validation.
# For every held-out subject the ANOVA+SVM pipeline is trained twice on the
# remaining subjects: once with the true category labels and once with the
# training labels shuffled, then both models are scored on the held-out subject.

SHUFFLE_SEED = 0  # fixed so the shuffled-label baseline is reproducible across runs

# Select by column name rather than position so the split does not depend on
# the exact column order of df_multi_subject.
groups = df_multi_subject['subject'].values
y = df_multi_subject['category'].values
X = df_multi_subject.drop(['subject', 'category'], axis=1).values

fold_records = []
logo = LeaveOneGroupOut()
for i, (train_index, test_index) in enumerate(logo.split(X, y, groups)):
    # Take the subject label from the held-out rows themselves instead of
    # assuming the fold order matches the order of the SUBJECTS list.
    held_out_subject = groups[test_index[0]]

    X_train, y_train = X[train_index], y[train_index]
    X_test, y_test = X[test_index], y[test_index]
    y_train_reshuffled = shuffle(y_train, random_state=SHUFFLE_SEED + i)
    print(X_train.shape)
    print(y_train.shape)

    anova_svc.fit(X_train, y_train)
    predictions = anova_svc.predict(X_test)
    classification_accuracy = accuracy_score(y_test, predictions)

    # clone() gives an unfitted copy with its own selector and SVC. Building a
    # second Pipeline around the same step objects would refit them in place
    # and overwrite the true-label model held by anova_svc.
    anova_svc_reshuffle = clone(anova_svc)
    anova_svc_reshuffle.fit(X_train, y_train_reshuffled)
    predictions_reshuffled = anova_svc_reshuffle.predict(X_test)
    classification_accuracy_reshuffled = accuracy_score(y_test, predictions_reshuffled)

    print(classification_accuracy)
    fold_records.append(
        [held_out_subject, classification_accuracy, classification_accuracy_reshuffled]
    )
    # joblib.dump(anova_svc, 'models/' + held_out_subject + '_featSelec.pkl')

# Assemble the table once, rather than growing it row by row.
classification_results = pd.DataFrame(
    fold_records,
    columns=['Subject', 'classification_accuracy', 'classification_accuracy_reshuffled'],
)
classification_results

In [None]:
# Re-display the per-subject accuracy table produced by the cross-validation loop.
classification_results

In [None]:
# Hand the results table to the project helper; presumably it plots the
# per-subject accuracies (its implementation is not visible in this notebook).
from helper_functions import plot_classification_accuracies
plot_classification_accuracies(classification_results)