# Setting a parameter by cross-validation  
http://nilearn.github.io/auto_examples/02_decoding/plot_haxby_grid_search.html#sphx-glr-auto-examples-02-decoding-plot-haxby-grid-search-py

The approach we will use here is "nested cross-validation". It consists of running an inner cross-validation loop to set the model parameters inside the outer CV loop used to judge prediction performance: the parameters are set separately on each fold, never using the data used to measure performance.  



In [2]:
import os
import numpy as np
import nilearn
import glob
#import matplotlib
import nibabel as nib
import pandas as pd 
from nilearn.input_data import NiftiMasker 

In [None]:
# --- Input locations -------------------------------------------------------
# Mask image (handed to the masker as `mask_img` further down).
imag_mask = '/projects/niblab/nilearn_projects/power_roimask_4bi.nii.gz'
# Behavioral CSV; its "label" and "sub" columns are read below.
stim = os.path.join('/projects', 'niblab', 'scripts', 'nilean_stuff',
                    'label_all_sub.csv')
# Concatenated dataset image.
dataset = '/projects/niblab/bids_projects/Experiments/ChocoData/derivatives/group_ana/w1_imagine_all.nii.gz'

# --- Behavioral data -------------------------------------------------------
labels = pd.read_csv(stim, sep=",")
session = labels["sub"]

# Boolean mask over the rows of `labels`: True where the label is one of the
# three conditions of interest. `y` keeps only those rows.
condition_mask = labels["label"].isin(['unapp', 'app', 'H2O'])
y = labels["label"][condition_mask]

In [None]:
# Build the masker. Standardizing is often very important for decoding, so it
# is switched on; intermediate results are cached in "nilearn_cache".
# `sessions` receives the full (not yet filtered) "sub" column.
nifti_masker = NiftiMasker(
    mask_img=imag_mask,
    sessions=session,
    standardize=True,
    memory="nilearn_cache",
    memory_level=1,
)

# Extract the masked data, then keep only the rows selected by
# `condition_mask`, applying the same selection to `session` so that X and
# session stay row-aligned.
X = nifti_masker.fit_transform(dataset)[condition_mask]
session = session[condition_mask]

In [None]:
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC

# Classifier: an RBF-kernel support vector machine with fixed C and gamma.
svc = SVC(kernel='rbf', C=10, gamma=1.0)

# Dimension reduction: classical univariate feature selection based on the
# ANOVA F-test, initially keeping the 500 best-scoring features.
feature_selection = SelectKBest(f_classif, k=500)

# Chain the two steps into a single estimator: features are selected first
# ('anova'), then the classifier ('svc') is applied to the selected features.
anova_svc = Pipeline(steps=[
    ('anova', feature_selection),
    ('svc', svc),
])

In [None]:
from sklearn.model_selection import cross_val_score

# Sweep the number of features kept by the 'anova' step and, for each value,
# record (a) the mean 3-fold cross-validation accuracy on the training split
# and (b) the accuracy on the held-out validation split.
#
# The previous version first fitted the pipeline on the full data and stored
# a training-set prediction that was overwritten before ever being read; that
# dead (and costly) fit has been removed.

# The train/validation split does not depend on k, so build it once.
# NOTE(review): `session` holds the "sub" column, so this split assumes
# numeric ids where id 1 is held out and ids below 1 form the training set --
# confirm against label_all_sub.csv.
train_mask = session < 1
validation_mask = session == 1
X_train, y_train = X[train_mask], y[train_mask]
X_validation, y_validation = X[validation_mask], y[validation_mask]

k_range = [10, 15, 30, 50, 150, 300, 500, 1000, 1500, 3000, 5000]
cv_scores = []
scores_validation = []

for k in k_range:
    # Set the number of selected features through the pipeline itself rather
    # than by mutating the step object's attribute directly.
    anova_svc.set_params(anova__k=k)

    cv_scores.append(np.mean(
        cross_val_score(anova_svc, X_train, y_train, cv=3)))
    print("CV score: %.4f" % cv_scores[-1])

    # Refit on the whole training split, then score on the held-out split.
    anova_svc.fit(X_train, y_train)
    y_pred = anova_svc.predict(X_validation)
    scores_validation.append(np.mean(y_pred == y_validation))
    print("score validation: %.4f" % scores_validation[-1])