 # MVPA
 based on https://peerherholz.github.io/workshop_weizmann/advanced/machine_learning_nilearn.html

In [8]:
import numpy as np
import nibabel as nb
import os
from nilearn.input_data import NiftiMasker

In [9]:
# Dataset root and the derivative folders used by the rest of the notebook
root_dir = '/Volumes/T7/BIDS-BRAINPLAYBACK-TASK2'
derivatives_dir = os.path.join(root_dir, 'derivatives')
fmriprep_dir = os.path.join(derivatives_dir, 'fmriprep23')
dataset_dir = os.path.join(derivatives_dir, 'mvpa_01_musicnoise')

# Mask image handed to NiftiMasker further down.
# Alternative mask kept for reference (disabled):
#mask_brain_file = os.path.join(derivatives_dir, 'mni_icbm152_t1_tal_nlin_asym_09c.nii')
mask_gm_file = os.path.join(derivatives_dir, 'mni_icbm152_gm_tal_nlin_asym_09c_res-2_dilated.nii')

In [10]:
# List the dataset images and their trial-type label files.
# Both lists come from a single directory listing sorted by file name, so the
# i-th label file is expected to describe the i-th dataset image.
dataset_suffix = '_musicnoise_confounds_dataset.nii.gz'
label_suffix = '_musicnoise_confounds_trial_types.txt'

# only entries whose name starts with 'sub-' are considered
entry_names = sorted(name for name in os.listdir(dataset_dir) if name.startswith('sub-'))

# find all *_dataset.nii.gz files in dataset_dir
dataset_files = [os.path.join(dataset_dir, name) for name in entry_names
                 if name.endswith(dataset_suffix)]

# find all *_trial_types.txt files in dataset_dir
label_files = [os.path.join(dataset_dir, name) for name in entry_names
               if name.endswith(label_suffix)]

# Fail early with a clear message: an empty or unequal pair of lists would
# otherwise only surface later as an obscure error or as misaligned labels.
if not dataset_files:
    raise FileNotFoundError('no sub-*%s files found in %s' % (dataset_suffix, dataset_dir))
if len(dataset_files) != len(label_files):
    raise ValueError('found %d dataset files but %d label files in %s'
                     % (len(dataset_files), len(label_files), dataset_dir))

In [11]:
# Join every dataset image into a single image along axis index 3
concat_axis = 3
D = nb.concat_images(dataset_files, axis=concat_axis)

In [12]:
# Read the labels of every label file (in label_files order) and join them
# into a single 1-D string array.
# ndmin=1 keeps a file holding a single label as a length-1 array; without it
# np.loadtxt returns a 0-d array, which np.concatenate rejects.
labels_per_file = [np.loadtxt(label_path, dtype=str, ndmin=1) for label_path in label_files]
labels = np.concatenate(labels_per_file)

# trim each label to remove the 2 digit number in the end
labels = np.array([label[:-2] for label in labels])

In [14]:
# Configure the masker with the mask file; standardization and detrending
# are both switched off.
masker_options = {
    'mask_img': mask_gm_file,
    'standardize': False,
    'detrend': False,
}
masker = NiftiMasker(**masker_options)

# Fit the masker and apply it to the concatenated image in one call
samples = masker.fit_transform(D)

In [15]:
# One chunk id per sample, used further down as `groups` for cross-validation.
# Design as noted by the author: 13 subjects, 4 runs each, with 10 noise and
# 9 x 2 music samples per run.
n_subjects = 13
n_runs = 4
noise_per_run = 10
music_per_run = 9 * 2
samples_per_subject = noise_per_run * n_runs + music_per_run * n_runs

# ids run from 1 to n_subjects, each repeated once per sample of that subject
subject_ids = np.arange(1, n_subjects + 1)
chunks = np.repeat(subject_ids, samples_per_subject)

In [16]:
# Let's specify the classifier
from sklearn.svm import LinearSVC
clf = LinearSVC(multi_class="ovr", max_iter=1000, class_weight='balanced')


In [17]:
# Cross-validate the classifier, holding out one chunk (group) per fold.
# This step takes a while; it runs on 6 parallel workers.
from sklearn.model_selection import LeaveOneGroupOut, cross_val_score
leave_one_chunk_out = LeaveOneGroupOut()
cv_scores = cross_val_score(clf, samples, labels,
                            groups=chunks,
                            cv=leave_one_chunk_out,
                            n_jobs=6,
                            verbose=1)


[Parallel(n_jobs=6)]: Using backend LokyBackend with 6 concurrent workers.
[Parallel(n_jobs=6)]: Done  13 out of  13 | elapsed:   12.4s finished


In [18]:
# Report the mean accuracy (as a percentage) followed by the per-fold scores
mean_accuracy_percent = cv_scores.mean() * 100
print('Average accuracy = %.02f percent\n' % mean_accuracy_percent)
print('Accuracy per fold:')
print(cv_scores)


Average accuracy = 86.06 percent

Accuracy per fold:
[0.88392857 0.90178571 0.80357143 0.92857143 0.875      0.76785714
 0.91964286 0.85714286 0.8125     0.89285714 0.83035714 0.83035714
 0.88392857]


In [19]:
# --- Disabled code: saving the fold accuracies and running a permutation test ---
# Everything below is commented out and does not run; only the final print
# statement of this cell is executed.
# NOTE(review): the CSV written by results1.to_csv is named
# 'mvpa-lsa-musicnoise-confounds-gm-results-1.csv', but the message printed
# right after it omits the 'lsa-' part -- align the two before re-enabling.
# import pandas as pd
# results1 = pd.DataFrame({'accuracy': cv_scores})
# results1.to_csv(os.path.join(dataset_dir, 'mvpa-lsa-musicnoise-confounds-gm-results-1.csv'), index=False)
# print('saved accuracy results to mvpa-musicnoise-confounds-gm-results-1.csv')

# # %%
# # Import the permutation function
# from sklearn.model_selection import permutation_test_score

# # %%
# # Run the permutation cross-validation
# null_cv_scores = permutation_test_score(estimator=clf,
#                                         X=samples,
#                                         y=labels,
#                                         groups=chunks,
#                                         cv=LeaveOneGroupOut(),
#                                         n_permutations=1000,
#                                         n_jobs=30,
#                                         verbose=1)

# # %%
# print('Prediction accuracy: %.02f' % (null_cv_scores[0] * 100),
#       'p-value: %.04f' % (null_cv_scores[2]),
#       sep='\n')

# # %%
# NOTE(review): null_cv_scores is indexed as [0] (accuracy) and [2] (p-value)
# above, so it is not a flat array of accuracies; passing it whole as the
# 'accuracy' column presumably fails or gives an unintended layout -- confirm
# and select the intended element before re-enabling.
# results2 = pd.DataFrame({'accuracy': null_cv_scores})
# results2.to_csv(os.path.join(dataset_dir, 'mvpa-lsa-results-2.csv'), index=False)
# print('saved accuracy results to mvpa-lsa-results-2.csv')

# Marker printed when the cell (and thus the notebook run) reaches its end
print('python script finished running.')

python script finished running.
