# 0. Setup

In [3]:
import nilearn
import pandas as pd
import numpy as np
import os
from nilearn.input_data import NiftiLabelsMasker
from nilearn.image import index_img 


from nilearn import plotting
from nilearn import connectome
from nilearn import datasets
from nilearn import image

from nilearn.regions import connected_label_regions

from nilearn.image import concat_imgs, mean_img, load_img
from nilearn.plotting import plot_anat, plot_img, plot_stat_map




In [4]:
# Set the path to the data directory
src_dir = '../data/derivatives/fc_mvpa'

# Set the participant and session IDs
part_id = 'sub-10'
ses_id = 'ses-01'


# 1. Load Mask / atlas / meta

In [5]:
atlas_ho= datasets.fetch_atlas_harvard_oxford('cort-maxprob-thr50-2mm')
atlas_ho_filename = atlas_ho.maps
labels_ho = atlas_ho.labels
number_of_regions_ho = len(labels_ho[1:])
region_labels_ho=connected_label_regions(atlas_ho_filename)


# 2. Load data.

In [6]:
# extract time series from ROIs
from nilearn.input_data import NiftiLabelsMasker

masker = NiftiLabelsMasker(labels_img=atlas_ho_filename, 
                           standardize="zscore_sample", 
                           standardize_confounds="zscore_sample",
                            memory='nilearn_cache')


In [7]:
data_dir = os.path.join(src_dir, part_id, ses_id)


fn = f'{part_id}_{ses_id}_task-02a-MVPA-12sBOLD_correlation-matrix.csv'


# Load feature set without index column.
X = pd.read_csv(os.path.join(data_dir, fn), index_col=0)

# Load csv file with targets.
target_set = pd.read_csv(os.path.join(data_dir, f'{part_id}_{ses_id}_task-02a-MVPA-12sBOLD_targets.csv'))

# Load csv file with groups.
group_set = pd.read_csv(os.path.join(data_dir, f'{part_id}_{ses_id}_task-02a-MVPA-12sBOLD_groups.csv'))

In [8]:
X

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,2294,2295,2296,2297,2298,2299,2300,2301,2302,2303
0,1.0,0.472867,0.436031,0.514384,0.538561,0.383304,0.361168,0.270609,0.238606,0.434412,...,0.539531,0.514461,0.389919,0.415573,0.367763,0.087698,0.111545,0.141712,0.455021,1.0
1,1.0,0.349611,0.668151,0.626731,0.602441,0.238092,0.125390,0.648085,0.319852,0.558617,...,0.180248,0.229835,-0.118430,0.037520,0.066224,-0.109608,-0.131272,0.225610,0.430073,1.0
2,1.0,0.437966,0.548335,0.568625,0.320594,0.262169,0.543516,0.671902,0.369025,0.427831,...,0.378764,0.555047,-0.065700,0.387637,0.269983,-0.058294,0.168840,0.260122,0.540543,1.0
3,1.0,0.448497,0.583478,0.519315,0.397270,0.154706,0.509454,0.605059,0.023484,0.464463,...,0.450476,0.618124,-0.053374,0.337868,0.250099,-0.192076,-0.011393,0.058183,0.379482,1.0
4,1.0,0.493920,0.478096,0.503723,0.400250,0.286006,0.582199,0.514801,0.135923,0.271833,...,0.174668,0.388716,0.412687,0.179425,0.104147,-0.088435,0.265158,0.298934,0.473302,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
71,1.0,0.398384,0.519039,0.171393,0.237881,0.234724,0.391118,0.484918,0.102054,0.102346,...,0.379838,0.366767,-0.022971,0.216385,0.267523,-0.128571,0.063801,0.148280,0.475625,1.0
72,1.0,0.619738,0.658710,0.591666,0.267869,0.666599,0.756025,0.673688,0.386197,0.678179,...,0.418409,0.627331,0.023029,0.621386,0.514178,0.439777,0.469907,0.517417,0.593605,1.0
73,1.0,0.396131,0.694257,0.660639,0.212614,0.352581,0.512189,0.349390,-0.281890,-0.195372,...,0.661415,0.534743,0.294292,0.525161,0.416039,-0.259563,0.304586,0.069270,0.391460,1.0
74,1.0,0.613916,0.725165,0.453658,0.278723,0.682547,0.661304,0.316044,0.007664,0.054449,...,0.676797,0.738578,-0.034625,0.486314,0.728473,0.058598,0.128654,-0.001377,0.512728,1.0


In [9]:
# Getting the correlation matrix 
partial_correlation_matrix = X.T


In [10]:
# Number of instances (time points) in the data
n_instances = partial_correlation_matrix.shape[1]

# Create an adjacency matrix with time points as the third dimension
correlation_3d = np.zeros((number_of_regions_ho, number_of_regions_ho, n_instances))

# Indices of the lower triangle of the matrix and excluding the diagonal
tril_idx = np.tril_indices(number_of_regions_ho, k=-1)

# Fill the adjacency matrix 
for idx, col in enumerate(partial_correlation_matrix.columns):

    temp_values = partial_correlation_matrix[col].values
    #Fill the lower triangle of the matrix
    matrix_2d = temp_values.reshape((number_of_regions_ho, number_of_regions_ho))
    correlation_3d[:, :, idx] = matrix_2d

In [11]:
# Number of elements in the lower triangle of the matrix
n_lower = len(tril_idx[0]) 

# Create a 2D matrix with the lower triangle of the correlation matrix
flattened_lower_triangle = np.zeros((n_lower, n_instances))

for idx in range(n_instances):
    # Fill the new matrix with 
    flattened_lower_triangle[:, idx] = correlation_3d[:, :, idx][tril_idx]

flattened_lower_triangle.shape

(1128, 76)

In [27]:
df_lower= pd.DataFrame(flattened_lower_triangle.T, columns=[f'{labels_ho[i]}_{labels_ho[j]}' for i, j in zip(tril_idx[0], tril_idx[1])])

In [28]:
df_lower

Unnamed: 0,Frontal Pole_Background,Insular Cortex_Background,Insular Cortex_Frontal Pole,Superior Frontal Gyrus_Background,Superior Frontal Gyrus_Frontal Pole,Superior Frontal Gyrus_Insular Cortex,Middle Frontal Gyrus_Background,Middle Frontal Gyrus_Frontal Pole,Middle Frontal Gyrus_Insular Cortex,Middle Frontal Gyrus_Superior Frontal Gyrus,...,"Supracalcarine Cortex_Temporal Fusiform Cortex, anterior division","Supracalcarine Cortex_Temporal Fusiform Cortex, posterior division",Supracalcarine Cortex_Temporal Occipital Fusiform Cortex,Supracalcarine Cortex_Occipital Fusiform Gyrus,Supracalcarine Cortex_Frontal Operculum Cortex,Supracalcarine Cortex_Central Opercular Cortex,Supracalcarine Cortex_Parietal Operculum Cortex,Supracalcarine Cortex_Planum Polare,Supracalcarine Cortex_Heschl's Gyrus (includes H1 and H2),Supracalcarine Cortex_Planum Temporale
0,0.472867,0.436031,0.433712,0.514384,0.298930,0.234409,0.538561,0.422222,0.298688,0.355118,...,0.445370,0.539531,0.514461,0.389919,0.415573,0.367763,0.087698,0.111545,0.141712,0.455021
1,0.349611,0.668151,0.329324,0.626731,0.426617,0.686683,0.602441,0.367634,0.581698,0.561251,...,0.179311,0.180248,0.229835,-0.118430,0.037520,0.066224,-0.109608,-0.131272,0.225610,0.430073
2,0.437966,0.548335,0.201776,0.568625,0.174270,0.659180,0.320594,0.115109,0.543405,0.426564,...,0.144941,0.378764,0.555047,-0.065700,0.387637,0.269983,-0.058294,0.168840,0.260122,0.540543
3,0.448497,0.583478,0.348565,0.519315,0.473245,0.519277,0.397270,0.350462,0.279547,0.302973,...,0.376591,0.450476,0.618124,-0.053374,0.337868,0.250099,-0.192076,-0.011393,0.058183,0.379482
4,0.493920,0.478096,0.501712,0.503723,0.508312,0.500085,0.400250,0.373563,0.510933,0.417964,...,-0.120594,0.174668,0.388716,0.412687,0.179425,0.104147,-0.088435,0.265158,0.298934,0.473302
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
71,0.398384,0.519039,0.375862,0.171393,-0.066403,0.147866,0.237881,0.086013,0.123693,-0.085955,...,0.082488,0.379838,0.366767,-0.022971,0.216385,0.267523,-0.128571,0.063801,0.148280,0.475625
72,0.619738,0.658710,0.581271,0.591666,0.331898,0.618991,0.267869,0.265901,0.196553,-0.032595,...,0.248681,0.418409,0.627331,0.023029,0.621386,0.514178,0.439777,0.469907,0.517417,0.593605
73,0.396131,0.694257,0.376891,0.660639,0.172699,0.662892,0.212614,0.155702,0.226223,0.239705,...,-0.030081,0.661415,0.534743,0.294292,0.525161,0.416039,-0.259563,0.304586,0.069270,0.391460
74,0.613916,0.725165,0.529407,0.453658,0.232961,0.379417,0.278723,0.270847,0.532843,0.134713,...,0.514444,0.676797,0.738578,-0.034625,0.486314,0.728473,0.058598,0.128654,-0.001377,0.512728


In [13]:
df_lower  = pd.DataFrame(flattened_lower_triangle.T)

# 3. Classification using scikit routines

In [14]:
from nilearn.maskers import NiftiMasker
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import LeaveOneGroupOut
from sklearn.model_selection import GridSearchCV

from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.svm import SVC
from sklearn.multiclass import OneVsOneClassifier, OneVsRestClassifier
from sklearn.pipeline import Pipeline


logo = LeaveOneGroupOut()

In [15]:
X=df_lower.values

In [16]:
y = list(target_set['target'])

# transform the target_set unique strings to integers
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
y = le.fit_transform(y)

# 0 agaisnt all others
# y = np.where(y == 1, 0, 1)

print(y)

[1 8 8 0 0 3 3 2 2 7 7 5 5 6 6 9 9 4 4 1 8 8 3 3 9 9 5 5 4 4 6 6 0 0 7 7 2
 2 1 7 7 9 9 6 6 2 2 5 5 4 4 3 3 0 0 8 8 1 5 5 4 4 3 3 9 9 0 0 2 2 7 7 8 8
 6 6]


In [17]:
runs_group = list(group_set['group'])

print(runs_group)

# unique values in runs_group
print(np.unique(runs_group))

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3]
[0 1 2 3]


In [18]:
param_grid = [{'estimator__svc__C': [0.1, 1, 10, 100, 1000]}]

In [19]:
selector = SelectKBest(f_classif)
#using anova as it
svc_ovr = OneVsRestClassifier(
    Pipeline(
        [
            ("anova", selector),
            ("svc", SVC(kernel="linear", C=0.1))
        ]
    )
)

In [20]:
from sklearn.metrics import roc_auc_score
def roc_auc_score_multiclass(actual_class, pred_class, average, multi_class):

  #creating a set of all the unique classes using the actual class list
  unique_class = set(actual_class)
  roc_auc_dict = {}
  for per_class in unique_class:
    #creating a list of all the classes except the current class 
    other_class = [x for x in unique_class if x != per_class]

    #marking the current class as 1 and all other classes as 0
    new_actual_class = [0 if x in other_class else 1 for x in actual_class]
    new_pred_class = [0 if x in other_class else 1 for x in pred_class]

    #using the sklearn metrics method to calculate the roc_auc_score
    roc_auc = roc_auc_score(new_actual_class, new_pred_class, average = average, multi_class = multi_class)
    roc_auc_dict[per_class] = roc_auc

  return roc_auc_dict