In [1]:
import os, re, glob, json
from pathlib import Path

import matplotlib.pyplot as plt
import nibabel as nib
import numpy as np
import pandas as pd
from nilearn import image
from nilearn import plotting
from nilearn.decoding import Decoder
from nilearn.glm import threshold_stats_img
from nilearn.glm.first_level import FirstLevelModel
from nilearn.glm.second_level import SecondLevelModel, non_parametric_inference
from nilearn.image import load_img, resample_to_img
from nilearn.plotting import plot_design_matrix
from nilearn.plotting import plot_stat_map
from sklearn.model_selection import LeaveOneGroupOut
from sklearn.model_selection import StratifiedGroupKFold

In [2]:
# Dataset locations. Every directory constant below BASE ends with exactly one
# "/" and is joined without adding another, so no path contains "//".
BASE = "/local/anpa/ds003242-1.0.0"
DERIVATIVES = f"{BASE}/derivatives/"
DERIVATIVES_FMRIPREP = f"{DERIVATIVES}fmriprep/"
# Holds one "sub-*" folder per subject/session with the *_effsize.nii.gz maps.
FIRST_LEVEL_SEP_RUNS = f"{DERIVATIVES}firstlevel_separate_runs/"
TASK = "CIC"
# Passed to the decoders as `t_r` (presumably the repetition time in seconds).
TR = 2.0

In [3]:
def resample_mask_to_bold(anat_mask, bold_img):
    """Bring an anatomical mask onto the grid of a BOLD image.

    Follows the nilearn tutorial
    https://nilearn.github.io/dev/auto_examples/04_glm_first_level/plot_first_level_details.html

    Parameters
    ----------
    anat_mask
        Mask image to resample (forwarded unchanged to ``resample_to_img``).
    bold_img
        Image whose space is used as the resampling target.

    Returns
    -------
    Whatever ``resample_to_img`` returns for the resampled mask.
    """
    # Nearest-neighbour interpolation copies existing mask values instead of
    # blending neighbouring ones.
    resampling_options = {
        "interpolation": "nearest",
        "copy_header": True,
        "force_resample": True,
    }
    resampled_mask = resample_to_img(anat_mask, bold_img, **resampling_options)
    return resampled_mask

def fprep_func_dir(sub):
    """Return the fMRIPrep ``func`` folder of one subject as a ``Path``.

    ``sub`` is the subject label without the "sub-" prefix; the folder is
    resolved below ``BASE`` as ``derivatives/fmriprep/sub-<sub>/func``.
    """
    fmriprep_root = Path(BASE) / "derivatives" / "fmriprep"
    return fmriprep_root / f"sub-{sub}" / "func"

In [4]:
# Subject labels (folder name minus the "sub-" prefix) that have a first-level
# output folder, in sorted order.
all_sub_dirs = sorted(
    entry.name.split("sub-")[-1]
    for entry in Path(FIRST_LEVEL_SEP_RUNS).glob("sub-*")
    if entry.is_dir()
)
all_sub_dirs[:3]

['SAXSISO01b', 'SAXSISO01f', 'SAXSISO01s']

In [5]:
# The last character of a subject label selects the session list it goes into:
# "f" -> fasting, "s" -> social, "b" -> baseline.
fasting_participants, social_participants, baseline_participants = (
    [s for s in all_sub_dirs if s.endswith(session_suffix)]
    for session_suffix in ("f", "s", "b")
)

In [6]:
def _collect_effsize_maps(participants, stimulus):
    """Return the first-level effect-size maps of one stimulus category.

    Parameters
    ----------
    participants : iterable of str
        Subject labels without the "sub-" prefix.
    stimulus : str
        Category name embedded in the file name ("Food", "Social" or "Control").

    Returns
    -------
    list of pathlib.Path
        Every file matching ``*<stimulus>_*_effsize.nii.gz`` anywhere below
        each participant's folder in FIRST_LEVEL_SEP_RUNS, listed participant
        by participant.
    """
    return [
        effsize_map
        for sub in participants
        # rglob yields entries in file-system order; sorting keeps the sample
        # order identical across machines and re-runs.
        for effsize_map in sorted(
            Path(f"{FIRST_LEVEL_SEP_RUNS}/sub-{sub}").rglob(f"*{stimulus}_*_effsize.nii.gz")
        )
    ]


# One list per (session, stimulus) pair, named <session>_<stimulus>.
fasting_food = _collect_effsize_maps(fasting_participants, "Food")
fasting_social = _collect_effsize_maps(fasting_participants, "Social")
fasting_control = _collect_effsize_maps(fasting_participants, "Control")

social_food = _collect_effsize_maps(social_participants, "Food")
social_social = _collect_effsize_maps(social_participants, "Social")
social_control = _collect_effsize_maps(social_participants, "Control")

baseline_food = _collect_effsize_maps(baseline_participants, "Food")
baseline_social = _collect_effsize_maps(baseline_participants, "Social")
baseline_control = _collect_effsize_maps(baseline_participants, "Control")

In [7]:
# Condition names: each stimulus category combined with the indices 1-3,
# category by category.
conditions = [
    f"{category}_{index}"
    for category in ("Food", "Social", "Control")
    for index in (1, 2, 3)
]

# 3 Classes, after fasting. CV folds based on participants, 10 folds

In [8]:
# Grouping key per map: the two characters in front of the session letter in
# the parent folder name (e.g. "sub-SAXSISO01f" -> "01"). Despite the `runs_*`
# names these are participant numbers, not run indices.
# Labels: one stimulus-category string per map (fasting session).
runs_food = [f.parent.name[-3:-1] for f in fasting_food]
labels_food = ["Food"] * len(runs_food)

runs_social = [f.parent.name[-3:-1] for f in fasting_social]
labels_social = ["Social"] * len(runs_social)

runs_control = [f.parent.name[-3:-1] for f in fasting_control]
labels_control = ["Control"] * len(runs_control)

In [9]:
# Stack the three stimulus classes in the same order everywhere, so entry i of
# X, y and groups refers to the same effect-size map (fasting session).
X = np.array([*fasting_food, *fasting_social, *fasting_control])
y = np.array([*labels_food, *labels_social, *labels_control])
groups = np.array([*runs_food, *runs_social, *runs_control])

In [10]:
# 10-fold CV that stratifies on the labels and keeps all samples sharing a
# `groups` value in the same fold; the fixed seed makes the shuffle repeatable.
cv = StratifiedGroupKFold(n_splits=10, shuffle=True, random_state=0)

# "svc" estimator on standardized inputs, scored by accuracy. No mask,
# mask_strategy or screening_percentile is passed, so nilearn's defaults apply.
decoder = Decoder(
    estimator="svc",
    cv=cv,
    scoring="accuracy",
    standardize=True,
    t_r=TR,
    n_jobs=2,
)
decoder.fit(X, y, groups=groups)

  f = msb / msw
  f = msb / msw
  f = msb / msw


In [11]:
# cv_scores_ maps each class label to its cross-validation scores; average
# over every class and fold.
# NOTE(review): confirm which chance level applies to these per-class scores
# before interpreting the mean.
classification_accuracy = np.mean(list(decoder.cv_scores_.values()))
print(f"Classification accuracy: {classification_accuracy:.4f}")

Classification accuracy: 0.4059 / 


In [12]:
# Mean cross-validation score for each class label.
for label, fold_scores in decoder.cv_scores_.items():
    class_mean = np.mean(fold_scores)
    print(label, class_mean)

Control 0.3351851851851852
Food 0.3651234567901235
Social 0.5172839506172839


# 3 Classes during baseline

In [13]:
# Grouping key per map: the two characters in front of the session letter in
# the parent folder name (e.g. "sub-SAXSISO01b" -> "01"). Despite the `runs_*`
# names these are participant numbers, not run indices.
# Labels: one stimulus-category string per map (baseline session).
runs_food = [f.parent.name[-3:-1] for f in baseline_food]
labels_food = ["Food"] * len(runs_food)

runs_social = [f.parent.name[-3:-1] for f in baseline_social]
labels_social = ["Social"] * len(runs_social)

runs_control = [f.parent.name[-3:-1] for f in baseline_control]
labels_control = ["Control"] * len(runs_control)

In [14]:
# Stack the three stimulus classes in the same order everywhere, so entry i of
# X, y and groups refers to the same effect-size map (baseline session).
X = np.array([*baseline_food, *baseline_social, *baseline_control])
y = np.array([*labels_food, *labels_social, *labels_control])
groups = np.array([*runs_food, *runs_social, *runs_control])

In [15]:
# 10-fold CV that stratifies on the labels and keeps all samples sharing a
# `groups` value in the same fold; the fixed seed makes the shuffle repeatable.
cv = StratifiedGroupKFold(n_splits=10, shuffle=True, random_state=0)

# "svc" estimator on standardized inputs, scored by accuracy. No mask,
# mask_strategy or screening_percentile is passed, so nilearn's defaults apply.
decoder = Decoder(
    estimator="svc",
    cv=cv,
    scoring="accuracy",
    standardize=True,
    t_r=TR,
    n_jobs=2,
)
decoder.fit(X, y, groups=groups)

  f = msb / msw
  f = msb / msw
  f = msb / msw
  f = msb / msw
  f = msb / msw
  f = msb / msw
  f = msb / msw
  f = msb / msw
  f = msb / msw
  f = msb / msw
  f = msb / msw
  f = msb / msw
  f = msb / msw
  f = msb / msw
  f = msb / msw


In [16]:
# cv_scores_ maps each class label to its cross-validation scores; average
# over every class and fold.
# NOTE(review): confirm which chance level applies to these per-class scores
# before interpreting the mean.
classification_accuracy = np.mean(list(decoder.cv_scores_.values()))
print(f"Classification accuracy: {classification_accuracy:.4f}")

Classification accuracy: 0.4033 / 


In [17]:
# Mean cross-validation score for each class label.
for label, fold_scores in decoder.cv_scores_.items():
    class_mean = np.mean(fold_scores)
    print(label, class_mean)

Control 0.3342592592592593
Food 0.3490286855482934
Social 0.5266430646332607


# 3 Classes after social isolation

In [18]:
# Grouping key per map: the two characters in front of the session letter in
# the parent folder name (e.g. "sub-SAXSISO01s" -> "01"). Despite the `runs_*`
# names these are participant numbers, not run indices.
# Labels: one stimulus-category string per map (social-isolation session).
runs_food = [f.parent.name[-3:-1] for f in social_food]
labels_food = ["Food"] * len(runs_food)

runs_social = [f.parent.name[-3:-1] for f in social_social]
labels_social = ["Social"] * len(runs_social)

runs_control = [f.parent.name[-3:-1] for f in social_control]
labels_control = ["Control"] * len(runs_control)

In [19]:
# Stack the three stimulus classes in the same order everywhere, so entry i of
# X, y and groups refers to the same effect-size map (social-isolation session).
X = np.array([*social_food, *social_social, *social_control])
y = np.array([*labels_food, *labels_social, *labels_control])
groups = np.array([*runs_food, *runs_social, *runs_control])

In [20]:
# 10-fold CV that stratifies on the labels and keeps all samples sharing a
# `groups` value in the same fold; the fixed seed makes the shuffle repeatable.
cv = StratifiedGroupKFold(n_splits=10, shuffle=True, random_state=0)

# "svc" estimator on standardized inputs, scored by accuracy. No mask,
# mask_strategy or screening_percentile is passed, so nilearn's defaults apply.
decoder = Decoder(
    estimator="svc",
    cv=cv,
    scoring="accuracy",
    standardize=True,
    t_r=TR,
    n_jobs=2,
)
decoder.fit(X, y, groups=groups)

  f = msb / msw
  f = msb / msw
  f = msb / msw


In [21]:
# cv_scores_ maps each class label to its cross-validation scores; average
# over every class and fold.
# NOTE(review): confirm which chance level applies to these per-class scores
# before interpreting the mean.
classification_accuracy = np.mean(list(decoder.cv_scores_.values()))
print(f"Classification accuracy: {classification_accuracy:.4f}")

Classification accuracy: 0.4025 / 


In [22]:
# Mean cross-validation score for each class label.
for label, fold_scores in decoder.cv_scores_.items():
    class_mean = np.mean(fold_scores)
    print(label, class_mean)

Control 0.33395061728395065
Food 0.3458333333333333
Social 0.5277777777777778


# 3 Classes: food maps, predicting the session (fasting / social isolation / baseline)

In [23]:
# Food maps only; the class label encodes the session the map comes from:
# "Food_1" = fasting, "Food_2" = social isolation, "Food_3" = baseline.
# Grouping key: the two characters in front of the session letter in the parent
# folder name (e.g. "sub-SAXSISO01f" -> "01"), i.e. a participant number that
# is shared by that participant's folders from all three sessions.
runs_food_1 = [f.parent.name[-3:-1] for f in fasting_food]
labels_food_1 = ["Food_1"] * len(runs_food_1)

runs_food_2 = [f.parent.name[-3:-1] for f in social_food]
labels_food_2 = ["Food_2"] * len(runs_food_2)

runs_food_3 = [f.parent.name[-3:-1] for f in baseline_food]
labels_food_3 = ["Food_3"] * len(runs_food_3)

In [24]:
# Stack the three sessions in the same order everywhere, so entry i of X, y
# and groups refers to the same food effect-size map.
X = np.array([*fasting_food, *social_food, *baseline_food])
y = np.array([*labels_food_1, *labels_food_2, *labels_food_3])
groups = np.array([*runs_food_1, *runs_food_2, *runs_food_3])

In [25]:
# 10-fold CV that stratifies on the labels and keeps all samples sharing a
# `groups` value in the same fold; the fixed seed makes the shuffle repeatable.
cv = StratifiedGroupKFold(n_splits=10, shuffle=True, random_state=0)

# "svc" estimator on standardized inputs, scored by accuracy. No mask,
# mask_strategy or screening_percentile is passed, so nilearn's defaults apply.
decoder = Decoder(
    estimator="svc",
    cv=cv,
    scoring="accuracy",
    standardize=True,
    t_r=TR,
    n_jobs=2,
)
decoder.fit(X, y, groups=groups)

  f = msb / msw
  f = msb / msw
  f = msb / msw


In [26]:
# cv_scores_ maps each class label to its cross-validation scores; average
# over every class and fold.
# NOTE(review): confirm which chance level applies to these per-class scores
# before interpreting the mean.
classification_accuracy = np.mean(list(decoder.cv_scores_.values()))
print(f"Classification accuracy: {classification_accuracy:.4f}")

Classification accuracy: 0.3378 / 


In [27]:
# Mean cross-validation score for each class label.
for label, fold_scores in decoder.cv_scores_.items():
    class_mean = np.mean(fold_scores)
    print(label, class_mean)

Food_1 0.3393972403776325
Food_2 0.3351397966594045
Food_3 0.3389251997095134


# 3 Classes: social maps, predicting the session (fasting / social isolation / baseline)

In [28]:
# Social maps only; the class label encodes the session the map comes from:
# "Social_1" = fasting, "Social_2" = social isolation, "Social_3" = baseline.
# Grouping key: the two characters in front of the session letter in the parent
# folder name (e.g. "sub-SAXSISO01f" -> "01"), i.e. a participant number that
# is shared by that participant's folders from all three sessions.
runs_social_1 = [f.parent.name[-3:-1] for f in fasting_social]
labels_social_1 = ["Social_1"] * len(runs_social_1)

runs_social_2 = [f.parent.name[-3:-1] for f in social_social]
labels_social_2 = ["Social_2"] * len(runs_social_2)

runs_social_3 = [f.parent.name[-3:-1] for f in baseline_social]
labels_social_3 = ["Social_3"] * len(runs_social_3)

In [29]:
# Stack the three sessions in the same order everywhere, so entry i of X, y
# and groups refers to the same social effect-size map.
X = np.array([*fasting_social, *social_social, *baseline_social])
y = np.array([*labels_social_1, *labels_social_2, *labels_social_3])
groups = np.array([*runs_social_1, *runs_social_2, *runs_social_3])

In [30]:
# 10-fold CV that stratifies on the labels and keeps all samples sharing a
# `groups` value in the same fold; the fixed seed makes the shuffle repeatable.
cv = StratifiedGroupKFold(n_splits=10, shuffle=True, random_state=0)

# "svc" estimator on standardized inputs, scored by accuracy. No mask,
# mask_strategy or screening_percentile is passed, so nilearn's defaults apply.
decoder = Decoder(
    estimator="svc",
    cv=cv,
    scoring="accuracy",
    standardize=True,
    t_r=TR,
    n_jobs=2,
)

decoder.fit(X, y, groups=groups)

  f = msb / msw
  f = msb / msw
  f = msb / msw
  f = msb / msw


In [31]:
# cv_scores_ maps each class label to its cross-validation scores; average
# over every class and fold.
# NOTE(review): confirm which chance level applies to these per-class scores
# before interpreting the mean.
classification_accuracy = np.mean(list(decoder.cv_scores_.values()))
print(f"Classification accuracy: {classification_accuracy:.4f}")

Classification accuracy: 0.3349 / 


In [32]:
# Mean cross-validation score for each class label.
for label, fold_scores in decoder.cv_scores_.items():
    class_mean = np.mean(fold_scores)
    print(label, class_mean)

Social_1 0.3323347857661583
Social_2 0.33710965867828613
Social_3 0.3351851851851852


In [33]:
# Spot check on one map from a baseline-session folder (label ends in "b");
# in this decoder's label coding its true class is "Social_3".
# NOTE(review): the file name matches the "*Social_*_effsize.nii.gz" pattern
# used to build baseline_social, so this sample was presumably part of the
# data the decoder was fitted on. It is not a held-out test.
example_map = Path(FIRST_LEVEL_SEP_RUNS) / "sub-SAXSISO08b" / "0_Social_2_effsize.nii.gz"
decoder.predict([str(example_map)])

  return self.func(*args, **kwargs)


array(['Social_3'], dtype='<U8')