In [1]:
import os, re, glob, json
from pathlib import Path
import numpy as np
import pandas as pd
import nibabel as nib
from nilearn.glm.first_level import FirstLevelModel
from nilearn.glm.second_level import SecondLevelModel, non_parametric_inference
from nilearn import image
import matplotlib.pyplot as plt
from nilearn.plotting import plot_design_matrix
from nilearn.plotting import plot_stat_map
from nilearn.glm import threshold_stats_img
from nilearn import plotting
from nilearn.image import load_img, resample_to_img
from sklearn.model_selection import LeaveOneGroupOut
from nilearn.decoding import Decoder

In [2]:
BASE = "/local/anpa/ds003242-1.0.0"
DERIVATIVES = f"{BASE}/derivatives/"
DERIVATIVES_FMRIPREP = f"{DERIVATIVES}/fmriprep/" 
FIRST_LEVEL_SEP_RUNS = f"{DERIVATIVES}/firstlevel_separate_runs/"  
TASK = "CIC"
TR = 2.0

In [3]:
def resample_mask_to_bold(anat_mask, bold_img):
    """Resample anat mask to the space of the bold image.
    As in tutorial https://nilearn.github.io/dev/auto_examples/04_glm_first_level/plot_first_level_details.html
    """
    return resample_to_img(
        anat_mask,
        bold_img,
        interpolation="nearest",
        copy_header=True,
        force_resample=True,
    )

def fprep_func_dir(sub):
    return Path(BASE)/f"derivatives/fmriprep/sub-{sub}/func"

In [4]:
all_sub_dirs = sorted([p.name.split("sub-")[-1] for p in Path(FIRST_LEVEL_SEP_RUNS).glob("sub-*") if p.is_dir()])
all_sub_dirs[:3]

['SAXSISO01b', 'SAXSISO01f', 'SAXSISO01s']

In [5]:
fasting_participants = [s for s in all_sub_dirs if s.endswith("f")]
social_participants = [s for s in all_sub_dirs if s.endswith("s")]
baseline_participants = [s for s in all_sub_dirs if s.endswith("b")]

In [6]:
fasting_food = [
    f
    for s in fasting_participants
    for f in Path(f"{FIRST_LEVEL_SEP_RUNS}/sub-{s}").rglob("*Food_*_effsize.nii.gz")]
fasting_social = [
    f
    for s in fasting_participants
    for f in Path(f'{FIRST_LEVEL_SEP_RUNS}/sub-{s}').rglob("*Social_*_effsize.nii.gz")]
fasting_control = [
    f
    for s in fasting_participants
    for f in Path(f'{FIRST_LEVEL_SEP_RUNS}/sub-{s}').rglob("*Control_*_effsize.nii.gz")]


social_food = [
    f
    for s in social_participants
    for f in Path(f'{FIRST_LEVEL_SEP_RUNS}/sub-{s}').rglob("*Food_*_effsize.nii.gz")]
social_social = [
    f
    for s in social_participants
    for f in Path(f'{FIRST_LEVEL_SEP_RUNS}/sub-{s}').rglob("*Social_*_effsize.nii.gz")]
social_control = [
    f
    for s in social_participants
    for f in Path(f'{FIRST_LEVEL_SEP_RUNS}/sub-{s}').rglob("*Control_*_effsize.nii.gz")]


baseline_food = [
    f
    for s in baseline_participants
    for f in Path(f'{FIRST_LEVEL_SEP_RUNS}/sub-{s}').rglob("*Food_*_effsize.nii.gz")]
baseline_social = [
    f
    for s in baseline_participants
    for f in Path(f'{FIRST_LEVEL_SEP_RUNS}/sub-{s}').rglob("*Social_*_effsize.nii.gz")]
baseline_control = [
    f
    for s in baseline_participants
    for f in Path(f'{FIRST_LEVEL_SEP_RUNS}/sub-{s}').rglob("*Control_*_effsize.nii.gz")]

In [7]:
conditions = ['Food_1', 'Food_2', 'Food_3',
              'Social_1', 'Social_2', 'Social_3',
              'Control_1', 'Control_2', 'Control_3']

# 3 Classes, after fasting. CV folds based on runs (6)

In [8]:
runs_food = [str(f).split('/')[-1][0] for f in fasting_food]
labels_food  = ['Food']  * len (runs_food)

runs_social = [str(f).split('/')[-1][0] for f in fasting_social]
labels_social  = ['Social']  * len (runs_social)

runs_control = [str(f).split('/')[-1][0] for f in fasting_control]
labels_control  = ['Control']  * len (runs_control)

In [9]:
X = np.array(fasting_food + fasting_social + fasting_control)
y = np.array(labels_food + labels_social + labels_control)
groups = np.array(runs_food + runs_social + runs_control)

In [10]:
from collections import Counter
Counter(y), Counter(groups)

(Counter({'Food': 570, 'Social': 570, 'Control': 570}),
 Counter({'0': 288, '1': 288, '2': 288, '3': 288, '4': 279, '5': 279}))

In [12]:
decoder = Decoder(
    t_r=TR,
    estimator="svc",
    #mask=resampled_anat_mask,
    #mask_strategy = "whole-brain",
    standardize=True,
    #screening_percentile=5,
    cv=LeaveOneGroupOut(),
    scoring='accuracy',
    n_jobs=2
)
decoder.fit(X, y, groups)

# When stand=False : ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.

In [None]:
classification_accuracy = np.mean(list(decoder.cv_scores_.values()))
print(
    f"Classification accuracy: {classification_accuracy:.4f} / "
)

Classification accuracy: 0.6818 / 


In [None]:
for label, scores in decoder.cv_scores_.items():
    print(label, np.mean(scores))


Control 0.5073924731182796
Food 0.6251120071684588
Social 0.9127837514934288


# Permutation test.

-  “If there was no real relationship between the labels (Food/Social/Control) and the brain data, how often would I get an accuracy as good as the one I observed?”


Shuffle the labels (destroy real structure).

Train/test classifier as usual.

Collect accuracy → this is one draw from the null distribution.

Repeat many times → build full null distribution.

Compare your real accuracy to that null distribution.

In [14]:
import numpy as np
from nilearn.decoding import Decoder
from sklearn.model_selection import LeaveOneGroupOut
from sklearn.utils import shuffle

In [17]:
n_perms = 10000
perm_accs = [] #null distribution from permutations

In [None]:
for i in range(n_perms):
    # Shuffle labels within groups to preserve structure
    print(f"Permutation {i}")
    y_perm = shuffle(y, random_state=i)
    
    decoder_perm = Decoder(estimator='svc', cv=LeaveOneGroupOut(), scoring='accuracy', n_jobs=-1)
    decoder_perm.fit(X, y_perm, groups=groups)
    acc = np.mean(list(decoder_perm.cv_scores_.values()))
    with open ("permutations.txt", "a") as f:
        f.write(f"{acc}\n")
    #perm_accs.append(acc)

#perm_accs = np.array(perm_accs)

In [53]:
# --- Compute p-value ---
p_value = np.mean(perm_accs >= classification_accuracy)
print(f"P-value from permutation test: {p_value:.5f}")
# Null hypothesis:
# There is no relationship between the fMRI data and the labels (Food, Social, Control).

P-value from permutation test: 0.00000


# Evolution - accuracy when trained/validated on 1-3 run vs 4-6?

# 3 Classes during baseline

In [16]:
runs_food = [str(f).split('/')[-1][0] for f in baseline_food]
labels_food  = ['Food']  * len (runs_food)

runs_social = [str(f).split('/')[-1][0] for f in baseline_social]
labels_social  = ['Social']  * len (runs_social)

runs_control = [str(f).split('/')[-1][0] for f in baseline_control]
labels_control  = ['Control']  * len (runs_control)

In [17]:
X = np.array(baseline_food + baseline_social + baseline_control)
y = np.array(labels_food + labels_social + labels_control)
groups = np.array(runs_food + runs_social + runs_control)

In [18]:
decoder = Decoder(
    t_r=TR,
    estimator="svc",
    #mask=resampled_anat_mask,
    #mask_strategy = "whole-brain",
    standardize=True,
    #screening_percentile=5,
    scoring='accuracy',
    cv=LeaveOneGroupOut(),
    n_jobs=2
)
decoder.fit(X, y, groups)

In [21]:
classification_accuracy = np.mean(list(decoder.cv_scores_.values()))
print(
    f"Classification accuracy: {classification_accuracy:.4f} / "
)

Classification accuracy: 0.6724 / 


In [22]:
for label, scores in decoder.cv_scores_.items():
    print(label, np.mean(scores))

Control 0.5083258661887694
Food 0.5966995221027479
Social 0.9121117084826763


# 3 Classes after social isolation

In [11]:
runs_food = [str(f).split('/')[-1][0] for f in social_food]
labels_food  = ['Food']  * len (runs_food)

runs_social = [str(f).split('/')[-1][0] for f in social_social]
labels_social  = ['Social']  * len (runs_social)

runs_control = [str(f).split('/')[-1][0] for f in social_control]
labels_control  = ['Control']  * len (runs_control)

In [12]:
X = np.array(social_food + social_social + social_control)
y = np.array(labels_food + labels_social + labels_control)
groups = np.array(runs_food + runs_social + runs_control)

In [13]:
decoder = Decoder(
    t_r=TR,
    estimator="svc",
    #mask=resampled_anat_mask,
    #mask_strategy = "whole-brain",
    standardize=True,
    scoring='accuracy',
    #screening_percentile=5,
    cv=LeaveOneGroupOut(),
    n_jobs=2
)
decoder.fit(X, y, groups)

In [None]:
classification_accuracy = np.mean(list(decoder.cv_scores_.values()))
print(
    f"Classification accuracy: {classification_accuracy:.4f} / "
)

Classification accuracy: 0.6613 / Chance level: 0.3333333333333333


In [15]:
for label, scores in decoder.cv_scores_.items():
    print(label, np.mean(scores))

Control 0.49652777777777773
Food 0.560763888888889
Social 0.9265046296296297


# 3 Classes food predictions in different conditions 

In [24]:
runs_food_1 = [str(f).split('/')[-1][0] for f in fasting_food]
labels_food_1  = ['Food_1']  * len (runs_food_1)

runs_food_2 = [str(f).split('/')[-1][0] for f in social_food]
labels_food_2  = ['Food_2']  * len (runs_food_2)

runs_food_3 = [str(f).split('/')[-1][0] for f in baseline_food]
labels_food_3 = ['Food_3']  * len (runs_food_3)

In [25]:
X = np.array(fasting_food + social_food + baseline_food)
y = np.array(labels_food_1 + labels_food_2 + labels_food_3)
groups = np.array(runs_food_1 + runs_food_2 + runs_food_3)

In [26]:
decoder = Decoder(
    t_r=TR,
    estimator="svc",
    #mask=resampled_anat_mask,
    #mask_strategy = "whole-brain",
    standardize=True,
    #screening_percentile=5,
    cv=LeaveOneGroupOut(),
    scoring='accuracy',
    n_jobs=2
)
decoder.fit(X, y, groups)

In [27]:
classification_accuracy = np.mean(list(decoder.cv_scores_.values()))
print(
    f"Classification accuracy: {classification_accuracy:.4f} / "
)

Classification accuracy: 0.6764 / 


In [28]:
for label, scores in decoder.cv_scores_.items():
    print(label, np.mean(scores))

Food_1 0.687517497200448
Food_2 0.6879742389365849
Food_3 0.6536029975944588


# 3 Classes social predictions in different conditions 

In [29]:
runs_social_1 = [str(f).split('/')[-1][0] for f in fasting_social]
labels_social_1  = ['Social_1']  * len (runs_social_1)

runs_social_2 = [str(f).split('/')[-1][0] for f in social_social]
labels_social_2  = ['Social_2']  * len (runs_social_2)

runs_social_3 = [str(f).split('/')[-1][0] for f in baseline_social]
labels_social_3 = ['Social_3']  * len (runs_social_3)

In [30]:
X = np.array(fasting_social + social_social + baseline_social)
y = np.array(labels_social_1 + labels_social_2 + labels_social_3)
groups = np.array(runs_social_1 + runs_social_2 + runs_social_3)

In [31]:
decoder = Decoder(
    t_r=TR,
    estimator="svc",
    #mask=resampled_anat_mask,
    #mask_strategy = "whole-brain",
    standardize=True,
    #screening_percentile=5,
    cv=LeaveOneGroupOut(),
    n_jobs=2
)
decoder.fit(X, y, groups)

In [32]:
classification_accuracy = np.mean(list(decoder.cv_scores_.values()))
print(
    f"Classification accuracy: {classification_accuracy:.4f} / "
)

Classification accuracy: 0.9022 / 


In [33]:
for label, scores in decoder.cv_scores_.items():
    print(label, np.mean(scores))

Social_1 0.8920091256210768
Social_2 0.9171880194776909
Social_3 0.8973800480638169


In [34]:
decoder.predict(['/local/anpa/ds003242-1.0.0/derivatives/firstlevel_separate_runs/sub-SAXSISO08b/0_Social_2_effsize.nii.gz'])
#True label baseline = Social_3

  return self.func(*args, **kwargs)


array(['Social_3'], dtype='<U8')