# Statistical analysis in `nilearn`

In [None]:
import warnings
warnings.filterwarnings("ignore")

In [None]:
import os
from nilearn import datasets

# Point nilearn at the shared dataset cache.
# Environment variables are stored verbatim (no shell expansion happens here), so
# "~" is expanded to the user's home directory explicitly; otherwise the path
# could be treated as a literal "~" directory relative to the working directory.
os.environ["NILEARN_SHARED_DATA"] = os.path.expanduser("~/shared/data/nilearn_data")

# Display the directories nilearn will search, in priority order, as a check.
datasets.get_data_dirs()

In the previous examples, we've generated connectivity matrices as our features-of-interest for machine learning.
However, there are many other kinds of relevant features we may want to extract from neuroimaging data.
Alternatively, we may not be interested in doing machine learning at all,
but instead performing statistical analysis using methods such as the General Linear Model (GLM).

In this example, we'll perform a GLM analysis of a dataset provided by the Haxby Lab,
in which participants were shown a number of different categories of visual images.

First, we need to download the dataset and its associated stimuli.

In [None]:
import numpy as np
import pandas as pd

from nilearn.datasets import fetch_haxby

# Repetition time (TR) in seconds, as reported in the original paper
t_r = 2.5

# Fetch subject 2 only, together with the stimuli used in the experiment,
# then print the description text that ships with the dataset.
haxby_dataset = fetch_haxby(fetch_stimuli=True, subjects=(2,))
print(haxby_dataset.description)

In [None]:
from nilearn import plotting

# Overlay the dataset's mask on the first anatomical image.
plotting.plot_roi(roi_img=haxby_dataset.mask, bg_img=haxby_dataset.anat[0])

In [None]:
import nibabel as nib
from nilearn import image

# Average the first functional file over its volumes and plot the resulting image.
mean_img_ = image.mean_img(imgs=haxby_dataset.func[0], copy_header=True)
plotting.plot_epi(epi_img=mean_img_)

# The size of the last axis of the loaded image is reported as the number of TRs.
n_trs = nib.load(haxby_dataset.func[0]).shape[-1]
print(f"There are {n_trs} TRs in the file {haxby_dataset.func[0]}")

In [None]:
import matplotlib.pyplot as plt

# Collect one example image per stimulus category, plus the label to show above it.
key_stimuli = []
exp_stimuli = []
for key, values in haxby_dataset.stimuli.items():
    # Rename 'controls' to match the labels in the behavioral csv. The rename is
    # matched on the key itself rather than on a fixed list position, so it stays
    # correct whatever order the stimuli mapping is iterated in.
    key_stimuli.append("scrambledpix" if key == "controls" else key)

    try:
        exp_stimuli.append(values[0])
    except KeyError:
        # this entry is keyed by name rather than indexed by position;
        # use its first 'scrambled_faces' image as the example
        exp_stimuli.append(values["scrambled_faces"][0])

# One panel per collected category. squeeze=False keeps `axes` a 2-D array for
# any number of columns, so `.ravel()` below always works.
fig, axes = plt.subplots(
    nrows=1, ncols=len(key_stimuli), figsize=(12, 12), squeeze=False
)

for img_path, img_categ, ax in zip(exp_stimuli, key_stimuli, axes.ravel()):
    ax.imshow(plt.imread(img_path), cmap="gray")
    ax.set_title(img_categ)
    # hide ticks and frame: these panels are pictures, not data plots
    ax.axis("off")

## Generate design matrices for each run

Unlike in our previous connectivity analyses, we are now interested in known "events" that occurred throughout each run.
Each event corresponds to the presentation of a visual image from one of the stimulus categories.
The presentation of each image is included as information in an accompanying CSV file.
We will use this information to generate design matrices for each run to use in our GLM.

Note in this case that all runs have been concatenated into a single nii file; the run identifier is given in the `chunks` column of the CSV file.

In [None]:
# Read the space-delimited target file into a DataFrame and display it
events = pd.read_csv(haxby_dataset.session_target[0], delimiter=" ")
events

In [None]:
# Distinct condition names found in the "labels" column
unique_conditions = events["labels"].unique()

# Per-row condition labels as an array.
# .to_numpy() is used (as for the "chunks" column) instead of .values, so the
# result is a plain NumPy array whatever pandas dtype backs the column.
conditions = events["labels"].to_numpy()

In [None]:
# Distinct run identifiers taken from the "chunks" column
unique_runs = events["chunks"].unique()

# Per-row run identifier as an array, used later to select the rows of each run
runs = events["chunks"].to_numpy()

print(unique_runs)

In [None]:
# Rebind `events` to a dictionary mapping each run id to that run's events DataFrame
events = {}

for run in unique_runs:

    # labels of the rows that belong to this run
    conditions_run = conditions[runs == run]

    # one frame per row of the run, spaced one TR apart and starting at zero
    n_scans = len(conditions_run)
    frame_times = np.arange(n_scans) * t_r

    # every event lasts exactly one TR
    duration = np.ones(n_scans) * t_r

    # assemble the events table for this run
    events_ = pd.DataFrame(
        {
            "onset": frame_times,
            "trial_type": conditions_run,
            "duration": duration,
        }
    )

    # "rest" acts as the baseline in the GLM, so it must not be modelled as a
    # condition: keep every row except the rest rows
    events[run] = events_.loc[events_["trial_type"] != "rest"]

In [None]:
# Display the events table stored under run key 0 ("rest" rows already removed)
events[0]

## Run the GLM

In [None]:
from nilearn.glm.first_level import FirstLevelModel

# Configure the first-level GLM, restricted to the dataset's mask
glm = FirstLevelModel(
    smoothing_fwhm=4,
    high_pass=0.008,
    mask_img=haxby_dataset.mask,
    t_r=t_r,
)

# Empty accumulators for the contrast maps and their condition / run labels
z_maps, conditions_label, run_label = [], [], []

In [None]:
from nilearn.image import index_img

# Start from empty accumulators so that re-running this cell does not append a
# second copy of every map and label to lists filled by a previous execution.
z_maps = []
conditions_label = []
run_label = []

for run in unique_runs:
    # select the volumes of the concatenated series that belong to this run
    fmri_run = index_img(haxby_dataset.func[0], runs == run)

    # refit the same GLM object on this run's data and events
    glm.fit(fmri_run, events=events[run])

    # set up contrasts: one per condition present in this run's events
    # NOTE: this rebinds `conditions`, which earlier held the per-row label
    # array, to this run's unique condition names.
    conditions = events[run].trial_type.unique()
    for condition_ in conditions:
        z_maps.append(glm.compute_contrast(condition_))
        conditions_label.append(condition_)
        run_label.append(run)

In [None]:
# The GLM object is refit once per run, so after the loop it holds the fit for the
# last run in `unique_runs`. Take the contrasts from that run's events explicitly
# instead of relying on whatever `conditions` happens to be bound to when this
# cell is executed.
report_contrasts = list(events[unique_runs[-1]].trial_type.unique())

report = glm.generate_report(
    contrasts=report_contrasts,
    bg_img=mean_img_,
)
report