In [1]:
%load_ext autoreload
%autoreload 2

import os
import sys
from pprint import pprint
from pathlib import Path
from random import randint

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from ipywidgets import interact
from tqdm.notebook import tqdm
import nibabel as nib
import glmsingle
from glmsingle.glmsingle import GLM_single
import bids
from bids import BIDSLayout
from scipy.ndimage import zoom

dir2 = os.path.abspath('..')
dir1 = os.path.dirname(dir2)
if not dir1 in sys.path: 
    sys.path.append(dir1)

from glmsingle_utils import (
    load_results, 
    plot_results,
    save_results
)

In [2]:
# Enter the parent directory of the TC2See dataset folder
dataset_root = Path(input())

  D:/Datasets


In [10]:
tc2see_version = 1 # [1, 2]
dataset_path = dataset_root / f"TC2See_v{tc2see_version}"
num_runs = 6 if tc2see_version == 1 else 8

subject = '01'  # ['01', '02']
task = "bird"
space = 'T1w' # ['T1w', 'MNI152NLin2009cAsym']
tr = 1.97
resample_tr = tr # Optionally resample the tr to something else

# Initialize BIDSLayouts for querying files.
dataset_layout = BIDSLayout(dataset_path / 'TC2See_prdgm')
derivatives_layout = BIDSLayout(dataset_path / 'derivatives_TC2See_prdgm/fmriprep')

In [11]:
# Collect all BOLD images for each run (as nibabel images) and event information (as pandas dataframes)
run_images = []
events_dfs = []

for run_id in tqdm(range(1, num_runs + 1)):
    
    # Get the BOLD image
    bids_image = derivatives_layout.get(
        subject=subject,
        run=run_id,
        space=space, 
        task=task,
        desc='preproc', 
        extension='nii.gz'
    )[0]
    nibabel_image = bids_image.get_image()
    run_images.append(nibabel_image)
    
    # Get the events file
    events_file = dataset_layout.get(
        subject=subject,
        run=run_id,
        task=task,
        extension='tsv'
    )[0]
    events_df = pd.read_csv(events_file.path, sep='\t')
    events_dfs.append(events_df)


  0%|          | 0/6 [00:00<?, ?it/s]

In [33]:
events_df = pd.read_csv(events_file.path, sep='\t')
events_df = events_df[events_df['stimulus'].str.endswith('png')]

In [40]:
events_df

Unnamed: 0,onset,duration,class_id,response,response_time,same,stimulus,tr
1,0.240146,2.0,174.0,,0.000000,False,docs/cropped/174.Palm_Warbler_2.png,0.121901
4,4.364597,2.0,126.0,,0.000000,False,docs/cropped/126.Nelson_Sharp_tailed_Sparrow_0...,2.215532
7,8.184927,2.0,130.0,,0.000000,False,docs/cropped/130.Tree_Sparrow_6.png,4.154785
10,11.970066,2.0,130.0,True,23.621453,True,docs/cropped/130.Tree_Sparrow_8.png,6.076176
13,16.201529,2.0,117.0,,0.000000,False,docs/cropped/117.Clay_colored_Sparrow_8.png,8.224126
...,...,...,...,...,...,...,...,...
323,431.727405,2.0,113.0,True,443.377654,True,docs/cropped/113.Baird_Sparrow_5.png,219.150967
326,435.702700,2.0,175.0,,0.000000,False,docs/cropped/175.Pine_Warbler_4.png,221.168883
329,439.643819,2.0,172.0,,0.000000,False,docs/cropped/172.Nashville_Warbler_6.png,223.169451
332,443.639441,2.0,168.0,,0.000000,False,docs/cropped/168.Kentucky_Warbler_2.png,225.197686


In [12]:
# Load the fmri mask. 
# fmriPrep outputs one mask for every functional run, but they are mostly identical
# This will grab the mask from the very first one
mask_image = derivatives_layout.get(
    subject=subject,
    run=1,
    task=task,
    space=space, 
    desc='brain',
    extension='nii.gz'
)[0].get_image()
fmri_mask = mask_image.get_fdata().astype(bool)
H, W, D = fmri_mask.shape

# Load the fmri data, apply the mask, then optionally resample the TRs if resample_tr is set
fmri_batch = []
for run_image in tqdm(run_images):
    fmri_data = run_image.get_fdata()
    fmri_data = fmri_data[fmri_mask]
    if resample_tr != tr:
        fmri_data = zoom(fmri_data, zoom=(1, tr / resample_tr))
    fmri_batch.append(fmri_data)
    

  0%|          | 0/6 [00:00<?, ?it/s]

In [26]:
# Cell for visualizing the fMRI runs

all_runs = np.concatenate(fmri_batch)
mean, std = all_runs.mean(), all_runs.std()

show_volume = np.zeros_like(fmri_mask, dtype=float)
_, T = fmri_batch[0].shape

@interact(run_id=(0, num_runs-1), d=(0, D-1), t=(0, T-1))
def show(run_id, d, t):
    show_volume[fmri_mask] = fmri_batch[run_id][:, t]
    plt.figure(figsize=(8, 8))
    plt.imshow(show_volume[:, :, d], cmap='gray', vmin=mean-2*std, vmax=mean+2*std)
    plt.show()

interactive(children=(IntSlider(value=2, description='run_id', max=5), IntSlider(value=32, description='d', ma…

In [42]:
# Create the design matrices

# Collect all conditions
conditions = []
for events_df in events_dfs:
    for i, event in events_df.iterrows():
        if not event['stimulus'].endswith('png'):
            continue
        condition_name = Path(event['stimulus']).stem.split('.')[1]
        conditions.append(condition_name)
conditions = list(set(conditions))
conditions.sort()
conditions = {condition: i for i, condition in enumerate(conditions)}
C = len(conditions)

# Construct the design matrices
design_batch = []
for run_id, events in enumerate(events_dfs):
    V, T = fmri_batch[run_id].shape
    design_matrix = np.zeros(shape=(T, C))
    for id, event in events.iterrows():
        if not event['stimulus'].endswith('png'):
            continue
        condition_name = Path(event['stimulus']).stem.split('.')[1]
        c = conditions[condition_name]
        if resample_tr == tr:
            t = round(event.tr)
        else:
            t = round(event.tr / resample_tr)
        design_matrix[t, c] = 1
    design_batch.append(design_matrix)

@interact(run_id=(0, len(design_batch)-1))
def show(run_id):
    design_matrix = design_batch[run_id]
    print(design_matrix.sum())
    plt.figure(figsize=(8, 8))
    plt.xlabel('Conditions')
    plt.ylabel('TRs')
    plt.imshow(design_matrix)

interactive(children=(IntSlider(value=2, description='run_id', max=5), Output()), _dom_classes=('widget-intera…

In [18]:
np.concatenate(design_batch).sum(axis=0)

array([3., 1., 1., 1., 6., 6., 3., 1., 1., 1., 1., 1., 1., 1., 3., 6., 3.,
       1., 1., 1., 3., 3., 6., 1., 6., 1., 1., 1., 1., 3., 1., 6., 1., 6.,
       1., 1., 1., 1., 3., 1., 1., 6., 1., 1., 1., 1., 1., 3., 3., 3., 1.,
       1., 1., 1., 1., 6., 1., 3., 6., 3., 1., 1., 1., 6., 3., 1., 1., 6.,
       1., 1., 1., 1., 3., 3., 1., 1., 6., 6., 1., 6., 1., 1., 1., 1., 3.,
       1., 1., 1., 3., 6., 1., 1., 1., 1., 1., 6., 3., 1., 3., 3., 1., 6.,
       1., 1., 1., 6., 1., 3., 1., 1., 1., 6., 6., 3., 1., 1., 1., 1., 3.,
       6., 1., 1., 1., 1., 1., 1., 3., 3., 3., 6., 1., 6., 1., 3., 1., 1.,
       1., 1., 1., 1., 1., 3., 1., 1., 3., 1., 3., 6., 1., 1., 1., 1., 1.,
       3., 6., 1., 3., 1., 1., 1., 6., 1., 1., 3., 1., 1., 3., 1., 1., 6.,
       3., 1., 1., 3., 6., 1., 1., 1., 6., 3., 1., 1., 1., 3., 6., 1., 1.,
       1., 1., 1., 1., 3., 6., 3., 1., 1., 1., 6., 1., 1., 3., 6., 3., 6.,
       1., 1., 1., 1., 6., 1., 1., 1., 6., 3., 1., 1., 3., 1., 1., 1., 3.,
       1., 3., 6., 1., 6.

In [45]:
# Load previous results
runs_path = dataset_path / 'derivatives_TC2See_prdgm/glmsingle'
previous_runs = [p.name for p in runs_path.iterdir()]
print(previous_runs)

run_name = 'run3'
output_path = runs_path / run_name
results_glmsingle = load_results(output_path)

['desktop.ini', 'run1', 'run2', 'run3']


In [24]:
results_glmsingle['typed']['betasmd'].shape

(149157, 672)

In [None]:
# Or run GLMsingle from scratch

break # set a new run name before running to avoid overwriting old results
run_name = 'run_3'

glmsingle_obj = GLM_single(dict(
    wantlibrary=1,
    wantglmdenoise=1,
    wantfracridge=1,
    wantfileoutputs=[1,1,1,1],
    wantmemoryoutputs=[1,1,1,1],
))

pprint(glmsingle_obj.params)

output_path = dataset_path / f'derivatives_TC2See_prdgm/glmsingle/{run_name}'
results_glmsingle = glmsingle_obj.fit(
    design=design_batch,
    data=fmri_batch,
    stimdur=2,
    tr=resample_tr,
    outputdir=str(output_path),
)

In [None]:
# Save nifti files for results
save_results(results_glmsingle, output_path, fmri_mask)

In [20]:
# Inspect the results at different layers
@interact(d_layer=(0, D-1))
def plot_results_layer(d_layer):
    plot_results(results_glmsingle, fmri_mask, d_layer)

interactive(children=(IntSlider(value=32, description='d_layer', max=65), Output()), _dom_classes=('widget-int…

In [69]:
import h5py
from noise_ceiling import (
    compute_ncsnr,
    compute_nc,
    group_repetitions
)

with h5py.File(dataset_path / f'derivatives_TC2See_prdgm/tc2see-v1-bold.hdf5', 'r') as f:
    group = f['sub-01']
    stimulus_ids = group['stimulus_ids'][:]
    ids = np.array(list(conditions.keys()))
        
betas = results_glmsingle['typed']['betasmd']

In [70]:
grouped_repetitions = []
for i in range(2, 7):
    x = group_repetitions(ids, num_repetitions=i)
    if x is not None:
        grouped_repetitions.append(x)
print([x.shape for x in grouped_repetitions])

ncsnr = compute_ncsnr(betas.T, grouped_repetitions)
nc = compute_nc(ncsnr, num_averages=1)
nc_volume = np.zeros_like(fmri_mask, dtype=float)
nc_volume[fmri_mask] = nc

[]


ValueError: need at least one array to concatenate

In [65]:
D = nc_volume.shape[2]
@interact(d=(0, D-1), original=True)
def show(d):
    plt.figure(figsize=(12, 12))
    plt.imshow(nc_volume[:, :, d], cmap='jet', vmin=0., vmax=50,)

interactive(children=(IntSlider(value=32, description='d', max=65), Output()), _dom_classes=('widget-interact'…