## Set up the environment

In [1]:
import pandas as pd
from glob import glob
import seaborn as sns
from tqdm import tqdm
from pathlib import Path
import numpy as np
from nilearn import plotting, surface
import nibabel as nib
from src.tools import camera_switcher
# Lift pandas' row-display cap so displayed frames are never truncated.
pd.options.display.max_rows = None


In [2]:
import os
# Environment variables for the FreeSurfer tools launched from this notebook.
# NOTE: both values are machine-specific absolute paths.
os.environ.update({
    "SUBJECTS_DIR": "/Users/emcmaho7/Dropbox/projects/SI_fmri/SIfMRI_modeling/data/raw/freesurfer",
    "FREESURFER_HOME": "/Applications/freesurfer",
})

In [3]:
# Project layout. NOTE: `top_dir` is a machine-specific absolute path.
top_dir = '/Users/emcmaho7/Dropbox/projects/SI_fmri/SIfMRI_modeling'
raw_dir = f'{top_dir}/data/raw'
interim_dir = f'{top_dir}/data/interim'
figure_dir = f'{top_dir}/reports/figures'

# Make sure the 'Plotting' output folders exist before anything is written to them.
for _out_root in (interim_dir, figure_dir):
    Path(_out_root, 'Plotting').mkdir(parents=True, exist_ok=True)
del _out_root

## Model comparisons

In [56]:
def compute_surf_stats(prefix, sub, hemi):
    """Project a volume onto a subject's surface with FreeSurfer and load the result.

    Runs ``mri_vol2surf`` on ``{prefix}.nii.gz`` to create
    ``{prefix}_hemi-{hemi}.mgz`` unless that file already exists, then loads the
    surface file with ``surface.load_surf_data``.

    Parameters
    ----------
    prefix : str
        Path of the source volume without its ``.nii.gz`` extension; also the
        stem of the output surface file.
    sub : str
        Subject label; passed to ``--regheader`` as ``sub-{sub}``.
    hemi : str
        Hemisphere passed to ``--hemi`` (callers in this notebook use
        ``'lh'`` / ``'rh'``).

    Returns
    -------
    The value returned by ``surface.load_surf_data`` for the surface file.

    Raises
    ------
    FileNotFoundError
        If the ``mri_vol2surf`` executable cannot be found.
    subprocess.CalledProcessError
        If ``mri_vol2surf`` exits with a non-zero status.
    """
    import subprocess  # function-scope import: only needed when the projection runs

    file = f'{prefix}_hemi-{hemi}.mgz'
    # NOTE: an existing surface file is reused as-is, even if `{prefix}.nii.gz`
    # was rewritten after it was produced. Delete the .mgz to force a refresh.
    if not os.path.exists(file):
        # Honour FREESURFER_HOME when set; fall back to the historical default location.
        freesurfer_home = os.environ.get('FREESURFER_HOME', '/Applications/freesurfer')
        # An argument list (no shell) keeps paths containing spaces or shell
        # metacharacters intact.
        cmd = [
            os.path.join(freesurfer_home, 'bin', 'mri_vol2surf'),
            '--src', f'{prefix}.nii.gz',
            '--out', file,
            '--regheader', f'sub-{sub}',
            '--hemi', hemi,
            '--projfrac', '1',
        ]
        # check=True: fail here, loudly, instead of later when loading a missing file.
        subprocess.run(cmd, check=True)
    return surface.load_surf_data(file)


def load_surf_mesh(path, sub, hemi):
    """Build the paths of a subject's ``.inflated`` and ``.sulc`` surface files.

    Only the path strings are assembled; nothing is read from disk here.

    Returns
    -------
    tuple of (str, str)
        ``(inflated_path, sulc_path)`` under ``{path}/freesurfer/sub-{sub}/surf``.
    """
    surf_dir = f'{path}/freesurfer/sub-{sub}/surf'
    inflated_path = f'{surf_dir}/{hemi}.inflated'
    sulc_path = f'{surf_dir}/{hemi}.sulc'
    return inflated_path, sulc_path


def plot_stats(surf_mesh, bg_map, surf_map, hemi_, figure_prefix,
                vmax=0.6, negative_values=False,
                cmap_name='icefire', threshold=1e-6):
    """Render a surface map from three views and save each view as a PNG.

    Parameters
    ----------
    surf_mesh, bg_map :
        Forwarded unchanged to ``plotting.plot_surf_roi``.
    surf_map : array-like
        Per-vertex values. NaNs are replaced via ``np.nan_to_num`` on a copy;
        when ``negative_values`` is False, negative entries are also set to 0.
    hemi_ : str
        ``'lh'`` maps to ``'left'``; any other value maps to ``'right'``.
    figure_prefix : str
        Output stem; files are written to
        ``{figure_prefix}_view-{view}_hemi-{hemi_}.png``.
    vmax : float
        Upper colour limit, also the largest colorbar tick.
    negative_values : bool
        If True, negatives are kept, ``vmin`` is passed as None and the ticks
        span ``-vmax`` to ``vmax``; otherwise ``vmin`` is 0 and the ticks span
        0 to ``vmax``.
    cmap_name : str
        Name given to ``sns.color_palette(..., as_cmap=True)``.
    threshold : float
        Forwarded to ``plotting.plot_surf_roi``.
    """
    palette = sns.color_palette(cmap_name, as_cmap=True)
    hemi_name = 'right' if hemi_ != 'lh' else 'left'

    # nan_to_num returns a new array, so the caller's data is never modified.
    values = np.nan_to_num(surf_map)
    if negative_values:
        vmin = None
        tick_start = -1*vmax
    else:
        values[values < 0] = 0
        vmin = 0
        tick_start = vmin
    tickvals = list(np.round(np.linspace(tick_start, vmax, num=5), 2))

    for view in ('ventral', 'lateral', 'medial'):
        # Only one panel (right hemisphere, medial view) carries the colorbar.
        show_colorbar = (view == 'medial') and (hemi_ == 'rh')
        fig = plotting.plot_surf_roi(
            surf_mesh=surf_mesh,
            roi_map=values,
            bg_map=bg_map,
            threshold=threshold,
            vmax=vmax,
            vmin=vmin,
            engine='plotly',
            colorbar=show_colorbar,
            view=view,
            cmap=palette,
            hemi=hemi_name,
        )
        plotly_fig = fig.figure
        # Transparent backgrounds plus the custom camera for this hemisphere/view.
        plotly_fig.update_layout(
            scene_camera=camera_switcher(hemi_, view),
            paper_bgcolor="rgba(0,0,0,0)",
            plot_bgcolor="rgba(0,0,0,0)",
        )
        if show_colorbar:
            # Presumably trace 1 is the one that owns the colorbar -- confirm against nilearn.
            colorbar_style = {
                'len': 0.5,
                'tickfont': {'color': 'black', 'size': 25},
                'tickformat': '.',
                'tickvals': tickvals,
            }
            plotly_fig.data[1].update(colorbar=colorbar_style)
        plotly_fig.write_image(f'{figure_prefix}_view-{view}_hemi-{hemi_}.png')


## Model performance difference

In [57]:
def plot_difference(df_, cat1, cat2, subjects=range(1, 5)):
    """Plot the per-voxel test-score difference ``cat1 - cat2`` on each subject's surface.

    For every subject the difference is written into a volume shaped like the
    subject's beta image (last axis dropped), saved to
    ``{interim_dir}/Plotting/sub-XX_model-{cat1}-{cat2}.nii.gz``, projected to
    each hemisphere with ``compute_surf_stats`` and drawn with ``plot_stats``.

    Parameters
    ----------
    df_ : pandas.DataFrame
        Must provide the columns ``subj_id``, ``model_class``, ``test_score``,
        ``i_index``, ``j_index`` and ``k_index``.
    cat1, cat2 : str
        ``model_class`` values; the plotted quantity is ``cat1`` minus ``cat2``.
    subjects : iterable of int, optional
        Subject ids to plot. Defaults to subjects 1-4.

    Raises
    ------
    pandas.errors.MergeError
        If a voxel occurs more than once for a subject within either class.

    Notes
    -----
    The two classes are paired on their voxel indices rather than on row
    position, so differing row order cannot misalign the subtraction. Voxels
    lacking a score for either class stay at 0 in the volume.
    """
    voxel_cols = ['i_index', 'j_index', 'k_index']
    contrast_name = f'{cat1}-{cat2}'
    for subj_id in tqdm(subjects, desc='Plotting subjects'):
        subj_id_str = str(subj_id).zfill(2)
        stat_file = f'{interim_dir}/Plotting/sub-{subj_id_str}_model-{contrast_name}'
        plot_file = f'{figure_dir}/Plotting/sub-{subj_id_str}_model-{contrast_name}'

        # The beta image only serves as a template for grid shape, header and affine.
        img = nib.load(f'{raw_dir}/fmri_betas/sub-{subj_id_str}_space-T1w_desc-train-fracridge_data.nii.gz')

        dims = img.shape[:-1]
        header, affine = img.header, img.affine

        in_subject = df_.subj_id == subj_id
        df1 = df_.loc[in_subject & (df_.model_class == cat1), voxel_cols + ['test_score']]
        df2 = df_.loc[in_subject & (df_.model_class == cat2), voxel_cols + ['test_score']]
        # Pair the two classes voxel by voxel; one_to_one guards against duplicated voxels.
        paired = df1.merge(df2, on=voxel_cols, suffixes=('_cat1', '_cat2'),
                           validate='one_to_one')
        scores_arr = (paired['test_score_cat1'] - paired['test_score_cat2']).to_numpy()
        indices = paired[voxel_cols].to_numpy()

        score_img = np.zeros(dims)
        score_img[indices[:, 0], indices[:, 1], indices[:, 2]] = scores_arr
        # NOTE(review): the template header is reused; confirm its stored data
        # type suits floating-point scores.
        score_img = nib.Nifti1Image(score_img, affine=affine, header=header)
        nib.save(score_img, f'{stat_file}.nii.gz')

        for hemi in ['rh', 'lh']:
            surf = compute_surf_stats(stat_file, subj_id_str, hemi)
            inflated, sulcus = load_surf_mesh(raw_dir, subj_id_str, hemi)
            plot_stats(inflated, sulcus, surf, hemi, plot_file, vmax=.3, negative_values=True)

In [6]:
# Summary table of the overall best model per model class, indexed by `model_class`.
best_model_info = pd.read_csv(
    f'{interim_dir}/ResultsSummary/overall_best_model.csv',
    index_col='model_class',
)
best_model_info

Unnamed: 0_level_0,most_frequent_model_uid,count
model_class,Unnamed: 1_level_1,Unnamed: 2_level_1
image,timm_beit_large_patch16_384,2691
language,sentence-transformers_all-roberta-large-v1,4418
video,x3d_s,7578


In [10]:
# One glob pattern per model class, pointing at the encoding results of that
# class's overall best model. Keeping the class next to its pattern means the
# label never has to be guessed from substrings of the file path.
patterns_by_class = {
    'video': f'{interim_dir}/VideoNeuralEncoding/model-{best_model_info.loc["video", "most_frequent_model_uid"]}*',
    'language': f'{interim_dir}/LanguageNeuralEncoding/no_perturbation/model-{best_model_info.loc["language", "most_frequent_model_uid"]}*.gz',
    'image': f'{interim_dir}/VisionNeuralEncoding/grouped_average/model-{best_model_info.loc["image", "most_frequent_model_uid"]}*.gz',
}
paths = list(patterns_by_class.values())

files = []
score_frames = []
for model_class, pattern in patterns_by_class.items():
    for file in glob(pattern):
        files.append(file)
        # Decide the reader from the file name only, not from its parent folders.
        if 'csv' in os.path.basename(file):
            df = pd.read_csv(file)
        else:
            # NOTE: unpickling can execute arbitrary code -- only load trusted result files.
            df = pd.read_pickle(file)

        # Drop these two columns when present; a missing one is not an error.
        df = df.drop(columns=['r_var_dist', 'r_null_dist'], errors='ignore')
        df['model_class'] = model_class
        score_frames.append(df)

if not score_frames:
    raise FileNotFoundError(f'No result files matched any of: {paths}')

scores = pd.concat(score_frames).reset_index(drop=True)
scores.head()

Unnamed: 0,voxel_id,roi_name,stream_name,subj_id,reliability,i_index,j_index,k_index,layer_index,layer_relative_depth,layer,train_score,model_uid,test_score,model_class
0,0,none,none,1,0.322961,2,33,31,311,0.702032,ReLU-9-42,0.284082,x3d_s,-0.130411,video
1,1,none,none,1,0.340669,2,33,32,217,0.489842,Conv3d-9-29,0.276329,x3d_s,-0.007874,video
2,2,none,none,1,0.313496,2,34,35,138,0.311512,Swish-6-62,0.254926,x3d_s,-0.103982,video
3,3,none,none,1,0.254933,2,35,31,392,0.884876,Sequential-8-14,0.330047,x3d_s,-0.185278,video
4,4,none,none,1,0.300477,2,36,36,214,0.48307,BatchNorm3d-7-25,0.3659,x3d_s,0.287451,video


In [58]:
# Contrast the video model against the image model, then against the language model.
plot_difference(df_=scores, cat1='video', cat2='image')
plot_difference(df_=scores, cat1='video', cat2='language')


you have specified symmetric_cmap=False but the map contains negative values; setting symmetric_cmap to True


vmin cannot be chosen when cmap is symmetric


you have specified symmetric_cmap=False but the map contains negative values; setting symmetric_cmap to True


vmin cannot be chosen when cmap is symmetric


you have specified symmetric_cmap=False but the map contains negative values; setting symmetric_cmap to True


vmin cannot be chosen when cmap is symmetric


you have specified symmetric_cmap=False but the map contains negative values; setting symmetric_cmap to True


vmin cannot be chosen when cmap is symmetric

Plotting subjects: 100%|██████████████████████████| 4/4 [02:09<00:00, 32.36s/it]

you have specified symmetric_cmap=False but the map contains negative values; setting symmetric_cmap to True


vmin cannot be chosen when cmap is symmetric


you have specified symmetric_cmap=False but the map contains negative values; setting symmetric_cmap to True


vmin cannot be ch

## Plot whole-brain scores

In [59]:
def plot_one_variable(df_, variable, vmax=.7, subjects=range(1, 5)):
    """Plot one per-voxel column of ``df_`` on each subject's surface.

    For every subject the values are written into a volume shaped like the
    subject's beta image (last axis dropped), saved to
    ``{interim_dir}/Plotting/sub-XX_{variable}.nii.gz``, projected to each
    hemisphere with ``compute_surf_stats`` and drawn with ``plot_stats`` using
    the ``'magma'`` colormap.

    Parameters
    ----------
    df_ : pandas.DataFrame
        Must provide ``subj_id``, ``i_index``, ``j_index``, ``k_index`` and the
        column named by ``variable``.
    variable : str
        Name of the column to plot; also used in the output file names.
    vmax : float, optional
        Upper colour limit forwarded to ``plot_stats``.
    subjects : iterable of int, optional
        Subject ids to plot. Defaults to subjects 1-4.
    """
    voxel_cols = ['i_index', 'j_index', 'k_index']
    for subj_id in tqdm(subjects, desc='Plotting subjects'):
        subj_id_str = str(subj_id).zfill(2)
        stat_file = f'{interim_dir}/Plotting/sub-{subj_id_str}_{variable}'
        plot_file = f'{figure_dir}/Plotting/sub-{subj_id_str}_{variable}'

        # The beta image only serves as a template for grid shape, header and affine.
        img = nib.load(f'{raw_dir}/fmri_betas/sub-{subj_id_str}_space-T1w_desc-train-fracridge_data.nii.gz')

        dims = img.shape[:-1]
        header, affine = img.header, img.affine

        df_filtered = df_.loc[df_.subj_id == subj_id].reset_index(drop=True)
        scores_arr = df_filtered[variable].to_numpy()
        indices = df_filtered[voxel_cols].to_numpy()

        # Voxels that are absent from df_ stay at 0.
        score_img = np.zeros(dims)
        score_img[indices[:, 0], indices[:, 1], indices[:, 2]] = scores_arr
        # NOTE(review): the template header is reused; confirm its stored data
        # type suits floating-point scores.
        score_img = nib.Nifti1Image(score_img, affine=affine, header=header)
        nib.save(score_img, f'{stat_file}.nii.gz')

        for hemi in ['lh', 'rh']:
            surf = compute_surf_stats(stat_file, subj_id_str, hemi)
            inflated, sulcus = load_surf_mesh(raw_dir, subj_id_str, hemi)
            plot_stats(inflated, sulcus, surf, hemi, plot_file,
                       vmax=vmax, cmap_name='magma')

In [60]:
# Voxel-wise best-model scores, plus the test score expressed as a fraction of
# each voxel's reliability.
voxel_scores = (
    pd.read_csv(f'{interim_dir}/ResultsSummary/voxelwise_best_model.csv')
    .assign(normalized_test_score=lambda d: d['test_score'] / d['reliability'])
)
voxel_scores.head()

Unnamed: 0,voxel_id,roi_name,stream_name,subj_id,reliability,i_index,j_index,k_index,layer_index,layer_relative_depth,layer,train_score,model_uid,test_score,model_class,normalized_test_score
0,0,none,none,1,0.322961,2,33,31,394,0.845494,Sigmoid-6-252,0.390368,torchvision_efficientnet_b1_imagenet1k_v1,0.140451,image,0.434885
1,1,none,none,1,0.340669,2,33,32,373,0.890215,Conv2d-5-114,0.413691,timm_dm_nfnet_f1,0.246346,image,0.723126
2,2,none,none,1,0.313496,2,34,35,276,0.673171,DropPath-5-96,0.345103,timm_jx_nest_base,-0.04095,image,-0.130625
3,3,none,none,1,0.254933,2,35,31,320,0.867209,Identity-3-126,0.439776,timm_beitv2_large_patch16_224,0.288849,image,1.133039
4,4,none,none,1,0.300477,2,36,36,177,0.885,BatchNorm3d-6-114,0.413664,c2d_r50,0.235988,video,0.785378


In [None]:
# Whole-brain maps of the raw test score, then of the reliability-normalized score.
plot_one_variable(df_=voxel_scores, variable='test_score')
plot_one_variable(df_=voxel_scores, variable='normalized_test_score', vmax=1.)

## Hierarchy

In [61]:
# Keep only the video-model rows; `model_class` is consumed as the lookup index
# and therefore absent from the result.
video_score = (
    scores
    .set_index('model_class')
    .loc['video']
    .reset_index(drop=True)
)
video_score.head()

Unnamed: 0,voxel_id,roi_name,stream_name,subj_id,reliability,i_index,j_index,k_index,layer_index,layer_relative_depth,layer,train_score,model_uid,test_score
0,0,none,none,1,0.322961,2,33,31,311,0.702032,ReLU-9-42,0.284082,x3d_s,-0.130411
1,1,none,none,1,0.340669,2,33,32,217,0.489842,Conv3d-9-29,0.276329,x3d_s,-0.007874
2,2,none,none,1,0.313496,2,34,35,138,0.311512,Swish-6-62,0.254926,x3d_s,-0.103982
3,3,none,none,1,0.254933,2,35,31,392,0.884876,Sequential-8-14,0.330047,x3d_s,-0.185278
4,4,none,none,1,0.300477,2,36,36,214,0.48307,BatchNorm3d-7-25,0.3659,x3d_s,0.287451


In [62]:
# Map each voxel's `layer_relative_depth` for the video model onto the surface.
plot_one_variable(df_=video_score, variable='layer_relative_depth', vmax=1.)


choosing both vmin and a threshold is not allowed; setting vmin to 0


choosing both vmin and a threshold is not allowed; setting vmin to 0


choosing both vmin and a threshold is not allowed; setting vmin to 0


choosing both vmin and a threshold is not allowed; setting vmin to 0

Plotting subjects: 100%|██████████████████████████| 4/4 [01:57<00:00, 29.33s/it]
