In [1]:
import os 
import pandas as pd
import numpy as np
from scipy.io import savemat
from utils.utils import *
from utils.flips import get_prefered_xy

### Matlab export notebook
This notebook contains cells that extract data from the models and export it in both MATLAB and CSV formats; the exported files are then used for statistical analysis.

Current format is a bit messy, so things might be broken down the line 

If I weren't lazy, I would send all of these directly to the right folder, but I am lazy.

In [2]:
"""
Threshold is used to determine which data is from the lh and which is from the rh, since we want to check whether hemisphere is a variable with a statistically significant impact.
"""
import nibabel as nib

# For each subject, count the entries of the left-hemisphere mask whose value
# lies in the inclusive range [1, 15]; one count is stored per subject, in
# subj_list order.
threshold = []
for sub in subj_list:
    lh_mask_file = os.path.join(mask_dir, sub, f'lh.{sub}.testrois.mgz')
    lh_mask = nib.load(lh_mask_file).get_fdata().squeeze()
    inside_rois = (lh_mask >= 1) & (lh_mask <= 15)
    threshold.append(np.nonzero(inside_rois)[0].size)

threshold  # used for lh - rh differentiation
    

[17901, 19290, 17885, 13606, 14050, 19115, 13976, 15588]

In [3]:
# Load the fitted models of every subject into models_subs[sub][model_name],
# one DataFrame per model, and attach the noise ceiling to each of them.

cos_sin = get_prefered_xy(subj_list, sessions, fetch_conds=False)
models_subs = {}
models = ['best_roi', 'oself']
columns = ["x0", "y0", "sigma", "slope", "intercept", "test_var_explained", "var_explained", "mds_ecc", "mds_ang", "roi", "best_roi"]
model_study = 'oself'
mode = "train"
rotated = True

for i, sub in enumerate(subj_list):
    models_subs[sub] = {}
    noise_ceilling_file = os.path.join(noise_dir, f'{sub}_noise_ceilling_all_vox.npy')
    noise_ceilling = np.load(noise_ceilling_file, allow_pickle=True)
    for m in models:
        # The two variants only differ in the file they are read from.
        if rotated:
            m_file = os.path.join(models_dir, f'best_fits_{m}_{sub}_{mode}.npy')
        else:
            m_file = os.path.join(models_dir, f'best_fits_{m}_{sub}_{mode}_basevoxel_notrotated.npy')

        fits = pd.DataFrame(np.load(m_file, allow_pickle=True), columns=columns)
        # Every column except the trailing "roi" and "best_roi" is cast to float32.
        fits[columns[:-2]] = fits[columns[:-2]].astype(np.float32)
        fits['noise_ceilling'] = noise_ceilling
        # Test-set explained variance expressed relative to the noise ceiling.
        fits['voxel_performance'] = fits["test_var_explained"] / fits['noise_ceilling']
        models_subs[sub][m] = fits

# NOTE: this is a second name for the same dict object, not a copy.
models_subs_bestroi = models_subs
#savemat('models_subs_notrotated.mat', models_subs_notrotated)
#savemat('models_subs_bestroi.mat', models_subs_bestroi)


(907,)


In [9]:
def save_to_matlab(subj_lists, rois, model, variable, m, median=False):
    """
    Summarise one variable per subject and ROI, and export it as CSV
    (one file per hemisphere) so it can be read from MATLAB later.

    For each hemisphere ("lh", "rh") a table with one row per subject and one
    column per ROI is written to `matlab/{variable}_{m}_rotated_{hemi}.csv`.
    Each cell holds the mean (or median) of `variable` over the rows of
    `model[sub][m][hemi]` that belong to that ROI.

    IMPORTANT NOTE: this slightly changed the way files are named; the MATLAB
    script has to be adapted accordingly.

    Unrotated data is not exported since it is not used.

    Inputs
    -----------
    subj_lists: list of subjects (row order of the exported tables)

    rois: dict, ROIs and their value; only the keys are used, as column names
        and as group labels looked up in the 'roi' column

    model: dict, indexed as model[sub][m][hemi] -> DataFrame containing a
        'roi' column and the `variable` column

    variable: string, the variable of choice

    m: string, oself or best roi model

    median: False for mean, True for median

    Raises
    -----------
    KeyError if a subject, model, hemisphere or ROI is missing from `model`.
    """
    roi_names = list(rois.keys())
    # The relative output folder is created on demand instead of failing
    # when it does not exist yet.
    os.makedirs('matlab', exist_ok=True)

    for hemi in ('lh', 'rh'):
        summary = np.zeros((len(subj_lists), len(roi_names)))
        for i, sub in enumerate(subj_lists):
            # Group once per subject/hemisphere rather than once per ROI.
            grouped = model[sub][m][hemi].groupby('roi')[variable]
            per_roi = grouped.median() if median else grouped.mean()
            for j, roi in enumerate(roi_names):
                summary[i, j] = per_roi[roi]

        summary_df = pd.DataFrame(summary, index=subj_lists, columns=roi_names)
        summary_df.to_csv(f'matlab/{variable}_{m}_rotated_{hemi}.csv', index=True)

In [10]:
# Load the fitted models again, this time split per hemisphere:
# models_subs[sub][model_name][hemi] -> DataFrame.
# `mode` is not set in this cell; it is expected to be defined by an earlier cell.
rotated = True
models = ['best_roi', 'oself']
columns = ["x0", "y0", "sigma", "slope", "intercept", "test_var_explained", "var_explained", "mds_ecc", "mds_ang",  "roi", "best_roi"]

# Bind a fresh dict instead of refilling the one built by the earlier loading
# cell: that object is also reachable as `models_subs_bestroi`, and mutating
# it in place would silently replace the whole-brain frames stored there.
models_subs = {}
for i, sub in enumerate(subj_list):
    models_subs[sub] = {}
    noise_ceilling_file = os.path.join(noise_dir, f'{sub}_noise_ceilling_all_vox.npy')
    noise_ceilling = np.load(noise_ceilling_file, allow_pickle=True)

    # threshold[i] is the lh voxel count of this subject: the first
    # threshold[i] rows are treated as lh, the remaining rows as rh.
    # NOTE(review): assumes the saved arrays are ordered lh first, then rh - confirm.
    hemi_rows = {
        "lh": slice(None, threshold[i]),
        "rh": slice(threshold[i], None),
    }

    for m in models:
        if rotated:
            m_file = os.path.join(models_dir, f'best_fits_{m}_{sub}_{mode}.npy')
        else:
            m_file = os.path.join(models_dir, f'best_fits_{m}_{sub}_{mode}_basevoxel_notrotated.npy')

        model = np.load(m_file, allow_pickle=True)
        models_subs[sub][m] = {}
        for hemi in hemis:
            rows = hemi_rows[hemi]  # KeyError for anything other than "lh"/"rh"
            hemi_df = pd.DataFrame(model[rows], columns=columns)
            # Every column except the trailing "roi" and "best_roi" is cast to float32.
            hemi_df[columns[:-2]] = hemi_df[columns[:-2]].astype(np.float32)
            hemi_df['noise_ceilling'] = noise_ceilling[rows]
            # Test-set explained variance expressed relative to the noise ceiling.
            hemi_df['voxel_performance'] = hemi_df["test_var_explained"] / hemi_df['noise_ceilling']
            models_subs[sub][m][hemi] = hemi_df


# (variable, use median instead of mean) pairs exported for every model.
export_specs = [
    ('test_var_explained', False),
    ('sigma', False),
    ('noise_ceilling', False),
    ('voxel_performance', True),
]

for m in models:
    for variable, use_median in export_specs:
        save_to_matlab(subj_list, rois, models_subs, variable, m, median=use_median)