In [1]:
import os 
import numpy as np
import pandas as pd
from numpy import ndarray
from nsddatapaper_rsa.utils.nsd_get_data import get_conditions, get_labels, get_betas
from nsddatapaper_rsa.utils.utils import category_dict, mds, average_over_conditions
from utils.utils import *
from utils.kabsch2D import *
from nsd_access import NSDAccess 
from matplotlib import cm 
import scprep
import matplotlib.pyplot as plt
import nibabel as nib
from scipy.spatial.distance import pdist

In [2]:
# Check MDS for 40 sessions

In [3]:
# Run-level configuration for the single-subject ROI analysis.
n_jobs = 2  # local machine, so keep the worker count small
n_sessions = 40
n_subjects = 1  # only subject 1 is analysed here

# NOTE(review): nsd_dir, betas_dir and proj_dir are not assigned anywhere in
# this notebook; presumably they come in through one of the star imports -
# confirm before running on a fresh kernel.
nsda = NSDAccess(nsd_dir)

# Output folder for the ROI analyses. exist_ok=True creates the folder when
# it is missing and is a no-op when it already exists, so no separate
# existence check is needed.
outpath = os.path.join(betas_dir, 'roi_analyses')
os.makedirs(outpath, exist_ok=True)

targetspace = 'nativesurface'

# Per-hemisphere ROI label files.
# I found this in the paper repo -> mainfigures -> SCIENCE.RSA
lh_file = os.path.join(proj_dir, 'lh.highlevelvisual.mgz')
rh_file = os.path.join(proj_dir, 'rh.highlevelvisual.mgz')

# Load each hemisphere, drop singleton axes, then concatenate left followed
# by right so a single array covers both hemispheres.
maskdata_lh = nib.load(lh_file).get_fdata().squeeze()
maskdata_rh = nib.load(rh_file).get_fdata().squeeze()
maskdata = np.hstack((maskdata_lh, maskdata_rh))

# Subject identifiers ('subj01', 'subj02', ...). Zero-padding to two digits
# keeps the ids well-formed past nine subjects instead of producing 'subj010'.
subs = [f'subj{idx + 1:02d}' for idx in range(n_subjects)]

In [22]:
# Load the subj06 training betas and list the (row, column) index of every
# NaN entry. Author's note: the NaNs might be due to missing sessions.
# The path is assembled with os.path.join, like the other cells, rather than
# by string concatenation.
betas_06_file = os.path.join(
    proj_dir, 'betas', 'subj06', 'subj06_29_betas_train_nativesurface.npy'
)
betas_06 = np.load(betas_06_file)
np.argwhere(np.isnan(betas_06))

array([[   78,    62],
       [   78,    95],
       [   78,   111],
       ...,
       [23224,  9010],
       [23224,  9032],
       [23224,  9071]])

In [21]:
# Spot-check one entry, (78, 62), that the NaN index listing reports.
# NOTE(review): the stored output for this cell is a finite number, and the
# execution counts show this cell ran before the NaN listing was produced -
# re-run the cells in order to confirm which result is current.
betas_06[78, 62]

0.31928384

In [11]:
# Show the dimensions of the loaded subj06 betas array.
betas_06.shape

(39283, 9082)

In [5]:
# Point outpath at <proj_dir>/roi_analyses. This replaces the earlier value
# that was built from betas_dir, so the new folder has to be created too;
# otherwise the np.save calls further down fail when it does not exist yet.
outpath = os.path.join(proj_dir, 'roi_analyses')
os.makedirs(outpath, exist_ok=True)

In [15]:
# Load the custom ROI label array for subj01 and derive a boolean mask that
# is True wherever the label is positive.
maskdata_long_file = os.path.join(
    data_dir,
    'nsddata',
    'custom_roi',
    'subj01',
    'subj01.testrois.npy',
)
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects;
# only use it on files you produced yourself.
maskdata_long = np.load(
    maskdata_long_file,
    allow_pickle=True,
).astype(int)
maskdata_long_bool = maskdata_long > 0

In [17]:
# Fetch the condition labels of the first subject for n_sessions sessions
# (the helper logs one line per session while it runs).
# NOTE(review): the layout of the returned object is not visible here; the
# following cell flattens it with np.asarray(...).ravel().
conditions = get_conditions(nsd_dir, subs[0], n_sessions)

		sub: subj01 fetching condition trials in session: 1
		sub: subj01 fetching condition trials in session: 2
		sub: subj01 fetching condition trials in session: 3
		sub: subj01 fetching condition trials in session: 4
		sub: subj01 fetching condition trials in session: 5
		sub: subj01 fetching condition trials in session: 6
		sub: subj01 fetching condition trials in session: 7
		sub: subj01 fetching condition trials in session: 8
		sub: subj01 fetching condition trials in session: 9
		sub: subj01 fetching condition trials in session: 10
		sub: subj01 fetching condition trials in session: 11
		sub: subj01 fetching condition trials in session: 12
		sub: subj01 fetching condition trials in session: 13
		sub: subj01 fetching condition trials in session: 14
		sub: subj01 fetching condition trials in session: 15
		sub: subj01 fetching condition trials in session: 16
		sub: subj01 fetching condition trials in session: 17
		sub: subj01 fetching condition trials in session: 18
		sub: subj01 fetch

In [18]:
# Flatten the condition labels into a single 1-D array.
conditions = np.asarray(conditions).ravel()

# Mark every trial whose condition occurs exactly 3 times overall.
# A single np.unique pass yields the count of each distinct label, and
# cond_counts[cond_inverse] maps those counts back onto the trials. This
# replaces a comprehension that re-scanned the whole array once per trial.
# conditions_bool is now a boolean ndarray (one entry per trial) rather than
# a list of bools; it indexes `conditions` exactly the same way.
cond_values, cond_inverse, cond_counts = np.unique(
    conditions, return_inverse=True, return_counts=True
)
conditions_bool = cond_counts[cond_inverse] == 3

conditions_sampled = conditions[conditions_bool]

# find the subject's unique condition list (sample pool)
sample = np.unique(conditions_sampled)
assert sample.shape[0] == 10000, (
    f'expected 10000 conditions with 3 repeats, found {sample.shape[0]}'
)

In [8]:
# Read betas for 'subj01' through the NSDAccess helper and display the array.
# NOTE(review): the second argument (1) is presumably the session index -
# confirm against the nsd_access API.
nsda.read_betas('subj01', 1)

array([[-0.6676476 , -1.23361218, -2.19000125, ...,  1.2338618 ,
         0.77526712, -0.19022603],
       [ 1.73149765,  1.11558926,  0.67865771, ...,  1.44248617,
         1.14095104,  1.85204315],
       [-0.43909064, -0.21986611, -0.88706809, ...,  0.20550986,
         0.35769048,  0.1540612 ],
       ...,
       [ 0.83362132,  0.66902828,  1.20667899, ..., -0.97955889,
        -0.35754338,  0.668881  ],
       [ 0.1318295 , -0.77404815,  0.16410784, ...,  0.07227028,
         0.01431777,  1.14696586],
       [-0.05056639, -1.03044128, -0.52647913, ...,  1.32064319,
         1.30511808,  1.71471679]])

In [None]:
# Build, or reload from cache, the condition-averaged betas of the first
# subject. The result is stored as a .npy file under outpath so the slow
# per-session fetch only has to run when the cache file is missing.
betas_mean_file = os.path.join(
    outpath, f'{subs[0]}_betas_list_{targetspace}_averaged.npy'
)

if os.path.exists(betas_mean_file):
    # The message previously interpolated `sub`, a name that is never defined
    # in this notebook, so the cached path raised NameError on a fresh kernel.
    print(f'loading betas for {subs[0]}')
    # NOTE(review): allow_pickle=True unpickles arbitrary objects; acceptable
    # only because this file is written by this same cell.
    betas_mean = np.load(betas_mean_file, allow_pickle=True)
else:
    # Fetch the masked betas session by session for the chosen target space.
    betas_mean = get_betas(
        nsd_dir,
        subs[0],
        n_sessions,
        mask=maskdata_long_bool,
        targetspace=targetspace
    )

    # Join the per-session arrays along axis 1 and store them as float32.
    print(f'concatenating betas for {subs[0]}')
    betas_mean = np.concatenate(betas_mean, axis=1).astype(np.float32)

    # Average over the repeated presentations of each sampled condition.
    print('Now averaging them')
    betas_mean = average_over_conditions(
        betas_mean,
        conditions,
        conditions_sampled
    ).astype(np.float32)

    print('Saving conditions averaged betas')
    np.save(betas_mean_file, betas_mean)

		sub: subj01 fetching betas for trials in session: 1
		sub: subj01 fetching betas for trials in session: 2
		sub: subj01 fetching betas for trials in session: 3
		sub: subj01 fetching betas for trials in session: 4
		sub: subj01 fetching betas for trials in session: 5
		sub: subj01 fetching betas for trials in session: 6
		sub: subj01 fetching betas for trials in session: 7
		sub: subj01 fetching betas for trials in session: 8
		sub: subj01 fetching betas for trials in session: 9
		sub: subj01 fetching betas for trials in session: 10
		sub: subj01 fetching betas for trials in session: 11
		sub: subj01 fetching betas for trials in session: 12
		sub: subj01 fetching betas for trials in session: 13
		sub: subj01 fetching betas for trials in session: 14
		sub: subj01 fetching betas for trials in session: 15
		sub: subj01 fetching betas for trials in session: 16
		sub: subj01 fetching betas for trials in session: 17
		sub: subj01 fetching betas for trials in session: 18
		sub: subj01 fetch

In [None]:
# Compute and cache one full correlation-distance RDM per ROI.
# NOTE(review): betas_mean is reloaded here from a hard-coded fsaverage file
# in the working directory, replacing whatever betas_mean held before, while
# targetspace is set to 'nativesurface' - confirm this is the intended input
# and that its first axis lines up with `maskdata`.
betas_mean = np.load('subj01_betas_list_fsaverage_averaged.npy', allow_pickle=True)

# NOTE(review): ROIS is not defined in this notebook (presumably supplied by
# a star import); labels 1..5 are assumed to be the ROI ids used in maskdata.
for roi in range(1, 6):
    mask_name = ROIS[roi]
    rdm_file = os.path.join(
        outpath, f'{subs[0]}_{mask_name}_fullrdm_correlation.npy'
    )

    # Skip ROIs whose RDM has already been written to disk.
    if os.path.exists(rdm_file):
        continue

    # Boolean selector for the rows that carry this ROI label.
    vs_mask = maskdata == roi
    print(f'working on ROI: {mask_name}')

    masked_betas = betas_mean[vs_mask, :]

    # A row is usable only when none of its values is NaN.
    good_vox = ~np.isnan(masked_betas).any(axis=1)

    if not good_vox.all():
        print(f'found some NaN for ROI: {mask_name} - {subs[0]}')
        # Drop the NaN-containing rows. Previously they were only reported,
        # so they propagated into pdist and tripped the NaN check below.
        masked_betas = masked_betas[good_vox, :]

    # prepare for correlation distance: pdist compares rows, so transpose to
    # put one observation per row.
    X = masked_betas.T

    print(f'computing RDM for roi: {mask_name}')
    rdm = pdist(X, metric='correlation')

    # NaNs can still appear here, e.g. when a row of X has zero variance.
    if np.any(np.isnan(rdm)):
        raise ValueError(
            f'RDM for ROI {mask_name} ({subs[0]}) contains NaN values'
        )

    print(f'saving full rdm for {mask_name} : {subs[0]}')
    np.save(rdm_file, rdm)