In [1]:
import argparse
import time
import numpy as np
import pandas as pd
from pathlib import Path
from src import regress

from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import RidgeCV, Ridge
from sklearn.model_selection import KFold
import nibabel as nib

In [2]:
def load_neural(out_dir, sid, dataset):
    """Load one subject's saved neural data array from the GroupRuns folder.

    Parameters
    ----------
    out_dir : str or path-like
        Directory that contains the ``GroupRuns`` folder.
    sid : str
        Subject identifier as it appears after ``sub-`` in the file names.
    dataset : str
        Dataset label embedded in the file name (the notebook passes
        ``'train'`` and ``'test'``).

    Returns
    -------
    The object returned by ``np.load`` for
    ``{out_dir}/GroupRuns/sub-{sid}/sub-{sid}_{dataset}-data.npy``.
    """
    return np.load(f'{out_dir}/GroupRuns/sub-{sid}/sub-{sid}_{dataset}-data.npy')

In [3]:
def combine_mask_sd(mask, a, b):
    """Restrict ``mask`` to columns that vary in both ``a`` and ``b``.

    A column is kept only when ``mask`` is truthy there AND the standard
    deviation over axis 0 is finite and not (numerically) zero in both
    ``a`` and ``b``.

    Parameters
    ----------
    mask : array-like
        Existing mask, stacked row-wise with the two variance masks.
    a, b : np.ndarray
        Arrays whose axis-0 standard deviation is evaluated per column.

    Returns
    -------
    np.ndarray
        Boolean array: logical AND over ``mask`` and both variance masks.
    """
    def _varies(data):
        # Per-column spread. A NaN/inf standard deviation (e.g. a column that
        # contains NaN) is treated as "no usable variance": np.isclose(nan, 0)
        # is False, so without the isfinite check such columns would be kept.
        sd = data.std(axis=0)
        return np.logical_and(np.isfinite(sd), np.logical_not(np.isclose(sd, 0.)))

    stacked = np.vstack([mask, _varies(a), _varies(b)])
    return np.all(stacked, axis=0)

In [4]:
# Subject to analyse.
sid = '04'

# Single project root; every other location is derived from it so that moving
# the data only requires editing this one line.
project_dir = '/Users/emcmaho7/Dropbox/projects/SI_fmri/SIfMRI_analysis'
data_dir = f'{project_dir}/data/raw'
out_dir = f'{project_dir}/data/interim'
mask_dir = f'{out_dir}/Reliability'

# Subject's train-set reliability mask, cast to boolean for later indexing.
mask = np.load(f'{mask_dir}/sub-{sid}_set-train_reliability-mask.npy').astype('bool')

In [5]:
# Training design matrix: annotated features followed by the control-model
# features (so the annotated columns come first).
X_train = np.load(f'{out_dir}/GenerateModels/annotated_model_set-train.npy')
X_control = np.load(f'{out_dir}/GenerateModels/control_model_conv2_set-train.npy')
X_train = np.hstack([X_train, X_control])

# Test design matrix holds the annotated features only; no control columns
# are appended here.
X_test = np.load(f'{out_dir}/GenerateModels/annotated_model_set-test.npy')

# Neural responses for both splits.
y_train = load_neural(out_dir, sid, 'train')
y_test = load_neural(out_dir, sid, 'test')

# Tighten the reliability mask to columns that vary in both splits, and store
# the final mask. The output folder is created first because np.save does not
# create missing directories and would otherwise fail on a fresh data tree.
mask = combine_mask_sd(mask, y_train, y_test)
Path(f'{out_dir}/VoxelEncodingTest').mkdir(parents=True, exist_ok=True)
np.save(f'{out_dir}/VoxelEncodingTest/sub-{sid}_mask.npy', mask)

# Keep only the masked columns of the responses.
y_train = y_train[:, mask]
y_test = y_test[:, mask]

In [6]:
# Rescale the design matrices and the neural responses with the project's
# regress.scale helper, which returns a (train, test) pair.
# NOTE(review): presumably a z-scoring based on training statistics - confirm
# in src/regress.py.
X_train, X_test = regress.scale(X_train, X_test)
y_train, y_test = regress.scale(y_train, y_test)

# Store the scaled test responses next to the other encoding outputs.
y_test_path = f'{out_dir}/VoxelEncodingTest/sub-{sid}_y-test.npy'
np.save(y_test_path, y_test)

In [7]:
# Candidate regularisation strengths: 10**-2 ... 10**5 (arange excludes stop).
alphas = 10. ** np.arange(start=-2., stop=6.)

# Shuffled 4-fold split with a fixed seed so the selected alpha is reproducible.
kf = KFold(n_splits=4, shuffle=True, random_state=0)

# fit_intercept=False: the final Ridge model is fit without an intercept and
# the predictions are formed as X @ coef.T, so alpha is selected under that
# same no-intercept model rather than RidgeCV's default (fit_intercept=True).
clf = RidgeCV(cv=kf, scoring="r2", alphas=alphas, fit_intercept=False)
clf.fit(X_train, y_train)

# Best alpha by cross-validated R^2.
alpha = clf.alpha_
print(alpha)

1000.0


In [8]:
# Final encoding model: ridge regression at the cross-validated alpha, fit on
# the full training set without an intercept term.
lr = Ridge(alpha=alpha, fit_intercept=False)
lr.fit(X_train, y_train)

Ridge(alpha=1000.0, fit_intercept=False)

In [9]:
# Only the weights of the first 12 design-matrix columns are stored.
# NOTE(review): the annotated features are stacked first in X_train, so 12 is
# presumably the number of annotated features (control weights dropped) - confirm.
n_saved_features = 12
betas_path = f'{out_dir}/VoxelEncodingTest/sub-{sid}_betas.npy'
np.save(betas_path, lr.coef_[:, :n_saved_features])

In [10]:
# Named groups of design-matrix columns (lists of column indices).
models = {
    'visual': [0, 1, 2],
    'socialprimitive': [3, 4],
    'social': [5, 6, 7, 8, 9, 10, 11],
}

# Feature names are the annotation columns other than 'video_name'.
annotations = pd.read_csv(f'{data_dir}/annotations/annotations.csv')
features = (
    annotations
    .sort_values(by=['video_name'])
    .drop(columns=['video_name'])
    .columns
    .to_numpy()
)

# Each individual feature maps to its own single column index (a bare int,
# unlike the list-valued groups above).
models.update({feature: ifeature for ifeature, feature in enumerate(features)})

In [11]:
# Predict the test responses separately for every model / feature, using only
# that model's columns of the test design matrix and the matching weights.
prediction = {}
for key, inds in models.items():
    # Groups are lists of column indices; single features are bare ints and
    # are widened to a one-column slice so the product stays two-dimensional.
    cols = inds if isinstance(inds, list) else slice(inds, inds + 1)
    y_pred = X_test[:, cols] @ lr.coef_[:, cols].T
    prediction[key] = y_pred
    np.save(f'{out_dir}/VoxelEncodingTest/sub-{sid}_{key}.npy', y_pred)
print('finished')

finished
