# Feature Extractor + Ridge Regressor

### Constants

In [1]:
# Feature extractor name and the layer list handed to dataset.get_features.
EXTRACTOR = "vit-gpt2-image-captioning"
LAYER = ["encoder"]

# Subject identifier; it also selects the per-subject data directory.
SUBJ = "subj01"
DATA_PATH = "../../data.nosync/" + SUBJ

## Load data

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from src import dataset

# Load the extractor features together with the left and right fMRI arrays.
feat, l_frmi, r_frmi = dataset.get_features(DATA_PATH, EXTRACTOR, LAYER, True)

# Standardize each feature column with scikit-learn's StandardScaler.
# NOTE(review): the scaler is fit on every sample, i.e. before the train/test
# split done later in this notebook, so held-out rows contribute to the
# scaling statistics -- confirm this is intended.
scaler = StandardScaler()
feat = scaler.fit_transform(feat)

for label, array in (
    ("Feature Shape", feat),
    ("Left fMRI shape", l_frmi),
    ("Right fMRI shape", r_frmi),
):
    print("{}: {}".format(label, array.shape))


Using fMRI from: ../../data.nosync/subj01/training_split/training_fmri
Using data from: ../../data.nosync/subj01/training_split/training_features
Feature Shape: (9841, 2048)
Left fMRI shape: (9841, 19004)
Right fMRI shape: (9841, 20544)


In [27]:
import numpy as np

# Pairwise Pearson correlation between the feature columns (rowvar=False makes
# each column a variable). Despite the variable name, this is a correlation
# matrix, not a covariance matrix.
covar = np.corrcoef(feat, rowvar=False)

In [28]:
# Square matrix with one row/column per feature column of `feat`.
covar.shape

(2048, 2048)

### Data split

In [3]:
# Keep 80% of the samples for training; the fixed random_state makes the
# split reproducible. Features and both fMRI arrays are split with the same
# row assignment.
splits = train_test_split(
    feat, l_frmi, r_frmi, train_size=0.8, random_state=1001)
X_train, X_test, l_fmri_train, l_fmri_test, r_fmri_train, r_fmri_test = splits

for name, array in (
    ("X_train", X_train),
    ("l_fmri_train", l_fmri_train),
    ("r_fmri_train", r_fmri_train),
):
    print("{} shape: {}".format(name, array.shape))

print()

for name, array in (
    ("X_test", X_test),
    ("l_fmri_test", l_fmri_test),
    ("r_fmri_test", r_fmri_test),
):
    print("{} shape: {}".format(name, array.shape))


X_train shape: (7872, 2048)
l_fmri_train shape: (7872, 19004)
r_fmri_train shape: (7872, 20544)

X_test shape: (1969, 2048)
l_fmri_test shape: (1969, 19004)
r_fmri_test shape: (1969, 20544)


## Modelling

### Grid Search

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import make_scorer
from sklearn.linear_model import Ridge
from src.utils import compute_perason_numpy
import numpy as np

# Ridge penalties to try for the left fMRI targets.
alphas_l = [1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10]

# make_scorer calls the function as (y_true, y_pred); the score is the median
# of whatever compute_perason_numpy returns (presumably one Pearson r per
# target column -- TODO confirm against src.utils).
median_pearson = make_scorer(
    lambda y_true, y_pred: np.median(compute_perason_numpy(y_true, y_pred)))

grid_search_l = GridSearchCV(
    Ridge(),
    param_grid={"alpha": alphas_l},
    scoring=median_pearson,
    cv=5,
    n_jobs=5,
    verbose=1,
)
grid_search_l.fit(X=feat, y=l_frmi)

print("Best Param: {}".format(grid_search_l.best_params_))
print("Best Score: {}".format(grid_search_l.best_score_))

In [None]:
# Display the full cross-validation results of the left-fMRI grid search.
grid_search_l.cv_results_

In [None]:
# Ridge penalties to try for the right fMRI targets.
# NOTE(review): this range (1e1..1e5) differs from the one used for the left
# fMRI search (1e4..1e10) -- confirm that is deliberate.
alphas_r = [1e1, 1e2, 1e3, 1e4, 1e5]

# make_scorer calls the function as (y_true, y_pred); the score is the median
# of whatever compute_perason_numpy returns (presumably one Pearson r per
# target column -- TODO confirm against src.utils).
median_pearson = make_scorer(
    lambda y_true, y_pred: np.median(compute_perason_numpy(y_true, y_pred)))

grid_search_r = GridSearchCV(
    Ridge(),
    param_grid={"alpha": alphas_r},
    scoring=median_pearson,
    cv=5,
    n_jobs=5,
    verbose=1,
)
grid_search_r.fit(X=feat, y=r_frmi)

print("Best Param: {}".format(grid_search_r.best_params_))
print("Best Score: {}".format(grid_search_r.best_score_))

In [None]:
# Display the full cross-validation results of the right-fMRI grid search.
grid_search_r.cv_results_

### Cross Validate

In [None]:
from sklearn.model_selection import cross_validate
from sklearn.linear_model import Ridge, LinearRegression as Linear, Lasso
from sklearn.metrics import make_scorer
import numpy as np
from src.utils import compute_perason_numpy

# Score = median of the values returned by compute_perason_numpy for one fold;
# make_scorer passes the arguments as (y_true, y_pred).
median_pearson = make_scorer(
    lambda y_true, y_pred: np.median(compute_perason_numpy(y_true, y_pred)))

# 5-fold cross-validation of a fixed-alpha ridge on the left fMRI targets.
# cross_validate returns a dict of per-fold arrays (timings and test scores).
model_l = cross_validate(
    Ridge(alpha=2e4),
    X=feat,
    y=l_frmi,
    scoring=median_pearson,
    cv=5,
    n_jobs=5,
    verbose=1,
)

for key, values in model_l.items():
    formatted = ",\t".join("{:.3f}".format(value) for value in values)
    print("<{}>:\t{}".format(key, formatted))


In [None]:
# Score = median of the values returned by compute_perason_numpy for one fold;
# make_scorer passes the arguments as (y_true, y_pred).
median_pearson = make_scorer(
    lambda y_true, y_pred: np.median(compute_perason_numpy(y_true, y_pred)))

# 5-fold cross-validation of a fixed-alpha ridge on the right fMRI targets.
# cross_validate returns a dict of per-fold arrays (timings and test scores).
model_r = cross_validate(
    Ridge(alpha=2e4),
    X=feat,
    y=r_frmi,
    scoring=median_pearson,
    cv=5,
    n_jobs=5,
    verbose=1,
)

for key, values in model_r.items():
    formatted = ",\t".join("{:.3f}".format(value) for value in values)
    print("<{}>:\t{}".format(key, formatted))


### Standard Train/Eval

In [21]:
from sklearn.linear_model import Ridge

# One ridge model per fMRI array, both using the same penalty, fit on the
# training split and evaluated on the held-out split.
ridge_alpha = 1e11

model_l = Ridge(alpha=ridge_alpha)
model_l.fit(X=X_train, y=l_fmri_train)
y_pred_l = model_l.predict(X_test)

model_r = Ridge(alpha=ridge_alpha)
model_r.fit(X=X_train, y=r_fmri_train)
y_pred_r = model_r.predict(X_test)


In [17]:
from src.utils import compute_perason_numpy

# Compare held-out predictions with the measured fMRI, left then right.
lh_correlation, rh_correlation = (
    compute_perason_numpy(predicted, measured)
    for predicted, measured in (
        (y_pred_l, l_fmri_test),
        (y_pred_r, r_fmri_test),
    )
)

## Visualization

### Dev Pearson's R

In [18]:
from src.visualize import histogram, box_plot

# NOTE(review): img_save is not used in this cell and its "alpha_1e4" suffix
# is hard-coded -- confirm whether it is still needed.
img_save = "./img/ridge_vit-gpt2-image-captioning_encoder_alpha_1e4"

# Take alpha from the fitted estimator rather than a hard-coded 1e4, so the
# title cannot drift from the model actually trained (model_l and model_r are
# fit with the same alpha in the train/eval cell).
dev_title = "{}, Ridge(alpha={}), {}-{}".format(
    SUBJ, model_l.alpha, EXTRACTOR, "+".join(LAYER))

histogram(DATA_PATH, lh_correlation, rh_correlation, dev_title)



Mean of empty slice.


invalid value encountered in scalar divide



In [None]:
# The title has four placeholders: subject, ridge alpha, extractor and the
# joined layer list. Alpha is read from the fitted estimator so the title
# matches the model trained in the train/eval cell.
dev_box_title = "{}, Ridge(alpha={}), {}-{}".format(
    SUBJ, model_l.alpha, EXTRACTOR, "+".join(LAYER))

box_plot(DATA_PATH, lh_correlation, rh_correlation, dev_box_title)


### Train Pearson's R

In [19]:
# Predict on the training rows with both fitted models, left then right.
y_pred_l_train, y_pred_r_train = (
    fitted.predict(X_train) for fitted in (model_l, model_r))

In [20]:
# Compare training-set predictions with the measured fMRI, left then right.
lh_correlation_train, rh_correlation_train = (
    compute_perason_numpy(predicted, measured)
    for predicted, measured in (
        (y_pred_l_train, l_fmri_train),
        (y_pred_r_train, r_fmri_train),
    )
)

In [22]:
# Take alpha from the fitted estimator rather than a hard-coded 1e4, so the
# title cannot drift from the model actually trained (model_l and model_r are
# fit with the same alpha in the train/eval cell).
train_title = "{}, Ridge(alpha={}), {}-{}, TRAIN".format(
    SUBJ, model_l.alpha, EXTRACTOR, "+".join(LAYER))

histogram(DATA_PATH, lh_correlation_train, rh_correlation_train, train_title)



Mean of empty slice.


invalid value encountered in scalar divide



In [None]:
# Box plot of the training-set correlations; the title names the subject,
# extractor and joined layer list.
train_box_title = "{}, Ridge, {}-{}, TRAIN".format(
    SUBJ, EXTRACTOR, "+".join(LAYER))

box_plot(DATA_PATH, lh_correlation_train, rh_correlation_train, train_box_title)
