# Run for submission

## Constants

In [72]:
# Subject identifier; it is interpolated into both DATA_PATH and SAVE_PATH.
SUBJ = "subj08"

# Directory holding this subject's data.
DATA_PATH = "../../data.nosync/{}".format(SUBJ)

# Feature-extractor name handed to dataset.get_features.
EXTRACTOR = "vit-gpt2-image-captioning"

# Layer names handed to dataset.get_features, in this exact order.
LAYER = [
    "encoder-last4-pca-512",
    "decoder-last4-pca-512",
    "encoder-last3-pca-512",
    "decoder-last3-pca-512",
    "encoder-last2-pca-512",
    "decoder-last2-pca-512",
    "encoder-last1-pca-512",
    "decoder-last1-pca-512",
]

# Per-subject output directory for the prediction files.
SAVE_PATH = (
    "./submission.nosync/ridge_vit-gpt2-image-captioning_encoder+decoder_last1234_pca_512/{}"
).format(SUBJ)

## Load Data

In [73]:
from sklearn.preprocessing import StandardScaler
from src import dataset

# Load the training features together with the left / right fMRI arrays.
feat, l_frmi, r_frmi = dataset.get_features(DATA_PATH, EXTRACTOR, LAYER, True)

# Standardize the training features; the fitted scaler is held in a variable
# rather than being created and discarded inline.
train_scaler = StandardScaler()
feat = train_scaler.fit_transform(feat)

# Report the shape of each loaded array.
for shape_msg, arr in (
    ("Feature Shape: {}", feat),
    ("Left fMRI shape: {}", l_frmi),
    ("Right fMRI shape: {}", r_frmi),
):
    print(shape_msg.format(arr.shape))

Using fMRI from: ../../data.nosync/subj08/training_split/training_fmri
Using data from: ../../data.nosync/subj08/training_split/training_features
Feature Shape: (8779, 4096)
Left fMRI shape: (8779, 18981)
Right fMRI shape: (8779, 20530)


## Alpha Selection

In [74]:
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import make_scorer
from src.utils import compute_perason_numpy
import numpy as np

# Disabled: 5-fold grid search over the Ridge `alpha` for the left fMRI targets
# (`l_frmi`), scored by the median of `compute_perason_numpy`.
# Kept for reference only -- the training cell currently uses a fixed
# alpha=2e6 instead of reading `grid_search_l.best_params_`.
# grid_search_l = GridSearchCV(Ridge(), param_grid={"alpha": [1e5, 2e5, 5e5, 1e6, 2e6, 5e6, 1e7]}, scoring=make_scorer(
#     lambda x, y: np.median(compute_perason_numpy(x, y))), cv=5, n_jobs=5, verbose=1)
# grid_search_l.fit(X=feat, y=l_frmi)

# print("Best Param: {}".format(grid_search_l.best_params_))
# print("Best Score: {}".format(grid_search_l.best_score_))

In [75]:
# grid_search_l.cv_results_

In [76]:
# Disabled: 5-fold grid search over the Ridge `alpha` for the right fMRI targets
# (`r_frmi`), scored by the median of `compute_perason_numpy`.
# Kept for reference only -- the training cell currently uses a fixed
# alpha=2e6 instead of reading `grid_search_r.best_params_`.
# grid_search_r = GridSearchCV(Ridge(), param_grid={"alpha": [1e5, 2e5, 5e5, 1e6, 2e6, 5e6, 1e7]}, scoring=make_scorer(
#     lambda x, y: np.median(compute_perason_numpy(x, y))), cv=5, n_jobs=5, verbose=1)
# grid_search_r.fit(X=feat, y=r_frmi)

# print("Best Param: {}".format(grid_search_r.best_params_))
# print("Best Score: {}".format(grid_search_r.best_score_))

In [77]:
# grid_search_r.cv_results_

## Training

In [78]:
# Alternative that reads alpha from the (currently disabled) grid searches:
# model_left = Ridge(alpha=grid_search_l.best_params_["alpha"]).fit(X=feat, y=l_frmi)
# model_right = Ridge(alpha=grid_search_r.best_params_["alpha"]).fit(X=feat, y=r_frmi)

# Both models use the same fixed regularization strength.
ridge_alpha = 2e6

# One Ridge model per fMRI target array, fit on the same features.
model_left = Ridge(alpha=ridge_alpha).fit(feat, l_frmi)
model_right = Ridge(alpha=ridge_alpha).fit(feat, r_frmi)

## Prediction

### Load test data

In [79]:
# Re-load the raw (un-normalized) training features. The `feat` variable holds
# the already standardized values and the scaler that produced them was not
# kept, so the training mean/variance must be recomputed here.
train_feat_raw, _, _ = dataset.get_features(
    DATA_PATH, EXTRACTOR, LAYER, True)

# Load the test features (the last argument False selects the test split);
# rebinding `_` also drops the training fMRI arrays loaded just above.
test_feat, _, _ = dataset.get_features(
    DATA_PATH, EXTRACTOR, LAYER, False)

# normalize
# Fit the scaler on the TRAINING features and only transform the test features.
# Fitting a fresh scaler on the test set would scale it with different
# statistics than the ones the Ridge models were trained under.
test_feat = StandardScaler().fit(train_feat_raw).transform(test_feat)
del train_feat_raw  # free the temporary copy of the raw training features

print("Test Feature Shape: {}".format(test_feat.shape))

Using data from: ../../data.nosync/subj08/test_split/test_features
Test Feature Shape: (395, 4096)


### Predict

In [80]:
# Run both fitted models on the same test features (left first, then right).
l_fmri_pred, r_fmri_pred = (
    model.predict(test_feat) for model in (model_left, model_right)
)

## Save

In [81]:
import os
import numpy as np

# Create the output directory (and any missing parents) if it is not there yet.
os.makedirs(SAVE_PATH, exist_ok=True)

# Cast both prediction arrays to float32 before writing them out.
l_fmri_pred, r_fmri_pred = (
    pred.astype(np.float32) for pred in (l_fmri_pred, r_fmri_pred)
)

# Write one .npy file per prediction array into SAVE_PATH.
for out_name, pred in (
    ('lh_pred_test.npy', l_fmri_pred),
    ('rh_pred_test.npy', r_fmri_pred),
):
    np.save(os.path.join(SAVE_PATH, out_name), pred)