# Run for submission

## Constant

In [41]:
SUBJ = "subj08"
DATA_PATH = "../../data.nosync/{}".format(SUBJ)
EXTRACTOR = "vit-gpt2-image-captioning"
LAYER = ["decoder-last4-pca-512", "decoder-last3-pca-512", "decoder-last2-pca-512", "decoder-last1-pca-512"]
SAVE_PATH = "./submission.nosync/ridge_vit-gpt2-image-captioning_last1234_pca_512/{}".format(SUBJ)

## Load Data

In [42]:
from sklearn.preprocessing import StandardScaler
from src import dataset

feat, l_frmi, r_frmi = dataset.get_features(
    DATA_PATH, EXTRACTOR, LAYER, True)

# normalize
feat = StandardScaler().fit_transform(feat)

print("Feature Shape: {}".format(feat.shape))
print("Left fMRI shape: {}".format(l_frmi.shape))
print("Right fMRI shape: {}".format(r_frmi.shape))

Using fMRI from: ../../data.nosync/subj08/training_split/training_fmri
Using data from: ../../data.nosync/subj08/training_split/training_features
Feature Shape: (8779, 2048)
Left fMRI shape: (8779, 18981)
Right fMRI shape: (8779, 20530)


## Alpha Selection

In [43]:
from sklearn.linear_model import Ridge, LinearRegression as Linear
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import make_scorer
from src.utils import compute_perason_numpy
import numpy as np

grid_search_l = GridSearchCV(Ridge(), param_grid={"alpha": [1e5, 2e5, 5e5, 1e6, 2e6, 5e6, 1e7]}, scoring=make_scorer(
    lambda x, y: np.median(compute_perason_numpy(x, y))), cv=5, n_jobs=5, verbose=1)
grid_search_l.fit(X=feat, y=l_frmi)

print("Best Param: {}".format(grid_search_l.best_params_))
print("Best Score: {}".format(grid_search_l.best_score_))

Fitting 5 folds for each of 7 candidates, totalling 35 fits




Best Param: {'alpha': 10000000.0}
Best Score: 0.2617783186347155


In [44]:
grid_search_l.cv_results_

{'mean_fit_time': array([11.98431983, 20.23769488, 24.87217784, 19.84432549, 19.56627316,
        19.45886731, 19.79281044]),
 'std_fit_time': array([0.06397782, 0.32832771, 0.24087377, 0.1222267 , 0.12083471,
        0.12612005, 0.12453723]),
 'mean_score_time': array([13.10127616, 17.21644139, 16.81160378, 15.68842735, 15.49573183,
        15.64900746, 15.54087577]),
 'std_score_time': array([0.01852029, 0.11764717, 0.43300594, 0.15722604, 0.14220232,
        0.15078244, 0.56660592]),
 'param_alpha': masked_array(data=[100000.0, 200000.0, 500000.0, 1000000.0, 2000000.0,
                    5000000.0, 10000000.0],
              mask=[False, False, False, False, False, False, False],
        fill_value='?',
             dtype=object),
 'params': [{'alpha': 100000.0},
  {'alpha': 200000.0},
  {'alpha': 500000.0},
  {'alpha': 1000000.0},
  {'alpha': 2000000.0},
  {'alpha': 5000000.0},
  {'alpha': 10000000.0}],
 'split0_test_score': array([0.25203889, 0.25362818, 0.25458044, 0.25495555, 0

In [45]:
grid_search_r = GridSearchCV(Ridge(), param_grid={"alpha": [1e5, 2e5, 5e5, 1e6, 2e6, 5e6, 1e7]}, scoring=make_scorer(
    lambda x, y: np.median(compute_perason_numpy(x, y))), cv=5, n_jobs=5, verbose=1)
grid_search_r.fit(X=feat, y=r_frmi)

print("Best Param: {}".format(grid_search_r.best_params_))
print("Best Score: {}".format(grid_search_r.best_score_))

Fitting 5 folds for each of 7 candidates, totalling 35 fits
Best Param: {'alpha': 10000000.0}
Best Score: 0.2650124395278021


In [46]:
grid_search_r.cv_results_

{'mean_fit_time': array([21.8956418 , 22.81348033, 22.52295933, 21.84492574, 20.83361702,
        35.86053305, 35.38794436]),
 'std_fit_time': array([0.04153754, 0.01148521, 0.01281831, 0.04212305, 0.02190151,
        0.14818029, 0.09269443]),
 'mean_score_time': array([17.838237  , 21.84109478, 17.26934905, 17.04161086, 21.40584178,
        26.7802515 , 21.01015644]),
 'std_score_time': array([0.02747907, 0.12125869, 0.04329248, 0.02899679, 0.13980138,
        0.10735089, 0.07776297]),
 'param_alpha': masked_array(data=[100000.0, 200000.0, 500000.0, 1000000.0, 2000000.0,
                    5000000.0, 10000000.0],
              mask=[False, False, False, False, False, False, False],
        fill_value='?',
             dtype=object),
 'params': [{'alpha': 100000.0},
  {'alpha': 200000.0},
  {'alpha': 500000.0},
  {'alpha': 1000000.0},
  {'alpha': 2000000.0},
  {'alpha': 5000000.0},
  {'alpha': 10000000.0}],
 'split0_test_score': array([0.25309874, 0.25454403, 0.25549577, 0.25577936, 0

## Training

In [47]:
model_left = Ridge(alpha=grid_search_l.best_params_["alpha"]).fit(X=feat, y=l_frmi)
model_right = Ridge(alpha=grid_search_r.best_params_["alpha"]).fit(X=feat, y=r_frmi)

## Prediction

### Load test data

In [48]:
test_feat, _, _ = dataset.get_features(
    DATA_PATH, EXTRACTOR, LAYER, False)

# normalize
test_feat = StandardScaler().fit_transform(test_feat)

print("Test Feature Shape: {}".format(test_feat.shape))

Using data from: ../../data.nosync/subj08/test_split/test_features
Test Feature Shape: (395, 2048)


### Predict

In [49]:
l_fmri_pred = model_left.predict(test_feat)
r_fmri_pred = model_right.predict(test_feat)

## Save

In [50]:
import os
import numpy as np

if not os.path.isdir(SAVE_PATH):
    os.makedirs(SAVE_PATH)

l_fmri_pred = l_fmri_pred.astype(np.float32)
r_fmri_pred = r_fmri_pred.astype(np.float32)

np.save(os.path.join(SAVE_PATH, 'lh_pred_test.npy'), l_fmri_pred)
np.save(os.path.join(SAVE_PATH, 'rh_pred_test.npy'), r_fmri_pred)