# Run for submission

## Constant

In [101]:
import os.path as osp

# Subject whose data this run trains on and writes predictions for.
SUBJ = "subj08"
# Despite its name this is already formatted: the data root of SUBJ.
PATH_TEMPLATE = "../../data.nosync/{}".format(SUBJ)

# Feature sets to load, given as sub-directories of a feature root.
# The matching "encoder-last{1..4}-pca-512" sets are disabled for this run.
FEATURE = [
    osp.join("vit-gpt2-image-captioning", "decoder-last4-pca-512"),
    osp.join("vit-gpt2-image-captioning", "decoder-last3-pca-512"),
    osp.join("vit-gpt2-image-captioning", "decoder-last2-pca-512"),
    osp.join("vit-gpt2-image-captioning", "decoder-last1-pca-512"),
    osp.join("resnet50-imagenet1k-v2", "avgpool"),
]

# Training split: "training_{}" is filled with "features" or "fmri".
TRAINING_PATH_TEMPLATE = osp.join(PATH_TEMPLATE, "training_split/training_{}")
TRAINING_FEATURE_PATH_TEMPLATE = TRAINING_PATH_TEMPLATE.format("features")
FMRI_PATH = TRAINING_PATH_TEMPLATE.format("fmri")
TRAIN_FEATURE_PATHS = [
    osp.join(TRAINING_FEATURE_PATH_TEMPLATE, feature_dir) for feature_dir in FEATURE
]

# Test split: the same feature sets under the test feature root.
TESTING_FEATURE_PATH_TEMPLATE = osp.join(PATH_TEMPLATE, "test_split/test_features")
TESTING_FEATURE_PATHS = [
    osp.join(TESTING_FEATURE_PATH_TEMPLATE, feature_dir) for feature_dir in FEATURE
]

# Output directory for this subject's predictions.
SAVE_PATH = "./submission.nosync/ridge_res50+vit-gpt2-decoder-last4/{}".format(SUBJ)

## Load Data

In [102]:
from sklearn.preprocessing import StandardScaler
from src import dataset
import numpy as np

# Load the training features and standardise them column-wise in one step.
feat = StandardScaler().fit_transform(dataset.get_features(TRAIN_FEATURE_PATHS))

# Left- and right-hemisphere fMRI responses are stored as separate .npy files.
l_fmri, r_fmri = (
    np.load(osp.join(FMRI_PATH, fmri_file))
    for fmri_file in ("lh_training_fmri.npy", "rh_training_fmri.npy")
)

# Report shapes so mismatched row counts are visible before training.
print("Feature Shape: {}".format(feat.shape))
print("Left fMRI shape: {}".format(l_fmri.shape))
print("Right fMRI shape: {}".format(r_fmri.shape))

Feature Shape: (8779, 4096)
Left fMRI shape: (8779, 18981)
Right fMRI shape: (8779, 20530)


## Alpha Selection

In [103]:
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV
# from sklearn.metrics import make_scorer
# from src.utils import compute_perason_numpy
# import numpy as np

# grid_search_l = GridSearchCV(Ridge(), param_grid={"alpha": [1e5, 2e5, 5e5, 1e6, 2e6, 5e6, 1e7]}, scoring=make_scorer(
#     lambda x, y: np.median(compute_perason_numpy(x, y))), cv=5, n_jobs=5, verbose=1)
# grid_search_l.fit(X=feat, y=l_fmri)

# print("Best Param: {}".format(grid_search_l.best_params_))
# print("Best Score: {}".format(grid_search_l.best_score_))

In [104]:
# grid_search_l.cv_results_

In [105]:
# grid_search_r = GridSearchCV(Ridge(), param_grid={"alpha": [1e5, 2e5, 5e5, 1e6, 2e6, 5e6, 1e7]}, scoring=make_scorer(
#     lambda x, y: np.median(compute_perason_numpy(x, y))), cv=5, n_jobs=5, verbose=1)
# grid_search_r.fit(X=feat, y=r_fmri)

# print("Best Param: {}".format(grid_search_r.best_params_))
# print("Best Score: {}".format(grid_search_r.best_score_))

In [106]:
# grid_search_r.cv_results_

## Training

In [107]:
# Alternative that uses the grid-search result (needs the commented-out
# "Alpha Selection" cells to be re-enabled first):
# model_left = Ridge(alpha=grid_search_l.best_params_["alpha"]).fit(X=feat, y=l_fmri)
# model_right = Ridge(alpha=grid_search_r.best_params_["alpha"]).fit(X=feat, y=r_fmri)

# Fit one ridge model per hemisphere (left first, then right), both with the
# same fixed regularisation strength.
model_left, model_right = (
    Ridge(alpha=2e4).fit(X=feat, y=hemi_fmri) for hemi_fmri in (l_fmri, r_fmri)
)

## Prediction

### Load test data

In [108]:
test_feat = dataset.get_features(TESTING_FEATURE_PATHS)

# normalize
# The ridge models were fitted on features standardised with the TRAINING
# mean/variance, so the test features must go through that same transform.
# Fitting a fresh scaler on the small test split would shift and rescale every
# column differently from what the models saw during training.
# `feat` already holds the scaled training features at this point, so the raw
# training features are reloaded to recover the training statistics.
# NOTE(review): assumes dataset.get_features returns the same values on every
# call for the same paths - confirm.
train_scaler = StandardScaler().fit(dataset.get_features(TRAIN_FEATURE_PATHS))
test_feat = train_scaler.transform(test_feat)

print("Test Feature Shape: {}".format(test_feat.shape))

Test Feature Shape: (395, 4096)


### Predict

In [109]:
# Predict the test-split responses with each hemisphere's model (left, then right).
l_fmri_pred, r_fmri_pred = (
    hemi_model.predict(test_feat) for hemi_model in (model_left, model_right)
)

## Save

In [110]:
import os
import numpy as np

# Create the output directory tree if needed. exist_ok=True makes this a no-op
# when the directory already exists and avoids the check-then-create race of
# a separate isdir() test.
os.makedirs(SAVE_PATH, exist_ok=True)

# Store the predictions as float32 before writing them out.
l_fmri_pred = l_fmri_pred.astype(np.float32)
r_fmri_pred = r_fmri_pred.astype(np.float32)

# One .npy file per hemisphere.
np.save(os.path.join(SAVE_PATH, 'lh_pred_test.npy'), l_fmri_pred)
np.save(os.path.join(SAVE_PATH, 'rh_pred_test.npy'), r_fmri_pred)