# Run for submission

## Constants

In [55]:
# --- Run configuration ------------------------------------------------------
# Subject identifier; it is substituted into both the input and output paths.
SUBJ = "subj08"

# Feature extractor name and the layers whose features are requested from it.
EXTRACTOR = "resnet50-imagenet1k-v2"
LAYER = ["layer3", "avgpool"]

# Value passed to Ridge as its first (alpha) argument.
ALPHA = 1e4

# Per-subject locations: where the data is read from and where predictions go.
DATA_PATH = "../../data.nosync/{}".format(SUBJ)
SAVE_PATH = "./submission.nosync/ridge_resnet50_layer3+avgpool/{}".format(SUBJ)

## Load Data

In [56]:
from sklearn.preprocessing import StandardScaler
from src import dataset

# Load the features together with the left / right hemisphere fMRI arrays.
# NOTE(review): the trailing True appears to select the training split (the
# loader logs paths under training_split) — confirm against src.dataset.
feat, l_frmi, r_frmi = dataset.get_features(DATA_PATH, EXTRACTOR, LAYER, True)

# Standardize the features with a StandardScaler fitted on them.
feature_scaler = StandardScaler()
feat = feature_scaler.fit_transform(feat)

# Report the shape of each array that the following cells consume.
for template, arr in (
    ("Feature Shape: {}", feat),
    ("Left fMRI shape: {}", l_frmi),
    ("Right fMRI shape: {}", r_frmi),
):
    print(template.format(arr.shape))

Using fMRI from: ../../data.nosync/subj08/training_split/training_fmri
Using data from: ../../data.nosync/subj08/training_split/training_features
Feature Shape: (8779, 3072)
Left fMRI shape: (8779, 18981)
Right fMRI shape: (8779, 20530)


## Train model

In [57]:
from sklearn.linear_model import Ridge, LinearRegression as Linear
from sklearn.model_selection import cross_validate
from sklearn.metrics import make_scorer
from src.utils import compute_perason_numpy
import numpy as np

def _median_pearson(y_true, y_pred):
    """Return the median of compute_perason_numpy(y_true, y_pred)."""
    return np.median(compute_perason_numpy(y_true, y_pred))


def _mean_pearson(y_true, y_pred):
    """Return the mean of compute_perason_numpy(y_true, y_pred)."""
    return np.mean(compute_perason_numpy(y_true, y_pred))


# Scorers handed to cross_validate; the dict keys become the "test_<key>"
# entries of the result dictionary.
scorer = {
    "median pearson": make_scorer(_median_pearson),
    "mean pearson": make_scorer(_mean_pearson),
}

# 5-fold cross-validation of a ridge model on the left-hemisphere targets.
# return_estimator=True keeps the fitted model of every fold.
left_cv_options = dict(
    cv=5,
    n_jobs=5,
    return_estimator=True,
    scoring=scorer,
    verbose=1,
)
results_left = cross_validate(Ridge(alpha=ALPHA), X=feat, y=l_frmi, **left_cv_options)

# Print every per-fold metric (timings and scores), skipping the estimators.
for metric, fold_values in results_left.items():
    if metric == "estimator":
        continue
    formatted = ",\t".join("{:.3f}".format(value) for value in fold_values)
    print("<{}>:\t{}".format(metric, formatted))


[Parallel(n_jobs=5)]: Using backend LokyBackend with 5 concurrent workers.
[Parallel(n_jobs=5)]: Done   2 out of   5 | elapsed:   44.5s remaining:  1.1min


<fit_time>:	20.021,	19.946,	20.204,	20.025,	20.073
<score_time>:	23.556,	23.668,	23.403,	23.435,	23.376
<test_median pearson>:	0.241,	0.252,	0.249,	0.247,	0.250
<test_mean pearson>:	0.251,	0.255,	0.254,	0.252,	0.255


[Parallel(n_jobs=5)]: Done   5 out of   5 | elapsed:   45.4s finished


In [58]:
# Keep the fold estimator with the highest held-out median Pearson score
# instead of a hard-coded fold index. In the recorded run this is still
# fold 1, but the selection now follows the scores when SUBJ, ALPHA or the
# features change.
# NOTE(review): presumably "best median pearson" was the reason fold 1 was
# picked by hand — confirm. Every fold estimator is trained on only 4/5 of
# the training data; refitting on all of it may be preferable for submission.
best_left_fold = int(np.argmax(results_left["test_median pearson"]))
model_left = results_left["estimator"][best_left_fold]

In [59]:
# 5-fold cross-validation of a ridge model on the right-hemisphere targets,
# using the same scorers as the left hemisphere. return_estimator=True keeps
# the fitted model of every fold.
right_cv_options = dict(
    cv=5,
    n_jobs=5,
    return_estimator=True,
    scoring=scorer,
    verbose=1,
)
results_right = cross_validate(Ridge(alpha=ALPHA), X=feat, y=r_frmi, **right_cv_options)

# Print every per-fold metric (timings and scores), skipping the estimators.
for metric, fold_values in results_right.items():
    if metric == "estimator":
        continue
    formatted = ",\t".join("{:.3f}".format(value) for value in fold_values)
    print("<{}>:\t{}".format(metric, formatted))


[Parallel(n_jobs=5)]: Using backend LokyBackend with 5 concurrent workers.
[Parallel(n_jobs=5)]: Done   2 out of   5 | elapsed:  1.1min remaining:  1.7min


<fit_time>:	34.343,	33.976,	32.665,	32.747,	34.245
<score_time>:	31.253,	31.398,	32.055,	31.943,	30.627
<test_median pearson>:	0.242,	0.255,	0.252,	0.248,	0.248
<test_mean pearson>:	0.253,	0.260,	0.261,	0.256,	0.261


[Parallel(n_jobs=5)]: Done   5 out of   5 | elapsed:  1.1min finished


In [60]:
# Keep the fold estimator with the highest held-out median Pearson score
# instead of a hard-coded fold index. In the recorded run this is still
# fold 1, but the selection now follows the scores when SUBJ, ALPHA or the
# features change.
# NOTE(review): presumably "best median pearson" was the reason fold 1 was
# picked by hand — confirm. Every fold estimator is trained on only 4/5 of
# the training data; refitting on all of it may be preferable for submission.
best_right_fold = int(np.argmax(results_right["test_median pearson"]))
model_right = results_right["estimator"][best_right_fold]

## Prediction

### Load test data

In [61]:
# Load the test-split features; the two fMRI return slots are not used here.
test_feat_raw, _, _ = dataset.get_features(
    DATA_PATH, EXTRACTOR, LAYER, False)

# Normalize with the TRAINING statistics, not with statistics of the test set.
# The models were fitted on features standardized by the training mean/std, so
# fitting a fresh StandardScaler on the test rows would feed them inputs on a
# different scale. `feat` already holds the scaled training features, so the
# raw training features are reloaded here to recover the original statistics.
train_feat_raw = dataset.get_features(DATA_PATH, EXTRACTOR, LAYER, True)[0]
test_feat = StandardScaler().fit(train_feat_raw).transform(test_feat_raw)
del train_feat_raw  # only needed to fit the scaler

print("Test Feature Shape: {}".format(test_feat.shape))

Using data from: ../../data.nosync/subj08/test_split/test_features
Test Feature Shape: (395, 3072)


### Predict

In [53]:
# Run both hemisphere models on the same normalized test features
# (left first, then right).
l_fmri_pred, r_fmri_pred = (
    hemisphere_model.predict(test_feat)
    for hemisphere_model in (model_left, model_right)
)

## Save

In [54]:
import os
import numpy as np

# Create the output directory (and any missing parents). exist_ok=True makes
# this a no-op when it already exists and avoids the check-then-create race of
# a separate isdir() test.
os.makedirs(SAVE_PATH, exist_ok=True)

# Cast the predictions to float32 before writing them.
l_fmri_pred = l_fmri_pred.astype(np.float32)
r_fmri_pred = r_fmri_pred.astype(np.float32)

# One .npy file per hemisphere.
np.save(os.path.join(SAVE_PATH, 'lh_pred_test.npy'), l_fmri_pred)
np.save(os.path.join(SAVE_PATH, 'rh_pred_test.npy'), r_fmri_pred)