# CNN Extractor + Regressor

## Constants

In [12]:
# Identifier of the subject whose data this notebook analyses.
SUBJ = "subj02"
# Subject-specific data directory, relative to the notebook's working directory.
DATA_PATH = "../../data.nosync/{}".format(SUBJ)
# Feature-extractor name; passed to dataset.get_dataset and shown in plot titles.
EXTRACTOR = "resnet50-imagenet1k-v2"
# Extractor layer name(s); passed to dataset.get_dataset and joined with "+" in plot titles.
LAYER = ["avgpool"]

## Load data

In [13]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from src import dataset

# Load the extracted image features together with the left and right fMRI
# targets for the configured subject.
# NOTE(review): the meaning of the positional `True` flag is defined inside
# dataset.get_dataset and is not visible here -- confirm before changing it.
scaler = StandardScaler()
feat, l_frmi, r_frmi = dataset.get_dataset(DATA_PATH, EXTRACTOR, LAYER, True)

# Standardise every feature column using statistics of the full dataset.
# NOTE(review): the scaler is fitted before the train/test split and before
# the cross-validation cells below, so held-out samples influence the
# normalisation statistics. Fit on the training portion only (or wrap the
# scaler and Ridge in a Pipeline) if strictly leak-free scores are needed.
feat = scaler.fit_transform(feat)

# Report the shape of each loaded array.
for template, array in (
    ("Feature Shape: {}", feat),
    ("Left fMRI shape: {}", l_frmi),
    ("Right fMRI shape: {}", r_frmi),
):
    print(template.format(array.shape))


Using fMRI from: ../../data.nosync/subj02/training_split/training_fmri
Using data from: ../../data.nosync/subj02/training_split/training_features
Feature Shape: (9841, 2048)
Left fMRI shape: (9841, 19004)
Right fMRI shape: (9841, 20544)


### Data split

In [14]:
# Hold out 20% of the samples; the fixed seed keeps the split reproducible.
# Features and both fMRI target arrays are split with the same row indices.
split_arrays = train_test_split(
    feat, l_frmi, r_frmi, train_size=0.8, random_state=1001)
(X_train, X_test,
 l_fmri_train, l_fmri_test,
 r_fmri_train, r_fmri_test) = split_arrays

# Shapes of the training partition.
for template, array in (
    ("X_train shape: {}", X_train),
    ("l_fmri_train shape: {}", l_fmri_train),
    ("r_fmri_train shape: {}", r_fmri_train),
):
    print(template.format(array.shape))

print()

# Shapes of the held-out partition.
for template, array in (
    ("X_test shape: {}", X_test),
    ("l_fmri_test shape: {}", l_fmri_test),
    ("r_fmri_test shape: {}", r_fmri_test),
):
    print(template.format(array.shape))


X_train shape: (7872, 2048)
l_fmri_train shape: (7872, 19004)
r_fmri_train shape: (7872, 20544)

X_test shape: (1969, 2048)
l_fmri_test shape: (1969, 19004)
r_fmri_test shape: (1969, 20544)


## Modelling

### Grid Search

In [16]:
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import make_scorer
from sklearn.linear_model import Ridge
from src.utils import compute_perason_numpy
import numpy as np

# Score a fold by the median of whatever compute_perason_numpy returns for
# (y_true, y_pred) -- presumably one Pearson correlation per target column;
# confirm in src.utils.
median_pearson_scorer = make_scorer(
    lambda x, y: np.median(compute_perason_numpy(x, y)))

# Candidate Ridge regularisation strengths.
alpha_grid = {"alpha": [1e4, 2e4, 5e4, 1e5]}

# 5-fold grid search over alpha for the left fMRI targets, using all samples.
grid_search_l = GridSearchCV(
    Ridge(),
    param_grid=alpha_grid,
    scoring=median_pearson_scorer,
    cv=5,
    n_jobs=5,
    verbose=1,
)
grid_search_l.fit(X=feat, y=l_frmi)

print("Best Param: {}".format(grid_search_l.best_params_))
print("Best Score: {}".format(grid_search_l.best_score_))

Fitting 5 folds for each of 4 candidates, totalling 20 fits
Best Param: {'alpha': 20000.0}
Best Score: 0.3852929937208972


In [17]:
# Display the full per-candidate, per-fold results of the left-target grid search.
grid_search_l.cv_results_

{'mean_fit_time': array([12.1634912 , 13.20595198, 13.67043443, 15.54170017]),
 'std_fit_time': array([0.04282433, 0.07149546, 0.05646732, 0.44691152]),
 'mean_score_time': array([10.436481  , 10.75479703, 12.25679302, 11.74533134]),
 'std_score_time': array([0.00570412, 0.04838885, 0.05746849, 0.06912549]),
 'param_alpha': masked_array(data=[10000.0, 20000.0, 50000.0, 100000.0],
              mask=[False, False, False, False],
        fill_value='?',
             dtype=object),
 'params': [{'alpha': 10000.0},
  {'alpha': 20000.0},
  {'alpha': 50000.0},
  {'alpha': 100000.0}],
 'split0_test_score': array([0.38383485, 0.38636998, 0.3809354 , 0.37065211]),
 'split1_test_score': array([0.38180958, 0.38638901, 0.38125038, 0.37141046]),
 'split2_test_score': array([0.38269097, 0.38774937, 0.38272636, 0.37253378]),
 'split3_test_score': array([0.37658667, 0.38060689, 0.37468729, 0.36381661]),
 'split4_test_score': array([0.3808997 , 0.38534973, 0.38116374, 0.37228272]),
 'mean_test_score': a

In [18]:
# Score a fold by the median of whatever compute_perason_numpy returns for
# (y_true, y_pred) -- presumably one Pearson correlation per target column;
# confirm in src.utils.
median_pearson_scorer = make_scorer(
    lambda x, y: np.median(compute_perason_numpy(x, y)))

# Candidate Ridge regularisation strengths.
alpha_grid = {"alpha": [1e4, 2e4, 5e4, 1e5]}

# 5-fold grid search over alpha for the right fMRI targets, using all samples.
grid_search_r = GridSearchCV(
    Ridge(),
    param_grid=alpha_grid,
    scoring=median_pearson_scorer,
    cv=5,
    n_jobs=5,
    verbose=1,
)
grid_search_r.fit(X=feat, y=r_frmi)

print("Best Param: {}".format(grid_search_r.best_params_))
print("Best Score: {}".format(grid_search_r.best_score_))

Fitting 5 folds for each of 4 candidates, totalling 20 fits
Best Param: {'alpha': 20000.0}
Best Score: 0.3859487591126066


In [8]:
# Display the full per-candidate, per-fold results of the right-target grid search.
grid_search_r.cv_results_

{'mean_fit_time': array([15.0408258 , 18.1230875 , 22.76264424, 21.18571   ]),
 'std_fit_time': array([0.73177404, 0.36435809, 0.20707838, 0.09761982]),
 'mean_score_time': array([12.14140244, 16.66475091, 16.72057014, 16.55096927]),
 'std_score_time': array([0.07940256, 0.14161118, 0.17379041, 0.32515282]),
 'param_alpha': masked_array(data=[10000.0, 20000.0, 50000.0, 100000.0],
              mask=[False, False, False, False],
        fill_value='?',
             dtype=object),
 'params': [{'alpha': 10000.0},
  {'alpha': 20000.0},
  {'alpha': 50000.0},
  {'alpha': 100000.0}],
 'split0_test_score': array([0.36009968, 0.3666955 , 0.36417732, 0.35575591]),
 'split1_test_score': array([0.36372728, 0.37009067, 0.36735468, 0.35866302]),
 'split2_test_score': array([0.3577856 , 0.36247264, 0.35842713, 0.34885333]),
 'split3_test_score': array([0.36567482, 0.37127636, 0.36845323, 0.35925592]),
 'split4_test_score': array([0.36039091, 0.36448771, 0.35921271, 0.35018619]),
 'mean_test_score': a

### Cross Validate

In [None]:
from sklearn.model_selection import cross_validate
from sklearn.linear_model import Ridge
from sklearn.metrics import make_scorer
import numpy as np
from src.utils import compute_perason_numpy

# 5-fold cross-validation of Ridge(alpha=5e4) on the left fMRI targets, scored
# by the median of compute_perason_numpy(y_true, y_pred).
# The result is a dict of per-fold arrays (timings and test scores), so it is
# stored under its own name rather than `model_l`: the prediction cells further
# down call `model_l.predict`, which would fail if this dict replaced the
# fitted estimator after re-running this cell.
# NOTE(review): the grid search above reported alpha=2e4 as best; alpha=5e4 is
# kept here unchanged -- confirm which value is intended.
cv_scores_l = cross_validate(Ridge(alpha=5e4), X=feat, y=l_frmi, cv=5, n_jobs=5,
                             scoring=make_scorer(lambda x, y: np.median(compute_perason_numpy(x, y))), verbose=1)

# One line per result key, with per-fold values rounded to three decimals.
for k, v in cv_scores_l.items():
    print("<{}>:\t{}".format(k, ",\t".join(["{:.3f}".format(x) for x in v])))


In [None]:
# 5-fold cross-validation of Ridge(alpha=5e4) on the right fMRI targets, scored
# by the median of compute_perason_numpy(y_true, y_pred).
# The result is a dict of per-fold arrays (timings and test scores), so it is
# stored under its own name rather than `model_r`: the prediction cells further
# down call `model_r.predict`, which would fail if this dict replaced the
# fitted estimator after re-running this cell.
# NOTE(review): the grid search above reported alpha=2e4 as best; alpha=5e4 is
# kept here unchanged -- confirm which value is intended.
cv_scores_r = cross_validate(Ridge(alpha=5e4), X=feat, y=r_frmi, cv=5, n_jobs=5,
                             scoring=make_scorer(lambda x, y: np.median(compute_perason_numpy(x, y))), verbose=1)

# One line per result key, with per-fold values rounded to three decimals.
for k, v in cv_scores_r.items():
    print("<{}>:\t{}".format(k, ",\t".join(["{:.3f}".format(x) for x in v])))


### Standard Train/Eval

In [9]:
from sklearn.linear_model import Ridge

# Fit one Ridge regressor per target array on the training partition and
# predict the held-out partition.
# NOTE(review): alpha=5e4 differs from the grid-search optimum reported above
# (alpha=2e4 for both target arrays) -- confirm this is intentional.
model_l = Ridge(alpha=5e4)
model_l.fit(X=X_train, y=l_fmri_train)
y_pred_l = model_l.predict(X_test)

model_r = Ridge(alpha=5e4)
model_r.fit(X=X_train, y=r_fmri_train)
y_pred_r = model_r.predict(X_test)


In [10]:
# Compare held-out predictions with the measured responses for each side.
# compute_perason_numpy presumably yields one Pearson r per target column --
# confirm in src.utils.
lh_correlation, rh_correlation = [
    compute_perason_numpy(predicted, measured)
    for predicted, measured in (
        (y_pred_l, l_fmri_test),
        (y_pred_r, r_fmri_test),
    )
]


## Visualization

### Dev Pearson's R

In [11]:
from src.visualize import histogram, box_plot

# Title identifies subject, model and feature source of the plotted scores.
plot_title = "{}, Ridge, {}-{}".format(SUBJ, EXTRACTOR, "+".join(LAYER))

# Histogram of the held-out correlations for both sides.
histogram(DATA_PATH, lh_correlation, rh_correlation, plot_title)


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


In [None]:
# Title identifies subject, model and feature source of the plotted scores.
plot_title = "{}, Ridge, {}-{}".format(SUBJ, EXTRACTOR, "+".join(LAYER))

# Box plot of the held-out correlations for both sides.
box_plot(DATA_PATH, lh_correlation, rh_correlation, plot_title)


### Train Pearson's R

In [None]:
# Predict the training partition itself with both fitted models, so training
# scores can be compared with the held-out ones.
y_pred_l_train, y_pred_r_train = [
    fitted.predict(X_train) for fitted in (model_l, model_r)
]

In [None]:
# Compare training-set predictions with the measured training responses.
lh_correlation_train, rh_correlation_train = [
    compute_perason_numpy(predicted, measured)
    for predicted, measured in (
        (y_pred_l_train, l_fmri_train),
        (y_pred_r_train, r_fmri_train),
    )
]

In [None]:
# Same title scheme as the held-out plots, tagged as training-set results.
train_plot_title = "{}, Ridge, {}-{}, TRAIN".format(
    SUBJ, EXTRACTOR, "+".join(LAYER))

# Histogram of the training-set correlations for both sides.
histogram(DATA_PATH, lh_correlation_train, rh_correlation_train,
          train_plot_title)


In [None]:
# Same title scheme as the held-out plots, tagged as training-set results.
train_plot_title = "{}, Ridge, {}-{}, TRAIN".format(
    SUBJ, EXTRACTOR, "+".join(LAYER))

# Box plot of the training-set correlations for both sides.
box_plot(DATA_PATH, lh_correlation_train, rh_correlation_train,
         train_plot_title)
