In [None]:
from sklearn.feature_selection import SelectKBest, mutual_info_regression
import pandas as pd
import numpy as np
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import r2_score
from sklearn.svm import SVR
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
# Switch read further down when choosing the candidate values of k for
# SelectKBest.
INCLUDE_MATRICES_AT_END = True

# Regression targets and feature matrix, loaded from the working directory.
y = np.load("dependent.npy")
X = np.load("connectomes.npy")

# Hold out 15% of the samples for the final evaluation. The split is done on
# the RAW features: fitting a scaler on the full matrix before splitting would
# let the mean/variance of the held-out samples leak into training.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.15)

# Dataset dimensions (number of features per sample, number of samples).
feats = len(X[0])
subs = len(y)

# Standardisation is the first pipeline step, so it is re-fitted on the
# training portion of every cross-validation fold and only *applied* to the
# validation fold and to the final test set. Pipeline-based searches clone
# their estimator, so this `scaler` instance itself stays unfitted; after a
# search with refit enabled, the fitted one is reachable through
# best_estimator_.named_steps['scale'].
scaler = preprocessing.StandardScaler()

pipe = Pipeline([
    ('scale', scaler),
    # k for this stage is supplied by the search's parameter grid
    ('reduce_dim', SelectKBest(mutual_info_regression)),
    ('svr', SVR(verbose=10))
])

# Candidate values of k for the SelectKBest stage: a single fixed count when
# INCLUDE_MATRICES_AT_END is set, otherwise a small count versus all features.
N_FEATURES_OPTIONS = [100] if INCLUDE_MATRICES_AT_END else [5, "all"]

# Candidate SVR regularisation strengths, ordered from largest to smallest.
C_OPTIONS = sorted([0.0001, 0.001, 0.1], reverse=True)

# One grid: linear-kernel SVR, every (k, C) combination.
param_grid = [
    dict(
        reduce_dim__k=N_FEATURES_OPTIONS,
        svr__kernel=['linear'],
        svr__C=C_OPTIONS,
    )
]

# Exhaustive search over `param_grid` with 5-fold cross-validation on the
# training split, using every available core and the pipeline's default
# scorer.
# NOTE: the `iid` argument is intentionally not passed. It was deprecated in
# scikit-learn 0.22 and removed in 0.24, where passing it raises TypeError;
# since 0.22 the built-in behaviour is the same as the former iid=False.
grid = GridSearchCV(pipe, param_grid=param_grid, cv=5, n_jobs=-1, verbose=10)
grid.fit(X_train, y_train)

# --- Cross-validation summary -------------------------------------------
print("Best parameters set found on development set:")
print()
print(grid.best_params_)
print()
print("Grid scores on development set:")
print()
# One line per parameter combination: mean CV score and a +/- 2*std band
# across the folds.
means = grid.cv_results_['mean_test_score']
stds = grid.cv_results_['std_test_score']
for mean, std, params in zip(means, stds, grid.cv_results_['params']):
    print("%0.3f (+/-%0.03f) for %r"
          % (mean, std * 2, params))
print()

# --- Held-out evaluation --------------------------------------------------
# This is a regression task scored with R^2, so the heading says so (the
# previous "classification report" wording was misleading).
print("Detailed regression report:")
print()
print("The model is trained on", len(y_train), "subjects.")
print("The scores are computed on", len(y_test), "subjects.")
print()
# grid.predict delegates to the estimator refitted with the best parameters.
y_true, y_pred = y_test, grid.predict(X_test)
print("R^2 on the held-out test set:", r2_score(y_true, y_pred))


Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 6 concurrent workers.
[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:  5.3min
[Parallel(n_jobs=-1)]: Done   6 out of  15 | elapsed:  5.6min remaining:  8.4min
[Parallel(n_jobs=-1)]: Done   8 out of  15 | elapsed: 10.9min remaining:  9.5min
[Parallel(n_jobs=-1)]: Done  10 out of  15 | elapsed: 11.0min remaining:  5.5min
[Parallel(n_jobs=-1)]: Done  12 out of  15 | elapsed: 11.0min remaining:  2.7min
[Parallel(n_jobs=-1)]: Done  15 out of  15 | elapsed: 15.7min finished
