In [1]:
import pandas as pd
import numpy as np
import scipy.stats
import sklearn.linear_model
import sklearn.neighbors

import matplotlib.pyplot as plt
import seaborn as sns
# seaborn plot defaults for the whole notebook: 'whitegrid' style, 'talk' context.
sns.set_style('whitegrid')
sns.set_context('talk')

# NOTE(review): the "ignore" filter below suppresses every warning raised after
# this cell, which can hide convergence or deprecation messages from the
# model-fitting cells — consider narrowing it to specific categories.
import warnings
warnings.filterwarnings("ignore")

import mlxtend.feature_selection

In [2]:
# Full table, read from the CSV next to the notebook.
full_data = pd.read_csv(filepath_or_buffer='full_data_short.csv')

In [3]:
# Read the three pre-made splits in one pass (train, validation, test).
train_data, val_data, test_data = map(pd.read_csv, ('train.csv', 'val.csv', 'test.csv'))

In [4]:
# Column names of the full table and of the training split.
list_full_data_columns = list(full_data.columns)
list_train_data_columns = list(train_data.columns)

# Columns present in the full table but missing from the training split.
# The list is built by walking full_data's columns in their original order so
# the result is identical on every run; converting a set difference to a list
# yields an order that can change between sessions because string hashing is
# randomized per interpreter process.
list_dropped_features = [
    column for column in list_full_data_columns
    if column not in train_data.columns
]

In [5]:
# Report (rows, columns) for each loaded table.
print('train_data shape:', train_data.shape)
print('val_data shape:', val_data.shape)
print('test_data shape:', test_data.shape)
print('full_data shape:', full_data.shape)

train_data shape: (202, 1139)
val_data shape: (36, 1139)
test_data shape: (42, 1139)
full_data shape: (280, 1347)


In [7]:
# Separate the 'group' label column from the remaining feature columns
# in each of the three splits.
X_train, y_train = train_data.drop(columns=['group']), train_data['group']
X_val, y_val = val_data.drop(columns=['group']), val_data['group']
X_test, y_test = test_data.drop(columns=['group']), test_data['group']

In [8]:
# Column names handed to both sequential selectors as `fixed_features`.
fixedfeatures = ['A0A075B7F0', 'A0A0J9YWU9', 'AK1C1', 'APOL1', 'B2L12', 'CAH11', 'CHAD', 'CLU', 'CO8A', 'DDAH1', 'ENO1', 'ENPP5', 'GLT10', 'H90B2', 'HLAC', 'HV309', 'HV374', 'HV404', 'HV428', 'HV43D', 'HV601', 'IGLO5', 'KLH21', 'KV127', 'KV240', 'KV311', 'KV621', 'KVD12', 'KVD15', 'KVD30', 'LG3BP', 'LV136', 'LV218', 'LV312', 'LV545', 'LV746', 'LV861', 'LVX54', 'MATN2', 'MMP17', 'NRP1', 'PAI1', 'PDYN', 'PLTP', 'PPIB', 'PPN', 'QSOX1', 'SHSA7', 'SIRB1', 'TAU', 'TEX51', 'TNR21', 'TSP2', 'VGF', 'VTM2B']

In [10]:
# Sanity check: how many columns are pinned via `fixedfeatures`.
len(fixedfeatures)

55

In [13]:
# Number of feature columns in the training matrix.
len(X_train.columns)

1138

In [22]:
# Base estimator that both selectors wrap when scoring candidate feature subsets.
model = sklearn.linear_model.RidgeClassifier()

# Both selectors must come from mlxtend: k_features, fixed_features, forward and
# verbose are mlxtend arguments, and a later cell reads the mlxtend-only
# attribute `k_feature_names_`. scikit-learn's class of the same name uses
# n_features_to_select/direction instead and raises TypeError on these keywords.
#
# Forward selection: grow the subset up to 570 columns, always keeping
# `fixedfeatures` in it.
sfs_forward = mlxtend.feature_selection.SequentialFeatureSelector(
    model,
    k_features=570,
    fixed_features=fixedfeatures,
    forward=True,
    verbose=2,
    n_jobs=-1,
)

# Backward selection: same target size and pinned columns, but searching in
# the opposite direction (forward=False).
sfs_backward = mlxtend.feature_selection.SequentialFeatureSelector(
    model,
    k_features=570,
    fixed_features=fixedfeatures,
    forward=False,
    verbose=2,
    n_jobs=-1,
)

In [23]:
# Run the forward search on the training split (slow: the recorded log below
# spans well over an hour of wall-clock time).
sfs_forward = sfs_forward.fit(X=X_train, y=y_train)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    1.3s
[Parallel(n_jobs=-1)]: Done 444 tasks      | elapsed:    2.3s
[Parallel(n_jobs=-1)]: Done 1083 out of 1083 | elapsed:    3.6s finished

[2023-02-26 08:03:48] Features: 56/570 -- score: 0.6778048780487805[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done 696 tasks      | elapsed:    1.5s
[Parallel(n_jobs=-1)]: Done 1082 out of 1082 | elapsed:    2.3s finished

[2023-02-26 08:03:50] Features: 57/570 -- score: 0.7178048780487805[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done 696 tasks      | elapsed:    1.5s
[Parallel(n_jobs=-1)]: Done 1058 out of 1081 | elapsed:    2.2s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done 1081

[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done 696 tasks      | elapsed:    1.7s
[Parallel(n_jobs=-1)]: Done 1037 out of 1060 | elapsed:    2.5s remaining:    0.1s
[Parallel(n_jobs=-1)]: Done 1060 out of 1060 | elapsed:    2.6s finished

[2023-02-26 08:04:48] Features: 79/570 -- score: 0.7725609756097562[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done 696 tasks      | elapsed:    2.0s
[Parallel(n_jobs=-1)]: Done 1059 out of 1059 | elapsed:    3.0s finished

[2023-02-26 08:04:51] Features: 80/570 -- score: 0.7725609756097562[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done 432 tasks      | elapsed:    1.4s
[Parallel(n_jobs=-1)]: Done 1058 out of 1058 | elapsed:    3.2s finished

[2023-02-26 08:04:55] Features: 81/

[Parallel(n_jobs=-1)]: Done 432 tasks      | elapsed:    1.6s
[Parallel(n_jobs=-1)]: Done 1016 out of 1039 | elapsed:    3.5s remaining:    0.1s
[Parallel(n_jobs=-1)]: Done 1039 out of 1039 | elapsed:    3.5s finished

[2023-02-26 08:06:04] Features: 100/570 -- score: 0.7678048780487805[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done 432 tasks      | elapsed:    1.6s
[Parallel(n_jobs=-1)]: Done 1015 out of 1038 | elapsed:    3.6s remaining:    0.1s
[Parallel(n_jobs=-1)]: Done 1038 out of 1038 | elapsed:    3.6s finished

[2023-02-26 08:06:08] Features: 101/570 -- score: 0.7678048780487805[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done 432 tasks      | elapsed:    1.6s
[Parallel(n_jobs=-1)]: Done 1037 out of 1037 | elapsed:    3.7s finished

[2023-02-26 

[Parallel(n_jobs=-1)]: Done 252 tasks      | elapsed:    1.5s
[Parallel(n_jobs=-1)]: Done 658 tasks      | elapsed:    3.8s
[Parallel(n_jobs=-1)]: Done 996 out of 1019 | elapsed:    5.8s remaining:    0.1s
[Parallel(n_jobs=-1)]: Done 1019 out of 1019 | elapsed:    6.0s finished

[2023-02-26 08:07:42] Features: 120/570 -- score: 0.7429268292682927[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    0.2s
[Parallel(n_jobs=-1)]: Done 252 tasks      | elapsed:    1.5s
[Parallel(n_jobs=-1)]: Done 658 tasks      | elapsed:    4.0s
[Parallel(n_jobs=-1)]: Done 1018 out of 1018 | elapsed:    6.1s finished

[2023-02-26 08:07:48] Features: 121/570 -- score: 0.7380487804878049[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    0.2s
[Parallel(n_jobs=-1)]: Done 252 tasks      | elapsed:    2.0s
[Parallel(n_jobs=-1)]: Done 658 tasks      | e

[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    0.2s
[Parallel(n_jobs=-1)]: Done 252 tasks      | elapsed:    1.8s
[Parallel(n_jobs=-1)]: Done 658 tasks      | elapsed:    4.5s
[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed:    6.9s finished

[2023-02-26 08:09:48] Features: 139/570 -- score: 0.7478048780487805[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    0.2s
[Parallel(n_jobs=-1)]: Done 252 tasks      | elapsed:    1.8s
[Parallel(n_jobs=-1)]: Done 658 tasks      | elapsed:    4.6s
[Parallel(n_jobs=-1)]: Done 976 out of 999 | elapsed:    6.7s remaining:    0.2s
[Parallel(n_jobs=-1)]: Done 999 out of 999 | elapsed:    7.0s finished

[2023-02-26 08:09:55] Features: 140/570 -- score: 0.7526829268292683[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    0.2s
[Parallel(n_jobs=-1)]: Done 252 tasks      | elap

[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    0.2s
[Parallel(n_jobs=-1)]: Done 252 tasks      | elapsed:    2.1s
[Parallel(n_jobs=-1)]: Done 658 tasks      | elapsed:    5.2s
[Parallel(n_jobs=-1)]: Done 958 out of 981 | elapsed:    7.6s remaining:    0.2s
[Parallel(n_jobs=-1)]: Done 981 out of 981 | elapsed:    7.9s finished

[2023-02-26 08:12:22] Features: 158/570 -- score: 0.733048780487805[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    0.2s
[Parallel(n_jobs=-1)]: Done 252 tasks      | elapsed:    2.1s
[Parallel(n_jobs=-1)]: Done 658 tasks      | elapsed:    5.3s
[Parallel(n_jobs=-1)]: Done 980 out of 980 | elapsed:    8.0s finished

[2023-02-26 08:12:30] Features: 159/570 -- score: 0.7326829268292683[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    0.2s
[Parallel(n_jobs=-1)]: Done 252 tasks      | elapsed

[Parallel(n_jobs=-1)]: Done 963 out of 963 | elapsed:    9.2s finished

[2023-02-26 08:14:57] Features: 176/570 -- score: 0.7182926829268294[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-1)]: Done 252 tasks      | elapsed:    2.5s
[Parallel(n_jobs=-1)]: Done 658 tasks      | elapsed:    6.1s
[Parallel(n_jobs=-1)]: Done 962 out of 962 | elapsed:    8.9s finished

[2023-02-26 08:15:06] Features: 177/570 -- score: 0.7231707317073172[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-1)]: Done 252 tasks      | elapsed:    2.5s
[Parallel(n_jobs=-1)]: Done 658 tasks      | elapsed:    6.2s
[Parallel(n_jobs=-1)]: Done 938 out of 961 | elapsed:    8.7s remaining:    0.2s
[Parallel(n_jobs=-1)]: Done 961 out of 961 | elapsed:    8.9s finished

[2023-02-26 08:15:15] Features: 178/570 -

[Parallel(n_jobs=-1)]: Done 658 tasks      | elapsed:    6.8s
[Parallel(n_jobs=-1)]: Done 944 out of 944 | elapsed:    9.7s finished

[2023-02-26 08:17:56] Features: 195/570 -- score: 0.7525609756097561[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-1)]: Done 252 tasks      | elapsed:    2.7s
[Parallel(n_jobs=-1)]: Done 658 tasks      | elapsed:    6.8s
[Parallel(n_jobs=-1)]: Done 920 out of 943 | elapsed:    9.5s remaining:    0.2s
[Parallel(n_jobs=-1)]: Done 943 out of 943 | elapsed:    9.7s finished

[2023-02-26 08:18:06] Features: 196/570 -- score: 0.7525609756097561[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-1)]: Done 252 tasks      | elapsed:    2.8s
[Parallel(n_jobs=-1)]: Done 658 tasks      | elapsed:    6.8s
[Parallel(n_jobs=-1)]: Done 942 out of 942 | elapse

[Parallel(n_jobs=-1)]: Done 252 tasks      | elapsed:    2.9s
[Parallel(n_jobs=-1)]: Done 658 tasks      | elapsed:    7.3s
[Parallel(n_jobs=-1)]: Done 902 out of 925 | elapsed:    9.9s remaining:    0.3s
[Parallel(n_jobs=-1)]: Done 925 out of 925 | elapsed:   10.3s finished

[2023-02-26 08:21:09] Features: 214/570 -- score: 0.7426829268292683[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-1)]: Done 252 tasks      | elapsed:    2.9s
[Parallel(n_jobs=-1)]: Done 658 tasks      | elapsed:    7.6s
[Parallel(n_jobs=-1)]: Done 924 out of 924 | elapsed:   10.6s finished

[2023-02-26 08:21:20] Features: 215/570 -- score: 0.7378048780487805[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-1)]: Done 252 tasks      | elapsed:    2.9s
[Parallel(n_jobs=-1)]: Done 658 tasks      | elapse

[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done 252 tasks      | elapsed:    3.3s
[Parallel(n_jobs=-1)]: Done 658 tasks      | elapsed:    8.2s
[Parallel(n_jobs=-1)]: Done 906 out of 906 | elapsed:   11.4s finished

[2023-02-26 08:24:41] Features: 233/570 -- score: 0.7380487804878049[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done 252 tasks      | elapsed:    3.3s
[Parallel(n_jobs=-1)]: Done 658 tasks      | elapsed:    8.2s
[Parallel(n_jobs=-1)]: Done 882 out of 905 | elapsed:   11.0s remaining:    0.3s
[Parallel(n_jobs=-1)]: Done 905 out of 905 | elapsed:   11.4s finished

[2023-02-26 08:24:52] Features: 234/570 -- score: 0.7429268292682927[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done 252 tasks      | elapse

[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done 252 tasks      | elapsed:    3.7s
[Parallel(n_jobs=-1)]: Done 658 tasks      | elapsed:    9.1s
[Parallel(n_jobs=-1)]: Done 887 out of 887 | elapsed:   12.4s finished

[2023-02-26 08:28:26] Features: 252/570 -- score: 0.7382926829268293[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done 252 tasks      | elapsed:    3.6s
[Parallel(n_jobs=-1)]: Done 658 tasks      | elapsed:    9.0s
[Parallel(n_jobs=-1)]: Done 886 out of 886 | elapsed:   12.2s finished

[2023-02-26 08:28:39] Features: 253/570 -- score: 0.7431707317073171[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done 252 tasks      | elapsed:    3.7s
[Parallel(n_jobs=-1)]: Done 658 tasks      | elapsed:    9.1s
[Paralle

[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done 252 tasks      | elapsed:    3.9s
[Parallel(n_jobs=-1)]: Done 658 tasks      | elapsed:    9.5s
[Parallel(n_jobs=-1)]: Done 868 out of 868 | elapsed:   12.5s finished

[2023-02-26 08:32:26] Features: 271/570 -- score: 0.7529268292682927[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done 252 tasks      | elapsed:    3.8s
[Parallel(n_jobs=-1)]: Done 658 tasks      | elapsed:    9.9s
[Parallel(n_jobs=-1)]: Done 844 out of 867 | elapsed:   12.9s remaining:    0.4s
[Parallel(n_jobs=-1)]: Done 867 out of 867 | elapsed:   13.2s finished

[2023-02-26 08:32:40] Features: 272/570 -- score: 0.7529268292682927[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done 252 tasks      | elapse

[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    0.5s
[Parallel(n_jobs=-1)]: Done 252 tasks      | elapsed:    4.2s
[Parallel(n_jobs=-1)]: Done 658 tasks      | elapsed:   10.6s
[Parallel(n_jobs=-1)]: Done 826 out of 849 | elapsed:   13.3s remaining:    0.4s
[Parallel(n_jobs=-1)]: Done 849 out of 849 | elapsed:   13.8s finished

[2023-02-26 08:36:38] Features: 290/570 -- score: 0.7531707317073171[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    0.5s
[Parallel(n_jobs=-1)]: Done 252 tasks      | elapsed:    4.3s
[Parallel(n_jobs=-1)]: Done 658 tasks      | elapsed:   10.9s
[Parallel(n_jobs=-1)]: Done 848 out of 848 | elapsed:   14.1s finished

[2023-02-26 08:36:53] Features: 291/570 -- score: 0.7580487804878049[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    0.5s
[Parallel(n_jobs=-1)]: Done 252 tasks      | elapse

[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    0.5s
[Parallel(n_jobs=-1)]: Done 252 tasks      | elapsed:    4.2s
[Parallel(n_jobs=-1)]: Done 658 tasks      | elapsed:   10.2s
[Parallel(n_jobs=-1)]: Done 830 out of 830 | elapsed:   12.9s finished

[2023-02-26 08:40:55] Features: 309/570 -- score: 0.7678048780487805[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    0.5s
[Parallel(n_jobs=-1)]: Done 252 tasks      | elapsed:    4.1s
[Parallel(n_jobs=-1)]: Done 658 tasks      | elapsed:   10.2s
[Parallel(n_jobs=-1)]: Done 806 out of 829 | elapsed:   12.4s remaining:    0.4s
[Parallel(n_jobs=-1)]: Done 829 out of 829 | elapsed:   12.9s finished

[2023-02-26 08:41:08] Features: 310/570 -- score: 0.7678048780487805[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    0.5s
[Parallel(n_jobs=-1)]: Done 252 tasks      | elapse

[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    0.5s
[Parallel(n_jobs=-1)]: Done 252 tasks      | elapsed:    4.6s
[Parallel(n_jobs=-1)]: Done 658 tasks      | elapsed:   11.7s
[Parallel(n_jobs=-1)]: Done 788 out of 811 | elapsed:   14.0s remaining:    0.4s
[Parallel(n_jobs=-1)]: Done 811 out of 811 | elapsed:   14.6s finished

[2023-02-26 08:45:15] Features: 328/570 -- score: 0.7580487804878049[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    0.6s
[Parallel(n_jobs=-1)]: Done 252 tasks      | elapsed:    4.7s
[Parallel(n_jobs=-1)]: Done 658 tasks      | elapsed:   11.8s
[Parallel(n_jobs=-1)]: Done 810 out of 810 | elapsed:   14.8s finished

[2023-02-26 08:45:30] Features: 329/570 -- score: 0.7580487804878049[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    0.6s
[Parallel(n_jobs=-1)]: Done 252 tasks      | elapse

[Parallel(n_jobs=-1)]: Done 793 out of 793 | elapsed:   14.6s finished

[2023-02-26 08:49:49] Features: 346/570 -- score: 0.7480487804878048[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    0.6s
[Parallel(n_jobs=-1)]: Done 252 tasks      | elapsed:    4.9s
[Parallel(n_jobs=-1)]: Done 658 tasks      | elapsed:   12.2s
[Parallel(n_jobs=-1)]: Done 792 out of 792 | elapsed:   14.7s finished

[2023-02-26 08:50:04] Features: 347/570 -- score: 0.7480487804878048[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    0.6s
[Parallel(n_jobs=-1)]: Done 252 tasks      | elapsed:    4.8s
[Parallel(n_jobs=-1)]: Done 658 tasks      | elapsed:   11.9s
[Parallel(n_jobs=-1)]: Done 791 out of 791 | elapsed:   14.6s finished

[2023-02-26 08:50:19] Features: 348/570 -- score: 0.748170731707317[Parallel(n_jobs=-1)]: Using backend LokyBackend with 1

[Parallel(n_jobs=-1)]: Done 252 tasks      | elapsed:    5.0s
[Parallel(n_jobs=-1)]: Done 658 tasks      | elapsed:   12.6s
[Parallel(n_jobs=-1)]: Done 774 out of 774 | elapsed:   14.9s finished

[2023-02-26 08:54:46] Features: 365/570 -- score: 0.7679268292682926[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    0.6s
[Parallel(n_jobs=-1)]: Done 252 tasks      | elapsed:    5.0s
[Parallel(n_jobs=-1)]: Done 658 tasks      | elapsed:   12.9s
[Parallel(n_jobs=-1)]: Done 750 out of 773 | elapsed:   14.7s remaining:    0.4s
[Parallel(n_jobs=-1)]: Done 773 out of 773 | elapsed:   15.2s finished

[2023-02-26 08:55:01] Features: 366/570 -- score: 0.7679268292682926[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    0.6s
[Parallel(n_jobs=-1)]: Done 252 tasks      | elapsed:    5.5s
[Parallel(n_jobs=-1)]: Done 658 tasks      | elapse

[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    0.7s
[Parallel(n_jobs=-1)]: Done 252 tasks      | elapsed:    5.2s
[Parallel(n_jobs=-1)]: Done 658 tasks      | elapsed:   12.8s
[Parallel(n_jobs=-1)]: Done 732 out of 755 | elapsed:   14.2s remaining:    0.4s
[Parallel(n_jobs=-1)]: Done 755 out of 755 | elapsed:   14.8s finished

[2023-02-26 08:59:48] Features: 384/570 -- score: 0.7679268292682926[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    0.7s
[Parallel(n_jobs=-1)]: Done 252 tasks      | elapsed:    5.2s
[Parallel(n_jobs=-1)]: Done 658 tasks      | elapsed:   12.9s
[Parallel(n_jobs=-1)]: Done 754 out of 754 | elapsed:   14.8s finished

[2023-02-26 09:00:03] Features: 385/570 -- score: 0.7630487804878048[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    0.7s
[Parallel(n_jobs=-1)]: Done 252 tasks      | elapse

[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    0.7s
[Parallel(n_jobs=-1)]: Done 252 tasks      | elapsed:    5.6s
[Parallel(n_jobs=-1)]: Done 658 tasks      | elapsed:   14.2s
[Parallel(n_jobs=-1)]: Done 736 out of 736 | elapsed:   16.3s finished

[2023-02-26 09:04:41] Features: 403/570 -- score: 0.753170731707317[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    0.7s
[Parallel(n_jobs=-1)]: Done 252 tasks      | elapsed:    5.8s
[Parallel(n_jobs=-1)]: Done 658 tasks      | elapsed:   14.4s
[Parallel(n_jobs=-1)]: Done 712 out of 735 | elapsed:   15.5s remaining:    0.5s
[Parallel(n_jobs=-1)]: Done 735 out of 735 | elapsed:   16.2s finished

[2023-02-26 09:04:58] Features: 404/570 -- score: 0.753170731707317[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    0.7s
[Parallel(n_jobs=-1)]: Done 252 tasks      | elapsed:

[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    0.7s
[Parallel(n_jobs=-1)]: Done 252 tasks      | elapsed:    5.9s
[Parallel(n_jobs=-1)]: Done 658 tasks      | elapsed:   14.6s
[Parallel(n_jobs=-1)]: Done 694 out of 717 | elapsed:   15.4s remaining:    0.5s
[Parallel(n_jobs=-1)]: Done 717 out of 717 | elapsed:   16.4s finished

[2023-02-26 09:09:51] Features: 422/570 -- score: 0.763170731707317[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    0.7s
[Parallel(n_jobs=-1)]: Done 252 tasks      | elapsed:    5.9s
[Parallel(n_jobs=-1)]: Done 658 tasks      | elapsed:   14.6s
[Parallel(n_jobs=-1)]: Done 716 out of 716 | elapsed:   16.3s finished

[2023-02-26 09:10:07] Features: 423/570 -- score: 0.763170731707317[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    0.7s
[Parallel(n_jobs=-1)]: Done 252 tasks      | elapsed:

[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    0.8s
[Parallel(n_jobs=-1)]: Done 252 tasks      | elapsed:    6.3s
[Parallel(n_jobs=-1)]: Done 698 out of 698 | elapsed:   16.4s finished

[2023-02-26 09:15:11] Features: 441/570 -- score: 0.7680487804878048[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    0.8s
[Parallel(n_jobs=-1)]: Done 252 tasks      | elapsed:    6.4s
[Parallel(n_jobs=-1)]: Done 674 out of 697 | elapsed:   16.1s remaining:    0.6s
[Parallel(n_jobs=-1)]: Done 697 out of 697 | elapsed:   16.7s finished

[2023-02-26 09:15:28] Features: 442/570 -- score: 0.7680487804878048[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    0.8s
[Parallel(n_jobs=-1)]: Done 252 tasks      | elapsed:    6.7s
[Parallel(n_jobs=-1)]: Done 696 out of 696 | elapsed:   17.0s finished

[2023-02-26 09:15:45] Features: 443/570 -

[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    0.8s
[Parallel(n_jobs=-1)]: Done 252 tasks      | elapsed:    6.5s
[Parallel(n_jobs=-1)]: Done 654 out of 677 | elapsed:   16.2s remaining:    0.6s
[Parallel(n_jobs=-1)]: Done 677 out of 677 | elapsed:   16.9s finished

[2023-02-26 09:21:12] Features: 462/570 -- score: 0.7828048780487804[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    0.8s
[Parallel(n_jobs=-1)]: Done 252 tasks      | elapsed:    6.7s
[Parallel(n_jobs=-1)]: Done 676 out of 676 | elapsed:   17.5s finished

[2023-02-26 09:21:30] Features: 463/570 -- score: 0.7828048780487804[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    0.8s
[Parallel(n_jobs=-1)]: Done 252 tasks      | elapsed:    6.8s
[Parallel(n_jobs=-1)]: Done 652 out of 675 | elapsed:   16.4s remaining:    0.6s
[Parallel(n_jobs=-1)]: Done 675 

[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    0.9s
[Parallel(n_jobs=-1)]: Done 252 tasks      | elapsed:    7.1s
[Parallel(n_jobs=-1)]: Done 632 out of 655 | elapsed:   17.0s remaining:    0.6s
[Parallel(n_jobs=-1)]: Done 655 out of 655 | elapsed:   17.7s finished

[2023-02-26 09:27:55] Features: 484/570 -- score: 0.7876829268292682[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    0.9s
[Parallel(n_jobs=-1)]: Done 252 tasks      | elapsed:    6.9s
[Parallel(n_jobs=-1)]: Done 654 out of 654 | elapsed:   17.1s finished

[2023-02-26 09:28:12] Features: 485/570 -- score: 0.7876829268292682[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    0.9s
[Parallel(n_jobs=-1)]: Done 252 tasks      | elapsed:    7.5s
[Parallel(n_jobs=-1)]: Done 630 out of 653 | elapsed:   17.6s remaining:    0.6s
[Parallel(n_jobs=-1)]: Done 653 

[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    0.9s
[Parallel(n_jobs=-1)]: Done 252 tasks      | elapsed:    7.2s
[Parallel(n_jobs=-1)]: Done 610 out of 633 | elapsed:   16.8s remaining:    0.6s
[Parallel(n_jobs=-1)]: Done 633 out of 633 | elapsed:   17.6s finished

[2023-02-26 09:34:33] Features: 506/570 -- score: 0.7826829268292683[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    0.9s
[Parallel(n_jobs=-1)]: Done 252 tasks      | elapsed:    7.3s
[Parallel(n_jobs=-1)]: Done 632 out of 632 | elapsed:   17.5s finished

[2023-02-26 09:34:51] Features: 507/570 -- score: 0.7876829268292682[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    0.9s
[Parallel(n_jobs=-1)]: Done 252 tasks      | elapsed:    7.4s
[Parallel(n_jobs=-1)]: Done 608 out of 631 | elapsed:   17.0s remaining:    0.6s
[Parallel(n_jobs=-1)]: Done 631 

[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    1.0s
[Parallel(n_jobs=-1)]: Done 252 tasks      | elapsed:    8.0s
[Parallel(n_jobs=-1)]: Done 588 out of 611 | elapsed:   17.7s remaining:    0.7s
[Parallel(n_jobs=-1)]: Done 611 out of 611 | elapsed:   18.6s finished

[2023-02-26 09:41:31] Features: 528/570 -- score: 0.7876829268292682[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    1.0s
[Parallel(n_jobs=-1)]: Done 252 tasks      | elapsed:    7.9s
[Parallel(n_jobs=-1)]: Done 610 out of 610 | elapsed:   18.3s finished

[2023-02-26 09:41:49] Features: 529/570 -- score: 0.792560975609756[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    0.9s
[Parallel(n_jobs=-1)]: Done 252 tasks      | elapsed:    8.1s
[Parallel(n_jobs=-1)]: Done 586 out of 609 | elapsed:   17.8s remaining:    0.7s
[Parallel(n_jobs=-1)]: Done 609 o

[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    1.0s
[Parallel(n_jobs=-1)]: Done 252 tasks      | elapsed:    8.5s
[Parallel(n_jobs=-1)]: Done 566 out of 589 | elapsed:   18.1s remaining:    0.7s
[Parallel(n_jobs=-1)]: Done 589 out of 589 | elapsed:   19.2s finished

[2023-02-26 09:48:27] Features: 550/570 -- score: 0.8026829268292683[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    1.0s
[Parallel(n_jobs=-1)]: Done 252 tasks      | elapsed:    8.5s
[Parallel(n_jobs=-1)]: Done 588 out of 588 | elapsed:   19.2s finished

[2023-02-26 09:48:46] Features: 551/570 -- score: 0.8026829268292683[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    1.0s
[Parallel(n_jobs=-1)]: Done 252 tasks      | elapsed:    8.5s
[Parallel(n_jobs=-1)]: Done 564 out of 587 | elapsed:   18.2s remaining:    0.7s
[Parallel(n_jobs=-1)]: Done 587 

In [24]:
# Names of the columns kept by the forward selector, as a plain list.
sfs_forward_list = [*sfs_forward.k_feature_names_]

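Train a ridge classifier using only the proteins chosen by forward selection. (The selector above is configured with `k_features=570`, so the "500" in the variable names and printed labels below is only approximate.)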

In [30]:
# Restrict every split to the forward-selected columns; the labels are the
# same 'group' column as before.
X_train500, y_train500 = train_data.loc[:, sfs_forward_list], train_data['group']
X_val500, y_val500 = val_data.loc[:, sfs_forward_list], val_data['group']
X_test500, y_test500 = test_data.loc[:, sfs_forward_list], test_data['group']

In [36]:
# Ridge classifier trained on the forward-selected columns only.
# NOTE(review): the labels say "500 proteins" while the selector was built
# with k_features=570 — confirm which number is intended.
model = sklearn.linear_model.RidgeClassifier().fit(X_train500, y_train500)
print('Training score 500 proteins:', model.score(X_train500, y_train500))
print('Val score 500 proteins:', model.score(X_val500, y_val500))
print('Test score 500 proteins:', model.score(X_test500, y_test500))

# Baseline for comparison: the same classifier trained on every feature column.
# `model` is rebound here, so after this cell it refers to the baseline.
model = sklearn.linear_model.RidgeClassifier().fit(X_train, y_train)
print('\nTraining score all proteins:', model.score(X_train, y_train))
print('Val score all proteins:', model.score(X_val, y_val))
print('Test score all proteins:', model.score(X_test, y_test))

Training score 500 proteins: 1.0
Val score 500 proteins: 0.6944444444444444
Test score 500 proteins: 0.5

Training score all proteins: 1.0
Val score all proteins: 0.6944444444444444
Test score all proteins: 0.6904761904761905


I considered excluding, or otherwise making use of, the lists of the bottom-20 performing biomarkers, but the idea became conceptually murky.

In [20]:
# Run the backward search on the training split.
sfs_backward = sfs_backward.fit(X=X_train, y=y_train)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    1.3s
[Parallel(n_jobs=-1)]: Done 204 tasks      | elapsed:    2.7s
[Parallel(n_jobs=-1)]: Done 610 tasks      | elapsed:    5.9s
[Parallel(n_jobs=-1)]: Done 1060 out of 1083 | elapsed:    9.4s remaining:    0.2s
[Parallel(n_jobs=-1)]: Done 1083 out of 1083 | elapsed:    9.5s finished

[2023-02-26 06:41:05] Features: 1137/1077 -- score: 0.7085365853658537[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    0.2s
[Parallel(n_jobs=-1)]: Done 252 tasks      | elapsed:    1.9s
[Parallel(n_jobs=-1)]: Done 658 tasks      | elapsed:    4.9s
[Parallel(n_jobs=-1)]: Done 1082 out of 1082 | elapsed:    7.9s finished

[2023-02-26 06:41:13] Features: 1136/1077 -- score: 0.7185365853658537[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Don