# Integration pipeline of clinical data only

## Parameter definition

In [1]:
results_path = "path"
permute = "_label_permutation" # "", "_label_permutation", "_feature_permutation";  "" if you do not want permutation

NUMBER_REPETITIONS = 200
NUMBER_TEST_SAMPLES = 5 # per class
NUMBER_TRAIN_SAMPLES = 30 # per class per fold execution
NUMBER_INTERNAL_FOLDS = 5
var_threshold = 0.0 # 0.0 to remove only constant features
classifier = "SVM" # "SVM", "NN", "RF"

## Data loading and minimal preprocessing

In [2]:
import random
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler
from scipy.io import savemat

random_state = 1
random.seed(random_state)
rng = np.random.RandomState(random_state)

# load age and gender of the samples
sampleData = pd.read_csv("path\sampleData.csv")
ages = sampleData["Age"].to_numpy().reshape(-1, 1)
print("Age, NaNs: ", sum(np.isnan(ages)))
genders = OneHotEncoder(sparse=False).fit_transform(sampleData["Sex"].to_numpy().reshape(-1, 1))
print("Gender, NaNs: ", sum(np.isnan(genders)))

# load targets
targets = sampleData["Status"] == "Tumor" # ids are in the same order
targets = LabelEncoder().fit_transform(targets)

# concatenate all data
data = np.concatenate((ages, genders), axis=1)
uni_data, _, unique_inverse = np.unique(data, return_index=True, return_inverse=True, axis=0)
print((uni_data[unique_inverse] != data).any()) # because of the presence of NaNs, we will take care of them later
print(data.shape)     

Age, NaNs:  [3]
Gender, NaNs:  [0 0]
True
(151, 3)


## Integration analysis and permutation analysis

In [3]:
from sklearn.utils import shuffle
from imblearn.pipeline import Pipeline
from imblearn.over_sampling import RandomOverSampler
from imblearn.under_sampling import RandomUnderSampler
from sklearn.compose import ColumnTransformer
from sklearn.feature_selection import VarianceThreshold
from sklearn.model_selection import StratifiedKFold, GridSearchCV
from sklearn.impute import KNNImputer
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, matthews_corrcoef

tumor_samples = data[targets==1,:]
healthy_samples = data[targets==0,:]
cv = StratifiedKFold(n_splits=NUMBER_INTERNAL_FOLDS, shuffle=True, random_state=random_state)

if classifier == "SVM":
    mdl = SVC(kernel="linear", random_state=rng)
    params = {
        "model__C": np.logspace(-10, 10, num=10)
    }
elif classifier == "NN":
    mdl = MLPClassifier(solver="lbfgs", random_state=rng)
    params = {
        "model__alpha": 10.0 ** -np.arange(1, 7)
    }
else:
    mdl = RandomForestClassifier(random_state=rng)
    params = {
        "model__n_estimators": [10, 100, 500, 1000],
        "model__max_depth": [3, 5, 8, None]
    }

pipe = Pipeline(steps=[
    ("oversampling", RandomOverSampler(sampling_strategy="minority", random_state=random_state)), 
    ("undersampling", RandomUnderSampler(sampling_strategy={0:NUMBER_TRAIN_SAMPLES, 1:NUMBER_TRAIN_SAMPLES}, 
                                         random_state=random_state)),
    ("separate_pipelines", ColumnTransformer([
        ("ages", KNNImputer(n_neighbors=5, weights="uniform", metric="nan_euclidean"), [0]), # ages
        ("genders", "passthrough", [1, 2])
    ])),
    ("standardise", StandardScaler()), 
    ('model',  mdl)
])


params = {
    "svc__C": lambdas
}

accuracies = np.zeros(NUMBER_REPETITIONS)
MCCs = np.zeros(NUMBER_REPETITIONS)
Ytrue = np.zeros((NUMBER_REPETITIONS, NUMBER_TEST_SAMPLES * 2))
Ypred = np.zeros((NUMBER_REPETITIONS, NUMBER_TEST_SAMPLES * 2))
testSamples = np.zeros((NUMBER_REPETITIONS, NUMBER_TEST_SAMPLES * 2))
betas = list()    

for rep in range(NUMBER_REPETITIONS):
    test_idx1 = random.sample(range(len(healthy_samples)), NUMBER_TEST_SAMPLES)
    test_idx2 = random.sample(range(len(tumor_samples)), NUMBER_TEST_SAMPLES)
    testSamples[rep] = np.concatenate((test_idx1, test_idx2)) + 1 # for use in MATLAB
       
    Xtest = np.concatenate((healthy_samples[test_idx1,:], tumor_samples[test_idx2,:]))
    ytest = [i for i in range(2) for _ in range(NUMBER_TEST_SAMPLES)]
    
    train_healthy = np.delete(healthy_samples, test_idx1, axis=0)
    train_tumor = np.delete(tumor_samples, test_idx2, axis=0)
    Xtrain = np.concatenate((train_healthy, train_tumor))
    Xtrain, ytrain = shuffle(Xtrain, np.concatenate(([0] * len(train_healthy), [1] * len(train_tumor))), 
                             random_state=random_state)
    
    if permute == "_label_permutation":
        ys = np.concatenate((ytrain, ytest))
        random.shuffle(ys)
        ytrain, ytest = ys[:len(ytrain)], ys[-len(ytest):]
    elif permute == "_feature_permutation":
        raise NotImplementedError
    else:
        pass
    
    gs = GridSearchCV(pipe, param_grid=params, cv=cv, refit=True, n_jobs=-1) 
    gs.fit(Xtrain, ytrain)  
    print("Run", rep + 1, gs.best_params_, gs.best_score_) 
    ypred = gs.predict(Xtest)
    accuracies[rep] = accuracy_score(ytest, ypred)
    MCCs[rep] = matthews_corrcoef(ytest, ypred)
    Ytrue[rep] = ytest
    Ypred[rep] = ypred

    if classifier == "SVM":                              
        betas.append({"Beta": gs.best_estimator_["model"].coef_[0]})
    elif classifier == "NN": 
        betas.append({"Beta": np.squeeze(gs.best_estimator_["model"].coefs_[0].dot(gs.best_estimator_["model"].coefs_[1]))})
    else:
        betas.append({"Beta": np.squeeze(np.zeros((Xtrain.shape[0], 1)))}) # too computational expensive to calculate feature
                                                                           # importance
    
print("Average accuracy: {} with standard deviation: {}".format(np.mean(accuracies), np.std(accuracies)))
print("Average MCC: {} with standard deviation: {}".format(np.mean(MCCs), np.std(MCCs)))

Run 1 {'svc__C': 59948425.03189421} 0.5179802955665025
Run 2 {'svc__C': 12.915496650148826} 0.4596059113300493
Run 3 {'svc__C': 1e-10} 0.6187192118226601
Run 4 {'svc__C': 10000000000.0} 0.5958128078817733
Run 5 {'svc__C': 0.07742636826811278} 0.5261083743842365
Run 6 {'svc__C': 359381.36638046405} 0.41847290640394086
Run 7 {'svc__C': 12.915496650148826} 0.49556650246305417
Run 8 {'svc__C': 359381.36638046405} 0.6169950738916257
Run 9 {'svc__C': 59948425.03189421} 0.5598522167487685
Run 10 {'svc__C': 59948425.03189421} 0.7598522167487685
Run 11 {'svc__C': 10000000000.0} 0.5091133004926107
Run 12 {'svc__C': 359381.36638046405} 0.5674876847290641
Run 13 {'svc__C': 1e-10} 0.5243842364532019
Run 14 {'svc__C': 12.915496650148826} 0.4455665024630542
Run 15 {'svc__C': 10000000000.0} 0.6738916256157635
Run 16 {'svc__C': 1e-10} 0.5815270935960591
Run 17 {'svc__C': 59948425.03189421} 0.6241379310344828


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 18 {'svc__C': 359381.36638046405} 0.5608374384236454
Run 19 {'svc__C': 12.915496650148826} 0.49679802955665026
Run 20 {'svc__C': 59948425.03189421} 0.6236453201970443
Run 21 {'svc__C': 10000000000.0} 0.6315270935960591
Run 22 {'svc__C': 12.915496650148826} 0.553448275862069


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 23 {'svc__C': 1e-10} 0.6086206896551725
Run 24 {'svc__C': 359381.36638046405} 0.632512315270936
Run 25 {'svc__C': 359381.36638046405} 0.7231527093596058
Run 26 {'svc__C': 1e-10} 0.5810344827586207
Run 27 {'svc__C': 0.07742636826811278} 0.5886699507389161


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 28 {'svc__C': 359381.36638046405} 0.5391625615763547


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 29 {'svc__C': 359381.36638046405} 0.48251231527093597
Run 30 {'svc__C': 10000000000.0} 0.44408866995073887
Run 31 {'svc__C': 1e-10} 0.5879310344827585
Run 32 {'svc__C': 359381.36638046405} 0.5536945812807882
Run 33 {'svc__C': 0.07742636826811278} 0.4677339901477833
Run 34 {'svc__C': 12.915496650148826} 0.5320197044334976
Run 35 {'svc__C': 59948425.03189421} 0.5807881773399015


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 36 {'svc__C': 1e-10} 0.6184729064039409
Run 37 {'svc__C': 1e-10} 0.4972906403940886
Run 38 {'svc__C': 10000000000.0} 0.6958128078817735
Run 39 {'svc__C': 12.915496650148826} 0.48226600985221674


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 40 {'svc__C': 12.915496650148826} 0.6017241379310345
Run 41 {'svc__C': 59948425.03189421} 0.5524630541871921
Run 42 {'svc__C': 10000000000.0} 0.4972906403940887
Run 43 {'svc__C': 1e-10} 0.5179802955665024
Run 44 {'svc__C': 59948425.03189421} 0.553448275862069
Run 45 {'svc__C': 59948425.03189421} 0.7874384236453202
Run 46 {'svc__C': 12.915496650148826} 0.5317733990147783
Run 47 {'svc__C': 12.915496650148826} 0.6098522167487685
Run 48 {'svc__C': 0.07742636826811278} 0.5748768472906405
Run 49 {'svc__C': 0.07742636826811278} 0.5317733990147783
Run 50 {'svc__C': 10000000000.0} 0.5472906403940886


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 51 {'svc__C': 10000000000.0} 0.46059113300492605
Run 52 {'svc__C': 10000000000.0} 0.5830049261083744
Run 53 {'svc__C': 10000000000.0} 0.5613300492610838


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 54 {'svc__C': 10000000000.0} 0.5307881773399015


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 55 {'svc__C': 1e-10} 0.5758620689655172
Run 56 {'svc__C': 59948425.03189421} 0.5110837438423645


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 57 {'svc__C': 0.07742636826811278} 0.48842364532019705
Run 58 {'svc__C': 359381.36638046405} 0.5406403940886699
Run 59 {'svc__C': 12.915496650148826} 0.5657635467980295
Run 60 {'svc__C': 12.915496650148826} 0.404679802955665
Run 61 {'svc__C': 12.915496650148826} 0.4817733990147784
Run 62 {'svc__C': 359381.36638046405} 0.566256157635468
Run 63 {'svc__C': 1e-10} 0.6876847290640395


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 64 {'svc__C': 10000000000.0} 0.43842364532019706
Run 65 {'svc__C': 1e-10} 0.5596059113300492
Run 66 {'svc__C': 59948425.03189421} 0.5386699507389162
Run 67 {'svc__C': 10000000000.0} 0.4620689655172413


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 68 {'svc__C': 359381.36638046405} 0.4834975369458128
Run 69 {'svc__C': 10000000000.0} 0.5667487684729065
Run 70 {'svc__C': 12.915496650148826} 0.6798029556650247
Run 71 {'svc__C': 0.07742636826811278} 0.6810344827586208
Run 72 {'svc__C': 359381.36638046405} 0.5241379310344827
Run 73 {'svc__C': 12.915496650148826} 0.6482758620689655
Run 74 {'svc__C': 59948425.03189421} 0.454679802955665


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 75 {'svc__C': 10000000000.0} 0.552463054187192
Run 76 {'svc__C': 0.07742636826811278} 0.5958128078817734
Run 77 {'svc__C': 359381.36638046405} 0.4820197044334975


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 78 {'svc__C': 0.07742636826811278} 0.46206896551724147
Run 79 {'svc__C': 10000000000.0} 0.5049261083743841


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 80 {'svc__C': 10000000000.0} 0.5940886699507388


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 81 {'svc__C': 359381.36638046405} 0.5891625615763546
Run 82 {'svc__C': 10000000000.0} 0.4169950738916256


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 83 {'svc__C': 59948425.03189421} 0.5524630541871921
Run 84 {'svc__C': 59948425.03189421} 0.601231527093596
Run 85 {'svc__C': 10000000000.0} 0.6379310344827586
Run 86 {'svc__C': 359381.36638046405} 0.4889162561576355
Run 87 {'svc__C': 10000000000.0} 0.7519704433497537
Run 88 {'svc__C': 10000000000.0} 0.5256157635467981
Run 89 {'svc__C': 12.915496650148826} 0.5817733990147783
Run 90 {'svc__C': 0.07742636826811278} 0.45443349753694584
Run 91 {'svc__C': 59948425.03189421} 0.5539408866995074
Run 92 {'svc__C': 12.915496650148826} 0.5167487684729064
Run 93 {'svc__C': 1e-10} 0.8157635467980295
Run 94 {'svc__C': 10000000000.0} 0.6036945812807882
Run 95 {'svc__C': 1e-10} 0.5665024630541872


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 96 {'svc__C': 59948425.03189421} 0.4916256157635468
Run 97 {'svc__C': 0.07742636826811278} 0.5613300492610838


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 98 {'svc__C': 10000000000.0} 0.4098522167487684


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 99 {'svc__C': 10000000000.0} 0.5830049261083744


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 100 {'svc__C': 59948425.03189421} 0.6519704433497537
Run 101 {'svc__C': 0.07742636826811278} 0.6024630541871921
Run 102 {'svc__C': 0.07742636826811278} 0.5667487684729063
Run 103 {'svc__C': 359381.36638046405} 0.454679802955665
Run 104 {'svc__C': 10000000000.0} 0.5041871921182266


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 105 {'svc__C': 0.07742636826811278} 0.6152709359605911
Run 106 {'svc__C': 59948425.03189421} 0.5913793103448276


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 107 {'svc__C': 10000000000.0} 0.5736453201970444


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 108 {'svc__C': 0.07742636826811278} 0.5236453201970444


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 109 {'svc__C': 0.07742636826811278} 0.42512315270935963
Run 110 {'svc__C': 12.915496650148826} 0.539655172413793


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 111 {'svc__C': 359381.36638046405} 0.4677339901477833
Run 112 {'svc__C': 10000000000.0} 0.5197044334975369
Run 113 {'svc__C': 12.915496650148826} 0.5800492610837439
Run 114 {'svc__C': 1e-10} 0.5455665024630542
Run 115 {'svc__C': 359381.36638046405} 0.3906403940886699


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 116 {'svc__C': 0.07742636826811278} 0.6160098522167488
Run 117 {'svc__C': 10000000000.0} 0.4763546798029557
Run 118 {'svc__C': 1e-10} 0.5891625615763546
Run 119 {'svc__C': 10000000000.0} 0.6231527093596059
Run 120 {'svc__C': 12.915496650148826} 0.44778325123152707
Run 121 {'svc__C': 10000000000.0} 0.7371921182266009
Run 122 {'svc__C': 59948425.03189421} 0.4413793103448276
Run 123 {'svc__C': 0.07742636826811278} 0.44064039408866995
Run 124 {'svc__C': 10000000000.0} 0.6248768472906403
Run 125 {'svc__C': 1e-10} 0.6596059113300493
Run 126 {'svc__C': 59948425.03189421} 0.48940886699507385


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 127 {'svc__C': 59948425.03189421} 0.551231527093596


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 128 {'svc__C': 1e-10} 0.48251231527093597
Run 129 {'svc__C': 10000000000.0} 0.49827586206896557
Run 130 {'svc__C': 10000000000.0} 0.4960591133004926
Run 131 {'svc__C': 10000000000.0} 0.5940886699507388


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 132 {'svc__C': 359381.36638046405} 0.42586206896551726
Run 133 {'svc__C': 1e-10} 0.48275862068965514
Run 134 {'svc__C': 359381.36638046405} 0.4958128078817734


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 135 {'svc__C': 0.07742636826811278} 0.4677339901477833
Run 136 {'svc__C': 0.07742636826811278} 0.5889162561576354
Run 137 {'svc__C': 359381.36638046405} 0.33374384236453203
Run 138 {'svc__C': 59948425.03189421} 0.49408866995073886


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 139 {'svc__C': 1e-10} 0.7088669950738916


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 140 {'svc__C': 10000000000.0} 0.4332512315270936


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 141 {'svc__C': 59948425.03189421} 0.4682266009852218
Run 142 {'svc__C': 12.915496650148826} 0.475615763546798
Run 143 {'svc__C': 1e-10} 0.524630541871921


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 144 {'svc__C': 1e-10} 0.6519704433497537
Run 145 {'svc__C': 359381.36638046405} 0.6093596059113301
Run 146 {'svc__C': 59948425.03189421} 0.5970443349753695


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 147 {'svc__C': 10000000000.0} 0.5258620689655171


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 148 {'svc__C': 0.07742636826811278} 0.6480295566502463


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 149 {'svc__C': 59948425.03189421} 0.5894088669950739


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 150 {'svc__C': 1e-10} 0.4527093596059113
Run 151 {'svc__C': 1e-10} 0.5046798029556651
Run 152 {'svc__C': 10000000000.0} 0.4958128078817734
Run 153 {'svc__C': 12.915496650148826} 0.5891625615763546
Run 154 {'svc__C': 359381.36638046405} 0.594088669950739


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 155 {'svc__C': 10000000000.0} 0.5246305418719212
Run 156 {'svc__C': 0.07742636826811278} 0.5869458128078817
Run 157 {'svc__C': 10000000000.0} 0.5536945812807882


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 158 {'svc__C': 12.915496650148826} 0.5615763546798029


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 159 {'svc__C': 1e-10} 0.5596059113300492
Run 160 {'svc__C': 59948425.03189421} 0.5024630541871921
Run 161 {'svc__C': 10000000000.0} 0.6753694581280788




Run 162 {'svc__C': 12.915496650148826} 0.6517241379310345


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 163 {'svc__C': 359381.36638046405} 0.4403940886699507
Run 164 {'svc__C': 12.915496650148826} 0.5182266009852217
Run 165 {'svc__C': 59948425.03189421} 0.503448275862069
Run 166 {'svc__C': 10000000000.0} 0.5667487684729065
Run 167 {'svc__C': 359381.36638046405} 0.4408866995073891
Run 168 {'svc__C': 0.07742636826811278} 0.45369458128078816
Run 169 {'svc__C': 12.915496650148826} 0.43275862068965515
Run 170 {'svc__C': 1e-10} 0.6105911330049261
Run 171 {'svc__C': 0.07742636826811278} 0.617487684729064
Run 172 {'svc__C': 12.915496650148826} 0.5165024630541872
Run 173 {'svc__C': 59948425.03189421} 0.48251231527093597
Run 174 {'svc__C': 10000000000.0} 0.505911330049261
Run 175 {'svc__C': 0.07742636826811278} 0.6657635467980295
Run 176 {'svc__C': 1e-10} 0.6753694581280788
Run 177 {'svc__C': 10000000000.0} 0.602216748768473
Run 178 {'svc__C': 0.07742636826811278} 0.5113300492610837
Run 179 {'svc__C': 12.915496650148826} 0.5036945812807883


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 180 {'svc__C': 1e-10} 0.6169950738916257


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 181 {'svc__C': 59948425.03189421} 0.6401477832512315
Run 182 {'svc__C': 12.915496650148826} 0.5660098522167487
Run 183 {'svc__C': 59948425.03189421} 0.6662561576354679
Run 184 {'svc__C': 10000000000.0} 0.5972906403940886
Run 185 {'svc__C': 59948425.03189421} 0.5118226600985223
Run 186 {'svc__C': 59948425.03189421} 0.5320197044334976


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 187 {'svc__C': 12.915496650148826} 0.5886699507389161
Run 188 {'svc__C': 59948425.03189421} 0.6330049261083743
Run 189 {'svc__C': 1e-10} 0.49556650246305417
Run 190 {'svc__C': 12.915496650148826} 0.5679802955665025
Run 191 {'svc__C': 0.07742636826811278} 0.4891625615763546


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 192 {'svc__C': 359381.36638046405} 0.5948275862068966
Run 193 {'svc__C': 12.915496650148826} 0.5246305418719212
Run 194 {'svc__C': 359381.36638046405} 0.496551724137931
Run 195 {'svc__C': 12.915496650148826} 0.5467980295566501
Run 196 {'svc__C': 59948425.03189421} 0.6238916256157635
Run 197 {'svc__C': 59948425.03189421} 0.4896551724137931
Run 198 {'svc__C': 59948425.03189421} 0.6298029556650246
Run 199 {'svc__C': 359381.36638046405} 0.5403940886699508
Run 200 {'svc__C': 12.915496650148826} 0.404679802955665
Average accuracy: 0.48200000000000004 with standard deviation: 0.21535087647836496
Average MCC: -0.01598057192093086 with standard deviation: 0.29265075809356844


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


## Result saving

In [4]:
if classifier == "SVM":
    SVM_results = {
        "predicted": Ypred, 
        "true": Ytrue, 
        "mdl": betas, 
        "accuracies": accuracies, 
        "MCCs": MCCs, 
        "testSamples": testSamples
    }

    savemat(results_path + r"\Int2SVMl2" + "_results_" + "clinical_only" + permute + ".mat", 
            {"SVM_results": SVM_results}, oned_as="column")
elif classifier == "NN":
    NN_results = {
        "predicted": Ypred, 
        "true": Ytrue, 
        "mdl": betas, 
        "accuracies": accuracies, 
        "MCCs": MCCs, 
        "testSamples": testSamples
    }

    savemat(results_path + r"\Int2NN" + "_results_" + "clinical_only" + permute + ".mat", 
            {"NN_results": NN_results}, oned_as="column")
else:
    RF_results = {
        "predicted": Ypred, 
        "true": Ytrue, 
        "mdl": betas, 
        "accuracies": accuracies, 
        "MCCs": MCCs, 
        "testSamples": testSamples
    }

    savemat(results_path + r"\Int2RF" + "_results_" + "clinical_only" + permute + ".mat", 
            {"RF_results": RF_results}, oned_as="column")