# SVM pipeline for flux analysis and permutation flux analysis

## Parameter definition

In [1]:
results_path = "path"
features = "genes" # "FVA", "genes"
permute = "_label_permutation" # "", "_label_permutation", "_feature_permutation";  "" if you do not want permutation

NUMBER_REPETITIONS = 200
NUMBER_TEST_SAMPLES = 5 # per class
NUMBER_TRAIN_SAMPLES = 30 # per class per fold execution
NUMBER_INTERNAL_FOLDS = 5
var_threshold = 0.0 # 0.0 to remove only constant features
classifier = "SVM" # "SVM", "NN", "RF"

## Data loading and minimal preprocessing

In [2]:
import random
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from scipy.io import loadmat, savemat

random_state = 1
random.seed(random_state)
rng = np.random.RandomState(random_state)

# load dataset
if features == "FVA":
    data = pd.read_csv("path\maxFluxes.csv")
    # load metabolic model
    model = loadmat("path\recon2.2.mat")
    rxn_list = model["model"][0][0][0]
    pathway_list = model["model"][0][0][13]
    # check for NANs and for identical rows
    print(data.isnull().values.any())
    print(data.duplicated(subset=data.columns[:-1]).any())
    sample_ids = data["sample_id"]
    data = data[data.columns[1:]]
    
    data = data.round(decimals=6)   # done here because it is element-wise so no data leakage can happen
    print(data.isnull().values.any())                        
    print(data.duplicated(subset=data.columns[:-1]).any())
else:
    data = loadmat("path\combined_dataset.mat")
    data = np.transpose(data["geneData_combat_p"])
    print(np.sum(np.isnan(data)))
    uni_data, _, unique_inverse = np.unique(data, return_index=True, return_inverse=True, axis=0)
    print((uni_data[unique_inverse] != data).any()) # because rows are sorted after np.unique
    gene_list = pd.read_csv("path\gene_exp_final_m.csv")
    gene_list = gene_list["hgnc_id"].to_numpy()
    
    data = np.around(data, decimals=6)
    print(np.sum(np.isnan(data)))
    uni_data, _, unique_inverse = np.unique(data, return_index=True, return_inverse=True, axis=0)
    print((uni_data[unique_inverse] != data).any())   

    
targets = pd.read_csv("path\sampleData.csv")  # ids are in the same order
targets = targets["Status"] == "Tumor"
targets = LabelEncoder().fit_transform(targets)

0
False
0
False


## Util definition

In [3]:
# custom transformer for removing the identical features
from sklearn.base import BaseEstimator, TransformerMixin

class RedundantTrimmer(BaseEstimator, TransformerMixin):
    def __init__(self):
        pass
        
    def fit(self, X, y=None):
        _, unique_indices = np.unique(X, return_index=True, axis=1)  #  sorted, unlike in the MATLAB version
        self.indices = unique_indices
        return self
    
    def transform(self, X, y=None):
        return X[:,self.indices]
    

# function to retrieve the rxns and subsystems used
def getFeatures(features, thresholded, redundant):
    features = features.reshape(-1, 1)
    features = features[remove_indices]
    unique_red = list(set(redundant))
    array = np.zeros(len(unique_red), dtype=object)
    for i in unique_red:
        array[i] = [features[j] for j, k in enumerate(redundant) if k == i]
    return array

## Model building and validation analyses

In [4]:
from sklearn.utils import shuffle
from imblearn.pipeline import Pipeline
from imblearn.over_sampling import RandomOverSampler
from imblearn.under_sampling import RandomUnderSampler
from sklearn.feature_selection import VarianceThreshold
from sklearn.model_selection import StratifiedKFold, GridSearchCV
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, matthews_corrcoef

if features == "FVA":
    tumor_samples = data.to_numpy()[targets==1,:]
    healthy_samples = data.to_numpy()[targets==0,:]
else:
    tumor_samples = data[targets==1,:]
    healthy_samples = data[targets==0,:]
cv = StratifiedKFold(n_splits=NUMBER_INTERNAL_FOLDS, shuffle=True, random_state=random_state)

if classifier == "SVM": 
    mdl = SVC(kernel="linear", random_state=rng)
    params = {
        "model__C": np.logspace(-10, 10, num=10)
    }
elif classifier == "NN":
    mdl = MLPClassifier(solver="lbfgs", random_state=rng)
    params = {
        "model__alpha": 10.0 ** -np.arange(1, 7)
    }
else:
    mdl = RandomForestClassifier(random_state=rng)
    params = {
        "model__n_estimators": [10, 100, 500, 1000],
        "model__max_depth": [3, 5, 8, None]
    }

pipe = Pipeline(steps=[
    ("oversampling", RandomOverSampler(sampling_strategy="minority", random_state=random_state)), 
    ("undersampling", RandomUnderSampler(sampling_strategy={0:NUMBER_TRAIN_SAMPLES, 1:NUMBER_TRAIN_SAMPLES}, 
                                         random_state=random_state)),
    ("var_selection", VarianceThreshold(threshold=var_threshold)), 
    ("reduntant_trim", RedundantTrimmer()), 
    ("standardise", StandardScaler()), 
    ('model',  mdl)
])

params = {
    "svc__C": lambdas
}

accuracies = np.zeros(NUMBER_REPETITIONS)
MCCs = np.zeros(NUMBER_REPETITIONS)
Ytrue = np.zeros((NUMBER_REPETITIONS, NUMBER_TEST_SAMPLES * 2))
Ypred = np.zeros((NUMBER_REPETITIONS, NUMBER_TEST_SAMPLES * 2))
testSamples = np.zeros((NUMBER_REPETITIONS, NUMBER_TEST_SAMPLES * 2))
betas = list()
if features == "FVA":
    predictors =  np.zeros(NUMBER_REPETITIONS, dtype=object)
    pathways =  np.zeros(NUMBER_REPETITIONS, dtype=object)
else:
    genes =  np.zeros(NUMBER_REPETITIONS, dtype=object)

for rep in range(NUMBER_REPETITIONS):
    test_idx1 = random.sample(range(len(healthy_samples)), NUMBER_TEST_SAMPLES)
    test_idx2 = random.sample(range(len(tumor_samples)), NUMBER_TEST_SAMPLES)  
    testSamples[rep] = np.concatenate((test_idx1, test_idx2)) + 1 # for use in MATLAB
        
    Xtest = np.concatenate((healthy_samples[test_idx1,:], tumor_samples[test_idx2,:]))
    ytest = [i for i in range(2) for _ in range(NUMBER_TEST_SAMPLES)]
    
    train_healthy = np.delete(healthy_samples, test_idx1, axis=0)
    train_tumor = np.delete(tumor_samples, test_idx2, axis=0)
    Xtrain = np.concatenate((train_healthy, train_tumor))
    Xtrain, ytrain = shuffle(Xtrain, np.concatenate(([0] * len(train_healthy), [1] * len(train_tumor))), 
                             random_state=random_state)
    
    if permute == "_label_permutation":
        ys = np.concatenate((ytrain, ytest))
        random.shuffle(ys)
        ytrain, ytest = ys[:len(ytrain)], ys[-len(ytest):]
    elif permute == "_feature_permutation":
        raise NotImplementedError
    else:
        pass
    
    gs = GridSearchCV(pipe, param_grid=params, cv=cv, refit=True, n_jobs=5) 
    gs.fit(Xtrain, ytrain)  
    print("Run", rep + 1, gs.best_params_, gs.best_score_) 
    ypred = gs.predict(Xtest)
    accuracies[rep] = accuracy_score(ytest, ypred)
    MCCs[rep] = matthews_corrcoef(ytest, ypred)
    Ytrue[rep] = ytest
    Ypred[rep] = ypred
    
    Xtrain = Xtrain[gs.best_estimator_["oversampling"].sample_indices_, :]
    Xtrain = Xtrain[gs.best_estimator_["undersampling"].sample_indices_, :]
    remove_indices = gs.best_estimator_["var_selection"].get_support()
    Xtrain = Xtrain[:,remove_indices]
    _, _, collapse_indices = np.unique(Xtrain, return_index=True, return_inverse=1, axis=1)
    
    if features == "FVA":
        predictors[rep] = getFeatures(rxn_list, remove_indices, collapse_indices)
        pathways[rep] = getFeatures(pathway_list, remove_indices, collapse_indices)
    else:
        genes[rep] = getFeatures(gene_list, remove_indices, collapse_indices)

    if classifier == "SVM":                      
        betas.append({"Beta": gs.best_estimator_["model"].coef_[0]})
    elif classifier == "NN": 
        betas.append({"Beta": np.squeeze(gs.best_estimator_["model"].coefs_[0].dot(gs.best_estimator_["model"].coefs_[1]))})
    else:
        betas.append({"Beta": np.squeeze(np.zeros((Xtrain.shape[0], 1)))}) # too computational expensive to calculate feature
                                                                           # importance     

print("Average accuracy: {} with standard deviation: {}".format(np.mean(accuracies), np.std(accuracies)))
print("Average MCC: {} with standard deviation: {}".format(np.mean(MCCs), np.std(MCCs)))

Run 1 {'svc__C': 0.0004641588833612782} 0.624384236453202
Run 2 {'svc__C': 0.0004641588833612782} 0.6948275862068967
Run 3 {'svc__C': 0.0004641588833612782} 0.744088669950739


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 4 {'svc__C': 0.0004641588833612782} 0.566256157635468
Run 5 {'svc__C': 0.0004641588833612782} 0.6667487684729065
Run 6 {'svc__C': 1e-10} 0.6729064039408867
Run 7 {'svc__C': 0.0004641588833612782} 0.6950738916256156
Run 8 {'svc__C': 0.0004641588833612782} 0.6948275862068966
Run 9 {'svc__C': 1e-10} 0.5958128078817735
Run 10 {'svc__C': 0.0004641588833612782} 0.7019704433497537
Run 11 {'svc__C': 0.0004641588833612782} 0.8009852216748768


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 12 {'svc__C': 0.0004641588833612782} 0.6741379310344828
Run 13 {'svc__C': 0.0004641588833612782} 0.7086206896551724
Run 14 {'svc__C': 0.0004641588833612782} 0.6810344827586208
Run 15 {'svc__C': 0.0004641588833612782} 0.6812807881773401
Run 16 {'svc__C': 0.0004641588833612782} 0.7305418719211823
Run 17 {'svc__C': 0.0004641588833612782} 0.6812807881773398
Run 18 {'svc__C': 0.0004641588833612782} 0.6588669950738917


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 19 {'svc__C': 1e-10} 0.7300492610837439
Run 20 {'svc__C': 0.0004641588833612782} 0.666256157635468
Run 21 {'svc__C': 0.0004641588833612782} 0.6598522167487684
Run 22 {'svc__C': 0.0004641588833612782} 0.7098522167487685
Run 23 {'svc__C': 0.0004641588833612782} 0.624384236453202
Run 24 {'svc__C': 0.07742636826811278} 0.6396551724137931
Run 25 {'svc__C': 0.0004641588833612782} 0.5596059113300493
Run 26 {'svc__C': 0.0004641588833612782} 0.6950738916256157
Run 27 {'svc__C': 0.0004641588833612782} 0.6371921182266009


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 28 {'svc__C': 1e-10} 0.5886699507389161


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 29 {'svc__C': 0.0004641588833612782} 0.6733990147783251
Run 30 {'svc__C': 1e-10} 0.6103448275862069
Run 31 {'svc__C': 1e-10} 0.6524630541871922
Run 32 {'svc__C': 0.0004641588833612782} 0.674384236453202
Run 33 {'svc__C': 0.0004641588833612782} 0.6524630541871921
Run 34 {'svc__C': 0.0004641588833612782} 0.6738916256157635
Run 35 {'svc__C': 0.07742636826811278} 0.5541871921182266


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 36 {'svc__C': 0.0004641588833612782} 0.702463054187192
Run 37 {'svc__C': 0.0004641588833612782} 0.5953201970443349
Run 38 {'svc__C': 0.0004641588833612782} 0.6598522167487685
Run 39 {'svc__C': 0.0004641588833612782} 0.6463054187192119


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 40 {'svc__C': 0.0004641588833612782} 0.6879310344827586
Run 41 {'svc__C': 0.0004641588833612782} 0.5825123152709359
Run 42 {'svc__C': 0.0004641588833612782} 0.6736453201970444
Run 43 {'svc__C': 0.0004641588833612782} 0.6450738916256157
Run 44 {'svc__C': 0.0004641588833612782} 0.6381773399014778
Run 45 {'svc__C': 0.0004641588833612782} 0.680295566502463
Run 46 {'svc__C': 1e-10} 0.6312807881773399
Run 47 {'svc__C': 0.0004641588833612782} 0.7027093596059114
Run 48 {'svc__C': 0.0004641588833612782} 0.58128078817734


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 49 {'svc__C': 0.0004641588833612782} 0.6874384236453203
Run 50 {'svc__C': 1e-10} 0.6963054187192118
Run 51 {'svc__C': 0.0004641588833612782} 0.6322660098522167
Run 52 {'svc__C': 0.0004641588833612782} 0.680295566502463
Run 53 {'svc__C': 0.0004641588833612782} 0.6532019704433497


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 54 {'svc__C': 0.0004641588833612782} 0.6514778325123153
Run 55 {'svc__C': 0.0004641588833612782} 0.7305418719211823
Run 56 {'svc__C': 0.0004641588833612782} 0.6667487684729065
Run 57 {'svc__C': 0.0004641588833612782} 0.6741379310344827
Run 58 {'svc__C': 0.0004641588833612782} 0.6810344827586208
Run 59 {'svc__C': 0.0004641588833612782} 0.7024630541871921


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 60 {'svc__C': 0.0004641588833612782} 0.7088669950738916
Run 61 {'svc__C': 0.0004641588833612782} 0.6948275862068967
Run 62 {'svc__C': 1e-10} 0.7233990147783251
Run 63 {'svc__C': 0.0004641588833612782} 0.6667487684729064
Run 64 {'svc__C': 0.0004641588833612782} 0.702216748768473
Run 65 {'svc__C': 0.0004641588833612782} 0.6238916256157635
Run 66 {'svc__C': 0.0004641588833612782} 0.6591133004926107
Run 67 {'svc__C': 0.0004641588833612782} 0.5884236453201971


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 68 {'svc__C': 0.07742636826811278} 0.6955665024630543
Run 69 {'svc__C': 0.0004641588833612782} 0.6665024630541871
Run 70 {'svc__C': 0.0004641588833612782} 0.6598522167487684
Run 71 {'svc__C': 0.0004641588833612782} 0.7093596059113301
Run 72 {'svc__C': 0.0004641588833612782} 0.7019704433497537
Run 73 {'svc__C': 0.0004641588833612782} 0.6669950738916256
Run 74 {'svc__C': 0.0004641588833612782} 0.6884236453201971


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 75 {'svc__C': 0.0004641588833612782} 0.7514778325123153
Run 76 {'svc__C': 1e-10} 0.6463054187192119
Run 77 {'svc__C': 0.0004641588833612782} 0.6881773399014778
Run 78 {'svc__C': 0.0004641588833612782} 0.6667487684729064
Run 79 {'svc__C': 0.0004641588833612782} 0.6807881773399015


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 80 {'svc__C': 0.0004641588833612782} 0.7231527093596058


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 81 {'svc__C': 0.0004641588833612782} 0.5889162561576355


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 82 {'svc__C': 0.07742636826811278} 0.5891625615763547


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 83 {'svc__C': 0.0004641588833612782} 0.6529556650246306


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 84 {'svc__C': 0.0004641588833612782} 0.7379310344827585
Run 85 {'svc__C': 1e-10} 0.7096059113300492
Run 86 {'svc__C': 0.0004641588833612782} 0.6741379310344827
Run 87 {'svc__C': 0.0004641588833612782} 0.6315270935960591
Run 88 {'svc__C': 0.0004641588833612782} 0.6665024630541871
Run 89 {'svc__C': 0.0004641588833612782} 0.695320197044335
Run 90 {'svc__C': 0.0004641588833612782} 0.6096059113300493
Run 91 {'svc__C': 0.0004641588833612782} 0.5667487684729065
Run 92 {'svc__C': 0.0004641588833612782} 0.7157635467980296
Run 93 {'svc__C': 0.0004641588833612782} 0.6871921182266011
Run 94 {'svc__C': 0.0004641588833612782} 0.7583743842364533
Run 95 {'svc__C': 0.0004641588833612782} 0.7024630541871921
Run 96 {'svc__C': 0.0004641588833612782} 0.6876847290640395
Run 97 {'svc__C': 1e-10} 0.7293103448275862


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 98 {'svc__C': 0.0004641588833612782} 0.6465517241379309
Run 99 {'svc__C': 0.0004641588833612782} 0.581527093596059


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 100 {'svc__C': 0.0004641588833612782} 0.7298029556650245
Run 101 {'svc__C': 1e-10} 0.5889162561576354


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 102 {'svc__C': 0.0004641588833612782} 0.7014778325123153
Run 103 {'svc__C': 0.0004641588833612782} 0.6374384236453201
Run 104 {'svc__C': 0.0004641588833612782} 0.6596059113300493


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 105 {'svc__C': 0.0004641588833612782} 0.7169950738916256
Run 106 {'svc__C': 0.0004641588833612782} 0.6884236453201971


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 107 {'svc__C': 0.0004641588833612782} 0.6879310344827587


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 108 {'svc__C': 0.0004641588833612782} 0.6386699507389162


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 109 {'svc__C': 1e-10} 0.7726600985221674
Run 110 {'svc__C': 0.0004641588833612782} 0.6874384236453203
Run 111 {'svc__C': 0.0004641588833612782} 0.65935960591133
Run 112 {'svc__C': 0.0004641588833612782} 0.6945812807881773


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 113 {'svc__C': 0.0004641588833612782} 0.7019704433497538
Run 114 {'svc__C': 0.0004641588833612782} 0.6960591133004927
Run 115 {'svc__C': 0.0004641588833612782} 0.6596059113300493


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 116 {'svc__C': 0.0004641588833612782} 0.6955665024630542
Run 117 {'svc__C': 0.0004641588833612782} 0.6798029556650247
Run 118 {'svc__C': 0.0004641588833612782} 0.7805418719211822
Run 119 {'svc__C': 0.0004641588833612782} 0.6598522167487685
Run 120 {'svc__C': 1e-10} 0.6953201970443349
Run 121 {'svc__C': 0.0004641588833612782} 0.7315270935960592
Run 122 {'svc__C': 1e-10} 0.7598522167487685
Run 123 {'svc__C': 0.0004641588833612782} 0.7019704433497538
Run 124 {'svc__C': 0.0004641588833612782} 0.6876847290640393
Run 125 {'svc__C': 0.0004641588833612782} 0.7238916256157635
Run 126 {'svc__C': 0.0004641588833612782} 0.7024630541871921


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 127 {'svc__C': 0.0004641588833612782} 0.6241379310344828


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 128 {'svc__C': 0.0004641588833612782} 0.6655172413793103
Run 129 {'svc__C': 0.07742636826811278} 0.6948275862068967
Run 130 {'svc__C': 0.0004641588833612782} 0.6098522167487685
Run 131 {'svc__C': 0.0004641588833612782} 0.7438423645320198
Run 132 {'svc__C': 0.0004641588833612782} 0.6455665024630541
Run 133 {'svc__C': 0.0004641588833612782} 0.595320197044335
Run 134 {'svc__C': 0.0004641588833612782} 0.6869458128078817


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 135 {'svc__C': 0.0004641588833612782} 0.6950738916256157
Run 136 {'svc__C': 0.0004641588833612782} 0.6598522167487685
Run 137 {'svc__C': 0.0004641588833612782} 0.7231527093596058
Run 138 {'svc__C': 0.0004641588833612782} 0.6943349753694581
Run 139 {'svc__C': 0.0004641588833612782} 0.6948275862068966
Run 140 {'svc__C': 0.0004641588833612782} 0.6098522167487685
Run 141 {'svc__C': 0.0004641588833612782} 0.7600985221674877
Run 142 {'svc__C': 0.0004641588833612782} 0.6381773399014778
Run 143 {'svc__C': 0.0004641588833612782} 0.68128078817734


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 144 {'svc__C': 0.0004641588833612782} 0.6886699507389162
Run 145 {'svc__C': 0.0004641588833612782} 0.6598522167487686
Run 146 {'svc__C': 0.0004641588833612782} 0.7229064039408867


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 147 {'svc__C': 0.07742636826811278} 0.5958128078817735
Run 148 {'svc__C': 0.0004641588833612782} 0.5463054187192118


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 149 {'svc__C': 0.0004641588833612782} 0.6450738916256158


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 150 {'svc__C': 1e-10} 0.6317733990147782
Run 151 {'svc__C': 0.0004641588833612782} 0.6093596059113301
Run 152 {'svc__C': 1e-10} 0.7233990147783251
Run 153 {'svc__C': 0.0004641588833612782} 0.6879310344827587


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 154 {'svc__C': 0.0004641588833612782} 0.6458128078817734
Run 155 {'svc__C': 0.0004641588833612782} 0.6810344827586207
Run 156 {'svc__C': 0.0004641588833612782} 0.5810344827586207
Run 157 {'svc__C': 0.07742636826811278} 0.695320197044335


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 158 {'svc__C': 0.0004641588833612782} 0.623399014778325


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 159 {'svc__C': 0.0004641588833612782} 0.7024630541871921
Run 160 {'svc__C': 0.0004641588833612782} 0.7019704433497538
Run 161 {'svc__C': 0.07742636826811278} 0.6381773399014778
Run 162 {'svc__C': 1e-10} 0.653448275862069


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 163 {'svc__C': 0.0004641588833612782} 0.6167487684729064
Run 164 {'svc__C': 0.0004641588833612782} 0.6672413793103449
Run 165 {'svc__C': 0.0004641588833612782} 0.7665024630541872
Run 166 {'svc__C': 0.0004641588833612782} 0.6450738916256158
Run 167 {'svc__C': 0.0004641588833612782} 0.653448275862069
Run 168 {'svc__C': 0.0004641588833612782} 0.5532019704433498
Run 169 {'svc__C': 0.0004641588833612782} 0.6603448275862069
Run 170 {'svc__C': 0.0004641588833612782} 0.6876847290640394
Run 171 {'svc__C': 0.0004641588833612782} 0.6669950738916256
Run 172 {'svc__C': 0.0004641588833612782} 0.7514778325123153
Run 173 {'svc__C': 0.0004641588833612782} 0.6807881773399015
Run 174 {'svc__C': 0.0004641588833612782} 0.7445812807881774
Run 175 {'svc__C': 0.0004641588833612782} 0.5743842364532019
Run 176 {'svc__C': 0.07742636826811278} 0.7736453201970444
Run 177 {'svc__C': 1e-10} 0.6529556650246304
Run 178 {'svc__C': 0.0004641588833612782} 0.666256157635468
Run 179 {'svc__C': 0.0004641588833612782} 0.

  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 180 {'svc__C': 0.0004641588833612782} 0.6517241379310346
Run 181 {'svc__C': 0.0004641588833612782} 0.7160098522167487
Run 182 {'svc__C': 0.0004641588833612782} 0.7091133004926109
Run 183 {'svc__C': 0.0004641588833612782} 0.7017241379310345
Run 184 {'svc__C': 0.0004641588833612782} 0.6746305418719213
Run 185 {'svc__C': 0.0004641588833612782} 0.6246305418719211


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 186 {'svc__C': 0.0004641588833612782} 0.6312807881773399


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 187 {'svc__C': 0.0004641588833612782} 0.6940886699507389
Run 188 {'svc__C': 0.0004641588833612782} 0.6386699507389162
Run 189 {'svc__C': 0.0004641588833612782} 0.7236453201970444
Run 190 {'svc__C': 0.0004641588833612782} 0.6598522167487685
Run 191 {'svc__C': 0.0004641588833612782} 0.702216748768473


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 192 {'svc__C': 1e-10} 0.7027093596059114
Run 193 {'svc__C': 0.0004641588833612782} 0.6029556650246305
Run 194 {'svc__C': 0.0004641588833612782} 0.6600985221674878
Run 195 {'svc__C': 0.0004641588833612782} 0.6655172413793103


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 196 {'svc__C': 0.0004641588833612782} 0.6667487684729064
Run 197 {'svc__C': 0.0004641588833612782} 0.6820197044334975
Run 198 {'svc__C': 0.0004641588833612782} 0.6238916256157635
Run 199 {'svc__C': 0.0004641588833612782} 0.7088669950738916
Run 200 {'svc__C': 1e-10} 0.5330049261083744


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Average accuracy: 0.633 with standard deviation: 0.1712045560141435
Average MCC: 0.0017851544578240076 with standard deviation: 0.31436534176836983


## Result saving

In [5]:
if classifier == "SVM": 
    SVM_results = {
        "predicted": Ypred, 
        "true": Ytrue, 
        "mdl": betas, 
        "accuracies": accuracies, 
        "MCCs": MCCs, 
        "testSamples": testSamples
    }

    if features == "FVA":
        SVM_results["predictors"] = predictors
        SVM_results["pathways"] = pathways
    else:
        SVM_results["genes"] = genes
    
    savemat(results_path + r"\SVMl2" + "_results_" + features + permute + ".mat", 
            {"SVM_results": SVM_results}, oned_as="column")
elif classifier == "NN": 
    NN_results = {
        "predicted": Ypred, 
        "true": Ytrue, 
        "mdl": betas, 
        "accuracies": accuracies, 
        "MCCs": MCCs, 
        "testSamples": testSamples
    }

    if features == "FVA":
        NN_results["predictors"] = predictors
        NN_results["pathways"] = pathways
    else:
        NN_results["genes"] = genes
    savemat(results_path + r"\NN" + "_results_" + features + permute + ".mat", 
            {"NN_results": NN_results}, oned_as="column")
else: 
    RF_results = {
        "predicted": Ypred, 
        "true": Ytrue, 
        "mdl": betas, 
        "accuracies": accuracies, 
        "MCCs": MCCs, 
        "testSamples": testSamples
    }

    if features == "FVA":
        RF_results["predictors"] = predictors
        RF_results["pathways"] = pathways
    else:
        RF_results["genes"] = genes
    
    savemat(results_path + r"\RF" + "_results_" + features + permute + ".mat", 
            {"RF_results": RF_results}, oned_as="column")