# Integration pipeline of transcriptomics and fluxomics

## Parameter definition

In [1]:
# Directory prefix of the .mat file written by the result-saving cell
results_path = "path"
# Permutation mode; the string is also appended to the results file name.
# NOTE: the analysis loop raises NotImplementedError for "_feature_permutation".
permute = "_label_permutation" # "", "_label_permutation", "_feature_permutation";  "" if you do not want permutation

NUMBER_REPETITIONS = 200 # number of random train/test repetitions of the analysis loop
NUMBER_TEST_SAMPLES = 5 # per class, held out in every repetition
NUMBER_TRAIN_SAMPLES = 30 # per class per fold execution (target size of the under-sampler)
NUMBER_INTERNAL_FOLDS = 5 # number of splits of the stratified k-fold used by the grid search
var_threshold = 0.0 # VarianceThreshold cut-off; 0.0 to remove only constant features

## Data loading and minimal preprocessing

In [2]:
import random
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from scipy.io import loadmat, savemat

# Seed Python's `random` module and create a NumPy RandomState from the same seed
random_state = 1
random.seed(random_state)
rng = np.random.RandomState(random_state)

# File paths are raw strings: in a normal string literal "\r" (as in
# "path\recon2.2.mat") is parsed as a carriage return and corrupts the path.

# load fluxomic dataset
fluxes = pd.read_csv(r"path\maxFluxes.csv")
# load metabolic model
model = loadmat(r"path\recon2.2.mat")
rxn_list = model["model"][0][0][0]
pathway_list = model["model"][0][0][13]
# check for NANs and for identical rows
print(fluxes.isnull().values.any())
# Compare the flux columns only (everything after the first column, exactly the
# columns converted to the data matrix below). Including the id column in the
# subset would make every row look distinct.
print(fluxes.duplicated(subset=fluxes.columns[1:]).any())
sample_ids = fluxes["sample_id"]
fluxes = fluxes[fluxes.columns[1:]].to_numpy()
fluxes_length = fluxes.shape[1]
print("Length fluxes: ", fluxes_length)

# load transcriptomic dataset
genes = loadmat(r"path\combined_dataset.mat")
genes = np.transpose(genes["geneData_combat_p"])
print(np.sum(np.isnan(genes)))
uni_genes, _, unique_inverse = np.unique(genes, return_index=True, return_inverse=True, axis=0)
# True if at least two samples are identical: np.unique collapses duplicated rows,
# so fewer unique rows than rows means duplicates exist.
# (Rebuilding the data with uni_genes[unique_inverse] always reproduces it exactly
# and therefore cannot reveal duplicates.)
print(uni_genes.shape[0] != genes.shape[0])
genes_length = genes.shape[1]
print("Length genes: ", genes_length)
gene_list = pd.read_csv(r"path\gene_exp_final_m.csv")
gene_list = gene_list["hgnc_id"].to_numpy()

# concatenate both datasets (fluxes first, then genes, column-wise)
fulldata = np.concatenate((fluxes, genes), axis=1)
fulldata = np.around(fulldata, decimals=6)  # done here because it is element-wise so no data leakage can happen
print(np.sum(np.isnan(fulldata)))
uni_data, _, unique_inverse = np.unique(fulldata, return_index=True, return_inverse=True, axis=0)
# Same duplicate-row check, repeated because rounding could make rows identical
print(uni_data.shape[0] != fulldata.shape[0])
print(fulldata.shape)
omics_length = fulldata.shape[1]

# load targets
targets = pd.read_csv(r"path\sampleData.csv")  # ids are in the same order
targets = targets["Status"] == "Tumor"
# Boolean labels are encoded as False -> 0 and True ("Tumor") -> 1
targets = LabelEncoder().fit_transform(targets)
# Every sample must have exactly one label, otherwise the class masks built from
# `targets` cannot index `fulldata`
assert len(targets) == fulldata.shape[0], "number of targets does not match number of samples"

False
False
Length fluxes:  7785
0
False
Length genes:  12712
0
False
(151, 20497)


## Utility definitions

In [3]:
# custom transformer for removing the identical features
from sklearn.base import BaseEstimator, TransformerMixin, ClassifierMixin

class RedundantTrimmer(BaseEstimator, TransformerMixin):
    """Transformer that keeps a single copy of every group of identical columns.

    ``fit`` determines which columns to keep from the data it is given;
    ``transform`` selects those same columns from any matrix passed to it.
    """

    def __init__(self):
        # No hyper-parameters
        pass

    def fit(self, X, y=None):
        """Store in ``self.indices`` one column index per distinct column of ``X``.

        ``y`` is accepted for pipeline compatibility and ignored.
        Returns ``self``.
        """
        # np.unique orders the distinct columns, so the kept columns follow that
        # sorted order (unlike in the MATLAB version).
        kept_columns = np.unique(X, return_index=True, axis=1)[1]
        self.indices = kept_columns
        return self

    def transform(self, X, y=None):
        """Return the columns of ``X`` selected during ``fit``, in the stored order."""
        trimmed = X[:, self.indices]
        return trimmed
    

# function to retrieve the rxns and subsystems used
def getFeatures(features, thresholded, redundant):
    """Group feature names by the collapsed (de-duplicated) column they ended up in.

    Parameters
    ----------
    features : array-like
        Names/identifiers of all original features; reshaped to one name per row.
    thresholded : array-like
        Boolean mask or index array selecting the features that survived the
        variance filter. It is applied to the rows of ``features``.
    redundant : array-like of int
        For every surviving feature, the index of the group of identical columns
        it belongs to (e.g. the inverse indices returned by ``np.unique``).
        Group indices are expected to be 0..k-1.

    Returns
    -------
    numpy.ndarray of dtype object
        One entry per group; entry ``i`` is the list of the (1-element) feature
        rows whose group index is ``i``.
    """
    features = np.asarray(features).reshape(-1, 1)
    # Use the mask that was passed in, not a same-named global of the caller
    features = features[thresholded]
    # ravel() tolerates an inverse array that carries an extra singleton axis
    redundant = np.asarray(redundant).ravel()

    n_groups = int(redundant.max()) + 1 if redundant.size else 0
    # Single pass over the features instead of one scan of `redundant` per group
    groups = [[] for _ in range(n_groups)]
    for j, k in enumerate(redundant):
        groups[k].append(features[j])

    array = np.zeros(n_groups, dtype=object)
    for i, members in enumerate(groups):
        array[i] = members
    return array


# custom implementation of a PLSDA transformer
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
from sklearn.utils.multiclass import unique_labels
from sklearn.cross_decomposition import PLSRegression

class PLSDiscriminantAnalysis(BaseEstimator, ClassifierMixin):
    """PLS discriminant analysis used as a supervised projection step.

    ``fit`` regresses a one-hot class-indicator matrix on ``X`` with
    ``PLSRegression`` (one component per class); ``transform`` projects new data
    with the fitted model.

    Parameters
    ----------
    max_iter : int, default=500
        Forwarded to ``PLSRegression``.
    tol : float, default=1e-06
        Forwarded to ``PLSRegression``.

    Attributes
    ----------
    classes_ : ndarray
        Class labels seen during ``fit``.
    PLSDA : PLSRegression
        The fitted regression model.
    """

    def __init__(self, max_iter=500, tol=1e-06):
        self.max_iter = max_iter
        self.tol = tol

    def fit(self, X, y):
        """Fit the underlying PLS regression on ``X`` and the one-hot encoded ``y``.

        Returns ``self``.
        """
        # Check that X and y have correct shape
        X, y = check_X_y(X, y)
        # Store the classes seen during fit
        self.classes_ = unique_labels(y)

        # Build class matrix: one column per class in `classes_`, 1.0 where the
        # sample belongs to that class. Comparing against `classes_` (instead of
        # indexing columns by the raw label value) also works for labels that are
        # not exactly 0..K-1, and is identical for 0/1 labels.
        class_matrix = (y[:, np.newaxis] == self.classes_[np.newaxis, :]).astype(float)

        self.PLSDA = PLSRegression(n_components=len(self.classes_), max_iter=self.max_iter, tol=self.tol)
        self.PLSDA.fit(X, class_matrix)
        # Return the fitted estimator
        return self

    def transform(self, X, y=None):
        """Project ``X`` with the fitted PLS model; ``y`` is ignored."""
        # Check if fit() had been called
        check_is_fitted(self)

        # Input validation
        X = check_array(X)

        return self.PLSDA.transform(X)


## Integration analysis and permutation analysis

In [4]:
from sklearn.utils import shuffle
from imblearn.pipeline import Pipeline
from imblearn.over_sampling import RandomOverSampler
from imblearn.under_sampling import RandomUnderSampler
from sklearn.compose import ColumnTransformer
from sklearn.feature_selection import VarianceThreshold
from sklearn.model_selection import StratifiedKFold, GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, matthews_corrcoef

# Fail fast on the permutation mode: an unsupported or mistyped value must not
# start (or silently run as an unpermuted analysis) a long series of repetitions
# whose results file would then carry a misleading suffix.
supported_permutations = ("", "_label_permutation")
if permute == "_feature_permutation":
    raise NotImplementedError("feature permutation is not implemented")
if permute not in supported_permutations:
    raise ValueError(
        "Unknown permute option {!r}; expected one of {}".format(permute, supported_permutations)
    )

# Candidate values for the SVC regularisation parameter C
lambdas = np.logspace(-10, 10, num=10)
# Split the samples by class (1 = tumor, 0 = healthy)
tumor_samples = fulldata[targets==1,:]
healthy_samples = fulldata[targets==0,:]
cv = StratifiedKFold(n_splits=NUMBER_INTERNAL_FOLDS, shuffle=True, random_state=random_state)

# Per-omics preprocessing: drop low-variance features, drop duplicated columns,
# then project with PLS-DA
separate_pipeline = Pipeline(steps=[
    ("var_selection", VarianceThreshold(threshold=var_threshold)), 
    ("reduntant_trim", RedundantTrimmer()), 
    ("plsda", PLSDiscriminantAnalysis()) # standardises automatically
])

# Full model: balance the classes (over-sample the minority class, then under-sample
# both classes to NUMBER_TRAIN_SAMPLES), preprocess fluxes and genes separately,
# standardise the concatenated projections and classify with a linear SVM
pipe = Pipeline(steps=[
    ("oversampling", RandomOverSampler(sampling_strategy="minority", random_state=random_state)), 
    ("undersampling", RandomUnderSampler(sampling_strategy={0:NUMBER_TRAIN_SAMPLES, 1:NUMBER_TRAIN_SAMPLES}, 
                                         random_state=random_state)),
    ("separate_pipelines", ColumnTransformer([
        ("fluxes", separate_pipeline, list(range(fluxes_length))), # fluxes
        ("genes", separate_pipeline, list(range(fluxes_length, omics_length))) #genes
    ])),
    ("standardise", StandardScaler()), 
    ('svc', SVC(kernel="linear", random_state=rng))
])


params = {
    "svc__C": lambdas
}

# Per-repetition result containers
accuracies = np.zeros(NUMBER_REPETITIONS)
MCCs = np.zeros(NUMBER_REPETITIONS)
predictors =  np.zeros(NUMBER_REPETITIONS, dtype=object)
pathways =  np.zeros(NUMBER_REPETITIONS, dtype=object)
# NOTE: from here on `genes` holds the per-repetition gene groups, no longer the
# expression matrix loaded earlier (which is already part of `fulldata`)
genes =  np.zeros(NUMBER_REPETITIONS, dtype=object)
plsda_scores_f = np.zeros(NUMBER_REPETITIONS, dtype=object)
plsda_scores_g = np.zeros(NUMBER_REPETITIONS, dtype=object)
Ytrue = np.zeros((NUMBER_REPETITIONS, NUMBER_TEST_SAMPLES * 2))
Ypred = np.zeros((NUMBER_REPETITIONS, NUMBER_TEST_SAMPLES * 2))
testSamples = np.zeros((NUMBER_REPETITIONS, NUMBER_TEST_SAMPLES * 2))
betas = list()

for rep in range(NUMBER_REPETITIONS):
    # Hold out NUMBER_TEST_SAMPLES random samples of each class; the indices are
    # positions within healthy_samples / tumor_samples respectively
    test_idx1 = random.sample(range(len(healthy_samples)), NUMBER_TEST_SAMPLES)
    test_idx2 = random.sample(range(len(tumor_samples)), NUMBER_TEST_SAMPLES)  
    testSamples[rep] = np.concatenate((test_idx1, test_idx2)) + 1 # for use in MATLAB
       
    Xtest = np.concatenate((healthy_samples[test_idx1,:], tumor_samples[test_idx2,:]))
    # healthy (0) test samples first, then tumor (1), matching the row order of Xtest
    ytest = [i for i in range(2) for _ in range(NUMBER_TEST_SAMPLES)]
    
    # Everything that was not held out forms the training set
    train_healthy = np.delete(healthy_samples, test_idx1, axis=0)
    train_tumor = np.delete(tumor_samples, test_idx2, axis=0)
    Xtrain = np.concatenate((train_healthy, train_tumor))
    Xtrain, ytrain = shuffle(Xtrain, np.concatenate(([0] * len(train_healthy), [1] * len(train_tumor))), 
                             random_state=random_state)
    
    if permute == "_label_permutation":
        # Shuffle the train and test labels jointly, then split them back
        ys = np.concatenate((ytrain, ytest))
        random.shuffle(ys)
        ytrain, ytest = ys[:len(ytrain)], ys[-len(ytest):]
    
    # Tune C by internal cross-validation and refit on the whole training set
    gs = GridSearchCV(pipe, param_grid=params, cv=cv, refit=True, n_jobs=-3) 
    gs.fit(Xtrain, ytrain)  
    print("Run", rep + 1, gs.best_params_, gs.best_score_) 
    ypred = gs.predict(Xtest)
    accuracies[rep] = accuracy_score(ytest, ypred)
    # NOTE(review): the recorded output shows RuntimeWarnings from this call,
    # presumably when the predictions or the (permuted) test labels contain a
    # single class - confirm
    MCCs[rep] = matthews_corrcoef(ytest, ypred)
    Ytrue[rep] = ytest
    Ypred[rep] = ypred
    
    # Rebuild the resampled training matrix that the refitted pipeline was trained
    # on, using the indices stored by the two samplers
    Xtrain = Xtrain[gs.best_estimator_["oversampling"].sample_indices_, :]
    Xtrain = Xtrain[gs.best_estimator_["undersampling"].sample_indices_, :]
    # fluxes
    flux_train = Xtrain[:, :fluxes_length]
    # mask of the flux features kept by the variance filter
    remove_indices = gs.best_estimator_["separate_pipelines"].transformers_[0][1]["var_selection"].get_support()
    flux_train = flux_train[:,remove_indices]
    # group index of every kept feature among the distinct columns
    _, collapse_indices = np.unique(flux_train, return_inverse=True, axis=1)
    predictors[rep] = getFeatures(rxn_list, remove_indices, collapse_indices)
    pathways[rep] = getFeatures(pathway_list, remove_indices, collapse_indices)
    plsda_scores_f[rep] = gs.best_estimator_["separate_pipelines"].transformers_[0][1]["plsda"].PLSDA.x_rotations_
    # genes
    genes_train = Xtrain[:, fluxes_length:]
    remove_indices = gs.best_estimator_["separate_pipelines"].transformers_[1][1]["var_selection"].get_support()
    genes_train = genes_train[:,remove_indices]
    _, collapse_indices = np.unique(genes_train, return_inverse=True, axis=1)
    genes[rep] = getFeatures(gene_list, remove_indices, collapse_indices)
    plsda_scores_g[rep] = gs.best_estimator_["separate_pipelines"].transformers_[1][1]["plsda"].PLSDA.x_rotations_
    # coefficients of the linear SVM fitted on the standardised PLS-DA projections
    betas.append({"Beta": gs.best_estimator_["svc"].coef_[0]})
    
print("Average accuracy: {} with standard deviation: {}".format(np.mean(accuracies), np.std(accuracies)))
print("Average MCC: {} with standard deviation: {}".format(np.mean(MCCs), np.std(MCCs)))

Run 1 {'svc__C': 2154.4346900318865} 0.7100985221674877
Run 2 {'svc__C': 1e-10} 0.7027093596059113
Run 3 {'svc__C': 2154.4346900318865} 0.7514778325123153
Run 4 {'svc__C': 0.07742636826811278} 0.5810344827586207
Run 5 {'svc__C': 0.07742636826811278} 0.723152709359606
Run 6 {'svc__C': 12.915496650148826} 0.6874384236453202
Run 7 {'svc__C': 2154.4346900318865} 0.7310344827586206
Run 8 {'svc__C': 2154.4346900318865} 0.666256157635468
Run 9 {'svc__C': 0.07742636826811278} 0.617487684729064
Run 10 {'svc__C': 12.915496650148826} 0.6874384236453202
Run 11 {'svc__C': 1e-10} 0.7591133004926107


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 12 {'svc__C': 0.07742636826811278} 0.6958128078817734
Run 13 {'svc__C': 0.07742636826811278} 0.7238916256157635
Run 14 {'svc__C': 12.915496650148826} 0.6889162561576355
Run 15 {'svc__C': 59948425.03189421} 0.6886699507389162


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 16 {'svc__C': 0.07742636826811278} 0.6876847290640394
Run 17 {'svc__C': 0.07742636826811278} 0.7241379310344828
Run 18 {'svc__C': 1e-10} 0.666256157635468


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 19 {'svc__C': 0.07742636826811278} 0.7017241379310345


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 20 {'svc__C': 0.07742636826811278} 0.6736453201970443
Run 21 {'svc__C': 0.07742636826811278} 0.6741379310344827
Run 22 {'svc__C': 359381.36638046405} 0.6812807881773398
Run 23 {'svc__C': 2154.4346900318865} 0.6022167487684729
Run 24 {'svc__C': 0.07742636826811278} 0.6820197044334976
Run 25 {'svc__C': 10000000000.0} 0.617487684729064
Run 26 {'svc__C': 0.07742636826811278} 0.6463054187192118
Run 27 {'svc__C': 0.07742636826811278} 0.6800492610837439


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 28 {'svc__C': 12.915496650148826} 0.5672413793103448


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 29 {'svc__C': 0.07742636826811278} 0.6657635467980295
Run 30 {'svc__C': 0.07742636826811278} 0.6817733990147785
Run 31 {'svc__C': 0.07742636826811278} 0.6667487684729064
Run 32 {'svc__C': 1e-10} 0.6891625615763546
Run 33 {'svc__C': 0.07742636826811278} 0.6448275862068965
Run 34 {'svc__C': 0.07742636826811278} 0.7241379310344828
Run 35 {'svc__C': 12.915496650148826} 0.6258620689655172


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 36 {'svc__C': 59948425.03189421} 0.6665024630541871
Run 37 {'svc__C': 0.07742636826811278} 0.6657635467980295


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 38 {'svc__C': 0.07742636826811278} 0.6810344827586208
Run 39 {'svc__C': 1e-10} 0.6677339901477832


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 40 {'svc__C': 0.07742636826811278} 0.7307881773399015
Run 41 {'svc__C': 2154.4346900318865} 0.6805418719211823
Run 42 {'svc__C': 0.07742636826811278} 0.6674876847290641
Run 43 {'svc__C': 0.07742636826811278} 0.7096059113300492
Run 44 {'svc__C': 0.07742636826811278} 0.6465517241379309


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 45 {'svc__C': 59948425.03189421} 0.638177339901478
Run 46 {'svc__C': 0.07742636826811278} 0.6669950738916257
Run 47 {'svc__C': 0.07742636826811278} 0.65935960591133


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 48 {'svc__C': 0.07742636826811278} 0.6241379310344828


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 49 {'svc__C': 0.07742636826811278} 0.6514778325123153
Run 50 {'svc__C': 0.07742636826811278} 0.7243842364532019
Run 51 {'svc__C': 0.07742636826811278} 0.738423645320197
Run 52 {'svc__C': 0.07742636826811278} 0.6736453201970444
Run 53 {'svc__C': 0.07742636826811278} 0.6320197044334975


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 54 {'svc__C': 0.07742636826811278} 0.6731527093596059
Run 55 {'svc__C': 0.07742636826811278} 0.6450738916256158
Run 56 {'svc__C': 12.915496650148826} 0.6386699507389162
Run 57 {'svc__C': 12.915496650148826} 0.737192118226601
Run 58 {'svc__C': 0.07742636826811278} 0.7093596059113301
Run 59 {'svc__C': 12.915496650148826} 0.6884236453201971
Run 60 {'svc__C': 0.07742636826811278} 0.6874384236453202


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 61 {'svc__C': 0.07742636826811278} 0.6733990147783252
Run 62 {'svc__C': 0.07742636826811278} 0.7310344827586206
Run 63 {'svc__C': 0.07742636826811278} 0.6381773399014777
Run 64 {'svc__C': 0.07742636826811278} 0.6879310344827585


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 65 {'svc__C': 2154.4346900318865} 0.6886699507389162
Run 66 {'svc__C': 0.07742636826811278} 0.6657635467980295
Run 67 {'svc__C': 359381.36638046405} 0.5603448275862069


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 68 {'svc__C': 0.07742636826811278} 0.6529556650246305
Run 69 {'svc__C': 0.07742636826811278} 0.6738916256157635
Run 70 {'svc__C': 1e-10} 0.7172413793103448
Run 71 {'svc__C': 0.07742636826811278} 0.7169950738916256
Run 72 {'svc__C': 2154.4346900318865} 0.7236453201970443
Run 73 {'svc__C': 0.07742636826811278} 0.6817733990147785
Run 74 {'svc__C': 2154.4346900318865} 0.730295566502463


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 75 {'svc__C': 0.07742636826811278} 0.7165024630541872
Run 76 {'svc__C': 12.915496650148826} 0.6807881773399016
Run 77 {'svc__C': 10000000000.0} 0.6879310344827585
Run 78 {'svc__C': 2154.4346900318865} 0.6524630541871921
Run 79 {'svc__C': 12.915496650148826} 0.6800492610837439


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 80 {'svc__C': 0.07742636826811278} 0.7024630541871921


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 81 {'svc__C': 0.07742636826811278} 0.5958128078817734
Run 82 {'svc__C': 0.07742636826811278} 0.7027093596059113


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 83 {'svc__C': 2154.4346900318865} 0.6810344827586208
Run 84 {'svc__C': 2154.4346900318865} 0.7300492610837439
Run 85 {'svc__C': 359381.36638046405} 0.7017241379310344
Run 86 {'svc__C': 12.915496650148826} 0.6746305418719213
Run 87 {'svc__C': 0.07742636826811278} 0.653448275862069
Run 88 {'svc__C': 0.07742636826811278} 0.7167487684729064
Run 89 {'svc__C': 0.07742636826811278} 0.752463054187192
Run 90 {'svc__C': 0.07742636826811278} 0.6086206896551725
Run 91 {'svc__C': 2154.4346900318865} 0.6100985221674877
Run 92 {'svc__C': 10000000000.0} 0.7512315270935961


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 93 {'svc__C': 2154.4346900318865} 0.6583743842364532


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 94 {'svc__C': 0.07742636826811278} 0.737192118226601
Run 95 {'svc__C': 0.07742636826811278} 0.6889162561576354
Run 96 {'svc__C': 1e-10} 0.6876847290640394
Run 97 {'svc__C': 12.915496650148826} 0.6736453201970444


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 98 {'svc__C': 0.07742636826811278} 0.5967980295566502
Run 99 {'svc__C': 59948425.03189421} 0.6667487684729064


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 100 {'svc__C': 0.07742636826811278} 0.680295566502463
Run 101 {'svc__C': 1e-10} 0.6945812807881773
Run 102 {'svc__C': 0.07742636826811278} 0.6588669950738917
Run 103 {'svc__C': 10000000000.0} 0.6729064039408867
Run 104 {'svc__C': 0.07742636826811278} 0.6876847290640394


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 105 {'svc__C': 12.915496650148826} 0.7379310344827585
Run 106 {'svc__C': 0.07742636826811278} 0.7098522167487685


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 107 {'svc__C': 0.07742636826811278} 0.6529556650246306


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 108 {'svc__C': 10000000000.0} 0.6943349753694582


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 109 {'svc__C': 0.07742636826811278} 0.680049261083744
Run 110 {'svc__C': 0.07742636826811278} 0.6591133004926107


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 111 {'svc__C': 0.07742636826811278} 0.7165024630541872
Run 112 {'svc__C': 0.07742636826811278} 0.730295566502463
Run 113 {'svc__C': 0.07742636826811278} 0.6519704433497537


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 114 {'svc__C': 0.07742636826811278} 0.6807881773399014
Run 115 {'svc__C': 0.07742636826811278} 0.6307881773399014


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 116 {'svc__C': 0.07742636826811278} 0.6603448275862068
Run 117 {'svc__C': 0.07742636826811278} 0.6448275862068966
Run 118 {'svc__C': 0.07742636826811278} 0.7810344827586206
Run 119 {'svc__C': 2154.4346900318865} 0.5889162561576354
Run 120 {'svc__C': 2154.4346900318865} 0.6522167487684729
Run 121 {'svc__C': 0.07742636826811278} 0.7233990147783251
Run 122 {'svc__C': 1e-10} 0.7655172413793103
Run 123 {'svc__C': 0.07742636826811278} 0.7160098522167487
Run 124 {'svc__C': 0.07742636826811278} 0.680295566502463
Run 125 {'svc__C': 59948425.03189421} 0.7450738916256159
Run 126 {'svc__C': 0.07742636826811278} 0.7379310344827585


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 127 {'svc__C': 12.915496650148826} 0.6667487684729064


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 128 {'svc__C': 0.07742636826811278} 0.7519704433497536
Run 129 {'svc__C': 12.915496650148826} 0.6455665024630541
Run 130 {'svc__C': 0.07742636826811278} 0.6310344827586206


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 131 {'svc__C': 0.07742636826811278} 0.7233990147783251
Run 132 {'svc__C': 0.07742636826811278} 0.6463054187192119
Run 133 {'svc__C': 0.07742636826811278} 0.5948275862068966
Run 134 {'svc__C': 0.07742636826811278} 0.6376847290640395


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 135 {'svc__C': 0.07742636826811278} 0.6886699507389162
Run 136 {'svc__C': 0.07742636826811278} 0.7093596059113301
Run 137 {'svc__C': 0.07742636826811278} 0.7443349753694581
Run 138 {'svc__C': 2154.4346900318865} 0.7310344827586206
Run 139 {'svc__C': 59948425.03189421} 0.7231527093596058
Run 140 {'svc__C': 0.07742636826811278} 0.6307881773399014
Run 141 {'svc__C': 0.07742636826811278} 0.7029556650246306
Run 142 {'svc__C': 0.07742636826811278} 0.6527093596059114
Run 143 {'svc__C': 59948425.03189421} 0.6529556650246305


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 144 {'svc__C': 0.07742636826811278} 0.8088669950738916
Run 145 {'svc__C': 0.07742636826811278} 0.6889162561576354
Run 146 {'svc__C': 0.07742636826811278} 0.7160098522167487


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 147 {'svc__C': 0.07742636826811278} 0.6519704433497537
Run 148 {'svc__C': 12.915496650148826} 0.5608374384236454


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 149 {'svc__C': 12.915496650148826} 0.6876847290640394


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 150 {'svc__C': 0.07742636826811278} 0.702216748768473
Run 151 {'svc__C': 0.07742636826811278} 0.6665024630541871
Run 152 {'svc__C': 0.07742636826811278} 0.6667487684729064
Run 153 {'svc__C': 59948425.03189421} 0.7231527093596058


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 154 {'svc__C': 0.07742636826811278} 0.667487684729064
Run 155 {'svc__C': 0.07742636826811278} 0.7093596059113301
Run 156 {'svc__C': 59948425.03189421} 0.6236453201970443
Run 157 {'svc__C': 0.07742636826811278} 0.6600985221674878


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 158 {'svc__C': 0.07742636826811278} 0.602463054187192


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 159 {'svc__C': 12.915496650148826} 0.6876847290640394
Run 160 {'svc__C': 1e-10} 0.7231527093596058
Run 161 {'svc__C': 2154.4346900318865} 0.7093596059113301


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 162 {'svc__C': 12.915496650148826} 0.6603448275862068


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 163 {'svc__C': 0.07742636826811278} 0.6529556650246306
Run 164 {'svc__C': 12.915496650148826} 0.7032019704433499
Run 165 {'svc__C': 359381.36638046405} 0.7100985221674876
Run 166 {'svc__C': 0.07742636826811278} 0.7091133004926109
Run 167 {'svc__C': 12.915496650148826} 0.7310344827586206
Run 168 {'svc__C': 0.07742636826811278} 0.5955665024630542
Run 169 {'svc__C': 0.07742636826811278} 0.6746305418719213
Run 170 {'svc__C': 0.07742636826811278} 0.666256157635468
Run 171 {'svc__C': 0.07742636826811278} 0.6891625615763547
Run 172 {'svc__C': 1e-10} 0.7017241379310345
Run 173 {'svc__C': 0.07742636826811278} 0.6669950738916256
Run 174 {'svc__C': 0.07742636826811278} 0.695320197044335
Run 175 {'svc__C': 0.07742636826811278} 0.5950738916256157
Run 176 {'svc__C': 2154.4346900318865} 0.6876847290640395
Run 177 {'svc__C': 0.07742636826811278} 0.6886699507389162
Run 178 {'svc__C': 0.07742636826811278} 0.6948275862068966
Run 179 {'svc__C': 0.07742636826811278} 0.6820197044334976


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 180 {'svc__C': 0.07742636826811278} 0.6455665024630541
Run 181 {'svc__C': 0.07742636826811278} 0.6657635467980296
Run 182 {'svc__C': 59948425.03189421} 0.6746305418719212
Run 183 {'svc__C': 12.915496650148826} 0.7233990147783251
Run 184 {'svc__C': 0.07742636826811278} 0.7091133004926109
Run 185 {'svc__C': 0.07742636826811278} 0.6955665024630541
Run 186 {'svc__C': 0.07742636826811278} 0.6455665024630541


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 187 {'svc__C': 0.07742636826811278} 0.7448275862068966
Run 188 {'svc__C': 0.07742636826811278} 0.6657635467980295
Run 189 {'svc__C': 0.07742636826811278} 0.6950738916256158
Run 190 {'svc__C': 10000000000.0} 0.6800492610837438
Run 191 {'svc__C': 0.07742636826811278} 0.6741379310344828


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Run 192 {'svc__C': 0.07742636826811278} 0.7386699507389162
Run 193 {'svc__C': 0.07742636826811278} 0.6536945812807883
Run 194 {'svc__C': 0.07742636826811278} 0.6889162561576354
Run 195 {'svc__C': 0.07742636826811278} 0.6943349753694582
Run 196 {'svc__C': 0.07742636826811278} 0.6600985221674877
Run 197 {'svc__C': 0.07742636826811278} 0.6532019704433498
Run 198 {'svc__C': 12.915496650148826} 0.6800492610837439
Run 199 {'svc__C': 1e-10} 0.6305418719211823
Run 200 {'svc__C': 0.07742636826811278} 0.6381773399014777


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Average accuracy: 0.6659999999999999 with standard deviation: 0.1576197957110718
Average MCC: 0.04114296419960483 with standard deviation: 0.2999281853391168


## Result saving

In [5]:
# Bundle every per-repetition output under the field names of the saved struct
SVM_results = {
    "predicted": Ypred,
    "true": Ytrue,
    "predictors": predictors,
    "pathways": pathways,
    "genes": genes,
    "plsda_scores_f": plsda_scores_f,
    "plsda_scores_g": plsda_scores_g,
    "mdl": betas,
    "accuracies": accuracies,
    "MCCs": MCCs,
    "testSamples": testSamples,
}

# The file name carries the permutation mode as a suffix
output_file = results_path + r"\Int2SVMl2" + "_results" + permute + ".mat"
# 1-D arrays are written as column vectors
savemat(output_file, {"SVM_results": SVM_results}, oned_as="column")