### Imports

In [1]:
import os
import random
import numpy as np
import pandas as pd
from tqdm import tqdm

import cobra
import riptide

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score, precision_score, recall_score

print('Imports Complete')

Imports Complete


### Read transcriptomics file

In [2]:
# Load the probe-level expression matrix; one column per sample plus
# the 'ensembl_id' and 'ID_REF' annotation columns.
data = pd.read_csv('data/matrix.csv')

# Quick look at the size and the first few rows of what was loaded.
print(data.shape)
data.head()

(138745, 14)


Unnamed: 0,ensembl_id,ID_REF,EED-Control-1,EED-Control-2,EED-Control-3,EED-ND-1,EED-ND-2,EED-ND-3,Healthy-Control-1,Healthy-Control-2,Healthy-Control-3,Healthy-ND-1,Healthy-ND-2,Healthy-ND-3
0,,AFFX-BkGr-GC03_st,2.50991,2.111326,1.777033,2.309331,1.728352,1.860814,1.867672,1.919417,1.841486,2.13463,1.974421,2.187432
1,,AFFX-BkGr-GC04_st,2.295783,1.90088,1.797043,1.938862,1.621349,1.958245,1.757484,1.827596,1.828458,1.971875,1.976003,2.063035
2,,AFFX-BkGr-GC05_st,2.257641,1.879776,1.755351,1.967621,1.672276,1.901808,1.839612,1.860259,1.871003,1.968703,2.036299,2.033751
3,,AFFX-BkGr-GC06_st,2.323094,1.934329,1.815905,2.030437,1.698393,1.954793,1.822267,1.857776,1.87271,2.000145,2.089354,2.102391
4,,AFFX-BkGr-GC07_st,2.361259,1.94532,1.874545,1.99578,1.718467,1.982185,1.852268,1.898476,1.911582,2.041037,2.106554,2.105908


In [3]:
# Sample columns that make up each cohort. The same mapping is reused
# later to iterate over patients.
# NOTE(review): EED uses the '-ND-' columns while Healthy uses the
# '-Control-' columns -- confirm this asymmetric selection is intended.
id_map = {
    'EED': ['EED-ND-1', 'EED-ND-2', 'EED-ND-3'],
    'Healthy': ['Healthy-Control-1', 'Healthy-Control-2', 'Healthy-Control-3'],
}

# Keep only the rows that carry an Ensembl gene id.
data = data[data['ensembl_id'].notna()]

# One dataframe per cohort: the Ensembl id plus that cohort's sample columns.
dfs = {
    cohort: data[['ensembl_id'] + sample_cols]
    for cohort, sample_cols in id_map.items()
}

In [4]:
# Gene map with all the genes present in Recon3D, reduced to the three
# columns used downstream.
gene_name_num = pd.read_csv('data/gene_name_number.tsv', sep='\t')[
    ['gene_number', 'symbol', 'ensembl_gene']
]

# Attach the gene-number annotation to every cohort frame. The merge uses
# the default inner join, so only genes present on both sides remain.
for cohort in list(dfs):
    dfs[cohort] = gene_name_num.merge(
        dfs[cohort], left_on='ensembl_gene', right_on='ensembl_id'
    )

In [5]:
# Sanity check: report the (rows, columns) of each merged cohort frame.
for cohort in ('EED', 'Healthy'):
    print(dfs[cohort].shape)

(3585, 7)
(3585, 7)


### Run Riptide for all patients

In [6]:
# Load Model
model = cobra.io.load_matlab_model('data/Recon3D_301.mat')

# Report the model dimensions as (metabolites, reactions), i.e. the shape of
# the stoichiometric matrix. The counts are read directly from the model
# instead of materialising the dense matrix just to look at its shape, and
# they are printed because a bare expression in the middle of a cell is
# never displayed.
print((len(model.metabolites), len(model.reactions)))

def run_riptide(fname, pat_name):
    """Contextualize the global ``model`` with one transcriptome file.

    Parameters
    ----------
    fname : str
        Path of the transcript-abundance file handed to
        ``riptide.read_transcription_file`` (read with ``norm=False``).
    pat_name : str
        Identifier written into the ``sample_number`` column of the result.

    Returns
    -------
    The ``flux_samples`` attribute of the RIPTiDe result, with an added
    ``sample_number`` column set to ``pat_name`` on every row.
    """
    abundances = riptide.read_transcription_file(fname, norm=False)

    # Relies on the notebook-level `model` loaded earlier in this cell.
    contextualized = riptide.contextualize(
        model=model,
        transcriptome=abundances,
        fraction=1.,
    )

    # Tag every sampled flux row with the patient it came from so that the
    # per-patient tables can be told apart after they are concatenated.
    flux_samples = contextualized.flux_samples
    flux_samples['sample_number'] = pat_name

    return flux_samples

No defined compartments in model Recon3D. Compartments will be deduced heuristically using regular expressions.
Using regular expression found the following compartments:c, e, g, i, l, m, n, r, x


In [7]:
# The per-patient and combined CSVs are written below 'output/'; create the
# directory up front so a fresh checkout does not fail on the first to_csv.
os.makedirs('output', exist_ok=True)

# Run for each patient
for keys in dfs.keys():
    print('Running for - {}'.format(keys))
    for pat_id in id_map[keys]:
        # RIPTiDe reads its input from disk: write a headerless two-column
        # (gene_number, abundance) TSV for this patient, then contextualize.
        dfs[keys][['gene_number', pat_id]].to_csv('sample.tsv', header=False, index=False, sep='\t')
        pat_output = run_riptide('sample.tsv', pat_id)
        pat_output.to_csv('output/{}.csv'.format(pat_id), index=False)

# Combine flux samples
for keys in dfs.keys():
    list_of_pat_flux = [
        pd.read_csv('output/{}.csv'.format(pat_id)) for pat_id in id_map[keys]
    ]
    # join='inner' keeps only the columns present for every patient of the
    # cohort. The row index is not written out, so it is simply renumbered.
    combined_flux = pd.concat(list_of_pat_flux, join='inner', ignore_index=True)
    combined_flux.to_csv('output/riptide_{}_flux_sample.csv'.format(keys), index=False)

Running for - EED

Initializing model and integrating transcriptomic data...
Pruning zero flux subnetworks...
Analyzing context-specific flux distributions...

Reactions pruned to 818 from 13543 (93.96% change)
Metabolites pruned to 767 from 8399 (90.87% change)
Flux through the objective DECREASED to ~995.1377 from ~1000.0 (0.49% change)
Context-specific metabolism correlates with transcriptome (r=-0.18, p<0.001 *)

RIPTiDe completed in, 3 minutes and 6 seconds 


Initializing model and integrating transcriptomic data...
Pruning zero flux subnetworks...
Analyzing context-specific flux distributions...

Reactions pruned to 1173 from 13543 (91.34% change)
Metabolites pruned to 1185 from 8399 (85.89% change)
Flux through the objective DECREASED to ~992.8036 from ~1000.0 (0.72% change)
Context-specific metabolism correlates with transcriptome (r=-0.081, p=0.006 *)

RIPTiDe completed in, 3 minutes and 21 seconds 


Initializing model and integrating transcriptomic data...
Pruning zero flux

### Run ML model on aggregated flux files

In [8]:
# Load the combined flux samples of each cohort and attach the class label
# (1 = EED, 0 = Healthy).
df_eed = pd.read_csv('output/riptide_EED_flux_sample.csv').assign(label=1)
df_healthy = pd.read_csv('output/riptide_Healthy_flux_sample.csv').assign(label=0)

# Stack both cohorts, keeping only the columns they share (join='inner'),
# and renumber the rows from zero.
df = pd.concat([df_eed, df_healthy], join='inner').reset_index(drop=True)

#### Model instructions

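- The model is run 90 times. Each run holds out 2 patients for testing (1 healthy + 1 EED) and trains on the remaining 4 patients (2 healthy + 2 EED).
- On every 3rd iteration (starting with the first) the held-out patient of each group is chosen at random; in the following two iterations the next patient of each group is held out instead, cycling through the three patients.
- The top 50 reactions (ranked by random-forest feature importance) are kept only from runs whose gain is greater than 0.2, where gain is the test accuracy minus the share of the majority class in the test set. This rule can be modified based on project goals.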

In [9]:
# --- Experiment settings -------------------------------------------------
N_RUNS = 90            # number of train/test repetitions
RESEED_EVERY = 3       # draw fresh random test patients every this many runs
GAIN_THRESHOLD = 0.2   # minimum accuracy gain over the majority-class baseline
TOP_K = 50             # top-ranked reactions kept from each qualifying run
SEED = 42              # base seed so that splits and forests are reproducible

# --- Result accumulators (one entry per run unless noted) ----------------
imp_list = []                  # flat list of top reactions from qualifying runs
accuracy_tracker = []
f1_tracker = []
precision_tracker = []
recall_tracker = []

val_distribution_tracker = []  # share of positive (EED) rows in each test set
imp_df = []                    # one (reactions, rank) frame per qualifying run

# Feature columns are loop-invariant. They are taken in the frame's own
# column order rather than through a set, whose iteration order for strings
# changes between interpreter sessions and would reorder the features.
feature_cols = [col for col in df.columns if col not in ('sample_number', 'label')]

# Every patient id present in the flux tables.
all_samples = set(df_eed['sample_number'].unique()) | set(df_healthy['sample_number'].unique())

n_eed = len(id_map['EED'])
n_healthy = len(id_map['Healthy'])

# Dedicated generator: seeding it does not disturb the global `random` state.
rng = random.Random(SEED)

x = 0
y = 0

# NOTE(review): reactions are only collected from runs that score well on
# their own test split, so the retained importances are selected on test
# performance -- keep this in mind when interpreting `imp_list` / `imp_df`.
for i in tqdm(range(1, N_RUNS + 1)):

    # Select the test sample: a random patient per cohort on the first run of
    # every RESEED_EVERY-run cycle, then step to the next patient (wrapping
    # around) for the remaining runs of the cycle.
    if (i - 1) % RESEED_EVERY == 0:
        x = rng.randint(0, n_eed - 1)
        y = rng.randint(0, n_healthy - 1)
    else:
        x = (x + 1) % n_eed
        y = (y + 1) % n_healthy
    print(i, x, y)

    test_sample = [id_map['EED'][x], id_map['Healthy'][y]]
    train_sample = list(all_samples - set(test_sample))

    is_train = df['sample_number'].isin(train_sample)
    is_test = df['sample_number'].isin(test_sample)

    X_train = df.loc[is_train, feature_cols].reset_index(drop=True)
    y_train = df.loc[is_train, 'label'].reset_index(drop=True)

    X_test = df.loc[is_test, feature_cols].reset_index(drop=True)
    y_test = df.loc[is_test, 'label'].reset_index(drop=True)

    # Train random forest. The seed varies with the run index so that repeated
    # runs on the same split still differ, while the whole experiment stays
    # reproducible.
    rf = RandomForestClassifier(n_estimators=100, random_state=SEED + i)
    rf.fit(X_train, y_train)

    y_train_pred = rf.predict(X_train)
    print('Training accuracy on selected features: %.3f' % accuracy_score(y_train, y_train_pred))

    # Compute each test metric once and reuse it for printing and tracking.
    y_test_pred = rf.predict(X_test)
    test_accuracy = accuracy_score(y_test, y_test_pred)
    test_f1 = f1_score(y_test, y_test_pred)
    positive_share = y_test.mean()
    baseline = max(positive_share, (1 - positive_share))  # majority-class accuracy

    print('Testing accuracy on selected features: %.3f' % test_accuracy)
    print('F1 Score: {}'.format(test_f1))
    print('Distribution: ', baseline)

    # Features ordered from most to least important.
    selected_cols = X_train.columns[np.argsort(rf.feature_importances_)[::-1]]

    val_distribution_tracker.append(positive_share)
    f1_tracker.append(test_f1)
    precision_tracker.append(precision_score(y_test, y_test_pred))
    recall_tracker.append(recall_score(y_test, y_test_pred))
    accuracy_tracker.append(test_accuracy)
    gain = test_accuracy - baseline

    if gain > GAIN_THRESHOLD:
        top_cols = selected_cols[:TOP_K]
        imp_list += list(top_cols)
        # Ranks follow the actual number of kept features, so the frame is
        # still well-formed when fewer than TOP_K features exist.
        imp_df.append(pd.DataFrame({'reactions': top_cols, 'rank': range(1, len(top_cols) + 1)}))
        print('Imp features: ', selected_cols[:10])
        print('-'*20)
        print('Gain: ', gain)
        print('-'*20)
    else:
        print('Failed to gain: ', gain)

    print('*'*20)
    print('*'*20)

  0%|          | 0/90 [00:00<?, ?it/s]

1 1 1


  1%|          | 1/90 [00:01<02:22,  1.60s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.781
F1 Score: 0.8068935041979673
Distribution:  0.5
Imp features:  Index(['HMR_0393', 'ACACT1x', 'GLYLYSCYSt', 'HMR_0327', 'sink_tyr_L[c]',
       'NADtpu', 'NADtx', 'HMGCOAtx', 'r0463', 'HISCYSCYSt'],
      dtype='object')
--------------------
Gain:  0.2815
--------------------
********************
********************
2 2 2


  2%|▏         | 2/90 [00:03<02:17,  1.56s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.061
F1 Score: 0.11336797354747284
Distribution:  0.5
Failed to gain:  -0.4385
********************
********************
3 0 0


  3%|▎         | 3/90 [00:05<02:35,  1.78s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.847
F1 Score: 0.8668407310704961
Distribution:  0.5
Imp features:  Index(['RTOT4', 'sink_arg_L[c]', 'sink_tyr_L[c]', 'r0463', 'EX_pcrn[e]',
       'C140CPT1', 'NADtpu', 'GLUt2m', 'HMR_0393', 'HMGCOAtx'],
      dtype='object')
--------------------
Gain:  0.347
--------------------
********************
********************
4 1 1


  4%|▍         | 4/90 [00:06<02:25,  1.69s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.711
F1 Score: 0.7340376665135508
Distribution:  0.5
Imp features:  Index(['HMR_0327', 'r0463', 'NADtx', 'ACACT1x', 'GLYLYSCYSt', 'HMR_0393',
       'NADtpu', 'sink_tyr_L[c]', 'GLYLYSCYSr', 'HMGCOAtx'],
      dtype='object')
--------------------
Gain:  0.21050000000000002
--------------------
********************
********************
5 2 2


  6%|▌         | 5/90 [00:08<02:20,  1.65s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.057
F1 Score: 0.10446343779677114
Distribution:  0.5
Failed to gain:  -0.443
********************
********************
6 0 0


  7%|▋         | 6/90 [00:10<02:33,  1.83s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.731
F1 Score: 0.7875443437130469
Distribution:  0.5
Imp features:  Index(['RTOT4', 'ACACT1x', 'sink_arg_L[c]', 'HMGCOAtx', 'r0463', 'CATp',
       'GLUt2m', 'DATPtn', 'EX_pcrn[e]', 'HMR_0632'],
      dtype='object')
--------------------
Gain:  0.23050000000000004
--------------------
********************
********************
7 2 2


  8%|▊         | 7/90 [00:12<02:23,  1.73s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.047
F1 Score: 0.08803827751196172
Distribution:  0.5
Failed to gain:  -0.453
********************
********************
8 0 0


  9%|▉         | 8/90 [00:14<02:32,  1.86s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.793
F1 Score: 0.8285595682855956
Distribution:  0.5
Imp features:  Index(['RTOT4', 'r0463', 'HMGCOAtx', 'ACACT1x', 'HMR_0327', 'CATp',
       'TTDCRNNAt', 'C140CPT1', 'GLUt2m', 'r0181'],
      dtype='object')
--------------------
Gain:  0.2935
--------------------
********************
********************
9 1 1


 10%|█         | 9/90 [00:15<02:22,  1.76s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.810
F1 Score: 0.8202939781887151
Distribution:  0.5
Imp features:  Index(['HMR_0393', 'HMR_0327', 'HMGCOAtx', 'NADtpu', 'GLYLYSCYSt',
       'GLYLYSCYSr', 'sink_tyr_L[c]', 'r0463', 'EX_pcrn[e]', 'ACACT1x'],
      dtype='object')
--------------------
Gain:  0.3105
--------------------
********************
********************
10 0 2


 11%|█         | 10/90 [00:17<02:17,  1.71s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.383
F1 Score: 0.5533453887884269
Distribution:  0.5
Failed to gain:  -0.1175
********************
********************
11 1 0


 12%|█▏        | 11/90 [00:18<02:10,  1.65s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.002
F1 Score: 0.0
Distribution:  0.5
Failed to gain:  -0.4985
********************
********************
12 2 1


 13%|█▎        | 12/90 [00:20<02:14,  1.72s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.514
F1 Score: 0.6664378860672615
Distribution:  0.5
Failed to gain:  0.014000000000000012
********************
********************
13 0 0


 14%|█▍        | 13/90 [00:22<02:24,  1.87s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.771
F1 Score: 0.814000814000814
Distribution:  0.5
Imp features:  Index(['RTOT4', 'ACACT1x', 'sink_arg_L[c]', 'CATp', 'HMGCOAtx', 'r0463',
       'EX_ttdcrn[e]', 'NADtx', 'r0202', 'HMR_7898'],
      dtype='object')
--------------------
Gain:  0.27149999999999996
--------------------
********************
********************
14 1 1


 16%|█▌        | 14/90 [00:24<02:14,  1.77s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.527
F1 Score: 0.3891402714932126
Distribution:  0.5
Failed to gain:  0.02749999999999997
********************
********************
15 2 2


 17%|█▋        | 15/90 [00:25<02:06,  1.69s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.034
F1 Score: 0.0648282535074988
Distribution:  0.5
Failed to gain:  -0.4665
********************
********************
16 1 2


 18%|█▊        | 16/90 [00:27<02:02,  1.66s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.499
F1 Score: 0.10053859964093356
Distribution:  0.5
Failed to gain:  -0.0010000000000000009
********************
********************
17 2 0


 19%|█▉        | 17/90 [00:29<02:15,  1.86s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.463
F1 Score: 0.5262230057293962
Distribution:  0.5
Failed to gain:  -0.03749999999999998
********************
********************
18 0 1


 20%|██        | 18/90 [00:31<02:08,  1.79s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.501
F1 Score: 0.6609996599795988
Distribution:  0.5
Failed to gain:  0.0014999999999999458
********************
********************
19 0 0


 21%|██        | 19/90 [00:33<02:14,  1.89s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.889
F1 Score: 0.8999098286744815
Distribution:  0.5
Imp features:  Index(['RTOT4', 'sink_arg_L[c]', 'EX_pcrn[e]', 'ACACT1x', 'HMR_7898', 'NADtpu',
       'C140CPT1', 'HMR_0632', 'r1428', 'HMGCOAtx'],
      dtype='object')
--------------------
Gain:  0.389
--------------------
********************
********************
20 1 1


 22%|██▏       | 20/90 [00:35<02:05,  1.80s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.590
F1 Score: 0.42946490618485056
Distribution:  0.5
Failed to gain:  0.08950000000000002
********************
********************
21 2 2


 23%|██▎       | 21/90 [00:36<01:59,  1.73s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.048
F1 Score: 0.0872065165309056
Distribution:  0.5
Failed to gain:  -0.4525
********************
********************
22 0 1


 24%|██▍       | 22/90 [00:38<01:55,  1.70s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.519
F1 Score: 0.6691884456671252
Distribution:  0.5
Failed to gain:  0.019000000000000017
********************
********************
23 1 2


 26%|██▌       | 23/90 [00:40<01:52,  1.68s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.421
F1 Score: 0.19359331476323122
Distribution:  0.5
Failed to gain:  -0.07900000000000001
********************
********************
24 2 0


 27%|██▋       | 24/90 [00:42<02:00,  1.83s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.492
F1 Score: 0.5388459791004089
Distribution:  0.5
Failed to gain:  -0.007500000000000007
********************
********************
25 1 1


 28%|██▊       | 25/90 [00:43<01:54,  1.77s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.748
F1 Score: 0.7449392712550607
Distribution:  0.5
Imp features:  Index(['HMR_0393', 'NADtpu', 'HMR_0327', 'r0463', 'GLYLYSCYSr', 'HMGCOAtx',
       'sink_tyr_L[c]', 'EX_pcrn[e]', 'ACACT1x', 'NADtx'],
      dtype='object')
--------------------
Gain:  0.248
--------------------
********************
********************
26 2 2


 29%|██▉       | 26/90 [00:45<01:49,  1.71s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.015
F1 Score: 0.029556650246305417
Distribution:  0.5
Failed to gain:  -0.485
********************
********************
27 0 0


 30%|███       | 27/90 [00:47<01:54,  1.81s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.923
F1 Score: 0.9285051067780873
Distribution:  0.5
Imp features:  Index(['RTOT4', 'r0202', 'sink_arg_L[c]', 'sink_thr_L[c]', 'HMGCOAtx',
       'TTDCRNNAt', 'r0463', 'CATp', 'C140CPT1', 'ACACT1x'],
      dtype='object')
--------------------
Gain:  0.42300000000000004
--------------------
********************
********************
28 1 0


 31%|███       | 28/90 [00:49<01:47,  1.73s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.001
F1 Score: 0.0
Distribution:  0.5
Failed to gain:  -0.4995
********************
********************
29 2 1


 32%|███▏      | 29/90 [00:50<01:47,  1.77s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.508
F1 Score: 0.6625472021970477
Distribution:  0.5
Failed to gain:  0.008499999999999952
********************
********************
30 0 2


 33%|███▎      | 30/90 [00:52<01:43,  1.72s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.424
F1 Score: 0.595998595998596
Distribution:  0.5
Failed to gain:  -0.07550000000000001
********************
********************
31 2 1


 34%|███▍      | 31/90 [00:54<01:43,  1.76s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.502
F1 Score: 0.6589041095890411
Distribution:  0.5
Failed to gain:  0.0020000000000000018
********************
********************
32 0 2


 36%|███▌      | 32/90 [00:55<01:38,  1.70s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.384
F1 Score: 0.5549132947976878
Distribution:  0.5
Failed to gain:  -0.11599999999999999
********************
********************
33 1 0


 37%|███▋      | 33/90 [00:57<01:33,  1.65s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.000
F1 Score: 0.0
Distribution:  0.5
Failed to gain:  -0.5
********************
********************
34 0 0


 38%|███▊      | 34/90 [00:59<01:40,  1.79s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.909
F1 Score: 0.9164370982552802
Distribution:  0.5
Imp features:  Index(['RTOT4', 'sink_arg_L[c]', 'ACACT1x', 'r1428', 'HMGCOAtx',
       'sink_tyr_L[c]', 'C140CPT1', 'CATp', 'r0463', 'HMR_0393'],
      dtype='object')
--------------------
Gain:  0.40900000000000003
--------------------
********************
********************
35 1 1


 39%|███▉      | 35/90 [01:01<01:35,  1.73s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.668
F1 Score: 0.6841353025250119
Distribution:  0.5
Failed to gain:  0.16849999999999998
********************
********************
36 2 2


 40%|████      | 36/90 [01:02<01:30,  1.68s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.020
F1 Score: 0.034482758620689655
Distribution:  0.5
Failed to gain:  -0.48
********************
********************
37 2 1


 41%|████      | 37/90 [01:04<01:32,  1.74s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.524
F1 Score: 0.6694444444444445
Distribution:  0.5
Failed to gain:  0.02400000000000002
********************
********************
38 0 2


 42%|████▏     | 38/90 [01:06<01:27,  1.68s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.364
F1 Score: 0.533724340175953
Distribution:  0.5
Failed to gain:  -0.136
********************
********************
39 1 0


 43%|████▎     | 39/90 [01:07<01:21,  1.59s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.001
F1 Score: 0.0
Distribution:  0.5
Failed to gain:  -0.4995
********************
********************
40 1 1


 44%|████▍     | 40/90 [01:09<01:19,  1.59s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.540
F1 Score: 0.43515673017824213
Distribution:  0.5
Failed to gain:  0.04049999999999998
********************
********************
41 2 2


 46%|████▌     | 41/90 [01:10<01:17,  1.58s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.032
F1 Score: 0.06110572259941804
Distribution:  0.5
Failed to gain:  -0.46799999999999997
********************
********************
42 0 0


 47%|████▋     | 42/90 [01:12<01:23,  1.74s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.735
F1 Score: 0.7900355871886121
Distribution:  0.5
Imp features:  Index(['RTOT4', 'r0463', 'ACACT1x', 'HMGCOAtx', 'r0202', 'C140CPT1',
       'TTDCRNNAt', 'r1428', 'DTMPKm', 'NADtpu'],
      dtype='object')
--------------------
Gain:  0.23450000000000004
--------------------
********************
********************
43 2 2


 48%|████▊     | 43/90 [01:14<01:18,  1.67s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.029
F1 Score: 0.05360623781676414
Distribution:  0.5
Failed to gain:  -0.471
********************
********************
44 0 0


 49%|████▉     | 44/90 [01:16<01:23,  1.81s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.838
F1 Score: 0.860344827586207
Distribution:  0.5
Imp features:  Index(['RTOT4', 'r0463', 'r0202', 'HMGCOAtx', 'ACACT1x', 'NADtx',
       'sink_tyr_L[c]', 'sink_arg_L[c]', 'r1428', 'C140CPT1'],
      dtype='object')
--------------------
Gain:  0.33799999999999997
--------------------
********************
********************
45 1 1


 50%|█████     | 45/90 [01:18<01:18,  1.75s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.509
F1 Score: 0.33850303438975055
Distribution:  0.5
Failed to gain:  0.009499999999999953
********************
********************
46 2 0


 51%|█████     | 46/90 [01:20<01:23,  1.89s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.707
F1 Score: 0.6740823136818688
Distribution:  0.5
Imp features:  Index(['r0202', 'RTOT4', 'r0430', 'r1428', 'EX_docosdiac[e]', 'HMR_7898',
       'RE0578C', 'O2tm', 'r0431', 'r0509'],
      dtype='object')
--------------------
Gain:  0.20699999999999996
--------------------
********************
********************
47 0 1


 52%|█████▏    | 47/90 [01:21<01:16,  1.79s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.491
F1 Score: 0.6546811397557667
Distribution:  0.5
Failed to gain:  -0.009000000000000008
********************
********************
48 1 2


 53%|█████▎    | 48/90 [01:23<01:12,  1.73s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.460
F1 Score: 0.06890611541774333
Distribution:  0.5
Failed to gain:  -0.04049999999999998
********************
********************
49 0 2


 54%|█████▍    | 49/90 [01:24<01:08,  1.67s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.459
F1 Score: 0.6287281453548166
Distribution:  0.5
Failed to gain:  -0.04149999999999998
********************
********************
50 1 0


 56%|█████▌    | 50/90 [01:26<01:04,  1.62s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.003
F1 Score: 0.0
Distribution:  0.5
Failed to gain:  -0.4975
********************
********************
51 2 1


 57%|█████▋    | 51/90 [01:28<01:05,  1.69s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.520
F1 Score: 0.6662030598052852
Distribution:  0.5
Failed to gain:  0.020000000000000018
********************
********************
52 2 1


 58%|█████▊    | 52/90 [01:30<01:05,  1.72s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.511
F1 Score: 0.6618257261410788
Distribution:  0.5
Failed to gain:  0.01100000000000001
********************
********************
53 0 2


 59%|█████▉    | 53/90 [01:31<01:02,  1.69s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.340
F1 Score: 0.5074626865671642
Distribution:  0.5
Failed to gain:  -0.15999999999999998
********************
********************
54 1 0


 60%|██████    | 54/90 [01:33<00:58,  1.64s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.000
F1 Score: 0.0
Distribution:  0.5
Failed to gain:  -0.5
********************
********************
55 2 1


 61%|██████    | 55/90 [01:34<00:58,  1.68s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.520
F1 Score: 0.6673596673596673
Distribution:  0.5
Failed to gain:  0.020000000000000018
********************
********************
56 0 2


 62%|██████▏   | 56/90 [01:36<00:56,  1.66s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.396
F1 Score: 0.5673352435530086
Distribution:  0.5
Failed to gain:  -0.10399999999999998
********************
********************
57 1 0


 63%|██████▎   | 57/90 [01:38<00:52,  1.60s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.001
F1 Score: 0.0
Distribution:  0.5
Failed to gain:  -0.4995
********************
********************
58 2 2


 64%|██████▍   | 58/90 [01:39<00:50,  1.59s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.017
F1 Score: 0.03055692459339576
Distribution:  0.5
Failed to gain:  -0.4835
********************
********************
59 0 0


 66%|██████▌   | 59/90 [01:41<00:54,  1.76s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.920
F1 Score: 0.9262180974477957
Distribution:  0.5
Imp features:  Index(['RTOT4', 'sink_arg_L[c]', 'r0463', 'r1428', 'HMGCOAtx', 'CATp',
       'NADtpu', 'DATPtn', 'NADtx', 'GLUt2m'],
      dtype='object')
--------------------
Gain:  0.4205
--------------------
********************
********************
60 1 1


 67%|██████▋   | 60/90 [01:43<00:51,  1.71s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.752
F1 Score: 0.7616754935002408
Distribution:  0.5
Imp features:  Index(['HMR_0327', 'HMGCOAtx', 'NADtx', 'sink_tyr_L[c]', 'HMR_0393',
       'GLYLYSCYSr', 'GLYLYSCYSt', 'r0463', 'ACACT1x', 'HMR_2368'],
      dtype='object')
--------------------
Gain:  0.25249999999999995
--------------------
********************
********************
61 2 1


 68%|██████▊   | 61/90 [01:45<00:51,  1.76s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.506
F1 Score: 0.6611796982167352
Distribution:  0.5
Failed to gain:  0.006000000000000005
********************
********************
62 0 2


 69%|██████▉   | 62/90 [01:46<00:47,  1.69s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.373
F1 Score: 0.5438660356752821
Distribution:  0.5
Failed to gain:  -0.1265
********************
********************
63 1 0


 70%|███████   | 63/90 [01:48<00:44,  1.64s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.000
F1 Score: 0.0
Distribution:  0.5
Failed to gain:  -0.5
********************
********************
64 1 1


 71%|███████   | 64/90 [01:49<00:42,  1.62s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.802
F1 Score: 0.809270883631096
Distribution:  0.5
Imp features:  Index(['HMR_0327', 'r0463', 'HMR_0393', 'NADtx', 'sink_tyr_L[c]', 'EX_pcrn[e]',
       'EX_glylyscys[e]', 'NADtpu', 'ACACT1x', 'GLYLYSCYSr'],
      dtype='object')
--------------------
Gain:  0.3025
--------------------
********************
********************
65 2 2


 72%|███████▏  | 65/90 [01:51<00:39,  1.59s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.033
F1 Score: 0.06022340942204954
Distribution:  0.5
Failed to gain:  -0.4675
********************
********************
66 0 0


 73%|███████▎  | 66/90 [01:53<00:41,  1.73s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.915
F1 Score: 0.921358771521638
Distribution:  0.5
Imp features:  Index(['RTOT4', 'r0463', 'NADtx', 'ACACT1x', 'HMGCOAtx', 'CATp', 'DGTPtn',
       'sink_arg_L[c]', 'NADtpu', 'r0202'],
      dtype='object')
--------------------
Gain:  0.4155
--------------------
********************
********************
67 2 1


 74%|███████▍  | 67/90 [01:55<00:40,  1.75s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.534
F1 Score: 0.676164002779708
Distribution:  0.5
Failed to gain:  0.03400000000000003
********************
********************
68 0 2


 76%|███████▌  | 68/90 [01:56<00:37,  1.69s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.394
F1 Score: 0.5647649802655185
Distribution:  0.5
Failed to gain:  -0.10649999999999998
********************
********************
69 1 0


 77%|███████▋  | 69/90 [01:58<00:34,  1.64s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.018
F1 Score: 0.0
Distribution:  0.5
Failed to gain:  -0.4815
********************
********************
70 0 0


 78%|███████▊  | 70/90 [02:00<00:36,  1.82s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.877
F1 Score: 0.8904719501335708
Distribution:  0.5
Imp features:  Index(['RTOT4', 'r0463', 'HMGCOAtx', 'sink_arg_L[c]', 'r0202', 'GLUt2m',
       'ACACT1x', 'sink_tyr_L[c]', 'CATp', 'HMR_0393'],
      dtype='object')
--------------------
Gain:  0.377
--------------------
********************
********************
71 1 1


 79%|███████▉  | 71/90 [02:02<00:33,  1.75s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.682
F1 Score: 0.6652631578947369
Distribution:  0.5
Failed to gain:  0.18200000000000005
********************
********************
72 2 2


 80%|████████  | 72/90 [02:03<00:30,  1.68s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.013
F1 Score: 0.02567901234567901
Distribution:  0.5
Failed to gain:  -0.4865
********************
********************
73 2 0


 81%|████████  | 73/90 [02:05<00:31,  1.85s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.379
F1 Score: 0.3562920766442258
Distribution:  0.5
Failed to gain:  -0.1215
********************
********************
74 0 1


 82%|████████▏ | 74/90 [02:07<00:28,  1.77s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.513
F1 Score: 0.6659807956104252
Distribution:  0.5
Failed to gain:  0.013000000000000012
********************
********************
75 1 2


 83%|████████▎ | 75/90 [02:09<00:25,  1.73s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.402
F1 Score: 0.08422664624808576
Distribution:  0.5
Failed to gain:  -0.09799999999999998
********************
********************
76 2 2


 84%|████████▍ | 76/90 [02:10<00:23,  1.67s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.042
F1 Score: 0.07973102785782903
Distribution:  0.5
Failed to gain:  -0.458
********************
********************
77 0 0


 86%|████████▌ | 77/90 [02:12<00:23,  1.83s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.792
F1 Score: 0.8280149191877331
Distribution:  0.5
Imp features:  Index(['RTOT4', 'HMGCOAtx', 'ACACT1x', 'GLUt2m', 'NADtx', 'r0463', 'r0202',
       'sink_arg_L[c]', 'CATp', 'r1428'],
      dtype='object')
--------------------
Gain:  0.2925
--------------------
********************
********************
78 1 1


 87%|████████▋ | 78/90 [02:14<00:20,  1.74s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.663
F1 Score: 0.7297514033680834
Distribution:  0.5
Failed to gain:  0.16300000000000003
********************
********************
79 1 1


 88%|████████▊ | 79/90 [02:15<00:18,  1.68s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.602
F1 Score: 0.5443110348770726
Distribution:  0.5
Failed to gain:  0.10150000000000003
********************
********************
80 2 2


 89%|████████▉ | 80/90 [02:17<00:16,  1.65s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.017
F1 Score: 0.03151157065484983
Distribution:  0.5
Failed to gain:  -0.4835
********************
********************
81 0 0


 90%|█████████ | 81/90 [02:19<00:16,  1.79s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.904
F1 Score: 0.9126657521719251
Distribution:  0.5
Imp features:  Index(['RTOT4', 'CATp', 'sink_arg_L[c]', 'r0202', 'ACACT1x', 'r0463',
       'EX_pe_hs[e]', 'HMGCOAtx', 'FAEL183', 'TTDCRNNAt'],
      dtype='object')
--------------------
Gain:  0.40449999999999997
--------------------
********************
********************
82 1 0


 91%|█████████ | 82/90 [02:21<00:13,  1.70s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.002
F1 Score: 0.0
Distribution:  0.5
Failed to gain:  -0.4985
********************
********************
83 2 1


 92%|█████████▏| 83/90 [02:22<00:12,  1.74s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.507
F1 Score: 0.6597653554175292
Distribution:  0.5
Failed to gain:  0.007000000000000006
********************
********************
84 0 2


 93%|█████████▎| 84/90 [02:24<00:10,  1.70s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.390
F1 Score: 0.560633321338611
Distribution:  0.5
Failed to gain:  -0.11049999999999999
********************
********************
85 1 2


 94%|█████████▍| 85/90 [02:26<00:08,  1.66s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.483
F1 Score: 0.13411567476948869
Distribution:  0.5
Failed to gain:  -0.016500000000000015
********************
********************
86 2 0


 96%|█████████▌| 86/90 [02:28<00:07,  1.83s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.247
F1 Score: 0.20317460317460317
Distribution:  0.5
Failed to gain:  -0.253
********************
********************
87 0 1


 97%|█████████▋| 87/90 [02:30<00:05,  1.79s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.527
F1 Score: 0.6733494642239889
Distribution:  0.5
Failed to gain:  0.02749999999999997
********************
********************
88 1 0


 98%|█████████▊| 88/90 [02:31<00:03,  1.72s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.011
F1 Score: 0.0
Distribution:  0.5
Failed to gain:  -0.4885
********************
********************
89 2 1


 99%|█████████▉| 89/90 [02:33<00:01,  1.78s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.519
F1 Score: 0.6685063815108658
Distribution:  0.5
Failed to gain:  0.019499999999999962
********************
********************
90 0 2


100%|██████████| 90/90 [02:35<00:00,  1.72s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.359
F1 Score: 0.528870908422214
Distribution:  0.5
Failed to gain:  -0.1405
********************
********************





In [10]:
# Report the arithmetic mean of each metric tracker, one line per metric.
print(f'Average Accuracy score: {np.mean(accuracy_tracker)}')
print(f'Average F1 score: {np.mean(f1_tracker)}')
print(f'Average validation distribution: {np.mean(val_distribution_tracker)}')
print(f'Average precisions score: {np.mean(precision_tracker)}')
print(f'Average recall score: {np.mean(recall_tracker)}')

Average Accuracy score: 0.43077777777777776
Average F1 score: 0.46211422351263665
Average validation distribution: 0.5
Average precisions score: 0.4214492050346831
Average recall score: 0.5605666666666668


In [11]:
# Top important reactions
# Stack every frame held in imp_df, average the 'rank' column per reaction,
# and keep the 20 reactions with the smallest mean rank (ascending sort).
average_rank = (
    pd.concat(imp_df)
    .groupby('reactions')['rank']
    .mean()
    .to_frame('rank')          # Series -> single-column frame, index still 'reactions'
    .reset_index()             # expose 'reactions' as a regular column
    .sort_values(by='rank')
    .head(20)
    .reset_index(drop=True)    # renumber rows 0..n-1 after sorting
)
average_rank.head(20)

Unnamed: 0,reactions,rank
0,HMGCOAtx,5.85
1,r0463,6.809524
2,RTOT4,7.285714
3,ACACT1x,8.047619
4,GLYLYSCYSr,8.166667
5,GLYLYSCYSt,8.5
6,sink_tyr_L[c],12.65
7,NADtx,13.095238
8,NADtpu,13.142857
9,CATp,13.285714


In [12]:
# Count how many times each value occurs in column 0 of imp_list
# and show the 20 most frequent ones (value_counts sorts by descending count).
(
    pd.DataFrame(imp_list)[0]
    .value_counts()
    .head(20)
)

RTOT4            21
ACACT1x          21
r1428            21
C140CPT1         21
NADtpu           21
NADtx            21
CATp             21
r0463            21
r0202            20
HMR_0393         20
HMR_0327         20
HMGCOAtx         20
sink_tyr_L[c]    20
EX_pcrn[e]       19
sink_arg_L[c]    18
GLUt2m           17
RE0578C          16
EX_ttdcrn[e]     15
r0181            14
TTDCRNNAt        13
Name: 0, dtype: int64