In [1]:
import os
import random
import numpy as np
import pandas as pd
from tqdm import tqdm

import GEOparse
import cobra
import riptide

import matplotlib.pyplot as plt
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score, precision_score, recall_score

### Read transcriptomics file

In [20]:
# Download from https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE57945
fname = 'data/GSE57945_family.soft.gz'
folder_name = 'data/GSE57945_RAW/'

# Parse the GEO SOFT family file
gse = GEOparse.get_GEO(filepath=fname, silent=True)

# Collect one (gsm, name, diagnosis) record per sample and build the frame once.
# DataFrame.append was removed in pandas 2.0, and appending row by row is quadratic.
diagnosis_records = []
for gsm, sample in gse.gsms.items():
    name = sample.metadata['description'][0]
    # NOTE(review): assumes the 5th 'characteristics_ch1' entry is the
    # "<key>: <diagnosis>" field for every sample — confirm against the SOFT file.
    diagnosis = sample.metadata['characteristics_ch1'][4].split(':')[-1].strip()
    diagnosis_records.append((gsm, name, diagnosis))

# Columns are set at construction, so an empty sample set still yields a valid frame.
diagnosis_data = pd.DataFrame(diagnosis_records, columns=['gsm', 'name', 'diagnosis'])

In [100]:
# One expression table per diagnosis label exactly as spelled in the metadata.
# Both capitalisations of the control label are listed and merged further down.
dfs = {label: pd.DataFrame() for label in ('CD', 'UC', 'Not IBD', 'not IBD')}

for _, sample in diagnosis_data.iterrows():
    label = sample['diagnosis']
    sample_name = sample['name']
    counts = pd.read_csv(folder_name + sample['gsm'] + '_' + sample_name + '.txt', sep='\t')

    if len(dfs[label]) == 0:
        # First sample of a label seeds the table and keeps the symbol column
        dfs[label] = counts[['Gene ID', 'Gene Symbol', sample_name]]
    else:
        # Later samples contribute only their own column, joined on gene id
        dfs[label] = pd.merge(dfs[label], counts[['Gene ID', sample_name]], on='Gene ID')

# Fold the lower-case 'not IBD' table into 'Not IBD', then drop the duplicate key
lower_case_controls = dfs['not IBD']
dfs['Not IBD'] = pd.merge(dfs['Not IBD'], lower_case_controls, on=['Gene ID', 'Gene Symbol'])
del dfs['not IBD']


# Gene map (gene_number <-> symbol) for the genes present in Recon3D
gene_name_num = pd.read_csv('data/gene_name_number.tsv', sep='\t')[['gene_number', 'symbol']]

id_map = {}
for label in dfs.keys():
    # Patient names carrying exactly this diagnosis label
    has_label = diagnosis_data['diagnosis'] == label
    id_map[label] = diagnosis_data.loc[has_label, 'name'].tolist()

    # Attach gene numbers by matching on the gene symbol
    dfs[label] = pd.merge(gene_name_num, dfs[label], left_on='symbol', right_on='Gene Symbol')

# Patients labelled with the lower-case spelling belong to 'Not IBD' as well
id_map['Not IBD'].extend(diagnosis_data.loc[diagnosis_data['diagnosis'] == 'not IBD', 'name'].tolist())

### Run Riptide for all patients

In [None]:
# Load the Recon3D model from its MATLAB file with COBRApy
model = cobra.io.load_matlab_model('data/Recon3D_301.mat')
# Builds the stoichiometric matrix and reads its shape. The result is not
# assigned to anything, and because this is not the last expression in the
# cell it is not displayed either.
cobra.util.array.create_stoichiometric_matrix(model).shape

def run_riptide(fname, pat_name):
    """Contextualize the notebook-level ``model`` with one transcriptome file.

    fname: path of the transcript abundance file handed to
        ``riptide.read_transcription_file`` (read with replicates=False, norm=False).
    pat_name: value stored in the ``sample_number`` column of the result.

    Returns the ``flux_samples`` attribute of the RIPTiDe result with the
    ``sample_number`` column set to ``pat_name``.
    """
    abundances = riptide.read_transcription_file(fname, replicates=False, norm=False)
    contextualized = riptide.contextualize(model=model, transcriptome=abundances, fraction=1.)

    # Tag every flux sample so rows stay attributable after files are combined
    flux_samples = contextualized.flux_samples
    flux_samples['sample_number'] = pat_name
    return flux_samples

In [110]:
# Per-patient results are written to output/<patient>.csv; make sure the folder exists
os.makedirs('output', exist_ok=True)

for keys in dfs.keys():
    print('Running for - {}'.format(keys))
    for pat_id in id_map[keys]:
        # Read from the expression table of the group being processed.
        # (Previously hard-coded to dfs['CD'], which has no columns for UC / Not IBD patients.)
        dfs[keys][['gene_number', pat_id]].to_csv('sample.tsv', header=False, index=False, sep='\t')
        pat_output = run_riptide('sample.tsv', pat_id)
        pat_output.to_csv('output/{}.csv'.format(pat_id), index=False)

# Combine flux samples: one file per group containing every patient of that group.
# (A leftover `break` used to stop after the first patient.)
for keys in dfs.keys():
    list_of_pat_flux = [pd.read_csv('output/{}.csv'.format(pat_id)) for pat_id in id_map[keys]]
    if not list_of_pat_flux:
        # pd.concat raises on an empty list; nothing to write for this group
        print('No patients for - {}'.format(keys))
        continue
    # join='inner' keeps only the columns present in every patient's file
    pd.concat(list_of_pat_flux, join='inner', ignore_index=True).to_csv(
        'output/riptide_{}_flux_sample.csv'.format(keys), index=False)

Running for - CD

Initializing model and integrating transcriptomic data...
Pruning zero flux subnetworks...
Analyzing context-specific flux distributions...

Reactions pruned to 817 from 13543 (93.97% change)
Metabolites pruned to 773 from 8399 (90.8% change)
Flux through the objective DECREASED to ~991.2 from ~1000.0 (0.88% change)
Context-specific metabolism correlates with transcriptome (r=-0.191, p<0.001 *)

RIPTiDe completed in, 5 minutes and 51 seconds 



### Run ML model on aggregated flux files

In [125]:
# Aggregated flux samples per group, as written by the combine step
df_cd = pd.read_csv('output/riptide_CD_flux_sample.csv')
# The control file must go into its own frame. It was previously assigned to
# df_cd, which overwrote the CD data and left df_not_ibd undefined.
df_not_ibd = pd.read_csv('output/riptide_Not IBD_flux_sample.csv')

# Using only iCD patients
cd_ibd = pd.read_csv('data/2-7-2021 cd_uc_split.csv')
icd_names = cd_ibd.loc[cd_ibd['value'] == 'iCD', 'name'].tolist()
df_cd = df_cd.loc[df_cd['sample_number'].isin(icd_names)].reset_index(drop=True)

# Binary target: 1 = CD, 0 = not IBD
df_cd['label'] = 1
df_not_ibd['label'] = 0

# join='inner' keeps only the columns present in both groups
df = pd.concat([df_cd, df_not_ibd], join='inner')

##### Model instructions
 - Run the model multiple times, each with a different train-validation split.
 - Keep the top reactions only from runs where the gain (test accuracy minus the majority-class share of the test set) is greater than 0.2. This rule can be modified based on project goals.

In [134]:
imp_list = []
accuracy_tracker = []
f1_tracker = []
precision_tracker = []
recall_tracker = []

val_distribution_tracker = []
imp_df = []

# Seed the split RNG so the sequence of train/validation splits is repeatable
random.seed(0)

# Loop-invariant inputs, built once instead of on every run
df = pd.concat([df_cd, df_not_ibd], join='inner').reset_index(drop=True)
cd_patients = list(df_cd['sample_number'].unique())
not_ibd_patients = list(df_not_ibd['sample_number'].unique())

# One ordered feature list shared by train and test. Rebuilding it from a set
# (as before) made the column order depend on string hashing, which changes
# between kernel sessions and made feature rankings non-reproducible.
feature_cols = [col for col in df.columns if col not in ('sample_number', 'label')]

# Run model 100 times
for run_idx in tqdm(range(100)):

    # Keeping train-val split ratio 80-20: hold out 10 CD and 8 not-IBD patients
    test_sample = random.sample(cd_patients, 10) + random.sample(not_ibd_patients, 8)

    # Undersample CD patients to balance the split; held-out patients are removed
    train_sample = list(set(random.sample(cd_patients, 50) + not_ibd_patients) - set(test_sample))

    # Split by patient so all flux samples of one patient fall on the same side
    train_mask = df['sample_number'].isin(train_sample)
    test_mask = df['sample_number'].isin(test_sample)

    X_train = df.loc[train_mask, feature_cols].reset_index(drop=True)
    y_train = df.loc[train_mask, 'label'].reset_index(drop=True)
    X_test = df.loc[test_mask, feature_cols].reset_index(drop=True)
    y_test = df.loc[test_mask, 'label'].reset_index(drop=True)

    # A per-run random_state keeps each forest reproducible while still varying across runs
    rf = RandomForestClassifier(n_estimators=100, random_state=run_idx)
    rf.fit(X_train, y_train)

    y_train_pred = rf.predict(X_train)
    print('Training accuracy on selected features: %.3f' % accuracy_score(y_train, y_train_pred))

    # Compute each validation metric once and reuse it for printing and tracking
    y_test_pred = rf.predict(X_test)
    test_accuracy = accuracy_score(y_test, y_test_pred)
    test_f1 = f1_score(y_test, y_test_pred)
    positive_rate = y_test.mean()
    # Accuracy of always predicting the majority class of this validation set
    baseline = max(positive_rate, (1-positive_rate))

    print('Testing accuracy on selected features: %.3f' % test_accuracy)
    print('F1 Score: {}'.format(test_f1))
    print('Distribution: ', baseline)

    # Features sorted from most to least important
    selected_cols = X_train.columns[np.argsort(rf.feature_importances_)[::-1]]
    top_cols = selected_cols[:50]

    val_distribution_tracker.append(positive_rate)
    f1_tracker.append(test_f1)
    precision_tracker.append(precision_score(y_test, y_test_pred))
    recall_tracker.append(recall_score(y_test, y_test_pred))
    accuracy_tracker.append(test_accuracy)
    gain = test_accuracy-baseline

    if gain>0.2:
        imp_list += list(top_cols)
        # Rank length follows the number of features actually available
        # (a fixed range(1, 51) fails when there are fewer than 50 columns)
        imp_df.append(pd.DataFrame({'reactions': top_cols, 'rank': range(1, len(top_cols) + 1)}))
        print('Imp features: ', selected_cols[:10])
        print('-'*20)
        print('Gain: ', gain)
        print('-'*20)
    else:
        print('Failed to gain: ', gain)

    print('*'*20)
    print('*'*20)

  1%|          | 1/100 [00:06<10:34,  6.41s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.907
F1 Score: 0.9100410600970511
Distribution:  0.5246723207401697
Imp features:  Index(['ATP2ter', 'HMGCOAtm', 'EX_uri[e]', 'LPS2', 'FACOAL245_2',
       'sink_lnlncacoa[c]', 'URIt', 'r1466', 'EX_Rtotal[e]', 'MEV_Rt'],
      dtype='object')
--------------------
Gain:  0.3824209714726291
--------------------
********************
********************


  2%|▏         | 2/100 [00:11<09:39,  5.91s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.816
F1 Score: 0.7967618236046017
Distribution:  0.5374228395061729
Imp features:  Index(['ATP2ter', 'PEt', 'EX_pe_hs[e]', 'sink_lnlncacoa[c]', 'r1004', 'r1466',
       'r0431', 'r0430', 'EX_uri[e]', 'FACOAL245_2'],
      dtype='object')
--------------------
Gain:  0.27854938271604934
--------------------
********************
********************


  3%|▎         | 3/100 [00:18<09:50,  6.09s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.884
F1 Score: 0.9128500392567389
Distribution:  0.6127793296089385
Imp features:  Index(['FACOAL245_2', 'ATP2ter', 'HMGCOAtm', 'EX_mev_R[e]', 'r0488', 'MEV_Rt',
       'LPS2', 'EX_uri[e]', 'RTOTALt', 'URIt'],
      dtype='object')
--------------------
Gain:  0.270949720670391
--------------------
********************
********************


  4%|▍         | 4/100 [00:24<09:41,  6.06s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.712
F1 Score: 0.7525083612040133
Distribution:  0.608167233835683
Failed to gain:  0.10403500243072439
********************
********************


  5%|▌         | 5/100 [00:29<09:13,  5.83s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.726
F1 Score: 0.6688632619439867
Distribution:  0.5479763528876762
Failed to gain:  0.17780809458844926
********************
********************


  6%|▌         | 6/100 [00:35<09:06,  5.81s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.855
F1 Score: 0.8703170028818443
Distribution:  0.5570739549839229
Imp features:  Index(['r0488', 'MEV_Rt', 'HMGCOAtm', 'EX_mev_R[e]', 'ATP2ter', 'FACOAL245_2',
       'PEt', 'EX_sphgn[e]', 'LPS2', 'EX_pe_hs[e]'],
      dtype='object')
--------------------
Gain:  0.29823151125401925
--------------------
********************
********************


  7%|▋         | 7/100 [00:40<08:40,  5.59s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.811
F1 Score: 0.8259109311740891
Distribution:  0.5114503816793893
Imp features:  Index(['MEV_Rt', 'sink_lnlc[c]', 'r1050', 'ATP2ter', 'sink_lnlncacoa[c]',
       'NDPK4', 'LPS2', 'PGLYCt', 'PEt', 'CLS_hs'],
      dtype='object')
--------------------
Gain:  0.29917792131532595
--------------------
********************
********************


  8%|▊         | 8/100 [00:46<08:39,  5.65s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.695
F1 Score: 0.6418242491657398
Distribution:  0.510194404931247
Failed to gain:  0.1844476055002371
********************
********************


  9%|▉         | 9/100 [00:51<08:19,  5.49s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.876
F1 Score: 0.8628708901363271
Distribution:  0.5142960550126674
Imp features:  Index(['FACOAL245_2', 'URIt', 'HMGCOAtm', 'EX_mev_R[e]', 'r1466',
       'sink_lnlncacoa[c]', 'r0488', 'ATP2ter', 'EX_uri[e]', 'sink_lnlc[c]'],
      dtype='object')
--------------------
Gain:  0.36192544335866805
--------------------
********************
********************


 10%|█         | 10/100 [00:57<08:22,  5.58s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.831
F1 Score: 0.8447796744739976
Distribution:  0.5493934142114385
Imp features:  Index(['ATP2ter', 'MEV_Rt', 'sink_lnlncacoa[c]', 'HMGCOAtm', 'EX_mev_R[e]',
       'r0488', 'r1050', 'FACOAL245_2', 'EX_Rtotal[e]', 'RTOTALt'],
      dtype='object')
--------------------
Gain:  0.2811958405545927
--------------------
********************
********************


 11%|█         | 11/100 [01:03<08:21,  5.64s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.697
F1 Score: 0.666359021688971
Distribution:  0.5016778523489933
Failed to gain:  0.1950503355704698
********************
********************


 12%|█▏        | 12/100 [01:08<08:20,  5.68s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.814
F1 Score: 0.7832238504295097
Distribution:  0.5075790385448247
Imp features:  Index(['ATP2ter', 'EX_uri[e]', 'FACOAL245_2', 'URIt', 'r1466',
       'sink_lnlncacoa[c]', 'sink_lnlc[c]', 'EX_mev_R[e]', 'HMGCOAtm',
       'MEV_Rt'],
      dtype='object')
--------------------
Gain:  0.3066262451277608
--------------------
********************
********************


 13%|█▎        | 13/100 [01:15<08:26,  5.82s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.759
F1 Score: 0.8089005235602095
Distribution:  0.5625774473358116
Failed to gain:  0.19619991738950848
********************
********************


 14%|█▍        | 14/100 [01:21<08:31,  5.95s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.816
F1 Score: 0.8169528681037631
Distribution:  0.5377565982404692
Imp features:  Index(['ATP2ter', 'r0488', 'sink_lnlncacoa[c]', 'EX_mev_R[e]', 'HMGCOAtm',
       'FACOAL245_2', 'r1466', 'r1004', 'RTOTALt', 'DNDPt9m'],
      dtype='object')
--------------------
Gain:  0.278592375366569
--------------------
********************
********************


 15%|█▌        | 15/100 [01:27<08:44,  6.18s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.730
F1 Score: 0.7789248843004628
Distribution:  0.6165870603560574
Failed to gain:  0.11376465479808939
********************
********************


 16%|█▌        | 16/100 [01:34<08:46,  6.27s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.903
F1 Score: 0.9099848714069593
Distribution:  0.5291970802919708
Imp features:  Index(['ATP2ter', 'FACOAL245_2', 'URIt', 'sink_lnlncacoa[c]', 'GLDBRAN',
       'DNDPt9m', 'EX_dgsn[e]', 'DGTPtn', 'PGLYCt', 'r0431'],
      dtype='object')
--------------------
Gain:  0.37429034874290357
--------------------
********************
********************


 17%|█▋        | 17/100 [01:40<08:30,  6.15s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.862
F1 Score: 0.8515433610975014
Distribution:  0.5367747830059388
Imp features:  Index(['FACOAL245_2', 'r0488', 'ATP2ter', 'biomass_reaction', 'URIt',
       'SPHGNte', 'EX_Rtotal[e]', 'HMGCOAtm', 'r0407', 'HMR_0753'],
      dtype='object')
--------------------
Gain:  0.3248058474189127
--------------------
********************
********************


 18%|█▊        | 18/100 [01:46<08:17,  6.07s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.748
F1 Score: 0.7741450582487786
Distribution:  0.633696563285834
Failed to gain:  0.11441743503772006
********************
********************


 19%|█▉        | 19/100 [01:52<08:27,  6.26s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.835
F1 Score: 0.8722589627567003
Distribution:  0.6174617461746175
Imp features:  Index(['ATP2ter', 'EX_sphgn[e]', 'HMGCOAtm', 'FACOAL245_2', 'HMR_0692',
       'MEV_Rt', 'RTOTALt', 'r1050', 'LPS2', 'EX_mev_R[e]'],
      dtype='object')
--------------------
Gain:  0.21737173717371738
--------------------
********************
********************


 20%|██        | 20/100 [01:59<08:22,  6.28s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.886
F1 Score: 0.9053638537751963
Distribution:  0.5897647544366488
Imp features:  Index(['ATP2ter', 'sink_lnlc[c]', 'FACOAL245_2', 'GLPASE1', 'EX_pe_hs[e]',
       'NDPK4', 'EX_mev_R[e]', 'r1466', 'r0488', 'sink_lnlncacoa[c]'],
      dtype='object')
--------------------
Gain:  0.2959141560049525
--------------------
********************
********************


 21%|██        | 21/100 [02:05<08:27,  6.42s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.630
F1 Score: 0.686652977412731
Distribution:  0.6631067961165048
Failed to gain:  -0.033495145631067924
********************
********************


 22%|██▏       | 22/100 [02:12<08:24,  6.47s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.876
F1 Score: 0.8608608608608609
Distribution:  0.54014272970562
Imp features:  Index(['ATP2ter', 'sink_lnlncacoa[c]', 'EX_pglyc_hs[e]', 'DGTPtn', 'PGLYCt',
       'HMR_0753', 'DNDPt9m', 'CLS_hs', 'HMR_0692', 'EX_sphgn[e]'],
      dtype='object')
--------------------
Gain:  0.3358608385370204
--------------------
********************
********************


 23%|██▎       | 23/100 [02:19<08:18,  6.47s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.869
F1 Score: 0.8801791713325867
Distribution:  0.5538272615636513
Imp features:  Index(['ATP2ter', 'r1466', 'FACOAL245_2', 'sink_lnlncacoa[c]', 'r0430',
       'r0431', 'DTTPtn', 'HMGCOAtm', 'r1004', 'EX_sphgn[e]'],
      dtype='object')
--------------------
Gain:  0.31477691363078186
--------------------
********************
********************


 24%|██▍       | 24/100 [02:25<08:11,  6.47s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.718
F1 Score: 0.757347915242652
Distribution:  0.5646123260437376
Failed to gain:  0.1530815109343936
********************
********************


 25%|██▌       | 25/100 [02:31<07:45,  6.20s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.808
F1 Score: 0.8527801786228753
Distribution:  0.6351656626506024
Failed to gain:  0.1724397590361446
********************
********************


 26%|██▌       | 26/100 [02:37<07:48,  6.33s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.794
F1 Score: 0.8409314529353886
Distribution:  0.6519148936170213
Failed to gain:  0.14170212765957446
********************
********************


 27%|██▋       | 27/100 [02:43<07:38,  6.28s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.744
F1 Score: 0.7545001914975106
Distribution:  0.5689724110355857
Failed to gain:  0.17473010795681732
********************
********************


 28%|██▊       | 28/100 [02:49<07:23,  6.16s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.714
F1 Score: 0.7485811577752555
Distribution:  0.5554480980012895
Failed to gain:  0.15892972275951
********************
********************


 29%|██▉       | 29/100 [02:56<07:20,  6.21s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.863
F1 Score: 0.8847962382445141
Distribution:  0.5624709707385044
Imp features:  Index(['FACOAL245_2', 'ATP2ter', 'SPHGNte', 'EX_mev_R[e]', 'HMR_0753',
       'DTTPtn', 'sink_lnlncacoa[c]', 'HMGCOAtm', 'URIt', 'EX_uri[e]'],
      dtype='object')
--------------------
Gain:  0.3009753831862517
--------------------
********************
********************


 30%|███       | 30/100 [03:02<07:09,  6.13s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.797
F1 Score: 0.8251618871415356
Distribution:  0.606970509383378
Failed to gain:  0.19034852546916892
********************
********************


 31%|███       | 31/100 [03:08<07:00,  6.10s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.934
F1 Score: 0.9482439926062847
Distribution:  0.6409448818897637
Imp features:  Index(['ATP2ter', 'EX_Rtotal[e]', 'RTOTALt', 'FACOAL245_2', 'EX_mev_R[e]',
       'sink_lnlc[c]', 'r0488', 'LPS2', 'HMGCOAtm', 'EX_uri[e]'],
      dtype='object')
--------------------
Gain:  0.29291338582677173
--------------------
********************
********************


 32%|███▏      | 32/100 [03:14<07:06,  6.27s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.871
F1 Score: 0.8796263025512037
Distribution:  0.5789676425269645
Imp features:  Index(['HMGCOAtm', 'ATP2ter', 'MEV_Rt', 'EX_mev_R[e]', 'FACOAL245_2', 'r0488',
       'sink_lnlncacoa[c]', 'SPHGNte', 'EX_uri[e]', 'GLPASE1'],
      dtype='object')
--------------------
Gain:  0.29198767334360554
--------------------
********************
********************


 33%|███▎      | 33/100 [03:21<07:00,  6.28s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.905
F1 Score: 0.9048821548821548
Distribution:  0.533698399326032
Imp features:  Index(['ATP2ter', 'HMR_0692', 'EX_pe_hs[e]', 'PGLYCt', 'PEt', 'FACOAL245_2',
       'sink_lnlncacoa[c]', 'DNDPt9m', 'EX_pglyc_hs[e]', 'biomass_reaction'],
      dtype='object')
--------------------
Gain:  0.3711036225779275
--------------------
********************
********************


 34%|███▍      | 34/100 [03:27<06:55,  6.30s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.854
F1 Score: 0.8848901825879864
Distribution:  0.644175897952333
Imp features:  Index(['HMGCOAtm', 'sink_lnlncacoa[c]', 'r1466', 'r0488', 'LPS2',
       'EX_HC01361[e]', 'EX_mev_R[e]', 'ATP2ter', 'MEV_Rt', 'EX_Rtotal[e]'],
      dtype='object')
--------------------
Gain:  0.20980194696206778
--------------------
********************
********************


 35%|███▌      | 35/100 [03:33<06:40,  6.17s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.775
F1 Score: 0.8096054888507719
Distribution:  0.6025033829499323
Failed to gain:  0.17219215155615697
********************
********************


 36%|███▌      | 36/100 [03:39<06:30,  6.09s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.890
F1 Score: 0.8979739507959479
Distribution:  0.5694716242661448
Imp features:  Index(['ATP2ter', 'FACOAL245_2', 'HMGCOAtm', 'r0488', 'EX_mev_R[e]',
       'EX_pglyc_hs[e]', 'MEV_Rt', 'RTOTALt', 'SMS', 'r1466'],
      dtype='object')
--------------------
Gain:  0.32015655577299407
--------------------
********************
********************


 37%|███▋      | 37/100 [03:44<06:18,  6.01s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.723
F1 Score: 0.7185185185185186
Distribution:  0.5210867802108679
Imp features:  Index(['FACOAL245_2', 'sink_lnlncacoa[c]', 'HMGCOAtm', 'MEV_Rt', 'ATP2ter',
       'RE0344C', 'sink_lnlc[c]', 'NDPK4', 'r0488', 'CLS_hs'],
      dtype='object')
--------------------
Gain:  0.20154095701540953
--------------------
********************
********************


 38%|███▊      | 38/100 [03:50<06:11,  6.00s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.930
F1 Score: 0.9283246977547496
Distribution:  0.5205421431596781
Imp features:  Index(['ATP2ter', 'sink_lnlncacoa[c]', 'EX_pglyc_hs[e]', 'FACOAL245_2',
       'HMR_0692', 'DNDPt9m', 'RTOTALt', 'PGLYCt', 'DGTPtn', 'SPHGNte'],
      dtype='object')
--------------------
Gain:  0.409148665819568
--------------------
********************
********************


 39%|███▉      | 39/100 [03:56<05:59,  5.89s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.881
F1 Score: 0.8584031760035289
Distribution:  0.5786745649759348
Imp features:  Index(['ATP2ter', 'DGTPtn', 'PGLYCt', 'HMR_0753', 'DNDPt9m', 'SMS', 'CLS_hs',
       'sink_lnlncacoa[c]', 'EX_sphgn[e]', 'DTTPtn'],
      dtype='object')
--------------------
Gain:  0.3024805627545354
--------------------
********************
********************


 40%|████      | 40/100 [04:02<05:48,  5.81s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.820
F1 Score: 0.8294601358598498
Distribution:  0.5723981900452488
Imp features:  Index(['ATP2ter', 'FACOAL245_2', 'biomass_reaction', 'PEt', 'LPS2', 'SPHGNte',
       'r1466', 'DNDPt9m', 'SMS', 'EX_sphgn[e]'],
      dtype='object')
--------------------
Gain:  0.24773755656108598
--------------------
********************
********************


 41%|████      | 41/100 [04:08<05:42,  5.81s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.704
F1 Score: 0.7145692735212386
Distribution:  0.5385567010309278
Failed to gain:  0.1649484536082475
********************
********************


 42%|████▏     | 42/100 [04:13<05:35,  5.78s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.904
F1 Score: 0.8953271028037383
Distribution:  0.5323340471092077
Imp features:  Index(['ATP2ter', 'FACOAL245_2', 'HMGCOAtm', 'EX_uri[e]', 'MEV_Rt', 'URIt',
       'sink_lnlncacoa[c]', 'sink_lnlc[c]', 'r1466', 'r0488'],
      dtype='object')
--------------------
Gain:  0.37173447537473236
--------------------
********************
********************


 43%|████▎     | 43/100 [04:19<05:29,  5.77s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.875
F1 Score: 0.839460214053048
Distribution:  0.5910740203193033
Imp features:  Index(['ATP2ter', 'sink_lnlncacoa[c]', 'HMGCOAtm', 'MEV_Rt', 'EX_mev_R[e]',
       'PEt', 'RE0344C', 'FACOAL245_2', 'EX_pe_hs[e]', 'RTOTALt'],
      dtype='object')
--------------------
Gain:  0.283744557329463
--------------------
********************
********************


 44%|████▍     | 44/100 [04:25<05:27,  5.86s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.860
F1 Score: 0.8727272727272727
Distribution:  0.5763747454175153
Imp features:  Index(['ATP2ter', 'FACOAL245_2', 'sink_lnlncacoa[c]', 'EX_pe_hs[e]', 'PEt',
       'DGTPtn', 'HMR_0692', 'EX_Rtotal[e]', 'SPHGNte', 'URIt'],
      dtype='object')
--------------------
Gain:  0.2834351663272233
--------------------
********************
********************


 45%|████▌     | 45/100 [04:31<05:22,  5.87s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.705
F1 Score: 0.7146332985749044
Distribution:  0.580309019044197
Failed to gain:  0.1246855910887531
********************
********************


 46%|████▌     | 46/100 [04:37<05:20,  5.94s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.781
F1 Score: 0.8096402375130981
Distribution:  0.5834001603849238
Failed to gain:  0.19807538091419408
********************
********************


 47%|████▋     | 47/100 [04:44<05:24,  6.12s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.864
F1 Score: 0.8846153846153846
Distribution:  0.6263418662262593
Imp features:  Index(['ATP2ter', 'EX_pe_hs[e]', 'PEt', 'EX_sphgn[e]', 'HMGCOAtm', 'HMR_0753',
       'PGLYCt', 'DGTPtn', 'HMR_0692', 'NDPK4'],
      dtype='object')
--------------------
Gain:  0.23740710156895128
--------------------
********************
********************


 48%|████▊     | 48/100 [04:49<05:13,  6.02s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.781
F1 Score: 0.7622828784119108
Distribution:  0.523112128146453
Imp features:  Index(['ATP2ter', 'FACOAL245_2', 'sink_lnlncacoa[c]', 'EX_pglyc_hs[e]',
       'sink_lnlc[c]', 'EX_Rtotal[e]', 'DATPtn', 'SPHGNte', 'PGLYCt',
       'RTOTALt'],
      dtype='object')
--------------------
Gain:  0.2576659038901602
--------------------
********************
********************


 49%|████▉     | 49/100 [04:55<05:04,  5.96s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.628
F1 Score: 0.6066693451185214
Distribution:  0.605093120486507
Failed to gain:  0.022805017103762926
********************
********************


 50%|█████     | 50/100 [05:02<05:06,  6.13s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.894
F1 Score: 0.9183618117710877
Distribution:  0.6375838926174496
Imp features:  Index(['ATP2ter', 'HMGCOAtm', 'r0139', 'r0488', 'EX_mev_R[e]', 'RTOTALt',
       'sink_lnlncacoa[c]', 'EX_Rtotal[e]', 'EX_ctp[e]', 'EX_cmp[e]'],
      dtype='object')
--------------------
Gain:  0.25609325326739674
--------------------
********************
********************


 51%|█████     | 51/100 [05:08<05:02,  6.18s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.811
F1 Score: 0.7574786324786323
Distribution:  0.573094543940025
Imp features:  Index(['ATP2ter', 'sink_lnlc[c]', 'MEV_Rt', 'HMR_0753', 'FACOAL245_2',
       'EX_Rtotal[e]', 'sink_lnlncacoa[c]', 'RTOTALt', 'EX_mev_R[e]',
       'EX_uri[e]'],
      dtype='object')
--------------------
Gain:  0.23781757600999576
--------------------
********************
********************


 52%|█████▏    | 52/100 [05:14<04:54,  6.13s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.721
F1 Score: 0.7618398637137991
Distribution:  0.6389444222311076
Failed to gain:  0.08156737305077966
********************
********************


 53%|█████▎    | 53/100 [05:20<04:48,  6.15s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.902
F1 Score: 0.9127906976744187
Distribution:  0.5806188925081434
Imp features:  Index(['ATP2ter', 'HMGCOAtm', 'r1466', 'ARTFR12', 'r0488', 'RE0344C', 'URIt',
       'EX_mev_R[e]', 'r1004', 'sink_lnlncacoa[c]'],
      dtype='object')
--------------------
Gain:  0.3216612377850162
--------------------
********************
********************


 54%|█████▍    | 54/100 [05:27<04:49,  6.28s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.826
F1 Score: 0.8557723057243365
Distribution:  0.6611410948342329
Failed to gain:  0.16499614494988435
********************
********************


 55%|█████▌    | 55/100 [05:34<05:00,  6.67s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.838
F1 Score: 0.8754962107542403
Distribution:  0.5961538461538461
Imp features:  Index(['ATP2ter', 'EX_uri[e]', 'URIt', 'DNDPt9m', 'PEt', 'EX_Rtotal[e]', 'SMS',
       'r1466', 'biomass_reaction', 'r1004'],
      dtype='object')
--------------------
Gain:  0.24202626641651037
--------------------
********************
********************


 56%|█████▌    | 56/100 [05:42<05:03,  6.90s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.708
F1 Score: 0.7125581395348837
Distribution:  0.5593942262186464
Failed to gain:  0.14813061997160437
********************
********************


 57%|█████▋    | 57/100 [05:48<04:43,  6.60s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.824
F1 Score: 0.859251161519541
Distribution:  0.5597398151318042
Imp features:  Index(['FACOAL245_2', 'EX_mev_R[e]', 'ATP2ter', 'LPS2', 'MEV_Rt',
       'sink_lnlncacoa[c]', 'r0488', 'EX_Rtotal[e]', 'RTOTALt',
       'sink_lnlc[c]'],
      dtype='object')
--------------------
Gain:  0.26395070181444713
--------------------
********************
********************


 58%|█████▊    | 58/100 [05:54<04:33,  6.50s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.856
F1 Score: 0.8738077769625824
Distribution:  0.5411961522375575
Imp features:  Index(['ATP2ter', 'PEt', 'EX_uri[e]', 'FACOAL245_2', 'EX_pe_hs[e]',
       'sink_lnlc[c]', 'RE0344C', 'URIt', 'DATPtn', 'EX_mev_R[e]'],
      dtype='object')
--------------------
Gain:  0.314930991217064
--------------------
********************
********************


 59%|█████▉    | 59/100 [06:00<04:20,  6.36s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.793
F1 Score: 0.8334600760456274
Distribution:  0.6302600472813239
Failed to gain:  0.16264775413711585
********************
********************


 60%|██████    | 60/100 [06:06<04:11,  6.28s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.864
F1 Score: 0.8561776061776061
Distribution:  0.5316628701594532
Imp features:  Index(['ATP2ter', 'PEt', 'FACOAL245_2', 'HMGCOAtm', 'EX_pe_hs[e]', 'r0488',
       'EX_uri[e]', 'DNDPt9m', 'sink_lnlncacoa[c]', 'HMR_0692'],
      dtype='object')
--------------------
Gain:  0.3325740318906607
--------------------
********************
********************


 61%|██████    | 61/100 [06:13<04:11,  6.44s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.888
F1 Score: 0.9081850533807829
Distribution:  0.6288076588337685
Imp features:  Index(['ATP2ter', 'sink_lnlncacoa[c]', 'biomass_reaction', 'CLS_hs', 'SMS',
       'r0430', 'r1004', 'DGTPtn', 'r1050', 'DTTPtn'],
      dtype='object')
--------------------
Gain:  0.25892080069625756
--------------------
********************
********************


 62%|██████▏   | 62/100 [06:19<04:02,  6.39s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.769
F1 Score: 0.8125257519571487
Distribution:  0.5246067985794013
Imp features:  Index(['ATP2ter', 'FACOAL245_2', 'sink_lnlncacoa[c]', 'sink_lnlc[c]', 'r1466',
       'PEt', 'EX_uri[e]', 'r0488', 'GLPASE1', 'EX_mev_R[e]'],
      dtype='object')
--------------------
Gain:  0.24454591577879248
--------------------
********************
********************


 63%|██████▎   | 63/100 [06:25<03:50,  6.22s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.890
F1 Score: 0.8872081552121012
Distribution:  0.5284891165172856
Imp features:  Index(['ATP2ter', 'r0488', 'sink_lnlncacoa[c]', 'EX_mev_R[e]', 'MEV_Rt',
       'HMGCOAtm', 'SMS', 'DNDPt9m', 'FACOAL245_2', 'SPHGNte'],
      dtype='object')
--------------------
Gain:  0.3617157490396926
--------------------
********************
********************


 64%|██████▍   | 64/100 [06:31<03:36,  6.02s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.923
F1 Score: 0.9297068173790181
Distribution:  0.5445736434108527
Imp features:  Index(['ATP2ter', 'FACOAL245_2', 'r0488', 'EX_mev_R[e]', 'MEV_Rt', 'PGLYCt',
       'biomass_reaction', 'DTTPtn', 'HMGCOAtm', 'r1466'],
      dtype='object')
--------------------
Gain:  0.3782945736434109
--------------------
********************
********************


 65%|██████▌   | 65/100 [06:36<03:28,  5.96s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.754
F1 Score: 0.74577440496723
Distribution:  0.5667556742323098
Failed to gain:  0.18724966622162886
********************
********************


 66%|██████▌   | 66/100 [06:43<03:25,  6.05s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.916
F1 Score: 0.9271050282653971
Distribution:  0.6137693631669535
Imp features:  Index(['ATP2ter', 'FACOAL245_2', 'PEt', 'sink_lnlncacoa[c]', 'r0488',
       'EX_pe_hs[e]', 'LPS2', 'EX_uri[e]', 'EX_mev_R[e]', 'NDPK4'],
      dtype='object')
--------------------
Gain:  0.30189328743545607
--------------------
********************
********************


 67%|██████▋   | 67/100 [06:49<03:22,  6.14s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.914
F1 Score: 0.931197670185657
Distribution:  0.6170018281535649
Imp features:  Index(['ATP2ter', 'HMGCOAtm', 'FACOAL245_2', 'RE3245C', 'r0488', 'r1466',
       'sink_lnlncacoa[c]', 'MEV_Rt', 'EX_Rtotal[e]', 'sink_lnlc[c]'],
      dtype='object')
--------------------
Gain:  0.296617915904936
--------------------
********************
********************


 68%|██████▊   | 68/100 [06:56<03:21,  6.29s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.754
F1 Score: 0.8063439065108513
Distribution:  0.6430594900849859
Failed to gain:  0.11048158640226624
********************
********************


 69%|██████▉   | 69/100 [07:03<03:23,  6.56s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.802
F1 Score: 0.842671194114318
Distribution:  0.5640385301462718
Imp features:  Index(['ATP2ter', 'FACOAL245_2', 'sink_lnlncacoa[c]', 'sink_lnlc[c]',
       'HMGCOAtm', 'EX_mev_R[e]', 'MEV_Rt', 'r0488', 'RE0344C', 'r1050'],
      dtype='object')
--------------------
Gain:  0.23760256867641816
--------------------
********************
********************


 70%|███████   | 70/100 [07:10<03:23,  6.78s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.728
F1 Score: 0.7419493344783168
Distribution:  0.653393665158371
Failed to gain:  0.07466063348416285
********************
********************


 71%|███████   | 71/100 [07:18<03:23,  7.02s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.834
F1 Score: 0.8496110630942093
Distribution:  0.5663167938931297
Imp features:  Index(['FACOAL245_2', 'ATP2ter', 'EX_mev_R[e]', 'MEV_Rt', 'r0488', 'HMGCOAtm',
       'sink_lnlncacoa[c]', 'EX_Rtotal[e]', 'EX_uri[e]', 'biomass_reaction'],
      dtype='object')
--------------------
Gain:  0.26765267175572527
--------------------
********************
********************


 72%|███████▏  | 72/100 [07:24<03:13,  6.91s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.851
F1 Score: 0.8877162122544708
Distribution:  0.6058365758754863
Imp features:  Index(['ATP2ter', 'sink_lnlncacoa[c]', 'MEV_Rt', 'EX_uri[e]', 'RE0344C',
       'FACOAL245_2', 'HMGCOAtm', 'EX_mev_R[e]', 'EX_pglyc_hs[e]',
       'EX_sphgn[e]'],
      dtype='object')
--------------------
Gain:  0.24513618677042803
--------------------
********************
********************


 73%|███████▎  | 73/100 [07:31<03:03,  6.80s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.845
F1 Score: 0.8742556917688266
Distribution:  0.6524547803617571
Failed to gain:  0.19293712316968137
********************
********************


 74%|███████▍  | 74/100 [07:37<02:47,  6.45s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.829
F1 Score: 0.8470093177036369
Distribution:  0.5971802618328298
Imp features:  Index(['r1466', 'ATP2ter', 'HMR_0753', 'r1004', 'DNDPt9m', 'r0430',
       'FACOAL245_2', 'EX_uri[e]', 'ARTFR12', 'URIt'],
      dtype='object')
--------------------
Gain:  0.2319570325612622
--------------------
********************
********************


 75%|███████▌  | 75/100 [07:43<02:39,  6.38s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.637
F1 Score: 0.6329323931978432
Distribution:  0.6230516817063166
Failed to gain:  0.013945857260049266
********************
********************


 76%|███████▌  | 76/100 [07:49<02:29,  6.24s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.845
F1 Score: 0.8355371900826446
Distribution:  0.5297549591598599
Imp features:  Index(['FACOAL245_2', 'ATP2ter', 'RTOTALt', 'EX_mev_R[e]', 'HMGCOAtm', 'LPS2',
       'r0488', 'r1466', 'EX_Rtotal[e]', 'MEV_Rt'],
      dtype='object')
--------------------
Gain:  0.3154414624659666
--------------------
********************
********************


 77%|███████▋  | 77/100 [07:55<02:21,  6.16s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.822
F1 Score: 0.8354700854700855
Distribution:  0.5467128027681661
Imp features:  Index(['ATP2ter', 'EX_mev_R[e]', 'MEV_Rt', 'FACOAL245_2', 'HMGCOAtm', 'r0488',
       'sink_lnlncacoa[c]', 'DGTPtn', 'sink_lnlc[c]', 'RTOTALt'],
      dtype='object')
--------------------
Gain:  0.2756632064590542
--------------------
********************
********************


 78%|███████▊  | 78/100 [08:01<02:16,  6.22s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.709
F1 Score: 0.7269695706544393
Distribution:  0.5608888888888889
Failed to gain:  0.14800000000000002
********************
********************


 79%|███████▉  | 79/100 [08:07<02:07,  6.07s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.863
F1 Score: 0.8709267879813797
Distribution:  0.5594970812752582
Imp features:  Index(['ATP2ter', 'sink_lnlncacoa[c]', 'EX_pe_hs[e]', 'r1050', 'PEt',
       'FACOAL245_2', 'r1466', 'EX_sphgn[e]', 'sink_tag_hs[c]', 'DGAT'],
      dtype='object')
--------------------
Gain:  0.3035473731477324
--------------------
********************
********************


 80%|████████  | 80/100 [08:13<02:01,  6.06s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.784
F1 Score: 0.8160213618157542
Distribution:  0.567584480600751
Imp features:  Index(['ATP2ter', 'RTOTALt', 'sink_lnlncacoa[c]', 'EX_mev_R[e]', 'HMGCOAtm',
       'EX_uri[e]', 'URIt', 'EX_Rtotal[e]', 'MEV_Rt', 'LPS2'],
      dtype='object')
--------------------
Gain:  0.21683354192740922
--------------------
********************
********************


 81%|████████  | 81/100 [08:19<01:55,  6.10s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.906
F1 Score: 0.9133264816718054
Distribution:  0.577123695976155
Imp features:  Index(['ATP2ter', 'sink_lnlncacoa[c]', 'FACOAL245_2', 'EX_mev_R[e]', 'MEV_Rt',
       'HMGCOAtm', 'sink_lnlc[c]', 'NDPK4', 'r0431', 'r0430'],
      dtype='object')
--------------------
Gain:  0.32861400894187776
--------------------
********************
********************


 82%|████████▏ | 82/100 [08:25<01:52,  6.23s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.745
F1 Score: 0.7739837398373984
Distribution:  0.5621274644658414
Failed to gain:  0.18294360385144426
********************
********************


 83%|████████▎ | 83/100 [08:32<01:47,  6.32s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.856
F1 Score: 0.8637372392365734
Distribution:  0.5227592679493196
Imp features:  Index(['ATP2ter', 'RTOTALt', 'LPS2', 'EX_Rtotal[e]', 'sink_lnlncacoa[c]',
       'r1050', 'DNDPt9m', 'HMGCOAtm', 'URIt', 'EX_uri[e]'],
      dtype='object')
--------------------
Gain:  0.333176912247771
--------------------
********************
********************


 84%|████████▍ | 84/100 [08:38<01:40,  6.29s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.803
F1 Score: 0.8621047174701918
Distribution:  0.7001482579688658
Failed to gain:  0.10266864343958493
********************
********************


 85%|████████▌ | 85/100 [08:43<01:29,  5.97s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.715
F1 Score: 0.7486437613019892
Distribution:  0.6372950819672131
Failed to gain:  0.07786885245901642
********************
********************


 86%|████████▌ | 86/100 [08:50<01:24,  6.07s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.758
F1 Score: 0.7844217151848938
Distribution:  0.5004420866489832
Imp features:  Index(['ATP2ter', 'EX_pe_hs[e]', 'sink_lnlncacoa[c]', 'DGAT', 'PEt', 'DTTPtn',
       'EX_Rtotal[e]', 'sink_lnlc[c]', 'LPS2', 'RTOT_3'],
      dtype='object')
--------------------
Gain:  0.2572944297082228
--------------------
********************
********************


 87%|████████▋ | 87/100 [08:56<01:19,  6.15s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.816
F1 Score: 0.8479467258601554
Distribution:  0.613666815542653
Imp features:  Index(['ATP2ter', 'FACOAL245_2', 'URIt', 'MEV_Rt', 'sink_lnlncacoa[c]',
       'EX_uri[e]', 'r0488', 'HMGCOAtm', 'EX_Rtotal[e]', 'biomass_reaction'],
      dtype='object')
--------------------
Gain:  0.2027690933452434
--------------------
********************
********************


 88%|████████▊ | 88/100 [09:03<01:14,  6.23s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.790
F1 Score: 0.7993839045051984
Distribution:  0.5982286634460547
Failed to gain:  0.19202898550724634
********************
********************


 89%|████████▉ | 89/100 [09:09<01:08,  6.20s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.665
F1 Score: 0.6878775674587193
Distribution:  0.6393939393939394
Failed to gain:  0.02510822510822508
********************
********************


 90%|█████████ | 90/100 [09:15<01:03,  6.33s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.857
F1 Score: 0.8847708894878706
Distribution:  0.5924528301886792
Imp features:  Index(['ATP2ter', 'MEV_Rt', 'sink_lnlncacoa[c]', 'FACOAL245_2', 'HMGCOAtm',
       'r0488', 'RTOTALt', 'r1466', 'sink_coa[c]', 'r0431'],
      dtype='object')
--------------------
Gain:  0.26415094339622647
--------------------
********************
********************


 91%|█████████ | 91/100 [09:21<00:55,  6.22s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.806
F1 Score: 0.8088721094856065
Distribution:  0.5832535885167464
Imp features:  Index(['ATP2ter', 'FACOAL245_2', 'sink_lnlncacoa[c]', 'r1050', 'CLS_hs',
       'sink_coa[c]', 'HMR_0753', 'EX_pglyc_hs[e]', 'SPHGNte', 'DATPtn'],
      dtype='object')
--------------------
Gain:  0.2229665071770336
--------------------
********************
********************


 92%|█████████▏| 92/100 [09:27<00:49,  6.21s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.799
F1 Score: 0.8018867924528301
Distribution:  0.5227817745803357
Imp features:  Index(['ATP2ter', 'LPS2', 'r0488', 'FACOAL245_2', 'MEV_Rt', 'EX_Rtotal[e]',
       'EX_sphgn[e]', 'EX_mev_R[e]', 'sink_lnlncacoa[c]', 'HMR_0692'],
      dtype='object')
--------------------
Gain:  0.275779376498801
--------------------
********************
********************


 93%|█████████▎| 93/100 [09:33<00:42,  6.13s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.678
F1 Score: 0.7025843188786685
Distribution:  0.5774514448128849
Failed to gain:  0.10090004737091429
********************
********************


 94%|█████████▍| 94/100 [09:40<00:36,  6.13s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.810
F1 Score: 0.8445121951219513
Distribution:  0.6442953020134228
Failed to gain:  0.16554809843400453
********************
********************


 95%|█████████▌| 95/100 [09:45<00:30,  6.07s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.816
F1 Score: 0.7903225806451614
Distribution:  0.5305359368508518
Imp features:  Index(['FACOAL245_2', 'r1466', 'ATP2ter', 'HMR_0692', 'sink_lnlncacoa[c]',
       'DTTPtn', 'HMGCOAtm', 'PGLYCt', 'MEV_Rt', 'EX_pglyc_hs[e]'],
      dtype='object')
--------------------
Gain:  0.28583298712089733
--------------------
********************
********************


 96%|█████████▌| 96/100 [09:52<00:24,  6.11s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.696
F1 Score: 0.7025210084033613
Distribution:  0.5601374570446735
Failed to gain:  0.13573883161512024
********************
********************


 97%|█████████▋| 97/100 [09:57<00:18,  6.02s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.774
F1 Score: 0.7219171243135297
Distribution:  0.5474452554744526
Imp features:  Index(['ATP2ter', 'FACOAL245_2', 'EX_uri[e]', 'RE0344C', 'LPS2', 'PEt',
       'sink_coa[c]', 'URIt', 'r0408', 'r0407'],
      dtype='object')
--------------------
Gain:  0.22668288726682884
--------------------
********************
********************


 98%|█████████▊| 98/100 [10:04<00:12,  6.20s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.635
F1 Score: 0.6946502057613169
Distribution:  0.6625676340383669
Failed to gain:  -0.02754549926217409
********************
********************


 99%|█████████▉| 99/100 [10:10<00:06,  6.10s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.892
F1 Score: 0.8875838926174497
Distribution:  0.5291985501409585
Imp features:  Index(['ATP2ter', 'r1466', 'sink_lnlncacoa[c]', 'FACOAL245_2', 'SPHGNte',
       'URIt', 'sink_lnlc[c]', 'SMS', 'EX_pe_hs[e]', 'r0430'],
      dtype='object')
--------------------
Gain:  0.3628674989931534
--------------------
********************
********************


100%|██████████| 100/100 [10:16<00:00,  6.17s/it]

Training accuracy on selected features: 1.000
Testing accuracy on selected features: 0.828
F1 Score: 0.8621526841111411
Distribution:  0.6200269723533378
Imp features:  Index(['ATP2ter', 'HMGCOAtm', 'FACOAL245_2', 'r0488', 'MEV_Rt', 'r1466',
       'EX_mev_R[e]', 'PEt', 'DTTPtn', 'biomass_reaction'],
      dtype='object')
--------------------
Gain:  0.20768712070128115
--------------------
********************
********************





In [139]:
# Print the mean of every metric tracker, one line per metric.
# The tracker sequences are defined outside this cell; each one is reduced
# with np.mean and substituted into its message template.
metric_reports = (
    ('Average Accuracy score: {}', accuracy_tracker),
    ('Average F1 score: {}', f1_tracker),
    ('Average validation distribution: {}', val_distribution_tracker),
    ('Average precisions score: {}', precision_tracker),
    ('Average recall score: {}', recall_tracker),
)
for template, tracked_values in metric_reports:
    print(template.format(np.mean(tracked_values)))

Average Accuracy score: 0.8069434975573351
Average F1 score: 0.8201534111475917
Average validation distribution: 0.5656316648574554
Average precisions score: 0.8561159445851869
Average recall score: 0.7979496544539648


In [146]:
# Top important reactions
average_rank = pd.DataFrame({'rank': pd.concat(imp_df).groupby('reactions')['rank'].mean()}).reset_index()
average_rank = average_rank.sort_values(by='rank').head(20).reset_index(drop=True)
average_rank.head(20)

Unnamed: 0,reactions,rank
0,ATP2ter,1.596774
1,FACOAL245_2,6.564516
2,sink_lnlncacoa[c],7.5
3,HMGCOAtm,11.66129
4,MEV_Rt,14.196721
5,EX_mev_R[e],14.508197
6,r0488,15.387097
7,EX_uri[e],16.603448
8,EX_Rtotal[e],17.311475
9,r1466,17.583333


In [145]:
# Count how often each value occurs in column 0 of `imp_list` and show the
# 20 most frequent (value_counts orders by descending count by default).
(
    pd.DataFrame(imp_list)
    .loc[:, 0]
    .value_counts()
    .head(20)
)

ATP2ter              62
HMGCOAtm             62
FACOAL245_2          62
sink_lnlncacoa[c]    62
r0430                62
r0488                62
CLS_hs               61
DATPtn               61
DNDPt9m              61
EX_mev_R[e]          61
r1004                61
EX_sphgn[e]          61
RTOTALt              61
MEV_Rt               61
EX_Rtotal[e]         61
URIt                 61
r1050                61
EX_pglyc_hs[e]       60
r1466                60
LPS2                 60
Name: 0, dtype: int64