In [1]:
import sys;sys.path.extend(['E:\\reconstruction', 'E:\\cobamp', 'E:\\reconstruction\\src', 'E:\\cobamp\\src', 'E:/reconstruction'])
import cobra
import framed
import cobamp
import pandas as pd
import numpy as np
import pickle
import scipy as sci

# for testing the algorithms
from cobamp.wrappers import MatFormatReader
from cobamp.wrappers import COBRAModelObjectReader
from troppo.methods.imat import IMAT
from troppo.methods.gimme import GIMME
from troppo.methods.tINIT import tINIT
from troppo.methods.fastcore import FASTcore
from troppo.reconstruction_properties import FastcoreProperties, tINITProperties, GIMMEProperties, IMATProperties
from troppo.utilities.statistics import normalize, z_score

Here we are going to import the .mat file used for testing in the MATLAB COBRA package and will serve as basis for all the analysis and load some help functions to process the data

In [2]:
def load_glial_data(path):
    data = pd.read_csv(path, index_col=0)
    normalized_data = normalize(data)
    z = None

    return normalized_data, z

def get_index_core(reactions_ids, data, threshold):
    rx_names = data.index[data['0'] > threshold].tolist()
    index_core = [np.where(reactions_ids == r)[0][0] for r in rx_names]
    return index_core, rx_names

def get_index_values():
    pass

In [3]:
mat = sci.io.loadmat('./tests/FastCoreTest.mat')['ConsistentRecon2']
model_cobra = cobra.io.load_matlab_model('./tests/FastCoreTest.mat')

Academic license - for non-commercial use only


In [22]:
len(model_cobra.reactions)

5317

Since the .mat file contains all the information needed for the TROPPO package to work (the S matrix, lower and upper bounds), we are going to parse it

In [4]:
model = MatFormatReader(mat)
S = model.S
lb, ub = model.get_model_bounds(False, True)
rx_names = model.get_reaction_and_metabolite_ids()[0]

Since the algorithms need expression data to begin the pruning of the original model, we are going to import data from glial cells (complete the rest of the story here).
We are also going to make different inputs accordingly to the needs of the algorithms that are goint to be tested.

In [5]:
original_data,_ = load_glial_data('./tests/glioma_data/grade2_calls.csv') # import of the omics data before/after preprocessing
core, names_core = get_index_core(np.array(model.r_ids), original_data, 0.9) # this is for the FASTCORE algorithm
reactions_scores = original_data.fillna(0) # this is the input for tINIT; reactions with NaN for score were replaced with the score of 0
gimme_data = original_data.fillna(-1) #GIMME; reactions without expression values are not considered for the algorithm
iMAT_data = original_data.fillna((0.2+0.5)/2)#iMAT; fillna is used with the mean of both thresholds
# CORDA data

From here, we are going to test the already implement algorithms for tissue reconstruction. 

First, we are going to be testing the FASTcore algorithm

In [6]:
fastcore = FASTcore(S, lb, ub, FastcoreProperties(solver = 'CPLEX', core=core))
tissue_reactions = fastcore.fastcore()

J size209
[  12   43   64  155  156  185  204  244  253  254  255  256  257  258
  259  260  261  262  263  264  265  266  267  268  269  270  278  279
  287  315  317  354  402  405  483  487  488  492  495  498  502  543
  592  642  717  784  942  978  982 1441 1442 1443 1444 1496 1497 1498
 1499 1500 1701 1724 1815 1895 1935 1936 1980 2018 2022 2026 2059 2080
 2081 2114 2118 2122 2126 2129 2157 2162 2169 2185 2218 2219 2224 2257
 2271 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285
 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299
 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313
 2314 2315 2316 2317 2318 2319 2320 2321 2323 2418 2420 2421 2424 2426
 2427 2428 2429 2431 2433 2434 2435 2436 2482 2483 2553 2561 2579 2603
 2613 2619 2621 2626 2630 2644 2645 2712 2718 2728 2731 2732 2736 2755
 2893 2913 2931 2934 2935 2938 2970 2982 3046 3049 3077 3160 3197 3234
 3242 3251 3259 3264 3267 3272 3299 3347 3356 3384 3486 3488 3490 4



-0.02089999999999993
done LP7
LP9
70871.93398691867
done LP9
20 1099
before LP7
LP7
-0.0019000000000000006
done LP7
LP9
625.0
done LP9
20 1181
before LP7
LP7
0.0
done LP7
1 1181
Flipped
before LP7
LP7
-0.0001
done LP7
LP9
10.0
done LP9
1 1183
before LP7
LP7
-0.0001
done LP7
LP9
0.0
done LP9
1 1183
Flipped
before LP7
LP7
0.0
done LP7
1 1183
Error: Global network is not consistent
[46]


In [8]:
model_fastcore = model_cobra.copy() # this is done since it alters the original model_cobra; this way is to guarantee that a new model is changed instead of the original model
r_ids = [r.id for r in model_fastcore.reactions]
model_fastcore.remove_reactions([r_ids[r] for r in tissue_reactions],True) # this is to get the ids of the reactions to be removed in the model; True is to remove the pending genes/metabolites that with the removal of the reaction can no longer be connected in the network

Read LP format model from file C:\Users\Jorge\AppData\Local\Temp\tmpo6p3g58w.lp
Reading time = 0.05 seconds
: 2960 rows, 10634 columns, 42212 nonzeros


With the FASTcore algorithm, we obtained a model with 4134 reactions, 1345 genes and 2458 metabolites, not capable of producing biomass.

In [18]:
print('Genes: ' + str(len(model_fastcore.genes)))
print('Metabolites: ' + str(len(model_fastcore.metabolites)))
print('Reactions: ' + str(len(model_fastcore.reactions)))
print(model_fastcore.objective._get_expression())
print(model_fastcore.optimize())

Genes: 1345
Metabolites: 2458
Reactions: 4134
1.0*biomass_reaction - 1.0*biomass_reaction_reverse_32a6c
<Solution 0.000 at 0x181c3284780>


Now we're going to test the tINIT algorithm. For this case, as an input we are going to only provide reactions scores. The other properties of the algorithm are set to default (including the solver).

In [7]:
tinit = tINIT(S, np.array(lb), np.array(ub),
          tINITProperties(reactions_scores=list(reactions_scores['0']), present_metabolites=[], essential_reactions=[],
                          production_weight=0.5, allow_excretion=False, no_reverse_loops=False, solver = "GUROBI"))
reactions_to_remove_from_the_model = tinit.run_tINIT()

Changed value of parameter OutputFlag to 1
   Prev: 0  Min: 0  Max: 1  Default: 1

Consider calling update less frequently.

Optimize a model with 10692 rows, 26156 columns and 54200 nonzeros
Variable types: 18424 continuous, 7732 integer (7732 binary)
Coefficient statistics:
  Matrix range     [3e-03, 1e+03]
  Objective range  [1e-01, 1e+00]
  Bounds range     [1e+00, 1e+03]
  RHS range        [1e+00, 1e+00]
Presolve removed 1112 rows and 2151 columns
Presolve time: 0.23s
Presolved: 9580 rows, 24005 columns, 50512 nonzeros
Variable types: 16551 continuous, 7454 integer (7452 binary)
Presolve removed 6860 rows and 16174 columns
Presolved: 2720 rows, 7831 columns, 23386 nonzeros

Extra 3 simplex iterations after uncrush

Root relaxation: objective -1.055183e+03, 16336 iterations, 0.86 seconds

    Nodes    |    Current Node    |     Objective Bounds      |     Work
 Expl Unexpl |  Obj  Depth IntInf | Incumbent    BestBd   Gap | It/Node Time

     0     0 -1055.1826    0  987          - 

In [41]:
reactions_to_remove_from_the_model = np.unique(np.int_(reactions_to_remove_from_the_model)).tolist()
model_tINIT = model_cobra.copy() # this is done since it alters the original model_cobra; this way is to guarantee that a new model is changed instead of the original model
r_ids = [r.id for r in model_tINIT.reactions]
to_remove_ids = [r_ids[r] for r in reactions_to_remove_from_the_model]
model_tINIT.remove_reactions(to_remove_ids,True) # this is to get the ids of the reactions to be removed in the model; True is to remove the pending genes/metabolites that with the removal of the reaction can no longer be connected in the network

Read LP format model from file C:\Users\Jorge\AppData\Local\Temp\tmp_7u2lh6i.lp
Reading time = 0.05 seconds
: 2960 rows, 10634 columns, 42212 nonzeros


In [42]:
print('Genes: ' + str(len(model_tINIT.genes)))
print('Metabolites: ' + str(len(model_tINIT.metabolites)))
print('Reactions: ' + str(len(model_tINIT.reactions)))
print(model_tINIT.objective._get_expression())
print(model_tINIT.optimize())

Genes: 1139
Metabolites: 2185
Reactions: 1915
0
<Solution 0.000 at 0x21c81340d68>


With the tINIT algorithm, we obtained X specific reactions for the data provided, generatung a model with X reactions, Y genes and Z metabolites, capable (or not) of producing biomass.

For the CORDA algorithm, we needed four different types of sets of reactions (input_data3, input_data3_1, input_data3_2, input_data3_3). 

In [1]:
# code for the CORDA test


Regarding the CORDA reconstructed model, it has X reactions, Y genes and Z metabolites, and also capable(or not) of producing biomass

For GIMME, we apply a threshold to the expression values originally from the expression dataset.

In [37]:
# code for the GIMME test
idx_objective = rx_names.index('biomass_reaction')
properties = GIMMEProperties(
	exp_vector=np.array(gimme_data['0']),
    obj_frac=0.8,
    objectives= [{idx_objective:1}],
    preprocess=True,
    flux_threshold=0.8
)
algorithm = GIMME(S, lb.astype(float), ub.astype(float), properties)
model_GIMME = algorithm.run()

'__init__'  2058.51 ms
'__init__'  2857.57 ms
'__init__'  3389.56 ms


In [38]:
model_GIMME_final = model_cobra.copy() # this is done since it alters the original model_cobra; this way is to guarantee that a new model is changed instead of the original model
r_ids = [r.id for r in model_GIMME_final.reactions]
to_remove_ids = [r_ids[r] for r in np.where(model_GIMME==0)[0]]
model_GIMME_final.remove_reactions(to_remove_ids,True) # this is to get the ids of the reactions to be removed in the model; True is to remove the pending genes/metabolites that with the removal of the reaction can no longer be connected in the network

Read LP format model from file C:\Users\Jorge\AppData\Local\Temp\tmpew68h5co.lp
Reading time = 0.05 seconds
: 2960 rows, 10634 columns, 42212 nonzeros


In [39]:
print('1\'s: ' + str(len(np.where(model_GIMME==1)[0])))
print('2\'s: ' + str(len(np.where(model_GIMME==2)[0])))
print('Genes: ' + str(len(model_GIMME_final.genes)))
print('Metabolites: ' + str(len(model_GIMME_final.metabolites)))
print('Reactions: ' + str(len(model_GIMME_final.reactions)))
print(model_GIMME_final.objective._get_expression())
print(model_GIMME_final.optimize())

1's: 5154
2's: 6
Genes: 1895
Metabolites: 2951
Reactions: 5160
1.0*biomass_reaction - 1.0*biomass_reaction_reverse_32a6c
<Solution 3.198 at 0x1fa08044f98>


With GIMME we are capable of obtaining both a flux distribution and a reconstructed model.
For the flux distribution, ...
As for the model, it is comprised with X reactions, Y genes and Z metabolites, capable (or not) of producing biomass.

For the iMAT algorihtm, we have to provide two different thresholds (...).

In [46]:
# code for the iMAT test
properties = IMATProperties(
    exp_vector = np.array(iMAT_data['0'].tolist()), # check what input to here
    exp_thresholds=(0.2,0.5)
)

method = IMAT(S, lb.astype(float), ub.astype(float), properties)
to_remove = np.setdiff1d(list(range(S.shape[1])), method.run())

In [49]:
model_iMAT = model_cobra.copy() # this is done since it alters the original model_cobra; this way is to guarantee that a new model is changed instead of the original model
r_ids = [r.id for r in model_iMAT.reactions]
to_remove_ids = [r_ids[r] for r in to_remove]
model_iMAT.remove_reactions(to_remove_ids,True) # this is to get the ids of the reactions to be removed in the model; True is to remove the pending genes/metabolites that with the removal of the reaction can no longer be connected in the network

Read LP format model from file C:\Users\Jorge\AppData\Local\Temp\tmpjw601stq.lp
Reading time = 0.05 seconds
: 2960 rows, 10634 columns, 42212 nonzeros


In [50]:
print('Genes: ' + str(len(model_iMAT.genes)))
print('Metabolites: ' + str(len(model_iMAT.metabolites)))
print('Reactions: ' + str(len(model_iMAT.reactions)))
print(model_iMAT.objective._get_expression())
print(model_iMAT.optimize())

Genes: 1253
Metabolites: 1829
Reactions: 2569
0
<Solution 0.000 at 0x1fa2238c710>


With iMAT, we obtained a reconstructed model with X reactions, Y genes and Z metabolites, and capable(or not) of producing biomass.

The following table represents the final summary of the tested algorithms.

In [1]:
# code for the table (do this with pandas or markdown)

Since we are implementing the algoirhtms based on the code of COBRA toolbox, the next table is a comparison of speed of execution. The computer used was running Windows 10 version, with a processor i7-3630QM CPU @ 2.40 GHz (8 CPU), 16GB of RAM and Python 3.5.6.

In [2]:
# code for the table with the benchmarks

With this, we can see that the implementation in Pyhton is ...
(mostly should run faster when compared to MATLAB)