In [1]:
import pandas as pd
import numpy as np
from math import log
import cobra
import re

from troppo.omics.readers.generic import TabularReader
from troppo.methods_wrappers import ModelBasedWrapper, ReconstructionWrapper
from troppo.omics.integration import ContinuousScoreIntegrationStrategy, CustomSelectionIntegrationStrategy
from troppo.methods.reconstruction.gimme import GIMME, GIMMEProperties
from cobamp.utilities.parallel import batch_run
from copy import deepcopy


The wrappers.external_wrappers module will be deprecated in a future release in favour of the wrappers module. 
    Available ModelObjectReader classes can still be loaded using cobamp.wrappers.<class>. An appropriate model 
    reader can also be created using the get_model_reader function on cobamp.wrappers
  reader can also be created using the get_model_reader function on cobamp.wrappers''')


# Exercise 2

We have shown several types of omics data which can be used to reconstruct a tissue specific model. For this exercise, we will use the breast cancer cell lines present in the CCLE panel.

a) Select the 'ACH-000019', 'ACH-000028', 'ACH-000349' samples. With them, reconstruct a tissue model for each with the FastCORE algorithm. Perform FBA and pFBA for all the 3 models reconstructed. Highlight the main differences between them.

Define the parsing rules for the GPRs that will be used later on.

In [2]:
# Marker followed by exactly one digit; presumably the alternative-transcript suffix on gene IDs (confirm against cobamp).
patt = re.compile('__COBAMPGPRDOT__[0-9]{1}')


def replace_alt_transcripts(x):
    """Return ``x`` with every occurrence of the ``patt`` suffix stripped out."""
    return patt.sub('', x)

#### Read model and omics data

You have to download the .csv file containing the gene expression for the breast cancer cell lines present in the CCLE panel. The gene nomenclature has been normalized to match the one used in the metabolic model of this exercise.

In [4]:
# Load the template metabolic model from its MATLAB file; the bare name below displays the model summary.
model = cobra.io.load_matlab_model('data/redHUMAN_recon2_smin.mat')
model

This model seems to have metCharge instead of metCharges field. Will use metCharge for what metCharges represents.
No defined compartments in model redHUMAN_recon2_smin_02Sep2019_135437. Compartments will be deduced heuristically using regular expressions.
Using regular expression found the following compartments:c, e, l, m, n, r, x


0,1
Name,redHUMAN_recon2_smin_02Sep2019_135437
Memory address,1fa00d22948
Number of metabolites,469
Number of reactions,1396
Number of genes,699
Number of groups,49
Objective expression,1.0*biomass - 1.0*biomass_reverse_01e59
Compartments,"c, m, x, e, r, l, n"


For future usage, note that the dataset that will be used as input in *Troppo* needs to have the samples as rows and gene IDs in the columns.

In [5]:
# Read the expression table (first column as index) and keep only the three requested samples.
omics_data = (
    pd.read_csv('data/CCLE_breast_cancer_expression.csv', index_col=0)
    .loc[['ACH-000019', 'ACH-000028', 'ACH-000349']]
)
omics_data

Unnamed: 0_level_0,10165,6514,51557,47,6563,3421,6898,760,9123,501,...,594,4728,8781,39,2639,5160,4724,7352,3945,2876
DepMap_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ACH-000019,4.358256,0.0,0.014355,6.361944,0.097611,6.730776,0.056584,4.918863,4.339137,5.962086,...,3.161888,7.469886,0.0,5.056584,5.475085,7.593951,6.753952,0.678072,1.550901,0.650765
ACH-000028,4.808385,0.0,0.0,6.352441,0.070389,6.647171,0.214125,3.030336,3.320485,6.005625,...,2.869871,7.241173,0.0,5.128458,4.892877,7.466546,6.880808,0.895303,2.014355,0.748461
ACH-000349,3.753818,0.0,0.0,6.831624,0.432959,5.604368,1.941106,2.485427,1.918386,6.485749,...,3.50716,7.215387,1.169925,5.549669,5.663345,7.356496,6.09508,1.863938,2.127633,1.02148


In [6]:
# Wrap the expression table in troppo containers (the output below shows one per selected sample).
omics_container = TabularReader(
    path_or_df=omics_data,
    nomenclature='entrez_id',
    omics_type='transcriptomics',
).to_containers()
omics_container

[<troppo.omics.core.OmicsContainer at 0x1fa10d89048>,
 <troppo.omics.core.OmicsContainer at 0x1fa10d89248>,
 <troppo.omics.core.OmicsContainer at 0x1fa10d89688>]

In [7]:
# Wrap the template model for troppo; gene IDs in the GPR rules are cleaned with `replace_alt_transcripts`.
# NOTE(review): the meaning of ttg_ratio=9999 is not visible here - confirm against the troppo documentation.
model_wrapper = ReconstructionWrapper(
    model=model,
    ttg_ratio=9999,
    gpr_gene_parse_function=replace_alt_transcripts,
)
model_wrapper



<troppo.methods_wrappers.ReconstructionWrapper at 0x1fa10d89e08>

Furthermore, it is possible to reconstruct the model based on the integration results.

In [8]:
def sbml_model_reconstruction(model_template: "cobra.Model", results, objective: str = 'biomass'):
    """
    Build a context-specific model by pruning a copy of the template model.

    Parameters
    ----------
    model_template: cobra.Model
        The COBRA model template. It is deep-copied, so the template itself is not modified.
    results: list or dict
        The integration results. A list is read as the positional indices (into the
        template's reaction list) of the reactions to keep. A dict is read as a
        ``{reaction_id: bool}`` map in which every reaction whose value is ``False``
        is removed. Any other type leaves the copy unpruned.
    objective: str
        Identifier of the reaction used as the objective. This reaction is never
        removed. Defaults to ``'biomass'``.

    Returns
    -------
    cobra.Model
        The pruned copy, with its ``id`` set to ``'Reconstructed Model'``.
    """
    temp_model = deepcopy(model_template)
    temp_model.objective = objective

    if isinstance(results, list):
        # Resolve the kept indices to reaction ids once, so each membership test is a
        # set lookup instead of a rebuild-and-scan of the whole list per reaction.
        kept_ids = {temp_model.reactions[reaction_idx].id for reaction_idx in results}
        reactions_to_deactivate = [reaction for reaction in temp_model.reactions
                                   if reaction.id not in kept_ids and reaction.id != objective]
        temp_model.remove_reactions(reactions_to_deactivate, remove_orphans=True)

    elif isinstance(results, dict):
        # Only an explicit False marks a reaction for removal.
        reactions_to_deactivate = [reaction for reaction, value in results.items()
                                   if value is False and reaction != objective]
        temp_model.remove_reactions(reactions_to_deactivate, remove_orphans=True)

    temp_model.id = 'Reconstructed Model'
    print('Model Objective:', temp_model.objective)
    print('Objective Production:', temp_model.optimize())
    print('Model reconstruction finished.')

    return temp_model

In [9]:
# Display the biomass reaction of the template model (the reaction later used as objective).
model.reactions.get_by_id('biomass')

0,1
Reaction identifier,biomass
Name,
Memory address,0x1fa10999bc8
Stoichiometry,0.505626 ala_L_c + 0.35926 arg_L_c + 0.279425 asn_L_c + 0.352607 asp_L_c + 20.704451 atp_c + 0.020401 chsterol_c + 0.011658 clpn_hs_c + 0.039036 ctp_c + 0.046571 cys_L_c + 0.013183 datp_n +...  0.505626 L-alanine + 0.35926 L-argininium(1+) + 0.279425 L-asparagine + 0.352607 L-aspartate(1-) + 20.704451 ATP + 0.020401 cholesterol + 0.011658 cardiolipin + 0.039036 CTP + 0.046571 L-cysteine +...
GPR,
Lower bound,0.0
Upper bound,0.03536465206938496


Troppo runs the integration methods through the `run_from_omics` method of the `ReconstructionWrapper` class.
Below is an example on how to use the `run_from_omics` method for the FastCore algorithm.

In [10]:
# Score cut-off passed to the FastCORE integration: 5 * ln(2) ~= 3.466 (math.log is the natural logarithm).
threshold = 5 * log(2)

In [11]:
# Settings read by the FastCORE reconstruction helper (cut-off, wrapper and algorithm name).
parameters = dict(
    threshold=threshold,
    reconstruction_wrapper=model_wrapper,
    algorithm='fastcore',
)

In [14]:
def reconstruction_function_fastcore(omics_container, parameters: dict):
    """
    Run a FastCORE reconstruction for one omics container.

    Parameters
    ----------
    omics_container:
        The omics data of a single sample; forwarded unchanged to ``run_from_omics``.
    parameters: dict
        Must provide the keys ``'threshold'`` (score cut-off), ``'reconstruction_wrapper'``
        (object exposing ``run_from_omics``) and ``'algorithm'`` (must be ``'fastcore'``).

    Returns
    -------
    Whatever ``run_from_omics`` returns for this sample.

    Raises
    ------
    KeyError
        If one of the required keys is missing from ``parameters``.
    ValueError
        If ``parameters['algorithm']`` is not ``'fastcore'``.
    """
    threshold = parameters['threshold']
    rec_wrapper = parameters['reconstruction_wrapper']
    method = parameters['algorithm']

    if method != 'fastcore':
        # Fail loudly: falling through used to return None, which downstream code
        # cannot tell apart from a real result.
        raise ValueError(f"Unsupported algorithm {method!r}: this helper only runs 'fastcore'.")

    protected_reactions = ['biomass']
    and_or_funcs = (min, sum)

    def integration_fx(reaction_map_scores):
        # Keep reactions scoring above the cut-off, plus the protected ones regardless of score.
        return [[k for k, v in reaction_map_scores.get_scores().items() if
                 (v is not None and v > threshold) or k in protected_reactions]]

    return rec_wrapper.run_from_omics(omics_data=omics_container, algorithm=method, and_or_funcs=and_or_funcs,
                                      integration_strategy=('custom', [integration_fx]), solver='GLPK')

In [15]:
# NOTE(review): batch_run is assumed to call the helper once per omics container and to return the
# results in container order (the next cell zips the two together) - confirm against cobamp.
batch_fastcore_res = batch_run(reconstruction_function_fastcore, omics_container, parameters, threads=3)

In [16]:
# Pair every batch result with the condition name of the container it was computed from.
results = dict(zip((container.condition for container in omics_container), batch_fastcore_res))
print(results.keys())
results['ACH-000019']

dict_keys(['ACH-000019', 'ACH-000028', 'ACH-000349'])


{'2AMACHYD': True,
 '2AMADPTm': True,
 '2DR1PP': True,
 '2HCO3_NAt': False,
 '2OXOADOXm': True,
 '2OXOADPTm': True,
 '34HPPOR': True,
 '3AIBt': True,
 '3AIBTm': True,
 '3AIBtm': True,
 '3HAO': True,
 '3HBCOAHLm': True,
 '3HKYNAKGAT': True,
 '3MOBt2im': False,
 '3MOPt2im': True,
 '3SALAASPm': True,
 '3SALATAi': True,
 '3SALATAim': True,
 '3SPYRSP': True,
 '3SPYRSPm': True,
 '4ABUTtm': True,
 '4MOPt2im': False,
 '5MTHFt': True,
 '5MTHFt2': True,
 'AACOAT': True,
 'AACTOOR': True,
 'AACTtm': True,
 'AASAD3m': True,
 'AATAi': True,
 'ABTArm': True,
 'ABUTD': True,
 'ABUTt2r': False,
 'ABUTt4_2_r': True,
 'ACACT10m': True,
 'ACACT1r': True,
 'ACACt2': True,
 'ACACt2m': False,
 'ACETONEt2': True,
 'ACETONEt2m': True,
 'ACITL': True,
 'ACOAD10m': True,
 'ACOAD9m': True,
 'ACOAHi': False,
 'ACONT': True,
 'ACONTm': True,
 'ACS': True,
 'ACSm': False,
 'ACt2m': False,
 'ACt2r': True,
 'ACTLMO': False,
 'ACTNMO': False,
 'ACYP': True,
 'ADCim': True,
 'ADEt': True,
 'ADK1': True,
 'ADK1m': True,

In [17]:
# Prune one copy of the template model per sample, keyed by sample ID.
reconstructed_models = {
    sample_id: sbml_model_reconstruction(model, fastcore_result)
    for sample_id, fastcore_result in results.items()
}
reconstructed_models

  warn("need to pass in a list")


Model Objective: Maximize
1.0*biomass - 1.0*biomass_reverse_01e59
Objective Production: <Solution 0.035 at 0x1fa0214a2c8>
Model reconstruction finished.
Model Objective: Maximize
1.0*biomass - 1.0*biomass_reverse_01e59
Objective Production: <Solution 0.035 at 0x1fa02f8fc88>
Model reconstruction finished.
Model Objective: Maximize
1.0*biomass - 1.0*biomass_reverse_01e59
Objective Production: <Solution 0.035 at 0x1fa035efd88>
Model reconstruction finished.


{'ACH-000019': <Model Reconstructed Model at 0x1fa10094608>,
 'ACH-000028': <Model Reconstructed Model at 0x1fa024bc888>,
 'ACH-000349': <Model Reconstructed Model at 0x1fa10cc8bc8>}

In [18]:
# Iterate under a dedicated name: using `model` as the loop variable would overwrite the
# template model that other cells pass to `sbml_model_reconstruction`.
for sample, tissue_model in reconstructed_models.items():
    print(f'FBA: Model from {sample}')
    print(tissue_model.summary(tissue_model.optimize()))
    print()
    print(f'pFBA: Model from {sample}')
    print(tissue_model.summary(cobra.flux_analysis.pfba(tissue_model)))
    print('------------------------------------------------------------')
    print('\n')
    print('\n')

FBA: Model from ACH-000019
Objective
1.0 biomass = 0.035364652069385

Uptake
------
Metabolite   Reaction      Flux  C-Number C-Flux
   arg_L_e EX_arg_L_e   0.02729         6  4.95%
   asn_L_e EX_asn_L_e   0.01511         4  1.83%
   asp_L_e EX_asp_L_e  0.009781         4  1.18%
    chol_e  EX_chol_e  0.006081         5  0.92%
   glc_D_e   EX_glc_e    0.2648         6 48.02%
   gln_L_e EX_gln_L_e   0.03021         5  4.57%
   his_L_e EX_his_L_e   0.07917         6 14.36%
   ile_L_e EX_ile_L_e   0.04197         6  7.61%
   leu_L_e EX_leu_L_e   0.02362         6  4.28%
   lys_L_e EX_lys_L_e   0.02094         6  3.80%
   met_L_e EX_met_L_e  0.007058         5  1.07%
      o2_e    EX_o2_e    0.1425         0  0.00%
   phe_L_e EX_phe_L_e   0.01482         9  4.03%
      pi_e    EX_pi_e    0.0439         0  0.00%
   thr_L_e EX_thr_L_e   0.01106         4  1.34%
   trp_L_e EX_trp_L_e 0.0004706        11  0.16%
   val_L_e EX_val_L_e   0.01247         5  1.88%

Secretion
---------
Metabolite   

b) Try to reproduce the Warburg Effect (if not already present). Use the `escher` library to view the metabolic pathway.

Hint: Change the uptake of the oxygen drain to a small value. Also, our model is the redHUMAN reconstruction based on Recon2. However, some of the reaction names overlap with Recon1 and so we will use the central carbon metabolism map for that model.

In [19]:
# Unpack the three per-sample models, in the order the sample IDs are listed.
model_1, model_2, model_3 = (
    reconstructed_models[sample_id]
    for sample_id in ('ACH-000019', 'ACH-000028', 'ACH-000349')
)

In [20]:
import escher

# Overlay the pFBA fluxes of the third sample's model on the central carbon metabolism map.
builder = escher.Builder(map_name='RECON1.Glycolysis TCA PPP')
with model_3 as model_:
    # The Builder attribute is `model`; assigning to `model_` only created an unrelated
    # attribute, so the COBRA model was never attached to the map.
    builder.model = model_
    builder.reaction_data = cobra.flux_analysis.pfba(model_).fluxes
builder

Downloading Map from https://escher.github.io/1-0-0/6/maps/Homo%20sapiens/RECON1.Glycolysis%20TCA%20PPP.json


Builder(reaction_data={'2AMADPTm': 0.0, '2DR1PP': 0.0, '2HCO3_NAt': 0.0, '2OXOADOXm': 0.009401758287453805, '2…

In [21]:
# Same map as before, but with oxygen uptake restricted to try to reproduce the Warburg effect.
builder = escher.Builder(map_name='RECON1.Glycolysis TCA PPP')
with model_3 as model_:
    oxygen_exchange = model_.reactions.get_by_id('EX_o2_e')
    print(oxygen_exchange.bounds)
    # Allow at most 0.1 units of oxygen uptake (uptake is the negative direction of the exchange).
    # NOTE(review): the `with` block is expected to revert this bound on exit - see the cobra model context docs.
    oxygen_exchange.lower_bound = -0.1
    # The Builder attribute is `model`; assigning to `model_` only created an unrelated
    # attribute, so the COBRA model was never attached to the map.
    builder.model = model_
    builder.reaction_data = cobra.flux_analysis.pfba(model_).fluxes
builder

Downloading Map from https://escher.github.io/1-0-0/6/maps/Homo%20sapiens/RECON1.Glycolysis%20TCA%20PPP.json
(-2.0, 0.0)


Builder(reaction_data={'2AMADPTm': 0.0, '2DR1PP': 0.0, '2HCO3_NAt': 0.0, '2OXOADOXm': 0.018923179078202026, '2…

The Warburg Effect describes how cancer cells prefer glycolysis over oxidative phosphorylation for ATP production, even in the presence of oxygen. This is illustrated in the second image by increased glycolytic activity and reduced oxygen uptake, as shown by the high flux through glycolysis (red arrows) and decreased TCA cycle activity (gray arrows). In contrast, the first image (the same model with its default oxygen uptake bound) resembles normal cells, with lower glycolytic activity (magenta arrows) and active oxidative phosphorylation, indicated by normal oxygen uptake. This metabolic reprogramming in cancer cells supports rapid proliferation by providing biosynthetic intermediates and reducing reliance on oxygen-dependent ATP production.

c) Select a random sample. Reconstruct 3 models using fastcore with different thresholds. What are the main differences?


In [22]:
# Four score cut-offs to compare, starting from the 5 * ln(2) value used for the per-sample reconstructions.
thresholds = [5 * log(2), 5, 10, 20]
thresholds

[3.4657359027997265, 5, 10, 20]

In [23]:
# Use the first omics container as the single sample for the threshold comparison.
single_sample = omics_container[0]
single_sample

<troppo.omics.core.OmicsContainer at 0x1fa10d89048>

In [25]:
# Reuse the FastCORE helper for every cut-off so this sweep cannot drift from the
# per-sample reconstruction (same protected reaction, AND/OR functions and solver).
models = {}
for thres in thresholds:
    sweep_parameters = {'threshold': thres, 'reconstruction_wrapper': model_wrapper, 'algorithm': 'fastcore'}
    models[f'Model_Thres_{round(thres,2)}'] = reconstruction_function_fastcore(single_sample, sweep_parameters)

J size517
[   4    9   12   16   17   24   30   39   40   41   45   51   58   59
   60   64   67   68   70   72   74   76   83   86   87   89   90   91
   92   93   94   95   97  101  102  108  109  110  128  129  130  137
  138  139  140  141  142  143  147  148  151  152  158  162  163  164
  165  166  167  168  231  233  238  245  248  249  253  264  274  275
  276  277  278  279  280  283  289  290  292  303  308  309  310  312
  313  314  315  317  332  337  342  345  346  347  350  351  353  361
  364  369  372  373  375  384  386  387  389  396  401  427  437  438
  439  440  444  445  446  447  448  450  452  455  456  457  460  463
  467  468  470  473  477  478  480  482  485  496  497  498  500  504
  505  507  508  509  512  513  515  516  517  523  525  526  527  528
  529  531  533  534  540  542  556  565  568  575  576  577  580  588
  593  595  596  598  605  613  618  619  635  653  659  669  672  793
  794  795  796  797  798  799  800  801  802  803  804  805  806  

In [26]:
# One entry per threshold, labelled with the cut-off rounded to two decimals.
models.keys()

dict_keys(['Model_Thres_3.47', 'Model_Thres_5', 'Model_Thres_10', 'Model_Thres_20'])

In [27]:
# Reload a pristine template so the reconstructions below start from the full model,
# whatever `model` was bound to by earlier cells.
model = cobra.io.load_matlab_model('data/redHUMAN_recon2_smin.mat')

This model seems to have metCharge instead of metCharges field. Will use metCharge for what metCharges represents.
No defined compartments in model redHUMAN_recon2_smin_02Sep2019_135437. Compartments will be deduced heuristically using regular expressions.
Using regular expression found the following compartments:c, e, l, m, n, r, x


In [28]:
# Prune one copy of the template model per threshold, keyed by the threshold label.
reconstructed_models = {
    threshold_label: sbml_model_reconstruction(model, fastcore_result)
    for threshold_label, fastcore_result in models.items()
}
reconstructed_models

  warn("need to pass in a list")


Model Objective: Maximize
1.0*biomass - 1.0*biomass_reverse_01e59
Objective Production: <Solution 0.035 at 0x1fa062c8f88>
Model reconstruction finished.
Model Objective: Maximize
1.0*biomass - 1.0*biomass_reverse_01e59
Objective Production: <Solution 0.035 at 0x1fa065eaf48>
Model reconstruction finished.
Model Objective: Maximize
1.0*biomass - 1.0*biomass_reverse_01e59
Objective Production: <Solution 0.004 at 0x1fa0923cf48>
Model reconstruction finished.
Model Objective: Maximize
1.0*biomass - 1.0*biomass_reverse_01e59
Objective Production: <Solution 0.004 at 0x1fa0974cc08>
Model reconstruction finished.


{'Model_Thres_3.47': <Model Reconstructed Model at 0x1fa0cd29848>,
 'Model_Thres_5': <Model Reconstructed Model at 0x1fa063d03c8>,
 'Model_Thres_10': <Model Reconstructed Model at 0x1fa05c66e08>,
 'Model_Thres_20': <Model Reconstructed Model at 0x1fa06a77608>}

In [29]:
# Iterate under a dedicated name: using `model` as the loop variable would overwrite the
# template model that was just reloaded for the reconstructions.
for label, threshold_model in reconstructed_models.items():
    print(f'FBA: Model from {label}')
    print(threshold_model.summary(threshold_model.optimize()))
    print()
    print(f'pFBA: Model from {label}')
    print(threshold_model.summary(cobra.flux_analysis.pfba(threshold_model)))
    print('------------------------------------------------------------')
    print('\n')
    print('\n')

FBA: Model from Model_Thres_3.47
Objective
1.0 biomass = 0.035364652069385

Uptake
------
Metabolite   Reaction      Flux  C-Number C-Flux
   arg_L_e EX_arg_L_e   0.02729         6  4.95%
   asn_L_e EX_asn_L_e   0.01511         4  1.83%
   asp_L_e EX_asp_L_e  0.009781         4  1.18%
    chol_e  EX_chol_e  0.006081         5  0.92%
   glc_D_e   EX_glc_e    0.2648         6 48.02%
   gln_L_e EX_gln_L_e   0.03021         5  4.57%
   his_L_e EX_his_L_e   0.07917         6 14.36%
   ile_L_e EX_ile_L_e   0.04197         6  7.61%
   leu_L_e EX_leu_L_e   0.02362         6  4.28%
   lys_L_e EX_lys_L_e   0.02094         6  3.80%
   met_L_e EX_met_L_e  0.007058         5  1.07%
      o2_e    EX_o2_e    0.1425         0  0.00%
   phe_L_e EX_phe_L_e   0.01482         9  4.03%
      pi_e    EX_pi_e    0.0439         0  0.00%
   thr_L_e EX_thr_L_e   0.01106         4  1.34%
   trp_L_e EX_trp_L_e 0.0004706        11  0.16%
   val_L_e EX_val_L_e   0.01247         5  1.88%

Secretion
---------
Metabol

In [30]:
# Compare model size and achievable biomass across thresholds.
# `slim_optimize()` returns the optimal objective value through the public API, replacing the
# read of the private `_objective_value` attribute of a summary object.
for label, threshold_model in reconstructed_models.items():
    print(f'Sample {label}', '| reactions ->', len(threshold_model.reactions), ' | genes ->', len(threshold_model.genes), ' | metabolites ->', len(threshold_model.metabolites), ' | biomass ->', threshold_model.slim_optimize(), '|')

Sample Model_Thres_3.47 | reactions -> 1038  | genes -> 646  | metabolites -> 397  | biomass -> 0.035364652069385 |
Sample Model_Thres_5 | reactions -> 608  | genes -> 572  | metabolites -> 358  | biomass -> 0.035364652069385 |
Sample Model_Thres_10 | reactions -> 337  | genes -> 515  | metabolites -> 298  | biomass -> 0.0037252660080568222 |
Sample Model_Thres_20 | reactions -> 222  | genes -> 408  | metabolites -> 229  | biomass -> 0.0037252660080568222 |
