In [5]:
import cobra
import importlib
import sys
import pandas as pd
import utils.model_maj as mj
import utils.viz_utils as vu
import utils.Model_correction as mc
from cobra.core.model import Model
from cobra.io import validate_sbml_model

sys.path.append("../../aspmcs/code/compression/")
import efmtool_link_compression

In [22]:
# Reload the compression module so that edits made to its source file are
# picked up without restarting the kernel.
importlib.reload(efmtool_link_compression)

<module 'efmtool_link_compression' from '/home/muller-prokob/Documents/RBA_cutsets/models_analysis/code/../../aspmcs/code/compression/efmtool_link_compression.py'>

### Model loading

#### HCC

In [6]:
# Load the corrected HCC model through the SBML validator, which returns the
# model together with a report of the validation messages.
# NOTE(review): the output of this cell reports SBML validation errors, but
# `errors` is never inspected in the visible cells — TODO review its content.
HCC, errors = validate_sbml_model("../../models_storage/HCC_corrected.xml")

SBML errors in validation, check error log for details.


Before model reduction : 

* Reactions : 5076
* genes : 2037
* groups : 133
* metabolites : 4339

compressed : 2662

After model reduction :
* Reactions : 4642
* genes : 2037
* groups : 133
* metabolites : 4339

compressed : 2191 reactions

In [4]:
# Display the summary table of the loaded model (metabolites, reactions, genes, objective, compartments).
HCC

0,1
Name,INITModel
Memory address,7fa5b4c47df0
Number of metabolites,4339
Number of reactions,5076
Number of genes,2037
Number of groups,133
Objective expression,1.0*biomass_components - 1.0*biomass_components_reverse_c1825
Compartments,"Cytosol, Extracellular, Endoplasmic reticulum, Mitochondria, Peroxisome, Golgi apparatus, Lysosome, Nucleus, Boundary"


### Matching gene IDs from the differential gene expression dataset to model reactions

Each reaction in the SBML model is associated with one or more genes. Some of these genes are reported as over- or under-expressed in HCC cells.
The first step is therefore to find the model reactions whose associated genes are all over-expressed (or all under-expressed) in the dataset.

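Reactions associated with under-expressed genes are then removed from the model, unless removing them drops the biomass objective to or below a threshold, in which case they are restored.
Reactions associated with over-expressed genes are stored as positive constraints for the ASPEFM MCS computation.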

In [13]:
# Dataset loading
# For each fold-change table, the first 12 rows are skipped and the third
# column (position 2) is turned into a plain Python list.
# NOTE(review): presumably that column holds the Entrez gene IDs — TODO confirm against the CSV layout.

down_table = pd.read_csv("../../models_storage/model_related_data/FC_HCC_DOWN.csv", sep=",")
downregulated_entrez_ids = down_table.iloc[12:, 2].tolist()

up_table = pd.read_csv("../../models_storage/model_related_data/FC_HCC_UP.csv", sep=",")
upregulated_entrez_ids = up_table.iloc[12:, 2].tolist()

In [23]:
# Functions definition : getting gene-associated reactions

def get_gene_associated_reaction(genes_IDs:list[str], model:Model) :
    """
    Collect the reactions whose genes are ALL contained in ``genes_IDs``.

    A reaction is kept only when it has at least one gene and the
    ``'ncbigene'`` annotation of every one of its genes is found in
    ``genes_IDs``.

    Parameters
    ----------
    genes_IDs : list[str]
        Gene identifiers, compared with the ``'ncbigene'`` annotation of the
        model genes.
    model : cobra.core.model.Model
        Model whose ``reactions`` are scanned.

    Returns
    -------
    tuple[dict, int]
        ``(gene_associated_reactions, n_errors)`` where
        ``gene_associated_reactions`` maps each retained reaction id to the
        list of ``'ncbigene'`` annotations of its genes, and ``n_errors`` is
        the number of reactions skipped because at least one of their genes
        has no ``'ncbigene'`` annotation.
    """
    gene_associated_reactions = {}
    n_errors = 0

    for reaction in model.reactions:
        try:
            model_reaction_genes = [g.annotation['ncbigene'] for g in reaction.genes]
        except KeyError:
            # At least one gene carries no 'ncbigene' annotation, so it cannot
            # be shown that every gene of this reaction is in the list. Skip
            # the reaction instead of reusing the gene list left over from the
            # previous iteration.
            n_errors += 1
            continue

        # Reactions without any gene are never considered gene-associated.
        if not model_reaction_genes:
            continue

        # Keep the reaction only if every one of its genes is in the given list.
        if all(gene in genes_IDs for gene in model_reaction_genes):
            gene_associated_reactions[reaction.id] = model_reaction_genes

    return gene_associated_reactions, n_errors

# Safely removing down-regulated genes-associated reactions :

def remove_and_check(model:Model, reactions:list[str], threshold = float(0.2), objective = "biomass_components", verbose:bool = True):
    """
    Remove reactions one by one, restoring each one whose removal breaks the model.

    After each removal the model is optimized for ``objective``. The removal
    is kept only if the solver reports an optimal solution whose objective
    value is strictly greater than ``threshold``; otherwise (infeasible or
    failed solve, missing/NaN objective value, or value <= threshold) the
    reaction is added back to the model.

    Parameters
    ----------
    model : cobra.core.model.Model
        Model modified in place.
    reactions : iterable of str
        Ids of the reactions to try to remove.
    threshold : float
        Objective value that must be exceeded for a removal to be kept.
    objective : str
        Value assigned to ``model.objective`` before the removals start.
    verbose : bool
        When True (default), print the objective value obtained after each removal.

    Returns
    -------
    None
        A summary line ``removed/total`` is printed at the end.
    """
    import math

    reactions_removed = 0
    reaction_ids = list(reactions)
    total_reactions = len(reaction_ids)
    model.objective = objective

    for reaction in reaction_ids:
        reaction_obj = model.reactions.get_by_id(reaction)
        model.remove_reactions([reaction_obj])
        solution = model.optimize()
        new_val = solution.objective_value
        if verbose:
            print(f"obj_val : {new_val}; {type(new_val)}")

        # A non-optimal status (e.g. infeasible) means the objective value is
        # not meaningful. NaN would also compare False with `<=` and let a
        # model-breaking removal through, so both cases are checked explicitly.
        solved = (
            solution.status == "optimal"
            and new_val is not None
            and not math.isnan(new_val)
        )
        if not solved or new_val <= threshold:
            model.add_reactions([reaction_obj])
        else:
            reactions_removed += 1
    print(f"{reactions_removed}/{total_reactions} reactions removed.")

def apply_medium(model:Model, medium_file_path:str, verbose:bool = True):
    """
    Build a medium from a ';'-separated file and assign it to ``model.medium``.

    Each non-blank line is split on ';'. Field 0 is used as the reaction id
    and field 2 as an activity flag. When the flag is exactly ``"1"``, the
    absolute value of field 3 is used as the medium value for that reaction;
    otherwise the value is set to 0.0. Blank lines (including a trailing empty
    line at the end of the file) are ignored.

    Parameters
    ----------
    model : cobra.core.model.Model
        Model whose ``medium`` attribute is assigned.
    medium_file_path : str
        Path of the medium description file.
    verbose : bool
        When True (default), echo each processed line.

    Raises
    ------
    IndexError
        If a non-blank line has fewer fields than required.
    ValueError
        If the value field of an active line cannot be converted to float.
    """
    new_medium = {}
    with open(medium_file_path, "r") as medium_buffer:
        for raw_line in medium_buffer:
            line = raw_line.rstrip("\r\n")
            # A blank line has no ';'-separated fields to index into.
            if not line.strip():
                continue
            if verbose:
                print(line)

            fields = line.split(";")
            reaction_id = fields[0]
            reaction_activity = fields[2]
            if reaction_activity == "1":
                # abs() is applied because the file may list the value as a negative number.
                new_medium[reaction_id] = abs(float(fields[3]))
            else:
                new_medium[reaction_id] = float(0.0)

    model.medium = new_medium

def write_constraint_file(reactions_list, reaction_subset_path, outfile_path):
    """
    Write one positive-constraint file per compressed reaction that is fully gene-associated.

    The reaction-subset file is read as a Python expression evaluating to a
    mapping ``{compressed_reaction_name: {"reacs": [reaction ids], ...}}``.
    A compressed reaction is selected when every reaction it contains (after
    dropping a trailing ``"_rev"`` suffix) is present in ``reactions_list``.
    For the i-th selected compressed reaction, the file
    ``outfile_path + str(i) + "_posconstr.lp4"`` is written.

    Parameters
    ----------
    reactions_list : iterable of str
        Ids of the gene-associated reactions of the uncompressed model.
    reaction_subset_path : str
        Path of the reaction-subset file.
    outfile_path : str
        Prefix prepended to each output file name; its directory part is
        created if it does not exist.
    """
    import os

    # Getting compressed reactions subsets to compress the gene-associated reactions
    with open(reaction_subset_path, "r") as reaction_subset:
        data = reaction_subset.read()
    # SECURITY NOTE(review): eval() executes whatever Python expression the
    # file contains. Only use it on trusted files; ast.literal_eval would be
    # safer if the file is guaranteed to hold plain literals — TODO confirm.
    r_subsets = eval(data)

    wanted_reactions = set(reactions_list)

    def _original_id(reaction):
        # Only a trailing "_rev" marks a reversed copy; ids that merely
        # contain "rev" somewhere must be left untouched.
        return reaction[:-4] if reaction.endswith("_rev") else reaction

    compressed_gene_associated_reactions = []
    for compressed_reaction_name, compressed_reaction_data in r_subsets.items():
        associated = all(
            _original_id(reaction) in wanted_reactions
            for reaction in compressed_reaction_data["reacs"]
        )
        if associated:
            print(f"{compressed_reaction_name} is gene-associated.")
            compressed_gene_associated_reactions.append(compressed_reaction_name)

    prefix = ":- not support"
    suffix = ".\n"

    if compressed_gene_associated_reactions:
        # outfile_path is a prefix, so only its directory part can be created.
        out_dir = os.path.dirname(outfile_path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

    for reaction_number, compressed_reaction_id in enumerate(compressed_gene_associated_reactions):
        constr_out_str = (
            f'{prefix}("mcs_{compressed_reaction_id}"){suffix}'
            f'target("mcs_{compressed_reaction_id}").'
        )
        with open(outfile_path+str(reaction_number)+"_posconstr.lp4", "w") as constr_out:
            constr_out.write(constr_out_str)

In [11]:
# Build the medium from the ';'-separated file and assign it to HCC.medium.
apply_medium(HCC, "../../models_storage/medium_HCC.csv")

Could not identify an external compartment by name and choosing one with the most boundary reactions. That might be complete nonsense or change suddenly. Consider renaming your compartments using `Model.compartments` to fix this.


EX_m4082;(+)-alpha-pinene;;
EX_m4083;(R)-3-hydroxybutanoate;;
EX_m4084;1,1-dichloroethylene;;
EX_m4085;1,2-dibromoethane;;
EX_m4086;10-formyl-THF-glu(7);;
EX_m4087;1-nitronaphthalene;;
EX_m4088;25-hydroxyvitamin D2;;
EX_m4089;2-acetyl-1-alkyl-sn-glycero-3-phosphocholine;;
EX_m4090;2-hydroxybutyrate;;
EX_m4091;4-(n-nitrosomethylamino)-1-(3-pyridyl)-1-butanone;;
EX_m4092;4-nitrophenyl-phosphate;;
EX_m4093;5-alpha-dihydrotestosterone;;
EX_m4094;5-formyl-THF;;
EX_m4095;7,12-dimethylbenz[a]anthracene;;
EX_m4096;acetone;;
EX_m4097;acetylcholine;;
EX_m4098;ADP-glucose;;
EX_m4099;ADP-mannose;;
EX_m4100;aflatoxin B1;;
EX_m4101;albumin;;
EX_m4102;alpha-tocopherol;;
EX_m4103;alpha-tocotrienol;;
EX_m4104;aquacob(III)alamin;;
EX_m4105;arginine;1;-1.5
EX_m4106;aspartate;1;-1.5
EX_m4107;benzo[a]pyrene;;
EX_m4108;beta-carotene;;
EX_m4109;betaine;;
EX_m4110;bilirubin;;
EX_m4111;biotin;;
EX_m4112;bromobenzene;;
EX_m4113;chitin-component;;
EX_m4114;cholate;;
EX_m4115;chondroitin sulfate B - dermatan sulf

In [14]:
# Build, for each gene list, the dictionary {reaction id: gene annotations} of
# the model reactions whose genes all belong to that list.

upregulated_reactions, up_errors = get_gene_associated_reaction(upregulated_entrez_ids, HCC)
downregulated_reactions, down_errors = get_gene_associated_reaction(downregulated_entrez_ids, HCC)

# Sanity check: report how many reactions fall in each category.
n_downregulated = len(downregulated_reactions)
n_upregulated = len(upregulated_reactions)
print(f"Number of reactions being downregulated in the model : {n_downregulated}\nNumber of reactions being upregulated in the model : {n_upregulated}")

Number of reactions being downregulated in the model : 459
Number of reactions being upregulated in the model : 218


In [15]:
# Removing the reactions associated with down-regulated genes from the model.
# After each removal the objective is optimized; if its value falls to or below
# the threshold (0.2 by default), the reaction is restored.

remove_and_check(HCC, downregulated_reactions.keys())

obj_val : 0.357298820913891; <class 'float'>
obj_val : 0.357298820913891; <class 'float'>
obj_val : 0.357298820913891; <class 'float'>
obj_val : 0.357298820913891; <class 'float'>
obj_val : 0.357298820913891; <class 'float'>
obj_val : 0.357298820913891; <class 'float'>
obj_val : 0.3572988209138912; <class 'float'>
obj_val : 0.357298820913891; <class 'float'>
obj_val : 0.357298820913891; <class 'float'>
obj_val : 0.357298820913891; <class 'float'>
obj_val : 0.357298820913891; <class 'float'>
obj_val : 0.3572988209138909; <class 'float'>
obj_val : 0.357298820913891; <class 'float'>
obj_val : 0.35729882091389104; <class 'float'>
obj_val : 0.357298820913891; <class 'float'>
obj_val : 0.35729882091389104; <class 'float'>
obj_val : 0.357298820913891; <class 'float'>
obj_val : 0.3572988209138908; <class 'float'>
obj_val : 0.357298820913891; <class 'float'>
obj_val : 0.3572988209138912; <class 'float'>
obj_val : 0.3572988209138909; <class 'float'>
obj_val : 0.35729882091389104; <class 'float'>

In [16]:
# Optimize the biomass objective on the model after the removals; the
# resulting solution is displayed as the cell output.
HCC.objective = "biomass_components"
HCC.optimize()

Unnamed: 0,fluxes,reduced_costs
EX_m4082,-0.000000,0.0
EX_m4083,0.000000,0.0
EX_m4084,-0.000000,0.0
EX_m4085,-0.000000,0.0
EX_m4086,0.000000,0.0
...,...,...
HMR_3156,20.844229,0.0
HMR_0741,0.357442,0.0
HMR_7599,0.000286,0.0
HMR_3984,97.910511,0.0


In [18]:
# Print the exchange reactions of the model with the helper from utils.viz_utils.
# NOTE(review): "all" presumably selects every exchange category (the output starts with intakes) — TODO confirm.
vu.print_exchanges(HCC, "all")


##########
INTAKES :

EX_m4239 : -21         ub : 1000.0	---		metabolites : 	 id : m4239 --- metabolite name : CO2 ; id : m4239
EX_m4171 : -20         ub : 1000.0	---		metabolites : 	 id : m4171 --- metabolite name : O2 ; id : m4171
EX_m4106 : -2          ub : 1000.0	---		metabolites : 	 id : m4106 --- metabolite name : aspartate ; id : m4106
EX_m4184 : -2          ub : 1000.0	---		metabolites : 	 id : m4184 --- metabolite name : serine ; id : m4184
EX_m4263 : -2          ub : 1000.0	---		metabolites : 	 id : m4263 --- metabolite name : glucose ; id : m4263
EX_m4299 : -2          ub : 1000.0	---		metabolites : 	 id : m4299 --- metabolite name : methionine ; id : m4299
EX_m4309 : -2          ub : 1000.0	---		metabolites : 	 id : m4309 --- metabolite name : Pi ; id : m4309
EX_m4323 : -2          ub : 1000.0	---		metabolites : 	 id : m4323 --- metabolite name : threonine ; id : m4323
EX_m4264 : -1          ub : 1000.0	---		metabolites : 	 id : m4264 --- metabolite name : glutamate ; id :

### Writing modified model to file and compressing it

In [19]:
# Show the current working directory: every relative path used in this
# notebook is resolved from it.
import os
os.getcwd()

'/home/muller-prokob/Documents/github/RBA_cutsets/models_analysis/code'

In [20]:
# Save the reduced model as SBML; this file is the input of the compression step.
cobra.io.write_sbml_model(HCC, "../../models_storage/HCC_reduced.xml")

In [22]:
# Compress the reduced model with efmtool_link_compression.
# NOTE(review): the arguments are presumably (input SBML file, compression code
# directory, output directory, output file prefix) — TODO confirm against
# compress_model's signature.
efmtool_link_compression.compress_model("../../models_storage/HCC_reduced.xml", "../../aspmcs/code/compression/", "../../HCC_compressed_2/", "new")



Flipped EX_m4105
Flipped EX_m4164
Flipped EX_m4195
Flipped EX_m4223
Flipped EX_m4225
Flipped EX_m4280
Flipped EX_m4288
Flipped EX_m4292
Flipped EX_m4308
Flipped EX_m4309
Flipped EX_m4311
Flipped EX_m4327
Flipped EX_m4328
Flipped HMR_4837
Flipped HMR_8584
Flipped HMR_4128
Flipped HMR_4303
Flipped HMR_4415
Flipped HMR_4316
Flipped HMR_8498
Flipped HMR_5351
Flipped HMR_4404
Flipped HMR_4474
Flipped HMR_4501
Flipped HMR_4625
Flipped HMR_4134
Flipped HMR_4450
Flipped HMR_4804
Flipped HMR_4812
Flipped HMR_4030
Flipped HMR_4346
Flipped HMR_4485
Flipped HMR_3999
Flipped HMR_4026
Flipped HMR_4136
Flipped HMR_7879
Flipped HMR_7880
Flipped HMR_8443
Flipped HMR_8444
Flipped HMR_8488
Flipped HMR_8489
Flipped HMR_7642
Flipped HMR_9718
Flipped HMR_9486
Flipped HMR_4347
Flipped HMR_7991
Flipped HMR_7992
Flipped HMR_5259
Flipped HMR_2515
Flipped HMR_2516
Flipped HMR_2518
Flipped HMR_2520
Flipped HMR_2522
Flipped HMR_2524
Flipped HMR_3034
Flipped HMR_1448
Flipped HMR_1465
Flipped HMR_0592
Flipped HMR_06



784 internal metabolites
164 external metabolites
1414 total reactions
124 exchange reactions
1281 reversible reactions
784 internal metabolites
164 external metabolites
1414 total reactions
124 exchange reactions
1281 reversible reactions
784 internal metabolites
164 external metabolites
1414 total reactions
124 exchange reactions
1281 reversible reactions


  self.structures_check()


-- Dual network --
1414 internal metabolites
0 external metabolites
2199 total reactions
124 exchange reactions
2065 reversible reactions


In [25]:
# Write the positive-constraint files: one file per compressed reaction made
# only of reactions associated with up-regulated genes.
upregulated_reactions_list = list(upregulated_reactions)
write_constraint_file(
    upregulated_reactions_list,
    "../../HCC_compressed_2/new_reactionSubsets.txt",
    "../../HCC_compressed_2/posconstr/",
)

rsub_138 is gene-associated.
rsub_138_rev is gene-associated.
rsub_146 is gene-associated.
rsub_146_rev is gene-associated.
rsub_147 is gene-associated.
rsub_147_rev is gene-associated.
rsub_163 is gene-associated.
rsub_163_rev is gene-associated.
rsub_198 is gene-associated.
rsub_198_rev is gene-associated.
rsub_210 is gene-associated.
rsub_210_rev is gene-associated.
rsub_219 is gene-associated.
rsub_219_rev is gene-associated.
rsub_266 is gene-associated.
rsub_266_rev is gene-associated.
rsub_310 is gene-associated.
rsub_310_rev is gene-associated.
rsub_311 is gene-associated.
rsub_311_rev is gene-associated.
rsub_352 is gene-associated.
rsub_352_rev is gene-associated.
rsub_460 is gene-associated.
rsub_460_rev is gene-associated.
rsub_461 is gene-associated.
rsub_461_rev is gene-associated.
rsub_463 is gene-associated.
rsub_463_rev is gene-associated.
rsub_605 is gene-associated.
rsub_605_rev is gene-associated.
rsub_632 is gene-associated.
rsub_632_rev is gene-associated.
rsub_764

In [None]:
#2661 compressed reactions