# Part 3.2.1. Reconstructing pathway for uptake of oligosaccharides from xyloglucan

In [1]:
import pandas as pd
import numpy as np
import reframed
from reframed import Metabolite, GPRAssociation,Gene,Protein, ReactionType,CBReaction,Environment,FVA
from collections import OrderedDict
import json


## Reconstructing pathway

### Metabolites
    

In [2]:
metabolites = pd.read_excel('/Users/idunmariaburgos/Documents/Work/Project/Ruminiclostridium cellulolyticum part 2/Polysaccharide degrading pathways.xlsx',
                              sheet_name="Mets. Cat. of xyloglu. oligosac", 
                              usecols="A:E").dropna(how='all')
metabolites

Unnamed: 0,Name,Identifier,Compartment,Formula,Charge
0,Four-glucosyl xyloglucan oligosaccharide XLXG,M_QLQG_e,e,C45H76O38,0.0
1,Four-glucosyl xyloglucan oligosaccharide XXLG,M_QQLG_e,e,C45H76O38,0.0
2,Four-glucosyl xyloglucan oligosaccharide XLLG,M_QLLG_e,e,C51H86O43,0.0
3,Four-glucosyl xyloglucan oligosaccharide XXXG,M_QQQG_e,e,C39H66O33,0.0
4,Four-glucosyl xyloglucan oligosaccharide XLXG,M_QLQG_c,c,C45H76O38,0.0
5,Four-glucosyl xyloglucan oligosaccharide XXLG,M_QQLG_c,c,C45H76O38,0.0
6,Four-glucosyl xyloglucan oligosaccharide XLLG,M_QLLG_c,c,C51H86O43,0.0
7,Four-glucosyl xyloglucan oligosaccharide XXXG,M_QQQG_c,c,C39H66O33,0.0
8,Four-glucosyl xyloglucan oligosaccharide GXXG,M_GQQG_c,c,C34H58O29,0.0
9,Three-glucosyl xyloglucan oligosaccharide XXG,M_QQG_c,c,C28H48O24,0.0


In [3]:
mets = []

for index, row in metabolites.iterrows():
    # Create metabolite object
    met_id=row['Identifier']
    name=row['Name']
    compartment="C_"+row['Compartment']

    met = Metabolite(met_id=met_id, name=name,compartment=compartment)
    
    # Add metadata  
    formula = row['Formula']
    charge = row['Charge']
    
    met.metadata=OrderedDict({'FORMULA':formula,
                             'CHARGE':str(charge)})                   
    mets.append(met)

In [4]:
mets

[Four-glucosyl xyloglucan oligosaccharide XLXG,
 Four-glucosyl xyloglucan oligosaccharide XXLG,
 Four-glucosyl xyloglucan oligosaccharide XLLG,
 Four-glucosyl xyloglucan oligosaccharide XXXG,
 Four-glucosyl xyloglucan oligosaccharide XLXG,
 Four-glucosyl xyloglucan oligosaccharide XXLG,
 Four-glucosyl xyloglucan oligosaccharide XLLG,
 Four-glucosyl xyloglucan oligosaccharide XXXG,
 Four-glucosyl xyloglucan oligosaccharide GXXG,
 Three-glucosyl xyloglucan oligosaccharide XXG,
 Three-glucosyl xyloglucan oligosaccharide GXG,
 Two-glucosyl xyloglucan oligosaccharide XG,
 Four-glucosyl xyloglucan oligosaccharide GXXG]

### Reactions

In [5]:
reactions = pd.read_excel('/Users/idunmariaburgos/Documents/Work/Project/Ruminiclostridium cellulolyticum part 2/Polysaccharide degrading pathways.xlsx',
                              sheet_name="Rxns. Cat. of xyloglu. oligosac", 
                              usecols="A:F")

In [6]:
reactions

Unnamed: 0,Enzyme,Identifier,Gene,Stoichiometry,Transport,Cellulosome
0,Xyloglucan oligosaccharide transport QLQG,R_QLQGabc,Ccel_2456 and Ccel_2457 and Ccel_2458,"{""M_QLQG_e"":-1, ""M_atp_c"":-2, ""M_h2o_c"":-1, ""M...",1,No
1,Xyloglucan oligosaccharide transport QQLG,R_QQLGabc,Ccel_2456 and Ccel_2457 and Ccel_2458,"{""M_QQLG_e"":-1, ""M_atp_c"":-2, ""M_h2o_c"":-1, ""M...",1,No
2,Xyloglucan oligosaccharide transport QLLG,R_QLLGabc,Ccel_2456 and Ccel_2457 and Ccel_2458,"{""M_QLLG_e"":-1, ""M_atp_c"":-2, ""M_h2o_c"":-1, ""M...",1,No
3,Xyloglucan oligosaccharide transport QQQG,R_QQQGabc,Ccel_2456 and Ccel_2457 and Ccel_2458,"{""M_QQQG_e"":-1, ""M_atp_c"":-2, ""M_h2o_c"":-1, ""M...",1,No
4,Xyloglucan oligosaccharide transport QQQG,R_GQQGabc,Ccel_2456 and Ccel_2457 and Ccel_2458,"{""M_GQQG_e"":-1, ""M_atp_c"":-2, ""M_h2o_c"":-1, ""M...",1,No
5,beta-galactosidase QLQG,R_GALqlqg,Ccel_2451,"{""M_QLQG_c"":-1,""M_h2o_c"":-1,""M_QQQG_c"":1,""M_ga...",0,No
6,beta-galactosidase QQLG,R_GALqqlg,Ccel_2451,"{""M_QQLG_c"":-1,""M_h2o_c"":-1,""M_QQQG_c"":1,""M_ga...",0,No
7,beta-galactosidase QLLG,R_GALqllg,Ccel_2451,"{""M_QLLG_c"":-1,""M_h2o_c"":-2,""M_QQQG_c"":1,""M_ga...",0,No
8,alpha-xylosidase QQQG,R_XYLqqqg,Ccel_2455,"{""M_QQQG_c"":-1,""M_h2o_c"":-1,""M_GQQG_c"":1,""M_xy...",0,No
9,beta-glucosidase GQQG,R_GLUgqqg,Ccel_2454,"{""M_GQQG_c"":-1,""M_h2o_c"":-1,""M_QQG_c"":1,""M_glc...",0,No


**From gene string find GPR**

- process gene string and find all genes
    - For all genes: find protein ID. 
        - For each gene: Create Gene(gene_id=protein_id, name=None?)
 - Create Protein()
     - protein.genes= list of genes
 - Create GPRAssociation()
     - gpr.proteins = list of proteins

In [7]:
%store -r gene_protein_map 

In [8]:
gene_protein_map.head(3)

Unnamed: 0,Entry,Entry name,Protein names,Gene names,Cross-reference (RefSeq)
0,B8I4G1,LEUD_RUMCH,3-isopropylmalate dehydratase small subunit (E...,leuD Ccel_0127,G_WP_012634581_1
1,B8I8F2,UVRC_RUMCH,UvrABC system protein C (Protein UvrC) (Excinu...,uvrC Ccel_0807,G_WP_015924347_1
2,B8I567,UPP_RUMCH,Uracil phosphoribosyltransferase (EC 2.4.2.9) ...,upp Ccel_0260,G_WP_012634712_1


In [9]:
def gene_str_to_GPR(gene_string, gene_protein_map):
    # This is meant to be used when there is only one protein complex in the string (in other word it can only handle 'and' associations and not 'or')
    
    genes_unfiltered = gene_string.split(' ')
    
    genes = []
    
    # Find the gene id (actually protein id, but in this case considered as gene id). If there is no ID, keep the old one. 
    for string in genes_unfiltered:
        if string!='and':
            gene = gene_protein_map.loc[gene_protein_map['Gene names'].str.contains(string)]['Cross-reference (RefSeq)'].values[0]
            
            # If there is a matching protein Id, add this to the gene list. 
            if type(gene)!=float:
                genes.append("G_" + gene)
                
            # If there is NOT a matching protein Id, add gene ID. 
            else:
                genes.append(string)
    
    # If there are genes for the reaction
    if len(genes)!=0:
        
        # Create protein object
        protein=Protein()
        protein.genes=genes

        # Create GPR object
        gpr=GPRAssociation()
        gpr.proteins=[protein]
    
        return gpr
    else:
        #If there are no genes associated with the reaction pass None 
        pass

**Create reaction objects**

In [10]:
rxns=[]
gprs={}

for index, row in reactions.iterrows():
    
    reaction_id = row['Identifier']
    name = row['Enzyme']
    reversible = False
    stoichiometry = json.loads(row['Stoichiometry'])
    reaction_type = ReactionType.ENZYMATIC
    
    if row['Transport']==1:
        reaction_type=ReactionType.TRANSPORT 
    
    rxn = CBReaction(reaction_id=reaction_id, name=name, reversible=reversible, stoichiometry=stoichiometry, reaction_type=reaction_type)
    rxns.append(rxn)
    
    gpr=gene_str_to_GPR(row['Gene'],gene_protein_map)
    if gpr is not None:
        gprs[reaction_id] = gpr



In [11]:
rxns

[R_QLQGabc: M_QLQG_e + 2 M_atp_c + M_h2o_c --> M_QLQG_c + 2 M_adp_c + M_h_c + 2 M_pi_c,
 R_QQLGabc: M_QQLG_e + 2 M_atp_c + M_h2o_c --> M_QQLG_c + 2 M_adp_c + M_h_c + 2 M_pi_c,
 R_QLLGabc: M_QLLG_e + 2 M_atp_c + M_h2o_c --> M_QLLG_c + 2 M_adp_c + M_h_c + 2 M_pi_c,
 R_QQQGabc: M_QQQG_e + 2 M_atp_c + M_h2o_c --> M_QQQG_c + 2 M_adp_c + M_h_c + 2 M_pi_c,
 R_GQQGabc: M_GQQG_e + 2 M_atp_c + M_h2o_c --> M_GQQG_c + 2 M_adp_c + M_h_c + 2 M_pi_c,
 R_GALqlqg: M_QLQG_c + M_h2o_c --> M_QQQG_c + M_gal_c,
 R_GALqqlg: M_QQLG_c + M_h2o_c --> M_QQQG_c + M_gal_c,
 R_GALqllg: M_QLLG_c + 2 M_h2o_c --> M_QQQG_c + 2 M_gal_c,
 R_XYLqqqg: M_QQQG_c + M_h2o_c --> M_GQQG_c + M_xyl__D_c,
 R_GLUgqqg: M_GQQG_c + M_h2o_c --> M_QQG_c + M_glc__D_c,
 R_XYLqqg: M_QQG_c + M_h2o_c --> M_GQG_c + M_xyl__D_c,
 R_GLUgqg: M_GQG_c + M_h2o_c --> M_QG_c + M_glc__D_c,
 R_XYLqg: M_QG_c + M_h2o_c --> M_cellb_c + M_xyl__D_c]

### Add new metabolites and reactions to model

In [12]:
model = reframed.load_cbmodel('model_c_H10_part3_1_1.xml')

In [13]:
model.summary()

Metabolites:
C_c 855
C_e 217
C_p 184

Reactions:
enzymatic 881
transport 428
exchange 213
sink 0
other 244


In [14]:
len(model.genes)

732

In [15]:
for met in mets:
    model.add_metabolite(met)

In [16]:
for rxn in rxns:
    model.add_reaction(rxn)
    model.set_gpr_association(rxn.id,gprs[rxn.id])

In [17]:
model.summary()

Metabolites:
C_c 863
C_e 222
C_p 184

Reactions:
enzymatic 889
transport 433
exchange 213
sink 0
other 244


In [18]:
len(model.genes)

738

### Add exchange reactions for oligosaccharides

In [19]:
mets_exchange = [met.id for met in mets if met.compartment=="C_e"]
rxns_exchange = []
for met in mets_exchange:
    rxn_id = "R_EX_" + met[2:]
    name = "Exchange of " + model.metabolites[met].name
    reversible=True
    stoichiometry =OrderedDict([(met, -1.0)])
    reaction_type = ReactionType.EXCHANGE
    rxns_exchange.append(CBReaction(reaction_id=rxn_id, name=name, reversible=reversible, stoichiometry=stoichiometry, reaction_type=reaction_type))

In [20]:
rxns_exchange

[R_EX_QLQG_e: M_QLQG_e <-> ,
 R_EX_QQLG_e: M_QQLG_e <-> ,
 R_EX_QLLG_e: M_QLLG_e <-> ,
 R_EX_QQQG_e: M_QQQG_e <-> ,
 R_EX_GQQG_e: M_GQQG_e <-> ]

In [21]:
for rxn in rxns_exchange:
    model.add_reaction(rxn)

In [22]:
model.summary()

Metabolites:
C_c 863
C_e 222
C_p 184

Reactions:
enzymatic 889
transport 433
exchange 218
sink 0
other 244


## Verifying that new reactions can carry flux with FVA

**Creating an environment from all exchange reactions in the model.**

In [23]:
env = Environment.complete(model, max_uptake=10)

In [24]:
all_rxns= rxns_exchange + rxns
rxn_ids = [rxn.id for rxn in all_rxns]

**Predict flux with all exchange reactions open** 

In [25]:
sol = FVA(model,constraints=env, reactions= rxn_ids)

In [26]:
sol

{'R_EX_QLQG_e': [-10.0, 0.0],
 'R_EX_QQLG_e': [-10.0, 0.0],
 'R_EX_QLLG_e': [-10.0, 0.0],
 'R_EX_QQQG_e': [-10.0, 0.0],
 'R_EX_GQQG_e': [-10.0, 0.0],
 'R_QLQGabc': [0.0, 10.0],
 'R_QQLGabc': [0.0, 10.0],
 'R_QLLGabc': [0.0, 10.0],
 'R_QQQGabc': [0.0, 10.0],
 'R_GQQGabc': [0.0, 10.0],
 'R_GALqlqg': [0.0, 10.0],
 'R_GALqqlg': [0.0, 10.0],
 'R_GALqllg': [0.0, 10.0],
 'R_XYLqqqg': [0.0, 40.0],
 'R_GLUgqqg': [0.0, 50.0],
 'R_XYLqqg': [0.0, 50.0],
 'R_GLUgqg': [0.0, 50.0],
 'R_XYLqg': [0.0, 50.0]}

## Checking if genes included are involved in other enzymatic reactions

In [27]:
genes = [rxn.get_genes() for rxn in rxns]
genes_flat = list(set([item for sublist in genes for item in sublist]))

In [28]:
gene_reaction_dict= {}
for gene in genes_flat:
    try:
        print("Gene: " + gene + ", Reactions: " +  str(model.gene_to_reaction_lookup()[gene]))
        gene_reaction_dict[gene]=model.gene_to_reaction_lookup()[gene]
    except:
        print("Gene: " + gene + " not in model ")

Gene: Ccel_2451, Reactions: ['R_GALqlqg', 'R_GALqqlg', 'R_GALqllg']
Gene: G_G_WP_015925876_1, Reactions: ['R_QLQGabc', 'R_QQLGabc', 'R_QLLGabc', 'R_QQQGabc', 'R_GQQGabc']
Gene: G_G_WP_015925872_1, Reactions: ['R_GLUgqqg', 'R_GLUgqg']
Gene: G_G_WP_015925873_1, Reactions: ['R_XYLqqqg', 'R_XYLqqg', 'R_XYLqg']
Gene: G_G_WP_015925874_1, Reactions: ['R_QLQGabc', 'R_QQLGabc', 'R_QLLGabc', 'R_QQQGabc', 'R_GQQGabc']
Gene: G_G_WP_015925875_1, Reactions: ['R_QLQGabc', 'R_QQLGabc', 'R_QLLGabc', 'R_QQQGabc', 'R_GQQGabc']


In [29]:
def prGreen(skk): print("\033[92m {}\033[00m" .format(skk))

In [30]:
print("Green reactions are the reactions that were included in this Jupyter Notebook\n")
for key in gene_reaction_dict.keys():
    print("Gene: " + key)
    for rxn in gene_reaction_dict[key]:
        if rxn in rxn_ids:
            prGreen(" " + str(model.reactions[rxn]))
        else:
            print("  " + str(model.reactions[rxn]))

Green reactions are the reactions that were included in this Jupyter Notebook

Gene: Ccel_2451
[92m  R_GALqlqg: M_QLQG_c + M_h2o_c --> M_QQQG_c + M_gal_c[00m
[92m  R_GALqqlg: M_QQLG_c + M_h2o_c --> M_QQQG_c + M_gal_c[00m
[92m  R_GALqllg: M_QLLG_c + 2 M_h2o_c --> M_QQQG_c + 2 M_gal_c[00m
Gene: G_G_WP_015925876_1
[92m  R_QLQGabc: M_QLQG_e + 2 M_atp_c + M_h2o_c --> M_QLQG_c + 2 M_adp_c + M_h_c + 2 M_pi_c[00m
[92m  R_QQLGabc: M_QQLG_e + 2 M_atp_c + M_h2o_c --> M_QQLG_c + 2 M_adp_c + M_h_c + 2 M_pi_c[00m
[92m  R_QLLGabc: M_QLLG_e + 2 M_atp_c + M_h2o_c --> M_QLLG_c + 2 M_adp_c + M_h_c + 2 M_pi_c[00m
[92m  R_QQQGabc: M_QQQG_e + 2 M_atp_c + M_h2o_c --> M_QQQG_c + 2 M_adp_c + M_h_c + 2 M_pi_c[00m
[92m  R_GQQGabc: M_GQQG_e + 2 M_atp_c + M_h2o_c --> M_GQQG_c + 2 M_adp_c + M_h_c + 2 M_pi_c[00m
Gene: G_G_WP_015925872_1
[92m  R_GLUgqqg: M_GQQG_c + M_h2o_c --> M_QQG_c + M_glc__D_c[00m
[92m  R_GLUgqg: M_GQG_c + M_h2o_c --> M_QG_c + M_glc__D_c[00m
Gene: G_G_WP_015925873_1
[92m  R_X

## <span style="color: blue;">Summary </span>

In [31]:
model.update()

In [32]:
model.id = "model_c_H10_part3_2_1"

In [33]:
reframed.save_cbmodel(model,filename="model_c_H10_part3_2_1.xml")

In [34]:
model_new = reframed.load_cbmodel('model_c_H10_part3_2_1.xml')

In [35]:
model_prev = reframed.load_cbmodel('model_cellulolyticum_H10.xml')

In [36]:
models_dict={model.id:{} for model in [model_new,model_prev]}
models_rxn_dict={model.id:{} for model in [model_new,model_prev]}
for model in [model,model_prev]:
    models_dict[model.id]['Reactions']=len(model.reactions)
    models_dict[model.id]['Metabolites']=len(model.metabolites)
    models_dict[model.id]['Genes']=len(model.genes)
    
    models_rxn_dict[model.id]['Enzymatic']=len(model.get_reactions_by_type(reframed.ReactionType.ENZYMATIC))
    models_rxn_dict[model.id]['Exchange']=len(model.get_reactions_by_type(reframed.ReactionType.EXCHANGE))
    models_rxn_dict[model.id]['Transport']=len(model.get_reactions_by_type(reframed.ReactionType.TRANSPORT))
    models_rxn_dict[model.id]['Sink']=len(model.get_reactions_by_type(reframed.ReactionType.SINK))
    models_rxn_dict[model.id]['Other']=len(model.get_reactions_by_type(reframed.ReactionType.OTHER))
    

**Overview models**

In [37]:
pd.DataFrame(models_dict)

Unnamed: 0,model_c_H10_part3_2_1,model_cellulolyticum_H10
Reactions,1784,1811
Metabolites,1269,1250
Genes,738,733


**Overview reactions in models**

In [38]:
pd.DataFrame(models_rxn_dict)

Unnamed: 0,model_c_H10_part3_2_1,model_cellulolyticum_H10
Enzymatic,889,883
Exchange,218,210
Transport,433,475
Sink,0,0
Other,244,243


In [39]:
import cobra

In [40]:
model_cobra = cobra.io.read_sbml_model('model_c_H10_part3_2_1.xml')

In [41]:
cobra.io.save_json_model(model_cobra, "model_c_H10_part3_2_1.json")