# Reconstructing pathways for cellulose degradation and uptake

In [3]:
import json
from collections import OrderedDict

import pandas as pd
import reframed
from reframed import Metabolite, GPRAssociation, Gene, Protein, ReactionType, CBReaction, Environment, FVA

## Reconstructing pathway

### Metabolites
    

In [9]:
# Load the metabolite sheet (columns A-E) and drop every row that has a missing value.
metabolites = pd.read_excel(
    '../input/Polysaccharide degrading pathways.xlsx',
    sheet_name="Mets.  Cat. of cellodextrins ",
    usecols="A:E",
).dropna()

In [10]:
# Build one reframed Metabolite per row of the metabolite table.
mets = []

for _, row in metabolites.iterrows():
    compartment_id = "C_" + row['Compartment']
    met = Metabolite(met_id=row['Identifier'], name=row['Name'], compartment=compartment_id)
    # Formula and charge go into the metadata; the charge is stored as a string.
    met.metadata = {
        'FORMULA': row['Formula'],
        'CHARGE': str(row['Charge']),
    }
    mets.append(met)

### Reactions

In [11]:
# Load the reaction sheet (columns A-F); rows with missing values are kept.
reactions = pd.read_excel(
    '../input/Polysaccharide degrading pathways.xlsx',
    sheet_name="Rxns.  Cat. of cellodextrins ",
    usecols="A:F",
)

**From gene string find GPR**

- Process the gene string and find all genes
    - For all genes: find the protein ID.
        - For each gene: create `Gene(gene_id=protein_id, name=None?)`
- Create `Protein()`
    - `protein.genes` = list of genes
- Create `GPRAssociation()`
    - `gpr.proteins` = list of proteins

In [12]:
# Show the full reaction table that the GPRs and reactions below are built from.
reactions

Unnamed: 0,Enzyme,Identifier,Gene,Stoichiometry,Transport,Type
0,ABC transporter cellobiose,R_Cellbabc,Ccel_2112 and Ccel_2111 and Ccel_2110,"{""M_cellb_e"":-1, ""M_atp_c"":-2, ""M_h2o_c"":-1, ""...",1,ABC-transporter
1,ABC transporter cellodextrin G3,R_cell3abc,Ccel_2112 and Ccel_2111 and Ccel_2110,"{""M_cell3_e"":-1, ""M_atp_c"":-2, ""M_h2o_c"":-1, ""...",1,ABC-transporter
2,ABC transporter cellodextrin G4,R_cell4abc,Ccel_2112 and Ccel_2111 and Ccel_2110,"{""M_cell4_e"":-1, ""M_atp_c"":-2, ""M_h2o_c"":-1, ""...",1,ABC-transporter
3,ABC transporter cellodextrin G5,R_cell5abc,Ccel_2112 and Ccel_2111 and Ccel_2110,"{""M_cell5_e"":-1, ""M_atp_c"":-2, ""M_h2o_c"":-1, ""...",1,ABC-transporter
4,"cellodextrin phosphorylase CdpA G4, CdpB",R_CEPA3,Ccel_1439 or Ccel_2354,"{""M_cell4_c"": -1,""M_pi_c"": -1, ""M_cell3_c"": 1,...",0,Glycosyl hydrolases Family 94
5,cellodextrin phosphorylase CdpA G5,R_CEPA4,Ccel_1439,"{""M_cell5_c"": -1,""M_pi_c"": -1, ""M_cell4_c"": 1,...",0,Glycosyl hydrolases Family 94
6,cellodextrin phisphorylase CdpC,R_CEPA2,Ccel_3412,"{""M_cell3_c"": -1,""M_pi_c"": -1, ""M_cellb_c"": 1,...",0,Glycosyl hydrolases Family 94


In [None]:
# NOTE(review): `gene_protein_map` is not created in any cell visible in this notebook.
# It has to be loaded beforehand and must provide the columns 'Gene names' and
# 'Cross-reference (RefSeq)', which gene_str_to_GPR reads.
gene_protein_map.head(3)

In [None]:
def _lookup_gene_id(gene_name, gene_protein_map):
    """Return the RefSeq cross-reference for ``gene_name``, or ``"G_" + gene_name`` if none is found."""
    import re

    # Match the name as a whole token so that e.g. "Ccel_211" cannot hit "Ccel_2112".
    # na=False keeps rows without a 'Gene names' entry out of the boolean mask.
    whole_token = r"(?<!\w)" + re.escape(gene_name) + r"(?!\w)"
    matches = gene_protein_map.loc[
        gene_protein_map['Gene names'].str.contains(whole_token, regex=True, na=False),
        'Cross-reference (RefSeq)',
    ]

    # No matching row, or a row whose RefSeq entry is missing (NaN): keep the old gene id.
    if matches.empty or not isinstance(matches.iloc[0], str):
        return "G_" + gene_name
    return matches.iloc[0]


def gene_str_to_GPR(gene_string, gene_protein_map):
    """Build a GPRAssociation from a flat gene rule such as ``"A and B or C"``.

    The rule is read as whitespace-separated tokens. Gene identifiers joined by
    ``and`` are collected into the same protein, and every ``or`` starts a new
    protein. Parentheses / nested rules are NOT supported.

    Each gene identifier is replaced by the 'Cross-reference (RefSeq)' value of
    the first row of ``gene_protein_map`` whose 'Gene names' entry contains the
    identifier (this is really a protein id, but it is used as the gene id here).
    If no row matches, or the RefSeq value is missing, ``"G_" + identifier`` is
    used instead.

    Parameters
    ----------
    gene_string : str
        Gene rule, e.g. ``"Ccel_2112 and Ccel_2111"`` or ``"Ccel_1439 or Ccel_2354"``.
    gene_protein_map : pandas.DataFrame
        Table with the columns 'Gene names' and 'Cross-reference (RefSeq)'.

    Returns
    -------
    GPRAssociation
        One Protein per ``or``-separated group; each protein's ``genes`` is the
        list of resolved ids of that group.
    """
    gpr = GPRAssociation()
    proteins = []
    genes = []

    def close_protein():
        # Turn the genes collected so far into a Protein; empty groups are skipped.
        if genes:
            protein = Protein()
            protein.genes = list(genes)
            proteins.append(protein)
            genes.clear()

    # split() without an argument ignores repeated/leading/trailing whitespace,
    # so no empty token ever reaches the lookup.
    for token in gene_string.split():
        keyword = token.lower()
        if keyword == 'or':
            close_protein()
        elif keyword != 'and':
            genes.append(_lookup_gene_id(token, gene_protein_map))

    # The last group is not followed by an 'or', so close it explicitly.
    close_protein()

    gpr.proteins = proteins

    return gpr

 

**Create reaction objects**

In [None]:
rxns = []
gprs = {}

# Turn every row of the reaction table into a CBReaction plus its GPR association.
for index, row in reactions.iterrows():
    reaction_id = row['Identifier']

    # Rows flagged with Transport == 1 are transport reactions; all others are enzymatic.
    if row['Transport'] == 1:
        reaction_type = ReactionType.TRANSPORT
    else:
        reaction_type = ReactionType.ENZYMATIC

    # The 'Stoichiometry' cell holds a JSON object mapping metabolite id -> coefficient.
    rxn = CBReaction(
        reaction_id=reaction_id,
        name=row['Enzyme'],
        reversible=False,
        stoichiometry=json.loads(row['Stoichiometry']),
        reaction_type=reaction_type,
    )
    rxns.append(rxn)

    # GPRs are kept separately and attached once the reaction is in the model.
    gprs[reaction_id] = gene_str_to_GPR(row['Gene'], gene_protein_map)


In [None]:
# Inspect the GPR association built for each new reaction id.
gprs

### Add new metabolites and reactions to model

In [None]:
# Summary of the model before the new metabolites and reactions are added.
# NOTE(review): `model` is not loaded in any cell visible in this notebook — confirm its origin.
model.summary()

In [None]:
# Gene count before the additions (compare with the count after adding the reactions).
len(model.genes)

In [None]:
# Register every new metabolite object with the model.
for met in mets:
    model.add_metabolite(met)

In [None]:
# Add each reconstructed reaction, then attach the GPR that was built for its id.
for rxn in rxns:
    model.add_reaction(rxn)
    model.set_gpr_association(rxn.id,gprs[rxn.id])

In [None]:
# Summary of the model after adding the new metabolites and reactions.
model.summary()

In [None]:
# Gene count after the additions.
len(model.genes)

### Add exchange reactions for cellodextrins

In [None]:
# Ids of the new metabolites that live in the extracellular compartment "C_e".
mets_exchange = [met.id for met in mets if met.compartment=="C_e"]

# One reversible exchange reaction per extracellular metabolite.
rxns_exchange = []
for met_id in mets_exchange:
    exchange_rxn = CBReaction(
        # met_id[2:] drops the first two characters of the id (the "M_" prefix of the ids used here).
        reaction_id="R_EX_" + met_id[2:],
        name="Exchange of " + model.metabolites[met_id].name,
        reversible=True,
        stoichiometry=OrderedDict([(met_id, -1.0)]),
        reaction_type=ReactionType.EXCHANGE,
    )
    rxns_exchange.append(exchange_rxn)

In [None]:
# Inspect the generated exchange reactions.
rxns_exchange

In [None]:
# Add the new exchange reactions to the model (no GPR is attached to these).
for rxn in rxns_exchange:
    model.add_reaction(rxn)

In [None]:
# Summary of the model after the exchange reactions were added.
model.summary()

## Verifying that new reactions can carry flux with FVA

**Creating an environment from all exchange reactions in the model.**

In [None]:
# Complete environment for the model; max_uptake=10 presumably bounds the uptake
# through each exchange reaction (confirm against the reframed documentation).
env = Environment.complete(model, max_uptake=10)

In [None]:
# Every reaction added in this notebook: the new exchange reactions first,
# followed by the reconstructed pathway reactions. rxn_ids holds their ids.
all_rxns= rxns_exchange + rxns
rxn_ids = [rxn.id for rxn in all_rxns]

**Predict flux with all exchange reactions open** 

In [None]:
# Run FVA only for the reactions added in this notebook, with the environment as constraints.
sol = FVA(model,constraints=env, reactions= rxn_ids)

In [None]:
# Show the FVA result for the added reactions.
sol

## Checking if genes included are involved in other enzymatic reactions

In [None]:
# For every reconstructed reaction, collect what get_genes() returns (one entry per reaction).
genes = [rxn.get_genes() for rxn in rxns]
    

In [None]:
# Unique genes across all reconstructed reactions. Sorting makes the order (and
# therefore the printed output of the cells that iterate over this list) the same
# on every run; a bare set has no stable order between kernel sessions.
genes_flat = sorted({gene for rxn_genes in genes for gene in rxn_genes})

In [None]:
# Inspect the de-duplicated gene list.
genes_flat

In [None]:
# Map each gene used by the new reactions to the model reactions it is associated with.
gene_reaction_dict = {}

# Build the gene -> reactions lookup once instead of twice per gene.
gene_lookup = model.gene_to_reaction_lookup()

for gene in genes_flat:
    # An explicit membership test replaces the bare `except:`, so only the
    # "gene is not in the model" case is reported and real errors still surface.
    if gene in gene_lookup:
        print("Gene: " + gene + ", Reactions: " +  str(gene_lookup[gene]))
        gene_reaction_dict[gene] = gene_lookup[gene]
    else:
        print("Gene: " + gene + " not in model ")

In [None]:
def prGreen(skk):
    """Print ``skk`` wrapped in the ANSI escape codes ``\\033[92m`` ... ``\\033[00m`` (green text)."""
    green_text = "\033[92m {}\033[00m".format(skk)
    print(green_text)

In [None]:
print("Green reactions are the reactions that were included in this Jupyter Notebook\n")

# List every reaction of each gene; reactions added in this notebook are printed in green.
for gene_id, gene_rxn_ids in gene_reaction_dict.items():
    print("Gene: " + gene_id)
    for gene_rxn_id in gene_rxn_ids:
        rxn_text = str(model.reactions[gene_rxn_id])
        if gene_rxn_id in rxn_ids:
            # prGreen's format string already adds one leading space, hence " " instead of "  ".
            prGreen(" " + rxn_text)
        else:
            print("  " + rxn_text)

In [None]:
# GPR of reaction R_GLCabc, which is not one of the reactions built in this notebook;
# presumably it shares genes with the new reactions — confirm against the listing above.
model.reactions.R_GLCabc.gpr

In [None]:
# GPR of reaction R_ARBabc, which is not one of the reactions built in this notebook;
# presumably it shares genes with the new reactions — confirm against the listing above.
model.reactions.R_ARBabc.gpr

## <span style="color: blue;">Summary </span>

In [None]:
# Call the model's update() after all additions above.
# NOTE(review): confirm in reframed what update() refreshes and whether it is needed before saving.
model.update()

In [None]:
# Give the extended model its own id; the comparison tables at the end are keyed by model id.
model.id = "model_c_H10_part3_1_1"

In [None]:
# Write the extended model to an XML file in the current working directory.
reframed.save_cbmodel(model,filename="model_c_H10_part3_1_1.xml")

In [None]:
# Reload the file that was just written, so the comparison uses the saved model.
model_new = reframed.load_cbmodel('model_c_H10_part3_1_1.xml')

In [None]:
# Load the model file used as the "previous" version in the comparison below.
# NOTE(review): presumably this is the model the notebook started from — confirm.
model_prev = reframed.load_cbmodel('model_cellulolyticum_H10.xml')

In [None]:
# Size comparison of the reloaded new model and the previous model.
# The loop runs over model_new (not the in-memory `model`) and uses its own loop
# variable, so the global `model` is no longer rebound to model_prev afterwards.
compared_models = [model_new, model_prev]

# Column label -> reaction type counted under that label.
rxn_type_labels = {
    'Enzymatic': ReactionType.ENZYMATIC,
    'Exchange': ReactionType.EXCHANGE,
    'Transport': ReactionType.TRANSPORT,
    'Sink': ReactionType.SINK,
    'Other': ReactionType.OTHER,
}

models_dict = {}
models_rxn_dict = {}
for cb_model in compared_models:
    # Overall counts of reactions, metabolites and genes.
    models_dict[cb_model.id] = {
        'Reactions': len(cb_model.reactions),
        'Metabolites': len(cb_model.metabolites),
        'Genes': len(cb_model.genes),
    }

    # Number of reactions of each type.
    models_rxn_dict[cb_model.id] = {
        label: len(cb_model.get_reactions_by_type(rxn_type))
        for label, rxn_type in rxn_type_labels.items()
    }
    

**Overview models**

In [None]:
# One column per model id; rows are the counts of reactions, metabolites and genes.
pd.DataFrame(models_dict)

**Overview reactions in models**

In [None]:
# One column per model id; rows are the reaction counts per reaction type.
pd.DataFrame(models_rxn_dict)