# Final CHO Model
This notebook assesses the validity of our reconstruction and how complete it is.

In [2]:
# Import libraries
import gspread
import pandas as pd
import numpy as np
import cobra
from cobra import Model, Reaction, Metabolite
from cobra.io import validate_sbml_model, write_sbml_model
from tqdm.notebook import tqdm


### 1. Retrieve information from the Google Sheet datasets reactions and metabolites

In [3]:
# Authenticate against Google Sheets with the service-account key file and
# open the reconstruction spreadsheet by its title.
sa = gspread.service_account(filename='credentials.json')
cho_recon = sa.open('CHO Network Reconstruction')

# Grab the three worksheets used by this notebook (reactions, reaction
# attributes and metabolites), in that order.
rxns_sheet, rxns_attributes_sheet, metabolites_sheet = (
    cho_recon.worksheet(title) for title in ('Rxns', 'Attributes', 'Metabolites')
)

# Turn every worksheet into a DataFrame built from its records:
#   rxns            -> reaction IDs, names, formulas, GPRs
#   rxns_attributes -> reaction bounds
#   metabolites     -> metabolite names, formulas and compartments
rxns, rxns_attributes, metabolites = (
    pd.DataFrame(sheet.get_all_records())
    for sheet in (rxns_sheet, rxns_attributes_sheet, metabolites_sheet)
)

### 2. Build a model and feed it the information from the df generated

In [4]:
##### ----- Create a model and add reactions ----- #####
# Start from an empty model and register one (still empty) Reaction per entry
# of the 'Reaction' column; the details are filled in afterwards.
model = Model("iCHO")
lr = [Reaction(rxn_id) for rxn_id in rxns['Reaction']]
model.add_reactions(lr)

In [None]:
##### ----- Add information to each one of the reactions ----- #####
# The i-th reaction of the model is annotated from the i-th row of `rxns` and
# `rxns_attributes`.
# NOTE(review): this assumes both sheets list the reactions in the same order
# as the model - confirm against the spreadsheet.
for i, r in enumerate(tqdm(model.reactions)):
    r.build_reaction_from_string(rxns['Reaction Formula'][i])
    r.name = rxns['Reaction Name'][i]
    r.subsystem = rxns['Subsystem'][i]
    r.gene_reaction_rule = str(rxns['GPR_final'][i])
    # Set both bounds in a single assignment: assigning lower_bound and
    # upper_bound one after the other can be rejected when the new lower bound
    # lies above the upper bound the reaction currently has.
    r.bounds = (
        float(rxns_attributes['Lower bound'][i]),
        float(rxns_attributes['Upper bound'][i]),
    )

In [6]:
##### ----- Add information for each metabolite ----- #####
# Column-oriented lookup: metabolites_dict[<column>][<BiGG ID>] -> cell value.
metabolites_dict = metabolites.set_index('BiGG ID').to_dict('dict')
for met in model.metabolites:
    # Look up by the metabolite ID directly instead of string-formatting the
    # metabolite object.
    met.name = metabolites_dict['Name'][met.id]
    met.formula = metabolites_dict['Formula'][met.id]
    # Only the part before ' - ' of the compartment cell is kept.
    met.compartment = metabolites_dict['Compartment'][met.id].split(' - ')[0]
    

In [7]:
##### ----- Build the S matrix ----- #####
# Dense stoichiometric matrix of the model, kept both as the notebook variable
# `S` and as an attribute on the model object.
model.S = S = cobra.util.create_stoichiometric_matrix(model, array_type='dense')

In [9]:
##### ----- Save the model ----- #####
# Serialise the assembled model to an SBML file in the working directory.
write_sbml_model(model, filename="iCHOv2.xml")

In [None]:
##### ----- Test for errors in the reconstruction ----- ######
from cobra.io import read_sbml_model, validate_sbml_model

# validate_sbml_model returns a (model, errors) pair. The model can come back
# as None when the file has errors, so only replace the working `model` when a
# model was actually returned; otherwise the in-memory model would be lost.
validated_model, errors = validate_sbml_model("iCHOv2.xml")
if validated_model is not None:
    model = validated_model
errors

### Blocked reactions and Dead-Ends

In [25]:
from cobra.io import read_sbml_model
# Reload the saved model. The result must be bound to `model` (it was
# previously bound to a misspelled name, so the reloaded model was never used).
model = read_sbml_model("iCHOv2.xml")

##### ----- FVA ----- #####
import importlib
import utils
# Pick up any edits made to utils.py without restarting the kernel.
importlib.reload(utils)
model.solver = 'gurobi'

# Open every reaction in both directions before computing the flux ranges.
# NOTE(review): this permanently overwrites the bounds loaded from the file
# for all later cells - confirm that this is intended.
for rxn in model.reactions:
    rxn.bounds = -1000, 1000
minmax = utils.runMinMax_Single(model, end_rxn_index=None)

500
1000
1500
2000
2500
3000
3500
4000
4500
5000
5500
6000
6500
7000
7500
8000


In [30]:
import importlib
import utils
importlib.reload(utils)

# Detect dead-end metabolites. The detection is run once; `dead_ends` is kept
# as a second name for the same result.
# NOTE(review): `S` comes from an earlier cell - presumably its row order
# matches model.metabolites; confirm after reloading the model.
is_dead_end = utils.detect_dead_ends(S)
dead_ends = is_dead_end

# Get the indices of the dead-end metabolites
dead_end_indices = np.nonzero(is_dead_end)[0]

# Get the IDs of the dead-end metabolites
dead_end_metabolites = [model.metabolites[i].id for i in dead_end_indices]
print(f'Number of dead end metabolites: {len(dead_end_metabolites)}')

# Write the IDs of the dead-end metabolites, one per line
with open('Dead_ends.txt', 'w') as f:
    for dead_met in dead_end_metabolites:
        print(dead_met, file=f)

Number of dead end metabolites: 2110


In [31]:
##### Print Reactions with the min and max fluxes #####
# Solver results are only accurate up to a numerical tolerance, so a flux is
# considered zero when its magnitude does not exceed the model's tolerance
# (an exact `== 0` test misses values such as 1e-12).
zero_cutoff = model.tolerance


def _is_zero_flux(value, cutoff=zero_cutoff):
    """Return True when ``value`` is a number whose magnitude is <= ``cutoff``."""
    # Guard against missing values; presumably minmax holds numbers only.
    return value is not None and abs(value) <= cutoff


# `minmax` is indexed like model.reactions: entry i holds (min, max) of
# reaction i. Both reports are produced in a single pass.
with open('FVA_Results.txt', 'w') as f_all, open('Blocked_Reactions.txt', 'w') as f_blocked:
    for i, j in enumerate(minmax):
        rxn_id = model.reactions[i].id
        print(rxn_id, "Min: ", j[0], "Max: ", j[1], file=f_all)

        ##### Print Blocked Reactions #####
        # A reaction is blocked when neither direction can carry flux.
        if _is_zero_flux(j[0]) and _is_zero_flux(j[1]):
            print(rxn_id, "Min: ", j[0], "Max: ", j[1], file=f_blocked)

In [32]:
# Check Mass Balance
# Keep the outcome of every check instead of discarding it: reactions whose
# check returns a non-empty result are collected, keyed by reaction ID.
unbalanced_rxns = {}
for rxn in model.reactions:
    imbalance = rxn.check_mass_balance()
    if imbalance:
        unbalanced_rxns[rxn.id] = imbalance
print(f'Number of unbalanced reactions: {len(unbalanced_rxns)}')

In [None]:
# Initialise model
# Allow every exchange reaction to run freely in both directions.
for rxn_exchange in model.exchanges:
    rxn_exchange.bounds = (-1000, 1000)

bio_id = model.reactions.index('biomass_producing')
atp_id = model.reactions.index('DM_atp_c')

# Bounds of the biomass reaction. (An earlier assignment of (0, 100) was
# immediately overwritten by this one and has been dropped.)
model.reactions[bio_id].bounds = (0.0001, 20)

# The objective is the 'SMS' reaction rather than biomass production.
model.objective = 'SMS'
solution = model.optimize()
model.metabolites.sphmyln_cho_c.summary()


In [149]:
# For every metabolite of the biomass reaction, list the reactions it takes
# part in together with their reaction strings.
with open('Biomass_Metabolites_Reactions.txt', 'w') as f:
    bio_mets = model.reactions.get_by_id('biomass_producing').metabolites
    for bio_met in bio_mets:
        # Materialise once so names and formulas are listed in the same order.
        met_rxns = list(bio_met.reactions)
        f.write("Metabolite: " + bio_met.name + "\n")
        f.write("Reactions: " + ", ".join(r.name for r in met_rxns) + "\n")
        f.write("Formula: " + ", ".join(r.build_reaction_string() for r in met_rxns) + "\n\n")

# Detect all the exchange reactions that have no uptake
from cobra.flux_analysis import flux_variability_analysis
FVA_Result = flux_variability_analysis(model, model.exchanges)

# Keep only the exchanges whose minimum flux is zero. The filter has to be
# applied to FVA_Result itself (it previously referenced an undefined name).
# NOTE(review): this is an exact comparison on solver output; a tolerance may
# be more appropriate.
FVA_Result = FVA_Result[FVA_Result.minimum == 0]
with open('FVA_Excange.txt', 'w') as f:
    f.write("reaction\tminimum\tmaximum\n")
    for index, row in FVA_Result.iterrows():
        f.write(f"{index}\t{row['minimum']}\t{row['maximum']}\n")

In [81]:
import pandas as pd
import re

# One line of Blocked_Reactions.txt looks like "<id> Min:  <min> Max:  <max>".
# The ID is everything before " Min:" and the values are taken as whole
# tokens, so signed and scientific-notation numbers are accepted as well.
_BLOCKED_LINE = re.compile(r'^(?P<name>.+?) Min:\s*(?P<min>\S+) Max:\s*(?P<max>\S+)\s*$')


def _read_blocked_reactions(path):
    """Parse the blocked-reactions report into {reaction id: (min, max)}.

    Lines that do not follow the expected layout, or whose values are not
    numbers, are skipped.
    """
    blocked = {}
    with open(path, 'r') as file:
        for line in file:
            match = _BLOCKED_LINE.match(line)
            if not match:
                continue
            try:
                values = (float(match.group('min')), float(match.group('max')))
            except ValueError:
                continue
            blocked[match.group('name')] = values
    return blocked


# Read the report once and look reactions up by their exact ID. (Searching
# each line for the ID as a substring can match a different reaction whose ID
# merely contains it.)
blocked_by_id = _read_blocked_reactions('Blocked_Reactions.txt')

bio_mets = model.reactions.get_by_id('biomass_producing').metabolites
records = []
seen_ids = set()
for bio_met in bio_mets:
    # ATP is skipped when collecting the reactions around biomass metabolites.
    if bio_met.id == 'atp_c':
        continue
    for r_bio in bio_met.reactions:
        if r_bio.id in blocked_by_id and r_bio.id not in seen_ids:
            seen_ids.add(r_bio.id)
            min_val, max_val = blocked_by_id[r_bio.id]
            records.append({'name': r_bio.id, 'min': min_val, 'max': max_val})

# Build the frame in one go; each blocked reaction appears once.
block_rxns_biomass = pd.DataFrame(records, columns=['name', 'min', 'max'])
block_rxns_biomass = block_rxns_biomass.drop_duplicates(subset=['name'])

for i in block_rxns_biomass['name']:
    reaction = model.reactions.get_by_id(i)
    print(reaction)

In [None]:
# Knock out every reaction in turn and report the resulting objective value.
for reaction in model.reactions:
    # Changes made inside the context (the knock-out) are reverted on exit.
    with model:
        reaction.knock_out()
        # slim_optimize returns the objective value as a float and yields NaN
        # when no optimal solution exists, so the message below can always be
        # formatted even if the knock-out makes the problem infeasible.
        objective_value = model.slim_optimize()
        print('%s blocked (bounds: %s), new growth rate %f' %
              (reaction.id, str(reaction.bounds), objective_value))

In [34]:
import os
import tempfile
from pprint import pprint
from cobra.io import write_sbml_model, validate_sbml_model

# Round-trip the in-memory model through a throw-away SBML file and validate
# it. A temporary directory is used so the file is written and read by path
# only; reopening a still-open NamedTemporaryFile by name is not portable.
with tempfile.TemporaryDirectory() as tmp_dir:
    sbml_path = os.path.join(tmp_dir, 'model.xml')
    write_sbml_model(model, filename=sbml_path)
    report = validate_sbml_model(filename=sbml_path)

pprint(report)

(None,
 {'COBRA_CHECK': [],
  'COBRA_ERROR': ["Required attribute 'id' cannot be found or parsed in "
                  "'<Compartment>'."],
  'COBRA_FATAL': [],
  'SBML_ERROR': ['E0 (Error): SBML component consistency (core, L22); Invalid '
                 'attribute found on Compartment object; A Compartment object '
                 "must have the required attributes 'id' and 'constant', and "
                 "may have the optional attributes 'metaid', 'sboTerm', "
                 "'name', 'spatialDimensions', 'size' and 'units'. No other "
                 'attributes from the SBML Level 3 Core namespace are '
                 'permitted on a Compartment object.\n'
                 'Reference: L3V1 Section 4.5\n'
                 " The required attribute 'id' is missing.\n",
                 'E1 (Error): SBML component consistency (fbc, L682); Chemical '
                 'formula must be string; The value of attribute '
                 "'fbc:chemicalFormula' on the SBML <specie