# Build Excel stoich for GAMS (originally A01_build_excel_stoich_for_GAMS.ipynb)

In [1]:
import pandas as pd
import cobra
from collections import OrderedDict
from copy import deepcopy

from gsm_custom_functions import *

#### Load data

In [2]:
# Metabolic model (COBRApy JSON).
# When several GSM models are in use, keep one as the main copy and apply
# transaction files to derive the others; this keeps annotations,
# stoichiometry, etc. consistent between them.
# `chosen_model` must be a key of `modelVersions` (it is used to index it below)
# and is also the suffix of the model file name: ./input/iRhto<chosen_model>.json
chosen_model = 'Batch-Rabinowitz'

# Per-model parameters that are not stored in the model file itself.
# NOTE(review): 'C' / 'N' look like the carbon- / nitrogen-limited chemostat
# variants (judging by their `dummy_sheet` names) -- confirm.
modelVersions = {
    'Batch-Rabinowitz': {
        'max_allowed_mito_proteome_allo_fraction': 0.13953,
        'nonmodeled_proteome_allocation': 0.49,
        'dummy_sheet': 'batch-Rabinowitz',
    },
    'C': {
        'max_allowed_mito_proteome_allo_fraction': 0.13464,
        'nonmodeled_proteome_allocation': 0.54,
        'dummy_sheet': 'chemo-clim-Rabinowitz',
    },
    'N': {
        'max_allowed_mito_proteome_allo_fraction': 0.13033,
        'nonmodeled_proteome_allocation': 0.53,
        'dummy_sheet': 'chemo-nlim-Rabinowitz',
    },
}

modelname = 'iRhto' + chosen_model
model = cobra.io.load_json_model('./input/{}.json'.format(modelname))
# if not choosing between nutrient-limited models, uncomment the following lines:
# model = cobra.io.load_json_model('./input/iRhtoC.json')
# chosen_model = ''

# Rewrite the GAMS settings file so it reflects the chosen model version:
# two $setGlobal proteome parameters plus the fixed ATP-maintenance flux, which
# is taken from the lower bound of reaction ATPM_c in the loaded model.
# If not using multiple models, replace the looked-up values with the ones you want.
with open('../pycore/binary_search/model-version-settings.txt', 'w') as f:
    version_params = modelVersions[chosen_model]
    settings_lines = [
        "* file with settings updated by build_model scripts, to help with managing multiple GSM models",
        "$setGlobal max_allowed_mito_proteome_allo_fraction {}".format(
            version_params['max_allowed_mito_proteome_allo_fraction']),
        "$setGlobal nonmodeled_proteome_allocation {}".format(
            version_params['nonmodeled_proteome_allocation']),
        "v.fx('RXN-ATPM_c_FWD-SPONT') = {} * %nscale%;".format(
            model.reactions.get_by_id('ATPM_c').lower_bound),
    ]
    # every line, including the last one, is newline-terminated
    f.write(''.join(line + "\n" for line in settings_lines))

# Protein curation table; rows are labelled by protein id (the `id` column is kept).
df_pro = pd.read_excel('./input/PROTEIN_stoich_curation.xlsx')
df_pro.index = df_pro['id'].to_list()
# Amino-acid map; rows are labelled by the amino-acid abbreviation.
df_aamap = pd.read_csv('./input/PROTEIN_amino_acid_map.txt', sep='\t')
df_aamap.index = df_aamap['aa_abbv'].to_list()

# Dummy proteins
dummy_path = './input/PROTEIN_dummy_prot_calc.xlsx'
# (sheet name, PROSYN reaction name) pairs.
# 1st entry is the default dummy protein; remove the 2nd entry unless needed.
dummy_sheets = [
    (modelVersions[chosen_model]['dummy_sheet'], "PROSYN-PROTDUMMY"),
    ('unidentified', "PROSYN-PROTDUMMYUNIDENTIFIED"),
]
dummy_list = [
    {"df": pd.read_excel(dummy_path, sheet_name=sheet), "name": prosyn_name}
    for sheet, prosyn_name in dummy_sheets
]
for prot in dummy_list:
    aa_table = prot["df"]
    aa_table.index = aa_table['aa_abbv'].to_list()
    # The value at row 'A', column 'Unnamed: 5' is used as the median length
    # (rounded to an integer) ...
    prot["medianL"] = int(round(aa_table.loc['A', 'Unnamed: 5'], 0))
    # ... and the value at row 'C' of the same column as the molecular weight.
    # 1e-5 g/mmol is added so that rounding cannot lose protein mass.
    prot["MW"] = round(aa_table.loc['C', 'Unnamed: 5'], 5) + 1e-5
    # Derived ids: 'BIOSYN-...' reaction name and lower-cased 'BIO-...' species name.
    prot["biosyn"] = prot["name"].replace('PROSYN-', 'BIOSYN-')
    prot["met"] = prot["name"].lower().replace('prosyn-', 'BIO-')

# Enzyme curation table
df_enz = pd.read_excel('./input/ENZYME_stoich_curation.xlsx')

# RNA stoichiometry; rows are labelled by RNAid
df_rnas = pd.read_excel('./input/RNA_stoich.xlsx')
df_rnas.index = df_rnas['RNAid'].to_list()

# Ribosome composition tables (nucleus first, then mitochondria)
df_ribo_nuc, df_ribo_mit = (
    pd.read_excel('./input/RIBOSOME_' + part + '.xlsx')
    for part in ('nucleus', 'mitochondria')
)
# Keyed by the id suffix that is later matched against the rRNA ids.
ribo_dict = dict(zip(('_c', '_m'), (df_ribo_nuc, df_ribo_mit)))

# Biomass table, read from the sheet that matches the chosen model version.
# header=4 makes pandas take the row with 0-based index 4 as the column header.
# NOTE(review): the original note said "heading start at cell X4" -- confirm
# that this matches the spreadsheet layout.
df_biom = pd.read_excel(
    './input/BIOMASS_RBA.xlsx',
    sheet_name=modelVersions[chosen_model]['dummy_sheet'],
    header=4,
)

# Biomass rxns to turn off by default and only turn on when needed;
# written to RBA_rxns_whole_biomass.txt between '/' delimiter lines.
whole_biomass_rxns = ['BIOSYN-BIODILAERO', 'BIOSYN-BIODILAERO-NOGAM']
with open('./model/RBA_rxns_whole_biomass.txt', 'w') as f:
    f.write('\n'.join(['/', *whole_biomass_rxns, '/']))

# Reactions to ignore when checking that all necessary enzymes are modeled:
# the ids of whatever find_biomass_reactions(model) returns.
rxns_not_needing_enzymes = [biomass_rxn.id for biomass_rxn in find_biomass_reactions(model)]
print('Reactions to ignore when checking if enzymes must be added:')
print(rxns_not_needing_enzymes)

Reactions to ignore when checking if enzymes must be added:
['BIOMASS']


In [3]:
# Variant of the model without generic cofactors, for publication and MEMOTE testing.
model_no_generics = deepcopy(model)

# Reactions whose id starts with "GENERIC_" are dropped from the copy. The ids
# are collected from the untouched `model`, so the copy can be edited freely.
generic_rxn_ids = [rxn.id for rxn in model.reactions if rxn.id.startswith("GENERIC_")]
for generic_id in generic_rxn_ids:
    model_no_generics.reactions.get_by_id(generic_id).remove_from_model()
report_mass_balance(model_no_generics)

# Same for metabolites whose name starts with "pseudometabolite ".
pseudo_met_ids = [met.id for met in model.metabolites if met.name.startswith("pseudometabolite ")]
for pseudo_id in pseudo_met_ids:
    model_no_generics.metabolites.get_by_id(pseudo_id).remove_from_model()

# Save the stripped model for future use, as JSON and as SBML (xml).
no_generics_stem = "./input/" + modelname + "-no-generic-cofactors"
cobra.io.save_json_model(model_no_generics, no_generics_stem + ".json")
cobra.io.write_sbml_model(model_no_generics, no_generics_stem + ".xml")

TORULNOXi_c
o2_c + toruln_c --> 2.0 h_c + torularho_c
h_c	H1	1	C00080	H+	biocyc
o2_c	O2	0	C00007	oxygen	biocyc
torularho_c	C40H52O2	0	      	Torularhodin	manual
toruln_c	C40H54	0	C08613	Torulene	seed.compound
H:0, O:0, C:0, charge:2

compCER_m
0.25 cer1_24_m + 0.25 cer1_26_m + 0.25 cer2A_24_m + 0.25 cer2A_26_m <=> cer_m
cer1_24_m	C42H85N1O3	0	      	ceramide-1 (C24)	biocyc
cer1_26_m	C44H89N1O3	0	      	ceramide-1 (C26)	biocyc
cer2A_24_m	C42H85N1O4	0	      	ceramide-2 (C24)	biocyc
cer2A_26_m	C44H89N1O4	0	      	ceramide-2 (C26)	biocyc
cer_m	Cer	0	C00195	Ceramide	manual
C:-43, H:-87, N:-1, O:-3.5, Cer:1, charge:0

compCER_r
0.25 cer1_24_r + 0.25 cer1_26_r + 0.25 cer2A_24_r + 0.25 cer2A_26_r <=> cer_r
cer1_24_r	C42H85N1O3	0	      	ceramide-1 (C24)	biocyc
cer1_26_r	C44H89N1O3	0	      	ceramide-1 (C26)	biocyc
cer2A_24_r	C42H85N1O4	0	      	ceramide-2 (C24)	biocyc
cer2A_26_r	C44H89N1O4	0	      	ceramide-2 (C26)	biocyc
cer_r	Cer	0	C00195	Ceramide	manual
C:-43, H:-87, N:-1, O:-3.5, Cer:1, char

In [4]:
### Proteins expressed in mitochondria - change depending on your organism!
# A protein counts as mitochondrially translated when its `translation_loc`
# annotation in the protein curation table equals 'm'.
prot_mitoexp = [
    prot_id
    for prot_id in df_pro.index
    if df_pro.loc[prot_id, 'translation_loc'] == 'm'
]
print(prot_mitoexp)

['rtmATP9', 'rtmCOX1', 'rtmCOX2', 'rtmCOX3', 'rtmCOB', 'rtmATP6', 'rtmATP8']


#### Assemble reactions

In [5]:
# Table that collects every RBA reaction (one row each). `c` is the row label
# of the last row written; `medium` collects GAMS upper-bound statements.
df_eqn = pd.DataFrame(columns=['id', 'type', 'coupling_type', 'coupling_species', 'reaction','FBA_name','dir'])
c = -1
medium = []

### Metabolic network reaction
# Exchange reactions: each 'EX_' reaction becomes a FWD half (consumes the
# metabolite) followed by a REV half (produces it), both with the SPONT tag.
for rxn in model.reactions:
    if not rxn.id.startswith('EX_'):
        continue
    # only the first metabolite of the exchange reaction is used
    met = list(rxn.metabolites.keys())[0]
    exchange_halves = (
        ('FWD', 'MET-' + met.id + ' -->'),
        ('REV', '-->' + 'MET-' + met.id),
    )
    for direction, equation in exchange_halves:
        c += 1
        new_id = 'RXN-' + rxn.id + '_' + direction + '-SPONT'
        df_eqn.loc[c, 'id'] = new_id
        df_eqn.loc[c, 'type'] = 'metabolic'
        df_eqn.loc[c, 'reaction'] = equation
        df_eqn.loc[c, ['FBA_name','dir']] = [rxn.id, direction]
    # `new_id` now names the REV half. A negative lower bound in the GSM model
    # becomes the upper bound of that REV half in the default medium.
    if rxn.lower_bound < 0:
        medium.append("v.up('" + new_id + "') = " + str(-rxn.lower_bound) + ' * %nscale%;')

# define the medium
medium_header = '* Default medium from GSM model. Will be overwritten every time the model is built. Use phenotype.txt for any manual updates'
with open('./model/gsm-medium-auto.txt', 'w') as f:
    f.write('\n'.join([medium_header] + medium))

# Reactions that are not exchange reactions: one row per entry of the enzyme
# curation table, with the stoichiometry taken from the GSM model.
for i in df_enz.index:
    rxn_id = df_enz.id[i]
    _, rxn_base_id, rxn_dir, enz_id = extract_details_from_rxnid(rxn_id)

    # exchange reactions are handled separately
    if rxn_base_id.startswith('EX_'):
        continue

    c += 1
    rxn_base = model.reactions.get_by_id(rxn_base_id)

    # Parse the GSM equation, drop the empty species key and add the MET- prefix.
    parsed_mets = metabolites_dict_from_reaction_equation_RBA(rxn_base.reaction)
    met_dict = {'MET-' + k: v for k, v in parsed_mets.items() if k != ''}
    if rxn_dir == 'REV':
        # the reverse half uses the negated coefficients
        met_dict = {k: -v for k, v in met_dict.items()}
    elif rxn_dir != 'FWD':
        # only a warning: the row is still written with the unmodified coefficients
        print("Unknown ID that indicate reaction direction, only accepting 'FWD' and 'REV'")

    # Couple the reaction to its enzyme unless it is spontaneous or unknown.
    if enz_id not in ('SPONT', 'UNKNOWN'):
        df_eqn.loc[c, 'coupling_type'] = 'rxn_enz'
        df_eqn.loc[c, 'coupling_species'] = enz_id

    df_eqn.loc[c, 'id'] = rxn_id
    df_eqn.loc[c, 'type'] = 'metabolic'
    df_eqn.loc[c, 'reaction'] = build_reaction_equation_from_metabolites_dict_RBA(met_dict, arrow='-->')
    df_eqn.loc[c, ['FBA_name','dir']] = [rxn_base_id, rxn_dir]
    
### Enzyme synthesis network reaction
# enzyme id -> protein stoichiometry string ("protein:count,protein:count,...").
# An enzyme listed on several rows keeps its first position and its last string.
enz_stoich = OrderedDict(
    (df_enz.enz[i], df_enz.protein_stoich[i]) for i in df_enz.index
)

c = df_eqn.shape[0] - 1
for enz_id, prot_str in enz_stoich.items():
    # 'zeroCost' enzymes get no synthesis reaction
    if prot_str == 'zeroCost':
        continue

    c += 1
    # Each listed protein is consumed with its integer count; one enzyme is produced.
    coeffs = OrderedDict()
    for entry in prot_str.split(','):
        fields = entry.split(':')
        coeffs['PRO-' + fields[0]] = -int(fields[1])
    coeffs['ENZ-' + enz_id] = 1

    df_eqn.loc[c, 'id'] = 'ENZSYN-' + enz_id
    df_eqn.loc[c, 'type'] = 'enzyme'
    df_eqn.loc[c, 'reaction'] = build_reaction_equation_from_metabolites_dict_RBA(coeffs, arrow='-->')

# ENZLOAD reactions: one per enzyme-curation row that is not 'zeroCost'. Each
# consumes one unit of the row's enzyme. Their MWs must also be listed in the
# enz_mw_g_per_mmol file, so the "<id>\t<MW>" lines are collected alongside.
enzload_mw_lines = []
for i in df_enz.index:
    if df_enz.protein_stoich[i] == 'zeroCost':
        continue

    c += 1
    coeffs = OrderedDict([('ENZ-' + df_enz.enz[i], -1)])

    # same id as the reaction row, with its first 4 characters replaced by 'ENZLOAD-'
    enzload_id = 'ENZLOAD-' + df_enz.id[i][4:]

    df_eqn.loc[c, 'id'] = enzload_id
    df_eqn.loc[c, 'type'] = 'enzymeRxnLoad'
    df_eqn.loc[c, 'reaction'] = build_reaction_equation_from_metabolites_dict_RBA(coeffs, arrow='-->')

    # molecular weight associated with this row
    enz_mw = df_enz["MW (g/mmol)"][i]
    enzload_mw_lines.append(enzload_id + "\t" + str(enz_mw) + "\n")

enzload_str = "".join(enzload_mw_lines)

# the same content is written to both locations
for mw_path in ('./model/enz_mw_g_per_mmol.txt', '../input/enz_mw_g_per_mmol.txt'):
    with open(mw_path, 'w') as f:
        f.write(enzload_str)

### Ribosome
# rRNA synthesis: one RIBOSYN reaction per row of the RNA table.
c = df_eqn.shape[0] - 1
for rna in df_rnas.index:
    c += 1
    # Key order is kept as-is because it is passed on to the equation builder.
    # The A/C/G/U counts are consumed as ATP/CTP/GTP/UTP, their total is
    # released as ppi, and the RNA's MW is credited to BIO-rrna.
    rna_stoich = OrderedDict([
        ('MET-' + rna, 0),
        ('MET-atp_c', -int(df_rnas.loc[rna, 'A'])),
        ('MET-ctp_c', -int(df_rnas.loc[rna, 'C'])),
        ('MET-gtp_c', -int(df_rnas.loc[rna, 'G'])),
        ('MET-utp_c', -int(df_rnas.loc[rna, 'U'])),
        ('MET-ppi_c', int(df_rnas.loc[rna, ['A','C','G','U']].sum())),
        ('RIBO-' + rna, 1),
        ('BIO-rrna', df_rnas.loc[rna, 'MW (g/mmol)']),
    ])

    df_eqn.loc[c, 'id'] = 'RIBOSYN-' + rna
    df_eqn.loc[c, 'type'] = 'ribosome'
    df_eqn.loc[c, 'reaction'] = build_reaction_equation_from_metabolites_dict_RBA(rna_stoich, arrow='-->')

for loc, ribo in ribo_dict.items():
    ## Proteins into ribosome subunits (to account for paralogs)
    # `loc` is the id suffix ('_c' / '_m') used to select this ribosome's rRNAs
    # from the RNA table; `ribo` is the matching composition table
    # (columns used here: `id` and `paralog`).
    ribo_stoich = OrderedDict()
    rnas = [item for item in df_rnas.index if item.endswith(loc)]
    # Subunit that the following paralog rows are attached to. It is reset for
    # every table so that a table starting with a paralog row can neither hit
    # an undefined name nor silently reuse the last subunit of the previously
    # processed ribosome.
    ribosub_name = None
    # assume 1 of each rRNA and protein is used per ribosome
    for i in ribo.index:
        if ribo.id[i] in rnas:
            ribo_stoich['RIBO-' + ribo.id[i]] = -1
            continue
        # A row with an empty `paralog` cell starts a new subunit; every
        # following row with a non-empty `paralog` cell feeds that same subunit.
        if pd.isnull(ribo.paralog[i]):
            ribosub_name = 'RIBOSUB-' + ribo.id[i]
        elif ribosub_name is None:
            raise ValueError(
                "Ribosome table for '" + loc + "': paralog row '" + str(ribo.id[i])
                + "' appears before any non-paralog protein, so it has no subunit to join"
            )
        # add rxn converting the protein into the ribosome subunit (RIBOSUB prefix)
        c += 1
        ribosub_stoich = OrderedDict()
        ribosub_stoich['PRO-' + ribo.id[i]] = -1
        ribosub_stoich[ribosub_name] = 1
        df_eqn.loc[c, 'id'] = 'RIBOSUBSYN-' + ribosub_name + '-FROM-' + ribo.id[i]
        df_eqn.loc[c, 'type'] = 'ribosome-subunit'
        df_eqn.loc[c, 'reaction'] = build_reaction_equation_from_metabolites_dict_RBA(ribosub_stoich, arrow='-->')
        # add the ribosome subunit to the ribosome (once per subunit: same dict key)
        ribo_stoich[ribosub_name] = -1
    # making the ribosome itself from the rRNAs and subunits
    c += 1
    df_eqn.loc[c, 'id'] = 'RIBOSYN-ribonuc' if loc == '_c' else 'RIBOSYN-ribomito'
    df_eqn.loc[c, 'type'] = 'ribosome'
    df_eqn.loc[c, 'reaction'] = build_reaction_equation_from_metabolites_dict_RBA(ribo_stoich, arrow='-->')

#### Build files related to MW

In [6]:
# Build the pro_mw_g_per_mmol.txt file: one "<protein id>\t<MW>" line per
# curated protein, followed by one line per dummy protein.
# (TEST: the whole cell was flagged by its author as still under test.)

# MW lookup keyed by the row labels of the protein table, and the ids in file order
data = df_pro['MW (g/mmol)'].to_dict()
ids = df_pro['id'].to_list()

mw_lines = [prot_id + "\t" + str(data[prot_id]) + "\n" for prot_id in ids]
# dummy proteins are listed without their 'PROSYN-' prefix
mw_lines += [
    dummy["name"].replace('PROSYN-','') + "\t" + str(dummy["MW"]) + "\n"
    for dummy in dummy_list
]
prot_mw_str = "".join(mw_lines)

# the same content is written to both locations
for mw_path in ('./model/pro_mw_g_per_mmol.txt', '../input/pro_mw_g_per_mmol.txt'):
    with open(mw_path, 'w') as f:
        f.write(prot_mw_str)

In [7]:
# Build enz_mw_g_per_mmol_norxnmapped.txt: a header line followed by one
# "<enzyme id>\t<MW>" line per distinct enzyme, using the MW of the first row
# on which the enzyme appears.
# (TEST: the whole cell was flagged by its author as still under test.)
mw_list = df_enz["MW (g/mmol)"].to_list()
enz_list = df_enz["enz"].to_list()

# Keys that must never be written: missing values (str(NaN) == 'nan') and the
# 'unknown' placeholder. Ids that were already written are added to this set.
# Both the membership test and the stored key use str(enz), so ids that are not
# plain strings are deduplicated as well.
seen_enz = {'nan', 'unknown'}

enz_mw_lines = ["enz_id\tMW (g/mmol)\n"]
# zip keeps every enzyme paired with the MW of its own row, without a manual
# counter (the shared row counter `c` is no longer touched by this cell)
for enz, enz_mw in zip(enz_list, mw_list):
    enz_key = str(enz)
    if enz_key in seen_enz:
        continue
    seen_enz.add(enz_key)
    enz_mw_lines.append(enz_key + "\t" + str(enz_mw) + "\n")

enz_mw_str = "".join(enz_mw_lines)

# the same content is written to both locations
for mw_path in ('./model/enz_mw_g_per_mmol_norxnmapped.txt',
                '../input/enz_mw_g_per_mmol_norxnmapped.txt'):
    with open(mw_path, 'w') as f:
        f.write(enz_mw_str)

#### Protein file

In [8]:
### Protein
# One PROSYN reaction per curated protein, plus the bookkeeping needed for the
# protein-length file and the mito/nuclear translation lists.

pro_lengths = {}

c = df_eqn.shape[0] - 1
mitoexp_prosyn = ["/"]
nucexp_prosyn = ["/"]

for i in df_pro.index:
    c += 1

    # Energy-related metabolites are inserted first (all at 0) so that they
    # keep the leading positions in the stoichiometry passed to the builder.
    prot_st = OrderedDict(
        (energy_met, 0)
        for energy_met in ('MET-atp_c', 'MET-h2o_c',
                           'MET-adp_c', 'MET-pi_c', 'MET-h_c', 'MET-gtp_c',
                           'MET-gdp_c')
    )
    seq = df_pro.sequence[i]
    # stored length ignores '*' characters in the sequence
    pro_lengths[i] = len(seq.replace("*",""))
    # each residue consumes its tRNA_in species and releases its tRNA_out species
    for aa in df_aamap.index:
        aa_count = seq.count(aa)
        prot_st[df_aamap.tRNA_in[aa]] = -aa_count
        prot_st[df_aamap.tRNA_out[aa]] = aa_count

    # optional cofactors, given as "metabolite:count,metabolite:count,..."
    cofs_str = df_pro.cofactor_stoich[i]
    if not pd.isnull(cofs_str):
        cofs_st = OrderedDict({entry.split(':')[0]: entry.split(':')[1]
                               for entry in cofs_str.split(',')})
        for cof_id, cof_count in cofs_st.items():
            prot_st['MET-' + cof_id] = -int(cof_count)

    prot_st['PRO-' + df_pro.id[i]] = 1
    df_eqn.loc[c, 'coupling_type'] = 'prot_ribo'
    df_eqn.loc[c, 'coupling_species'] = 'ribo'

    # protein will occupy cellular space in its specific compartment:
    # 'm' / 'mm' count towards the mitochondrial pool, everything else towards cytosol
    mass_pool = 'BIO-protmito' if df_pro.subloc_assigned[i] in ['m', 'mm'] else 'BIO-protcyt'
    prot_st[mass_pool] = df_pro.loc[i, 'MW (g/mmol)']

    # Cost: Initiation: 1 ATP + 2 GTP (initiate and bind Methionine)
    # Elongation: 2 GTP / cycle
    # (elongation process excludes Methionine since it is already bound in initiation process)
    # NOTE(review): the GTP term uses len(seq), which still counts any '*'
    # characters, whereas pro_lengths above excludes them -- confirm this is intended.
    gtp_count = 2*len(seq)
    energy_updates = (
        (('MET-atp_c', 'MET-h2o_c'), -1),
        (('MET-adp_c', 'MET-pi_c', 'MET-h_c'), 1),
        (('MET-gtp_c', 'MET-h2o_c'), -gtp_count),
        (('MET-gdp_c', 'MET-pi_c', 'MET-h_c'), gtp_count),
    )
    for mets, delta in energy_updates:
        for met in mets:
            prot_st[met] += delta

    df_eqn.loc[c, 'id'] = 'PROSYN-' + df_pro.id[i]
    df_eqn.loc[c, 'type'] = 'protein'
    df_eqn.loc[c, 'reaction'] = build_reaction_equation_from_metabolites_dict_RBA(prot_st, arrow='-->')
    # Set protein to be expressed by nucleus or mitochondrial ribosome
    translation_list = mitoexp_prosyn if df_pro.id[i] in prot_mitoexp else nucexp_prosyn
    translation_list.append("'" + df_eqn.loc[c, 'id'] + "'")

# Write the protein length file: "'<PROSYN id>' <length>" per line between '/'
# delimiters; curated proteins first, then the dummy proteins with their
# median length.
len_lines = [
    "'PROSYN-" + prot_id + "' " + str(prot_len)
    for prot_id, prot_len in pro_lengths.items()
]
len_lines += [
    "'" + dummy["name"] + "' " + str(dummy["medianL"])
    for dummy in dummy_list
]
len_str = "/\n" + "".join(line + "\n" for line in len_lines) + "/"
with open('./model/RBA_proteinLength.txt', 'w') as f:
    f.write(len_str)

### Dummy protein
# One PROSYN reaction per dummy protein. The amino-acid usage comes from the
# N_AA column of the dummy's own table (rounded to 4 decimals) and the
# elongation cost from its median length.
# The former `seq = df_pro.sequence[i][:-1]` and `dummy_MW` assignments were
# removed: neither value was used, and `seq` depended on the loop variable `i`
# left over from the protein loop.
for prot in dummy_list:
    prot_st = OrderedDict()
    # energy-related metabolites first, so they keep the leading positions
    for met in ['MET-atp_c', 'MET-h2o_c',
                'MET-adp_c', 'MET-pi_c', 'MET-h_c', 'MET-gtp_c',
                'MET-gdp_c']:
        prot_st[met] = 0

    for aa in df_aamap.index:
        n_aa = round(prot["df"].N_AA[aa], 4)
        prot_st[df_aamap.tRNA_in[aa]] = -n_aa
        prot_st[df_aamap.tRNA_out[aa]] = n_aa

    # initiation: 1 ATP hydrolysed
    for met in ['MET-atp_c', 'MET-h2o_c']:
        prot_st[met] -= 1
    for met in ['MET-adp_c', 'MET-pi_c', 'MET-h_c']:
        prot_st[met] += 1

    # 2 GTP hydrolysed per residue of the median-length protein
    for met in ['MET-gtp_c', 'MET-h2o_c']:
        prot_st[met] -= 2*prot["medianL"]
    for met in ['MET-gdp_c', 'MET-pi_c', 'MET-h_c']:
        prot_st[met] += 2*prot["medianL"]

    c += 1
    # the dummy's MW is credited to the BIO-protdummy pool
    prot_st["BIO-protdummy"] = prot["MW"]
    df_eqn.loc[c, 'id'] = prot["name"]
    df_eqn.loc[c, 'coupling_type'] = 'prot_ribo'
    df_eqn.loc[c, 'coupling_species'] = 'ribo' #TEST: Update for ribonuc, ribomito, and other ribo types
    df_eqn.loc[c, 'type'] = 'protein'
    df_eqn.loc[c, 'reaction'] = build_reaction_equation_from_metabolites_dict_RBA(prot_st,
                                                            arrow='-->', floatdecimal=6)

# Protein waste reactions: each curated protein can be converted into
# BIO-protwasted, weighted by its MW.
c = df_eqn.shape[0] - 1
for i in df_pro.index:
    c += 1
    prot_st = OrderedDict([
        ('PRO-' + df_pro.id[i], -1),
        ('BIO-protwasted', df_pro.loc[i, 'MW (g/mmol)']),
    ])

    df_eqn.loc[c, 'id'] = 'PROWASTE-' + df_pro.id[i]
    df_eqn.loc[c, 'type'] = 'proteinWaste'
    df_eqn.loc[c, 'reaction'] = build_reaction_equation_from_metabolites_dict_RBA(prot_st, arrow='-->')

# Pseudoreactions that drain a protein mass pool:
#  - PROWASTE-TOTALPROTEIN represents the mass of waste protein (for kapps);
#  - PROWASTE-PROT<LOC> drain the biomass-related byproducts of PROSYN rxns,
#    to support simulations at 0 growth.
waste_drains = [('PROWASTE-TOTALPROTEIN', 'BIO-protwasted')]
waste_drains += [
    ('PROWASTE-PROT' + loc.upper(), 'BIO-prot' + loc)
    for loc in ['modeled','dummy','cyt','mito']
]
for waste_id, pool in waste_drains:
    c += 1
    df_eqn.loc[c, 'id'] = waste_id
    df_eqn.loc[c, 'type'] = 'proteinWaste'
    df_eqn.loc[c, 'reaction'] = build_reaction_equation_from_metabolites_dict_RBA({pool:-1}, arrow='-->')

### Write translation localization lists
# Both lists already start with '/'; close them with '/' and write one entry per line.
mitoexp_prosyn += ["/"]
nucexp_prosyn += ["/"]
translation_files = (
    ('./model/RBA_mito_translation.txt', mitoexp_prosyn),
    ('./model/RBA_nuc_translation.txt', nucexp_prosyn),
)
for out_path, prosyn_ids in translation_files:
    with open(out_path, 'w') as f:
        f.write('\n'.join(prosyn_ids))

### Biomass
# Copy every biomass-table row that has a reaction id into df_eqn.
# The row counter advances for skipped rows too, so row labels may have gaps.
for i in df_biom.index:
    c += 1
    if pd.isna(df_biom.rxn_id[i]):
        continue
    df_eqn.loc[c, 'id'] = df_biom.rxn_id[i]
    df_eqn.loc[c, 'type'] = 'biomass'
    df_eqn.loc[c, 'reaction'] = df_biom.rxn_equation[i]

In [9]:
# Make extra kapp calculation files: the sorted, de-duplicated set of quoted
# 'ENZ-<enzyme>' and 'PRO-<protein>' names, between '/' delimiter lines.
import os
directory = '../GAMS/parameterization/enz_from_proteome'
if not os.path.exists(directory):
    os.makedirs(directory)

enz_names = {"'ENZ-" + enz + "'" for enz in enz_list}
pro_names = {"'PRO-" + prot_id + "'" for prot_id in df_pro.index}
with open(directory + '/pro_and_enz.txt', 'w') as f:
    f.write('\n'.join(['/'] + sorted(enz_names | pro_names) + ['/']))

In [10]:
# Check that every reaction of the GSM model appears in the enzyme stoich file
# (column `rxn_src`), apart from the reactions that need no enzyme.
# The lookup set is built once; rebuilding and scanning a list for every
# reaction made the check quadratic in the number of reactions.
known_rxn_ids = set(df_enz.rxn_src.astype(str)) | set(rxns_not_needing_enzymes)
rxns_without_matches = [rxn.id for rxn in model.reactions if rxn.id not in known_rxn_ids]
if rxns_without_matches:
    # Raises ValueError listing every unmatched reaction id, one per line.
    error_message = "Reactions to add to ENZYME_stoich_curation.xlsx (ignore if only biomass rxns listed):\n" + "\n".join(rxns_without_matches)
    raise ValueError(error_message)


#### Save excel files

In [11]:
# Persist the assembled reaction table; index=None leaves the row labels out of the sheet.
df_eqn.to_excel('./model/RBA_stoichiometry.xlsx', index=None)

# Build GAMS Sij and flux bounds (originally A02_build_GAMS_Sij_and_fluxBounds.ipynb)

In [12]:
import pandas as pd
from gsm_custom_functions import *

#### Load excel file

In [13]:
# Reload the reaction table from the Excel file (columns used below: `id` and `reaction`).
df_eqn = pd.read_excel('./model/RBA_stoichiometry.xlsx')

#### Assemble list of species

In [14]:
# Every species name that occurs in any reaction equation (with repeats).
met_list_raw = [
    species
    for equation in df_eqn.reaction
    for species in metabolites_dict_from_reaction_equation_RBA(equation).keys()
]

# Unique, sorted species names without the empty placeholder name.
met_list = sorted(species for species in set(met_list_raw) if species != '')
quoted_species = ["'" + species + "'" for species in met_list]
with open('./model/RBA_species.txt', 'w') as f:
    f.write('\n'.join(['/'] + quoted_species + ['/']))

# One demand rxn name per species (for testing purposes).
# WIP: these are not added to df_eqn yet.
demand_list = ["DM-" + species for species in met_list]
quoted_demands = ["'" + demand + "'" for demand in demand_list]
with open('./model/RBA_rxns_demand.txt', 'w') as f:
    f.write('\n'.join(['/'] + quoted_demands + ['/']))

#### Assemble list of reactions

In [15]:
# All non-empty reaction ids, single-quoted, between '/' delimiter entries.
rxn_list = ['/']
rxn_list.extend("'" + rxn_id + "'" for rxn_id in df_eqn.id.to_list() if rxn_id != '')
rxn_list.append('/')

In [16]:
# Write the current `rxn_list` (built in the previous cell), one entry per line.
with open('./model/RBA_rxns.txt', 'w') as f:
    f.write('\n'.join(rxn_list))

#### Get list of uptake reactions

In [17]:
# Uptake reactions: ids tagged 'RXN' whose base id starts with 'EX_' and whose
# direction is 'REV'; single-quoted, between '/' delimiter entries.
rxn_list = ['/']
for rxn_id in df_eqn.id:
    tag, rxn_base_id, rxn_dir, enz_id = extract_details_from_rxnid(rxn_id)
    is_uptake = tag == 'RXN' and rxn_base_id[:3] == 'EX_' and rxn_dir == 'REV'
    if is_uptake and rxn_id != '':
        rxn_list.append("'" + rxn_id + "'")
rxn_list.append('/')

In [18]:
# Write the current `rxn_list` (the uptake list from the previous cell), one entry per line.
with open('./model/RBA_rxns_EXREV.txt', 'w') as f:
    f.write('\n'.join(rxn_list))

#### Get list of secretion reactions

In [19]:
# Secretion reactions: ids tagged 'RXN' whose base id starts with 'EX_' and
# whose direction is 'FWD'; single-quoted, between '/' delimiter entries.
rxn_list = ['/']
for rxn_id in df_eqn.id:
    tag, rxn_base_id, rxn_dir, enz_id = extract_details_from_rxnid(rxn_id)
    is_secretion = tag == 'RXN' and rxn_base_id[:3] == 'EX_' and rxn_dir == 'FWD'
    if is_secretion and rxn_id != '':
        rxn_list.append("'" + rxn_id + "'")
rxn_list.append('/')

In [20]:
# Write the current `rxn_list` (the secretion list from the previous cell), one entry per line.
with open('./model/RBA_rxns_EXFWD.txt', 'w') as f:
    f.write('\n'.join(rxn_list))

#### Get list of protein waste reactions

In [21]:
# Protein waste reactions: ids whose text before the first '-' is 'PROWASTE';
# single-quoted, between '/' delimiter entries.
rxn_list = (
    ['/']
    + ["'" + rxn_id + "'" for rxn_id in df_eqn.id if rxn_id.partition('-')[0] == 'PROWASTE']
    + ['/']
)

In [22]:
# Write the current `rxn_list` (the PROWASTE list from the previous cell), one entry per line.
with open('./model/RBA_rxns_prowaste.txt', 'w') as f:
    f.write('\n'.join(rxn_list))

#### Get list of enzyme syn reactions

In [23]:
# Enzyme synthesis reactions: ids whose text before the first '-' is 'ENZSYN';
# single-quoted, between '/' delimiter entries.
rxn_list = (
    ['/']
    + ["'" + rxn_id + "'" for rxn_id in df_eqn.id if rxn_id.partition('-')[0] == 'ENZSYN']
    + ['/']
)

In [24]:
# Write the current `rxn_list` (the ENZSYN list from the previous cell), one entry per line.
with open('./model/RBA_rxns_enzsyn.txt', 'w') as f:
    f.write('\n'.join(rxn_list))

#### Get list of enzyme load reactions

In [25]:
# Enzyme load reactions: ids whose text before the first '-' is 'ENZLOAD', single-quoted.
quoted_enzload = [
    "'" + rxn_id + "'"
    for rxn_id in df_eqn.id
    if rxn_id.partition('-')[0] == 'ENZLOAD'
]
# list created so GAMS knows which ENZLOAD corresponds to which rxn:
# "'ENZLOAD-x'.'RXN-x' 1" for every enzyme load reaction
rxn_enzload_coupling_list = [
    quoted + '.' + quoted.replace('ENZLOAD-','RXN-') + ' 1'
    for quoted in quoted_enzload
]
rxn_list = ['/'] + quoted_enzload + ['/']

In [26]:
# Write the current `rxn_list` (the ENZLOAD list from the previous cell), one entry per line.
with open('./model/RBA_rxns_enzload.txt', 'w') as f:
    f.write('\n'.join(rxn_list))
# Write the ENZLOAD-to-RXN coupling entries between '/' delimiter lines.
with open('./model/RBA_rxn_enzload_coupling.txt', 'w') as f:
    f.write('\n'.join(['/'] + rxn_enzload_coupling_list + ['/']))

#### Get list of metabolic network reactions

In [27]:
# Metabolic network reactions: ids whose text before the first '-' is 'RXN';
# single-quoted, between '/' delimiter entries.
rxn_list = (
    ['/']
    + ["'" + rxn_id + "'" for rxn_id in df_eqn.id if rxn_id.partition('-')[0] == 'RXN']
    + ['/']
)

In [28]:
# Write the current `rxn_list` (the metabolic network list from the previous cell), one entry per line.
with open('./model/RBA_rxns_rxnmetabolicnetwork.txt', 'w') as f:
    f.write('\n'.join(rxn_list))

In [29]:
# Draft k_app (enzyme turnover rate) file: every entry of the current
# `rxn_list` except the '/' delimiters gets the placeholder value 360000.
kapp_list = ['/']
kapp_list.extend(quoted_id + " 360000" for quoted_id in rxn_list if quoted_id != '/')
kapp_list.append('/')
with open('./model/kapp_placeholders.txt', 'w') as f:
    f.write('\n'.join(kapp_list))

#### Assemble stoichiometry

In [30]:
# Assemble the stoichiometric entries ("'<species>'.'<reaction>' <coeff>"):
#  - sij:          every reaction;
#  - sij_proenz:   only PRO-/ENZ- species of reactions whose id starts with
#                  "ENZ" or "PROSYN", with 'PROSYN-' renamed to 'PROIN-';
#  - rxns_proenz / rxns_enz: the quoted ids of those reactions.
# The last three feed the extra kapp calculation files.
import os
directory = '../GAMS/parameterization/enz_from_proteome/'
if not os.path.exists(directory):
    os.makedirs(directory)

sij = []
rxns_enz = []
rxns_proenz = []
sij_proenz = []
for i in df_eqn.index:
    parsed = metabolites_dict_from_reaction_equation_RBA(df_eqn.reaction[i])
    # drop the empty species name; whole-number coefficients are written as ints
    met_dict = {
        species: (int(coeff) if coeff.is_integer() else coeff)
        for species, coeff in parsed.items()
        if species != ''
    }

    rxn_id = df_eqn.id[i]
    is_enz = rxn_id.startswith("ENZ")
    in_proenz = is_enz or rxn_id.startswith("PROSYN")
    proenz_id = rxn_id.replace('PROSYN-','PROIN-')
    if in_proenz:
        rxns_proenz.append("'" + proenz_id + "'")
        if is_enz:
            rxns_enz.append("'" + rxn_id + "'")

    for species, coeff in met_dict.items():
        sij.append("'" + species + "'.'" + rxn_id + "' " + str(coeff))
        if in_proenz and species.startswith(('PRO-', 'ENZ-')):
            sij_proenz.append("'" + species + "'.'" + proenz_id + "' " + str(coeff))

sij = ['/'] + sij + ['/']
with open('./model/RBA_sij.txt', 'w') as f:
    f.write('\n'.join(sij))
with open(directory + 'rxns_enz.txt', 'w') as f:
    f.write('\n'.join(['/'] + sorted(rxns_enz) + ['/']))
with open(directory + 'rxns_pro_and_enz.txt', 'w') as f:
    f.write('\n'.join(['/'] + sorted(rxns_proenz) + ['/']))
with open(directory + 'sij_pro_and_enz.txt', 'w') as f:
    f.write('\n'.join(['/'] + sij_proenz + ['/']))

In [31]:
# Print every entry of `sij`, one per line.
# NOTE(review): this dumps the whole list into the cell output -- consider printing only a slice.
print('\n'.join(sij))

/
'MET-13BDglucan_e'.'RXN-EX_13BDglucan_e_FWD-SPONT' -1
'MET-13BDglucan_e'.'RXN-EX_13BDglucan_e_REV-SPONT' 1
'MET-crn_e'.'RXN-EX_crn_e_FWD-SPONT' -1
'MET-crn_e'.'RXN-EX_crn_e_REV-SPONT' 1
'MET-lac__D_e'.'RXN-EX_lac__D_e_FWD-SPONT' -1
'MET-lac__D_e'.'RXN-EX_lac__D_e_REV-SPONT' 1
'MET-mev__R_e'.'RXN-EX_mev__R_e_FWD-SPONT' -1
'MET-mev__R_e'.'RXN-EX_mev__R_e_REV-SPONT' 1
'MET-pnto__R_e'.'RXN-EX_pnto__R_e_FWD-SPONT' -1
'MET-pnto__R_e'.'RXN-EX_pnto__R_e_REV-SPONT' 1
'MET-btd_e'.'RXN-EX_btd_e_FWD-SPONT' -1
'MET-btd_e'.'RXN-EX_btd_e_REV-SPONT' 1
'MET-3mop_e'.'RXN-EX_3mop_e_FWD-SPONT' -1
'MET-3mop_e'.'RXN-EX_3mop_e_REV-SPONT' 1
'MET-lac__L_e'.'RXN-EX_lac__L_e_FWD-SPONT' -1
'MET-lac__L_e'.'RXN-EX_lac__L_e_REV-SPONT' 1
'MET-mal__L_e'.'RXN-EX_mal__L_e_FWD-SPONT' -1
'MET-mal__L_e'.'RXN-EX_mal__L_e_REV-SPONT' 1
'MET-g3pi_e'.'RXN-EX_g3pi_e_FWD-SPONT' -1
'MET-g3pi_e'.'RXN-EX_g3pi_e_REV-SPONT' 1
'MET-dad_2_e'.'RXN-EX_dad_2_e_FWD-SPONT' -1
'MET-dad_2_e'.'RXN-EX_dad_2_e_REV-SPONT' 1
'MET-dgsn_e'.'RXN-EX_

# Build GAMS RBA constraints (originally A03_build_GAMS_RBA_constraints.ipynb)

In [32]:
import pandas as pd
from gsm_custom_functions import *

In [33]:
#### LOAD INPUTS AND PARAMETERS
copy_to_gams = True  # if True, copy the model to the gams folder

# Stoichiometry table written by the stoichiometry build step; rows are
# labelled by reaction id (the `id` column is kept).
df_stoich = pd.read_excel('./model/RBA_stoichiometry.xlsx')
df_stoich.index = df_stoich['id'].to_list()

# Protein curation table; rows are labelled by protein id (the `id` column is kept).
df_pro = pd.read_excel('./input/PROTEIN_stoich_curation.xlsx')
df_pro.index = df_pro['id'].to_list()

In [34]:
### Enzyme synthesis requirement coupled to metabolic reaction rate
# For every reaction with coupling_type 'rxn_enz':
#     v('ENZLOAD-<x>') * kapp('<rxn>')  >=  %mu% * v('<rxn>')
# where <x> is the reaction id without its first 4 characters. An equality
# version and a version with kapp slack variables are generated as well.
idx = df_stoich[df_stoich.coupling_type == 'rxn_enz'].index
eqn_list = []
kapp_list = []
eqn_list_equality = []
kapp_slack_list = []

for i in idx:
    rxn_id = df_stoich.id[i]
    rxn_suffix = rxn_id[4:]
    lhs = "v('ENZLOAD-" + rxn_suffix + "') * kapp('" + i + "')"
    print(rxn_id)
    rhs = "%mu% * v('" + i + "')"
    eqn_list.append(lhs + ' =g= ' + rhs + ';')
    eqn_list_equality.append(lhs + ' =e= ' + rhs + ';')
    kapp_slack_list.append(
        "kapp_slack_ub('RXN-" + rxn_suffix + "') - kapp_slack_lb('RXN-" + rxn_suffix + "') + "
        + lhs + ' =e= ' + rhs + ';'
    )

# Equation names EnzCap0, EnzCap1, ... in the order of `idx`.
eqn_idx = ['EnzCap' + str(k) for k in range(len(eqn_list))]
eqn_list = [name + '.. ' + eqn for name, eqn in zip(eqn_idx, eqn_list)]

with open('./model/RBA_enzCapacityConstraints_declares.txt', 'w') as f:
    f.write('\n'.join(eqn_idx))
with open('./model/RBA_enzCapacityConstraints_eqns.txt', 'w') as f:
    f.write('\n'.join(eqn_list))

with open('./model/RBA_enzCapacityConstraints_eqns_equality_version.txt', 'w') as f:
    f.write('\n'.join(name + '.. ' + eqn for name, eqn in zip(eqn_idx, eqn_list_equality)))
# more concise version of the above, with declares and equations in one file for easier reading/testing
with open('./model/RBA_enzCapacityConstraints_declares_and_eqns_equality_version.txt', 'w') as f:
    f.write('\n'.join('Equation ' + name + '; ' + name + '.. ' + eqn
                      for name, eqn in zip(eqn_idx, eqn_list_equality)))
# kapp slack equations, for testing purposes only (e.g., if kapps are too low, seeing how much they'd need to increase)
kapp_slack_list = ['Equation ' + name + '; ' + name + '.. ' + eqn
                   for name, eqn in zip(eqn_idx, kapp_slack_list)]
with open('./model/kapp-slack-RBA_enzCapacityConstraints_equality_version.txt', 'w') as f:
    f.write('\n'.join(kapp_slack_list))

RXN-12AMANTF_g_FWD-rt2093
RXN-12AMANTF_g_FWD-rt7201
RXN-13BGH_e_FWD-rt5185
RXN-13GS_c_FWD-rt7616
RXN-14BMANTF_c_FWD-rt6298
RXN-16GS_c_FWD-rt0150
RXN-16GS_c_FWD-rt3279
RXN-1MLCLAT_mm_FWD-rt8106
RXN-1MLCLAT_mm_REV-rt8106
RXN-2DOXG6PP_c_FWD-rt0208
RXN-2DOXG6PP_c_FWD-rt5045
RXN-2OGMAH_c_FWD-rt6557
RXN-2OSUCAH_c_FWD-rt6557
RXN-2OXOADPt_c_m_FWD-rt2267
RXN-2OXOADPt_c_m_REV-rt2267
RXN-3DHSKD_c_FWD-rt3472
RXN-3DHSKD_c_REV-rt3472
RXN-3DSPHR_r_FWD-rt1611
RXN-3HACD200_rm_FWD-rt2309
RXN-3HACD200_rm_REV-rt2309
RXN-3HACD220_rm_FWD-rt2309
RXN-3HACD220_rm_REV-rt2309
RXN-3HACD240_rm_FWD-rt2309
RXN-3HACD240_rm_REV-rt2309
RXN-3HACD260_rm_FWD-rt2309
RXN-3HACD260_rm_REV-rt2309
RXN-3HAD100_c_FWD-rt0302
RXN-3HAD120_c_FWD-rt0302
RXN-3HAD140_c_FWD-rt0302
RXN-3HAD160_c_FWD-rt0302
RXN-3HAD180_c_FWD-rt0302
RXN-3HAD40_c_FWD-rt0302
RXN-3HAD40_c_REV-rt0302
RXN-3HAD40_m_FWD-rt7207_m
RXN-3HAD40_m_REV-rt7207_m
RXN-3HAD60_c_FWD-rt0302
RXN-3HAD60_m_FWD-rt7207_m
RXN-3HAD60_m_REV-rt7207_m
RXN-3HAD80_c_FWD-rt0302
RXN-3HAD80_

In [35]:
### Write prosyn reaction set
# every row label of df_stoich that starts with 'PROSYN-', quoted, between '/' delimiters
idx = [rxn_id for rxn_id in df_stoich.index if rxn_id.startswith('PROSYN-')]
prosyn = ['/'] + ["'" + rxn_id + "'" for rxn_id in idx] + ['/']
with open('./model/RBA_rxns_prosyn.txt', 'w') as f:
    f.write('\n'.join(prosyn))
## write ribosyn set (same layout, prefix 'RIBOSYN-')
idx = [rxn_id for rxn_id in df_stoich.index if rxn_id.startswith('RIBOSYN-')]
ribosyn = ['/'] + ["'" + rxn_id + "'" for rxn_id in idx] + ['/']
with open('./model/RBA_rxns_ribosyn.txt', 'w') as f:
    f.write('\n'.join(ribosyn))

In [36]:
# Make fwd and reversible rxn list for metabolic network and all rxns (not needed but helpful for testing)
# Also, make flux coupling analysis file in case you use that for testing

# Using Patrick's FCA code
# Integer code written next to each reaction ID in rt_rxntype.txt.
rxn_types = {'irrev': 0, 'reversible-fwd-half': 1, 'reversible-rev-half': 2, 'pseudoreaction': 3, 'exchange-fwd-half': 4, 'exchange-rev-half': 5}


def _write_quoted_set(path, rxn_ids):
    """Write reaction IDs to `path` as a '/'-delimited, single-quoted list.

    Each ID is wrapped in single quotes and written on its own line, with a
    '/' line before and after the list. Entries equal to '/' are skipped.

    Parameters
    ----------
    path : str
        Output file path (overwritten if it exists).
    rxn_ids : iterable of str
        Reaction IDs to write.

    Returns
    -------
    list of str
        The lines that were written, including the two '/' delimiter lines.
    """
    lines = ['/'] + ["'" + rxn_id + "'" for rxn_id in rxn_ids if rxn_id != '/'] + ['/']
    with open(path, 'w') as f:
        f.write('\n'.join(lines))
    return lines


rev_rxn_list = []; rev_list = []
fwd_rxn_list = []; fwd_list = []
fca_list = []

# `value in series` tests the Series' *index labels*, not its values, so
# look up counterpart reactions in an explicit set of the id values instead.
all_rxn_ids = set(df_stoich.id)

for rxn_id in df_stoich.id:
    tag, rxn_base_id, rxn_dir, enz_id = extract_details_from_rxnid(rxn_id)
    if rxn_dir == 'FWD':
        fwd_list.append(rxn_id)
        if tag == 'RXN':
            fwd_rxn_list.append(rxn_id)
        if rxn_base_id[:3] == 'EX_':
            rxn_type = 'exchange-fwd-half'
        # Add as irreversible if no counterpart found in other direction
        elif rxn_id.replace("_FWD-", "_REV-") not in all_rxn_ids:
            rxn_type = 'irrev'
        else:
            rxn_type = 'reversible-fwd-half'
    elif rxn_dir == 'REV':
        rev_list.append(rxn_id)
        if tag == 'RXN':
            rev_rxn_list.append(rxn_id)
        if rxn_base_id[:3] == 'EX_':
            rxn_type = 'exchange-rev-half'
        # Add as irreversible if no counterpart found in other direction
        elif rxn_id.replace("_REV-", "_FWD-") not in all_rxn_ids:
            rxn_type = 'irrev'
        else:
            rxn_type = 'reversible-rev-half'
    else:
        # Anything that is neither FWD nor REV is recorded as irreversible.
        rxn_type = 'irrev'
    fca_list.append("'" + rxn_id + "' " + str(rxn_types[rxn_type]))

# The list variables are rebound to the written lines (quoted IDs plus the
# '/' delimiters), matching what they held after this cell previously.
fwd_rxn_list = _write_quoted_set('./model/RBA_rxns_rxnmetabolicnetworkFWD.txt', fwd_rxn_list)
rev_rxn_list = _write_quoted_set('./model/RBA_rxns_rxnmetabolicnetworkREV.txt', rev_rxn_list)

fwd_list = _write_quoted_set('./model/RBA_rxns_FWD.txt', fwd_list)
rev_list = _write_quoted_set('./model/RBA_rxns_REV.txt', rev_list)

# Entries here are already formatted as "'<rxn id>' <type code>".
fca_list = ['/'] + fca_list + ['/']
with open('./model/rt_rxntype.txt', 'w') as f:
    f.write('\n'.join(fca_list))

In [37]:
# creating set pairing GSM values w/ RBA ones, for use in converting flux constraints from GSM format to RBA

# filter out rxns with no FBA_name
df_gsm = df_stoich[df_stoich.FBA_name.notnull()]
# get GSM rxn IDs from FBA_name column
gsm_rxn_ids = df_gsm.FBA_name.to_list()
# get RBA rxn IDs from index
rba_rxn_ids = df_gsm.index.to_list()
# direction multiplier: -1 if the RBA rxn ID's direction field is 'REV', 1 otherwise
rba_rxn_dirs = [-1 if extract_details_from_rxnid(rba_id)[2] == 'REV' else 1 for rba_id in rba_rxn_ids]
# create list of strings pairing GSM rxn IDs with RBA rxn IDs: "'<gsm id>'.'<rba id>' <dir>"
gsm_rba_rxn_pairs = [
    "'" + gsm_id + "'.'" + rba_id + "' " + str(direction)
    for gsm_id, rba_id, direction in zip(gsm_rxn_ids, rba_rxn_ids, rba_rxn_dirs)
]
# write to file
with open('./model/GSM_RBA_rxn_pairs.txt', 'w') as f:
    f.write('\n'.join(['/'] + gsm_rba_rxn_pairs + ['/']))
# make file w/ all FBA IDs
# De-duplicate while keeping first-seen order: iterating a set() of strings
# yields a different order from one kernel session to the next, which made
# this file change between otherwise identical runs.
unique_gsm_rxn_ids = list(dict.fromkeys(gsm_rxn_ids))
with open('./model/GSM_rxn_ids.txt', 'w') as f:
    f.write('\n'.join(['/'] + ["'" + gsm_id + "'" for gsm_id in unique_gsm_rxn_ids] + ['/']))

In [38]:
if copy_to_gams: # automatically copies the model folder to the GAMS folder (removing the previous version), if you choose to
    import os
    import shutil
    # copy "model" folder to GAMS folder
    # Source folder path
    source_folder = './model'

    # Destination folder path
    destination_folder = '../GAMS/model'

    # Remove the existing destination folder, if there is one.
    # shutil.rmtree raises FileNotFoundError on a missing path, which would
    # abort the copy the first time this runs (before any previous copy exists).
    if os.path.exists(destination_folder):
        shutil.rmtree(destination_folder)

    # Copy the folder (copytree requires that the destination not exist yet)
    shutil.copytree(source_folder, destination_folder)