In [1]:
import pandas as pd

import sys
sys.path.append('../build_GAMS_model/')
from custom_functions_scRBA import extract_details_from_rxnid
from custom_functions_scRBA import metabolites_dict_from_reaction_equation_RBA

In [2]:
# Load the RBA stoichiometry spreadsheet and label every row with the value
# of its 'id' column, so that rows can be addressed directly by that id.
stoich_path = '../build_GAMS_model/model/RBA_stoichiometry.xlsx'
df_all = pd.read_excel(stoich_path)
df_all.index = df_all['id'].tolist()

#### Check reactions and metabolites

In [3]:
# Scan every row whose id is tagged 'RXN' and gather the distinct reaction
# names together with the distinct metabolites appearing in their equations.
rxn_seen = set()
met_seen = set()
for row_id in df_all.index:
    # Only ids whose first '-'-separated field is 'RXN' are considered.
    if row_id.split('-')[0] != 'RXN':
        continue

    # The parsed id has four fields; only the second one is used here.
    _, rxn_name, _rxn_dir, _rxn_enz = extract_details_from_rxnid(row_id)
    rxn_seen.add(rxn_name)

    # NOTE: `met_dict` deliberately stays a notebook-level name -- a later
    # cell displays it (it ends up holding the last processed reaction).
    met_dict = metabolites_dict_from_reaction_equation_RBA(df_all.loc[row_id, 'reaction'])
    met_seen.update(met_dict.keys())

# An empty-string key is not kept as a metabolite id.
met_seen.discard('')

rxns = sorted(rxn_seen)
mets = sorted(met_seen)

In [4]:
# Report how many distinct reactions and metabolites were collected.
for label, collected in (('Number of reactions:', rxns),
                         ('Number of metabolites:', mets)):
    print(label, len(collected))

Number of reactions: 1888
Number of metabolites: 1602


#### Ribosomes
Nucleus: 80 protein subunits and 4 rRNA subunits<br>
Mitochondria: 74 protein subunits and 2 rRNA subunits

#### Check proteins and enzymes

In [5]:
# Read the species file line by line, skip blank lines and lone '/' lines,
# and drop the first and last character of every remaining entry
# (presumably surrounding quote characters -- TODO confirm against the file).
with open('../build_GAMS_model/model/RBA_species.txt') as species_file:
    raw_lines = species_file.read().split('\n')

species = []
for line in raw_lines:
    if line == '' or line == '/':
        continue
    species.append(line[1:-1])

In [11]:
def _strip_last_underscore_token(species_id):
    """Return ``species_id`` with its final '_'-delimited token removed.

    IDs without any underscore are returned unchanged. The trailing token is
    treated as a compartment suffix (e.g. the ``_c`` in ``'BIO-atp_c'``), so
    the same entity listed in several compartments collapses to one ID.

    Parameters
    ----------
    species_id : str
        A species identifier such as ``'PRO-XYZ_c'``.

    Returns
    -------
    str
        Everything before the last underscore, or the whole ID if it
        contains no underscore.
    """
    # rsplit with maxsplit=1 cuts at the last underscore only; when there is
    # no underscore the single returned element is the ID itself.
    return species_id.rsplit('_', 1)[0]


def _unique_ids_without_suffix(raw_ids):
    """Return the sorted, de-duplicated IDs after removing the last '_' token."""
    return sorted({_strip_last_underscore_token(raw_id) for raw_id in raw_ids})


# Species whose ID starts with the 'PRO' / 'ENZ' tag (the text before the first '-').
prots_raw = [i for i in species if i.split('-')[0] == 'PRO']
enzs_raw = [i for i in species if i.split('-')[0] == 'ENZ']

# Same normalisation for both groups: strip the suffix, de-duplicate, sort.
prots = _unique_ids_without_suffix(prots_raw)
enzs = _unique_ids_without_suffix(enzs_raw)

In [12]:
# Report how many distinct proteins and enzymes remain after normalisation.
for label, collected in (('Number of proteins:', prots),
                         ('Number of enzymes:', enzs)):
    print(label, len(collected))

Number of proteins: 1177
Number of enzymes: 903


In [32]:
# Display the species list parsed from RBA_species.txt.
species

['BIO-13BDglucan_en',
 'BIO-16BDglucan_en',
 'BIO-atp_c',
 'BIO-ca2_c',
 'BIO-carb',
 'BIO-chtn_c',
 'BIO-coa_c',
 'BIO-cofactor',
 'BIO-ctp_c',
 'BIO-cu2_c',
 'BIO-datp_c',
 'BIO-dctp_c',
 'BIO-dgtp_c',
 'BIO-dna',
 'BIO-dttp_c',
 'BIO-ergst161_rm',
 'BIO-ergst181_rm',
 'BIO-ergst_c',
 'BIO-fad_c',
 'BIO-fe2_c',
 'BIO-glycogen_c',
 'BIO-gtp_c',
 'BIO-hdca_c',
 'BIO-hdcea_c',
 'BIO-hemeA_c',
 'BIO-ipc_g',
 'BIO-k_c',
 'BIO-lipid',
 'BIO-mannan_c',
 'BIO-metal',
 'BIO-mg2_c',
 'BIO-mn2_c',
 'BIO-mrna',
 'BIO-nad_c',
 'BIO-nadh_c',
 'BIO-nadp_c',
 'BIO-nadph_c',
 'BIO-ocdca_c',
 'BIO-ocdcea_c',
 'BIO-pail_c',
 'BIO-pc_c',
 'BIO-pe_c',
 'BIO-pi',
 'BIO-prot',
 'BIO-protcyt',
 'BIO-protdummy',
 'BIO-protmito',
 'BIO-protmodeled',
 'BIO-ps_c',
 'BIO-ribflv_c',
 'BIO-rna',
 'BIO-rrna',
 'BIO-so4',
 'BIO-tag_c',
 'BIO-thf_c',
 'BIO-thmpp_c',
 'BIO-tre_c',
 'BIO-trna',
 'BIO-utp_c',
 'BIO-varbiom',
 'BIO-zn2_c',
 'ENZ-ACP1CEM1',
 'ENZ-ACP1MCT1',
 'ENZ-ALG1314',
 'ENZ-ATPASECPLXgm',
 'ENZ-ATPAS

In [19]:
# Display the sorted list of unique reaction names collected from the 'RXN' rows.
rxns

['12AMANTF_g',
 '13BDGLUCANt_c_en',
 '13BGH_e',
 '13GS_c',
 '14BMANTF_c',
 '14DMLANOSTt_c_e',
 '16GS_c',
 '1AGPCt_l_rm',
 '1AGPEt_l_rm',
 '1MLCLAT_mm',
 '23CAPPD_c',
 '2DDA7Pt_c_m',
 '2DHPt_c_m',
 '2HGOR_c',
 '2MBACt_c_e',
 '2MBALDt_c_e',
 '2MBALDt_c_m',
 '2MBTOHt_c_e',
 '2MBTOHt_c_m',
 '2OBUTt_c_m',
 '2OXOADPt_c_m',
 '2PHETOHt_c_e',
 '2PHETOHt_c_m',
 '3C3HMPt_c_e',
 '3C3HMPt_c_m',
 '3C4MOPt_c_m',
 '3DH5HPBMT_m',
 '3DH5HPBt_c_m',
 '3DSPHR_r',
 '3HACD200_rm',
 '3HACD220_rm',
 '3HACD240_rm',
 '3HACD260_rm',
 '3HAD40_m',
 '3HAD60_m',
 '3HAD80_m',
 '3HAO_c',
 '3HPH5MBDC_m',
 '3HXPHBH_c',
 '3HXPHBt_c_m',
 '3IPM3MT_c',
 '3MBALDt_c_e',
 '3MBALDt_c_m',
 '3MLCLAT_mm',
 '3MOBDC_c',
 '3MOPt_c_e',
 '3MOPt_c_m',
 '3OACE200_rm',
 '3OACE220_rm',
 '3OACE240_rm',
 '3OACE260_rm',
 '3OACR200_rm',
 '3OACR220_rm',
 '3OACR240_rm',
 '3OACR260_rm',
 '3OAR40_m',
 '3OAR60_m',
 '3OAR80_m',
 '3OAS60_m',
 '3OAS80_m',
 '4ABTORx_c',
 '4ABTORy_c',
 '4ABUTNt_c_m',
 '4ABUTt_c_m',
 '4ABUTtps_e',
 '4ABZt_c_e',
 '4ABZt_c_

In [20]:
# Display the sorted list of unique metabolite ids collected from the 'RXN' rows.
mets

['MET-10fthf_c',
 'MET-10fthf_m',
 'MET-13BDglucan_c',
 'MET-13BDglucan_e',
 'MET-13BDglucan_en',
 'MET-13dampp_c',
 'MET-13dpg_c',
 'MET-13em3gacpail_r',
 'MET-14bsq_c',
 'MET-14dmlanost_c',
 'MET-14dmlanost_e',
 'MET-16BDglucan_en',
 'MET-1agp_c',
 'MET-1agp_gm',
 'MET-1agp_l',
 'MET-1agp_rm',
 'MET-1agp_vm',
 'MET-1agpc_en',
 'MET-1agpc_gen_e',
 'MET-1agpc_l',
 'MET-1agpc_mm',
 'MET-1agpc_rm',
 'MET-1agpe_en',
 'MET-1agpe_l',
 'MET-1agpe_rm',
 'MET-1agpi_en',
 'MET-1agps_en',
 'MET-1em2gacpail_r',
 'MET-1em3gacpail_r',
 'MET-1mag_c',
 'MET-1mag_gm',
 'MET-1mag_l',
 'MET-1mag_m',
 'MET-1mag_vm',
 'MET-1mlcl_mm',
 'MET-1mncam_c',
 'MET-1p3h5c_c',
 'MET-1p3h5c_m',
 'MET-1pyr5c_c',
 'MET-1pyr5c_m',
 'MET-23camp_c',
 'MET-23dhmb_m',
 'MET-23dhmp_m',
 'MET-23dpg_c',
 'MET-25aics_c',
 'MET-25dhtpp_c',
 'MET-25drapp_c',
 'MET-2agpi_rm',
 'MET-2ahbut_m',
 'MET-2ahhmd_m',
 'MET-2amsa_c',
 'MET-2aobut_c',
 'MET-2cpr5p_c',
 'MET-2dda7p_c',
 'MET-2dda7p_m',
 'MET-2dhp_c',
 'MET-2dhp_m',
 'MET-2d

In [13]:
# Preview the first rows of the stoichiometry table (row labels are the ids).
df_all.head()

Unnamed: 0,id,type,coupling_type,coupling_species,reaction
RXN-EX_13BDglucan_e_FWD-SPONT,RXN-EX_13BDglucan_e_FWD-SPONT,metabolic,,,MET-13BDglucan_e -->
RXN-EX_13BDglucan_e_REV-SPONT,RXN-EX_13BDglucan_e_REV-SPONT,metabolic,,,-->MET-13BDglucan_e
RXN-EX_crn_e_FWD-SPONT,RXN-EX_crn_e_FWD-SPONT,metabolic,,,MET-crn_e -->
RXN-EX_crn_e_REV-SPONT,RXN-EX_crn_e_REV-SPONT,metabolic,,,-->MET-crn_e
RXN-EX_lac__D_e_FWD-SPONT,RXN-EX_lac__D_e_FWD-SPONT,metabolic,,,MET-lac__D_e -->


In [14]:
# NOTE: `met_dict` is the variable left over from the reaction/metabolite
# scan loop; when the cells are run top to bottom it holds the metabolite
# dictionary of the last 'RXN' row that loop processed.
met_dict

{'MET-ficytC_m': -2.0,
 'MET-lac__L_c': -1.0,
 'MET-focytC_m': 2.0,
 'MET-h_c': 2.0,
 'MET-pyr_c': 1.0}