In [1]:
import pandas as pd
import numpy as np

import sys
sys.path.append('../../pycore/')
from simulate import RBA_result
from utils import extract_details_from_rxnid

import matplotlib.pyplot as plt
import matplotlib.patches as mp
import seaborn as sns
import matplotlib.lines as mlines
%matplotlib inline

In [9]:
df_prot = pd.read_excel('../../build_GAMS_model/input/PROTEIN_stoich_curation.xlsx')
df_prot.index = df_prot.id.to_list()

df_data_idx = pd.read_excel('./expmt_exflux_data.xlsx')
df_data_idx.index = df_data_idx.data_id.to_list()

In [3]:
df_path = pd.read_excel('./manual_pathways_assignments.xlsx')

In [13]:
sim_id = ('batch', 'vglc_13_0')
exp_id = 'Elsemman2022_batchGlc'

# Conversion g/gDW -> g/gProtein. Conversion factor ptot = gProtein/gDW
mu = df_data_idx.loc[exp_id, 'GR_expmt (1/h)']
ptot = (36.94 + 34.22*mu) / 100

# Load simulated results
res = RBA_result(biom_id=None)
res.load_from_json('../output/' + sim_id[0] + '/results_capacitycorrected/' + \
                   sim_id[1] + '/RBA_result.json')

# Load experimental observations
df_data = pd.read_excel('../../parameterization/datasets/' + exp_id + \
                        '/' + exp_id + '.xlsx')
df_data.index = df_data.id.to_list()

# Write
for i in df_path.index:
    p = df_path.protein[i]
    if p in res.proteome_allocation.keys():
        df_path.loc[i, 'simulated (g/gProtein)'] = res.proteome_allocation[p]
    else:
        df_path.loc[i, 'simulated (g/gProtein)'] = 0
        
    if p in df_data.index:
        val = df_data.loc[p, 'conc (g/gDW)']
        if pd.isnull(val) == False:
            df_path.loc[i, 'experimental (g/gProtein)'] = val * ptot

In [6]:
res

<simulate.RBA_result at 0x7efebf7c82d0>

In [14]:
df_path

Unnamed: 0,pathway,rxn_id,protein,simulated (g/gProtein),experimental (g/gProtein)
0,Glycolysis/PPP,HEX1_c,YCL040W,0.000126,
1,Glycolysis/PPP,HEX1_c,YFR053C,0.000000,6.964399e-07
2,Glycolysis/PPP,HEX1_c,YGL253W,0.000093,6.569043e-04
3,Glycolysis/PPP,HEX1_c,YLR446W,0.000000,
4,Glycolysis/PPP,PGI_c,YBR196C,0.000665,1.243278e-03
...,...,...,...,...,...
67,TCA/Energy,SUCDq6_m,YKL141W,0.000007,1.025103e-05
68,TCA/Energy,SUCDq6_m,YKL148C,0.000000,2.129591e-05
69,TCA/Energy,SUCDq6_m,YLL041C,0.000010,1.025103e-05
70,TCA/Energy,FUM_m,YPL262W_m,0.000000,8.058651e-05


<br><br>
### View experimental data

In [20]:
df_data_idx = pd.read_excel('./expmt_exflux_data.xlsx')
df_path_expmt_raw = pd.read_excel('./manual_pathways_assignments.xlsx')
expmt_data = dict()

for i in df_data_idx.index:
    # Load proteomics data
    i_exp = df_data_idx.data_id[i]
    df_data = pd.read_excel('../../parameterization/datasets/' + i_exp + \
                            '/' + i_exp + '.xlsx')
    df_data.index = df_data.id.to_list()
    
    # Conversion g/gDW -> g/gProtein. Conversion factor ptot = gProtein/gDW
    mu = df_data_idx.loc[i, 'GR_expmt (1/h)']
    ptot = (36.94 + 34.22*mu) / 100
    
    # Write to working dataframe
    df_path_expmt = df_path_expmt_raw.copy(deep=True)
    for j in df_path_expmt.index:
        p = df_path_expmt.protein[j]
        if p in df_data.index:
            gpgDW = df_data.loc[p, 'conc (g/gDW)']
            df_path_expmt.loc[j, 'conc (g/gProtein)'] = gpgDW / ptot
            
    expmt_data[i_exp] = df_path_expmt

In [21]:
rxn = 'PGI_c'
for exp_id,df_data in expmt_data.items():
    print(exp_id)
    df = df_data[df_data.rxn_id == rxn]
    for i in df.index:
        print(df.protein[i], df.loc[i, 'conc (g/gProtein)'])

Lahtvee2017_chemoRef
YBR196C 0.004360609503616819
Yu2020_chemoClim
YBR196C 0.018360495028848667
Yu2021_chemoClimNH4_010
YBR196C 0.011061807705796916
Elsemman2022_chemoGlc020
YBR196C 0.003273574017150395
Elsemman2022_chemoGlc023
YBR196C 0.0033291077018682137
Elsemman2022_chemoGlc027
YBR196C 0.003156980210234973
Elsemman2022_chemoGlc030
YBR196C 0.003137952508158899
Bjorkeroth2020_batchYNB
YBR196C 0.00782962439779235
Elsemman2022_batchGlc
YBR196C 0.0050463947158482965
Elsemman2022_batchGlcCyHx4
YBR196C 0.005274717271152006
Bjorkeroth2020_batchYNBAA
YBR196C 0.008270512771041029
Elsemman2022_batchGlcCyHx1
YBR196C 0.0038776394846297656
Elsemman2022_batchGlcCyHx2
YBR196C 0.004124658269102581
Elsemman2022_batchGlcCyHx3
YBR196C 0.004759242198952417


In [16]:
expmt_data

{'Lahtvee2017_chemoRef':            pathway    rxn_id    protein  conc (g/gProtein)
 0   Glycolysis/PPP    HEX1_c    YCL040W           0.002983
 1   Glycolysis/PPP    HEX1_c    YFR053C           0.001884
 2   Glycolysis/PPP    HEX1_c    YGL253W           0.001362
 3   Glycolysis/PPP    HEX1_c    YLR446W                NaN
 4   Glycolysis/PPP     PGI_c    YBR196C           0.004361
 ..             ...       ...        ...                ...
 67      TCA/Energy  SUCDq6_m    YKL141W           0.000034
 68      TCA/Energy  SUCDq6_m    YKL148C           0.000434
 69      TCA/Energy  SUCDq6_m    YLL041C           0.000890
 70      TCA/Energy     FUM_m  YPL262W_m           0.000584
 71      TCA/Energy     MDH_m    YKL085W           0.004747
 
 [72 rows x 4 columns],
 'Yu2020_chemoClim':            pathway    rxn_id    protein  conc (g/gProtein)
 0   Glycolysis/PPP    HEX1_c    YCL040W           0.000994
 1   Glycolysis/PPP    HEX1_c    YFR053C           0.001976
 2   Glycolysis/PPP    HEX1_c 