In [49]:
import pandas as pd
import itertools, os

In [14]:
# Both tables come from different sheets of the same Hackett_Rabinowitz_2016 workbook.
hackett_xls = './raw_data_files/Hackett_Rabinowitz_2016.xls'

# Media exchange measurements (one sheet, left with its default integer index).
df_exrxn = pd.read_excel(hackett_xls, sheet_name='Media Exchange')

# Chemostat information, re-indexed by ChemostatID so that rows can be
# looked up directly with a case label.
df_phys = pd.read_excel(hackett_xls, sheet_name='Chemostat Information')
df_phys.index = df_phys['ChemostatID'].tolist()

In [37]:
#### Map metabolite names used in the raw data to the model's exchange reaction IDs
# Note: the orotate exchange flux is skipped because the model has no orotate
# exchange reaction and the orotate secretion flux is very small.
# The pairs are listed in the order in which constraints are later emitted.
conv = dict([
    ('D-glucose [extracellular]', 'EX_glc__D_e'),
    ('(R)-lactate [extracellular]', 'EX_lac__D_e'),
    ('acetaldehyde [extracellular]', 'EX_acald_e'),
    ('acetate [extracellular]', 'EX_ac_e'),
    ('ethanol [extracellular]', 'EX_etoh_e'),
    ('glycerol [extracellular]', 'EX_glyc_e'),
    ('succinate [extracellular]', 'EX_succ_e'),
])

In [53]:
fname_prefix = 'Hackett2016_Rabinowitz'
conds = ['C', 'N', 'P', 'U', 'L']
dils = ['0.05', '0.11', '0.16', '0.22', '0.30']

# For each chemostat case (condition letter + dilution label), turn the media
# exchange rows for that case into flux-bound statements and write them to
# ./<fname_prefix>_<case>/phenotype_frompy.txt.
for cond, dil in itertools.product(conds, dils):
    case = cond + dil
    fpath = './' + fname_prefix + '_' + case + '/'
    os.makedirs(fpath, exist_ok=True)

    phenotype_text = []
    mu = df_phys.DR_Actual[case]
    f_gDW = df_phys.Volume_Fraction_Mean[case]
    D_gDW = df_phys.loc[case, 'gDCW/mL']

    # Copy the filtered rows so that re-indexing never acts on a slice of df_exrxn.
    df = df_exrxn[df_exrxn.Condition == case].copy()
    df.index = df.Metabolite.to_list()
    for rawkey, newkey in conv.items():
        # Metabolites with no row for this case get no constraint.
        if rawkey not in df.index:
            continue

        dC = df.loc[rawkey, 'Change_(M)']
        flux_type = df.loc[rawkey, 'Type']
        if flux_type == 'uptake':
            vtype, vzerotype = 'REV', 'FWD'
        elif flux_type == 'excretion':
            vtype, vzerotype = 'FWD', 'REV'
        else:
            # Skip this metabolite: continuing would reuse the direction left
            # over from the previous metabolite (or fail on an undefined name).
            print('Unrecognized exchange flux type', repr(flux_type), 'for', rawkey, 'in', case)
            continue

        rid = 'RXN-' + newkey + '_' + vtype + '-SPONT'
        rid_zero = 'RXN-' + newkey + '_' + vzerotype + '-SPONT'
        # NOTE(review): the factor 1000 presumably converts the 'Change_(M)'
        # value from M to mM before normalising by biomass -- confirm units.
        v = dC * 1000 * mu / f_gDW / D_gDW

        # Uptake (REV) fluxes are fixed to the computed value; excretion (FWD)
        # fluxes only receive it as a lower bound.
        bound = 'fx' if vtype == 'REV' else 'lo'
        phenotype_text.append("v." + bound + "('" + rid + "') = " + str(round(v, 4)) + ";")

        # The opposite direction of the same exchange is always closed.
        phenotype_text.append("v.fx('" + rid_zero + "') = 0;")
        phenotype_text.append('')

    # Conditions 'U' and 'L' additionally open the REV direction of the ura /
    # leu__L exchange (upper bound 1e4) and close the FWD direction.
    if cond == 'U':
        phenotype_text.append("v.up('RXN-EX_ura_e_REV-SPONT') = 1e4;")
        phenotype_text.append("v.fx('RXN-EX_ura_e_FWD-SPONT') = 0;")
    elif cond == 'L':
        phenotype_text.append("v.up('RXN-EX_leu__L_e_REV-SPONT') = 1e4;")
        phenotype_text.append("v.fx('RXN-EX_leu__L_e_FWD-SPONT') = 0;")

    with open(fpath + 'phenotype_frompy.txt', 'w') as f:
        f.write('\n'.join(phenotype_text))

    # NOTE(review): this break stops after the first case ('C0.05'), so only
    # one phenotype file is written. It looks like a debugging leftover;
    # remove it to generate files for every condition/dilution combination.
    break

In [59]:
# Protein relative-abundance sheet (log2 values, per the sheet name) from the
# Hackett_Rabinowitz_2016 workbook.
df_pdata = pd.read_excel(
    './raw_data_files/Hackett_Rabinowitz_2016.xls',
    sheet_name='Protein log2 RA',
)

# Curated protein table from the model-build inputs, indexed by its `id` column.
df_prot = pd.read_excel(
    '../scRBA_build_GAMS_model/input/PROTEIN_stoich_curation.xlsx'
)
df_prot.index = df_prot['id'].tolist()

In [70]:
case = 'C0.05'

# Convert the log2 values of the selected case back to linear scale, keyed by
# gene ID. Rows whose Gene entry contains '/' are skipped
# (NOTE(review): presumably entries mapped to several genes -- confirm).
# Building a dict first avoids growing the Series one label at a time; as with
# repeated label assignment, a repeated gene keeps its first position and its
# last value.
unlogged_by_gene = {
    gene: 2 ** log2_value
    for gene, log2_value in zip(df_pdata['Gene'], df_pdata[case])
    if '/' not in gene
}
pdata_unlog = pd.Series(unlogged_by_gene, dtype='float64')

In [71]:
# Display the linear-scale (2 ** log2 value) protein series for the selected case.
pdata_unlog

Q0250      1.521374
YAL003W    0.728111
YAL005C    0.853462
YAL012W    0.680597
YAL016W    1.568022
             ...   
YPR181C    1.316988
YPR183W    1.213675
YPR184W    1.478461
YPR189W    0.604341
YPR191W    1.549328
Length: 1154, dtype: float64

In [58]:
# Preview the first rows of the curated protein table (indexed by protein id).
df_prot.head()

Unnamed: 0,id,gene_src,name,uniprot,subloc_assigned,cofactor_stoich,cofactor_comments,MW (g/mmol),sequence,status
Q0045,Q0045,Q0045,COX1,P00401,mm,"hemegen_c:2,cu2_c:1",Binds 2 heme A groups non-covalently per subun...,59.59198,MVQRWLYSTNAKDIAVLYFMLAIFSGMAGTAMSLIIRLELAAPGSQ...,cofAsgnManual
Q0080,Q0080,Q0080,ATP8,P00856,mm,,,5.36422,MPQLVPFYFMNQLTYGFLLMITLLILFSQFFLPMILRLYVSRLFISKL*,
Q0085,Q0085,Q0085,ATP6,P00854,mm,,,28.12549,MFNLLNTYITSPLDQFEIRTLFGLQSSFIDLSCLNLTTFSLYTIIV...,
Q0105,Q0105,Q0105,COB,P00163,mm,hemegen_c:2,Binds 2 heme b groups non-covalently per subunit.,43.86371,MAFRKSNVYLSLVNSYIIDSPQPSSINYWWNMGSLLGLCLVIQIVT...,cofAsgnManual
Q0130,Q0130,Q0130,OLI1,P61829,mm,,,8.27731,MQLVLAAKYIGAGISTIGLLGAGIGIAIVFAALINGVSRNPSIKDT...,


In [60]:
# Preview the first rows of the 'Protein log2 RA' sheet: a Gene column plus one column per case.
df_pdata.head()

Unnamed: 0,Gene,P0.05,P0.11,P0.16,P0.22,P0.30,C0.05,C0.11,C0.16,C0.22,...,L0.05,L0.11,L0.16,L0.22,L0.30,U0.05,U0.11,U0.16,U0.22,U0.30
0,Q0250,-0.196066,-0.224162,-0.525505,-0.882266,-1.690752,0.605374,0.676004,0.608752,0.731384,...,-2.182514,-2.294294,-2.799327,-2.660288,-1.84945,-0.34524,-1.447588,-1.889145,-1.36711,-1.4094
1,YAL003W,0.089168,0.125745,-0.055566,-0.01282,-0.040958,-0.45777,-0.523911,-0.495589,-0.463633,...,-0.674725,-0.677956,-0.682031,-0.387444,-0.293247,-0.223617,-0.739912,-0.762628,-0.42478,-0.230735
2,YAL005C,-0.109645,-0.474417,-0.691404,-0.893698,-1.153349,-0.228601,-0.592487,-0.519658,-0.471793,...,-1.006625,-1.262954,-1.423012,-1.340277,-1.077013,0.172234,-0.559264,-1.000085,-1.187593,-1.059307
3,YAL012W,-0.026941,0.15262,0.266509,0.32436,0.366615,-0.555126,-0.038603,0.136587,0.108889,...,0.504863,0.666391,0.752442,0.808444,0.750038,0.91537,0.519255,0.832379,0.861355,0.865522
4,YAL016W,0.14031,0.039977,0.142683,0.137657,-0.049959,0.648946,0.506366,0.30728,0.274115,...,0.049987,-0.181987,-0.151273,-0.239334,-0.048145,0.214667,0.424744,0.385827,0.189725,0.346792
