In [54]:
import numpy as np

import seaborn as sns
import pandas as pd
import os
import pprint
import ast
import re
pp = pprint.PrettyPrinter(depth=6)

os.chdir(os.path.expanduser('~/vivarium-ecoli'))

import matplotlib.pyplot as plt
import dill
import requests
import xmltodict

In [2]:
complexation_rxn_df = pd.read_csv('reconstruction/ecoli/flat/complexation_reactions.tsv', sep='\t')
stoich_series = complexation_rxn_df.loc[:,['id', 'stoichiometry']].dropna().reset_index(drop=True)

stoich_list = []
for i, stoich in enumerate(stoich_series.loc[:, 'stoichiometry']):

    stoich = stoich.replace('null', '-1')

    stoich = ast.literal_eval(stoich)
    for k,v in stoich.items():
        stoich_list.append([k, v, stoich_series.loc[i, 'id'], None, None])

In [3]:
pre_complex_df = pd.DataFrame(stoich_list, columns=['complex', 'stoichiometry', 'reaction', 'enzyme-reaction', 'cofactors'])
pre_complex_df = pre_complex_df[pre_complex_df['stoichiometry'] > 0].reset_index(drop=True)
pre_complex_df

Unnamed: 0,complex,stoichiometry,reaction,enzyme-reaction,cofactors
0,1-PFK,1,1-PFK_RXN,,
1,2OXOGLUTARATEDEH-CPLX,1,2OXOGLUTARATEDEH-CPLX_RXN,,
2,3-ISOPROPYLMALDEHYDROG-CPLX,1,3-ISOPROPYLMALDEHYDROG-CPLX_RXN,,
3,3-ISOPROPYLMALISOM-CPLX,1,3-ISOPROPYLMALISOM-CPLX_RXN,,
4,3-METHYL-2-OXOBUT-OHCH3XFER-CPLX,1,3-METHYL-2-OXOBUT-OHCH3XFER-CPLX_RXN,,
...,...,...,...,...,...
1096,CPLX0-8053,1,CPLX0-8053_RXN,,
1097,CPLX0-8253,1,CPLX0-8253_RXN,,
1098,SRP-CPLX,1,SRP-CPLX_RXN,,
1099,CPLX0-7796APO,1,CPLX0-7796APO_RXN,,


## Connect to EcoCyc to go from Complex -> Cofactor + Metal sites

In [4]:
s = requests.Session() # create session
# Post login credentials to session:
s.post('https://websvc.biocyc.org/credentials/login/', data={'email':'cellulararchitect@protonmail.com', 'password':'Cellman0451'})

<Response [200]>

In [5]:
for i in range(len(pre_complex_df.index)): #

    complex = pre_complex_df.loc[i, 'complex']

    req_str = f'https://websvc.biocyc.org/getxml?id=ECOLI:{complex}&detail=low'

    r = s.get(req_str)
    if r.status_code != 200:
        print(complex, r.status_code)
        continue

    o = xmltodict.parse(r.content)['ptools-xml']['Protein']

    if 'catalyzes' not in o:
        continue
    else:
        o = o['catalyzes']['Enzymatic-Reaction']

    if type(o) is dict:
        o = [o]

    for enzrxn in o:
        enz_id = enzrxn['@frameid']

        enz_req_str = f'https://websvc.biocyc.org/getxml?id=ECOLI:{enz_id}&detail=high'

        re = s.get(enz_req_str)
        oe = xmltodict.parse(re.content)['ptools-xml']['Enzymatic-Reaction']

        pre_complex_df.at[i, 'enzyme-reaction'] = enz_id

        if "cofactor" in oe:
            # print(enz_id)
            oe = oe['cofactor']

            if type(oe) is dict:
                oe = [oe]

            cofactor_list = []
            for cofactor in oe:
                cof = cofactor['Compound']['@frameid']
                cofactor_list.append(cof)
                # print('\t' + cof)

            pre_complex_df.at[i, 'cofactors'] = cofactor_list



CPLX0-2423 404
CPLX0-3976 404
RECFOR-CPLX 404
CPLX0-7796APO 404


In [10]:
pre_complex_df

Unnamed: 0,complex,stoichiometry,reaction,enzyme-reaction,cofactors
0,1-PFK,1,1-PFK_RXN,1PFRUCTPHOSPHN-ENZRXN,[MG+2]
1,2OXOGLUTARATEDEH-CPLX,1,2OXOGLUTARATEDEH-CPLX_RXN,2OXOGLUTARATEDEH-ENZRXN,"[LIPOIC-ACID, THIAMINE-PYROPHOSPHATE, FAD, MG+2]"
2,3-ISOPROPYLMALDEHYDROG-CPLX,1,3-ISOPROPYLMALDEHYDROG-CPLX_RXN,3-ISOPROPYLMALDEHYDROG-ENZRXN,"[MG+2, MN+2]"
3,3-ISOPROPYLMALISOM-CPLX,1,3-ISOPROPYLMALISOM-CPLX_RXN,3-ISOPROPYLMALISOM-ENZRXN,[CPD-7]
4,3-METHYL-2-OXOBUT-OHCH3XFER-CPLX,1,3-METHYL-2-OXOBUT-OHCH3XFER-CPLX_RXN,3-METHYL-2-OXOBUT-OHCH3XFER-ENZRXN,[MG+2]
...,...,...,...,...,...
1096,CPLX0-8053,1,CPLX0-8053_RXN,,
1097,CPLX0-8253,1,CPLX0-8253_RXN,,
1098,SRP-CPLX,1,SRP-CPLX_RXN,,
1099,CPLX0-7796APO,1,CPLX0-7796APO_RXN,,


In [7]:
pre_complex_df.to_csv('cofactors.csv', index=False)

# Annotate table with counts

In [24]:
complex_df = pd.read_csv('notebooks/fbagd/cofactors.csv', sep=',').fillna('None')
complex_df.cofactors = complex_df.cofactors.apply(ast.literal_eval)
complex_df['counts'] = 0

complex_df

Unnamed: 0,complex,stoichiometry,reaction,enzyme-reaction,cofactors,counts
0,1-PFK,1,1-PFK_RXN,1PFRUCTPHOSPHN-ENZRXN,[MG+2],0
1,2OXOGLUTARATEDEH-CPLX,1,2OXOGLUTARATEDEH-CPLX_RXN,2OXOGLUTARATEDEH-ENZRXN,"[LIPOIC-ACID, THIAMINE-PYROPHOSPHATE, FAD, MG+2]",0
2,3-ISOPROPYLMALDEHYDROG-CPLX,1,3-ISOPROPYLMALDEHYDROG-CPLX_RXN,3-ISOPROPYLMALDEHYDROG-ENZRXN,"[MG+2, MN+2]",0
3,3-ISOPROPYLMALISOM-CPLX,1,3-ISOPROPYLMALISOM-CPLX_RXN,3-ISOPROPYLMALISOM-ENZRXN,[CPD-7],0
4,3-METHYL-2-OXOBUT-OHCH3XFER-CPLX,1,3-METHYL-2-OXOBUT-OHCH3XFER-CPLX_RXN,3-METHYL-2-OXOBUT-OHCH3XFER-ENZRXN,[MG+2],0
...,...,...,...,...,...,...
1096,CPLX0-8053,1,CPLX0-8053_RXN,,,0
1097,CPLX0-8253,1,CPLX0-8253_RXN,,,0
1098,SRP-CPLX,1,SRP-CPLX_RXN,,,0
1099,CPLX0-7796APO,1,CPLX0-7796APO_RXN,,,0


In [25]:
time = '50'
date = '2023-06-13'
experiment = 'fba-redux'
entry = f'{experiment}_{time}_{date}'
folder = f'out/fbagd/{entry}/'

In [26]:
output = np.load(folder + 'output.npy',allow_pickle='TRUE').item()
# output = np.load(r"out/geneRxnVerifData/output_glc.npy", allow_pickle=True, encoding='ASCII').tolist()
output = output['agents']['0']
fba = output['listeners']['fba_results']
mass = output['listeners']['mass']


In [27]:
f = open(folder + 'agent_steps.pkl', 'rb')
agent = dill.load(f)
f.close()

In [28]:
metabolism = agent['ecoli-metabolism-redux']
stoichiometry = metabolism.stoichiometry
bulk_ids = metabolism.bulk_ids

In [29]:
bulk = pd.DataFrame(output['bulk'], columns=bulk_ids)

In [31]:
complex_df

Unnamed: 0,complex,stoichiometry,reaction,enzyme-reaction,cofactors,counts
0,1-PFK,1,1-PFK_RXN,1PFRUCTPHOSPHN-ENZRXN,[MG+2],0
1,2OXOGLUTARATEDEH-CPLX,1,2OXOGLUTARATEDEH-CPLX_RXN,2OXOGLUTARATEDEH-ENZRXN,"[LIPOIC-ACID, THIAMINE-PYROPHOSPHATE, FAD, MG+2]",0
2,3-ISOPROPYLMALDEHYDROG-CPLX,1,3-ISOPROPYLMALDEHYDROG-CPLX_RXN,3-ISOPROPYLMALDEHYDROG-ENZRXN,"[MG+2, MN+2]",0
3,3-ISOPROPYLMALISOM-CPLX,1,3-ISOPROPYLMALISOM-CPLX_RXN,3-ISOPROPYLMALISOM-ENZRXN,[CPD-7],0
4,3-METHYL-2-OXOBUT-OHCH3XFER-CPLX,1,3-METHYL-2-OXOBUT-OHCH3XFER-CPLX_RXN,3-METHYL-2-OXOBUT-OHCH3XFER-ENZRXN,[MG+2],0
...,...,...,...,...,...,...
1096,CPLX0-8053,1,CPLX0-8053_RXN,,,0
1097,CPLX0-8253,1,CPLX0-8253_RXN,,,0
1098,SRP-CPLX,1,SRP-CPLX_RXN,,,0
1099,CPLX0-7796APO,1,CPLX0-7796APO_RXN,,,0


In [32]:
for complex_name in complex_df['complex']:
    bulk_name = complex_name + '[c]'

    if bulk_name in bulk_ids:
        complex_df.loc[complex_df['complex'] == complex_name, 'counts'] = bulk.at[24, bulk_name]

    elif bulk_name not in bulk_ids:
        for cpd in list(bulk_ids):
            if complex_name in cpd and len(cpd) < len(complex_name) + 4:
                bulk_name = cpd
                complex_df.loc[complex_df['complex'] == complex_name, 'counts'] = bulk.at[24, bulk_name]


In [33]:
complex_df.sort_values(by='counts', ascending=False)

Unnamed: 0,complex,stoichiometry,reaction,enzyme-reaction,cofactors,counts
709,CPLX0-8223,1,CPLX0-8223_RXN,,,36915
390,CPLX0-7534,1,CPLX0-7534_RXN,ENZRXN0-8522,,19181
864,GAPDH-A-CPLX,1,GAPDH-A-CPLX_RXN,GAPDH-A-ENZRXN,,13436
487,CPLX0-7747,1,CPLX0-7747_RXN,ENZRXN0-252,,12667
917,ISOCITHASE-CPLX,1,ISOCITHASE-CPLX_RXN,ISOCITDEH-ENZRXN,[MN+2],12315
...,...,...,...,...,...,...
123,ATOA-CPLX,1,ATOA-CPLX_RXN,,,0
124,ATOD-CPLX,1,ATOD-CPLX_RXN,,,0
126,ATPASE-1-CPLX,1,ATPASE-1-CPLX_RXN,TRANS-ENZRXN-2,,0
132,B2-CPLX,1,B2-CPLX_RXN,,,0


In [94]:
cofactor_df = complex_df[['cofactors', 'counts']].explode('cofactors').groupby('cofactors').sum().sort_values(by='counts', ascending=False)
cofactor_df

Unnamed: 0_level_0,counts
cofactors,Unnamed: 1_level_1
MG+2,55738
MN+2,27211
PYRIDOXAL_PHOSPHATE,24413
ZN+2,12807
23-DIPHOSPHOGLYCERATE,10058
FAD,8954
K+,5157
CPD-7,4556
FE+2,3256
THIAMINE-PYROPHOSPHATE,1868


In [95]:
# change name of index value FeS-Centers to CPD-7

cofactor_df['molecular_composition'] = 0
cofactor_df['molecular_composition'] = cofactor_df['molecular_composition'].astype(object)

cofactor_df

Unnamed: 0_level_0,counts,molecular_composition
cofactors,Unnamed: 1_level_1,Unnamed: 2_level_1
MG+2,55738,0
MN+2,27211,0
PYRIDOXAL_PHOSPHATE,24413,0
ZN+2,12807,0
23-DIPHOSPHOGLYCERATE,10058,0
FAD,8954,0
K+,5157,0
CPD-7,4556,0
FE+2,3256,0
THIAMINE-PYROPHOSPHATE,1868,0


In [96]:
for cofactor_name in cofactor_df.index:
    mol = cofactor_name
    print(cofactor_name)
    mol = mol.replace('+', '%2b')
    mol_str = f'https://websvc.biocyc.org/getxml?id=ECOLI:{mol}&detail=low'

    r = s.get(mol_str)
    o = xmltodict.parse(r.content)['ptools-xml']['Compound']['cml']['molecule']['formula']['@concise']
    #pp.pprint(o)

    re_output = re.findall(r'[A-Z]+ \d+', o)
    # split on space, convert number to int then make dict
    re_output = {i.split(' ')[0]: int(i.split(' ')[1]) for i in re_output}

    cofactor_df.at[cofactor_name, 'molecular_composition'] = re_output

MG+2


ValueError: Incompatible indexer with Series