In [54]:
import numpy as np

import seaborn as sns
import pandas as pd
import os
import pprint
import ast
import re
# Pretty-printer limited to 6 levels of nesting (handy for inspecting nested parsed responses).
pp = pprint.PrettyPrinter(depth=6)

# Make the vivarium-ecoli checkout in the user's home directory the working directory,
# so the relative paths used in the cells below resolve against it.
os.chdir(os.path.expanduser('~/vivarium-ecoli'))

import matplotlib.pyplot as plt
import dill
import requests
import xmltodict

In [2]:
# Load the complexation reactions and keep only rows that have both an id and a stoichiometry.
complexation_rxn_df = pd.read_csv('reconstruction/ecoli/flat/complexation_reactions.tsv', sep='\t')
stoich_series = (
    complexation_rxn_df[['id', 'stoichiometry']]
    .dropna()
    .reset_index(drop=True)
)

# Flatten every reaction into [molecule, coefficient, reaction id, None, None] records;
# the two trailing None slots are placeholders for columns filled in later.
stoich_list = []
for rxn_id, raw_stoich in zip(stoich_series['id'], stoich_series['stoichiometry']):
    # 'null' is not a Python literal, so swap in -1 before literal_eval parses the mapping.
    parsed_stoich = ast.literal_eval(raw_stoich.replace('null', '-1'))
    stoich_list.extend(
        [molecule, coefficient, rxn_id, None, None]
        for molecule, coefficient in parsed_stoich.items()
    )

In [3]:
# Tabulate the flattened records and keep only entries with a positive coefficient.
record_columns = ['complex', 'stoichiometry', 'reaction', 'enzyme-reaction', 'cofactors']
pre_complex_df = (
    pd.DataFrame(stoich_list, columns=record_columns)
    .loc[lambda frame: frame['stoichiometry'] > 0]
    .reset_index(drop=True)
)
pre_complex_df

Unnamed: 0,complex,stoichiometry,reaction,enzyme-reaction,cofactors
0,1-PFK,1,1-PFK_RXN,,
1,2OXOGLUTARATEDEH-CPLX,1,2OXOGLUTARATEDEH-CPLX_RXN,,
2,3-ISOPROPYLMALDEHYDROG-CPLX,1,3-ISOPROPYLMALDEHYDROG-CPLX_RXN,,
3,3-ISOPROPYLMALISOM-CPLX,1,3-ISOPROPYLMALISOM-CPLX_RXN,,
4,3-METHYL-2-OXOBUT-OHCH3XFER-CPLX,1,3-METHYL-2-OXOBUT-OHCH3XFER-CPLX_RXN,,
...,...,...,...,...,...
1096,CPLX0-8053,1,CPLX0-8053_RXN,,
1097,CPLX0-8253,1,CPLX0-8253_RXN,,
1098,SRP-CPLX,1,SRP-CPLX_RXN,,
1099,CPLX0-7796APO,1,CPLX0-7796APO_RXN,,


## Connect to EcoCyc to go from Complex -> Cofactor + Metal sites

In [4]:
# Open a BioCyc web-services session; later cells reuse `s` for their queries.
# Credentials are read from the BIOCYC_EMAIL / BIOCYC_PASSWORD environment variables so
# they are never stored in the notebook (a missing variable raises KeyError here).
# NOTE(review): the email/password that used to be hard-coded in this cell are exposed
# in the notebook's history and should be rotated.
s = requests.Session()
login_response = s.post(
    'https://websvc.biocyc.org/credentials/login/',
    data={
        'email': os.environ['BIOCYC_EMAIL'],
        'password': os.environ['BIOCYC_PASSWORD'],
    },
)
# Fail loudly on a rejected login instead of on every later query.
login_response.raise_for_status()
login_response

<Response [200]>

In [5]:
# For every complex, ask BioCyc which enzymatic reactions it catalyzes, then record the
# enzymatic-reaction id and its cofactor ids on that complex's row of pre_complex_df.
# Failed requests are reported as "<id> <status code>" and skipped.
BIOCYC_GETXML = 'https://websvc.biocyc.org/getxml'

for i in range(len(pre_complex_df.index)):

    # Named complex_id rather than `complex` to avoid shadowing the builtin type.
    complex_id = pre_complex_df.loc[i, 'complex']

    r = s.get(f'{BIOCYC_GETXML}?id=ECOLI:{complex_id}&detail=low')
    if r.status_code != 200:
        print(complex_id, r.status_code)
        continue

    o = xmltodict.parse(r.content)['ptools-xml']['Protein']

    if 'catalyzes' not in o:
        continue
    o = o['catalyzes']['Enzymatic-Reaction']

    # A single child element is parsed as a mapping rather than a list; normalise to a list.
    # isinstance also covers dict subclasses such as OrderedDict.
    if isinstance(o, dict):
        o = [o]

    # NOTE(review): when a complex catalyzes several enzymatic reactions, each iteration
    # overwrites the row, so only the last id (and the last non-empty cofactor list)
    # is kept -- confirm this is intended.
    for enzrxn in o:
        enz_id = enzrxn['@frameid']

        # Do not call this response `re`: that would clobber the imported `re` module.
        enz_response = s.get(f'{BIOCYC_GETXML}?id=ECOLI:{enz_id}&detail=high')
        if enz_response.status_code != 200:
            print(enz_id, enz_response.status_code)
            continue

        oe = xmltodict.parse(enz_response.content)['ptools-xml']['Enzymatic-Reaction']

        pre_complex_df.at[i, 'enzyme-reaction'] = enz_id

        if 'cofactor' in oe:
            cofactor_entries = oe['cofactor']
            if isinstance(cofactor_entries, dict):
                cofactor_entries = [cofactor_entries]

            cofactor_list = [entry['Compound']['@frameid'] for entry in cofactor_entries]

            pre_complex_df.at[i, 'cofactors'] = cofactor_list



CPLX0-2423 404
CPLX0-3976 404
RECFOR-CPLX 404
CPLX0-7796APO 404


In [10]:
# Show the table with the enzyme-reaction and cofactors columns filled in by the EcoCyc lookup.
pre_complex_df

Unnamed: 0,complex,stoichiometry,reaction,enzyme-reaction,cofactors
0,1-PFK,1,1-PFK_RXN,1PFRUCTPHOSPHN-ENZRXN,[MG+2]
1,2OXOGLUTARATEDEH-CPLX,1,2OXOGLUTARATEDEH-CPLX_RXN,2OXOGLUTARATEDEH-ENZRXN,"[LIPOIC-ACID, THIAMINE-PYROPHOSPHATE, FAD, MG+2]"
2,3-ISOPROPYLMALDEHYDROG-CPLX,1,3-ISOPROPYLMALDEHYDROG-CPLX_RXN,3-ISOPROPYLMALDEHYDROG-ENZRXN,"[MG+2, MN+2]"
3,3-ISOPROPYLMALISOM-CPLX,1,3-ISOPROPYLMALISOM-CPLX_RXN,3-ISOPROPYLMALISOM-ENZRXN,[CPD-7]
4,3-METHYL-2-OXOBUT-OHCH3XFER-CPLX,1,3-METHYL-2-OXOBUT-OHCH3XFER-CPLX_RXN,3-METHYL-2-OXOBUT-OHCH3XFER-ENZRXN,[MG+2]
...,...,...,...,...,...
1096,CPLX0-8053,1,CPLX0-8053_RXN,,
1097,CPLX0-8253,1,CPLX0-8253_RXN,,
1098,SRP-CPLX,1,SRP-CPLX_RXN,,
1099,CPLX0-7796APO,1,CPLX0-7796APO_RXN,,


In [7]:
# Persist the annotated table at the path the "Annotate table with counts" section reads.
# The working directory is the repository root (set by os.chdir in the first cell), so a
# bare 'cofactors.csv' would be written somewhere other than where it is loaded from.
pre_complex_df.to_csv('notebooks/fbagd/cofactors.csv', index=False)

# Annotate table with counts

In [169]:
# Reload the saved table; missing cells become the string 'None' so that every
# cofactors entry is a parseable Python literal.
complex_df = (
    pd.read_csv('notebooks/fbagd/cofactors.csv', sep=',')
    .fillna('None')
)

# Parse the stringified lists back into lists, mapping a parsed None to an empty list.
complex_df['cofactors'] = (
    complex_df['cofactors']
    .apply(ast.literal_eval)
    .apply(lambda parsed: [] if parsed is None else parsed)
)

# Counts start at zero and are filled in from the simulation output further down.
complex_df['counts'] = 0

complex_df

Unnamed: 0,complex,stoichiometry,reaction,enzyme-reaction,cofactors,counts
0,1-PFK,1,1-PFK_RXN,1PFRUCTPHOSPHN-ENZRXN,[MG+2],0
1,2OXOGLUTARATEDEH-CPLX,1,2OXOGLUTARATEDEH-CPLX_RXN,2OXOGLUTARATEDEH-ENZRXN,"[LIPOIC-ACID, THIAMINE-PYROPHOSPHATE, FAD, MG+2]",0
2,3-ISOPROPYLMALDEHYDROG-CPLX,1,3-ISOPROPYLMALDEHYDROG-CPLX_RXN,3-ISOPROPYLMALDEHYDROG-ENZRXN,"[MG+2, MN+2]",0
3,3-ISOPROPYLMALISOM-CPLX,1,3-ISOPROPYLMALISOM-CPLX_RXN,3-ISOPROPYLMALISOM-ENZRXN,[CPD-7],0
4,3-METHYL-2-OXOBUT-OHCH3XFER-CPLX,1,3-METHYL-2-OXOBUT-OHCH3XFER-CPLX_RXN,3-METHYL-2-OXOBUT-OHCH3XFER-ENZRXN,[MG+2],0
...,...,...,...,...,...,...
1096,CPLX0-8053,1,CPLX0-8053_RXN,,[],0
1097,CPLX0-8253,1,CPLX0-8253_RXN,,[],0
1098,SRP-CPLX,1,SRP-CPLX_RXN,,[],0
1099,CPLX0-7796APO,1,CPLX0-7796APO_RXN,,[],0


In [170]:
# Identify the simulation run; the output folder name is "<experiment>_<time>_<date>".
experiment = 'fba-redux'
time = '50'
date = '2023-06-13'
entry = '_'.join([experiment, time, date])
folder = 'out/fbagd/' + entry + '/'

In [171]:
# Load the saved simulation output (a pickled dict stored in a 0-d array, hence .item())
# and keep agent '0' plus two of its listeners.
# NOTE(review): allow_pickle=True unpickles arbitrary objects -- only load trusted files.
output = np.load(folder + 'output.npy', allow_pickle=True).item()
output = output['agents']['0']
fba = output['listeners']['fba_results']
mass = output['listeners']['mass']


In [172]:
# Load the pickled agent steps; the context manager closes the file even if loading fails.
# NOTE(review): dill.load executes pickled code -- only load trusted files.
with open(folder + 'agent_steps.pkl', 'rb') as f:
    agent = dill.load(f)

In [173]:
# Pull the metabolism step out of the loaded agent and expose its stoichiometry and bulk ids.
metabolism = agent['ecoli-metabolism-redux']
stoichiometry, bulk_ids = metabolism.stoichiometry, metabolism.bulk_ids

In [174]:
# Wrap the agent's 'bulk' output in a DataFrame whose columns are labelled with bulk_ids.
# NOTE(review): presumably one row per saved timestep -- confirm against the simulation output format.
bulk = pd.DataFrame(output['bulk'], columns=bulk_ids)

In [175]:
# Show complex_df as it stands before the counts column is filled in.
complex_df

Unnamed: 0,complex,stoichiometry,reaction,enzyme-reaction,cofactors,counts
0,1-PFK,1,1-PFK_RXN,1PFRUCTPHOSPHN-ENZRXN,[MG+2],0
1,2OXOGLUTARATEDEH-CPLX,1,2OXOGLUTARATEDEH-CPLX_RXN,2OXOGLUTARATEDEH-ENZRXN,"[LIPOIC-ACID, THIAMINE-PYROPHOSPHATE, FAD, MG+2]",0
2,3-ISOPROPYLMALDEHYDROG-CPLX,1,3-ISOPROPYLMALDEHYDROG-CPLX_RXN,3-ISOPROPYLMALDEHYDROG-ENZRXN,"[MG+2, MN+2]",0
3,3-ISOPROPYLMALISOM-CPLX,1,3-ISOPROPYLMALISOM-CPLX_RXN,3-ISOPROPYLMALISOM-ENZRXN,[CPD-7],0
4,3-METHYL-2-OXOBUT-OHCH3XFER-CPLX,1,3-METHYL-2-OXOBUT-OHCH3XFER-CPLX_RXN,3-METHYL-2-OXOBUT-OHCH3XFER-ENZRXN,[MG+2],0
...,...,...,...,...,...,...
1096,CPLX0-8053,1,CPLX0-8053_RXN,,[],0
1097,CPLX0-8253,1,CPLX0-8253_RXN,,[],0
1098,SRP-CPLX,1,SRP-CPLX_RXN,,[],0
1099,CPLX0-7796APO,1,CPLX0-7796APO_RXN,,[],0


In [176]:
# Fill complex_df['counts'] with each complex's bulk count taken from one row of `bulk`.
# NOTE(review): 24 is a hard-coded row index into `bulk`; confirm it is the intended timepoint.
COUNT_ROW = 24

# Build the lookup structures once instead of on every iteration.
bulk_id_set = set(bulk_ids)
bulk_id_list = list(bulk_ids)

for complex_name in complex_df['complex']:
    row_mask = complex_df['complex'] == complex_name
    bulk_name = complex_name + '[c]'

    if bulk_name in bulk_id_set:
        complex_df.loc[row_mask, 'counts'] = bulk.at[COUNT_ROW, bulk_name]
    else:
        # No '[c]' entry: fall back to any bulk id that contains the complex name and is at
        # most 3 characters longer (e.g. the same name with a different bracketed tag).
        # NOTE(review): if several ids match, the last one wins rather than being summed.
        for cpd in bulk_id_list:
            if complex_name in cpd and len(cpd) < len(complex_name) + 4:
                complex_df.loc[row_mask, 'counts'] = bulk.at[COUNT_ROW, cpd]


In [177]:
# Display the complexes ordered by count, highest first (the sorted frame is not stored).
complex_df.sort_values(by='counts', ascending=False)

Unnamed: 0,complex,stoichiometry,reaction,enzyme-reaction,cofactors,counts
709,CPLX0-8223,1,CPLX0-8223_RXN,,[],36915
390,CPLX0-7534,1,CPLX0-7534_RXN,ENZRXN0-8522,[],19181
864,GAPDH-A-CPLX,1,GAPDH-A-CPLX_RXN,GAPDH-A-ENZRXN,[],13436
487,CPLX0-7747,1,CPLX0-7747_RXN,ENZRXN0-252,[],12667
917,ISOCITHASE-CPLX,1,ISOCITHASE-CPLX_RXN,ISOCITDEH-ENZRXN,[MN+2],12315
...,...,...,...,...,...,...
123,ATOA-CPLX,1,ATOA-CPLX_RXN,,[],0
124,ATOD-CPLX,1,ATOD-CPLX_RXN,,[],0
126,ATPASE-1-CPLX,1,ATPASE-1-CPLX_RXN,TRANS-ENZRXN-2,[],0
132,B2-CPLX,1,B2-CPLX_RXN,,[],0


In [179]:
# One row per (complex, cofactor) pair, then total the complex counts for each cofactor
# and order the cofactors from most to least abundant.
per_cofactor_counts = complex_df[['cofactors', 'counts']].explode('cofactors')
cofactor_totals = per_cofactor_counts.groupby('cofactors').sum()
cofactor_df = cofactor_totals.sort_values(by='counts', ascending=False)
cofactor_df

Unnamed: 0_level_0,counts
cofactors,Unnamed: 1_level_1
MG+2,55738
MN+2,27211
PYRIDOXAL_PHOSPHATE,24413
ZN+2,12807
23-DIPHOSPHOGLYCERATE,10058
FAD,8954
K+,5157
CPD-7,4556
FE+2,3256
THIAMINE-PYROPHOSPHATE,1868


In [180]:
# Add an object-dtype placeholder column (initially 0) so each cell can later hold a
# per-cofactor composition dict.
cofactor_df['molecular_composition'] = pd.Series(0, index=cofactor_df.index, dtype=object)

cofactor_df

Unnamed: 0_level_0,counts,molecular_composition
cofactors,Unnamed: 1_level_1,Unnamed: 2_level_1
MG+2,55738,0
MN+2,27211,0
PYRIDOXAL_PHOSPHATE,24413,0
ZN+2,12807,0
23-DIPHOSPHOGLYCERATE,10058,0
FAD,8954,0
K+,5157,0
CPD-7,4556,0
FE+2,3256,0
THIAMINE-PYROPHOSPHATE,1868,0


In [181]:
# Look up each cofactor on BioCyc and tally its atoms by element type from the CML
# structure, storing an {element: atom count} dict per cofactor. Cofactors whose request
# fails or whose record has no CML structure get an empty dict and a printed notice.
for cofactor_name in cofactor_df.index:
    # Percent-encode '+' so ids such as MG+2 survive the URL query string.
    mol = cofactor_name.replace('+', '%2b')
    mol_str = f'https://websvc.biocyc.org/getxml?id=ECOLI:{mol}&detail=low'

    r = s.get(mol_str)
    if r.status_code != 200:
        # Report and move on instead of failing on an unparseable error page.
        print(f'Request failed for {cofactor_name}: {r.status_code}')
        cofactor_df.at[cofactor_name, 'molecular_composition'] = {}
        continue

    o = xmltodict.parse(r.content)['ptools-xml']['Compound']

    if 'cml' not in o:
        print(f'No cml for {cofactor_name}')
        cofactor_df.at[cofactor_name, 'molecular_composition'] = {}
        continue

    atom_array = o['cml']['molecule']['atomArray']['atom']

    # A single atom is parsed as a mapping rather than a list; normalise to a list.
    if isinstance(atom_array, dict):
        atom_array = [atom_array]

    element_dict = {}
    for atom in atom_array:
        element = atom['@elementType']
        element_dict[element] = element_dict.get(element, 0) + 1

    cofactor_df.at[cofactor_name, 'molecular_composition'] = element_dict

No cml for FeS-Centers
No cml for CPD-17649


In [182]:
# These two cofactors have no CML structure on BioCyc; assign them a 4 Fe / 4 S
# composition by hand.
# NOTE(review): confirm that 4Fe-4S is the right stoichiometry for both ids.
ids_without_cml = ('FeS-Centers', 'CPD-17649')
for cofactor_name in ids_without_cml:
    cofactor_df.at[cofactor_name, 'molecular_composition'] = dict(FE=4, S=4)

In [183]:
# Expand each composition dict into one column per element (rows: cofactors);
# elements absent from a cofactor become 0.
composition_columns = cofactor_df['molecular_composition'].apply(pd.Series)
element_matrix = composition_columns.fillna(0)
element_matrix

Unnamed: 0_level_0,MG,MN,C,N,O,P,ZN,K,FE,S,NI,CU,R,CO,CA,H,MO,Proteins
cofactors,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
MG+2,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
MN+2,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
PYRIDOXAL_PHOSPHATE,0.0,0.0,8.0,1.0,6.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ZN+2,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
23-DIPHOSPHOGLYCERATE,0.0,0.0,3.0,0.0,10.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
FAD,0.0,0.0,27.0,9.0,15.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
K+,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CPD-7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
FE+2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
THIAMINE-PYROPHOSPHATE,0.0,0.0,12.0,4.0,7.0,2.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [184]:
# Metal atoms bound per cofactor: (atoms of each metal in the cofactor) x (cofactor count).
# One index-aligned multiply replaces the zero-filled placeholder frame that was filled
# column by column (which changed column dtypes on assignment).
metals = ['MG', 'MN', 'ZN', 'FE', 'NI', 'CU', 'CO', 'CA', 'MO']
metal_distribution = element_matrix[metals].mul(cofactor_df['counts'], axis=0)


In [185]:
# Show the FE column of metal_distribution (count x FE atoms per cofactor), largest first.
metal_distribution.sort_values(by='FE', ascending=False)['FE']

cofactors
CPD-7                     18224.0
FE+2                       3256.0
CPD-6                      1270.0
Heme-b                     1025.0
HEME_O                      814.0
3FE-4S                      636.0
FeS-Centers                 304.0
FE+3                        161.0
PROTOHEME                   139.0
SIROHEME                     98.0
CPD-23429                    46.0
HEME_D                       13.0
HEME_C                       13.0
NADPH                         0.0
CPD-17649                     0.0
AMMONIUM                      0.0
CPD-24862                     0.0
ADENOSYLCOBALAMIN             0.0
CPD-15873                     0.0
CPD-18260                     0.0
TOPAQUINONE                   0.0
CPD-8123                      0.0
FMNH2                         0.0
CPD0-1882                     0.0
CPD0-2654                     0.0
MG+2                          0.0
LYS                           0.0
CA+2                          0.0
PYRIDOXAL_PHOSPHATE           0.0
ZN+2

# Chain iron content back to enzymes

In [186]:
# Show every row of complex_df, now with the counts column populated.
complex_df.loc[:]

Unnamed: 0,complex,stoichiometry,reaction,enzyme-reaction,cofactors,counts
0,1-PFK,1,1-PFK_RXN,1PFRUCTPHOSPHN-ENZRXN,[MG+2],31
1,2OXOGLUTARATEDEH-CPLX,1,2OXOGLUTARATEDEH-CPLX_RXN,2OXOGLUTARATEDEH-ENZRXN,"[LIPOIC-ACID, THIAMINE-PYROPHOSPHATE, FAD, MG+2]",86
2,3-ISOPROPYLMALDEHYDROG-CPLX,1,3-ISOPROPYLMALDEHYDROG-CPLX_RXN,3-ISOPROPYLMALDEHYDROG-ENZRXN,"[MG+2, MN+2]",1065
3,3-ISOPROPYLMALISOM-CPLX,1,3-ISOPROPYLMALISOM-CPLX_RXN,3-ISOPROPYLMALISOM-ENZRXN,[CPD-7],2631
4,3-METHYL-2-OXOBUT-OHCH3XFER-CPLX,1,3-METHYL-2-OXOBUT-OHCH3XFER-CPLX_RXN,3-METHYL-2-OXOBUT-OHCH3XFER-ENZRXN,[MG+2],174
...,...,...,...,...,...,...
1096,CPLX0-8053,1,CPLX0-8053_RXN,,[],208
1097,CPLX0-8253,1,CPLX0-8253_RXN,,[],0
1098,SRP-CPLX,1,SRP-CPLX_RXN,,[],0
1099,CPLX0-7796APO,1,CPLX0-7796APO_RXN,,[],50


In [223]:
# Cofactors whose elemental composition contains at least one FE atom.
fe_cofactors = list(element_matrix[element_matrix['FE'] > 0].index)

# One row per (complex, cofactor) pair; complexes with no cofactors explode to NaN and
# are dropped.
exploded_complexes = complex_df.explode('cofactors').dropna().reset_index(drop=True)

# Keep only the pairs whose cofactor contains iron. The .copy() makes this an independent
# frame, so adding columns below cannot trigger SettingWithCopyWarning or write through
# to exploded_complexes.
fe_complexes = exploded_complexes.loc[
    exploded_complexes['cofactors'].isin(fe_cofactors),
    ['complex', 'cofactors', 'counts'],
].copy()

# Iron atoms per pair = complex count x FE atoms in that cofactor (looked up by cofactor id).
fe_atoms_per_cofactor = fe_complexes['cofactors'].map(element_matrix['FE'])
fe_complexes['fe_counts'] = fe_complexes['counts'] * fe_atoms_per_cofactor

# Each pair's share of the total iron, as a percentage.
fe_complexes['fe_counts_norm'] = 100 * fe_complexes['fe_counts'] / fe_complexes['fe_counts'].sum()

fe_complexes

Unnamed: 0,complex,cofactors,counts,fe_counts,fe_counts_norm
7,3-ISOPROPYLMALISOM-CPLX,CPD-7,2631,10524.0,40.478480
31,ADHE-CPLX,FE+2,93,93.0,0.357706
43,ANGLYC3PDEHYDROG-CPLX,FeS-Centers,69,276.0,1.061579
48,APP-UBIOX-CPLX,HEME_D,13,13.0,0.050002
49,APP-UBIOX-CPLX,Heme-b,13,13.0,0.050002
...,...,...,...,...,...
415,SULFITE-REDUCT-CPLX,CPD-7,93,372.0,1.430824
418,SULFITE-REDUCT-CPLX,SIROHEME,93,93.0,0.357706
419,SUPEROX-DISMUTFE-CPLX,FE+3,161,161.0,0.619255
424,THREODEHYD-CPLX,FE+2,33,33.0,0.126928


In [219]:
# Show the cofactor-by-element atom-count matrix again for reference.
element_matrix

Unnamed: 0_level_0,MG,MN,C,N,O,P,ZN,K,FE,S,NI,CU,R,CO,CA,H,MO,Proteins
cofactors,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
MG+2,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
MN+2,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
PYRIDOXAL_PHOSPHATE,0.0,0.0,8.0,1.0,6.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ZN+2,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
23-DIPHOSPHOGLYCERATE,0.0,0.0,3.0,0.0,10.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
FAD,0.0,0.0,27.0,9.0,15.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
K+,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CPD-7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
FE+2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
THIAMINE-PYROPHOSPHATE,0.0,0.0,12.0,4.0,7.0,2.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
