In [1]:
import polars as pl
import numpy as np
import os
import json
from scipy.sparse import csr_matrix
os.chdir(os.path.expanduser('~/vivarium-ecoli'))


In [2]:
time = '1300'
date = '2024-04-23'
experiment = 'validation_experiment'
entry = f'{experiment}_{time}_{date}'
folder = f'out/cofactors/{entry}/'

In [3]:
output_all = np.load(folder + '0_output.npy',allow_pickle='TRUE').item()
# output = np.load(r"out/geneRxnVerifData/output_glc.npy", allow_pickle=True, encoding='ASCII').tolist()
output = output_all['agents']['0']
fba = output['listeners']['fba_results']
mass = output['listeners']['mass']
bulk = pl.DataFrame(output['bulk'])

fluxes = np.array(fba['estimated_fluxes'][1:])
exchanges = fba['estimated_exchange_dmdt']

ans = output['listeners']['unique_molecule_counts']['active_ribosome']

In [4]:
# So say I want to find the names of the proteins themselves, these are functions to implement that do so: 
initial_state = json.load(open('data/wcecoli_t0.json'))

bulk_ids = [item[0] for item in initial_state['bulk']]

bulk.columns = bulk_ids

In [8]:
# Check what the actual outputs are:
bulk.columns[0:10]


['--TRANS-ACENAPHTHENE-12-DIOL[c]',
 '1-6-Galactofuran[c]',
 '1-ACYLGLYCEROL-3-P-ACYLTRANSFER-MONOMER[i]',
 '1-AMINO-PROPAN-2-OL[c]',
 '1-AMINO-PROPAN-2-ONE-3-PHOSPHATE[c]',
 '1-CHLORO-24-DINITROBENZENE[c]',
 '1-DEOXYXYLONOJIRIMYCIN[c]',
 '1-ETHYLADENINE[c]',
 '1-Hydroxy-2-oxolimonenes[c]',
 '1-KETO-2-METHYLVALERATE[c]']

In [14]:
len(ecocyc_ids)

16097

In [15]:
ecocyc_ids[0:10]

['--TRANS-ACENAPHTHENE-12-DIOL',
 '1-6-Galactofuran',
 '1-ACYLGLYCEROL-3-P-ACYLTRANSFER-MONOMER',
 '1-AMINO-PROPAN-2-OL',
 '1-AMINO-PROPAN-2-ONE-3-PHOSPHATE',
 '1-CHLORO-24-DINITROBENZENE',
 '1-DEOXYXYLONOJIRIMYCIN',
 '1-ETHYLADENINE',
 '1-Hydroxy-2-oxolimonenes',
 '1-KETO-2-METHYLVALERATE']

In [21]:
def read_names(file_path):
    return np.genfromtxt(file_path, dtype=str, delimiter="\n")

def read_matrix(file_path, sparse=False):
    # add sparse matrix reading
    if sparse:
        return csr_matrix(pl.read_csv(file_path, has_header=False).to_numpy())
    else:
        return pl.read_csv(file_path, has_header=False).to_numpy()


Cmatrix = read_matrix("/Users/miagrahn/vivarium-ecoli/notebooks/mia notebooks/C_matrix.csv", sparse=True)
complex_ids = list(read_names("/Users/miagrahn/vivarium-ecoli/notebooks/mia notebooks/complex_ids.txt"))
monomer_ids = list(read_names("/Users/miagrahn/vivarium-ecoli/notebooks/mia notebooks/monomer_ids.txt"))

In [22]:
list(complex_ids)

['1-PFK',
 '2OXOGLUTARATEDEH-CPLX',
 '3-ISOPROPYLMALDEHYDROG-CPLX',
 '3-ISOPROPYLMALISOM-CPLX',
 '3-METHYL-2-OXOBUT-OHCH3XFER-CPLX',
 '3-OXOACYL-ACP-SYNTHII-CPLX',
 '6PFK-1-CPX',
 '6PFK-2-CPX',
 '6PGLUCONDEHYDROG-CPLX',
 '7-ALPHA-HYDROXYSTEROID-DEH-CPLX',
 '7KAPSYN-CPLX',
 'ABC-10-CPLX',
 'ABC-11-CPLX',
 'ABC-12-CPLX',
 'ABC-13-CPLX',
 'ABC-14-CPLX',
 'ABC-15-CPLX',
 'ABC-16-CPLX',
 'ABC-18-CPLX',
 'ABC-19-CPLX',
 'ABC-2-CPLX',
 'ABC-20-CPLX',
 'ABC-21-CPLX',
 'ABC-22-CPLX',
 'ABC-23-CPLX',
 'ABC-24-CPLX',
 'ABC-25-CPLX',
 'ABC-26-CPLX',
 'ABC-27-CPLX',
 'ABC-28-CPLX',
 'ABC-29-CPLX',
 'ABC-3-CPLX',
 'ABC-304-CPLX',
 'ABC-32-CPLX',
 'ABC-33-CPLX',
 'ABC-34-CPLX',
 'ABC-35-CPLX',
 'ABC-4-CPLX',
 'ABC-40-CPLX',
 'ABC-41-CPLX',
 'ABC-42-CPLX',
 'ABC-45-CPLX',
 'ABC-46-CPLX',
 'ABC-48-CPLX',
 'ABC-49-CPLX',
 'ABC-5-CPLX',
 'ABC-51-CPLX',
 'ABC-52-CPLX',
 'ABC-54-CPLX',
 'ABC-55-CPLX',
 'ABC-56-CPLX',
 'ABC-57-CPLX',
 'ABC-58-CPLX',
 'ABC-59-CPLX',
 'ABC-6-CPLX',
 'ABC-60-CPLX',
 'ABC-61-CP

In [42]:
ecocyc_ids = []

for id in bulk_ids:
    if id[0:-3] not in bulk_ids:
        ecocyc_ids.append(id[0:-3])
        
protein_ids = complex_ids + monomer_ids

id_col_mapping = {}

for id in protein_ids:
    
    if id in ecocyc_ids:
        col_idx = ecocyc_ids.index(id)
    
        id_col_mapping[id] = bulk_ids[col_idx]
        
    else:
        print(id)

protein_array = np.zeros([bulk.shape[0], len(protein_ids)])

for i,id in enumerate(protein_ids):
    
    if id in id_col_mapping:
        bulk_col_key = id_col_mapping[id]
        
        protein_array[:, i] = np.array(bulk.select([bulk_col_key])).flatten()
        
protein_array

CPLX0-3964
MONOMER0-1241
MONOMER0-4223


array([[  27.,   73., 1194., ...,  487.,    0.,    9.],
       [  27.,   73., 1194., ...,  487.,    0.,    9.],
       [  27.,   73., 1194., ...,  487.,    0.,    9.],
       ...,
       [  27.,   95., 1563., ...,  785.,    0.,    9.],
       [  27.,   95., 1563., ...,  785.,    0.,    9.],
       [  27.,   95., 1563., ...,  787.,    0.,    9.]])

In [43]:
protein_array @ Cmatrix

array([[128.,  55., 184., ..., 516.,  58.,  67.],
       [128.,  55., 184., ..., 516.,  58.,  67.],
       [128.,  55., 184., ..., 516.,  58.,  67.],
       ...,
       [206.,  55., 406., ..., 814.,  58.,  67.],
       [206.,  55., 406., ..., 814.,  58.,  67.],
       [206.,  55., 406., ..., 816.,  58.,  67.]])

In [45]:
monomer_ids

['1-ACYLGLYCEROL-3-P-ACYLTRANSFER-MONOMER',
 '1-PFK-MONOMER',
 '2-DEHYDROPANTOATE-REDUCT-MONOMER',
 '2-ISOPROPYLMALATESYN-MONOMER',
 '2-OCTAPRENYL-METHOXY-BENZOQ-METH-MONOMER',
 '2-OCTAPRENYLPHENOL-HYDROX-MONOMER',
 '3-CH3-2-OXOBUTANOATE-OH-CH3-XFER-MONOMER',
 '3-ISOPROPYLMALDEHYDROG-MONOMER',
 '3-OXOACYL-ACP-REDUCT-MONOMER',
 '3-OXOACYL-ACP-SYNTHII-MONOMER',
 '4OHBENZOATE-OCTAPRENYLTRANSFER-MONOMER',
 '6PFK-1-MONOMER',
 '6PFK-2-MONOMER',
 '6PGLUCONDEHYDROG-MONOMER',
 '6PGLUCONOLACT-MONOMER',
 '7-ALPHA-HYDROXYSTEROID-DEH-MONOMER',
 '7KAPSYN-MONOMER',
 'AAS-MONOMER',
 'ABC-MONOMER',
 'ABGT-MONOMER',
 'ACETATEKINA-MONOMER',
 'ACETOLACTSYNIII-HCHAIN-MONOMER',
 'ACETOLACTSYNIII-ICHAIN-MONOMER',
 'ACETYL-COA-ACETYLTRANSFER-MONOMER',
 'ACETYLGLUTKIN-MONOMER',
 'ACETYLORNDEACET-MONOMER',
 'ACETYLORNTRANSAM-MONOMER',
 'ACNEULY-MONOMER',
 'ACONITASE-MONOMER',
 'ACONITATEDEHYDRB-MONOMER',
 'ACPSUB-MONOMER',
 'ACRB-MONOMER',
 'ACRD-MONOMER',
 'ACRF-MONOMER',
 'ACS-MONOMER',
 'ACSERLYA-MONOMER',
 