In [20]:
import polars as pl
import numpy as np
import os
import json
from scipy.sparse import csr_matrix
os.chdir(os.path.expanduser('~/vivarium-ecoli'))


In [21]:
def read_names(file_path):
    return np.genfromtxt(file_path, dtype=str, delimiter="\n")

def read_matrix(file_path, sparse=False):
    # add sparse matrix reading
    if sparse:
        return csr_matrix(pl.read_csv(file_path, has_header=False).to_numpy())
    else:
        return pl.read_csv(file_path, has_header=False).to_numpy()


Cmatrix = read_matrix("/Users/miagrahn/vivarium-ecoli/notebooks/mia notebooks/C_matrix.csv", sparse=True)
complex_ids = list(read_names("/Users/miagrahn/vivarium-ecoli/notebooks/mia notebooks/complex_ids.txt"))
monomer_ids = list(read_names("/Users/miagrahn/vivarium-ecoli/notebooks/mia notebooks/monomer_ids.txt"))

In [22]:
time = '1300'
date = '2024-04-23'
experiment = 'validation_experiment'
entry = f'{experiment}_{time}_{date}'
folder = f'out/cofactors/{entry}/'

In [23]:
output_all = np.load(folder + '0_output.npy',allow_pickle='TRUE').item()
# output = np.load(r"out/geneRxnVerifData/output_glc.npy", allow_pickle=True, encoding='ASCII').tolist()
output = output_all['agents']['0']
fba = output['listeners']['fba_results']
mass = output['listeners']['mass']
bulk = pl.DataFrame(output['bulk'])

fluxes = np.array(fba['estimated_fluxes'][1:])
exchanges = fba['estimated_exchange_dmdt']

ans = output['listeners']['unique_molecule_counts']['active_ribosome']

In [24]:
# So say I want to find the names of the proteins themselves, these are functions to implement that do so: 
initial_state = json.load(open('data/wcecoli_t0.json'))

bulk_ids = [item[0] for item in initial_state['bulk']]

bulk.columns = bulk_ids

In [25]:
# take the protein [location] out of the name of the bulk ids: 
ecocyc_ids = []
for id in bulk_ids: 
	ecocyc_ids.append(id[0:-3])

ecocyc_id_idxs = []
for i in range(len(ecocyc_ids)):
	ecocyc_id_idxs.append(i)


In [26]:
# combine the protein counts for each monomer (option 2): 
id_idx_dict = {id: i for i, id in enumerate(bulk_ids)}
idx_id_dict = {idx: i for i, idx in id_idx_dict.items()}

protein_ids = complex_ids + monomer_ids
all_names = []
name_groups = []
idx_groups = []

for p in protein_ids:
	if p not in all_names:
		indexes = [name for key, name in id_idx_dict.items() if p in key]
		for idx in indexes:
			if ecocyc_ids[idx] == p:
				pass
			else:
				indexes.remove(idx)
		name_group = [idx_id_dict.get(name) for name in indexes]
		all_names.append(p)
		name_groups.append([name_group])
		idx_groups.append([indexes])

In [27]:
# fuse and collect protein counts of interest (in the C matrix): 
bulk_interest_protein_counts = np.zeros([bulk.shape[0], len(protein_ids)])
bulk_PC_dict = {}

for i in range(len(idx_groups)):
	id = protein_ids[i]
	idxs = idx_groups[i]
	if id in ecocyc_ids:
		if len(idxs) == 1:
			idx = idxs[0][0]
			df = np.array(bulk.select(bulk_ids[idx]))
		else:
			together = []
			for idx in idxs:
				index = idx[0]
				temp_df = np.array(bulk.select(bulk_ids[index]))
				together.append(temp_df)
			df = np.sum(together, axis=1)
		protein_counts = np.transpose(df)
		bulk_interest_protein_counts[:, i] = protein_counts
		bulk_PC_dict.update({id:idxs}) 

In [28]:
sim_protein_counts = bulk_interest_protein_counts @ Cmatrix

array([[128.,  55., 184., ..., 516.,  58.,  67.],
       [128.,  55., 184., ..., 516.,  58.,  67.],
       [128.,  55., 184., ..., 516.,  58.,  67.],
       ...,
       [206.,  55., 406., ..., 814.,  58.,  67.],
       [206.,  55., 406., ..., 814.,  58.,  67.],
       [206.,  55., 406., ..., 816.,  58.,  67.]])