In [1]:
import numpy as np
import ast
import seaborn as sns
import pandas as pd
import os
import matplotlib.pyplot as plt
import dill
import requests
import xmltodict
import cvxpy as cp
import itertools
import networkx as nx

from scipy.special import logsumexp

%matplotlib inline
# sns.set(style='darkgrid', palette='viridis', context='talk')

os.chdir(os.path.expanduser('~/dev/vivarium-ecoli'))

from ecoli.processes.metabolism_redux_classic import NetworkFlowModel, FlowResult
from ecoli.library.schema import numpy_schema, bulk_name_to_idx, listener_schema, counts
from ecoli.processes.registries import topology_registry
TOPOLOGY = topology_registry.access("ecoli-metabolism-redux")

In [2]:
# create a function to deal with non-homogeous dtype of columns. mixture of strings and lists
def string_to_list(s):
    try:
        return np.array(ast.literal_eval(s))  # Safely convert string to array
    except (ValueError, SyntaxError):
        return np.array(s)  # Return as array if object is not a list and conversion fails (e.g., NaN values)

### Load Experiments

In [5]:
time = '400'
date = '2025-01-13'
experiment = 'metabolism-redux-classic-BASAL_NEW-modified_process_new_rxn_name'
entry = f'{experiment}_{time}_{date}'
folder = f'out/cofactors/{entry}/'

output = np.load(folder + '0_output.npy',allow_pickle='TRUE').item()
# output = np.load(r"out/geneRxnVerifData/output_glc.npy", allow_pickle=True, encoding='ASCII').tolist()
output = output['agents']['0']
fba = output['listeners']['fba_results']
mass = output['listeners']['mass']
bulk = pd.DataFrame(output['bulk'])

In [6]:
f = open(folder + 'agent_steps.pkl', 'rb')
agent = dill.load(f)
f.close()

### Create Dataframe

In [7]:
# create simulation flux dataframe - OLD
self = agent['ecoli-metabolism-redux-classic'] #self of metabolism_redux_classic class
reaction_names = self.reaction_names

sim_fluxes = pd.DataFrame(fba["estimated_fluxes"])
target_fluxes = pd.DataFrame(fba["target_kinetic_fluxes"])

sim_fluxes.columns = reaction_names
sim_fluxes = sim_fluxes.iloc[1:,] # remove first row (NAs) of the dataframe
target_fluxes = target_fluxes.iloc[1:,] # remove first row (NAs) of the dataframe

In [8]:
fba_new_reaction_ids = self.parameters["fba_new_reaction_ids"]
fba_reaction_ids_to_base_reaction_ids = self._parameters['fba_reaction_ids_to_base_reaction_ids']

In [11]:
#### ALL REACTION FLUX #####

# take average of flux through time -- should I normalize by cell mass? 
sim_fluxes_avg = sim_fluxes.mean(axis=0)

# set up plotly df
df = pd.DataFrame({'sim_fluxes_avg': sim_fluxes_avg, 'sim_fluxes_avg_log':sim_fluxes_avg})
df['is_new'] = 'Old Reactions'
df.loc[fba_new_reaction_ids, 'is_new'] = 'New Reactions'  

# log flux and + e-6
df.sim_fluxes_avg_log += 10**-6
df.sim_fluxes_avg_log = np.log10(df.sim_fluxes_avg_log)

### Search for dead-ends by parsing through the S (stoichiometry) matrix and find metabolites (row) with no usage (negative entry)

In [261]:
S = self.stoichiometry
metabolites = self.metabolite_names
reactions = self.reaction_names
metabolite_idx_to_name = {i:metabolite for i, metabolite in enumerate(metabolites)}
rxn_idx_to_id = {i: reaction for i, reaction in enumerate(reactions)}
rxn_id_to_idx = {reaction: i for i, reaction in enumerate(reactions)}

In [66]:
S = self.stoichiometry
dead_end_met = []
dead_end_met_rxn = {}
for row in np.arange(0,S.shape[0]):
    if not np.any(S[row,] <0): # if the metabolite doesn't have an outlet
        met = metabolite_idx_to_name[row]
        dead_end_met.append(met)
        
        rxn_idxs = np.where(S[row,] != 0)[0]
        rxn_ids =  [rxn_idx_to_id[idx] for idx in rxn_idxs]
        dead_end_met_rxn[met]=rxn_ids

In [71]:
print(f'There are a total of {len(dead_end_met)} metabolites that are only being created but not used')
dead_end_met_rxn 

#TODO: RNAs probably don't need to considered dead-ends because they are used in other parts of the model and have their own degradation module


There are a total of 632 metabolites that are only being created but not used


{'1-2-Diglycerides[c]': ['RXN-17700'],
 '1-Lyso-phospholipids[p]': ['RXN-17363'],
 '1-Lysophosphatidylglycerols[p]': ['RXN-20915'],
 '1-Lysophosphatidylserines[p]': ['RXN-20913'],
 '16S-rRNA-2-O-methylcytidine1402[c]': ['RXN-11637'],
 '16S-rRNA-5-O-methylcytosine1407[c]': ['RXN-11593'],
 '16S-rRNA-5-O-methylcytosine967[c]': ['RXN-11591'],
 '16S-rRNA-N2-methylguanine1207[c]': ['RXN-11576'],
 '16S-rRNA-N2-methylguanine966[c]': ['RXN0-6515'],
 '16S-rRNA-N2methylguanine1516[c]': ['RXN0-6731'],
 '16S-rRNA-N3-methyluracil1498[c]': ['RXN-11598'],
 '16S-rRNA-N4-methylcytidine1402[c]': ['RXN-11638'],
 '16S-rRNA-N6-dimethyladenine1518-1519[c]': ['RXN-11633'],
 '16S-rRNA-N7-methylguanine527[c]': ['RXN-11578'],
 '16S-rRNA-pseudouridine516[c]': ['RXN-11833'],
 '2-Acyl-sn-glycerol-3-phosphates[p]': ['RXN-20916'],
 '2-Acylglycero-Phosphocholines[c]': ['RXN0-1842'],
 '2-CARBOXYMUCONATE[p]': ['RXN0-2943'],
 '2-DEOXY-D-GLUCOSE[c]': ['3.1.3.68-RXN[CCO-CYTOSOL]-2-DEOXY-D-GLUCOSE-6-PHOSPHATE/WATER//2-DEOXY

### Dead-end Reactions/metabolites in new reactions

In [121]:
new_reaction_dead_end = {}
temp = [] # np.unique(temp) = new dead-end reactions
reaction_dead_end_pct = {} # only metabolites w/ 100% new reactions are considered. or else: it had been dead-end before the incorporation of new reactions

# find metabolite with no outlet that goes through new reactions
for met, rxn_id in dead_end_met_rxn.items():
    count = 0
    for id in rxn_id:
        if id in fba_new_reaction_ids:
            new_reaction_dead_end[met] = rxn_id
            temp.append(id)
            count +=1
        reaction_dead_end_pct[met] = count/len(rxn_id)

print(f'Number of Dead-end metabolites going through new reactions: {len(new_reaction_dead_end)}')
print(f'Number of Dead-end new reactions: {len(np.unique(temp))}')
print(f'Number of Dead-end reactions: {len(np.unique(np.hstack(list(new_reaction_dead_end.values()))))}')

Number of Dead-end metabolites going through new reactions: 83
Number of Dead-end new reactions: 103
Number of Dead-end reactions: 161


In [133]:
# closer look at the dead-end metabolites using only new reactions
new_reaction_dead_end_met = [met for met, pct in reaction_dead_end_pct.items() if pct == 1]
# extracting dead-end metabolites from new_reactions_dead_end. Should be extactly the same as new_reaction_dead_end_met
temp = [met for met, rxn_id in new_reaction_dead_end.items() if all(np.isin(rxn_id, fba_new_reaction_ids))]

print(f'Does new_reaction_dead_end_met and temp have same length: {len(new_reaction_dead_end_met) == len(temp)}')
print(f'Are new_reaction_dead_end_met and temp the same: {np.all(np.isin(new_reaction_dead_end_met, temp))}')

In [137]:
new_reaction_dead_end_met #TODO: pick a metabolite and trace 

['2-DEOXY-D-GLUCOSE[c]',
 '3-HYDROXYBENZOATE[c]',
 'ALPHA-RIBAZOLE[c]',
 'Alpha-D-aldose-1-phosphates[c]',
 'BUTANOL[c]',
 'CPD-12365[c]',
 'CPD-12367[c]',
 'CPD-12587[c]',
 'CPD-13315[c]',
 'CPD-13852[c]',
 'CPD-15158[c]',
 'CPD-16154[c]',
 'CPD-173[c]',
 'CPD-18260[c]',
 'CPD-1843[c]',
 'CPD-19237[c]',
 'CPD-20757[p]',
 'CPD-23445[c]',
 'CPD-24859[p]',
 'CPD-25891[c]',
 'CPD-335[c]',
 'CPD-3565[c]',
 'CPD-3611[c]',
 'CPD-582[c]',
 'CPD-8363[c]',
 'CPD-8550[p]',
 'CPD0-1101[p]',
 'CPD0-2114[c]',
 'CPD0-2134[c]',
 'CPD0-2173[c]',
 'CPD0-2207[c]',
 'CPD0-2229[c]',
 'CPD0-2241[c]',
 'CPD0-2485[c]',
 'CPD0-2486[c]',
 'CPD0-2559[c]',
 'CPD0-621[c]',
 'CcmABCD-Complex[c]',
 'CcmCDE-Complex-Heme[p]',
 'CcmE-Protein-Heme-Ox[p]',
 'CcmE-Proteins[c]',
 'Charged-SEC-tRNAs[c]',
 'Cytochromes-c[c]',
 'D-fructofuranose-1-phosphate[c]',
 'D-fructopyranose-1-phosphate[c]',
 'Donor-H1[i]',
 'ENTB-CPLX[c]',
 'ETR-Semiquinones[c]',
 'FERRIC-CITRATE-COMPLEX[c]',
 'Fatty-Acids[p]',
 'HYDROQUINONE[c]',
 'K

#### BUTANOL seems interesting

In [314]:
# BUTANOL seems interesting
# get butanol met index and S matrix involvement
met_of_interest = 'BUTANOL[c]'
butanol_idx = np.where(np.isin(metabolites, met_of_interest)) [0]

# slice S matrix to get only reactions involving butanol and related reactions
first = S[butanol_idx, :].copy()
butanol_rxn_idx = np.where(first!=0)[1]
butanol_rxn_id = [rxn_idx_to_id[idx] for idx in butanol_rxn_idx]

# get metabolites involved in other Butanol reactions
seconds = S[:, butanol_rxn_idx].copy()
sec_met_idx = np.unique(np.where(seconds!=0)[0])
sec_met_id = [metabolite_idx_to_name[idx] for idx in sec_met_idx]

temp = S[np.ix_(sec_met_idx,butanol_rxn_idx)]
df_temp = pd.DataFrame(temp, index=sec_met_id, columns=butanol_rxn_id)
sec_met_id

# expand to get how key metabolites are involved in other reactions
no_interest = ['NADH[c]', 'NAD[c]', 'PROTON[c]', 'WATER[c]']
sec_key_met = sec_met_id.copy()
[sec_key_met.remove(remove)  for remove in no_interest]
sec_key_met

sec_key_met_idx = [metabolite_to_idx[id] for id in sec_key_met]
idx =  np.nonzero(np.any(S[sec_key_met_idx,:] != 0, axis=0))[0] ##check columns to remove rxns with no sec_key_met involvement

##given expanded rxn idx, get metabolites involved in those reactions as well 
S_sec_key_met = S[:, idx].copy()
r_idx =  np.nonzero(np.any(S_sec_key_met != 0, axis=1))[0]
S_sec_key_met = S[np.ix_(r_idx, idx)].copy()

met_id = [metabolite_idx_to_name[ix] for ix in r_idx]
rxn_id = [rxn_idx_to_id[ix] for ix in idx]

df_temp2 = pd.DataFrame(S_sec_key_met, index=met_id, columns=rxn_id)

In [315]:
df_temp2

Unnamed: 0,R4-RXN-BUTYL-HYDROPEROXIDE/NADH/PROTON//BUTANOL/NAD/WATER.51.,RXN-19953-Reduced-Cys2-Peroxiredoxins/BUTYL-HYDROPEROXIDE//2Cys-Peroxiredoxins-With-HydroxyCys/BUTANOL.93.,RXN-19954,RXN-20692,RXN0-5468,RXN0-5468-Red-thioredoxins-1/Cys2-Peroxiredoxin-Disulfide//Ox-thioredoxins-1/Reduced-Cys2-Peroxiredoxins.95.,RXN0-5468-Reduced-CcmG-Proteins/Cys2-Peroxiredoxin-Disulfide//Oxidized-CcmG-Proteins/Reduced-Cys2-Peroxiredoxins.103.,RXN0-5468-Reduced-CcmH-Proteins/Cys2-Peroxiredoxin-Disulfide//Oxidized-CcmH-Proteins/Reduced-Cys2-Peroxiredoxins.103.
2Cys-Peroxiredoxins-With-HydroxyCys[c],0,1,-1,0,0,0,0,0
BUTANOL[c],1,1,0,0,0,0,0,0
BUTYL-HYDROPEROXIDE[c],-1,-1,0,0,0,0,0,0
Cys2-Peroxiredoxin-Disulfide[c],0,0,1,-1,-1,-1,-1,-1
NADH[c],-1,0,0,0,0,0,0,0
NAD[c],1,0,0,0,0,0,0,0
Ox-Prx-Disulfide-Reductases[c],0,0,0,1,0,0,0,0
Ox-Thioredoxin[c],0,0,0,0,1,0,0,0
Ox-thioredoxins-1[c],0,0,0,0,0,1,0,0
Oxidized-CcmG-Proteins[c],0,0,0,0,0,0,1,0


In [316]:
df.loc[rxn_id,]

Unnamed: 0,sim_fluxes_avg,sim_fluxes_avg_log,is_new
R4-RXN-BUTYL-HYDROPEROXIDE/NADH/PROTON//BUTANOL/NAD/WATER.51.,0.0,-6.0,New Reactions
RXN-19953-Reduced-Cys2-Peroxiredoxins/BUTYL-HYDROPEROXIDE//2Cys-Peroxiredoxins-With-HydroxyCys/BUTANOL.93.,0.0,-6.0,New Reactions
RXN-19954,0.0,-6.0,New Reactions
RXN-20692,0.0,-6.0,New Reactions
RXN0-5468,0.0,-6.0,New Reactions
RXN0-5468-Red-thioredoxins-1/Cys2-Peroxiredoxin-Disulfide//Ox-thioredoxins-1/Reduced-Cys2-Peroxiredoxins.95.,0.0,-6.0,New Reactions
RXN0-5468-Reduced-CcmG-Proteins/Cys2-Peroxiredoxin-Disulfide//Oxidized-CcmG-Proteins/Reduced-Cys2-Peroxiredoxins.103.,0.0,-6.0,New Reactions
RXN0-5468-Reduced-CcmH-Proteins/Cys2-Peroxiredoxin-Disulfide//Oxidized-CcmH-Proteins/Reduced-Cys2-Peroxiredoxins.103.,0.0,-6.0,New Reactions


### Give COBRA a shot

In [318]:
met_ids = met_id
rxn_ids = rxn_id

metabolites_c = [cobra.Metabolite(met) for met in met_ids]
reactions_c   = [cobra.Reaction(rxn) for rxn in rxn_ids]

# Add metabolites and reactions to the model
model = cobra.Model("Custom_Pathway")
for i, reaction in enumerate(reactions_c):
    # Add metabolites to reactions based on stoichiometry
    for j, metabolite in enumerate(metabolites_c):
        if S_sec_key_met[j][i] != 0:
            reaction.add_metabolites({metabolite: S_sec_key_met[j][i]})
    model.add_reactions([reaction])
    
for reaction in model.reactions:
    print(reaction.build_reaction_string())

BUTYL-HYDROPEROXIDE[c] + NADH[c] + PROTON[c] --> BUTANOL[c] + NAD[c] + WATER[c]
BUTYL-HYDROPEROXIDE[c] + Reduced-Cys2-Peroxiredoxins[c] --> 2Cys-Peroxiredoxins-With-HydroxyCys[c] + BUTANOL[c]
2Cys-Peroxiredoxins-With-HydroxyCys[c] --> Cys2-Peroxiredoxin-Disulfide[c] + WATER[c]
Cys2-Peroxiredoxin-Disulfide[c] + Red-Prx-Disulfide-Reductases[c] --> Ox-Prx-Disulfide-Reductases[c] + Reduced-Cys2-Peroxiredoxins[c]
Cys2-Peroxiredoxin-Disulfide[c] + Red-Thioredoxin[c] --> Ox-Thioredoxin[c] + Reduced-Cys2-Peroxiredoxins[c]
Cys2-Peroxiredoxin-Disulfide[c] + Red-thioredoxins-1[c] --> Ox-thioredoxins-1[c] + Reduced-Cys2-Peroxiredoxins[c]
Cys2-Peroxiredoxin-Disulfide[c] + Reduced-CcmG-Proteins[c] --> Oxidized-CcmG-Proteins[c] + Reduced-Cys2-Peroxiredoxins[c]
Cys2-Peroxiredoxin-Disulfide[c] + Reduced-CcmH-Proteins[c] --> Oxidized-CcmH-Proteins[c] + Reduced-Cys2-Peroxiredoxins[c]


In [319]:
from escher import Builder

# Build an Escher visualization for the model
builder = Builder(model=model)
# builder

TypeError: Object of type int8 is not JSON serializable

In [249]:
import cobra

# Define metabolites
metabolites_c = [
    cobra.Metabolite("2Cys-Peroxiredoxins-With-HydroxyCys[c]"),
    cobra.Metabolite("BUTANOL[c]"),
    cobra.Metabolite("BUTYL-HYDROPEROXIDE[c]"),
    cobra.Metabolite("NADH[c]"),
    cobra.Metabolite("NAD[c]"),
    cobra.Metabolite("PROTON[c]"),
    cobra.Metabolite("Reduced-Cys2-Peroxiredoxins[c]"),
    cobra.Metabolite("WATER[c]")
]

# Define reactions
reactions = [
    cobra.Reaction("R4-RXN-BUTYL-HYDROPEROXIDE"),
    cobra.Reaction("RXN-19953-Reduced-Cys2-Peroxiredoxins")
]

# Define stoichiometry (rows: metabolites, columns: reactions)
stoichiometry = [
    [0, 1],  # 2Cys-Peroxiredoxins-With-HydroxyCys[c]
    [1, 1],  # BUTANOL[c]
    [-1, -1],  # BUTYL-HYDROPEROXIDE[c]
    [-1, 0],  # NADH[c]
    [1, 0],  # NAD[c]
    [-1, 0],  # PROTON[c]
    [0, -1],  # Reduced-Cys2-Peroxiredoxins[c]
    [1, 0]    # WATER[c]
]

# Add metabolites and reactions to the model
model = cobra.Model("Custom_Pathway")
for i, reaction in enumerate(reactions):
    # Add metabolites to reactions based on stoichiometry
    for j, metabolite in enumerate(metabolites_c):
        if stoichiometry[j][i] != 0:
            reaction.add_metabolites({metabolite: stoichiometry[j][i]})
    model.add_reactions([reaction])

# Print summary of the model
print(model.summary())




Objective
nan Expression = nan

Uptake
------
Empty DataFrame
Columns: [Metabolite, Reaction, Flux, C-Number, C-Flux]
Index: []

Secretion
---------
Empty DataFrame
Columns: [Metabolite, Reaction, Flux, C-Number, C-Flux]
Index: []



In [254]:
for reaction in model.reactions:
    print(reaction.build_reaction_string())

BUTYL-HYDROPEROXIDE[c] + NADH[c] + PROTON[c] --> BUTANOL[c] + NAD[c] + WATER[c]
BUTYL-HYDROPEROXIDE[c] + Reduced-Cys2-Peroxiredoxins[c] --> 2Cys-Peroxiredoxins-With-HydroxyCys[c] + BUTANOL[c]


In [251]:
from escher import Builder

# Build an Escher visualization for the model
builder = Builder(model=model)
# builder

In [252]:
for reaction in model.reactions:
    print(reaction.build_reaction_string())

BUTYL-HYDROPEROXIDE[c] + NADH[c] + PROTON[c] --> BUTANOL[c] + NAD[c] + WATER[c]
BUTYL-HYDROPEROXIDE[c] + Reduced-Cys2-Peroxiredoxins[c] --> 2Cys-Peroxiredoxins-With-HydroxyCys[c] + BUTANOL[c]


In [253]:
solution = model.optimize()
solution


Unnamed: 0,fluxes,reduced_costs
R4-RXN-BUTYL-HYDROPEROXIDE,0.0,0.0
RXN-19953-Reduced-Cys2-Peroxiredoxins,0.0,0.0
