Description:

This script aims to test all carbon source media defined by Ecocyc (https://ecocyc.org/ECOLI/new-image?object=Growth-Media): individual growth media (the ones with data), microarray Plate ID: Biolog PM1 and PM2. This script will use the basal simulation after cp2 as the base, and run FBA external to the model to see if model solves and see if there is growth (needs definition) and analyze gene usage. 

In [1]:
import numpy as np
import ast
import pandas as pd
import os
import matplotlib.pyplot as plt
import dill
import requests
import xmltodict
import cvxpy as cp
import itertools
import networkx as nx
import plotly.express as px

from scipy.special import logsumexp

%matplotlib inline
# sns.set(style='dar|kgrid', palette='viridis', context='talk')

os.chdir(os.path.expanduser('~/vEcoli')) # does not exsist I changed it so that it would be the path to the vecoli on my computer

from ecoli.processes.metabolism_redux_classic import NetworkFlowModel, FlowResult
from ecoli.library.schema import numpy_schema, bulk_name_to_idx, listener_schema, counts
from ecoli.processes.registries import topology_registry
TOPOLOGY = topology_registry.access("ecoli-metabolism-redux")
import plotly.express as px

In [2]:
# load checkpoint 2 model
time = '400'
date = '2025-05-15'
experiment = 'NEW_NewGenes_checkpoint2'
condition = 'basal'
entry = f'{experiment}_{time}_{date}'
folder = f'out/metabolism-comparison/{condition}/{entry}/'

output = np.load(folder + '0_output.npy',allow_pickle='TRUE').item()
# output = np.load(r"out/geneRxnVerifData/output_glc.npy", allow_pickle=True, encoding='ASCII').tolist()
output = output['agents']['0']
fba = output['listeners']['fba_results']
bulk = pd.DataFrame(output['bulk'])
f = open(folder + 'agent_steps.pkl', 'rb')
agent = dill.load(f)
f.close()

In [3]:
# get commonly stored variables
metabolism = agent['ecoli-metabolism-redux-classic']
stoichiometry = metabolism.stoichiometry.copy()
reaction_names = metabolism.reaction_names
fba_new_reaction_ids = metabolism.parameters["fba_new_reaction_ids"]
fba_reaction_ids_to_base_reaction_ids = metabolism.parameters['fba_reaction_ids_to_base_reaction_ids']
metabolites = metabolism.metabolite_names.copy()
binary_kinetic_idx = metabolism.binary_kinetic_idx
exchange_molecules = metabolism.exchange_molecules

S = stoichiometry .copy()
S = pd.DataFrame(S, index=metabolites , columns=reaction_names )
homeostatic_count = pd.DataFrame(fba["homeostatic_metabolite_counts"], columns=metabolism.homeostatic_metabolites).loc[24, :]
homeostatic = pd.DataFrame(fba["target_homeostatic_dmdt"], columns=metabolism.homeostatic_metabolites).loc[24, :]
maintenance = pd.DataFrame(fba["maintenance_target"][1:], columns=['maintenance_reaction']).iat[24, 0]
kinetic = pd.DataFrame(fba["target_kinetic_fluxes"], columns=metabolism.kinetic_constraint_reactions).loc[24, :].copy()

In [4]:
# parameters that are the same across the two simulation
kinetic_reaction_ids = metabolism.kinetic_constraint_reactions
allowed_exchange_uptake = metabolism.allowed_exchange_uptake
FREE_RXNS = ["TRANS-RXN-145", "TRANS-RXN0-545", "TRANS-RXN0-474"]
ADDED_RXNS = ['HS-TRANSPORT-RXN-CPD0-1202', 'HS-TRANSPORT-RXN-CPD0-1202 (reverse)',
                   'HS-TRANSPORT-RXN[CCO-OUTER-MEM]-OXAMATE', 'HS-TRANSPORT-RXN[CCO-OUTER-MEM]-OXAMATE (reverse)',
                   'HS-TRANSPORT-RXN[CCO-PM-BAC-NEG]-OXAMATE', 'HS-TRANSPORT-RXN[CCO-PM-BAC-NEG]-OXAMATE (reverse)',
                   'HS-BETA-GLUCURONID-RXN_CPD-3611//METOH', 'HS-SPONTANEOUS-TRANSPORT[CCO-OUTER-MEM]-HCN', 'HS-SPONTANEOUS-TRANSPORT[CCO-OUTER-MEM]-HCN (reverse)',
                   'HS-SPONTANEOUS-TRANSPORT[CCO-PM-BAC-NEG]-HCN','HS-SPONTANEOUS-TRANSPORT[CCO-PM-BAC-NEG]-HCN (reverse)']

# Define functions for testing

In [5]:
def get_subset_S(S, met_of_interest):
    S_met = S.loc[met_of_interest, :]
    S_met = S_met.loc[:,~np.all(S_met == 0, axis=0)]
    return S_met, S_met.columns

def get_keys(dict, value):
    return [key for key in dict if dict[key] == value]

def test_NetworkFlowModel(objective_weights,
                          uptake_addition = set([]), uptake_removal = set([]), new_exchange_molecules = set([]),
                          add_metabolite = None, add_reaction = None, add_kinetic = None, remove_reaction = None, force_reaction = None):
    # update exchanges
    uptake = metabolism.allowed_exchange_uptake.copy()
    uptake = set(uptake)
    uptake = uptake | uptake_addition
    uptake = uptake - uptake_removal

    exchange_molecules = metabolism.exchange_molecules.copy()
    exchange_molecules = exchange_molecules | new_exchange_molecules

    # update stoichiometry
    reaction_names = metabolism.reaction_names.copy()
    kinetic_reaction_ids = metabolism.kinetic_constraint_reactions.copy()
    kinetic = pd.DataFrame(fba["target_kinetic_fluxes"], columns=metabolism.kinetic_constraint_reactions).loc[24, :].copy()
    metabolites = metabolism.metabolite_names.copy()

    S_new = stoichiometry.copy()

    if add_metabolite is not None: # add to metabolites list because they are currently not included in the model
        for m in add_metabolite:
            if m not in metabolites:
                metabolites.append(m)
        # append rows of zeros to S_new of length add_metabolite
        S_new = np.concatenate((S_new, np.zeros((len(add_metabolite), S_new.shape[1]))), axis=0)

    if add_reaction is not None:
        # assert add_reaction is a dictionary
        assert isinstance(add_reaction, dict)

        for r,s in add_reaction.items():
            if r not in reaction_names:
                reaction_names.append(r)
            # append columns of reaction stoich to S_new of length add_reaction
            new_reaction = np.zeros((S_new.shape[0], 1))
            for m, v in s.items():
                new_reaction[metabolites.index(m), 0] = v
            S_new = np.concatenate((S_new, new_reaction), axis=1)

    if add_kinetic is not None:
        # assert add_kinetic is a dictionary
        assert isinstance(add_kinetic, dict)

        for r, v in add_kinetic.items():
            if r not in kinetic_reaction_ids:
                kinetic_reaction_ids.append(r)
                kinetic[r] = v

    if remove_reaction is not None:
        for r in remove_reaction:
            r_idx = reaction_names.index(r)
            S_new = np.delete(S_new, r_idx, axis=1)
            reaction_names.remove(r)
            if r in kinetic_reaction_ids:
                kinetic_reaction_ids.remove(r)
                del kinetic[r]

    if force_reaction is not None:
        force_reaction_idx = np.array([reaction_names.index(r) for r in force_reaction])
    else:
        force_reaction_idx = force_reaction

    # Solve NetworkFlowModel
    model = NetworkFlowModel(
            stoich_arr=S_new,
            metabolites=metabolites,
            reactions=reaction_names,
            homeostatic_metabolites=metabolism.homeostatic_metabolites,
            kinetic_reactions=kinetic_reaction_ids,
            free_reactions=FREE_RXNS)
    model.set_up_exchanges(exchanges=exchange_molecules, uptakes=uptake)
    solution: FlowResult = model.solve(
            homeostatic_concs=homeostatic_count * metabolism.counts_to_molar.asNumber(), # in conc
            homeostatic_dm_targets=np.array(list(dict(homeostatic).values())), # *10^7
            maintenance_target=maintenance, # *10^6 ish
            kinetic_targets=np.array(list(dict(kinetic).values())), # *10^6 ish
            # binary_kinetic_idx=binary_kinetic_idx, #7646
            binary_kinetic_idx=None,
            force_flow_idx=force_reaction_idx,
            objective_weights=objective_weights, #same
            upper_flux_bound= 1000000000, # increase to 10^9 because notebook runs FlowResult using Counts, WC runs using conc.
            solver=cp.GLOP) #SCS. ECOS, MOSEK
    return solution.objective, solution.velocities, reaction_names, S_new, metabolites, kinetic

# Create Dataframes to Store Results

In [6]:
# all reactions
sim = pd.DataFrame(fba["estimated_fluxes"], columns= reaction_names).mean(axis=0).copy()

# kinetic reactions
kc_target_cp2 = pd.DataFrame(fba["target_kinetic_fluxes"], columns= kinetic_reaction_ids).mean(axis=0).copy()

In [7]:
df_all = sim.copy()
df_all.columns = ['sim_cp2_basal']

In [8]:
df_all

1-ACYLGLYCEROL-3-P-ACYLTRANSFER-RXN                                                                               0.000000e+00
1.1.1.127-RXN                                                                                                     0.000000e+00
1.1.1.127-RXN (reverse)                                                                                           0.000000e+00
1.1.1.215-RXN (reverse)                                                                                           0.000000e+00
1.1.1.251-RXN                                                                                                     0.000000e+00
                                                                                                                      ...     
XYLULOKIN-RXN-CPD-24961/ATP//XYLULOSE-5-PHOSPHATE/ADP/PROTON.47.                                                  0.000000e+00
YIAE1-RXN (reverse)                                                                                            

# Network Flow New Enviornments

In [9]:
# microarray plate 2: ~ tests 95 carbon sources
conditions = {
    'A2 - Chondroitin Sulfate C - MIX0-504': None, #not in model
    'A3 - α- Cyclodextrin - MIX0-505': None, #not in model
    'A4 - β- Cyclodextrin - MIX0-506': None, # not in model
    'A5 - γ- Cyclodextrin - MIX0-507': None, # not in model
    'A6 - Dextrin - MIX0-508': None, # not in model
    'A7 - Gelatin - MIX0-795 ': None, # not in model
    'A8 - Glycogen - MIX0-509': None, # not in model
    'A9 - Inulin - MIX0-510': None, # not in model
    'A10 - Laminarin - MIX0-511': None, # not in model
    'A11 - Mannan - MIX0-512': None, # not in model
    'A12 - Pectin - MIX0-513': None, # not in model
    'B1 - N-Acetyl-D- Galactosamine - MIX0-514': None, # not in model
    'B2 - N-Acetyl- Neuraminic acid - MIX0-515': {
        'Add': set(['CPD0-1123[e]']),
        'Remove': set(['GLC[p]','CA+2[p]']), #don't remove ca because this doesn't have it
    },
    'B3 - β- D-Allose - MIX0-516': None, #not in model (there is a 6-phosphate version just fyi)
    'B4 - Amygdalin - MIX0-517': None, #not in model
    'B5 - D-Arabinose - MIX0-518': {
        'Add': set(['D-ARABINOSE[p]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    }, 
    'B6 - D-Arabitol - MIX0-519': {
        'Add': set(['CPD-355[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'B7 - L-Arabitol - MIX0-520': {
        'Add': set(['L-ARABITOL[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    }, 
    'B8 - Arbutin - MIX0-521': {
        'Add': set(['HYDROQUINONE-O-BETA-D-GLUCOPYRANOSIDE[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'B9 - 2-Deoxy- D-Ribose - MIX0-522': { #check if deoxyribose with phosphate works prolly no
        'Add': set(['L-LACTATE[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    }, 
    'B10 - i-Erythritol - MIX0-523': {
        'Add': set(['ERYTHRITOL[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'B11 - D-Fucose - MIX0-524': { #also wishy washy if
        'Add': set(['MANNITOL[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'B12 - 3-O-β-D- Galactopyranosyl- D-Arabinose - MIX0-525': {
        'Add': set(['CPD-3785[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'C1 - Gentiobiose - MIX0-526': {
        'Add': set(['CPD-3605[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'C2 - L-Glucose - MIX0-527': {
        'Add': set(['CPD-3607[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'C3 - D-Lactitol - MIX0-768': {
        'Add': set(['CPD0-2460[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'C4 - D-Melezitose - MIX0-789': {
        'Add': set(['CPD-13409[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'C5 - Maltitol - MIX0-529' : {
        'Add': set(['CPD-3609[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]'])
    },
    'C6 - α- Methyl-D- Glucoside - MIX0-530': None, #not in model
    'C7 - β-Methyl- D-Galactoside - MIX0-531': {
        'Add': set(['METHYL-BETA-D-GALACTOSIDE[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'C8 - 3-Methylglucose - MIX0-532': {
        'Add': set(['CPD0-1937[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'C9 - β- Methyl-D- Glucuronic acid - MIX0-533': {
        'Add': set(['CPD-3611[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'C10 - α- Methyl-D- Mannoside - MIX0-534': None, #not in model
    'C11 - β- Methyl-D- Xyloside - MIX0-535': None, # not in model
    'C12 - Palatinose - MIX0-536': {
        'Add': set(['CPD-230[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'D1 - D-Raffinose - MIX0-537': {
        'Add': set(['CPD-1099[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'D2 - Salicin - MIX0-538': {
        'Add': set(['CPD-1142[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'D3 - Sedoheptulosan - MIX0-539': {
        'Add': set(['CPD-3614[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'D4 - L-Sorbose - MIX0-540': { #ask about this one c ? because only in cytosol
        'Add': set(['CPD-9569[c]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'D5 - Stachyose - MIX0-541': {
        'Add': set(['CPD-170[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    }, # not in the model
    'D6 - D-Tagatose - MIX0-542': {
        'Add': set(['CPD-24954[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'D7 - Turanose - MIX0-773': {
        'Add': set(['CPD-13399[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'D8 - Xylitol - MIX0-543': {
        'Add': set(['XYLITOL[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'D9 - N-Acetyl-D- Glucosaminitol - MIX0-544': { #** come back to this no loading
        'Add': set(['Alpha-lactose[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'D10 - γ- Amino-N- Butyric acid - MIX0-545': {
        'Add': set(['4-AMINO-BUTYRATE[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'D11 - δ- Amino Valeric acid - MIX0-546': {
        'Add': set(['5-AMINOPENTANOATE[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'D12 - Butyric acid - MIX0-547': {
        'Add': set(['BUTYRIC_ACID[e]']), #is there such thing as out
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'E1 - Capric acid - MIX0-548': {
        'Add': set(['CPD-3617[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),        
    },
    'E2 - Caproic acid - MIX0-549': {
        'Add': set(['HEXANOATE[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]'])
    },
    'E3 - Citraconic acid - MIX0-550': None, # not in the model
    'E4 - Citramalic acid - MIX0-791': None,
    'E5 - D-Glucosamine - MIX0-551': {
        'Add': set(['GLUCOSAMINE[p]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'E6 - 2-Hydroxybenzoic acid - MIX0-787': {
        'Add': set(['CPD-110[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'E7 - 4-Hydroxybenzoic acid - MIX0-552': {
        'Add': set(['4-hydroxybenzoate[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'E8 - β- Hydroxybutyric acid - MIX0-553': {
        'Add': set(['CPD-335[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'E9 - γ- Hydroxybutyric acid - MIX0-554': {
        'Add': set(['4-HYDROXY-BUTYRATE[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'E10 - α- Keto- Valeric acid - MIX0-555': None,
    'E11 - Itaconic acid - MIX0-556': {
        'Add': set(['ITACONATE[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'E12 - 5-Keto-D- Gluconic acid - MIX0-557': {
        'Add': set(['5-DEHYDROGLUCONATE[p]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'F1 - D-Lactic acid Methyl Ester - MIX0-780':  None, # not in the model
    'F2 - Malonic acid - MIX0-558': {
        'Add': set(['MALONATE[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'F3 - Melibionic acid  - MIX0-559': {
        'Add': set(['CPD-3801[p]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    }, # lacks uptake and metabolic pathway
    'F4 - Oxalic acid - MIX0-560': {
        'Add': set(['OXALATE[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'F5 - Oxalomalic acid - MIX0-561':  None, # not in the model
    'F6 - Quinic acid - MIX0-562': {
        'Add': set(['QUINATE[c]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'F7 - D-Ribono- 1,4-Lactone - MIX0-788': {
        'Add': set(['CPD-13413[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'F8 - Sebacic acid - MIX0-563': {
        'Add': set(['CPD-3623[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'F9 - Sorbic acid - MIX0-564': None, # not in the model
    'F10 - Succinamic acid - MIX0-565':  None, # not in the model
    'F11 - D-Tartaric acid - MIX0-566': {
        'Add': set(['D-TARTRATE[p]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'F12 - L-Tartaric acid - MIX0-567': {
        'Add': set(['TARTRATE[p]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'G1 - Acetamide - MIX0-568': {
        'Add': set(['ACETAMIDE[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'G2 - L-Alaninamide - MIX0-569': None, # not in the model
    'G3 - N-Acetyl-L- Glutamic acid - MIX0-570': {
        'Add': set(['ACETYL-GLU[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'G4 - L-Arginine - MIX0-571': {
        'Add': set(['ARG[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'G5 - Glycine - MIX0-572': {
        'Add': set(['GLY[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'G6 - L-Histidine - MIX0-573': {
        'Add': set(['HIS[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'G7 - L-Homoserine - MIX0-574': {
        'Add': set(['HOMO-SER[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'G8 - Hydroxy- L-Proline - MIX0-796': {
        'Add': set(['4-HYDROXY-L-PROLINE[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'G9 - L-Isoleucine - MIX0-575': {
        'Add': set(['ILE[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'G10 - L-Leucine - MIX0-576':{
        'Add': set(['LEU[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'G11 - L-Lysine - MIX0-577': {
        'Add': set(['LYS[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'G12 - L-Methionine - MIX0-578': {
        'Add': set(['MET[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'H1 - L-Ornithine - MIX0-579': {
        'Add': set(['L-ORNITHINE[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'H2 - L-Phenylalanine - MIX0-580': {
        'Add': set(['PHE[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'H3 - L-Pyroglutamic acid - MIX0-581': {
        'Add': set(['5-OXOPROLINE[c]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'H4 - L-Valine - MIX0-582': {
        'Add': set(['VAL[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'H5 - D,L-Carnitine - MIX0-583': { #***
        'Add': set(['Ket0-D-Psicose[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'H6 - sec-Butylamine - MIX0-584': {
        'Add': set(['CPD-3627[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'H7 - D,L-Octopamine - MIX0-585': {
        'Add': set(['CPD-58[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'H8 - Putrescine - MIX0-586': {
        'Add': set(['PUTRESCINE[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'H9 - Dihydroxyacetone - MIX0-587': {
        'Add': set(['DIHYDROXYACETONE[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'H10 - 2,3-Butanediol - MIX0-588': None, #not in the model
    'H11 - 2,3-Butanedione - MIX0-769': {
        'Add': set(['DIACETYL[c]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'H12 - 3-Hydroxy- 2-butanone - MIX0-589': None, # not in the model
}

In [10]:
# run tests
condition_names = []
cp4_oofv = dict({})
plateID_to_condition = dict({})
for condition_name, condition in conditions.items():
    
    # store in dictionary the optimal objective function value
    temp = condition_name.split(' - ')
    plate_ID = temp[0]
    carbon_source = temp[1]
    plateID_to_condition[plate_ID] = carbon_source
    
    # solve the cvxpy problem
    objective_weights = {'secretion': 0.01, 'efficiency': 0.000001, 'kinetics': 0.000001}
    if condition == None:
        cp4_oofv[plate_ID] = None
        continue
    oofv, solution_flux, test_reaction_names, S_new, test_metabolites, test_kinetic = test_NetworkFlowModel(
                                            objective_weights,
                                            uptake_addition=condition['Add'], uptake_removal=condition['Remove'],)    
    
    # get the fluxes
    sim_flux = pd.DataFrame({f'sim_cp3_{condition_name}': solution_flux}, index = test_reaction_names)
    condition_names.append(f'sim_cp3_{condition_name}')
    df_all = pd.concat([df_all, sim_flux], axis=1)
    cp4_oofv[plate_ID] = oofv
    
    print(f"""Finished enviornment: {condition_name} with objective function value: {oofv}""")

Finished enviornment: B2 - N-Acetyl- Neuraminic acid - MIX0-515 with objective function value: 17691.55802983203
Finished enviornment: B5 - D-Arabinose - MIX0-518 with objective function value: 27159.69057620989
Finished enviornment: B6 - D-Arabitol - MIX0-519 with objective function value: 42321.62630242223
Finished enviornment: B7 - L-Arabitol - MIX0-520 with objective function value: 42321.62630242223
Finished enviornment: B8 - Arbutin - MIX0-521 with objective function value: 42321.62630250762
Finished enviornment: B9 - 2-Deoxy- D-Ribose - MIX0-522 with objective function value: 23364.00991704458
Finished enviornment: B10 - i-Erythritol - MIX0-523 with objective function value: 42321.62630242223
Finished enviornment: B11 - D-Fucose - MIX0-524 with objective function value: 17909.72541156313
Finished enviornment: B12 - 3-O-β-D- Galactopyranosyl- D-Arabinose - MIX0-525 with objective function value: 21927.046840529776
Finished enviornment: C1 - Gentiobiose - MIX0-526 with objective f

In [None]:
df_all['is_new'] = 'Old Reactions'
df_all.loc[ADDED_RXNS, 'is_new'] = 'Heena\'s Reactions'
df_all.loc[fba_new_reaction_ids, 'is_new'] = 'New Reactions'

df_all['kinetic'] = [kinetic[r] if r in kinetic_reaction_ids else False for r in df_all.index]

In [None]:
df_all

In [None]:
%store -r df_gene_usage genes_to_enzymes df_all_gene_usage new_genes

In [None]:
conditions_previous = ['basal', 'acetate', 'rich', 'anaerobic_basal', 'anaerobic_acetate', 'anaerobic_rich']
conditions_all = conditions_previous + condition_names

df_all_gene_usage = df_all_gene_usage.copy()

gene_used_before = df_gene_usage['is_used']
all_gene_used_before = df_all_gene_usage['is_used']

df_all_gene_usage = df_all_gene_usage.drop(['is_used'], axis = 1)
df_all_gene_usage_cp4 = df_all_gene_usage.copy()
reaction_catalysts = metabolism.parameters["reaction_catalysts"]

for condition in condition_names:
    df_all_gene_usage_cp4[condition] = 0
    new_reaction_usage = df_all.loc[fba_new_reaction_ids, condition]
    for rxn in fba_new_reaction_ids:
        is_used = new_reaction_usage.loc[rxn]
        enzymes = reaction_catalysts.get(rxn,[])
        for enz in enzymes:
            enz = enz[:-3]
            genes = get_keys(genes_to_enzymes, enz)
            for gene in genes:
                if is_used:
                    df_all_gene_usage_cp4.loc[gene, condition] += 1
                if rxn in kinetic_reaction_ids:
                    df_all_gene_usage_cp4.loc[gene, 'has_kinetic'] = True


# tally usage
df_all_gene_usage_cp4['is_used'] = np.any(df_all_gene_usage_cp4[conditions_all] > 0, axis=1)
df_gene_usage_cp4 = df_all_gene_usage_cp4.loc[new_genes,:]

In [None]:
# previous % new gene usage
num_genes = len(gene_used_before)
assert len(gene_used_before) == len(df_gene_usage_cp4['is_used']), 'Different number of new genes'

perc_gene_usage_prev = sum(gene_used_before)/num_genes
perc_gene_usage_new  = sum(df_gene_usage_cp4['is_used'])/num_genes

print(f'% new genes usage before checkpoints is: {perc_gene_usage_prev: 0.2%}')
print(f'          % new genes usage added through microarray 2 is: {perc_gene_usage_new: 0.2%}')

# Add checkpoint 2 and checkpoint4 reactions and condition testings

In [None]:
conditions = {
    'EG10593-α-D-galactopyranose': {
        'Add': set(['ALPHA-D-GALACTOSE[e]']),
        'Remove': set(['GLC[p]']),
        'Remove Reaction': ['TRANS-RXN-21'] #might also need to find solution to the "disabling" will i need to list them all out here
    },
    'EG11869-2-dehydro-3-deoxy-D-gluconate': {
        'Add': set(['2-DEHYDRO-3-DEOXY-D-GLUCONATE[p]']),
        'Remove': set(['GLC[p]']),
    },
    'EG12495-L-ascorbate': {
        'Add': set(['ASCORBATE[p]']),
        'Remove': set([]),

    },
    'EG20053-D-galactonate':{
        'Add': set(['D-GALACTONATE[e]']),
        'Remove': set([]),

    },
    'EG11700-1-(β-D ribofuranosyl)nicotinamide':{
        'Add': set(['NICOTINAMIDE_RIBOSE[p]']),
        'Remove': set(['GLC[p]']),
    },
    'EG12281-L-glutamate':{
        'Add': set(['GLT[p]']),
        'Remove': set(['GLC[p]']),
        'Remove Reaction': ['TRANS-RXN-261', 'TRANS-RXN-16']
    },
    'EG10406-L-glutamate':{
        'Add': set(['GLT[p]']),
        'Remove': set(['GLC[p]']),
        'Remove Reaction': ['TRANS-RXN-261', 'TRANS-RXN-162']
    },
}

In [None]:
condition_names = []
for condition_name, condition in conditions.items():
    
    if 'Remove Reaction' in condition:
        remove_reaction = condition['Remove Reaction']
    else:
        remove_reaction = None
    
    objective_weights = {'secretion': 0.01, 'efficiency': 0.000001, 'kinetics': 0.000001}
    
    _, solution_flux, test_reaction_names, S_new, test_metabolites, test_kinetic = test_NetworkFlowModel(
                                            objective_weights,
                                            uptake_addition=condition['Add'], uptake_removal=condition['Remove'],
                                            remove_reaction=remove_reaction,
    )    
    
    # get the fluxes
    sim_flux = pd.DataFrame({f'sim_cp2_{condition_name}': solution_flux}, index = test_reaction_names)
    condition_names.append(f'sim_cp2_{condition_name}')
    df_all = pd.concat([df_all, sim_flux], axis=1)
    
    print(f"""Finished enviornment: {condition_name}""")

# calculate % gene usage

In [None]:
df_all_gene_usage_cp4_w_cp23 = df_all_gene_usage_cp4.copy()
reaction_catalysts = metabolism.parameters["reaction_catalysts"]
conditions_all = conditions_all + condition_names
for condition in condition_names:
    df_all_gene_usage_cp4_w_cp23[condition] = 0
    new_reaction_usage = df_all.loc[fba_new_reaction_ids, condition]
    for rxn in fba_new_reaction_ids:
        is_used = new_reaction_usage.loc[rxn]
        enzymes = reaction_catalysts.get(rxn,[])
        for enz in enzymes:
            enz = enz[:-3]
            genes = get_keys(genes_to_enzymes, enz)
            for gene in genes:
                if is_used:
                    df_all_gene_usage_cp4_w_cp23.loc[gene, condition] += 1
                if rxn in kinetic_reaction_ids:
                    df_all_gene_usage_cp4_w_cp23.loc[gene, 'has_kinetic'] = True


# tally usage
df_all_gene_usage_cp4_w_cp23['is_used'] = np.any(df_all_gene_usage_cp4_w_cp23[conditions_all] > 0, axis=1)
df_gene_usage_ccp4_w_cp23 = df_all_gene_usage_cp4_w_cp23.loc[new_genes,:]

In [None]:
# previous % new gene usage
num_genes = len(gene_used_before)
assert len(gene_used_before) == len(df_gene_usage_cp4['is_used']),'Different number of new genes'

perc_gene_usage_prev = sum(gene_used_before)/num_genes
perc_gene_usage_cp3_w_cp23  = sum(df_all_gene_usage_cp4_w_cp23['is_used'])/num_genes

print(f'% new genes usage before checkpoint 1 and 2 is: {perc_gene_usage_prev: 0.2%}')
print(f'         %new genes enabled by microarray 2 is: {perc_gene_usage_new: 0.2%}')
print(f'          % new genes usage at checkpoint 3 is: {perc_gene_usage_cp3_w_cp23: 0.2%}')

In [None]:
%store -r df_all_gene_usage
# % all gene usage
num_genes = len(df_all_gene_usage)
perc_all_gene_usage_basal = sum(df_all_gene_usage_cp4_w_cp23.basal > 0)/num_genes * 100
perc_all_gene_usage_basic_conditions = sum(df_all_gene_usage['is_used'])/num_genes * 100
perc_all_gene_usage_cp3_w_cp2  = sum(df_all_gene_usage_cp4_w_cp23['is_used'])/num_genes * 100

perc_all = [perc_all_gene_usage_basal, perc_all_gene_usage_basic_conditions, perc_all_gene_usage_cp3_w_cp2]
perc_all

# Plot 1: Gene Usage Histogram at checkpoint 4

In [None]:
%store -r df_all_gene_usage_cp1 df_all_gene_usage_cp2 df_all_gene_usage_cp3_w_cp2

In [None]:
# % new gene usage
df_gene_usage = df_all_gene_usage.loc[new_genes,:]
df_gene_usage_cp1 = df_all_gene_usage_cp1.loc[new_genes,:]
df_gene_usage_cp2 = df_all_gene_usage_cp2.loc[new_genes,:]
df_gene_usage_cp3 = df_all_gene_usage_cp3_w_cp2.loc[new_genes,:]

num_genes = len(df_gene_usage)
perc_gene_usage_basal = sum(df_gene_usage_cp2.basal > 0)/num_genes * 100
perc_gene_usage_basic_conditions = sum(gene_used_before)/num_genes * 100
perc_gene_usage_cp1  = sum(df_gene_usage_cp1['is_used'])/num_genes * 100
perc_gene_usage_cp2  = sum(df_gene_usage_cp2['is_used'])/num_genes * 100
perc_gene_usage_cp3  = sum(df_gene_usage_cp3_w_cp2['is_used'])/num_genes * 100
perc_gene_usage_cp4  = sum(df_gene_usage_cp4_w_cp23['is_used'])/num_genes * 100

perc = [perc_gene_usage_basal, perc_gene_usage_basic_conditions, perc_gene_usage_cp1, perc_gene_usage_cp2, perc_gene_usage_cp3, perc_gene_usage_cp4]
x_label = ['Basal', 'Basal, Acetate, Rich, Anaerobic', 'Checkpoint 1', 'Checkpoint 2', 'Cp2 + Microarray 1']

In [None]:
# % all gene usage
num_genes = len(df_all_gene_usage)
perc_all_gene_usage_basal = sum(df_all_gene_usage_cp2.basal > 0)/num_genes * 100
perc_all_gene_usage_basic_conditions = sum(df_all_gene_usage['is_used'])/num_genes * 100
perc_all_gene_usage_cp1  = sum(df_all_gene_usage_cp1['is_used'])/num_genes * 100
perc_all_gene_usage_cp2  = sum(df_all_gene_usage_cp2['is_used'])/num_genes * 100
perc_all_gene_usage_cp3_w_cp2  = sum(df_all_gene_usage_cp3_w_cp2['is_used'])/num_genes * 100


perc_all = [perc_all_gene_usage_basal, perc_all_gene_usage_basic_conditions, perc_all_gene_usage_cp1, perc_all_gene_usage_cp2, perc_all_gene_usage_cp3_w_cp2]

In [None]:
# Create a DataFrame
df = pd.DataFrame({
    "Condition": x_label,
    "New Metabolic Gene": perc,
    "All Metabolic Gene": perc_all,
})

# Reshape to long format
df_long = df.melt(id_vars="Condition", 
                  value_vars=["New Metabolic Gene", "All Metabolic Gene"],
                  var_name="Gene Group",
                  value_name="Percent Usage")

# Create grouped bar plot
fig = px.bar(
    df_long,
    x="Condition",
    y="Percent Usage",
    color="Gene Group",              # creates a legend
    barmode="group",                 # side-by-side bars
    text="Percent Usage",
    title="New vs All Metabolic Gene Usage by Condition",
    labels={"Percent Usage": "Percent gene usage (%)"},
    color_discrete_map={
        "New Metabolic Gene": "#4C78A8",
        "All Metabolic Gene": "#c26426"
    }
)

# Customize appearance
fig.update_traces(texttemplate='%{text:.2f}%', textposition='outside')
fig.update_layout(
    paper_bgcolor='rgba(0,0,0,0)',
    yaxis_title="Percent gene usage (%)",
    yaxis=dict(range=[0, 45]),
    xaxis_title=None,
    xaxis_tickangle=-35,
    uniformtext_minsize=8,
    uniformtext_mode='hide',
    margin=dict(t=50, b=50, l=50, r=50),
    width=800,   # width in pixels
    height=500
)

fig.show(renderer='browser')
# save 
# fig.write_image("notebooks/Heena notebooks/Metabolism_New Genes/out/gene_usage_histogram_cp3.png", scale=5, width=800, height=500)

# Plot 2: Histogram showing optimal objective function value distribution

In [None]:
oofv_val = list(cp3_oofv.values())
oofv_basal, _, _, _, _, _ = test_NetworkFlowModel(objective_weights)

In [None]:
values = [v if v is not None else np.nan for v in oofv_val]

# Create histogram
hist = go.Histogram(
    x=values,
    nbinsx=50,
    marker_color='lightblue',
    name="Distribution"
)

# Highlight the first value
vline = go.Scatter(
    x=[values[0], values[0]],
    y=[0, 15],  
    mode="lines+text",
    line=dict(color="Navy", width=3, dash="dash"),
    text=["Neg Control"],
    textposition="bottom center",
    showlegend=False
)

# Highlight the basal value
vline2 = go.Scatter(
    x=[oofv_basal, oofv_basal],
    y=[0, 20],  
    mode="lines+text",
    line=dict(color="Navy", width=3, dash="dash"),
    text=["Basal"],
    textposition="bottom center",
    showlegend=False
)

# Combine and plot
fig = go.Figure(data=[hist, vline, vline2])
fig.add_vrect(x0=40000, x1=43500, line_width=0, fillcolor="LightSalmon", opacity=0.1, annotation_text='No Growth', annotation_position="top", annotation_font_size = 16, layer="below")
fig.add_vrect(x0=15000, x1=26000, line_width=0, fillcolor="aqua", opacity=0.1, annotation_text='Growth', annotation_position="top", annotation_font_size = 16, layer="below")
fig.add_vrect(x0=26500, x1=39500, line_width=0, fillcolor="yellow", opacity=0.1, annotation_text='Slow Growth', annotation_position="top", annotation_font_size = 16, layer="below")

fig.update_layout(
    paper_bgcolor='rgba(0,0,0,0)',
    title="Histogram with First Value Highlighted",
    xaxis_title="Optimal Objective Function Value",
    yaxis_title="Count",
    bargap=0.05
)
fig.show(renderer='browser')
# fig.write_image("notebooks/Heena notebooks/Metabolism_New Genes/out/oofv_histogram_cp3.png", scale=5, width=1000, height=500)

# Plot 3: Heatmap showing 'growth' 'no growth'

In [None]:
def discrete_colorscale(bvals, colors):
    """
    bvals - list of values bounding intervals/ranges of interest
    colors - list of rgb or hex colorcodes for values in [bvals[k], bvals[k+1]],0<=k < len(bvals)-1
    returns the plotly  discrete colorscale
    """
    if len(bvals) != len(colors)+1:
        raise ValueError('len(boundary values) should be equal to  len(colors)+1')
    bvals = sorted(bvals)     
    nvals = [(v-bvals[0])/(bvals[-1]-bvals[0]) for v in bvals]  #normalized values
    
    dcolorscale = [] #discrete colorscale
    for k in range(len(colors)):
        dcolorscale.extend([[nvals[k], colors[k]], [nvals[k+1], colors[k]]])
    return dcolorscale  

In [None]:
bvals = [14000, 25500, 40000, 43500]
colors = ['#dda0dd', '#ffb6c2', '#fcf3bf']
dcolorsc = discrete_colorscale(bvals, colors)
dcolorsc

In [None]:
bvals = np.array(bvals)
tickvals = [np.mean(bvals[k:k+2]) for k in range(len(bvals)-1)] #position with respect to bvals where ticktext is displayed
ticktext = [f'<{bvals[1]}'] + [f'{bvals[k]}-{bvals[k+1]}' for k in range(1, len(bvals)-2)]+[f'>{bvals[-2]}']
tickvals

In [None]:
text = "hellowwww"
a = text.split('w')
'<br>'.join(a)

In [None]:
# Plate dimensions
rows = list("ABCDEFGH")
cols = list(range(1, 13))

# Create a 2D array with None as default
matrix = []
label = []
for r in rows:
    row_data = []
    row_text = []
    for c in cols:
        key = f"{r}{c}"
        row_data.append(cp3_oofv.get(key))  # Fill missing wells with None
        text = plateID_to_condition.get(key)
        text_segment = text.split(' ')
        text = '<br>'.join(text_segment)
        row_text.append(text)
    matrix.append(row_data)
    label.append(row_text)

In [None]:
import plotly.graph_objects as go
heatmap = go.Heatmap(z=matrix, 
                     x=[str(c) for c in cols],
                     y=rows,
                     text=label,
                     texttemplate="%{text}",
                     textfont={"size": 10},
                     colorscale = dcolorsc, 
                     colorbar = dict(thickness=25, 
                                     tickvals=tickvals, 
                                     ticktext=ticktext),
                     )

fig = go.Figure(data=[heatmap])
fig.update_layout(
    paper_bgcolor='rgba(0,0,0,0)',
    title="Plate Reader Heatmap with Values",
    xaxis_title="Column",
    yaxis_title="Row",
    yaxis_autorange='reversed'  
)

fig.show(renderer='browser')
# fig.write_image("notebooks/Heena notebooks/Metabolism_New Genes/out/heatmap_microarray1_C_source.png", scale=5, width=1200, height=800)

# Plot 4: combined kinetic sim versus target plot - no more shiny

In [None]:
kc_target_cp3 = pd.DataFrame(fba["target_kinetic_fluxes"], columns= kinetic_reaction_ids).mean(axis=0).copy()
cols = conditions_all[6:] + ['is_new', 'kinetic']
df_kc_sim_cp3 = df_all.loc[kinetic_reaction_ids, cols]

In [None]:
df_kc_sim_cp3_log = df_kc_sim_cp3.copy()
df_kc_sim_cp3_log['kinetic'] = df_kc_sim_cp3_log['kinetic'].apply(pd.to_numeric, errors='coerce')
cols = conditions_all[6:] + [ 'kinetic']
df_kc_sim_cp3_log.loc[:,cols] += 1e-6
df_kc_sim_cp3_log.loc[:,cols] = np.log10(df_kc_sim_cp3_log.loc[:,cols])
df_kc_sim_cp3_log

In [None]:
# work with df_kc_cp1 or df_kc_cp1_log
# create a dataframe with rows as reactions, col1 as flux closest to target, col2 being condition(s) that have that flux
df_kc_cp3_log_combined = pd.DataFrame({'flux': np.NaN, 'condition': 'none', 'target': df_kc_sim_cp3_log['kinetic'], 'is_new': df_kc_sim_cp3_log['is_new']}, index=kinetic_reaction_ids)
condition_names = conditions_all[6:]
# create temporary dataframe that tracks the difference between sim flux and target
temp = df_kc_sim_cp3_log.copy()
temp.replace(0, np.nan, inplace=True)
for condition in condition_names:
    temp.loc[:, condition] = np.abs(temp.loc[:, condition] - df_kc_sim_cp3_log['kinetic'].values)

# parse by reaction (index) to find the conditions with flux closest to target (minimal difference)
for reaction in temp.index:
    # get the condition with the minimum difference
    min_diff = temp.loc[reaction, condition_names].min()
    # get the condition(s) with the minimum difference
    conditions = temp.loc[reaction, condition_names][temp.loc[reaction, condition_names] == min_diff].index.tolist()
    # update the dataframe
    if np.all(df_kc_sim_cp3_log.loc[reaction,condition_names] == -6):
        df_kc_cp3_log_combined.loc[reaction, 'flux'] = -6
    else:
        df_kc_cp3_log_combined.loc[reaction, 'flux'] = df_kc_sim_cp3_log.loc[reaction,conditions[0]]
        if conditions == condition_names:
            df_kc_cp3_log_combined.loc[reaction, 'condition'] = 'all'
        else:
            df_kc_cp3_log_combined.loc[reaction, 'condition'] = ', '.join(conditions)
# 
# df_kc_cp1_log_combined = df_kc_cp1_combined.copy()         
# df_kc_cp1_log_combined['target'] = df_kc_cp1_log_combined['target'].apply(pd.to_numeric, errors='coerce')
# df_kc_cp1_log_combined.loc[:,['flux','target']] += 1e-6
# df_kc_cp1_log_combined.loc[:,['flux','target']] = np.log10(df_kc_cp1_log_combined.loc[:,['flux','target']])
# 
df_kc_cp3_log_combined.to_csv('notebooks/Heena notebooks/Metabolism_New Genes/df_kc_cp3_combined.csv', index=True)

In [None]:
df_kc_cp3_log_combined

In [None]:
# plotly figure
import plotly.express as px
fig = px.scatter(
    df_kc_cp3_log_combined,
    x='target',
    y='flux',
    color='is_new',
    hover_name=df_kc_cp3_log_combined.index,
    hover_data=['condition'],
    labels={'flux': 'Simulated Flux (log10)', 'target': 'Target Flux (log10)'},
    title='Kinetic Simulated Flux vs Target Flux at Checkpoint 3',
)
fig.update_traces(marker=dict(size=10, line=dict(width=2, color='DarkSlateGrey')), selector=dict(mode='markers'))
fig.update_layout(
    paper_bgcolor='rgba(0,0,0,0)',
    xaxis_title="Target Flux (log10)",
    yaxis_title="Simulated Flux (log10)",
    legend_title_text='New Reactions',
    width=800,
    height=600
)
fig.show(renderer='browser')