Description:

This script aims to test all carbon source media defined by Ecocyc (https://ecocyc.org/ECOLI/new-image?object=Growth-Media): individual growth media (the ones with data), microarray Plate ID: Biolog PM1 and PM2. This script will use the basal simulation after cp2 as the base, and run FBA external to the model to see if model solves and see if there is growth (needs definition) and analyze gene usage. 

In [4]:
import numpy as np
import ast
import pandas as pd
import os
import matplotlib.pyplot as plt
import dill
import requests
import xmltodict
import cvxpy as cp
import itertools
import networkx as nx
import plotly.express as px

from scipy.special import logsumexp

%matplotlib inline
# sns.set(style='dar|kgrid', palette='viridis', context='talk')

os.chdir(os.path.expanduser('~/dev/vivarium-ecoli'))

from ecoli.processes.metabolism_redux_classic import NetworkFlowModel, FlowResult
from ecoli.library.schema import numpy_schema, bulk_name_to_idx, listener_schema, counts
from ecoli.processes.registries import topology_registry
TOPOLOGY = topology_registry.access("ecoli-metabolism-redux")
import plotly.express as px

In [5]:
# load checkpoint 2 model
time = '400'
date = '2025-05-15'
experiment = 'NEW_NewGenes_checkpoint2'
condition = 'basal'
entry = f'{experiment}_{time}_{date}'
folder = f'out/metabolism-comparison/{condition}/{entry}/'

output = np.load(folder + '0_output.npy',allow_pickle='TRUE').item()
# output = np.load(r"out/geneRxnVerifData/output_glc.npy", allow_pickle=True, encoding='ASCII').tolist()
output = output['agents']['0']
fba = output['listeners']['fba_results']
bulk = pd.DataFrame(output['bulk'])
f = open(folder + 'agent_steps.pkl', 'rb')
agent = dill.load(f)
f.close()

In [6]:
# get commonly stored variables
metabolism = agent['ecoli-metabolism-redux-classic']
stoichiometry = metabolism.stoichiometry.copy()
reaction_names = metabolism.reaction_names
fba_new_reaction_ids = metabolism.parameters["fba_new_reaction_ids"]
fba_reaction_ids_to_base_reaction_ids = metabolism.parameters['fba_reaction_ids_to_base_reaction_ids']
metabolites = metabolism.metabolite_names.copy()
binary_kinetic_idx = metabolism.binary_kinetic_idx
exchange_molecules = metabolism.exchange_molecules

S = stoichiometry .copy()
S = pd.DataFrame(S, index=metabolites , columns=reaction_names )
homeostatic_count = pd.DataFrame(fba["homeostatic_metabolite_counts"], columns=metabolism.homeostatic_metabolites).loc[24, :]
homeostatic = pd.DataFrame(fba["target_homeostatic_dmdt"], columns=metabolism.homeostatic_metabolites).loc[24, :]
maintenance = pd.DataFrame(fba["maintenance_target"][1:], columns=['maintenance_reaction']).iat[24, 0]
kinetic = pd.DataFrame(fba["target_kinetic_fluxes"], columns=metabolism.kinetic_constraint_reactions).loc[24, :].copy()

In [7]:
# parameters that are the same across the two simulation
kinetic_reaction_ids = metabolism.kinetic_constraint_reactions
allowed_exchange_uptake = metabolism.allowed_exchange_uptake
FREE_RXNS = ["TRANS-RXN-145", "TRANS-RXN0-545", "TRANS-RXN0-474"]
ADDED_RXNS = ['HS-TRANSPORT-RXN-CPD0-1202', 'HS-TRANSPORT-RXN-CPD0-1202 (reverse)',
                   'HS-TRANSPORT-RXN[CCO-OUTER-MEM]-OXAMATE', 'HS-TRANSPORT-RXN[CCO-OUTER-MEM]-OXAMATE (reverse)',
                   'HS-TRANSPORT-RXN[CCO-PM-BAC-NEG]-OXAMATE', 'HS-TRANSPORT-RXN[CCO-PM-BAC-NEG]-OXAMATE (reverse)',
                   'HS-BETA-GLUCURONID-RXN_CPD-3611//METOH', 'HS-SPONTANEOUS-TRANSPORT[CCO-OUTER-MEM]-HCN', 'HS-SPONTANEOUS-TRANSPORT[CCO-OUTER-MEM]-HCN (reverse)',
                   'HS-SPONTANEOUS-TRANSPORT[CCO-PM-BAC-NEG]-HCN','HS-SPONTANEOUS-TRANSPORT[CCO-PM-BAC-NEG]-HCN (reverse)']

# Define functions for testing

In [23]:
S_met, RXNS = get_subset_S(S, ['ACET[c]'])
S_met

Unnamed: 0,ACECOATRANS-RXN-BUTYRYL-COA/ACET//BUTYRIC_ACID/ACETYL-COA.42.,ACECOATRANS-RXN-CPD-10267/ACET//CPD-3617/ACETYL-COA.36.,ACECOATRANS-RXN-CPD-196/ACET//CPD-195/ACETYL-COA.33.,ACECOATRANS-RXN-HEXANOYL-COA/ACET//HEXANOATE/ACETYL-COA.40.,ACECOATRANS-RXN-LAUROYLCOA-CPD/ACET//DODECANOATE/ACETYL-COA.44.,ACECOATRANS-RXN-PALMITYL-COA/ACET//PALMITATE/ACETYL-COA.40.,ACECOATRANS-RXN-PROPIONYL-COA/ACET//PROPIONATE/ACETYL-COA.42.,ACECOATRANS-RXN-STEAROYL-COA/ACET//STEARIC_ACID/ACETYL-COA.43.,ACECOATRANS-RXN-TETRADECANOYL-COA/ACET//CPD-7836/ACETYL-COA.44.,ACETATE--COA-LIGASE-RXN,...,RXN0-5414,RXN0-7013,RXN0-7075,RXN0-7075 (reverse),RXN0-7391,SULFOCYS-RXN,SULFOCYS-RXN (reverse),TRANS-RXN0-571,TRANS-RXN0-571 (reverse),UDPACYLGLCNACDEACETYL-RXN
ACET[c],-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,...,1,1,1,-1,1,1,-1,1,-1,1


In [24]:
df_all.loc[RXNS]

ACECOATRANS-RXN-BUTYRYL-COA/ACET//BUTYRIC_ACID/ACETYL-COA.42.                  0.0000
ACECOATRANS-RXN-CPD-10267/ACET//CPD-3617/ACETYL-COA.36.                        0.0000
ACECOATRANS-RXN-CPD-196/ACET//CPD-195/ACETYL-COA.33.                           0.0000
ACECOATRANS-RXN-HEXANOYL-COA/ACET//HEXANOATE/ACETYL-COA.40.                    0.0000
ACECOATRANS-RXN-LAUROYLCOA-CPD/ACET//DODECANOATE/ACETYL-COA.44.                0.0000
ACECOATRANS-RXN-PALMITYL-COA/ACET//PALMITATE/ACETYL-COA.40.                    0.0000
ACECOATRANS-RXN-PROPIONYL-COA/ACET//PROPIONATE/ACETYL-COA.42.              23737.2825
ACECOATRANS-RXN-STEAROYL-COA/ACET//STEARIC_ACID/ACETYL-COA.43.                 0.0000
ACECOATRANS-RXN-TETRADECANOYL-COA/ACET//CPD-7836/ACETYL-COA.44.                0.0000
ACETATE--COA-LIGASE-RXN                                                        0.0000
ACETATEKIN-RXN                                                               145.9300
ACETATEKIN-RXN (reverse)                              

In [8]:
def get_subset_S(S, met_of_interest):
    S_met = S.loc[met_of_interest, :]
    S_met = S_met.loc[:,~np.all(S_met == 0, axis=0)]
    return S_met, S_met.columns

def get_keys(dict, value):
    return [key for key in dict if dict[key] == value]

def test_NetworkFlowModel(objective_weights,
                          uptake_addition = set([]), uptake_removal = set([]), new_exchange_molecules = set([]),
                          add_metabolite = None, add_reaction = None, add_kinetic = None, remove_reaction = None, force_reaction = None, solver_choice=cp.GLOP):
    # update exchanges
    uptake = metabolism.allowed_exchange_uptake.copy()
    uptake = set(uptake)
    uptake = uptake | uptake_addition
    uptake = uptake - uptake_removal
    
    exchange_molecules = metabolism.exchange_molecules.copy()
    exchange_molecules = exchange_molecules | new_exchange_molecules
    
    # update stoichiometry
    reaction_names = metabolism.reaction_names.copy()
    kinetic_reaction_ids = metabolism.kinetic_constraint_reactions.copy()
    kinetic = pd.DataFrame(fba["target_kinetic_fluxes"], columns=metabolism.kinetic_constraint_reactions).loc[24, :].copy()
    metabolites = metabolism.metabolite_names.copy()
    
    S_new = stoichiometry.copy()
    
    if add_metabolite is not None: # add to metabolites list because they are currently not included in the model
        for m in add_metabolite:
            if m not in metabolites:
                metabolites.append(m)
        # append rows of zeros to S_new of length add_metabolite
        S_new = np.concatenate((S_new, np.zeros((len(add_metabolite), S_new.shape[1]))), axis=0)
    
    if add_reaction is not None:
        # assert add_reaction is a dictionary
        assert isinstance(add_reaction, dict)
        
        for r,s in add_reaction.items():
            if r not in reaction_names:
                reaction_names.append(r)
            # append columns of reaction stoich to S_new of length add_reaction
            new_reaction = np.zeros((S_new.shape[0], 1))
            for m, v in s.items():
                new_reaction[metabolites.index(m), 0] = v
            S_new = np.concatenate((S_new, new_reaction), axis=1)
    
    if add_kinetic is not None:
        # assert add_kinetic is a dictionary
        assert isinstance(add_kinetic, dict)
        
        for r, v in add_kinetic.items():
            if r not in kinetic_reaction_ids:
                kinetic_reaction_ids.append(r)
                kinetic[r] = v
                
    if remove_reaction is not None:
        for r in remove_reaction:
            r_idx = reaction_names.index(r)
            S_new = np.delete(S_new, r_idx, axis=1)
            reaction_names.remove(r)
            if r in kinetic_reaction_ids:
                kinetic_reaction_ids.remove(r)
                del kinetic[r]
                
    if force_reaction is not None:
        force_reaction_idx = np.array([reaction_names.index(r) for r in force_reaction])
    else:
        force_reaction_idx = force_reaction

    # Solve NetworkFlowModel
    model = NetworkFlowModel(
            stoich_arr=S_new,
            metabolites=metabolites,
            reactions=reaction_names,
            homeostatic_metabolites=metabolism.homeostatic_metabolites,
            kinetic_reactions=kinetic_reaction_ids,
            free_reactions=FREE_RXNS)
    model.set_up_exchanges(exchanges=exchange_molecules, uptakes=uptake)
    solution: FlowResult = model.solve(
            homeostatic_concs=homeostatic_count * metabolism.counts_to_molar.asNumber(), # in conc
            homeostatic_dm_targets=np.array(list(dict(homeostatic).values())), # *10^7
            maintenance_target=maintenance, # *10^6 ish
            kinetic_targets=np.array(list(dict(kinetic).values())), # *10^6 ish
            # binary_kinetic_idx=binary_kinetic_idx, #7646
            binary_kinetic_idx=None,
            force_flow_idx=force_reaction_idx,
            objective_weights=objective_weights, #same
            upper_flux_bound= 1000000000, # increase to 10^9 because notebook runs FlowResult using Counts, WC runs using conc.   
            solver=solver_choice) #SCS. ECOS, MOSEK
    return solution.objective, solution.velocities, reaction_names, S_new, metabolites, kinetic

# Create Dataframes to Store Results

In [14]:
# all reactions
sim = pd.DataFrame(fba["estimated_fluxes"], columns= reaction_names).mean(axis=0).copy()

# kinetic reactions
kc_target_cp2 = pd.DataFrame(fba["target_kinetic_fluxes"], columns= kinetic_reaction_ids).mean(axis=0).copy()

In [15]:
df_all = sim.copy()
df_all.columns = ['sim_cp2_basal']

In [16]:
df_all

1-ACYLGLYCEROL-3-P-ACYLTRANSFER-RXN                                                                               0.000000e+00
1.1.1.127-RXN                                                                                                     0.000000e+00
1.1.1.127-RXN (reverse)                                                                                           0.000000e+00
1.1.1.215-RXN (reverse)                                                                                           0.000000e+00
1.1.1.251-RXN                                                                                                     0.000000e+00
                                                                                                                      ...     
XYLULOKIN-RXN-CPD-24961/ATP//XYLULOSE-5-PHOSPHATE/ADP/PROTON.47.                                                  0.000000e+00
YIAE1-RXN (reverse)                                                                                            

# Network Flow New Enviornments

In [9]:
# microarray plate 1: ~ tests 96 carbon sources
conditions = {
    'A1 - Carbon Negative Control - MIX0-80': {
        'Add': set([]),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'A2 - L-Arabinose* - MIX0-420': {
        'Add': set(['L-ARABINOSE[p]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'A3 - N-Acetyl-D- Glucosamine* - MIX0-421': {
        'Add': set(['N-acetyl-D-glucosamine[p]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'A4 - D-Saccharic acid - MIX0-422': {
        'Add': set(['D-GLUCARATE[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'A5 - Succinic acid - MIX0-423': {
        'Add': set(['SUC[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'A6 - D-Galactose* - MIX0-424': {
        'Add': set(['GALACTOSE[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'A7 - L-Aspartic acid - MIX0-425 ': {
        'Add': set(['L-ASPARTATE[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'A8 - L-Proline - MIX0-426': {
        'Add': set(['PRO[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'A9 - D-Alanine - MIX0-427': {
        'Add': set(['D-ALANINE[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'A10 - D-Trehalose - MIX0-428': {
        'Add': set(['TREHALOSE[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'A11 - D-Mannose - MIX0-429': {
        'Add': set(['CPD-13559[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'A12 - Dulcitol - MIX0-430': {
        'Add': set(['GALACTITOL[p]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'B1 - D-Serine - MIX0-431': {
        'Add': set(['D-SERINE[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    }, 
    'B2 - D-Sorbitol - MIX0-432': {
        'Add': set(['SORBITOL[e]']),
        'Remove': set(['GLC[p]','CA+2[p]']),
    },
    'B3 - Glycerol - MIX0-433': {
        'Add': set(['GLYCEROL[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'B4 - L-Fucose* - MIX0-434': {
        'Add': set(['L-fucoses[p]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    }, 
    'B5 - D-Glucuronic acid* - MIX0-435': {
        'Add': set(['CPD-15530[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    }, 
    'B6 - D-Gluconic acid - MIX0-436': {
        'Add': set(['GLUCONATE[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'B7 - DL-α- Glycerol Phosphate - MIX0-437': {
        'Add': set(['GLYCEROL-3P[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    }, 
    'B8 - D-Xylose* - MIX0-438': {
        'Add': set(['CPD-15377[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'B9 - L-Lactic acid - MIX0-439': {
        'Add': set(['L-LACTATE[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    }, 
    'B10 - Formic acid - MIX0-440': {
        'Add': set(['FORMATE[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'B11 - D-Mannitol - MIX0-441': {
        'Add': set(['MANNITOL[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'B12 - L-Glutamic acid - MIX0-442': {
        'Add': set(['GLT[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'C1 - D-Glucose- 6-Phosphate* - MIX0-443': {
        'Add': set(['GLC-6-P[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'C2 - D-Galactonic acid-γ- Lactone - MIX0-444': {
        'Add': set(['D-GALACTONO-1-4-LACTONE[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'C3 - DL-Malic acid - MIX0-445': {
        'Add': set(['MAL[e]', 'CPD-660[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'C4 - D-Ribose* - MIX0-446': {
        'Add': set(['CPD0-1110[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'C5 - Tween 20 - MIX0-797' : None, # not in the model
    'C6 - L-Rhamnose* - MIX0-447': {
        'Add': set(['RHAMNOSE[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'C7 - D-Fructose - MIX0-448': {
        'Add': set(['BETA-D-FRUCTOSE[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'C8 - Acetic acid - MIX0-449': {
        'Add': set(['ACET[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'C9 - α-D-Glucose - MIX0-450': {
        'Add': set(['ALPHA-GLUCOSE[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'C10 - Maltose - MIX0-451': {
        'Add': set(['MALTOSE[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'C11 - D-Melibiose - MIX0-452': {
        'Add': set(['MELIBIOSE[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'C12 - Thymidine - MIX0-453': {
        'Add': set(['THYMIDINE[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'D1 - L-Asparagine - MIX0-454': {
        'Add': set(['ASN[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'D2 - D-Aspartic acid - MIX0-455': {
        'Add': set(['CPD-302[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'D3 - D-Glucosaminic acid - MIX0-456': {
        'Add': set(['GLUCOSAMINATE[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'D4 - 1,2-Propanediol - MIX0-457': {
        'Add': set(['PROPANE-1-2-DIOL[c]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'D5 - Tween 40 - MIX0-798': None, # not in the model
    'D6 - α-Ketoglutaric acid - MIX0-458': {
        'Add': set(['2-KETOGLUTARATE[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'D7 - α-Ketobutyric acid - MIX0-459': {
        'Add': set(['2-OXOBUTANOATE[p]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'D8 - α-Methyl-D- Galactoside - MIX0-786': {
        'Add': set(['CPD-3565[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'D9 - α-D-Lactose - MIX0-460': {
        'Add': set(['Alpha-lactose[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'D10 - Lactulose - MIX0-461': {
        'Add': set(['CPD-3561[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'D11 - Sucrose - MIX0-462': {
        'Add': set(['SUCROSE[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'D12 - Uridine - MIX0-463': {
        'Add': set(['URIDINE[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'E1 - L-Glutamine - MIX0-464': {
        'Add': set(['GLN[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),        
    },
    'E2 - M-Tartaric acid - MIX0-465': None, # not in the model
    'E3 - D-Glucose- 1-Phosphate - MIX0-466': {
        'Add': set(['GLC-1-P[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'E4 - D-Fructose- 6-Phosphate - MIX0-467': {
        'Add': set(['FRUCTOSE-6P[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'E5 - Tween 80 - MIX0-799': None, # not in the model
    'E6 - α-Hydroxy glutaric_acid γ-Lactone - MIX0-793': {
        'Add': set(['CPD-13414[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'E7 - α-Hydroxy butyric_acid - MIX0-790': {
        'Add': set(['CPD-3564[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'E8 - β-Methyl-D- Glucoside - MIX0-784': {
        'Add': set(['CPD-3570[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'E9 - Adonitol - MIX0-468': {
        'Add': set(['RIBITOL[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'E10 - Maltotriose - MIX0-469': {
        'Add': set(['MALTOTRIOSE[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'E11 - 2-Deoxyadenosine - MIX0-470': {
        'Add': set(['DEOXYADENOSINE[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'E12 - Adenosine - MIX0-471': {
        'Add': set(['ADENOSINE[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'F1 - Gly-Asp - MIX0-778': {
        'Add': set(['CPD-13406[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'F2 - Citric acid - MIX0-472': {
        'Add': set(['CIT[p]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'F3 - M-Inositol  - MIX0-473': {
        'Add': set(['MYO-INOSITOL[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    }, # lacks uptake and metabolic pathway
    'F4 - D-Threonine - MIX0-474': {
        'Add': set(['D-THREONINE[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'F5 - Fumaric acid - MIX0-475': {
        'Add': set(['FUM[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'F6 - Bromosuccinic acid - MIX0-779': None, # not in the model
    'F7 - Propionic acid - MIX0-476': {
        'Add': set(['PROPIONATE[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'F8 - Mucic acid - MIX0-477': {
        'Add': set(['D-GALACTARATE[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'F9 - Glycolic acid - MIX0-478': {
        'Add': set(['GLYCOLLATE[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'F10 - Glyoxylic acid - MIX0-479': {
        'Add': set(['GLYOX[p]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'F11 - D-Cellobiose - MIX0-480': {
        'Add': set(['CELLOBIOSE[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'F12 - Inosine - MIX0-481': {
        'Add': set(['INOSINE[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'G1 - Gly-Glu - MIX0-482': {
        'Add': set(['CPD-3569[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'G2 - Tricarballylic acid - MIX0-483': None, # not in the model
    'G3 - L-Serine - MIX0-484': {
        'Add': set(['SER[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'G4 - L-Threonine - MIX0-485': {
        'Add': set(['THR[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'G5 - L-Alanine - MIX0-486': {
        'Add': set(['L-ALPHA-ALANINE[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'G6 - Ala-Gly - MIX0-772': {
        'Add': set(['ALA-GLY[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'G7 - Acetoacetic acid - MIX0-487': {
        'Add': set(['3-KETOBUTYRATE[p]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'G8 - N-Acetyl-D Mannosamine* - MIX0-488': {
        'Add': set(['N-acetyl-D-mannosamine[p]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'G9 - Mono- Methylsuccinate - MIX0-489': None, # not in the model
    'G10 - Methyl pyruvate - MIX0-490': None, # not in the model
    'G11 - D-Malic acid - MIX0-491': {
        'Add': set(['CPD-660[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'G12 - L-Malic acid - MIX0-492': {
        'Add': set(['MAL[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'H1 - Gly-Pro - MIX0-493': {
        'Add': set(['CPD-10814[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'H2 - p-Hydroxy phenyl Acetic_acid - MIX0-494': {
        'Add': set(['4-HYDROXYPHENYLACETATE[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    }, # lacks uptake and metabolic pathway
    'H3 - m-Hydroxy phenyl Acetic_acid - MIX0-495': {
        'Add': set(['3-HYDROXYPHENYLACETATE[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    }, # lacks uptake and metabolic pathway
    'H4 - Tyramine - MIX0-496': {
        'Add': set(['TYRAMINE[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    }, # lacks uptake and metabolic pathway
    'H5 - D-Psicose - MIX0-497': {
        'Add': set(['Ket0-D-Psicose[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    }, # lacks uptake and metabolic pathway
    'H6 - L-Lyxose - MIX0-498': {
        'Add': set(['L-LYXOSE[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'H7 - Glucuronamide - MIX0-499': None, # not in the model
    'H8 - Pyrunic acid - MIX0-500': {
        'Add': set(['PYRUVATE[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'H9 - L-Galactonic acid-γ- Lactone - MIX0-794': {
        'Add': set(['CPD-330[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'H10 - D-Galacturonic acid* - MIX0-501': {
        'Add': set(['CPD-15633[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'H11 - Phenyl ethylamine - MIX0-502': {
        'Add': set(['PHENYLETHYLAMINE[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
    'H12 - 2-Aminoethanol - MIX0-503': {
        'Add': set(['ETHANOL-AMINE[e]']),
        'Remove': set(['GLC[p]', 'CA+2[p]']),
    },
}

In [10]:
# run tests
condition_names = []
cp3_oofv = dict({})
plateID_to_condition = dict({})
for condition_name, condition in conditions.items():
    
    # store in dictionary the optimal objective function value
    temp = condition_name.split(' - ')
    plate_ID = temp[0]
    carbon_source = temp[1]
    plateID_to_condition[plate_ID] = carbon_source
    
    # solve the cvxpy problem
    objective_weights = {'secretion': 0.01, 'efficiency': 0.000001, 'kinetics': 0.000001}
    if condition == None:
        cp3_oofv[plate_ID] = None
        continue
    oofv, solution_flux, test_reaction_names, S_new, test_metabolites, test_kinetic = test_NetworkFlowModel(
                                            objective_weights,
                                            uptake_addition=condition['Add'], uptake_removal=condition['Remove'],
                                            solver_choice=cp.GLOP,)    
    
    # get the fluxes
    sim_flux = pd.DataFrame({f'sim_cp3_{condition_name}': solution_flux}, index = test_reaction_names)
    condition_names.append(f'sim_cp3_{condition_name}')
    df_all = pd.concat([df_all, sim_flux], axis=1)
    cp3_oofv[plate_ID] = oofv
    
    print(f"""Finished enviornment: {condition_name} with objective function value: {oofv}""")

Finished enviornment: A1 - Carbon Negative Control - MIX0-80 with objective function value: 42321.62630251991
Finished enviornment: A2 - L-Arabinose* - MIX0-420 with objective function value: 19670.798120538555
Finished enviornment: A3 - N-Acetyl-D- Glucosamine* - MIX0-421 with objective function value: 17421.5863067453
Finished enviornment: A4 - D-Saccharic acid - MIX0-422 with objective function value: 30114.316165394564
Finished enviornment: A5 - Succinic acid - MIX0-423 with objective function value: 32268.152330641537
Finished enviornment: A6 - D-Galactose* - MIX0-424 with objective function value: 19629.40513537185
Finished enviornment: A7 - L-Aspartic acid - MIX0-425  with objective function value: 38897.31300645821
Finished enviornment: A8 - L-Proline - MIX0-426 with objective function value: 16203.368318825991
Finished enviornment: A9 - D-Alanine - MIX0-427 with objective function value: 19969.36904139477
Finished enviornment: A10 - D-Trehalose - MIX0-428 with objective functi

In [11]:
df_all['is_new'] = 'Old Reactions'
df_all.loc[ADDED_RXNS, 'is_new'] = 'Heena\'s Reactions'
df_all.loc[fba_new_reaction_ids, 'is_new'] = 'New Reactions'

df_all['kinetic'] = [kinetic[r] if r in kinetic_reaction_ids else False for r in df_all.index]

In [12]:
df_all

Unnamed: 0,0,sim_cp3_A1 - Carbon Negative Control - MIX0-80,sim_cp3_A2 - L-Arabinose* - MIX0-420,sim_cp3_A3 - N-Acetyl-D- Glucosamine* - MIX0-421,sim_cp3_A4 - D-Saccharic acid - MIX0-422,sim_cp3_A5 - Succinic acid - MIX0-423,sim_cp3_A6 - D-Galactose* - MIX0-424,sim_cp3_A7 - L-Aspartic acid - MIX0-425,sim_cp3_A8 - L-Proline - MIX0-426,sim_cp3_A9 - D-Alanine - MIX0-427,...,sim_cp3_H4 - Tyramine - MIX0-496,sim_cp3_H5 - D-Psicose - MIX0-497,sim_cp3_H6 - L-Lyxose - MIX0-498,sim_cp3_H8 - Pyrunic acid - MIX0-500,sim_cp3_H9 - L-Galactonic acid-γ- Lactone - MIX0-794,sim_cp3_H10 - D-Galacturonic acid* - MIX0-501,sim_cp3_H11 - Phenyl ethylamine - MIX0-502,sim_cp3_H12 - 2-Aminoethanol - MIX0-503,is_new,kinetic
1-ACYLGLYCEROL-3-P-ACYLTRANSFER-RXN,0.000000e+00,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,...,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,Old Reactions,False
1.1.1.127-RXN,0.000000e+00,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,...,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,Old Reactions,False
1.1.1.127-RXN (reverse),0.000000e+00,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,...,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,Old Reactions,False
1.1.1.215-RXN (reverse),0.000000e+00,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,...,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,Old Reactions,False
1.1.1.251-RXN,0.000000e+00,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,...,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,Old Reactions,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
XYLULOKIN-RXN-CPD-24961/ATP//XYLULOSE-5-PHOSPHATE/ADP/PROTON.47.,0.000000e+00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Old Reactions,False
YIAE1-RXN (reverse),0.000000e+00,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,...,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,Old Reactions,False
glycogen-monomer-extension,9.005122e+03,-0.0,8662.0,8662.0,-0.0,-0.0,8662.0,-0.0,8662.0,-0.0,...,-0.0,-0.0,8662.0,-0.0,8662.0,-0.0,8662.0,-0.0,Old Reactions,False
DISULFOXRED-RXN[CCO-PERI-BAC]-MONOMER0-4152/MONOMER0-4438//MONOMER0-4438/MONOMER0-4152.71.DEPHOSICITDEHASE-RXN,0.000000e+00,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,...,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,Old Reactions,False


In [13]:
%store -r df_gene_usage genes_to_enzymes df_all_gene_usage new_genes

In [14]:
conditions_previous = ['basal', 'acetate', 'rich', 'anaerobic_basal', 'anaerobic_acetate', 'anaerobic_rich']
conditions_all = conditions_previous + condition_names

df_all_gene_usage = df_all_gene_usage.copy()

gene_used_before = df_gene_usage['is_used']
all_gene_used_before = df_all_gene_usage['is_used']

df_all_gene_usage = df_all_gene_usage.drop(['is_used'], axis = 1)
df_all_gene_usage_cp3 = df_all_gene_usage.copy()
reaction_catalysts = metabolism.parameters["reaction_catalysts"]

for condition in condition_names:
    df_all_gene_usage_cp3[condition] = 0
    for rxn in reaction_names:
        is_used = df_all.loc[rxn, condition]
        enzymes = reaction_catalysts.get(rxn,[])
        for enz in enzymes:
            enz = enz[:-3]
            genes = get_keys(genes_to_enzymes, enz)
            for gene in genes:
                if is_used:
                    df_all_gene_usage_cp3.loc[gene, condition] += 1
                if rxn in kinetic_reaction_ids:
                    df_all_gene_usage_cp3.loc[gene, 'has_kinetic'] = True


# tally usage
df_all_gene_usage_cp3['is_used'] = np.any(df_all_gene_usage_cp3[conditions_all] > 0, axis=1)
df_gene_usage_cp3 = df_all_gene_usage_cp3.loc[new_genes,:]

In [15]:
# previous % new gene usage
num_genes = len(gene_used_before)
assert len(gene_used_before) == len(df_gene_usage_cp3['is_used']), 'Different number of new genes'

perc_gene_usage_prev = sum(gene_used_before)/num_genes
perc_gene_usage_new  = sum(df_gene_usage_cp3['is_used'])/num_genes

print(f'% new genes usage before checkpoints is: {perc_gene_usage_prev: 0.2%}')
print(f'          % new genes usage added through microarray 1 is: {perc_gene_usage_new: 0.2%}')

% new genes usage before checkpoints is:  4.56%
          % new genes usage added through microarray 1 is:  21.17%


# Add checkpoint 2 reactions and condition testings

In [16]:
conditions = {
    'G7408-α-D-fructopyranose': {
        'Add': set(['CPD-10725[e]']),
        'Remove': set(['GLC[p]']),
    },
    'G7408-β-D-fructopyranose': {
        'Add': set(['CPD-10726[e]']),
        'Remove': set(['GLC[p]']),
    },
    'G7408-keto-D-fructose':{
        'Add': set(['CPD-15382[e]']),
        'Remove': set(['GLC[p]']),
    },
    'EG11724-KO_EG11691':{
        'Add': set(['ADENINE[e]']),
        'Remove': set([]),
        'Remove Reaction': ['TRANS-RXN0-577']
    },
    'EG10816-α-D-ribopyranose':{
        'Add': set(['CPD-15829[e]']),
        'Remove': set(['GLC[p]'])
    },
    'EG10818-aldehydo-D-ribose':{
        'Add': set(['CPD-15818[e]']),
        'Remove': set(['GLC[p]'])
    },
    'EG11055-1-O-methyl-β-D-glucuronate':{
        'Add': set(['CPD-3611[e]']),
        'Remove': set(['GLC[p]'])
    },
    'EG12393-ADD_TARTRATE[p]':{
        'Add': set(['TARTRATE[p]']),
        'Remove': set([])
    },
    'EG11054-α-D-mannopyranose 6-phosphate':{
        'Add': set(['MANNOSE-6P[e]']),
        'Remove': set(['GLC[p]'])
    },
    'EG10172-Oxidative_Stress':{
        'Add': set(['SUPER-OXIDE[e]']),
        'Remove': set([])
    },
    'EG10780-Thiosulfate':{
        'Add': set(['S2O3[e]','HCN[e]']),
        'Remove': set(['SULFATE[p]'])
    },
    'Isoprimeverose': {
        'Add': set(['CPD0-1202[e]']),
        'Remove': set(['GLC[p]']),
    },
    'Oxamate': {
        'Add': set(['S-ALLANTOIN[e]']),
        'Remove': set(['AMMONIUM[c]', 'OXYGEN-MOLECULE[p]']),
    },
    '3-HYDROXYPHENYL-PROPIONATE':{
        'Add': set(['3-HYDROXYPHENYL-PROPIONATE[e]']),
        'Remove': set(['GLC[p]'])
    },
    '3-PHENYLPROPIONATE':{
        'Add': set(['3-PHENYLPROPIONATE[e]']),
        'Remove': set(['GLC[p]'])
    },
    'cinnate':{
        'Add': set(['CPD-10797[e]', 'CPD-674[e]']),
        'Remove': set(['GLC[p]'])
    },
    'L-galactonate':{
        'Add': set(['CPD0-1083[e]']),
        'Remove': set(['GLC[p]'])
    },
}

In [17]:
condition_names = []
for condition_name, condition in conditions.items():
    
    if 'Remove Reaction' in condition:
        remove_reaction = condition['Remove Reaction']
    else:
        remove_reaction = None
    
    objective_weights = {'secretion': 0.01, 'efficiency': 0.000001, 'kinetics': 0.000001}
    
    _, solution_flux, test_reaction_names, S_new, test_metabolites, test_kinetic = test_NetworkFlowModel(
                                            objective_weights,
                                            uptake_addition=condition['Add'], uptake_removal=condition['Remove'],
                                            remove_reaction=remove_reaction,
                                            solver_choice=cp.GLOP,
    )    
    
    # get the fluxes
    sim_flux = pd.DataFrame({f'sim_cp2_{condition_name}': solution_flux}, index = test_reaction_names)
    condition_names.append(f'sim_cp2_{condition_name}')
    df_all = pd.concat([df_all, sim_flux], axis=1)
    
    print(f"""Finished enviornment: {condition_name}""")

Finished enviornment: G7408-α-D-fructopyranose
Finished enviornment: G7408-β-D-fructopyranose
Finished enviornment: G7408-keto-D-fructose
Finished enviornment: EG11724-KO_EG11691
Finished enviornment: EG10816-α-D-ribopyranose
Finished enviornment: EG10818-aldehydo-D-ribose
Finished enviornment: EG11055-1-O-methyl-β-D-glucuronate
Finished enviornment: EG12393-ADD_TARTRATE[p]
Finished enviornment: EG11054-α-D-mannopyranose 6-phosphate
Finished enviornment: EG10172-Oxidative_Stress
Finished enviornment: EG10780-Thiosulfate
Finished enviornment: Isoprimeverose
Finished enviornment: Oxamate
Finished enviornment: 3-HYDROXYPHENYL-PROPIONATE
Finished enviornment: 3-PHENYLPROPIONATE
Finished enviornment: cinnate
Finished enviornment: L-galactonate


# calculate % gene usage

In [18]:
df_all_gene_usage_cp3_w_cp2 = df_all_gene_usage_cp3.copy()
reaction_catalysts = metabolism.parameters["reaction_catalysts"]
conditions_all = conditions_all + condition_names
for condition in condition_names:
    df_all_gene_usage_cp3_w_cp2[condition] = 0
    for rxn in reaction_names:
        is_used = df_all.loc[rxn, condition]
        enzymes = reaction_catalysts.get(rxn,[])
        for enz in enzymes:
            enz = enz[:-3]
            genes = get_keys(genes_to_enzymes, enz)
            for gene in genes:
                if is_used:
                    df_all_gene_usage_cp3_w_cp2.loc[gene, condition] += 1
                if rxn in kinetic_reaction_ids:
                    df_all_gene_usage_cp3_w_cp2.loc[gene, 'has_kinetic'] = True


# tally usage
df_all_gene_usage_cp3_w_cp2['is_used'] = np.any(df_all_gene_usage_cp3_w_cp2[conditions_all] > 0, axis=1)
df_gene_usage_cp3_w_cp2 = df_all_gene_usage_cp3_w_cp2.loc[new_genes,:]

In [19]:
df_all_gene_usage_cp3_GLOP = df_all_gene_usage_cp3_w_cp2.copy()

In [20]:
%store df_all_gene_usage_cp3_GLOP
# %store df_gene_usage_cp3_GLOP

Stored 'df_all_gene_usage_cp3_GLOP' (DataFrame)


In [None]:
# previous % new gene usage
num_genes = len(gene_used_before)
assert len(gene_used_before) == len(df_gene_usage_cp3_w_cp2['is_used']), 'Different number of new genes'

perc_gene_usage_prev = sum(gene_used_before)/num_genes
perc_gene_usage_cp3_w_cp2  = sum(df_gene_usage_cp3_w_cp2['is_used'])/num_genes

print(f'% new genes usage before checkpoint 1 and 2 is: {perc_gene_usage_prev: 0.2%}')
print(f'         %new genes enabled by microarray 1 is: {perc_gene_usage_new: 0.2%}')
print(f'          % new genes usage at checkpoint 3 is: {perc_gene_usage_cp3_w_cp2: 0.2%}')

In [None]:
%store -r df_all_gene_usage
# % all gene usage
num_genes = len(df_all_gene_usage)
perc_all_gene_usage_basal = sum(df_all_gene_usage_cp3_w_cp2.basal > 0)/num_genes * 100
perc_all_gene_usage_basic_conditions = sum(df_all_gene_usage['is_used'])/num_genes * 100
perc_all_gene_usage_cp3_w_cp2  = sum(df_all_gene_usage_cp3_w_cp2['is_used'])/num_genes * 100

perc_all = [perc_all_gene_usage_basal, perc_all_gene_usage_basic_conditions, perc_all_gene_usage_cp3_w_cp2]
perc_all

# Plot 1: Gene Usage Histogram at checkpoint 3

In [None]:
%store -r df_all_gene_usage_cp1 df_all_gene_usage_cp2

In [None]:
# % new gene usage
df_gene_usage = df_all_gene_usage.loc[new_genes,:]
df_gene_usage_cp1 = df_all_gene_usage_cp1.loc[new_genes,:]
df_gene_usage_cp2 = df_all_gene_usage_cp2.loc[new_genes,:]

num_genes = len(df_gene_usage)
perc_gene_usage_basal = sum(df_gene_usage_cp2.basal > 0)/num_genes * 100
perc_gene_usage_basic_conditions = sum(gene_used_before)/num_genes * 100
perc_gene_usage_cp1  = sum(df_gene_usage_cp1['is_used'])/num_genes * 100
perc_gene_usage_cp2  = sum(df_gene_usage_cp2['is_used'])/num_genes * 100
perc_gene_usage_cp3  = sum(df_gene_usage_cp3_w_cp2['is_used'])/num_genes * 100

perc = [perc_gene_usage_basal, perc_gene_usage_basic_conditions, perc_gene_usage_cp1, perc_gene_usage_cp2, perc_gene_usage_cp3] 
x_label = ['Basal', 'Basal, Acetate, Rich, Anaerobic', 'Checkpoint 1', 'Checkpoint 2', 'Cp2 + Microarray 1']

In [None]:
# % all gene usage
num_genes = len(df_all_gene_usage)
perc_all_gene_usage_basal = sum(df_all_gene_usage_cp2.basal > 0)/num_genes * 100
perc_all_gene_usage_basic_conditions = sum(df_all_gene_usage['is_used'])/num_genes * 100
perc_all_gene_usage_cp1  = sum(df_all_gene_usage_cp1['is_used'])/num_genes * 100
perc_all_gene_usage_cp2  = sum(df_all_gene_usage_cp2['is_used'])/num_genes * 100
perc_all_gene_usage_cp3_w_cp2  = sum(df_all_gene_usage_cp3_w_cp2['is_used'])/num_genes * 100


perc_all = [perc_all_gene_usage_basal, perc_all_gene_usage_basic_conditions, perc_all_gene_usage_cp1, perc_all_gene_usage_cp2, perc_all_gene_usage_cp3_w_cp2]

In [None]:
# Create a DataFrame
df = pd.DataFrame({
    "Condition": x_label,
    "New Metabolic Gene": perc,
    "All Metabolic Gene": perc_all,
})

# Reshape to long format
df_long = df.melt(id_vars="Condition", 
                  value_vars=["New Metabolic Gene", "All Metabolic Gene"],
                  var_name="Gene Group",
                  value_name="Percent Usage")

# Create grouped bar plot
fig = px.bar(
    df_long,
    x="Condition",
    y="Percent Usage",
    color="Gene Group",              # creates a legend
    barmode="group",                 # side-by-side bars
    text="Percent Usage",
    title="New vs All Metabolic Gene Usage by Condition",
    labels={"Percent Usage": "Percent gene usage (%)"},
    color_discrete_map={
        "New Metabolic Gene": "#4C78A8",
        "All Metabolic Gene": "#c26426"
    }
)

# Customize appearance
fig.update_traces(texttemplate='%{text:.2f}%', textposition='outside')
fig.update_layout(
    paper_bgcolor='rgba(0,0,0,0)',
    yaxis_title="Percent gene usage (%)",
    # yaxis=dict(range=[0, 60]),
    xaxis_title=None,
    xaxis_tickangle=-35,
    uniformtext_minsize=8,
    uniformtext_mode='hide',
    margin=dict(t=50, b=50, l=50, r=50),
    width=800,   # width in pixels
    height=500
)

fig.show(renderer='browser')
# save 
# fig.write_image("notebooks/Heena notebooks/Metabolism_New Genes/out/gene_usage_histogram_cp3.png", scale=5, width=800, height=500)

# Plot 2: Histogram showing optimal objective function value distribution

In [None]:
oofv_val = list(cp3_oofv.values())
oofv_basal, _, _, _, _, _ = test_NetworkFlowModel(objective_weights)

In [None]:
values = [v if v is not None else np.nan for v in oofv_val]

# Create histogram
hist = go.Histogram(
    x=values,
    nbinsx=50,
    marker_color='lightblue',
    name="Distribution"
)

# Highlight the first value
vline = go.Scatter(
    x=[values[0], values[0]],
    y=[0, 15],  
    mode="lines+text",
    line=dict(color="Navy", width=3, dash="dash"),
    text=["Neg Control"],
    textposition="bottom center",
    showlegend=False
)

# Highlight the basal value
vline2 = go.Scatter(
    x=[oofv_basal, oofv_basal],
    y=[0, 20],  
    mode="lines+text",
    line=dict(color="Navy", width=3, dash="dash"),
    text=["Basal"],
    textposition="bottom center",
    showlegend=False
)

# Combine and plot
fig = go.Figure(data=[hist, vline, vline2])
fig.add_vrect(x0=40000, x1=43500, line_width=0, fillcolor="LightSalmon", opacity=0.1, annotation_text='No Growth', annotation_position="top", annotation_font_size = 16, layer="below")
fig.add_vrect(x0=15000, x1=26000, line_width=0, fillcolor="aqua", opacity=0.1, annotation_text='Growth', annotation_position="top", annotation_font_size = 16, layer="below")
fig.add_vrect(x0=26500, x1=39500, line_width=0, fillcolor="yellow", opacity=0.1, annotation_text='Slow Growth', annotation_position="top", annotation_font_size = 16, layer="below")

fig.update_layout(
    paper_bgcolor='rgba(0,0,0,0)',
    title="Histogram with First Value Highlighted",
    xaxis_title="Optimal Objective Function Value",
    yaxis_title="Count",
    bargap=0.05
)
fig.show(renderer='browser')
# fig.write_image("notebooks/Heena notebooks/Metabolism_New Genes/out/oofv_histogram_cp3.png", scale=5, width=1000, height=500)

# Plot 3: Heatmap showing 'growth' 'no growth'

In [None]:
def discrete_colorscale(bvals, colors):
    """
    bvals - list of values bounding intervals/ranges of interest
    colors - list of rgb or hex colorcodes for values in [bvals[k], bvals[k+1]],0<=k < len(bvals)-1
    returns the plotly  discrete colorscale
    """
    if len(bvals) != len(colors)+1:
        raise ValueError('len(boundary values) should be equal to  len(colors)+1')
    bvals = sorted(bvals)     
    nvals = [(v-bvals[0])/(bvals[-1]-bvals[0]) for v in bvals]  #normalized values
    
    dcolorscale = [] #discrete colorscale
    for k in range(len(colors)):
        dcolorscale.extend([[nvals[k], colors[k]], [nvals[k+1], colors[k]]])
    return dcolorscale  

In [None]:
bvals = [14000, 25500, 40000, 43500]
colors = ['#dda0dd', '#ffb6c2', '#fcf3bf']
dcolorsc = discrete_colorscale(bvals, colors)
dcolorsc

In [None]:
bvals = np.array(bvals)
tickvals = [np.mean(bvals[k:k+2]) for k in range(len(bvals)-1)] #position with respect to bvals where ticktext is displayed
ticktext = [f'<{bvals[1]}'] + [f'{bvals[k]}-{bvals[k+1]}' for k in range(1, len(bvals)-2)]+[f'>{bvals[-2]}']
tickvals

In [None]:
# Plate dimensions
rows = list("ABCDEFGH")
cols = list(range(1, 13))

# Create a 2D array with None as default
matrix = []
label = []
for r in rows:
    row_data = []
    row_text = []
    for c in cols:
        key = f"{r}{c}"
        row_data.append(cp3_oofv.get(key))  # Fill missing wells with None
        text = plateID_to_condition.get(key)
        text_segment = text.split(' ')
        text = '<br>'.join(text_segment)
        row_text.append(text)
    matrix.append(row_data)
    label.append(row_text)

In [None]:
import plotly.graph_objects as go
heatmap = go.Heatmap(z=matrix, 
                     x=[str(c) for c in cols],
                     y=rows,
                     text=label,
                     texttemplate="%{text}",
                     textfont={"size": 10},
                     colorscale = dcolorsc, 
                     colorbar = dict(thickness=25, 
                                     tickvals=tickvals, 
                                     ticktext=ticktext),
                     )

fig = go.Figure(data=[heatmap])
fig.update_layout(
    paper_bgcolor='rgba(0,0,0,0)',
    title="Plate Reader Heatmap with Values",
    xaxis_title="Column",
    yaxis_title="Row",
    yaxis_autorange='reversed'  
)

fig.show(renderer='browser')
# fig.write_image("notebooks/Heena notebooks/Metabolism_New Genes/out/heatmap_microarray1_C_source.png", scale=5, width=1200, height=800)

# Plot 4: combined kinetic sim versus target plot - no more shiny

In [None]:
kc_target_cp3 = pd.DataFrame(fba["target_kinetic_fluxes"], columns= kinetic_reaction_ids).mean(axis=0).copy()
cols = conditions_all[6:] + ['is_new', 'kinetic']
df_kc_sim_cp3 = df_all.loc[kinetic_reaction_ids, cols]

In [None]:
df_kc_sim_cp3_log = df_kc_sim_cp3.copy()
df_kc_sim_cp3_log['kinetic'] = df_kc_sim_cp3_log['kinetic'].apply(pd.to_numeric, errors='coerce')
cols = conditions_all[6:] + [ 'kinetic']
df_kc_sim_cp3_log.loc[:,cols] += 1e-6
df_kc_sim_cp3_log.loc[:,cols] = np.log10(df_kc_sim_cp3_log.loc[:,cols])
df_kc_sim_cp3_log

In [None]:
# work with df_kc_cp1 or df_kc_cp1_log
# create a dataframe with rows as reactions, col1 as flux closest to target, col2 being condition(s) that have that flux
df_kc_cp3_log_combined = pd.DataFrame({'flux': np.NaN, 'condition': 'none', 'target': df_kc_sim_cp3_log['kinetic'], 'is_new': df_kc_sim_cp3_log['is_new']}, index=kinetic_reaction_ids)
condition_names = conditions_all[6:]
# create temporary dataframe that tracks the difference between sim flux and target
temp = df_kc_sim_cp3_log.copy()
temp.replace(0, np.nan, inplace=True)
for condition in condition_names:
    temp.loc[:, condition] = np.abs(temp.loc[:, condition] - df_kc_sim_cp3_log['kinetic'].values)

# parse by reaction (index) to find the conditions with flux closest to target (minimal difference)
for reaction in temp.index:
    # get the condition with the minimum difference
    min_diff = temp.loc[reaction, condition_names].min()
    # get the condition(s) with the minimum difference
    conditions = temp.loc[reaction, condition_names][temp.loc[reaction, condition_names] == min_diff].index.tolist()
    # update the dataframe
    if np.all(df_kc_sim_cp3_log.loc[reaction,condition_names] == -6):
        df_kc_cp3_log_combined.loc[reaction, 'flux'] = -6
    else:
        df_kc_cp3_log_combined.loc[reaction, 'flux'] = df_kc_sim_cp3_log.loc[reaction,conditions[0]]
        if conditions == condition_names:
            df_kc_cp3_log_combined.loc[reaction, 'condition'] = 'all'
        else:
            df_kc_cp3_log_combined.loc[reaction, 'condition'] = ', '.join(conditions)
# 
# df_kc_cp1_log_combined = df_kc_cp1_combined.copy()         
# df_kc_cp1_log_combined['target'] = df_kc_cp1_log_combined['target'].apply(pd.to_numeric, errors='coerce')
# df_kc_cp1_log_combined.loc[:,['flux','target']] += 1e-6
# df_kc_cp1_log_combined.loc[:,['flux','target']] = np.log10(df_kc_cp1_log_combined.loc[:,['flux','target']])
# 
df_kc_cp3_log_combined.to_csv('notebooks/Heena notebooks/Metabolism_New Genes/df_kc_cp3_combined.csv', index=True)

In [None]:
df_kc_cp3_log_combined

In [None]:
# plotly figure
import plotly.express as px
fig = px.scatter(
    df_kc_cp3_log_combined,
    x='target',
    y='flux',
    color='is_new',
    hover_name=df_kc_cp3_log_combined.index,
    hover_data=['condition'],
    labels={'flux': 'Simulated Flux (log10)', 'target': 'Target Flux (log10)'},
    title='Kinetic Simulated Flux vs Target Flux at Checkpoint 3',
)
fig.update_traces(marker=dict(size=10, line=dict(width=2, color='DarkSlateGrey')), selector=dict(mode='markers'))
fig.update_layout(
    paper_bgcolor='rgba(0,0,0,0)',
    xaxis_title="Target Flux (log10)",
    yaxis_title="Simulated Flux (log10)",
    legend_title_text='New Reactions',
    width=800,
    height=600
)
fig.show(renderer='browser')

# Scratch: Get difference of gene usage between checkpoint 2 and 3

In [None]:
# get the difference of gene usage between checkpoint 2 and 3
df_gene_usage_diff = df_all_gene_usage_cp3_w_cp2.copy()
df_all_gene_usage_cp3_w_cp2['is_used']

In [None]:
df_all_gene_usage_cp2['is_used']

In [None]:
df_all_gene_usage_cp3_w_cp2['is_used'] - df_all_gene_usage_cp2['is_used']

In [None]:
cp2_new = df_all_gene_usage_cp2.loc[new_genes].copy()
cp3_new = df_all_gene_usage_cp3_w_cp2.loc[new_genes].copy()

In [None]:
temp = cp2_new.join(cp3_new['is_used'], lsuffix='_cp2', rsuffix='_cp3', how='outer')
temp = temp[['is_used_cp2','is_used_cp3']]

In [None]:
temp['diff'] = ~temp['is_used_cp2'] & temp['is_used_cp3']
temp[temp['diff']]

In [None]:
sum(temp['diff'])

In [None]:
cp2_new.loc['EG20053']

In [None]:
cp3_new.loc['EG20053']

In [1]:
df_all_gene_usage_cp3_w_cp2

NameError: name 'df_all_gene_usage_cp3_w_cp2' is not defined