# Implementing Microarray3+4 and Checkpoint Gene Fixes

Description
This script aims to test all carbon source media defined by Ecocyc (https://ecocyc.org/ECOLI/new-image?object=Growth-Media): individual growth media (the ones with data), microarray Plate ID: Biolog PM3 and 4. This script will use the basal simulation after cp4 and the implementation of microarray 2 as the base (which incorporates checkpoint 3 and microarray 1 too), and runs FBA externally to the model to see if model solves and see if there is growth (needs definition) and analyze gene usage. It will also implement some of the easy fixes that were identified in all checkpoint 5 and onward!

Below are the specific easy gene fixes that were implemented in this notebook:
<br>
mtlA-EG10615, mmuP-G6135, panF-EG10685, frlA-EG12908, psuK-EG11646, shiA-G7067, thiK-G6566
<br>

See an upward trend in the gene usage from this new implementation.


In [1]:
from html.parser import interesting_normal

import numpy as np
import ast
import pandas as pd
import os
import matplotlib.pyplot as plt
import dill
import requests
import xmltodict
import cvxpy as cp
import itertools
import networkx as nx
import plotly.express as px

from scipy.special import logsumexp

%matplotlib inline
# sns.set(style='dar|kgrid', palette='viridis', context='talk')

os.chdir(os.path.expanduser('~/dev/vivarium-ecoli')) 

from ecoli.processes.metabolism_redux_classic import NetworkFlowModel, FlowResult
from ecoli.library.schema import numpy_schema, bulk_name_to_idx, listener_schema, counts
from ecoli.processes.registries import topology_registry
TOPOLOGY = topology_registry.access("ecoli-metabolism-redux")
import plotly.express as px

In [2]:
# load checkpoint 2 model
time = '400'
date = '2025-05-15'
experiment = 'NEW_NewGenes_checkpoint2'
condition = 'basal'
entry = f'{experiment}_{time}_{date}'
folder = f'out/metabolism-comparison/{condition}/{entry}/'

output = np.load(folder + '0_output.npy',allow_pickle='TRUE').item()
# output = np.load(r"out/geneRxnVerifData/output_glc.npy", allow_pickle=True, encoding='ASCII').tolist()
output = output['agents']['0']
fba = output['listeners']['fba_results']
bulk = pd.DataFrame(output['bulk'])
f = open(folder + 'agent_steps.pkl', 'rb')
agent = dill.load(f)
f.close()

In [3]:
# get commonly stored variables
metabolism = agent['ecoli-metabolism-redux-classic']
stoichiometry = metabolism.stoichiometry.copy()
reaction_names = metabolism.reaction_names
fba_new_reaction_ids = metabolism.parameters["fba_new_reaction_ids"]
fba_reaction_ids_to_base_reaction_ids = metabolism.parameters['fba_reaction_ids_to_base_reaction_ids']
metabolites = metabolism.metabolite_names.copy()
binary_kinetic_idx = metabolism.binary_kinetic_idx
exchange_molecules = metabolism.exchange_molecules

S = stoichiometry .copy()
S = pd.DataFrame(S, index=metabolites , columns=reaction_names )
homeostatic_count = pd.DataFrame(fba["homeostatic_metabolite_counts"], columns=metabolism.homeostatic_metabolites).loc[24, :]
homeostatic = pd.DataFrame(fba["target_homeostatic_dmdt"], columns=metabolism.homeostatic_metabolites).loc[24, :]
maintenance = pd.DataFrame(fba["maintenance_target"][1:], columns=['maintenance_reaction']).iat[24, 0]
kinetic = pd.DataFrame(fba["target_kinetic_fluxes"], columns=metabolism.kinetic_constraint_reactions).loc[24, :].copy()

In [4]:
# parameters that are the same across the two simulation
kinetic_reaction_ids = metabolism.kinetic_constraint_reactions
allowed_exchange_uptake = metabolism.allowed_exchange_uptake
FREE_RXNS = ["TRANS-RXN-145", "TRANS-RXN0-545", "TRANS-RXN0-474"]
ADDED_RXNS = ['HS-TRANSPORT-RXN-CPD0-1202', 'HS-TRANSPORT-RXN-CPD0-1202 (reverse)',
                   'HS-TRANSPORT-RXN[CCO-OUTER-MEM]-OXAMATE', 'HS-TRANSPORT-RXN[CCO-OUTER-MEM]-OXAMATE (reverse)',
                   'HS-TRANSPORT-RXN[CCO-PM-BAC-NEG]-OXAMATE', 'HS-TRANSPORT-RXN[CCO-PM-BAC-NEG]-OXAMATE (reverse)',
                   'HS-BETA-GLUCURONID-RXN_CPD-3611//METOH', 'HS-SPONTANEOUS-TRANSPORT[CCO-OUTER-MEM]-HCN', 'HS-SPONTANEOUS-TRANSPORT[CCO-OUTER-MEM]-HCN (reverse)',
                   'HS-SPONTANEOUS-TRANSPORT[CCO-PM-BAC-NEG]-HCN','HS-SPONTANEOUS-TRANSPORT[CCO-PM-BAC-NEG]-HCN (reverse)']

# Define Functions

In [51]:
def get_subset_S(S, met_of_interest):
    S_met = S.loc[met_of_interest, :]
    S_met = S_met.loc[:,~np.all(S_met == 0, axis=0)]
    return S_met, S_met.columns

def test_NetworkFlowModel(objective_weights,
                          uptake_addition = set([]), uptake_removal = set([]), new_exchange_molecules = set([]),
                          add_metabolite = None, add_reaction = None, add_kinetic = None, remove_reaction = None,
                          add_homeostatic_demand = None, force_reaction = None):
    # update exchanges
    uptake = metabolism.allowed_exchange_uptake.copy()
    uptake = set(uptake)
    uptake = uptake | uptake_addition
    uptake = uptake - uptake_removal

    exchange_molecules = metabolism.exchange_molecules.copy()
    exchange_molecules = exchange_molecules | new_exchange_molecules

    # update stoichiometry
    reaction_names = metabolism.reaction_names.copy()
    kinetic_reaction_ids = metabolism.kinetic_constraint_reactions.copy()
    kinetic = pd.DataFrame(fba["target_kinetic_fluxes"], columns=metabolism.kinetic_constraint_reactions).loc[24, :].copy()
    metabolites = metabolism.metabolite_names.copy()
    homeostatic = pd.DataFrame(fba["target_homeostatic_dmdt"], columns=metabolism.homeostatic_metabolites).loc[24, :].copy()
    homeostatic_counts = homeostatic_count.copy() * metabolism.counts_to_molar.asNumber()

    S_new = stoichiometry.copy()

    if add_metabolite is not None: # add to metabolites list because they are currently not included in the model
        for m in add_metabolite:
            if m not in metabolites:
                metabolites.append(m)
        # append rows of zeros to S_new of length add_metabolite
        S_new = np.concatenate((S_new, np.zeros((len(add_metabolite), S_new.shape[1]))), axis=0)
        print(S_new.shape)

    if add_reaction is not None:
        # assert add_reaction is a dictionary
        assert isinstance(add_reaction, dict)

        for r,s in add_reaction.items():
            if r not in reaction_names:
                reaction_names.append(r)
            # append columns of reaction stoich to S_new of length add_reaction
            new_reaction = np.zeros((S_new.shape[0], 1))
            for m, v in s.items():
                new_reaction[metabolites.index(m), 0] = v
            S_new = np.concatenate((S_new, new_reaction), axis=1)
        print(S_new.shape)

    if add_kinetic is not None:
        # assert add_kinetic is a dictionary
        assert isinstance(add_kinetic, dict)

        for r, v in add_kinetic.items():
            if r not in kinetic_reaction_ids:
                kinetic_reaction_ids.append(r)
                kinetic[r] = v

    if remove_reaction is not None:
        for r in remove_reaction:
            r_idx = reaction_names.index(r)
            S_new = np.delete(S_new, r_idx, axis=1)
            reaction_names.remove(r)
            if r in kinetic_reaction_ids:
                kinetic_reaction_ids.remove(r)
                del kinetic[r]

    if force_reaction is not None:
        force_reaction_idx = np.array([reaction_names.index(r) for r in force_reaction])
    else:
        force_reaction_idx = force_reaction

    if add_homeostatic_demand is not None:
        # assert add_homeostatic_demand is a set
        assert isinstance(add_homeostatic_demand, list)

        for met in add_homeostatic_demand:
            homeostatic[met] = 100
            homeostatic_counts[met] = 1

    # Solve NetworkFlowModel
    model = NetworkFlowModel(
            stoich_arr=S_new,
            metabolites=metabolites,
            reactions=reaction_names,
            homeostatic_metabolites=list(dict(homeostatic).keys()),
            kinetic_reactions=kinetic_reaction_ids,
            free_reactions=FREE_RXNS)
    model.set_up_exchanges(exchanges=exchange_molecules, uptakes=uptake)
    solution: FlowResult = model.solve(
            homeostatic_concs=homeostatic_counts, # in conc
            homeostatic_dm_targets=np.array(list(dict(homeostatic).values())), # *10^7
            maintenance_target=maintenance, # *10^6 ish
            kinetic_targets=np.array(list(dict(kinetic).values())), # *10^6 ish
            # binary_kinetic_idx=binary_kinetic_idx, #7646
            binary_kinetic_idx=None,
            force_flow_idx=force_reaction_idx,
            objective_weights=objective_weights, #same
            upper_flux_bound= 1000000000, # increase to 10^9 because notebook runs FlowResult using Counts, WC runs using conc.
            solver=cp.GLOP) #SCS. ECOS
    print(f'Optimal objective function reached is: {solution.objective}')
    return solution.objective, solution.velocities, reaction_names, S_new, metabolites, kinetic

# Create DataFrames to Store Results

In [6]:
# all reactions
sim = pd.DataFrame(fba["estimated_fluxes"], columns= reaction_names).mean(axis=0).copy()
# kinetic reactions
kc_target_cp2 = pd.DataFrame(fba["target_kinetic_fluxes"], columns= kinetic_reaction_ids).mean(axis=0).copy()

In [7]:
#accumulating the simulations
df_all = sim.copy()
df_all.columns = ['sim_cp2_basal']

### Implement the changes added through Microarray 3 (Nitrogen Sole Source) (PM3 on EcoCyc)

In [8]:
# microarray plate 3: ~ tests 96 Nitrogen sources; 86 in the model
conditions = {
    'A1 - nitrogen negative control- MIX0-81': {
        'Add': set([]),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),    },
    'A2 - AMMONIUM - MIX0-590': {
        'Add': set(['AMMONIA[p]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),    },
    'A3 - Nitrite - MIX0-591': {
        'Add': set(['NITRITE[p]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),    },
    'A4 - Nitrate - MIX0-592': {
        'Add': set(['NITRITE[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),    },
    'A5 - Urea - MIX0-593': {
        'Add': set(['UREA[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),    },
    'A6 - Biuret - MIX0-594': None, # not in the model
    'A7 - L-Alanine - MIX0-595 ': {
        'Add': set(['L-ALPHA-ALANINE[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),    },
    'A8 - L-Arginine - MIX0-596': {
        'Add': set(['ARG[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),    },
    'A9 - L-Asparagine - MIX0-597': {
        'Add': set(['ASN[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),    },
    'A10 - L-Aspartic acid - MIX0-598': {
        'Add': set(['L-ASPARTATE[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),    },
    'A11 - L-Cysteine - MIX0-599': {
        'Add': set(['CYS[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),    },
    'A12 - L-Glutamic acid - MIX0-600': {
        'Add': set(['GLT[p]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),    },
    'B1 - L-Glutamine - MIX0-601': {
        'Add': set(['GLN[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),    },
    'B2 - Glycine - MIX0-602': {
        'Add': set(['GLY[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),    },
    'B3 - L-Histidine - MIX0-603': {
        'Add': set(['HIS[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),    },
    'B4 - L-Isoleucine - MIX0-604': {
        'Add': set(['ILE[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),    },
    'B5 - L-Leucine - MIX0-605': {
        'Add': set(['LEU[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),    },
    'B6 - L-Lysine - MIX0-606': {
        'Add': set(['LYS[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),    },
    'B7 - L-Methionine - MIX0-607': {
        'Add': set(['MET[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),    },
    'B8 - L-Phenylalanine - MIX0-608': {
        'Add': set(['PHE[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),    },
    'B9 - L-Proline - MIX0-609': {
        'Add': set(['PRO[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),    },
    'B10 - L-Serine - MIX0-610': {
        'Add': set(['SER[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),    },
    'B11 - L-Threonine - MIX0-611': {
        'Add': set(['THR[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),
    },
    'B12 - L-Tryptophan - MIX0-612': {
        'Add': set(['TRP[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),
    },
    'C1 - L-Tyrosine - MIX0-613': {
        'Add': set(['TYR[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),
    },
    'C2 - L-Valine - MIX0-614': {
        'Add': set(['VAL[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),
    },
    'C3 - D-Alanine - MIX0-615': {
        'Add': set(['D-ALANINE[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),
    },
    'C4 - D-Asparagine - MIX0-616': {
        'Add': set(['CPD-3633[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),
    },
    'C5 - D-Aspartic acid - MIX0-617' : {
        'Add': set(['CPD-302[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),    },
    'C6 - D-Glutamic acid - MIX0-618': {
        'Add': set(['D-GLT[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),
    },
    'C7 - D-Lysine - MIX0-619': {
        'Add': set(['CPD-219[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),
    },
    'C8 - D-Serine - MIX0-620': {
        'Add': set(['D-SERINE[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),
    },
    'C9 - D-Valine - MIX0-621': {
        'Add': set(['CPD-3642[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),
    },
    'C10 - L-Citrulline - MIX0-622': {
        'Add': set(['L-CITRULLINE[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),
    },
    'C11 - L-Homoserine - MIX0-623': {
        'Add': set(['HOMO-SER[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),
    },
    'C12 - L-Ornithine - MIX0-624': {
        'Add': set(['L-ORNITHINE[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),
    },

    'D1 - N-Acetyl-L- Glutamic acid - MIX0-792': {
        'Add': set(['ACETYL-GLU[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),
    },
    'D2 - N-Phthaloyl-L- Glutamic acid - MIX0-625': None, #not in the model
    'D3 - L-Pyroglutamic acid - MIX0-626': {
        'Add': set(['5-OXOPROLINE[c]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),
    },
    'D4 - Hydroxylamine - MIX0-627': {
        'Add': set(['HYDROXYLAMINE[c]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),
    },
    'D5 - Methylamine - MIX0-628': {
        'Add': set(['METHYLAMINE[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),
    },
    'D6 - N-Amylamine - MIX0-629': {
        'Add': set(['CPD-3681[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),
    },
    'D7 - N-Butylamine - MIX0-630': None, #not in the model
    'D8 - Ethylamine - MIX0-631': None, #not in the model

    'D9 - Ethanolamine - MIX0-632': {
        'Add': set(['ETHANOL-AMINE[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),
    },
    'D10 - Ethylenediamine - MIX0-633': {
        'Add': set(['CPD-3682[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),
    },
    'D11 - Putrescine - MIX0-634': {
        'Add': set(['PUTRESCINE[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),
    },
    'D12 - Agmatine - MIX0-635': {
        'Add': set(['AGMATHINE[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),
    },
    'E1 - Histamine - MIX0-636': {
        'Add': set(['HISTAMINE[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),
    },

    'E2 - b-Phenylethylamine - MIX0-637': {
        'Add': set(['PHENYLETHYLAMINE[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),
    },
    'E3 - Tyramine - MIX0-638': {
        'Add': set(['TYRAMINE[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),
    },
    'E4 - Acetamide - MIX0-639': {
        'Add': set(['ACETAMIDE[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),
    },
    'E5 - Formamide - MIX0-640': None, # not in the model
    'E6 - Glucuronamide - MIX0-641': None, # not in the model
    'E7 - DL-Lactamide - MIX0-781': {
        'Add': set(['CPD-13407[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),
    },
    'E8 - D-Glucosamine - MIX0-642': {
        'Add': set(['GLUCOSAMINE[p]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),
    },
    'E9 - D-Galactosamine - MIX0-643': None, #not in the model
    'E10 - D-Mannosamine - MIX0-785': None, #not in the model
    'E11 - N-Acetyl-D- Glucosamine - MIX0-644': {
        'Add': set(['N-acetyl-D-glucosamine[p]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),
    },
    'E12 - N-Acetyl-D- Galactosamine - MIX0-645': None, #not in the model

    'F1 - N-Acetyl-D- Mannosamine - MIX0-646': {
        'Add': set(['N-ACETYL-BETA-D-MANNOSAMINE[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),
    },
    'F2 - Adenine - MIX0-647': {
        'Add': set(['ADENINE[p]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),
    },
    # 'F3 - Adenosine  - MIX0-648': {
    #     'Add': set(['ADENOSINE[e]']),
    #     'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),
    # },
    'F3 - Adenosine  - MIX0-648': None,
    'F4 - Cytidine - MIX0-649': {
        'Add': set(['CYTIDINE[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),
    },
    'F5 - Cytosine - MIX0-650': {
        'Add': set(['CYTOSINE[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),
    },

    'F6 - Guanine - MIX0-651': {
        'Add': set(['GUANINE[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),
    },

    'F7 - Guanosine - MIX0-652': {
        'Add': set(['GUANOSINE[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),
    },
    'F8 - Thymine - MIX0-653': {
        'Add': set(['THYMINE[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),
    },
    'F9 - Thymidine - MIX0-654': {
        'Add': set(['THYMIDINE[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),
    },
    'F10 - Uracil - MIX0-655': {
        'Add': set(['URACIL[p]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),
    },
    'F11 - Uridine - MIX0-656': {
        'Add': set(['URIDINE[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),
    },
    'F12 - Inosine - MIX0-657': {
        'Add': set(['INOSINE[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),
    },

    'G1 - Xanthine - MIX0-658': {
        'Add': set(['XANTHINE[p]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),
    },
    'G2 - Xanthosine - MIX0-659': {
        'Add': set(['XANTHOSINE[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),
    },
    'G3 - Uric acid - MIX0-660': {
        'Add': set(['URATE[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),
    },
    'G4 - Alloxan - MIX0-661': {
        'Add': set(['CPD-3684[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),
    },
    'G5 - Allantoin - MIX0-662': {
        'Add': set(['ALLANTOIN[p]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),
    },
    'G6 - Parabanic acid - MIX0-663': {
        'Add': set(['CPD-3685[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),
    },
    'G7 - DL-α- Amino-N- Butyric acid - MIX0-664': None, #not in the model
    'G8 - γ- Amino-N- Butyric acid - MIX0-665': {
        'Add': set(['4-AMINO-BUTYRATE[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),
    },
    'G9 - ε- Amino-N- Caproic acid - MIX0-666': {
        'Add': set(['CPD-884[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),
    },
    'G10 - DL-α- Amino- Caprylic acid - MIX0-667': {
        'Add': set(['CPD-3687[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),
    },
    'G11 - δ- Amino-N- Valeric acid - MIX0-668': {
        'Add': set(['5-AMINOPENTANOATE[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),
    },
    'G12 - α- Amino-N- Valeric acid - MIX0-669': {
        'Add': set(['L-2-AMINOPENTANOIC-ACID[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),
    },
    'H1 - Ala-Asp - MIX0-782': {
        'Add': set(['CPD-13404[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),
    },
    'H2 - Ala-Gln - MIX0-783': {
        'Add': set(['CPD-13403[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),
    },
    'H3 - Ala-Glu - MIX0-776': {
        'Add': set(['CPD0-1445[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),
    },
    'H4 - Ala-Gly - MIX0-670': {
        'Add': set(['ALA-GLY[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),
    },
    'H5 - Ala-His - MIX0-777': {
        'Add': set(['CPD-13401[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),
    },
    'H6 - Ala-Leu - MIX0-774': {
        'Add': set(['CPD-13398[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),
    },

    'H7 - Ala-Thr - MIX0-775': {
        'Add': set(['CPD-13397[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),
    },
    'H8 - Gly-Asn - MIX0-770': {
        'Add': set(['CPD-13395[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),
    },
    'H9 - Gly-Gln - MIX0-771': {
        'Add': set(['CPD-13394[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),
    },
    'H10 - Gly-Glu - MIX0-671': {
        'Add': set(['CPD-3569[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),
    },
    'H11 - Gly-Met - MIX0-766': {
        'Add': set(['CPD-13393[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),
    },
    'H12 - Met-Ala - MIX0-767': {
        'Add': set(['CPD-13390[e]']),
        'Remove': set(['AMMONIUM[c]', 'CA+2[p]']),
    },
}


In [9]:
# run tests in order to add microarray 3 to the previous checkpoint (cp4+microarray2)
condition_names = []
cp5_oofv = dict({})
plateID_to_condition_nit = dict({})
for condition_name, condition in conditions.items():

    # store in dictionary the optimal objective function value
    temp = condition_name.split(' - ')
    plate_ID = temp[0]
    carbon_source = temp[1]
    plateID_to_condition_nit[plate_ID] = carbon_source

    # solve the cvxpy problem
    objective_weights = {'secretion': 0.001, 'efficiency': 0.000001, 'kinetics': 0.00001}
    if condition == None:
        cp5_oofv[plate_ID] = None
        continue
    oofv, solution_flux, test_reaction_names, S_new, test_metabolites, test_kinetic = test_NetworkFlowModel(
                                            objective_weights,
                                            uptake_addition=condition['Add'], uptake_removal=condition['Remove'],)

    # get the fluxes
    sim_flux = pd.DataFrame({f'sim_cp5_{condition_name}': solution_flux}, index = test_reaction_names)
    condition_names.append(f'sim_cp5_{condition_name}')
    df_all = pd.concat([df_all, sim_flux], axis=1)
    cp5_oofv[plate_ID] = oofv
    df_all_for_compare = df_all.copy()
    print(f"""Finished enviornment: {condition_name} with objective function value: {oofv}""")

Finished enviornment: A1 - nitrogen negative control- MIX0-81 with objective function value: 31572.06811338655
Finished enviornment: A2 - AMMONIUM - MIX0-590 with objective function value: 15311.33130289018
Finished enviornment: A3 - Nitrite - MIX0-591 with objective function value: 17794.492060439807
Finished enviornment: A4 - Nitrate - MIX0-592 with objective function value: 17794.961470164886
Finished enviornment: A5 - Urea - MIX0-593 with objective function value: 31572.068113341636
Finished enviornment: A7 - L-Alanine - MIX0-595  with objective function value: 16714.006202207907
Finished enviornment: A8 - L-Arginine - MIX0-596 with objective function value: 16886.82759962156
Finished enviornment: A9 - L-Asparagine - MIX0-597 with objective function value: 17917.057843669376
Finished enviornment: A10 - L-Aspartic acid - MIX0-598 with objective function value: 20979.20650391817
Finished enviornment: A11 - L-Cysteine - MIX0-599 with objective function value: 30365.16352729432
Finishe

### Implement the changes added through Microarray 4 (Phosphate/Sulfur Sole Source) (PM4 on EcoCyc)

In [10]:
#phosphate and sulfur conditions PM4 - 96 conditions 84 used in the model
conditions= {
    'A1 - phosphorus negative control- MIX0-83': {
        'Add': set([]),
        'Remove': set(['Pi[p]','CA+2[p]']),
    },
    'A2 - Phosphate - MIX0-672': {
        'Add': set(['Pi[p]']),
        'Remove': set(['Pi[p]','CA+2[p]']),
    },
    'A3 - Pyrophosphate - MIX0-673': {
        'Add': set(['PPI[p]']),
        'Remove': set(['Pi[p]','CA+2[p]']),
    },
    'A4 - Trimetaphosphate - MIX0-674': {
        'Add': set(['CPD-610[e]']),
        'Remove': set(['Pi[p]','CA+2[p]']),
    },
    'A5 - Tripolyphosphate - MIX0-675': {
        'Add': set(['P3I[e]']),
        'Remove': set(['Pi[p]','CA+2[p]']),
    },
    'A6 - Triethyl Phosphate - MIX0-676': None, # not in the model
    'A7 - Hypophosphite - MIX0-758 ': {
        'Add': set(['CPD-27[e]']),
        'Remove': set(['Pi[p]','CA+2[p]']),
    },
    'A8 - Adenosine 2- Monophosphate - MIX0-677': {
        'Add': set(['CPD-3705[e]']),
        'Remove': set(['Pi[p]','CA+2[p]']),
    },
    'A9 - Adenosine 3- Monophosphate - MIX0-759': {
        'Add': set(['CPD-3706[p]']),
        'Remove': set(['Pi[p]','CA+2[p]']),
    },
    'A10 - Adenosine 5- Monophosphate - MIX0-678': {
        'Add': set(['AMP[e]']),
        'Remove': set(['Pi[p]','CA+2[p]']),
    },
    'A11 - Adenosine 2,3-Cyclic Monophosphate - MIX0-679': {
        'Add': set(['CPD-3707[e]']),
        'Remove': set(['Pi[p]','CA+2[p]']),
    },
    'A12 - Adenosine 3,5-Cyclic Monophosphate - MIX0-760': {
        'Add': set(['CAMP[p]']),
        'Remove': set(['Pi[p]','CA+2[p]']),
    },
    'B1 - Thiophosphate - MIX0-680': None, # not in the model
    'B2 - Dithiophosphate - MIX0-681': None, # not in the model
    'B3 - DL-α- Glycerol Phosphate - MIX0-682': {
        'Add': set(['GLYCEROL-3P[e]']),
        'Remove': set(['Pi[p]','CA+2[p]']),
    },
    'B4 - β- Glycerol Phosphate - MIX0-683': {
        'Add': set(['CPD-536[p]']),
        'Remove': set(['Pi[p]','CA+2[p]']),
    },
    'B5 - Carbamyl Phosphate - MIX0-684': {
        'Add': set(['CARBAMOYL-P[e]']),
        'Remove': set(['Pi[p]','CA+2[p]']),
    },
    'B6 - D-2-Phospho- Glyceric acid - MIX0-761': {
        'Add': set(['2-PG[p]']),
        'Remove': set(['Pi[p]','CA+2[p]']),
    },
    'B7 - D-3-Phospho- Glyceric acid - MIX0-762': {
        'Add': set(['G3P[p]']),
        'Remove': set(['Pi[p]','CA+2[p]']),
    },
    'B8 - Guanosine 2- Monophosphate - MIX0-685': {
        'Add': set(['CPD-13025[e]']),
        'Remove': set(['Pi[p]','CA+2[p]']),
    },
    'B9 - Guanosine 3- Monophosphate - MIX0-686': {
        'Add': set(['CPD-3708[p]']),
        'Remove': set(['Pi[p]','CA+2[p]']),
    },
    'B10 - Guanosine 5- Monophosphate - MIX0-687': {
        'Add': set(['GMP[e]']),
        'Remove': set(['Pi[p]','CA+2[p]']),
    },
    'B11 - Guanosine 2,3-Cyclic Monophosphatee - MIX0-688': {
        'Add': set(['CPD-3709[e]']),
        'Remove': set(['Pi[p]','CA+2[p]']),

    },
    'B12 - Guanosine 3,5-Cyclic Monophosphate - MIX0-689': {
        'Add': set(['CGMP[e]']),
        'Remove': set(['Pi[p]','CA+2[p]']),

    },
    'C1 - Phosphoenol Pyruvate - MIX0-690': {
        'Add': set(['PHOSPHO-ENOL-PYRUVATE[e]']),
        'Remove': set(['Pi[p]','CA+2[p]']),

    },
    'C2 - Phospho- Glycolic acid - MIX0-691': {
        'Add': set(['CPD-67[e]']),
        'Remove': set(['Pi[p]','CA+2[p]']),

    },
    'C3 - D-Glucose- 1-Phosphate - MIX0-692': None, #not in the model
    'C4 - D-Glucose- 6-Phosphate - MIX0-693': {
        'Add': set(['Glucose-6-phosphate[p]']),
        'Remove': set(['Pi[p]','CA+2[p]']),

    },
    'C5 - 2-Deoxy-D- Glucose 6- Phosphate - MIX0-694' : {
        'Add': set(['2-DEOXY-D-GLUCOSE-6-PHOSPHATE[e]']),
        'Remove': set(['Pi[p]','CA+2[p]']),
    },
    'C6 - D-Glucosamine- 6-Phosphate - MIX0-695': {
        'Add': set(['D-GLUCOSAMINE-6-P[p]']),
        'Remove': set(['Pi[p]','CA+2[p]']),

    },
    'C7 - 6-Phospho- Gluconic acid - MIX0-696': {
        'Add': set(['CPD-2961[p]']),
        'Remove': set(['Pi[p]','CA+2[p]']),

    },
    'C8 - Cytidine 2- Monophosphate - MIX0-697': {
        'Add': set(['CPD-3710[e]']),
        'Remove': set(['Pi[p]','CA+2[p]']),

    },
    'C9 - Cytidine 3- Monophosphate - MIX0-698': {
        'Add': set(['CPD-3711[e]']),
        'Remove': set(['Pi[p]','CA+2[p]']),

    },
    'C10 - Cytidine 5- Monophosphate - MIX0-699': {
        'Add': set(['CMP[e]']),
        'Remove': set(['Pi[p]','CA+2[p]']),

    },
    'C11 - Cytidine 2,3-Cyclic Monophosphate - MIX0-700': {
        'Add': set(['CPD-3713[e]']),
        'Remove': set(['Pi[p]','CA+2[p]']),

    },
    'C12 - Cytidine 3,5-Cyclic Monophosphate - MIX0-701': {
        'Add': set(['CPD-683[e]']),
        'Remove': set(['Pi[p]','CA+2[p]']),
    },

    'D1 - D-Mannose- 1-Phosphate - MIX0-702': {
        'Add': set(['MANNOSE-1P[e]']),
        'Remove': set(['Pi[p]','CA+2[p]']),

    },

    'D2 - D-Mannose- 6-Phosphate - MIX0-703': {
        'Add': set(['MANNOSE-6P[e]']),
        'Remove': set(['Pi[p]','CA+2[p]']),

    },
    'D3 - Cysteamine- S-Phosphate - MIX0-704': {
        'Add': set(['CPD-3721[p]']),
        'Remove': set(['Pi[p]','CA+2[p]']),

    },
    'D4 - Phospho- L-Arginine - MIX0-705': {
        'Add': set(['L-ARGININE-P[e]']),
        'Remove': set(['Pi[p]','CA+2[p]']),

    },
    'D5 - O-Phospho- D-Serine - MIX0-706': {
        'Add': set(['CPD-3722[e]']),
        'Remove': set(['Pi[p]','CA+2[p]']),

    },
    'D6 - O-Phospho- L-Serine - MIX0-707': {
        'Add': set(['3-P-SERINE[e]']),
        'Remove': set(['Pi[p]','CA+2[p]']),

    },

    'D7 - O-Phospho- L-Threonine - MIX0-708': {
        'Add': set(['L-THREONINE-O-3-PHOSPHATE[e]']),
        'Remove': set(['Pi[p]','CA+2[p]']),

    },

    'D8 - Uridine 2- Monophosphate - MIX0-709': {
        'Add': set(['CPD-3723[e]']),
        'Remove': set(['Pi[p]','CA+2[p]']),

    },

    'D9 - Uridine 3- Monophosphate - MIX0-710': {
        'Add': set(['CPD-3724[e]']),
        'Remove': set(['Pi[p]','CA+2[p]']),

    },
    'D10 - Uridine 5- Monophosphate - MIX0-711': {
        'Add': set(['UMP[e]']),
        'Remove': set(['Pi[p]','CA+2[p]']),

    },
    'D11 - Uridine 2,3-Cyclic Monophosphate - MIX0-712': {
        'Add': set(['CPD-3725[e]']),
        'Remove': set(['Pi[p]','CA+2[p]']),

    },
    'D12 - Uridine 3,5-Cyclic Monophosphate - MIX0-713': {
        'Add': set(['CPD-3727[e]']),
        'Remove': set(['Pi[p]','CA+2[p]']),

    },

    'E1 - O-Phospho- D-Tyrosine - MIX0-714': {
        'Add': set(['CPD-3729[e]']),
        'Remove': set(['Pi[p]','CA+2[p]']),

    },

    'E2 - O-Phospho- L-Tyrosine - MIX0-715': {
        'Add': set(['CPD-3728[e]']),
        'Remove': set(['Pi[p]','CA+2[p]']),

    },
    'E3 - Phosphocreatine - MIX0-716': {
        'Add': set(['CREATINE-P[p]']),
        'Remove': set(['Pi[p]','CA+2[p]']),

    },
    'E4 - Phosphoryl Choline - MIX0-717': {
        'Add': set(['PHOSPHORYL-CHOLINE[p]']),
        'Remove': set(['Pi[p]','CA+2[p]']),

    },

    'E5 - O-Phosphoryl- Ethanolamine - MIX0-718': {
        'Add': set(['PHOSPHORYL-ETHANOLAMINE[p]']),
        'Remove': set(['Pi[p]','CA+2[p]']),
    },

    'E6 - Phosphono Acetic acid - MIX0-719': {
        'Add': set(['CPD-764[e]']),
        'Remove': set(['Pi[p]','CA+2[p]']),
    },

    'E7 - 2-Aminoethyl Phosphonic acid - MIX0-720': {
        'Add': set(['CPD-1106[e]']),
        'Remove': set(['Pi[p]','CA+2[p]']),
    },
    'E8 - Methylene Diphosphonic acid - MIX0-721': {
        'Add': set(['CPD-3731[e]']),
        'Remove': set(['Pi[p]','CA+2[p]']),
    },
    'E9 - Thymidine 3- Monophosphate - MIX0-722': {
        'Add': set(['CPD-3732[e]']),
        'Remove': set(['Pi[p]','CA+2[p]']),
    },
    'E10 - Thymidine 5- Monophosphate - MIX0-723': {
        'Add': set(['TMP[e]']),
        'Remove': set(['Pi[p]','CA+2[p]']),
    },
    'E11 - Inositol Hexaphosphate - MIX0-763': {
        'Add': set(['MI-HEXAKISPHOSPHATE[e]']),
        'Remove': set(['Pi[p]','CA+2[p]']),

    },
    'E12 - Thymidine 3,5-Cyclic Monophosphate - MIX0-724': {
        'Add': set(['CPD-3733[e]']),
        'Remove': set(['Pi[p]','CA+2[p]']),
    },
     'F1 - sulfur negative control - MIX0-82': {
        'Add': set([]),
        'Remove': set(['SULFATE[p]', 'CA+2[p]']),

    },
    'F2 - Sulfate - MIX0-725': {
        'Add': set(['SULFATE[e]']),
        'Remove': set(['SULFATE[p]', 'CA+2[p]']),

    },
    'F3 - Thiosulfate  - MIX0-726': {
        'Add': set(['S2O3[e]']),
        'Remove': set(['SULFATE[p]', 'CA+2[p]']),

    },
    'F4 - Tetrathionate - MIX0-727': {
        'Add': set(['CPD-14[e]']),
        'Remove': set(['SULFATE[p]', 'CA+2[p]']),

    },
    'F5 - Thiophosphate - MIX0-728': None, #not in the model

    'F6 - Dithiophosphate - MIX0-729': None, #not in the model

    'F7 - L-Cysteine - MIX0-730': {
        'Add': set(['CYS[e]']),
        'Remove': set(['SULFATE[p]', 'CA+2[p]']),

    },
    'F8 - D-Cysteine - MIX0-731': {
        'Add': set(['D-CYSTEINE[e]']),
        'Remove': set(['SULFATE[p]', 'CA+2[p]']),

    },
    'F9 - Cys-Gly - MIX0-764': {
        'Add': set(['CYS-GLY[e]']),
        'Remove': set(['SULFATE[p]', 'CA+2[p]']),

    },
    'F10 - L-Cysteic acid - MIX0-732': {
        'Add': set(['L-CYSTEATE[p]']),
        'Remove': set(['SULFATE[p]', 'CA+2[p]']),

    },
    'F11 - Cysteamine - MIX0-733': {
        'Add': set(['CPD-239[e]']),
        'Remove': set(['SULFATE[p]', 'CA+2[p]']),

    },
    'F12 - L-Cysteine Sulfinic acid - MIX0-734': {
        'Add': set(['3-SULFINOALANINE[e]']),
        'Remove': set(['SULFATE[p]', 'CA+2[p]']),

    },

    'G1 - N-Acetyl- L-Cysteine - MIX0-735': {
        'Add': set(['CPD-9175[e]']),
        'Remove': set(['SULFATE[p]', 'CA+2[p]']),

    },
    'G2 - S-Methyl- L-Cysteine - MIX0-736': {
        'Add': set(['S-METHYL-L-CYSTEINE[e]']),
        'Remove': set(['SULFATE[p]', 'CA+2[p]']),

    },
    'G3 - Cystathionine - MIX0-737': {
        'Add': set(['L-CYSTATHIONINE[e]']),
        'Remove': set(['SULFATE[p]', 'CA+2[p]']),

    },
    'G4 - Lanthionine - MIX0-738': {
        'Add': set(['CPD-3736[e]']),
        'Remove': set(['SULFATE[p]', 'CA+2[p]']),

    },
    'G5 - Glutathione - MIX0-739': {
        'Add': set(['GLUTATHIONE[p]']),
        'Remove': set(['SULFATE[p]', 'CA+2[p]']),

    },
    'G6 - DL-Ethionine - MIX0-756': {
        'Add': set(['ETHIONINE[e]']),
        'Remove': set(['SULFATE[p]', 'CA+2[p]']),

    },

    'G7 - L-Methionine - MIX0-740': {
        'Add': set(['MET[e]']),
        'Remove': set(['SULFATE[p]', 'CA+2[p]']),
    },
    'G8 - D-Methionine - MIX0-741': {
        'Add': set(['CPD-218[e]']),
        'Remove': set(['SULFATE[p]', 'CA+2[p]']),

    },
    'G9 - Gly-Met - MIX0-765': {
        'Add': set(['CPD-13393[e]']),
        'Remove': set(['SULFATE[p]', 'CA+2[p]']),

    },
    'G10 - N-Acetyl-D,L- Methionine - MIX0-742': None, #not in the model
    'G11 - L-Methionine Sulfoxide - MIX0-743': {
        'Add': set(['L-Methionine-sulfoxides[p]']),
        'Remove': set(['SULFATE[p]', 'CA+2[p]']),

    },
    'G12 - L-Methionine Sulfone - MIX0-744': {
        'Add': set(['CPD-3739[e]']),
        'Remove': set(['SULFATE[p]', 'CA+2[p]']),

    },
    'H1 - L-Djenkolic acid - MIX0-745': {
        'Add': set(['CPD-3740[e]']),
        'Remove': set(['SULFATE[p]', 'CA+2[p]']),

    },
    'H2 - Thiourea - MIX0-746': None, #not in the model
    'H3 - 1-Thio- β-D- Glucose - MIX0-747': None, #not in the model
    'H4 - DL-Lipoamide - MIX0-748': None, #not in the model
    'H5 - Taurocholic acid - MIX0-749': None, #not in the model
    'H6 - Taurine - MIX0-750': {
        'Add': set(['TAURINE[e]']),
        'Remove': set(['SULFATE[p]', 'CA+2[p]']),
    },
    'H7 - Hypotaurine - MIX0-751': {
        'Add': set(['HYPOTAURINE[p]']),
        'Remove': set(['SULFATE[p]', 'CA+2[p]']),

    },
    'H8 - p-Aminobenzene Sulfonic acid - MIX0-757': {
        'Add': set(['CPD-10427[e]']),
        'Remove': set(['SULFATE[p]', 'CA+2[p]']),

    },
    'H9 - Butane Sulfonic acid - MIX0-752': {
        'Add': set(['CPD-3744[e]']),
        'Remove': set(['SULFATE[p]', 'CA+2[p]']),

    },
    'H10 - 2-Hydroxyethane Sulfonic acid - MIX0-753': {
        'Add': set(['CPD-3745[e]']),
        'Remove': set(['SULFATE[p]', 'CA+2[p]']),

    },
    'H11 - Methane Sulfonic acid - MIX0-754': {
        'Add': set(['CPD-3746[e]']),
        'Remove': set(['SULFATE[p]', 'CA+2[p]']),

    },
    'H12 - Tetramethylene Sulfone - MIX0-767': None,
}

In [11]:
# run tests in order to add microarray 4 to the previous checkpoint (cp4+microarray2)
cp6_oofv = dict({})
plateID_to_condition_sulfur_phos = dict({})
for condition_name, condition in conditions.items():

    # store in dictionary the optimal objective function value
    temp = condition_name.split(' - ')
    plate_ID = temp[0]
    carbon_source = temp[1]
    plateID_to_condition_sulfur_phos[plate_ID] = carbon_source

    # solve the cvxpy problem
    objective_weights = {'secretion': 0.001, 'efficiency': 0.000001, 'kinetics': 0.00001}
    if condition == None:
        cp5_oofv[plate_ID] = None
        continue
    oofv, solution_flux, test_reaction_names, S_new, test_metabolites, test_kinetic = test_NetworkFlowModel(
                                            objective_weights,
                                            uptake_addition=condition['Add'], uptake_removal=condition['Remove'],)

    # get the fluxes
    sim_flux = pd.DataFrame({f'sim_cp6_{condition_name}': solution_flux}, index = test_reaction_names)
    condition_names.append(f'sim_cp6_{condition_name}')
    df_all = pd.concat([df_all, sim_flux], axis=1)
    cp6_oofv[plate_ID] = oofv
    df_all_for_compare = df_all.copy()
    print(f"""Finished enviornment: {condition_name} with objective function value: {oofv}""")

Finished enviornment: A1 - phosphorus negative control- MIX0-83 with objective function value: 20906.195335937577
Finished enviornment: A2 - Phosphate - MIX0-672 with objective function value: 20906.195335937577
Finished enviornment: A3 - Pyrophosphate - MIX0-673 with objective function value: 19722.223915677423
Finished enviornment: A4 - Trimetaphosphate - MIX0-674 with objective function value: 20906.195335937577
Finished enviornment: A5 - Tripolyphosphate - MIX0-675 with objective function value: 19822.46231120835
Finished enviornment: A7 - Hypophosphite - MIX0-758  with objective function value: 20906.195335937577
Finished enviornment: A8 - Adenosine 2- Monophosphate - MIX0-677 with objective function value: 17882.37420493563
Finished enviornment: A9 - Adenosine 3- Monophosphate - MIX0-759 with objective function value: 17882.313766052463
Finished enviornment: A10 - Adenosine 5- Monophosphate - MIX0-678 with objective function value: 17882.37398695013
Finished enviornment: A11 - Ad

In [12]:
df_all_for_compare

Unnamed: 0,0,sim_cp5_A1 - nitrogen negative control- MIX0-81,sim_cp5_A2 - AMMONIUM - MIX0-590,sim_cp5_A3 - Nitrite - MIX0-591,sim_cp5_A4 - Nitrate - MIX0-592,sim_cp5_A5 - Urea - MIX0-593,sim_cp5_A7 - L-Alanine - MIX0-595,sim_cp5_A8 - L-Arginine - MIX0-596,sim_cp5_A9 - L-Asparagine - MIX0-597,sim_cp5_A10 - L-Aspartic acid - MIX0-598,...,sim_cp6_G9 - Gly-Met - MIX0-765,sim_cp6_G11 - L-Methionine Sulfoxide - MIX0-743,sim_cp6_G12 - L-Methionine Sulfone - MIX0-744,sim_cp6_H1 - L-Djenkolic acid - MIX0-745,sim_cp6_H6 - Taurine - MIX0-750,sim_cp6_H7 - Hypotaurine - MIX0-751,sim_cp6_H8 - p-Aminobenzene Sulfonic acid - MIX0-757,sim_cp6_H9 - Butane Sulfonic acid - MIX0-752,sim_cp6_H10 - 2-Hydroxyethane Sulfonic acid - MIX0-753,sim_cp6_H11 - Methane Sulfonic acid - MIX0-754
1-ACYLGLYCEROL-3-P-ACYLTRANSFER-RXN,0.000000e+00,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,...,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0
1.1.1.127-RXN,0.000000e+00,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,...,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0
1.1.1.127-RXN (reverse),0.000000e+00,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,...,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0
1.1.1.215-RXN (reverse),0.000000e+00,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,...,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0
1.1.1.251-RXN,0.000000e+00,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,...,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
XYLULOKIN-RXN-CPD-24961/ATP//XYLULOSE-5-PHOSPHATE/ADP/PROTON.47.,0.000000e+00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
YIAE1-RXN (reverse),0.000000e+00,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,...,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0
glycogen-monomer-extension,9.005122e+03,8662.0,8662.0,8662.0,8662.0,8662.0,8662.0,8662.0,8662.0,8662.0,...,8662.0,8662.0,8662.0,8662.0,8662.0,8662.0,8662.0,8662.0,8662.0,8662.0
DISULFOXRED-RXN[CCO-PERI-BAC]-MONOMER0-4152/MONOMER0-4438//MONOMER0-4438/MONOMER0-4152.71.DEPHOSICITDEHASE-RXN,0.000000e+00,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,...,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0


In [13]:
#add all of the new reactions to df_all
df_all_for_compare['is_new'] = 'Old Reactions'
df_all_for_compare.loc[ADDED_RXNS, 'is_new'] = 'Heena\'s Reactions'
df_all_for_compare.loc[fba_new_reaction_ids, 'is_new'] = 'New Reactions'

df_all_for_compare['kinetic'] = [kinetic[r] if r in kinetic_reaction_ids else False for r in df_all.index]

In [14]:
%store -r df_gene_usage genes_to_enzymes df_all_gene_usage new_genes

In [15]:
%store -r df_all_gene_usage df_all_gene_usage_cp1 df_all_gene_usage_cp2 df_all_gene_usage_cp3_GLOP df_all_gene_usage_cp4_w_cp23

In [16]:
condition_names

['sim_cp5_A1 - nitrogen negative control- MIX0-81',
 'sim_cp5_A2 - AMMONIUM - MIX0-590',
 'sim_cp5_A3 - Nitrite - MIX0-591',
 'sim_cp5_A4 - Nitrate - MIX0-592',
 'sim_cp5_A5 - Urea - MIX0-593',
 'sim_cp5_A7 - L-Alanine - MIX0-595 ',
 'sim_cp5_A8 - L-Arginine - MIX0-596',
 'sim_cp5_A9 - L-Asparagine - MIX0-597',
 'sim_cp5_A10 - L-Aspartic acid - MIX0-598',
 'sim_cp5_A11 - L-Cysteine - MIX0-599',
 'sim_cp5_A12 - L-Glutamic acid - MIX0-600',
 'sim_cp5_B1 - L-Glutamine - MIX0-601',
 'sim_cp5_B2 - Glycine - MIX0-602',
 'sim_cp5_B3 - L-Histidine - MIX0-603',
 'sim_cp5_B4 - L-Isoleucine - MIX0-604',
 'sim_cp5_B5 - L-Leucine - MIX0-605',
 'sim_cp5_B6 - L-Lysine - MIX0-606',
 'sim_cp5_B7 - L-Methionine - MIX0-607',
 'sim_cp5_B8 - L-Phenylalanine - MIX0-608',
 'sim_cp5_B9 - L-Proline - MIX0-609',
 'sim_cp5_B10 - L-Serine - MIX0-610',
 'sim_cp5_B11 - L-Threonine - MIX0-611',
 'sim_cp5_B12 - L-Tryptophan - MIX0-612',
 'sim_cp5_C1 - L-Tyrosine - MIX0-613',
 'sim_cp5_C2 - L-Valine - MIX0-614',
 'sim

In [17]:
conditions_previous = ['basal', 'acetate', 'rich', 'anaerobic_basal', 'anaerobic_acetate', 'anaerobic_rich']
conditions_all = conditions_previous + condition_names

df_all_gene_usage = df_all_gene_usage.copy()

gene_used_before = df_gene_usage['is_used']
all_gene_used_before = df_all_gene_usage['is_used']

df_all_gene_usage = df_all_gene_usage.drop(['is_used'], axis = 1)
df_all_gene_usage_microarray34 = df_all_gene_usage.copy()
reaction_catalysts = metabolism.parameters["reaction_catalysts"]

for condition in condition_names:
    df_all_gene_usage_microarray34[condition] = 0
    new_reaction_usage = df_all_for_compare.loc[fba_new_reaction_ids, condition]
    for rxn in fba_new_reaction_ids:
        is_used = new_reaction_usage.loc[rxn]
        enzymes = reaction_catalysts.get(rxn,[])
        for enz in enzymes:
            enz = enz[:-3]
            genes = get_keys(genes_to_enzymes, enz)
            for gene in genes:
                if is_used:
                    df_all_gene_usage_microarray34.loc[gene, condition] += 1
                if rxn in kinetic_reaction_ids:
                    df_all_gene_usage_microarray34.loc[gene, 'has_kinetic'] = True


# tally usage
df_all_gene_usage_microarray34['is_used'] = np.any(df_all_gene_usage_microarray34[conditions_all] > 0, axis=1)
df_gene_usage_microarray34 = df_all_gene_usage_microarray34.loc[new_genes,:]

  df_all_gene_usage_microarray34[condition] = 0
  df_all_gene_usage_microarray34[condition] = 0
  df_all_gene_usage_microarray34[condition] = 0
  df_all_gene_usage_microarray34[condition] = 0
  df_all_gene_usage_microarray34[condition] = 0
  df_all_gene_usage_microarray34[condition] = 0
  df_all_gene_usage_microarray34[condition] = 0
  df_all_gene_usage_microarray34[condition] = 0
  df_all_gene_usage_microarray34[condition] = 0
  df_all_gene_usage_microarray34[condition] = 0
  df_all_gene_usage_microarray34[condition] = 0
  df_all_gene_usage_microarray34[condition] = 0
  df_all_gene_usage_microarray34[condition] = 0
  df_all_gene_usage_microarray34[condition] = 0
  df_all_gene_usage_microarray34[condition] = 0
  df_all_gene_usage_microarray34[condition] = 0
  df_all_gene_usage_microarray34[condition] = 0
  df_all_gene_usage_microarray34[condition] = 0
  df_all_gene_usage_microarray34[condition] = 0
  df_all_gene_usage_microarray34[condition] = 0
  df_all_gene_usage_microarray34[conditi

In [18]:
%store -r df_all_gene_usage df_all_gene_usage_cp1 df_all_gene_usage_cp2 df_all_gene_usage_cp3_w_cp2 df_all_gene_usage_cp4_w_cp23

no stored variable or alias df_all_gene_usage_cp3_w_cp2


In [33]:
df_all_gene_usage_cp4_w_cp23

Unnamed: 0_level_0,Enzyme encoded,Reactions,has_kinetic,basal,acetate,rich,anaerobic_basal,anaerobic_acetate,anaerobic_rich,sim_cp3_B2 - N-Acetyl- Neuraminic acid - MIX0-515,...,"sim_cp3_H11 - 2,3-Butanedione - MIX0-769",is_used,sim_cp2_EG10593-α-D-galactopyranose,sim_cp2_EG11869-2-dehydro-3-deoxy-D-gluconate,sim_cp2_EG12495-L-ascorbate,sim_cp2_EG20053-D-galactonate,sim_cp2_EG11700-1-(β-D ribofuranosyl)nicotinamide,sim_cp2_EG12281-L-glutamate,sim_cp2_EG10406-L-glutamate,is_used_combined
Gene ID (EcoCyc),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
EG11679,AAS-MONOMER,"[ACYLGPEACYLTRANS-RXN, RXN-5741, RXN0-5513]",False,0,0,0,0,0,0,0,...,0,False,0,0,0,0,0,0,0,True
EG11647,ACETYL-COA-CARBOXYLTRANSFER-CPLX,[RXN0-5055],False,1,1,1,1,0,0,0,...,0,True,0,0,0,0,0,0,0,True
EG10276,BIOTIN-CARBOXYL-CPLX,[BIOTIN-CARBOXYL-RXN],True,1,1,1,1,0,0,0,...,0,True,0,0,0,0,0,0,0,True
EG10217,ACETYL-COA-CARBOXYLTRANSFER-CPLX,[RXN0-5055],False,1,1,1,1,0,0,0,...,0,True,0,0,0,0,0,0,0,True
EG10022,ISOCIT-LYASE,[ISOCIT-CLEAV-RXN],False,1,1,0,0,1,0,0,...,1,True,0,0,1,0,1,0,0,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
EG12215,YHHO-MONOMER,"[RXN0-5205, 3.6.3.3-RXN, TRANS-RXN-452]",False,0,0,0,0,0,0,0,...,0,False,0,0,0,0,0,0,0,False
G7017,ABC-63-CPLX,[ABC-63-RXN],False,0,0,0,0,0,0,0,...,0,False,0,0,0,0,0,0,0,False
G7018,ABC-63-CPLX,[ABC-63-RXN],False,0,0,0,0,0,0,0,...,0,False,0,0,0,0,0,0,0,False
EG11167,CPLX0-8617,"[RXN0-12, TRANS-RXN-8, RXN0-16, RXN0-10, TRANS...",False,3,3,3,3,2,0,0,...,0,True,0,0,0,0,0,0,0,True


In [19]:
# previous % new gene usage
num_genes = len(gene_used_before)
all_gene_usage_cp4_w_microarray3 = df_all_gene_usage_cp4_w_cp23['is_used_combined'] | df_all_gene_usage_microarray34['is_used']
new_gene_usage_cp4_w_microarray3 = all_gene_usage_cp4_w_microarray3[new_genes]

perc_gene_usage_prev = sum(gene_used_before)/num_genes #baseline from before all new checkpoints were implemented
perc_gene_usage_all_micro = sum(all_gene_usage_cp4_w_microarray3)/len(all_gene_usage_cp4_w_microarray3)
perc_gene_usage_new_micro  = sum(all_gene_usage_cp4_w_microarray3.loc[new_genes])/num_genes

print(f'% new genes usage before checkpoints is: {perc_gene_usage_prev: 0.2%}')
print(f'% ALL genes usage added through microarray 3 and 4 are: {perc_gene_usage_all_micro: 0.2%}')
print(f'% NEW genes enabled at microarray 3 and 4 are: {perc_gene_usage_new_micro: 0.2%}')


% new genes usage before checkpoints is:  4.56%
% ALL genes usage added through microarray 3 and 4 are:  54.21%
% NEW genes enabled at microarray 3 and 4 are:  31.60%


## Implement Checkpoints Easy Fixes

In [47]:
#checkpoint conditions added that were easy fixes
conditions = {
     'EG10615-D-Mannitol and D-Sorbitol':{
        'Add': set(['MANNITOL[e]', 'SORBITOL[e]']),
        'Remove': set([]),
    },
    'G6135-S-methyl-L-methionine':{
        'Add': set(['CPD-397[e]']),
        'Remove': set([]),
    },
    'EG10685-(R)-pantothenate':{
        'Add': set(['PANTOTHENATE[p]']),
        'Remove': set([]),
    },
    'EG12908-N6-(D-psicosyl)-L-lysine and N6-(1-deoxy-D-fructos-1-yl)-L-lysine':{
        'Add': set(['PSICOSELYSINE[p]', 'FRUCTOSELYSINE[p]']),
        'Remove': set([]),
    },
    'EG11646-pseudouridine':{
        'Add': set(['CPD-497[e]']),
        'Remove': set([]),
    },
    'G7067-shikimate':{
        'Add': set(['SHIKIMATE[e]']),
        'Remove': set([]),
    },
    'G6566-thiamine':{
        'Add': set(['THIAMINE[e]']),
        'Remove': set([]),
    },
    'EG10593-α-D-galactopyranose': {
        'Add': set(['ALPHA-D-GALACTOSE[e]']),
        'Remove': set(['GLC[p]']),
        'Remove Reaction': ['TRANS-RXN-21'] #might also need to find solution to the "disabling" will i need to list them all out here
    },
    'EG11869-2-dehydro-3-deoxy-D-gluconate': {
        'Add': set(['2-DEHYDRO-3-DEOXY-D-GLUCONATE[p]']),
        'Remove': set(['GLC[p]']),
    },
    'EG12495-L-ascorbate': {
        'Add': set(['ASCORBATE[p]']),
        'Remove': set([]),

    },
    'EG20053-D-galactonate':{
        'Add': set(['D-GALACTONATE[e]']),
        'Remove': set([]),

    },
    'EG11700-1-(β-D ribofuranosyl)nicotinamide':{
        'Add': set(['NICOTINAMIDE_RIBOSE[p]']),
        'Remove': set(['GLC[p]']),
    },
    'EG12281-L-glutamate':{
        'Add': set(['GLT[p]']),
        'Remove': set(['GLC[p]']),
        'Remove Reaction': ['TRANS-RXN-261', 'TRANS-RXN-16']
    },
    'EG10406-L-glutamate':{
        'Add': set(['GLT[p]']),
        'Remove': set(['GLC[p]']),
        'Remove Reaction': ['TRANS-RXN-261', 'TRANS-RXN-162']
    },
}



In [52]:
#add all of the conditions from above into the df_all
condition_names = []
for condition_name, condition in conditions.items():
    
    if 'Remove Reaction' in condition:
        remove_reaction = condition['Remove Reaction']
    else:
        remove_reaction = None
    
    if 'Add Demand' in condition:
        demand = condition['Add Demand']
    else:
        demand = None
    
    objective_weights = {'secretion': 0.001, 'efficiency': 0.000001, 'kinetics': 0.00001}
    
    _, solution_flux, test_reaction_names, S_new, test_metabolites, test_kinetic = test_NetworkFlowModel(
                                        objective_weights, uptake_addition=condition['Add'],
                                        uptake_removal=condition['Remove'],
                                        add_homeostatic_demand=demand,
                                        remove_reaction=remove_reaction,

    )    
    
    # get the fluxes
    sim_flux = pd.DataFrame({f'sim_cp4+_{condition_name}': solution_flux}, index = test_reaction_names)
    condition_names.append(f'sim_cp4+_{condition_name}')
    df_all = pd.concat([df_all, sim_flux], axis=1)
    
    print(f"""Finished enviornment: {condition_name}""")

Optimal objective function reached is: 17761.59048753839
Finished enviornment: EG10615-D-Mannitol and D-Sorbitol
Optimal objective function reached is: 19209.495131579297
Finished enviornment: G6135-S-methyl-L-methionine
Optimal objective function reached is: 19264.998570216998
Finished enviornment: EG10685-(R)-pantothenate
Optimal objective function reached is: 18147.433605300535
Finished enviornment: EG12908-N6-(D-psicosyl)-L-lysine and N6-(1-deoxy-D-fructos-1-yl)-L-lysine
Optimal objective function reached is: 18934.385270227325
Finished enviornment: EG11646-pseudouridine
Optimal objective function reached is: 19100.690186239903
Finished enviornment: G7067-shikimate
Optimal objective function reached is: 19271.892551722536
Finished enviornment: G6566-thiamine
Optimal objective function reached is: 19481.54508541614
Finished enviornment: EG10593-α-D-galactopyranose
Optimal objective function reached is: 18204.80611587953
Finished enviornment: EG11869-2-dehydro-3-deoxy-D-gluconate
Opt

In [53]:
df_all['is_new'] = 'Old Reactions'
df_all.loc[fba_new_reaction_ids, 'is_new'] = 'New Reactions'
df_all.loc[ADDED_RXNS, 'is_new'] = 'Heena\'s Reactions'

df_all['kinetic'] = [kinetic[r] if r in kinetic_reaction_ids else False for r in df_all.index]


# Calculate % Gene Usage with cp5 and Microarray 3 and 4

In [23]:
%store -r df_gene_usage genes_to_enzymes df_all_gene_usage new_genes df_all_gene_usage_cp4_w_cp23

In [24]:
# add checkpoint 5 to microarray 3 and 4: add easy fixes above
df_all_gene_usage_cp5_w_m3 = df_all_gene_usage_microarray34.copy()
reaction_catalysts = metabolism.parameters["reaction_catalysts"]
conditions_all = conditions_all + condition_names

for condition in condition_names:
    df_all_gene_usage_cp5_w_m3[condition] = 0
    new_reaction_usage = df_all.loc[fba_new_reaction_ids, condition]
    for rxn in fba_new_reaction_ids:
        is_used = new_reaction_usage.loc[rxn]
        enzymes = reaction_catalysts.get(rxn,[])
        for enz in enzymes:
            enz = enz[:-3]
            genes = get_keys(genes_to_enzymes, enz)
            for gene in genes:
                if is_used:
                    df_all_gene_usage_cp5_w_m3.loc[gene, condition] += 1
                if rxn in kinetic_reaction_ids:
                    df_all_gene_usage_cp5_w_m3.loc[gene, 'has_kinetic'] = True


# tally usage
df_all_gene_usage_cp5_w_m3['is_used'] = np.any(df_all_gene_usage_cp5_w_m3[conditions_all] > 0, axis=1)
df_gene_usage_cp5_w_m3 = df_all_gene_usage_cp5_w_m3.loc[new_genes,:]

In [25]:
%store -r df_all_gene_usage df_all_gene_usage_cp1 df_all_gene_usage_cp2 df_all_gene_usage_cp3_GLOP df_all_gene_usage_cp4_w_cp23 

In [36]:
num_genes = len(gene_used_before)

perc_gene_usage_prev = sum(gene_used_before)/num_genes
all_gene_usage_cp5_w_microarray3 = all_gene_usage_cp4_w_microarray3 | df_all_gene_usage_cp5_w_m3['is_used'] | df_all_gene_usage_cp1['is_used'] | df_all_gene_usage_cp2['is_used']
new_gene_usage_cp5_w_microarray3 = all_gene_usage_cp5_w_microarray3[new_genes]

perc_gene_usage_all_cp5= sum(all_gene_usage_cp5_w_microarray3)/len(all_gene_usage_cp5_w_microarray3)
perc_gene_usage_new_cp5  = sum(all_gene_usage_cp5_w_microarray3.loc[new_genes])/num_genes

print(f'% new genes usage before checkpoint 1 and 2 are: {perc_gene_usage_prev: 0.2%}')
print(f'%new genes enabled by microarray 3 and 4 are: {perc_gene_usage_new_micro: 0.2%}')
print(f'% new genes usage at checkpoint 5 are: {perc_gene_usage_new_cp5: 0.2%}')
print(f'% all genes usage at checkpoint 5 with microarray 3 and 4 are: {perc_gene_usage_all_cp5: 0.2%}')

% new genes usage before checkpoint 1 and 2 are:  4.56%
%new genes enabled by microarray 3 and 4 are:  31.60%
% new genes usage at checkpoint 5 are:  34.53%
% all genes usage at checkpoint 5 with microarray 3 and 4 are:  54.93%


In [44]:
df_all_gene_usage_cp2['sim'].loc['G6286']

0

In [54]:
genes_of_interest = ["EG10615", "G6135", "EG10685", "EG12908", "EG11646", "G7067", "G6566", "EG11787"]
all_gene_usage_cp5_w_microarray3[genes_of_interest]

Gene ID (EcoCyc)
EG10615     True
G6135       True
EG10685     True
EG12908     True
EG11646     True
G7067       True
G6566       True
EG11787    False
dtype: bool

In [35]:
all_gene_usage_cp5_w_microarray3[new_genes]

Gene ID (EcoCyc)
EG10022     True
EG10023     True
EG11942    False
EG11724     True
EG12462    False
           ...  
G7248      False
G7408       True
EG12517    False
EG12518    False
EG12520    False
Length: 307, dtype: bool

In [None]:
ids_to_check = [
    "EG11557", "EG11559", "EG11560", "EG11561", "EG11787", "EG11788", "EG10144",
    "EG10140", "EG10141", "EG10142", "G7096", "G7097", "G7099", "G7100", "G7102",
    "G7103", "G7104", "EG10177", "EG10175", "EG10160", "EG11327", "EG11871",
    "EG11104", "EG11469", "EG10556", "EG12495", "G7855", "EG10522", "EG11869",
    "EG20051", "EG20053", "GB4478", "EG10592", "EG11700", "EG12281", "EG10401",
    "EG10406", "EG12282", "EG12283", "EG12494", "G7856", "EG12522", "EG10615",
    "G6135", "EG10685", "EG12908", "EG11646", "G6518", "G7067", "G6347",
    "EG10953", "EG10954", "G6217", "G6218", "G6566", "EG11574", "G6219",
    "EG11573", "EG11572"
]
column_name = "Gene ID (EcoCyc)"  # <-- change this to your actual column name

# Filter rows where the column value is in ids_to_check
matches_df = all_gene_usage_cp5_w_microarray3[all_gene_usage_cp5_w_microarray3.index.isin(ids_to_check)]

# Save results if needed
# matches_df.to_csv("matched_rows.csv", index=False)

print(f"Found {len(matches_df)} matches:")
print(matches_df)

In [None]:
matches_df[matches_df]

In [None]:
df_diff = new_gene_usage_cp5_w_microarray3.compare(new_gene_usage_cp4_w_microarray2)

df_diff

# Plot 1: Create histogram to track gene usage

In [39]:
# % new gene usage
df_gene_usage = df_all_gene_usage.loc[new_genes,:]
df_gene_usage_cp1 = df_all_gene_usage_cp1.loc[new_genes,:]
df_gene_usage_cp2 = df_all_gene_usage_cp2.loc[new_genes,:]
df_gene_usage_cp3_w_cp2 = df_all_gene_usage_cp3_GLOP.loc[new_genes,:]
df_gene_usage_cp4_w_cp3 = df_all_gene_usage_cp4_w_cp23.loc[new_genes,:]

all_gene_usage_cp5_w_microarray3 = all_gene_usage_cp4_w_microarray3 | df_all_gene_usage_cp5_w_m3['is_used'] | df_all_gene_usage_cp1['is_used'] | df_all_gene_usage_cp2['is_used'] 
new_gene_usage_cp5_w_microarray3 = all_gene_usage_cp5_w_microarray3[new_genes]

num_genes = len(df_gene_usage)
perc_gene_usage_basal = sum(df_gene_usage_cp2.basal > 0)/num_genes * 100
perc_gene_usage_basic_conditions = sum(gene_used_before)/num_genes * 100
perc_gene_usage_cp1  = sum(df_gene_usage_cp1['is_used'])/num_genes * 100
perc_gene_usage_cp2  = sum(df_gene_usage_cp2['is_used'])/num_genes * 100
perc_gene_usage_cp3  = sum(df_gene_usage_cp3_w_cp2['is_used'])/num_genes * 100
perc_gene_usage_cp4 = sum(df_gene_usage_cp4_w_cp3['is_used_combined'])/num_genes * 100 #bc one columned df
perc_gene_usage_cp5 = sum(new_gene_usage_cp5_w_microarray3)/num_genes * 100

perc = [perc_gene_usage_basal, perc_gene_usage_basic_conditions, perc_gene_usage_cp1, perc_gene_usage_cp2, perc_gene_usage_cp3, perc_gene_usage_cp4, perc_gene_usage_cp5]
x_label = ['Basal', 'Basal, Acetate, Rich, Anaerobic', 'Checkpoint 1', 'Checkpoint 2', 'Checkpoint 3 (CP2 + Microarray 1)', 'Checkpoint 4 (CP2 + Microarray 1,2)', 'Checkpoint 5 (All microarrays + easy fixes)']

In [40]:
# % all gene usage
num_genes = len(df_all_gene_usage)
perc_all_gene_usage_basal = sum(df_all_gene_usage_cp2.basal > 0)/num_genes * 100
perc_all_gene_usage_basic_conditions = sum(df_all_gene_usage['is_used'])/num_genes * 100
perc_all_gene_usage_cp1  = sum(df_all_gene_usage_cp1['is_used'])/num_genes * 100
perc_all_gene_usage_cp2  = sum(df_all_gene_usage_cp2['is_used'])/num_genes * 100
perc_all_gene_usage_cp3_w_cp2  = sum(df_all_gene_usage_cp3_GLOP['is_used'])/num_genes * 100
perc_all_gene_usage_cp4 = sum(df_all_gene_usage_cp4_w_cp23['is_used_combined'])/num_genes * 100
perc_all_gene_usage_cp5 = sum(all_gene_usage_cp5_w_microarray3)/num_genes * 100

perc_all = [perc_all_gene_usage_basal, perc_all_gene_usage_basic_conditions, perc_all_gene_usage_cp1, perc_all_gene_usage_cp2, perc_all_gene_usage_cp3_w_cp2, perc_all_gene_usage_cp4,perc_all_gene_usage_cp5]

In [41]:
# Create a DataFrame
df = pd.DataFrame({
    "Condition": x_label,
    "New Metabolic Gene": perc,
    "All Metabolic Gene": perc_all,
})

# Reshape to long format
df_long = df.melt(id_vars="Condition",
                  value_vars=["New Metabolic Gene", "All Metabolic Gene"],
                  var_name="Gene Group",
                  value_name="Percent Usage")

# Create grouped bar plot
fig = px.bar(
    df_long,
    x="Condition",
    y="Percent Usage",
    color="Gene Group",              # creates a legend
    barmode="group",                 # side-by-side bars
    text="Percent Usage",
    title="New vs All Metabolic Gene Usage by Condition",
    labels={"Percent Usage": "Percent gene usage (%)"},
    color_discrete_map={
        "New Metabolic Gene": "#4C78A8",
        "All Metabolic Gene": "#c26426"
    }
)

# Customize appearance
fig.update_traces(texttemplate='%{text:.2f}%', textposition='outside')
fig.update_layout(
    paper_bgcolor='rgba(255, 255, 255, 0)',
    yaxis_title="Percent gene usage (%)",
    yaxis=dict(range=[0, 60]),
    xaxis_title=None,
    font_color = 'black',
    xaxis_tickangle=-35,
    uniformtext_minsize=8,
    uniformtext_mode='hide',
    margin=dict(t=50, b=50, l=50, r=50),
    width=1000,   # width in pixels
    height=650
)

fig.show(renderer='browser')
# save
# fig.write_image("notebooks/Heena notebooks/Metabolism_New Genes/out/gene_usage_histogram_cp3.png", scale=5, width=800, height=500)

In [None]:
pip install --upgrade kaleido

## Histogram showing optimal Objective Function Value Distribution

In [None]:
%store cp5_oofv cp6_oofv

In [None]:
%store -r cp4_oofv

In [None]:
cp6_oofv

In [None]:
cp5_oofv

In [None]:
cp4_oofv

In [None]:
pre_master = list(cp5_oofv.values()) + list(cp6_oofv.values())
master = list(pre_master) + list(cp4_oofv.values())

In [None]:
master
master_basal, _, _, _, _, _ = test_NetworkFlowModel(objective_weights)

In [None]:
master

In [None]:
type(master)
new_list = [x if x is not None else np.nan for x in master]
clean_data = [x for x in new_list if not np.isnan(x)]

In [None]:
len(clean_data)

In [None]:
from collections import defaultdict
num_bins = 50

# Step 1: Get bin edges exactly like Plotly's histogram with nbinsx=50
bins = np.histogram_bin_edges(clean_data, bins=num_bins)

# Step 2: Assign each data point to a bin
bin_indices = np.digitize(clean_data, bins)

# Step 3: Group points by bin index
points_in_bins = defaultdict(list)
for point, bidx in zip(clean_data, bin_indices):
    # Handle the edge case where a point equals the last bin edge
    if bidx == len(bins):
        bidx -= 1
    points_in_bins[bidx].append(point)

# Step 4: Print points count and bin ranges
for bidx in range(1, len(bins)):
    bin_range = (bins[bidx - 1], bins[bidx])
    count = len(points_in_bins[bidx])
    print(f"Bin {bidx}: Range {bin_range}, Count: {count}")

In [None]:
print(f"Bin 16 range: {bins[16]} to {bins[19]}")
print("Points in bin 16:", points_in_bins[16])
print("Points in bin 19:", points_in_bins[19])
'''
Since there are some that are classified as
growth in ecocyc it wouldn't be right to establish a
new threshold so allowing current bounds to remain
as is.
'''

In [None]:
values = [v if v is not None else np.nan for v in master]
import plotly.graph_objects as go

# Create histogram
hist = go.Histogram(
    x=new_list,
    nbinsx=50,
    marker_color='lightblue',
    name="Distribution"
)

# Highlight the negative control
vline = go.Scatter(
    x=[42321.62630251915, 42321.62630251915], # TODO (Abby): find out the actual value
    y=[0, 15],
    mode="lines+text",
    line=dict(color="Navy", width=3, dash="dash"),
    text=["Neg Control"],
    textposition="bottom center",
    showlegend=False
)

# Highlight the basal value
vline2 = go.Scatter(
    x=[master_basal, master_basal],
    y=[0, 20],
    mode="lines+text",
    line=dict(color="Navy", width=3, dash="dash"),
    text=["Basal"],
    textposition="bottom center",
    showlegend=False
)

# Combine and plot
fig = go.Figure(data=[hist, vline, vline2])
fig.add_vrect(x0=38000, x1=43500, line_width=0, fillcolor="LightSalmon", opacity=0.1, annotation_text='No Growth', annotation_position="top", annotation_font_size = 16, layer="below")
fig.add_vrect(x0=12000, x1=26000, line_width=0, fillcolor="aqua", opacity=0.1, annotation_text='Growth', annotation_position="top", annotation_font_size = 16, layer="below")
fig.add_vrect(x0=26500, x1=37500, line_width=0, fillcolor="yellow", opacity=0.1, annotation_text='Slow Growth', annotation_position="top", annotation_font_size = 16, layer="below")

fig.update_layout(
    paper_bgcolor='rgba(0,0,0,0)',
    title="Histogram (Three New Arrays) with First Value Highlighted",
    xaxis_title="Optimal Objective Function Value",
    yaxis_title="Count",
    bargap=0.05
)
fig.show(renderer='browser')
# fig.write_image("notebooks/Heena notebooks/Metabolism_New Genes/out/oofv_histogram_cp3.png", scale=5, width=1000, height=500)

In [None]:
hist.nbinsx

In [None]:
oofv4_val = list(cp4_oofv.values())
oofv4_basal, _, _, _, _, _ = test_NetworkFlowModel(objective_weights)

In [None]:
#data for PM3 (of the nitrogen data)
oofv5_val = list(cp5_oofv.values())
oofv5_basal, _, _, _, _, _ = test_NetworkFlowModel(objective_weights)

In [None]:
values = [v if v is not None else np.nan for v in oofv5_val]
import plotly.graph_objects as go

# Create histogram
hist = go.Histogram(
    x=values,
    nbinsx=50,
    marker_color='lightblue',
    name="Distribution"
)

# Highlight the negative control
vline = go.Scatter(
    x=[42321.62630251915, 42321.62630251915], # TODO (Abby): find out the actual value
    y=[0, 15],
    mode="lines+text",
    line=dict(color="red", width=3, dash="dash"),
    textfont=dict(color="white"),
    text=["Neg Control"],
    textposition="bottom center",
    showlegend=False
)

# Highlight the basal value
vline2 = go.Scatter(
    x=[oofv5_basal, oofv5_basal],
    y=[0, 20],
    mode="lines+text",
    line=dict(color="red", width=3, dash="dash"),
    text=["Basal"],
    textfont=dict(color="white"),
    textposition="bottom center",
    showlegend=False
)

# Combine and plot
fig = go.Figure(data=[hist, vline, vline2])
fig.add_vrect(x0=38000, x1=43500, line_width=0, fillcolor="LightSalmon", opacity=0.1, annotation_text='No Growth', annotation_position="top", annotation_font_size = 16, layer="below", annotation_font_color="white")
fig.add_vrect(x0=12000, x1=26000, line_width=0, fillcolor="aqua", opacity=0.1, annotation_text='Growth', annotation_position="top", annotation_font_size = 16, layer="below", annotation_font_color="white")
fig.add_vrect(x0=26500, x1=37500, line_width=0, fillcolor="yellow", opacity=0.1, annotation_text='Slow Growth', annotation_position="top", annotation_font_size = 16, layer="below", annotation_font_color="white")

fig.update_layout(
    paper_bgcolor='rgba(0,0,0,0)',
    title="Histogram (Nitrogen Array) with First Value Highlighted",
    xaxis_title="Optimal Objective Function Value",
    yaxis_title="Count",
    font_color="black",
    bargap=0.05
)
fig.show(renderer='browser')
# fig.write_image("notebooks/Heena notebooks/Metabolism_New Genes/out/oofv_histogram_cp3.png", scale=5, width=1000, height=500)

In [None]:
#data for PM4 (of the phosphate and sulfur data)
oofv6_val = list(cp6_oofv.values())
oofv6_basal, _, _, _, _, _ = test_NetworkFlowModel(objective_weights)

In [None]:
values = [v if v is not None else np.nan for v in oofv6_val]
import plotly.graph_objects as go

# Create histogram
hist = go.Histogram(
    x=values,
    nbinsx=50,
    marker_color='lightblue',
    name="Distribution"
)

# Highlight the negative control
vline = go.Scatter(
    x=[42321.62630251915, 42321.62630251915], # TODO (Abby): find out the actual value
    y=[0, 15],
    mode="lines+text",
    line=dict(color="Navy", width=3, dash="dash"),
    text=["Neg Control"],
    textposition="bottom center",
    showlegend=False
)

# Highlight the basal value
vline2 = go.Scatter(
    x=[oofv6_basal, oofv6_basal],
    y=[0, 20],
    mode="lines+text",
    line=dict(color="Navy", width=3, dash="dash"),
    text=["Basal"],
    textposition="bottom center",
    showlegend=False
)

# Combine and plot
fig = go.Figure(data=[hist, vline, vline2])
fig.add_vrect(x0=38000, x1=43500, line_width=0, fillcolor="LightSalmon", opacity=0.1, annotation_text='No Growth', annotation_position="top", annotation_font_size = 16, layer="below")
fig.add_vrect(x0=12000, x1=26000, line_width=0, fillcolor="aqua", opacity=0.1, annotation_text='Growth', annotation_position="top", annotation_font_size = 16, layer="below")
fig.add_vrect(x0=26500, x1=37500, line_width=0, fillcolor="yellow", opacity=0.1, annotation_text='Slow Growth', annotation_position="top", annotation_font_size = 16, layer="below")

fig.update_layout(
    paper_bgcolor='rgba(0,0,0,0)',
    title="Histogram (Phosphate/Sulfur Array) with First Value Highlighted",
    xaxis_title="Optimal Objective Function Value",
    yaxis_title="Count",
    font_color="black",
    bargap=0.05
)
fig.show(renderer='browser')
# fig.write_image("notebooks/Heena notebooks/Metabolism_New Genes/out/oofv_histogram_cp3.png", scale=5, width=1000, height=500)

In [None]:
hist

# Plot 2: Heat Map

In [None]:
def discrete_colorscale(bvals, colors):
    """
    bvals - list of values bounding intervals/ranges of interest
    colors - list of rgb or hex colorcodes for values in [bvals[k], bvals[k+1]],0<=k < len(bvals)-1
    returns the plotly  discrete colorscale
    """
    if len(bvals) != len(colors)+1:
        raise ValueError('len(boundary values) should be equal to  len(colors)+1')
    bvals = sorted(bvals)
    nvals = [(v-bvals[0])/(bvals[-1]-bvals[0]) for v in bvals]  #normalized values

    dcolorscale = [] #discrete colorscale
    for k in range(len(colors)):
        dcolorscale.extend([[nvals[k], colors[k]], [nvals[k+1], colors[k]]])
    return dcolorscale

In [None]:
growth_bound = 25500
slow_growth_bound = 38000
no_growth_bound = 43500
lowest_bound = 12000
"""
Definition of growth, slow growth,
and no growth should be consistently
applied to all C-source Microarray
"""
bvals = [lowest_bound, growth_bound, slow_growth_bound, no_growth_bound]
colors = ['#dda0dd', '#ffb6c2', '#fcf3bf']
dcolorsc = discrete_colorscale(bvals, colors)
dcolorsc

In [None]:
bvals = np.array(bvals)
tickvals = [np.mean(bvals[k:k+2]) for k in range(len(bvals)-1)] #position with respect to bvals where ticktext is displayed
ticktext = [f'<{bvals[1]}'] + [f'{bvals[k]}-{bvals[k+1]}' for k in range(1, len(bvals)-2)]+[f'>{bvals[-2]}']
tickvals

In [None]:
text = "hellowwww"
a = text.split('w')
'<br>'.join(a)

In [None]:
# Plate dimensions
rows = list("ABCDEFGH")
cols = list(range(1, 13))

#for nitrogen array
# Create a 2D array with None as default
matrix_nit = []
label = []
for r in rows:
    row_data = []
    row_text = []
    for c in cols:
        key = f"{r}{c}"
        row_data.append(cp5_oofv.get(key))  # Fill missing wells with None
        text = plateID_to_condition_nit.get(key)
        if text is None:
            row_text.append("")  # or 'Missing' or key itself
        else:
            text_segment = text.split(' ')
            text = '<br>'.join(text_segment)
            row_text.append(text)
    matrix_nit.append(row_data)
    label.append(row_text)

In [None]:
nit_matrix = pd.DataFrame(matrix_nit)
def assign_bin(val):
    if pd.isna(val):
        return np.nan
    elif val < lowest_bound:
        return 0
    elif val < growth_bound: #purple
        return 1
    elif val < slow_growth_bound: #pink
        return 2
    elif val < no_growth_bound: #yellow
        return 3
    else:
        return 4
df_binned_nit = nit_matrix.map(assign_bin)


In [None]:
info_nit = np.array([
    [3.0, 1,   3, 3, 3, 3, 1.0, 1, 1, 1.0, np.nan, np.nan],
    [1, 1,   3, 3, 3, np.nan, np.nan, np.nan, 1, 1, np.nan, 1],
    [3.0, 3,   1, np.nan, 3, 3, np.nan, 1, np.nan, 3.0, np.nan, 1],
    [3.0, 3,   3, 3, 3, 3, 3.0, 3, 3, 3.0, np.nan, np.nan],
    [3.0, 3,   3, 3, 3, 1, 3.0, 1, 3, np.nan, 1.0, 3],
    [np.nan, np.nan, 1, 1, 1, 3, np.nan, 3, 3, np.nan, np.nan, 3],
    [np.nan, np.nan, np.nan, np.nan, 3, 3, np.nan, np.nan, 3, np.nan, 1.0, 3],
    [1.0, 1,   1, 1, 1, 1, 1.0, 1, 1, 1.0, 1.0, 1]
])
df_nitrogen_sole_OFFICIAL = pd.DataFrame(info_nit)


In [None]:
y_true = df_nitrogen_sole_OFFICIAL.to_numpy().flatten()
y_pred = df_binned_nit.to_numpy().flatten()

# Mask to ignore NaNs in true labels
mask = ~np.isnan(y_true)

# Overall accuracy ignoring NaNs
accuracy = (y_true[mask] == y_pred[mask]).mean()
print(f"Overall accuracy (ignoring NaNs): {accuracy:.2%}")

# --- Accuracy per category ---

# Find unique categories ignoring NaN
categories = np.unique(y_true[mask])

for cat in categories:
    # Mask for current category (only valid indices without NaNs)
    cat_mask = (y_true == cat) & mask
    cat_accuracy = (y_true[cat_mask] == y_pred[cat_mask]).mean()
    print(f"Accuracy for category {cat}: {cat_accuracy:.2%}")

In [None]:
# Flatten both DataFrames to 1D arrays
y_true = df_nitrogen_sole_OFFICIAL.to_numpy().flatten()
y_pred = df_binned_nit.to_numpy().flatten()

mask = ~np.isnan(y_true)

accuracy = (y_true[mask] == y_pred[mask]).mean()

print(f"Accuracy without considering the NaN in our model (aka not in the model): {accuracy:.2%}")


In [None]:
# Create mask that is True only where BOTH are not NaN
mask = ~np.isnan(y_true) & ~np.isnan(y_pred)

# Filter both arrays to remove NaNs
y_true_clean = y_true[mask]
y_pred_clean = y_pred[mask]

accuracy = (y_true_clean == y_pred_clean).mean()

print(f"Accuracy without NaNs from either: {accuracy:.2%}")

In [None]:
import plotly.graph_objects as go
heatmap = go.Heatmap(z=matrix_nit,
                     x=[str(c) for c in cols],
                     y=rows,
                     text=label,
                     texttemplate="%{text}",
                     textfont={"size": 10},
                     colorscale = dcolorsc,
                     zmin = lowest_bound,
                     zmax = 43000,
                     colorbar = dict(thickness=25,
                                     tickvals=tickvals,
                                     ticktext=ticktext),
                     )

fig = go.Figure(data=[heatmap])
fig.update_layout(
    paper_bgcolor='rgba(0,0,0,0)',
    title="Plate Reader Nitrogen Heatmap with Values",
    font_color="black",
    xaxis_title="Column",
    yaxis_title="Row",
    yaxis_autorange='reversed'
)

fig.show(renderer='browser')
# fig.write_image("notebooks/Heena notebooks/Metabolism_New Genes/out/heatmap_microarray1_C_source.png", scale=5, width=1200, height=800)

In [None]:
# Plate dimensions
rows = list("ABCDEFGH")
cols = list(range(1, 13))

#for phosphate/sulfur array
# Create a 2D array with None as default
matrix_phos = []
label = []
for r in rows:
    row_data = []
    row_text = []
    for c in cols:
        key = f"{r}{c}"
        row_data.append(cp6_oofv.get(key))  # Fill missing wells with None
        text = plateID_to_condition_sulfur_phos.get(key)
        if text is None:
            row_text.append("")  # or 'Missing' or key itself
        else:
            text_segment = text.split(' ')
            text = '<br>'.join(text_segment)
            row_text.append(text)
    matrix_phos.append(row_data)
    label.append(row_text)

In [None]:
phos_matrix = pd.DataFrame(matrix_phos)

In [None]:
def assign_bin(val):
    if pd.isna(val):
        return np.nan
    elif val < lowest_bound:
        return 0
    elif val < growth_bound: #purple
        return 1
    elif val < slow_growth_bound: #pink
        return 2
    elif val < no_growth_bound: #yellow
        return 3
    else:
        return 4
df_binned_phos = phos_matrix.map(assign_bin)


In [None]:
info_phos = np.array([
    [3, 1,   1, 1, 1, 3, 3, 1, 1, 1, 1, 1],
    [1, 1,   1, 1, 1, 1, 1, 1, 1, 1, 1, 3],
    [1, 1,   1, 1, 1, 1, 1, 1, 1, 1, 1, np.nan],
    [1, 1,   1, 1, 1, 1, 1, 1, 1, 1, 1, 3],
    [1, 1,   1, 1, 1, np.nan, np.nan, np.nan, 1, 1, 1, 3],
    [3, 1,   1, 1, 1, 1, 1, 1, 1, np.nan, np.nan, 1],
    [3, 1,   1, 1, 1, np.nan, 1, 1, 1, 1, 1, 3],
    [3, np.nan, np.nan, np.nan, np.nan, 1, 1, 3, 1, 1, 1, 3],
])
df_phos_sul_sole_OFFICIAL = pd.DataFrame(info_phos)


In [None]:
y_true = df_phos_sul_sole_OFFICIAL.to_numpy().flatten()
y_pred = df_binned_phos.to_numpy().flatten()

# Mask to ignore NaNs in true labels
mask = ~np.isnan(y_true)

# Overall accuracy ignoring NaNs
accuracy = (y_true[mask] == y_pred[mask]).mean()
print(f"Overall accuracy (ignoring NaNs): {accuracy:.2%}")

# --- Accuracy per category ---

# Find unique categories ignoring NaN
categories = np.unique(y_true[mask])

for cat in categories:
    # Mask for current category (only valid indices without NaNs)
    cat_mask = (y_true == cat) & mask
    cat_accuracy = (y_true[cat_mask] == y_pred[cat_mask]).mean()
    print(f"Accuracy for category {cat}: {cat_accuracy:.2%}")

In [None]:
# Flatten both
y_true = df_phos_sul_sole_OFFICIAL.to_numpy().flatten()
y_pred = df_binned_phos.to_numpy().flatten()

mask = ~np.isnan(y_true)

accuracy = (y_true[mask] == y_pred[mask]).mean()

print(f"Accuracy without considering the NaN in our model (aka not in the model): {accuracy:.2%}")

In [None]:
# Create mask that is True only where BOTH are not NaN
mask = ~np.isnan(y_true) & ~np.isnan(y_pred)

# Filter both arrays to remove NaNs
y_true_clean = y_true[mask]
y_pred_clean = y_pred[mask]

accuracy = (y_true_clean == y_pred_clean).mean()

print(f"Accuracy without NaNs from either: {accuracy:.2%}")

In [None]:
import plotly.graph_objects as go
heatmap = go.Heatmap(z=matrix_phos,
                     x=[str(c) for c in cols],
                     y=rows,
                     text=label,
                     texttemplate="%{text}",
                     textfont={"size": 10},
                     colorscale = dcolorsc,
                     zmin = lowest_bound,
                     zmax = 43000,
                     colorbar = dict(thickness=25,
                                     tickvals=tickvals,
                                     ticktext=ticktext),
                     )

fig = go.Figure(data=[heatmap])
fig.update_layout(
    paper_bgcolor='rgba(0,0,0,0)',
    title="Plate Reader Phosphate and Sulfur Heatmap with Values",
    xaxis_title="Column",
    yaxis_title="Row",
    yaxis_autorange='reversed'
)

fig.show(renderer='browser')
# fig.write_image("notebooks/Heena notebooks/Metabolism_New Genes/out/heatmap_microarray1_C_source.png", scale=5, width=1200, height=800)

# Plot with kinetic scatter

In [None]:
kc_target_cp4 = pd.DataFrame(fba["target_kinetic_fluxes"], columns= kinetic_reaction_ids).mean(axis=0).copy()
cols = conditions_all[6:] + ['is_new', 'kinetic']
df_kc_sim_cp4 = df_all.loc[kinetic_reaction_ids, cols]

In [None]:
df_kc_sim_cp4_log = df_kc_sim_cp4.copy()
df_kc_sim_cp4_log['kinetic'] = df_kc_sim_cp4_log['kinetic'].apply(pd.to_numeric, errors='coerce')
cols = conditions_all[6:] + [ 'kinetic']
df_kc_sim_cp4_log.loc[:,cols] += 1e-6
df_kc_sim_cp4_log.loc[:,cols] = np.log10(df_kc_sim_cp4_log.loc[:,cols])
df_kc_sim_cp4_log

In [None]:
# work with df_kc_cp1 or df_kc_cp1_log
# create a dataframe with rows as reactions, col1 as flux closest to target, col2 being condition(s) that have that flux
df_kc_cp3_log_combined = pd.DataFrame({'flux': np.nan, 'condition': 'none', 'target': df_kc_sim_cp4_log['kinetic'], 'is_new': df_kc_sim_cp4_log['is_new']}, index=kinetic_reaction_ids)
condition_names = conditions_all[6:]
# create temporary dataframe that tracks the difference between sim flux and target
temp = df_kc_sim_cp4_log.copy()
temp.replace(0, np.nan, inplace=True)
for condition in condition_names:
    temp.loc[:, condition] = np.abs(temp.loc[:, condition] - df_kc_sim_cp4_log['kinetic'].values)

# parse by reaction (index) to find the conditions with flux closest to target (minimal difference)
for reaction in temp.index:
    # get the condition with the minimum difference
    min_diff = temp.loc[reaction, condition_names].min()
    # get the condition(s) with the minimum difference
    conditions = temp.loc[reaction, condition_names][temp.loc[reaction, condition_names] == min_diff].index.tolist()
    # update the dataframe
    if np.all(df_kc_sim_cp4_log.loc[reaction,condition_names] == -6):
        df_kc_cp4_log_combined.loc[reaction, 'flux'] = -6
    else:
        df_kc_cp4_log_combined.loc[reaction, 'flux'] = df_kc_sim_cp4_log.loc[reaction,conditions[0]]
        if conditions == condition_names:
            df_kc_cp4_log_combined.loc[reaction, 'condition'] = 'all'
        else:
            df_kc_cp4_log_combined.loc[reaction, 'condition'] = ', '.join(conditions)
#
# df_kc_cp1_log_combined = df_kc_cp1_combined.copy()
# df_kc_cp1_log_combined['target'] = df_kc_cp1_log_combined['target'].apply(pd.to_numeric, errors='coerce')
# df_kc_cp1_log_combined.loc[:,['flux','target']] += 1e-6
# df_kc_cp1_log_combined.loc[:,['flux','target']] = np.log10(df_kc_cp1_log_combined.loc[:,['flux','target']])
#
df_kc_cp4_log_combined.to_csv('notebooks/Heena notebooks/Metabolism_New Genes/df_kc_cp3_combined.csv', index=True)

In [None]:
# plotly figure
import plotly.express as px
fig = px.scatter(
    df_kc_cp4_log_combined,
    x='target',
    y='flux',
    color='is_new',
    hover_name=df_kc_cp3_log_combined.index,
    hover_data=['condition'],
    labels={'flux': 'Simulated Flux (log10)', 'target': 'Target Flux (log10)'},
    title='Kinetic Simulated Flux vs Target Flux at Checkpoint 3',
)
fig.update_traces(marker=dict(size=10, line=dict(width=2, color='DarkSlateGrey')), selector=dict(mode='markers'))
fig.update_layout(
    paper_bgcolor='rgba(0,0,0,0)',
    xaxis_title="Target Flux (log10)",
    yaxis_title="Simulated Flux (log10)",
    legend_title_text='New Reactions',
    width=800,
    height=600
)
fig.show(renderer='browser')

# Plot 3: Create heatmap of fluxes going through the genes

In [None]:
%store -r genes_to_enzymes

In [None]:
def get_keys(dict, value):
    return [key for key in dict if dict[key] == value]

In [None]:
reaction_catalysts = metabolism.parameters["reaction_catalysts"]
conditions = df_all.columns[:-2]

heatmap_gene_flux = pd.DataFrame(index = df_all_gene_usage.index, columns = conditions, data=0)

# len(reaction_names)
# basal_new_reaction_usage = df_reaction_track.loc[fba_new_reaction_ids,'basal']
for condition in conditions:
    for rxn in reaction_names:
        enzymes = reaction_catalysts.get(rxn,[])
        for enz in enzymes:
            enz = enz[:-3]
            genes = get_keys(genes_to_enzymes, enz)
            for gene in genes:
                heatmap_gene_flux.loc[gene, condition] += float(df_all.loc[rxn, condition])

                # heatmap_gene_flux.loc[gene, condition] += df_all.loc[rxn, condition]

In [None]:
# normalize the fluxes by row - condition
heatmap_gene_flux_normalized = heatmap_gene_flux.copy()
heatmap_gene_flux_normalized = heatmap_gene_flux_normalized.div(heatmap_gene_flux_normalized.max(axis=1), axis=0)
heatmap_gene_flux_normalized.fillna(-1, inplace=True)
heatmap_gene_flux_normalized

In [None]:
heatmap_new_gene_flux_normalized = heatmap_gene_flux_normalized.loc[new_genes,:]

In [None]:
from plotly import graph_objects as go
data = heatmap_new_gene_flux_normalized

# Convert to numpy array for masking
z = data.values

# Create custom colorscale: map 0 to grey, and nonzero values to white→red
custom_colorscale = [
    [0.0, 'rgb(211,211,211)'],  # grey for zeros
    [1e-8, 'rgb(255,255,255)'], # white just above zero
    [1.0, '#b53131']   # soft red for max
]

# Mask 0s slightly to ensure they stay grey

fig = go.Figure(data=go.Heatmap(
    z=z,
    x=data.columns,
    y=data.index,
    colorscale=custom_colorscale,
    colorbar=dict(title='Normalized Flux'),
    zmin=0,
    zmax=1,  # ensures white → red applies to nonzeros
    hoverongaps=False
))

fig.update_layout(
    title='Gene Flux Heatmap (Normalized)',
    xaxis_title='Condition',
    yaxis_title='Gene ID',
    height=1000,
)

# fig.show(renderer='browser')
fig.write_html('notebooks/Heena notebooks/Metabolism_New Genes/gene_usage_bt_condition_heatmap_cp2.html', include_plotlyjs='cdn')

In [None]:
%store df_all_gene_usage_cp2