#### Riley's new gene project has shown that when a GFP gene is induced in *E. coli*, there is a consistent shift in the proteome fraction of essential proteins, i.e. most essential proteins have a roughly 10% smaller proteome fraction than in wild-type *E. coli*. The purpose of this notebook is to identify any connection between the decreased expression of these essential proteins and a potential metabolic defect. To do this, we will 1) import the complex IDs of essential proteins that have a decreased proteome fraction, 2) identify the kinetic reactions catalyzed by some of these complexes, and 3) perform FBA to see whether there is any metabolic defect when these reactions are knocked down (e.g. to 80% of wild type).

#### Since no effect was observed when knocking down the kinetic targets, we will now instead use constraints to force the flux through these reactions to be at most 80% of the wild-type (WT) fluxes.

In [1]:
import pandas as pd
import os, dill
import numpy as np
import cvxpy as cp
from typing import Iterable, Optional, Mapping, cast
from plotly.graph_objects import Scatter, Figure

from ecoli.processes.metabolism_redux_classic import NetworkFlowModel, FlowResult
# Work from the vEcoli checkout in the user's home directory; the relative
# paths used later in this notebook (notebooks/..., out/...) are resolved
# against this directory.
os.chdir(os.path.expanduser('~/dev/vEcoli'))

# NOTE(review): this only loads the extension; no `%autoreload` mode is set
# here -- confirm whether `%autoreload 2` was intended.
%load_ext autoreload

In [2]:
# Load the complex ids of interest (one id per line, no header row).
complex_ids_txt = pd.read_csv('notebooks/Heena notebooks/Riley New Genes/proteome_fraction_scatterplot_below_line_essential_complex_ids_16.txt', header=None)
complex_ids = np.unique(complex_ids_txt[0].tolist())

# Locate the simulation output that holds the kinetic reaction information.
time = '600'
date = '2025-11-30'
experiment = 'output_objective_weights'
condition = 'basal'
entry = f'{experiment}_{time}_{date}'
folder = f'out/objective_weight/{condition}/{entry}/'

# SECURITY NOTE: both loads below unpickle arbitrary Python objects
# (np.load with allow_pickle, and dill.load). Only point `folder` at
# simulation output that you generated yourself or otherwise trust.
output = np.load(folder + '0_output.npy', allow_pickle=True).item()
output = output['agents']['0']
fba = output['listeners']['fba_results']
bulk = pd.DataFrame(output['bulk'])

# Use a context manager so the file handle is closed even if unpickling fails.
with open(folder + 'agent_steps.pkl', 'rb') as f:
    agent = dill.load(f)

In [3]:
# Pull the metabolism process out of the loaded agent and cache the pieces
# that the rest of the notebook refers to by short names.
metabolism = agent['ecoli-metabolism-redux-classic']

# Network definition
stoichiometry = metabolism.stoichiometry.copy()
metabolites = metabolism.metabolite_names.copy()
reaction_names = metabolism.reaction_names
exchange_molecules = metabolism.exchange_molecules
allowed_exchange_uptake = metabolism.allowed_exchange_uptake

# Kinetic-constraint bookkeeping
kinetic_reaction_ids = metabolism.kinetic_constraint_reactions
binary_kinetic_idx = metabolism.binary_kinetic_idx
kinetic_enzymes = metabolism.parameters['kinetic_constraint_enzymes']

# Reaction-id lookups kept in the process parameters
fba_new_reaction_ids = metabolism.parameters["fba_new_reaction_ids"]
fba_reaction_ids_to_base_reaction_ids = metabolism.parameters['fba_reaction_ids_to_base_reaction_ids']

# Labelled copy of the stoichiometric matrix (rows: metabolites, columns: reactions)
S = pd.DataFrame(stoichiometry.copy(), index=metabolites, columns=reaction_names)

# Column-wise means of the FBA listener outputs
homeostatic_count = pd.DataFrame(
    fba["homeostatic_metabolite_counts"],
    columns=metabolism.homeostatic_metabolites,
).mean(axis=0)
homeostatic = pd.DataFrame(
    fba["target_homeostatic_dmdt"],
    columns=metabolism.homeostatic_metabolites,
).mean(axis=0)
# The first maintenance entry is skipped before averaging.
maintenance = pd.DataFrame(
    fba["maintenance_target"][1:],
    columns=['maintenance_reaction'],
).mean(axis=0)
kinetic = pd.DataFrame(
    fba["target_kinetic_fluxes"],
    columns=metabolism.kinetic_constraint_reactions,
).mean(axis=0).copy()

# Reactions handed to NetworkFlowModel as `free_reactions`.
# TODO: load these from the script directly to avoid mismatches
FREE_RXNS = [
    "TRANS-RXN-145",
    "TRANS-RXN0-545",
    "TRANS-RXN0-474",
    "ATPSYN-RXN (reverse)",  # TODO: Riley added this one
]

In [4]:
# Complexes of interest that also appear among the kinetic-constraint enzymes
kinetic_catalyst_overlapped = np.intersect1d(ar1=kinetic_enzymes, ar2=complex_ids)
print(
    f'There are {len(kinetic_catalyst_overlapped)} complexes of interest that are also kinetic catalysts.'
)

There are 54 complexes of interest that are also kinetic catalysts.


In [5]:
# Look up the catalysts recorded for every kinetic reaction
kinetic_reaction_catalysts = {
    rxn: metabolism.parameters['reaction_catalysts'].get(rxn)
    for rxn in kinetic_reaction_ids
}

# Keep only the reactions with at least one catalyst among the overlapping complexes
kinetic_reaction_catalysts_essential = {
    rxn: catalysts
    for rxn, catalysts in kinetic_reaction_catalysts.items()
    if np.isin(catalysts, kinetic_catalyst_overlapped).any()
}

## Test FBA

In [6]:
# Kinetic reactions catalyzed by the complexes of interest, with their mean targets
kinetic_reaction_id_essential = kinetic_reaction_catalysts_essential.keys()
kinetic_target_essential = kinetic.loc[kinetic_reaction_id_essential]

# Mean flux of every reaction over the short vEcoli sim
sim = (
    pd.DataFrame(fba["estimated_fluxes"], columns=reaction_names)
    .mean(axis=0)
    .copy()
)

# Baseline WT fluxes of the kinetic reactions, keyed by their position in reaction_names
kinetic_reaction_idx = np.array(list(map(reaction_names.index, kinetic_reaction_ids)))
WT_fluxes = sim.iloc[kinetic_reaction_idx]
kinetic_constraint_dict = {
    rxn_idx: wt_flux for rxn_idx, wt_flux in zip(kinetic_reaction_idx, WT_fluxes)
}

# Mean kinetic target flux of each kinetic reaction
kc_target = (
    pd.DataFrame(fba["target_kinetic_fluxes"], columns=kinetic_reaction_ids)
    .mean(axis=0)
    .copy()
)

In [7]:
# Inspect which fields the FBA listener results contain.
fba.keys()

dict_keys(['solution_fluxes', 'solution_dmdt', 'solution_residuals', 'time_per_step', 'estimated_fluxes', 'estimated_homeostatic_dmdt', 'homeostatic_metabolite_counts', 'target_homeostatic_dmdt', 'estimated_exchange_dmdt', 'estimated_intermediate_dmdt', 'target_kinetic_fluxes', 'target_kinetic_bounds', 'reaction_catalyst_counts', 'homeostatic_term', 'secretion_term', 'efficiency_term', 'kinetic_term', 'maintenance_target', 'loss_total', 'loss_kinetic', 'loss_homeostatic', 'loss_secretion', 'loss_efficiency', 'loss_diversity'])

In [8]:
# Reference row built from the simulation itself: the mean of each recorded
# loss. The objective and term columns are left as None for this row.
input_row = {
    "overall_loss": np.mean(fba['loss_total']),
    "kinetic_loss": np.mean(fba['loss_kinetic']),
    "homeostatic_loss": np.mean(fba['loss_homeostatic']),
    "overall_objective": None,
    "kinetic_counts_unweighted": None,
    "homeostatic_counts": None,
    "description": "Input data from short vEcoli sim",
}

In [9]:
# Plot total loss, kinetic loss, and homeostatic loss over time
import plotly.express as px

# One row per recorded step; "time" is simply the step index.
loss_df = pd.DataFrame({
    "time": np.arange(len(fba['loss_total'])),
    "total_loss": fba['loss_total'],
    "kinetic_loss": fba['loss_kinetic'],
    "homeostatic_loss": fba['loss_homeostatic'],
})

# Long format so each loss type becomes its own coloured line.
loss_df_melted = pd.melt(
    loss_df,
    id_vars=['time'],
    value_vars=['total_loss', 'kinetic_loss', 'homeostatic_loss'],
    var_name='loss_type',
    value_name='loss_value',
)

fig = px.line(
    loss_df_melted,
    x='time',
    y='loss_value',
    color='loss_type',
    title='Losses over time',
)
fig.show()

In [10]:
def get_subset_S(S, met_of_interest):
    """Restrict a stoichiometric DataFrame to selected metabolites.

    Args:
        S: DataFrame whose rows are selected by label via ``S.loc``.
        met_of_interest: list-like of row labels to keep.

    Returns:
        A tuple ``(sub_S, columns)`` where ``sub_S`` holds the selected rows
        and only the columns with at least one non-zero entry among them,
        and ``columns`` is ``sub_S.columns``.
    """
    selected_rows = S.loc[met_of_interest, :]
    # A column is kept as soon as any selected row has a non-zero coefficient.
    has_nonzero = (selected_rows != 0).any(axis=0)
    trimmed = selected_rows.loc[:, has_nonzero]
    return trimmed, trimmed.columns

def get_keys(dict, value):
    """Return the keys of ``dict`` whose entry contains any element of ``value``.

    Args:
        dict: mapping from key to a collection of items (the name shadows the
            builtin; it is kept because it is part of the call interface).
        value: a single item or a collection of items to look for.

    Returns:
        List of keys, in the mapping's iteration order, for which at least
        one element of ``value`` is found in ``dict[key]``.
    """
    matching_keys = []
    for key in dict:
        if np.isin(value, dict[key]).any():
            matching_keys.append(key)
    return matching_keys

def test_NetworkFlowModel(
            objective_weights,
            kinetic_targets = pd.DataFrame(fba["target_kinetic_fluxes"],
                                           columns=metabolism.kinetic_constraint_reactions).loc[24, :].copy(),
            uptake_addition = set([]), uptake_removal = set([]), new_exchange_molecules = set([]),
            add_metabolite = None, add_reaction = None, add_kinetic = None, remove_reaction = None, force_reaction = None,
            add_homeostatic_demand = None,
            kinetic_constraint = None, kinetic_adjustment = None,
            solver_choice=cp.GLOP):
    """Build a NetworkFlowModel from the loaded metabolism process and solve it once.

    The network, exchanges and targets are taken from the notebook-level
    objects ``metabolism``, ``stoichiometry``, ``homeostatic``,
    ``homeostatic_count``, ``maintenance`` and ``FREE_RXNS``. They can be
    perturbed through the optional arguments. All perturbations are applied
    to local copies, so neither the notebook-level objects nor the default
    ``kinetic_targets`` are modified by a call.

    Args:
        objective_weights: passed to ``model.solve`` as ``objective_weights``.
        kinetic_targets: reaction-keyed kinetic targets (Series or dict-like).
            The default is the row labelled 24 of ``fba["target_kinetic_fluxes"]``,
            evaluated once when this function is defined.
        uptake_addition: molecules added to ``metabolism.allowed_exchange_uptake``.
        uptake_removal: molecules removed from the allowed uptake set.
        new_exchange_molecules: molecules unioned into ``metabolism.exchange_molecules``.
        add_metabolite: iterable of metabolite names. Names not yet in the
            model are appended as all-zero rows of the stoichiometric matrix.
        add_reaction: dict ``{reaction name: {metabolite: coefficient}}``. A
            new name appends a column; an existing name has its column replaced.
        add_kinetic: dict ``{reaction name: kinetic target}``; sets the target
            and registers the reaction as kinetic if it is not already.
        remove_reaction: iterable of reaction names to delete from the model
            (and from the kinetic reactions/targets when present).
        force_reaction: iterable of reaction names whose indices are passed to
            ``model.solve`` as ``force_flow_idx``.
        add_homeostatic_demand: list of metabolites that get a homeostatic
            dm target of 100 and a homeostatic concentration of 1.
        kinetic_constraint: forwarded unchanged to ``model.solve``.
        kinetic_adjustment: forwarded unchanged to ``model.solve``.
        solver_choice: forwarded to ``model.solve`` as ``solver``.

    Returns:
        The ``FlowResult`` returned by ``NetworkFlowModel.solve``.
    """
    # update exchanges
    uptake = set(metabolism.allowed_exchange_uptake.copy())
    uptake = uptake | set(uptake_addition)
    uptake = uptake - set(uptake_removal)

    exchange_molecules = metabolism.exchange_molecules.copy()
    exchange_molecules = exchange_molecules | set(new_exchange_molecules)

    # update stoichiometry
    reaction_names = metabolism.reaction_names.copy()
    kinetic_reaction_ids = metabolism.kinetic_constraint_reactions.copy()
    # Copy so that add_kinetic / remove_reaction never mutate the caller's
    # object or the shared default argument.
    kinetic = kinetic_targets.copy()
    metabolites = metabolism.metabolite_names.copy()
    homeostatic_counts = homeostatic_count.copy() * metabolism.counts_to_molar.asNumber()
    # Local copy so add_homeostatic_demand does not leak into later calls.
    homeostatic_targets = homeostatic.copy()

    S_new = stoichiometry.copy()

    if add_metabolite is not None: # add to metabolites list because they are currently not included in the model
        # Only metabolites that are really new get a row, so the matrix and
        # the metabolite list stay the same length (duplicates are ignored).
        new_metabolites = [m for m in dict.fromkeys(add_metabolite) if m not in metabolites]
        metabolites.extend(new_metabolites)
        S_new = np.concatenate((S_new, np.zeros((len(new_metabolites), S_new.shape[1]))), axis=0)

    if add_reaction is not None:
        # add_reaction must be a dictionary
        assert isinstance(add_reaction, dict)

        for r, s in add_reaction.items():
            new_reaction = np.zeros((S_new.shape[0], 1))
            for m, v in s.items():
                new_reaction[metabolites.index(m), 0] = v
            if r in reaction_names:
                # Replace the existing column instead of appending an unnamed
                # duplicate, which would desynchronise S_new and reaction_names.
                r_idx = reaction_names.index(r)
                S_new = np.concatenate(
                    (S_new[:, :r_idx], new_reaction, S_new[:, r_idx + 1:]), axis=1)
            else:
                reaction_names.append(r)
                S_new = np.concatenate((S_new, new_reaction), axis=1)

    if add_kinetic is not None:
        # add_kinetic must be a dictionary
        assert isinstance(add_kinetic, dict)

        for r, v in add_kinetic.items():
            if r not in kinetic_reaction_ids:
                kinetic_reaction_ids.append(r)
            kinetic[r] = v

    if remove_reaction is not None:
        for r in remove_reaction:
            r_idx = reaction_names.index(r)
            S_new = np.delete(S_new, r_idx, axis=1)
            reaction_names.remove(r)
            if r in kinetic_reaction_ids:
                kinetic_reaction_ids.remove(r)
                del kinetic[r]

    if force_reaction is not None:
        force_reaction_idx = np.array([reaction_names.index(r) for r in force_reaction])
    else:
        force_reaction_idx = None

    if add_homeostatic_demand is not None:
        # add_homeostatic_demand must be a list
        assert isinstance(add_homeostatic_demand, list)

        # NOTE(review): the model below is still built with
        # metabolism.homeostatic_metabolites; confirm that metabolites added
        # here are already part of that list.
        for met in add_homeostatic_demand:
            homeostatic_targets[met] = 100
            homeostatic_counts[met] = 1

    # Solve NetworkFlowModel
    model = NetworkFlowModel(
            stoich_arr=S_new,
            metabolites=metabolites,
            reactions=reaction_names,
            homeostatic_metabolites=metabolism.homeostatic_metabolites,
            kinetic_reactions=kinetic_reaction_ids,
            free_reactions=FREE_RXNS)
    model.set_up_exchanges(exchanges=exchange_molecules, uptakes=uptake)
    solution: FlowResult = model.solve(
            homeostatic_concs=homeostatic_counts, # in conc
            homeostatic_dm_targets=np.array(list(dict(homeostatic_targets).values())), # *10^7
            maintenance_target=maintenance, # *10^6 ish
            kinetic_targets=np.array(list(dict(kinetic).values())), # *10^6 ish
            # binary_kinetic_idx is deliberately not used here
            binary_kinetic_idx=None,
            force_flow_idx=force_reaction_idx,
            objective_weights=objective_weights, #same
            upper_flux_bound= 1000000000, # increase to 10^9 because notebook runs FlowResult using Counts, WC runs using conc.,
            kinetic_constraint=kinetic_constraint,
            kinetic_adjustment=kinetic_adjustment,
            solver=solver_choice) #SCS. ECOS, MOSEK
    return solution

In [11]:
# Collects one summary dict per FBA run, keyed by perturbation name.
obj_by_perturbation = {}

In [12]:
# Baseline: FBA without any kinetic constraint or target changes
objective_weights = {
    'secretion': 0.01,
    'efficiency': 1e-6,
    'kinetics': 1e-5,
    'homeostatic': 1,
}
normal_solution = test_NetworkFlowModel(objective_weights)

# Unpack the losses, objective value and unweighted terms from the solution
loss_overall, loss_kinetic, loss_homeostatic = (
    normal_solution.loss_total,
    normal_solution.loss_kinetic,
    normal_solution.loss_homeostatic,
)
oofv, kinetic_soln, homeostatic_soln = (
    normal_solution.objective,
    normal_solution.kinetic_term,
    normal_solution.homeostatic_term,
)

obj_by_perturbation['normal_fba'] = {
    "overall_loss": loss_overall,
    "kinetic_loss": loss_kinetic,
    "homeostatic_loss": loss_homeostatic,
    "overall_objective": oofv,
    "kinetic_counts_unweighted": kinetic_soln,
    "homeostatic_counts": homeostatic_soln,
    "description": "Normal FBA",
}

In [13]:
# FBA with an 80% kinetic constraint applied to all kinetic reactions
objective_weights = {
    'secretion': 0.01,
    'efficiency': 1e-6,
    'kinetics': 1e-5,
    'homeostatic': 1,
}
solution_80pct_all_kinetic = test_NetworkFlowModel(
    objective_weights,
    kinetic_constraint=kinetic_constraint_dict,
    kinetic_adjustment=0.8,
)

# Unpack the losses, objective value and unweighted terms from the solution
loss_overall, loss_kinetic, loss_homeostatic = (
    solution_80pct_all_kinetic.loss_total,
    solution_80pct_all_kinetic.loss_kinetic,
    solution_80pct_all_kinetic.loss_homeostatic,
)
oofv, kinetic_soln, homeostatic_soln = (
    solution_80pct_all_kinetic.objective,
    solution_80pct_all_kinetic.kinetic_term,
    solution_80pct_all_kinetic.homeostatic_term,
)

obj_by_perturbation['fba_80pct_all_kinetic'] = {
    "overall_loss": loss_overall,
    "kinetic_loss": loss_kinetic,
    "homeostatic_loss": loss_homeostatic,
    "overall_objective": oofv,
    "kinetic_counts_unweighted": kinetic_soln,
    "homeostatic_counts": homeostatic_soln,
    "description": "FBA with 80% kinetic constraint on all kinetic reactions",
}

In [14]:
# FBA with a 50% kinetic constraint applied to all kinetic reactions
objective_weights = {
    'secretion': 0.01,
    'efficiency': 1e-6,
    'kinetics': 1e-5,
    'homeostatic': 1,
}
solution_50pct_all_kinetic = test_NetworkFlowModel(
    objective_weights,
    kinetic_constraint=kinetic_constraint_dict,
    kinetic_adjustment=0.5,
)

# Unpack the losses, objective value and unweighted terms from the solution
loss_overall, loss_kinetic, loss_homeostatic = (
    solution_50pct_all_kinetic.loss_total,
    solution_50pct_all_kinetic.loss_kinetic,
    solution_50pct_all_kinetic.loss_homeostatic,
)
oofv, kinetic_soln, homeostatic_soln = (
    solution_50pct_all_kinetic.objective,
    solution_50pct_all_kinetic.kinetic_term,
    solution_50pct_all_kinetic.homeostatic_term,
)

obj_by_perturbation['fba_50pct_all_kinetic'] = {
    "overall_loss": loss_overall,
    "kinetic_loss": loss_kinetic,
    "homeostatic_loss": loss_homeostatic,
    "overall_objective": oofv,
    "kinetic_counts_unweighted": kinetic_soln,
    "homeostatic_counts": homeostatic_soln,
    "description": "FBA with 50% kinetic constraint on all kinetic reactions",
}


In [15]:
# FBA with 80% kinetic constraint on all kinetic reactions, with the kinetic
# targets themselves also scaled to 80% of their simulated values
objective_weights = {'secretion': 0.01, 'efficiency': 1e-6, 'kinetics': 1e-5, 'homeostatic': 1}

# Kinetic targets from the row labelled 24 of the simulated targets, multiplied by 0.80
kinetic_targets_80pct = pd.DataFrame(
    fba["target_kinetic_fluxes"],
    columns=metabolism.kinetic_constraint_reactions).loc[24, :].copy() * 0.80
solution_80pct_all_kinetic_reduced_targets = (
    test_NetworkFlowModel(
        objective_weights,
        kinetic_targets=kinetic_targets_80pct,
        kinetic_constraint=kinetic_constraint_dict,
        kinetic_adjustment=0.8
    ))
loss_overall = solution_80pct_all_kinetic_reduced_targets.loss_total
loss_kinetic = solution_80pct_all_kinetic_reduced_targets.loss_kinetic
loss_homeostatic = solution_80pct_all_kinetic_reduced_targets.loss_homeostatic
oofv = solution_80pct_all_kinetic_reduced_targets.objective
kinetic_soln = solution_80pct_all_kinetic_reduced_targets.kinetic_term
homeostatic_soln = solution_80pct_all_kinetic_reduced_targets.homeostatic_term
obj_by_perturbation['fba_80pct_all_kinetic_reduced_targets'] = {
    "overall_loss": loss_overall,
    "kinetic_loss": loss_kinetic,
    "homeostatic_loss": loss_homeostatic,
    "overall_objective": oofv,
    "kinetic_counts_unweighted": kinetic_soln,
    "homeostatic_counts": homeostatic_soln,
    # The targets are multiplied by 0.80, i.e. scaled to 80% (a 20% reduction),
    # so the label says "scaled to" rather than "80% reduction".
    "description": "FBA with 80% kinetic constraint on all kinetic reactions"
                   " and kinetic targets scaled to 80%"}

In [16]:
# FBA with a 50% kinetic constraint on all kinetic reactions, with the kinetic
# targets themselves also halved
objective_weights = {
    'secretion': 0.01,
    'efficiency': 1e-6,
    'kinetics': 1e-5,
    'homeostatic': 1,
}

# Kinetic targets from the row labelled 24 of the simulated targets, multiplied by 0.50
kinetic_targets_50pct = (
    pd.DataFrame(
        fba["target_kinetic_fluxes"],
        columns=metabolism.kinetic_constraint_reactions,
    ).loc[24, :].copy() * 0.50
)
solution_50pct_all_kinetic_reduced_targets = test_NetworkFlowModel(
    objective_weights,
    kinetic_targets=kinetic_targets_50pct,
    kinetic_constraint=kinetic_constraint_dict,
    kinetic_adjustment=0.5,
)

# Unpack the losses, objective value and unweighted terms from the solution
loss_overall, loss_kinetic, loss_homeostatic = (
    solution_50pct_all_kinetic_reduced_targets.loss_total,
    solution_50pct_all_kinetic_reduced_targets.loss_kinetic,
    solution_50pct_all_kinetic_reduced_targets.loss_homeostatic,
)
oofv, kinetic_soln, homeostatic_soln = (
    solution_50pct_all_kinetic_reduced_targets.objective,
    solution_50pct_all_kinetic_reduced_targets.kinetic_term,
    solution_50pct_all_kinetic_reduced_targets.homeostatic_term,
)

obj_by_perturbation['fba_50pct_all_kinetic_reduced_targets'] = {
    "overall_loss": loss_overall,
    "kinetic_loss": loss_kinetic,
    "homeostatic_loss": loss_homeostatic,
    "overall_objective": oofv,
    "kinetic_counts_unweighted": kinetic_soln,
    "homeostatic_counts": homeostatic_soln,
    "description": "FBA with 50% kinetic constraint on all kinetic reactions"
                   " and 50% reduction in kinetic targets",
}

In [17]:
# Tabulate the collected summaries: one row per perturbation
obj_by_perturbation_df = pd.DataFrame(obj_by_perturbation).transpose()

# Scale every column whose name contains 'loss' by 1e-6
# (converts from counts to concentrations)
loss_cols = [name for name in obj_by_perturbation_df.columns if 'loss' in name]
obj_by_perturbation_df[loss_cols] = obj_by_perturbation_df[loss_cols].mul(1e-6)

# Put the simulation's own losses on top as the first row
input_row_df = pd.DataFrame([input_row], index=['input_data'])
obj_by_perturbation_df = pd.concat([input_row_df, obj_by_perturbation_df])

obj_by_perturbation_df

Unnamed: 0,overall_loss,kinetic_loss,homeostatic_loss,overall_objective,kinetic_counts_unweighted,homeostatic_counts,description
input_data,0.238814,0.000262,0.223144,,,,Input data from short vEcoli sim
normal_fba,0.165772,0.000162,0.155495,165772.210767,16193542.608446,155494.734187,Normal FBA
fba_80pct_all_kinetic,0.240857,0.000205,0.229867,240856.508767,20460850.065556,229866.583698,FBA with 80% kinetic constraint on all kinetic...
fba_50pct_all_kinetic,0.372959,0.00031,0.35025,372958.86887,31001622.262674,350250.455892,FBA with 50% kinetic constraint on all kinetic...
fba_80pct_all_kinetic_reduced_targets,0.240777,0.000129,0.229867,240777.014601,12933688.367556,229866.583698,FBA with 80% kinetic constraint on all kinetic...
fba_50pct_all_kinetic_reduced_targets,0.372727,8.1e-05,0.35025,372727.060842,8083425.642674,350250.455892,FBA with 50% kinetic constraint on all kinetic...
