In [179]:
import os
import cvxpy as cp
import numpy as np
os.chdir(os.path.expanduser('~/dev/vEcoli')) #import repo

# data
# Model inputs for the Km fit. P_L and k_cat_exp enter the rate expression
# k_active_i = k_cat_exp * P_L / (Km_i * beta) used further down.
P_L = 1.47e-04 * 10**-3 # mol/L (value before the 10**-3 factor is presumably in mmol/L — confirm)
k_cat_exp = 0.071 #/sec
# NOTE(review): annotated as mM, but this value is later passed unchanged as the
# fsolve initial guess for Km next to concentrations given in mol/L — confirm units.
K_ms_exp = 3.7e-6  # mM


# Substrate identifiers. Later cells pair index i of the per-substrate arrays
# below (concentrations, std devs, half-lives) with substrates[i].
substrates = ['G6890-MONOMER[c]',
              'PD03938[c]',
              'G6737-MONOMER[c]',
              'RPOD-MONOMER[c]',
              'PD02936[c]',
              'RED-THIOREDOXIN2-MONOMER[c]']

# -- iteration 1 --
# (earlier values, kept for reference; not used)
# S_protein_concs = np.array([2.20E-4, 5.26E-7, 8.38E-4, 1.03E-3, 2.69E-6, 6.44E-4]) * 10**-3 # (mol/L)
# S_std_dev = np.array([8.88E-5, 5.83E-7, 1.22E-4, 2.75E-4, 3.00E-6, 9.14E-5]) * 10**-3# (mol/L)

# -- iteration 2 --
# Substrate concentrations and their standard deviations (active values)
S_protein_concs = np.array([2.20E-4, 1.07e-06, 6.35e-04, 9.58e-04, 4.28e-06, 6.27e-04]) * 10**-3 # (mol/L)
S_std_dev = np.array([3.35e-05, 3.46e-07, 3.04e-05, 9.36e-05, 2.17e-06, 8.23e-05]) * 10**-3# (mol/L)

# Half-lives; the listed numbers are multiplied by 60 to obtain seconds
# (so they are presumably entered in minutes — confirm).
t_half = np.array([396.3430, 69.0230, 415.2737,
                   1520.2125, 884.3924, 450.3317])*60  # seconds
# First-order rate constants ln(2)/t_half (1/sec); the optimisation below
# requires k_active_i >= c_i.
c = np.log(2.0) / t_half  # desired bounds on k_active_i

# --- Geometric program (DGP) in the ratios alpha_i = S_i / Km_i ---
n = 6
ALPHA = cp.Variable(n, pos=True)  # alpha_i = S_i / Km_i, strictly positive
beta = 1 + cp.sum(ALPHA)          # posynomial: 1 + sum_i alpha_i

# One constraint per substrate:
#     S_i * beta / (P_L * k_cat * alpha_i) <= 1 / c_i
# i.e. k_active_i = k_cat * P_L * alpha_i / (S_i * beta) >= c_i.
# The left-hand side is a generalized posynomial, the right-hand side a constant.
constr = [
    (S_protein_concs[i] * beta) / (P_L * k_cat_exp * ALPHA[i]) <= 1.0 / c[i]
    for i in range(n)
]

# Alternative objective that was tried (kept for reference):
# t = cp.Variable(pos=True)
# constr += [
#     t <= cp.inv_pos(beta),         # t ≤ 1/β  (log–log concave RHS)
#     t <= cp.geo_mean(ALPHA),       # t ≤ geo_mean(α)
# ]
# obj = cp.Maximize(t)

# DGP-compatible objective: maximize 1/beta. Because beta = 1 + sum(alpha),
# this minimizes the total of the alpha terms subject to the rate constraints.
obj = cp.Maximize(cp.inv_pos(beta))

prob = cp.Problem(obj, constr)
prob.solve(gp=True, solver=cp.MOSEK)

0.9954319079615004

In [151]:
# alpha_i = S_i / Km_i  =>  Km_i = S_i / alpha_i
Kms = S_protein_concs/ALPHA.value

# Rate constants implied by the half-lives (ln 2 / t_half), for side-by-side comparison
deg_half = c.copy()
# Rate constants implied by the fitted Km values and the optimised beta
k_actives = k_cat_exp*P_L/(Kms*beta.value)

print(
    f'Kms = {Kms} mol/L',
    f'estimated k_actives = {k_actives} 1/sec',
    f'k_deg from half-life = {deg_half} 1/sec',
    sep='\n',
)

Kms = [3.56437412e-04 6.20692794e-05 3.73462598e-04 1.36715105e-03
 7.95147320e-04 4.04990814e-04] mol/L
estimated k_actives = [2.91476777e-05 1.67382688e-04 2.78189111e-05 7.59925018e-06
 1.30659094e-05 2.56532308e-05] 1/sec
k_deg from half-life = [2.91476146e-05 1.67371065e-04 2.78188891e-05 7.59923564e-06
 1.30625874e-05 2.56532085e-05] 1/sec


# Use SciPy's `fsolve` equation solver instead of CVXPY

In [131]:
from scipy.optimize import fsolve

def equations(K_m_var):
    """Residuals of the half-life matching conditions for candidate Km values.

    For every substrate i the model rate constant is

        k_active_i = P_L * k_cat_exp / (K_m_i * beta),
        beta       = 1 + sum_j S_protein_concs_j / K_m_j,

    and the residual is ``k_active_i - ln(2) / t_half_i``. A root of this
    system makes every model rate equal the rate implied by its half-life.

    Reads the module-level ``S_protein_concs``, ``P_L``, ``k_cat_exp`` and
    ``t_half``.

    Parameters
    ----------
    K_m_var : array-like
        Candidate Km values, one per substrate (same length as ``t_half``).

    Returns
    -------
    list of float
        One residual per substrate, in the same order as ``K_m_var``.
    """
    # Vectorized over all substrates instead of a hard-coded range(6), so the
    # system size follows the input rather than a magic number.
    K_m = np.asarray(K_m_var, dtype=float)
    beta = np.sum(S_protein_concs / K_m) + 1
    k_active = P_L * k_cat_exp / (K_m * beta)
    return list(k_active - np.log(2) / t_half)

# Starting point for the root finder: the same experimental Km for all six substrates.
# NOTE(review): K_ms_exp is annotated "mM" where it is defined, while the
# concentrations it is combined with here are in mol/L — confirm the units.
K_m_initial_guess = np.full(6, K_ms_exp)

# Find the Km vector at which every residual returned by `equations` is zero
K_m_solution = fsolve(equations, K_m_initial_guess)

# Show the root that was found
print("Solved K_m values:", K_m_solution)

Solved K_m values: [5.38905309e-04 9.38501781e-05 5.64645273e-04 2.06702424e-03
 1.20250328e-03 6.12313435e-04]


In [138]:
# Competition term at the fsolve solution: beta = 1 + sum_i S_i / Km_i
beta_solved = np.sum(S_protein_concs/K_m_solution) + 1

# Rate constants implied by the half-lives vs. those implied by the solved Km values
deg_half = c.copy()
k_actives = k_cat_exp*P_L/(K_m_solution*beta_solved)

print(
    f'Kms = {K_m_solution} mol/L',
    f'estimated k_actives = {k_actives} 1/sec',
    f'k_deg from half-life = {deg_half} 1/sec',
    f'estimated beta = {beta_solved}',
    sep='\n',
)

Kms = [5.38905309e-04 9.38501781e-05 5.64645273e-04 2.06702424e-03
 1.20250328e-03 6.12313435e-04] mol/L
estimated k_actives = [2.91476146e-05 1.67371065e-04 2.78188891e-05 7.59923564e-06
 1.30625874e-05 2.56532085e-05] 1/sec
k_deg from half-life = [2.91476146e-05 1.67371065e-04 2.78188891e-05 7.59923564e-06
 1.30625874e-05 2.56532085e-05] 1/sec
estimated beta = 1.0034502440948132


# Perform bootstrapping to see whether fluctuations in substrate concentration strongly affect Km

In [171]:
# -- iteration 2 --
# Means and standard deviations of the substrate concentrations; the bootstrap
# below samples each concentration from a normal distribution with these parameters.
S_protein_concs = np.array([2.20E-4, 1.07e-06, 6.35e-04, 9.58e-04, 4.28e-06, 6.27e-04]) * 10**-3 # (mol/L)
S_std_dev = np.array([3.35e-05, 3.46e-07, 3.04e-05, 9.36e-05, 2.17e-06, 8.23e-05]) * 10**-3# (mol/L)

# Rate constants ln(2)/t_half (1/sec). `t_half` is not defined in this cell and
# must already exist in the kernel from an earlier cell.
c = np.log(2.0) / t_half  # desired bounds on k_active_i

# --- Get Positive Substrate Conc Sample  ---
def draw_positive_normals(mu, sigma, rng, max_attempts=10000):
    """Draw independent normal samples, redrawing until every entry is > 0.

    Each entry is sampled from N(mu_i, sigma_i). Entries that come out
    non-positive are redrawn (only those positions) until all are strictly
    positive, so the result follows a normal distribution truncated at zero.

    Parameters
    ----------
    mu, sigma : array-like
        Means and standard deviations. Lists and mutually broadcastable
        shapes are accepted.
    rng : numpy.random.Generator
        Random generator used for all draws.
    max_attempts : int, optional
        Number of redraw rounds tolerated before giving up (default 10000).

    Returns
    -------
    numpy.ndarray
        Strictly positive samples with the broadcast shape of ``mu`` and ``sigma``.

    Raises
    ------
    RuntimeError
        If some entry is still non-positive after more than ``max_attempts``
        redraw rounds (e.g. mu <= 0 with sigma == 0).
    """
    # Coerce and broadcast up front: the boolean indexing below needs real
    # arrays of matching shape. Without this, list or scalar inputs only
    # failed once a non-positive draw happened to occur.
    mu, sigma = np.broadcast_arrays(
        np.asarray(mu, dtype=float), np.asarray(sigma, dtype=float)
    )

    x = np.asarray(rng.normal(mu, sigma), dtype=float)
    attempts = 0
    neg = x <= 0
    while np.any(neg):
        # Redraw only the offending positions, each from its own distribution
        x[neg] = rng.normal(mu[neg], sigma[neg])
        neg = x <= 0
        attempts += 1
        if attempts > max_attempts:  # sanity break for pathological inputs
            raise RuntimeError("Too many attempts sampling positive concentrations.")
    return x

# --- CVXPY Problem ---
def solve_for_Km(S_vec):
    """Solve the geometric program for one set of substrate concentrations.

    The decision variables are alpha_i = S_i / Km_i with beta = 1 + sum(alpha).
    Each constraint

        S_i * beta / (P_L * k_cat_exp * alpha_i) <= 1 / c_i

    requires the model rate k_active_i to be at least c_i, and the objective
    maximizes 1/beta. Reads the module-level ``n``, ``c``, ``P_L`` and
    ``k_cat_exp``.

    Parameters
    ----------
    S_vec : array-like of length n
        Substrate concentrations (mol/L).

    Returns
    -------
    numpy.ndarray
        Km_i = S_i / alpha_i (mol/L) for each substrate, or an array of NaN
        when the solver raises or yields no finite solution. In the NaN case
        a RuntimeWarning describing the cause is emitted, so failures (e.g. a
        missing solver licence) are not mistaken for genuine NaN results.
    """
    import warnings  # local import keeps this cell self-contained

    S_vec = np.asarray(S_vec, dtype=float)

    ALPHA = cp.Variable(n, pos=True)        # α_i = S_i / Km_i
    beta = 1 + cp.sum(ALPHA)

    constr = [
        (S_vec[i] * beta) / (P_L * k_cat_exp * ALPHA[i]) <= 1.0 / c[i]
        for i in range(n)
    ]

    prob = cp.Problem(cp.Maximize(cp.inv_pos(beta)), constr)
    try:
        prob.solve(gp=True, solver=cp.MOSEK, ignore_dpp=True)
    except Exception as exc:
        # Best-effort by design: report the failure but keep the bootstrap running.
        warnings.warn(
            f"solve_for_Km: solver raised {type(exc).__name__}: {exc}; returning NaN",
            RuntimeWarning,
            stacklevel=2,
        )
        return np.full(n, np.nan)

    alpha = ALPHA.value
    if alpha is None or not np.all(np.isfinite(alpha)):
        warnings.warn(
            f"solve_for_Km: no finite solution (status: {prob.status}); returning NaN",
            RuntimeWarning,
            stacklevel=2,
        )
        return np.full(n, np.nan)

    # Km_i = S_i / α_i
    return S_vec / alpha

# --- Bootstrap driver ---
def bootstrap_Km(n_sample=1000, seed=0):
    """Resample substrate concentrations and re-solve for Km each time.

    For each of ``n_sample`` replicates, a positive concentration vector is
    drawn with `draw_positive_normals` from the module-level ``S_protein_concs``
    and ``S_std_dev``, and `solve_for_Km` is run on it.

    Parameters
    ----------
    n_sample : int
        Number of bootstrap replicates.
    seed : int
        Seed for ``np.random.default_rng``.

    Returns
    -------
    (Km_samples, S_samples) : tuple of numpy.ndarray
        Both of shape (n_sample, n), in mol/L; row b holds replicate b.
    """
    rng = np.random.default_rng(seed)
    # Pre-fill with NaN so any row left unwritten is recognisable
    Km_samples = np.full((n_sample, n), np.nan)
    S_samples = np.full((n_sample, n), np.nan)

    for row in range(n_sample):
        S_draw = draw_positive_normals(S_protein_concs, S_std_dev, rng)
        S_samples[row, :] = S_draw
        Km_samples[row, :] = solve_for_Km(S_draw)

    return Km_samples, S_samples # mol/L, mol/L

In [173]:
# --- Run the bootstrap ---
# B is the number of bootstrap replicates; each one triggers a full solver call.
B = 1000  # increase as desired
# Fixed seed so the sampled concentrations are the same on every run.
# Both returned arrays have one row per replicate and one column per substrate.
Km_samples_molL, S_samples = bootstrap_Km(n_sample=B, seed=42)

In [174]:
# Display the bootstrapped Km values (mol/L): rows are replicates, columns are substrates
Km_samples_molL

array([[3.56479820e-04, 6.20672287e-05, 3.73507640e-04, 1.36731436e-03,
        7.76235507e-04, 4.05039425e-04],
       [3.56397201e-04, 6.20583085e-05, 3.73420897e-04, 1.36699658e-03,
        7.95060171e-04, 4.04945626e-04],
       [3.56508380e-04, 6.20784573e-05, 3.73537681e-04, 1.36742311e-03,
        7.95133644e-04, 4.05072051e-04],
       ...,
       [3.56449807e-04, 6.20606744e-05, 3.73476116e-04, 1.36719902e-03,
        7.95058232e-04, 4.05005315e-04],
       [3.56493602e-04, 6.20820052e-05, 3.73521187e-04, 1.36736662e-03,
        7.95395620e-04, 4.05054376e-04],
       [3.56501597e-04, 6.20770629e-05, 3.73530861e-04, 1.36739878e-03,
        7.95239797e-04, 4.05064802e-04]], shape=(1000, 6))

In [175]:
# --- Summaries: mean, std, 95% CI, and correlation with S ---
def ci95(x):
    """Percentile-based 95% interval of the finite entries of ``x``.

    Parameters
    ----------
    x : numpy.ndarray
        Sample values; NaN and ±inf entries are ignored.

    Returns
    -------
    (lo, hi)
        The 2.5th and 97.5th percentiles, or (nan, nan) when no finite
        value is present.
    """
    finite_vals = x[np.isfinite(x)]
    if finite_vals.size:
        lower, upper = np.percentile(finite_vals, [2.5, 97.5])
        return lower, upper
    return (np.nan, np.nan)

# Per-substrate summary of the bootstrap: mean, sample SD, 95% percentile
# interval of Km, and the Pearson correlation between sampled S and Km.
summary = []
for i, name in enumerate(substrates):
    x = Km_samples_molL[:, i]
    s = S_samples[:, i]
    finite = np.isfinite(x) & np.isfinite(s)

    # Use the same finite-only subset that ci95 uses, so that NaN *and* ±inf
    # replicates are excluded consistently from every statistic.
    x_ok = x[np.isfinite(x)]
    mean = x_ok.mean() if x_ok.size > 0 else np.nan
    std  = x_ok.std(ddof=1) if x_ok.size > 1 else np.nan  # sample SD needs >= 2 values
    lo, hi = ci95(x)
    # Pearson correlation between sampled S and resulting Km
    corr = np.corrcoef(s[finite], x[finite])[0, 1] if np.sum(finite) > 2 else np.nan
    summary.append({
        "substrate": name,
        "Km_mean_M": mean,
        "Km_sd_M": std,
        "Km_95CI_M": (lo, hi),
        "corr_S_vs_Km": corr,
    })

for row in summary:
    print(f"{row['substrate']}: "
          f"Km = {row['Km_mean_M']:.3e} ± {row['Km_sd_M']:.3e} M, "
          f"95% CI [{row['Km_95CI_M'][0]:.3e}, {row['Km_95CI_M'][1]:.3e}], "
          f"corr(S, Km) = {row['corr_S_vs_Km']:.3f}")

G6890-MONOMER[c]: Km = 3.564e-04 ± 8.579e-08 M, 95% CI [3.563e-04, 3.566e-04], corr(S, Km) = -0.382
PD03938[c]: Km = 6.207e-05 ± 1.689e-08 M, 95% CI [6.203e-05, 6.210e-05], corr(S, Km) = 0.184
G6737-MONOMER[c]: Km = 3.735e-04 ± 8.995e-08 M, 95% CI [3.733e-04, 3.736e-04], corr(S, Km) = -0.360
RPOD-MONOMER[c]: Km = 1.367e-03 ± 3.292e-07 M, 95% CI [1.367e-03, 1.368e-03], corr(S, Km) = -0.237
PD02936[c]: Km = 7.949e-04 ± 1.565e-06 M, 95% CI [7.933e-04, 7.955e-04], corr(S, Km) = 0.288
RED-THIOREDOXIN2-MONOMER[c]: Km = 4.050e-04 ± 9.750e-08 M, 95% CI [4.048e-04, 4.052e-04], corr(S, Km) = -0.804


In [178]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Optional: filter NaNs per column
def col_clean(x):
    """Flatten ``x`` to 1-D and keep only its finite entries (drops NaN and ±inf)."""
    flat = np.asarray(x).reshape(-1)
    keep = np.isfinite(flat)
    return flat[keep]

# One violin panel per substrate, each with an independent y-axis
fig = make_subplots(
    rows=1,
    cols=len(substrates),
    shared_yaxes=False,
    subplot_titles=substrates,
    horizontal_spacing=0.03,
)

# mol/L -> nmol/L for display
Km_samples_nmolL = Km_samples_molL * 10**9

# Appearance shared by every violin trace
violin_style = dict(
    box_visible=True,          # show box (median & IQR)
    meanline_visible=True,     # show mean line
    points="all",              # show all samples
    jitter=0.35,               # spread points
    scalemode="count",         # violin width ~ number of points
    marker=dict(size=2),
    showlegend=False,
)

for panel, name in enumerate(substrates, start=1):
    # Column panel-1 holds this substrate's samples; non-finite values are dropped
    samples = col_clean(Km_samples_nmolL[:, panel - 1])
    fig.add_trace(go.Violin(y=samples, name=name, **violin_style), row=1, col=panel)

# Toggle log-scale if helpful (violins support it; use if ranges span orders of magnitude)
USE_LOG_Y = False
yaxis_type = 'log' if USE_LOG_Y else 'linear'

fig.update_layout(
    title="Bootstrap Km distributions (nmol/L)",
    template="simple_white",
    height=400,
    width=1300,
    margin=dict(l=60, r=20, t=60, b=60),
    # plot_bgcolor='rgba(0, 0, 0, 0)',  # Transparent plot area background
    # paper_bgcolor='rgba(0, 0, 0, 0)',  # Transparent entire figure background
)
# Axis title and scale type are applied to the first panel only
fig.update_yaxes(
    row=1, col=1,
    title_text="Km (nmol/L)",
    type=yaxis_type,
)

fig.show()
# fig.write_image("notebooks/Heena notebooks/Mia Protein Degradation/out/bootstrap_Km.png", scale=5, height=400, width=1300)