In [None]:
from pytfa.io.json import load_json_model
import numpy as np
import pandas as pd
import seaborn as sns
from skimpy.analysis.oracle import *
import os.path
import matplotlib.pyplot as plt
import matplotlib
from skimpy.io.yaml import load_yaml_model
from skimpy.core.parameters import load_parameter_population, ParameterValuePopulation
from tqdm.auto import tqdm

# Global matplotlib font settings.
# 'pdf.fonttype' was previously assigned twice ('truetype' and 42, two spellings
# of the same setting); it is now set once, with the numeric value that was
# assigned last.
matplotlib.rcParams.update({
    'font.sans-serif': 'Arial',
    'font.family': 'sans-serif',
    'pdf.fonttype': 42,
})

# Physiology tag; it selects the '<key>_<PHYSIOLOGY>' entries of config.ini below
PHYSIOLOGY = 'WT'

import configparser
config = configparser.ConfigParser()
config_path = '../src/config.ini'
# ConfigParser.read() silently skips files it cannot open and returns the list of
# files it actually parsed, so fail here rather than with an opaque KeyError later.
if not config.read(config_path):
    raise FileNotFoundError(f'Could not read configuration file: {config_path}')

# File paths from config.ini
base_dir = config['paths']['base_dir']


def _resolve_path(key):
    """Return the absolute path for the ``[paths]`` entry ``<key>_<PHYSIOLOGY>``.

    The configured value is joined onto ``base_dir`` and then made absolute.
    Raises ``KeyError`` if the entry is missing from config.ini.
    """
    return os.path.abspath(os.path.join(base_dir, config['paths'][f'{key}_{PHYSIOLOGY}']))


path_to_tmodel = _resolve_path('path_to_tmodel')
path_to_kmodel = _resolve_path('path_to_kmodel')
path_to_samples = _resolve_path('path_to_samples')
path_to_fcc = _resolve_path('path_to_fcc')
path_to_stratified_samples = _resolve_path('path_to_stratified_samples')
path_to_lambda_values = _resolve_path('path_to_lambda_values')
path_to_param_output = _resolve_path('path_to_param_output')
path_to_stratified_params = _resolve_path('path_to_stratified_params')

In [None]:
# Read the flux samples (first CSV column is the index), then load both models
samples_total = pd.read_csv(path_to_samples, index_col=0, header=0)
tmodel = load_json_model(path_to_tmodel)
kmodel = load_yaml_model(path_to_kmodel)
kmodel.prepare(mca=False)

# Find the steady states that were used for the MCA results: the id is parsed as an
# integer from the fifth '_'-separated field of every entry name in the FCC folder.
wt_ss = np.unique([int(entry.split('_')[4]) for entry in os.listdir(path_to_fcc)])

# Keep only the samples belonging to those steady states
samples = samples_total.loc[wt_ss, :]

In [None]:
atp_producing_reactions = [tmodel.reactions.PYK, tmodel.reactions.PFK, tmodel.reactions.ATPS4mi]


def _atp_fluxes(sample, reactions):
    """Return ``(atp_sum, oxphos, glycolysis)`` ATP fluxes for one flux sample.

    For every reaction the net flux ``|sample[id] - sample[reverse_id]|`` is
    multiplied by the absolute stoichiometric coefficient of the reaction's first
    ``atp_*`` metabolite. ATPS4mi is booked as oxphos, PYK and PFK as glycolysis,
    and ``atp_sum`` is the total over all reactions passed in.
    """
    atp_sum = 0
    oxphos = 0
    glycolysis = 0
    for reaction in reactions:
        # Some reactions involve more than one atp, so weight the flux by the coefficient
        atp_mets = [met for met in reaction.metabolites if met.id.startswith('atp_')]  # should be either atp_c or atp_m
        # NOTE(review): abs() discards the sign of the coefficient, so a reaction that
        # consumes ATP is counted the same as one that produces it - confirm this is
        # the intended treatment for every reaction in the list.
        coeff = abs(reaction.metabolites[atp_mets[0]])
        tot_flux = abs(sample[reaction.id] - sample[reaction.reverse_id]) * coeff
        atp_sum += tot_flux

        if reaction.id == 'ATPS4mi':
            oxphos += tot_flux
        elif reaction.id in ('PYK', 'PFK'):
            glycolysis += tot_flux
    return atp_sum, oxphos, glycolysis


# The ATP fluxes do not depend on the normalisation, so compute them once per sample
# and build the table in one go instead of growing it row by row.
atp_fluxes = pd.DataFrame(
    [_atp_fluxes(sample, atp_producing_reactions) for _, sample in samples.iterrows()],
    index=samples.index,
    columns=['atp_sum', 'oxphos', 'glycolysis'],
)

# For each sample divide the ATP fluxes by the glucose uptake
# (to_numpy() makes the division positional, row by row)
glucose_uptake_flux = samples[tmodel.reactions.EX_glc_D_e.reverse_id].to_numpy()
results = atp_fluxes.div(glucose_uptake_flux, axis=0)

# Now find the yield with respect to the growth rate
growth_rates = samples[tmodel.reactions.biomass.id].to_numpy()
results_growth = atp_fluxes.div(growth_rates, axis=0)

In [None]:
# Make some statistics columns, for the glucose-normalised and the growth-normalised table:
#   'sum'   = oxphos + glycolysis
#   'ratio' = oxphos / glycolysis
for yield_table in (results, results_growth):
    yield_table['sum'] = yield_table['oxphos'] + yield_table['glycolysis']
    yield_table['ratio'] = yield_table['oxphos'] / yield_table['glycolysis']

# sort_values() returns a new frame and only the last expression of a cell is rendered,
# so the former mid-cell `results.sort_values('ratio')` had no effect and was dropped.
# Evaluate it in its own cell to inspect the glucose-normalised table.
results_growth.sort_values('ratio')

In [None]:
# Seaborn theme used for the figure
sns.set_theme(context="paper", style="whitegrid")

# Colour value per sample: min-max scaled glycolysis plus min-max scaled oxphos
glyc = results['glycolysis']
oxp = results['oxphos']
glyc_scaled = (glyc - glyc.min()) / (glyc.max() - glyc.min())
oxp_scaled = (oxp - oxp.min()) / (oxp.max() - oxp.min())
color_values = glyc_scaled + oxp_scaled

# Scatter of oxidative vs glycolytic ATP yield, coloured by the combined value
fig, ax = plt.subplots(figsize=(12, 8))
sns.scatterplot(
    x='glycolysis',
    y='oxphos',
    hue=color_values,
    data=results,
    palette='flare',
    s=25,
    alpha=0.7,
    legend=False,
    ax=ax
)

# Axis labels and tick label sizes
ax.set_xlabel('Glycolytic ATP Yield', fontsize=16, labelpad=15)
ax.set_ylabel('Oxidative ATP Yield', fontsize=16, labelpad=15)
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)

# Light dashed grid, and axis limits padded by a fixed margin around the data range
ax.grid(True, linestyle='--', alpha=0.3)
margin = 0.05
ax.set_xlim(glyc.min() - margin, glyc.max() + margin)
ax.set_ylim(oxp.min() - margin, oxp.max() + margin)

# Tight layout and show plot
fig.tight_layout()
plt.show()


In [None]:
# Stratified sampling
# Bin the samples on a 3 x 3 grid: 3 equal-width glycolysis bins x 3 equal-width oxphos bins.
# Every sample is labelled '<glycolysis bin>_<oxphos bin>'; each label that occurs is one group.
results['group'] = pd.cut(results['glycolysis'], bins=3, labels=False)
results['group'] = results['group'].astype(int)
results['group'] = results['group'].astype(str) + '_' + pd.cut(results['oxphos'], bins=3, labels=False).astype(str)

# Pick ~50 samples in total. The number picked from each group is proportional to the group's size
samples_to_pick = 52 # so that the final number of samples is 50
picked_groups = []
for group in results['group'].unique():
    group_samples = results[results['group'] == group]
    # Proportional share (rounded down), at least one sample, and never more than the
    # group holds - otherwise the positions below would repeat and duplicate rows.
    proportional_share = int(samples_to_pick * len(group_samples) / len(results))
    n_samples = min(max(proportional_share, 1), len(group_samples))
    print('Group {} has {} samples, picking {}'.format(group, len(group_samples), n_samples))

    # Pick equally spaced samples from the group, ordered by the oxphos/glycolysis ratio
    picked_pos = np.linspace(0, len(group_samples), n_samples, endpoint=False, dtype=int)
    picked_groups.append(group_samples.sort_values('ratio').iloc[picked_pos])

# Concatenate once at the end instead of repeatedly appending to an empty DataFrame
samples_picked = pd.concat(picked_groups)
len(samples_picked)

In [None]:
# Set aesthetic style
sns.set_theme(style="whitegrid", context="paper")

# Compute color values based on normalized glycolysis and oxphos
color_values = (
    (results['glycolysis'] - results['glycolysis'].min()) / (results['glycolysis'].max() - results['glycolysis'].min()) +
    (results['oxphos'] - results['oxphos'].min()) / (results['oxphos'].max() - results['oxphos'].min())
)

# Use one colour scale for both layers. Without an explicit hue_norm each scatterplot
# normalises over the values it receives, so the picked subset would be coloured on its
# own (narrower) range and no longer match the same points in the background layer.
hue_norm = (color_values.min(), color_values.max())

# Create the scatter plot
plt.figure(figsize=(12, 8))

# Plot all points with low opacity
sns.scatterplot(
    data=results,
    x='glycolysis',
    y='oxphos',
    hue=color_values,
    hue_norm=hue_norm,
    palette='flare',
    s=25,
    alpha=0.1,
    legend=False
)

# Overlay the selected samples with full opacity and a black edge
sns.scatterplot(
    data=samples_picked,
    x='glycolysis',
    y='oxphos',
    hue=color_values.loc[samples_picked.index],
    hue_norm=hue_norm,
    palette='flare',
    s=60,
    edgecolor='black',
    linewidth=0.5,
    alpha=1,
    legend=False
)

# Enhance axis labels and ticks
plt.xlabel('Glycolytic ATP Yield', fontsize=16, labelpad=15)
plt.ylabel('Oxidative ATP Yield', fontsize=16, labelpad=15)
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)

# Add grid and fine-tune limits (fixed margin around the full data range)
plt.grid(True, linestyle='--', alpha=0.3)
margin = 0.05
plt.xlim(results['glycolysis'].min() - margin, results['glycolysis'].max() + margin)
plt.ylim(results['oxphos'].min() - margin, results['oxphos'].max() + margin)

# Tight layout and show plot
plt.tight_layout()
plt.show()


In [None]:
# Save the picked samples. to_csv does not create missing folders, so make sure the
# output directory exists first (the path was made absolute when it was built).
os.makedirs(os.path.dirname(path_to_stratified_samples), exist_ok=True)
samples_picked.to_csv(path_to_stratified_samples)

In [None]:
# For each steady state, find the fast models
fast_parameters = []
fast_index = []

# The cutoff only depends on tmodel, so solve the model once instead of once per
# steady state. cutoff = -3 / (ln(2) / objective value); presumably the objective is
# the growth rate, which would make the denominator the doubling time - confirm.
eig_cutoff = -3 * 1 / (np.log(2) / tmodel.optimize().objective_value)

for ss_id in tqdm(samples_picked.index):

    # Load the parameter population and the max eigenvalues
    # (both paths are templates that take the steady-state id via str.format)
    parameter_population = load_parameter_population(path_to_param_output.format(ss_id))
    max_eig = pd.read_csv(path_to_lambda_values.format(ss_id), index_col=0)

    # Keep only the parameter sets that have physiologically relevant eigenvalues,
    # i.e. whose largest eigenvalue lies below the cutoff
    fast_models = max_eig[max_eig['max_eig'] < eig_cutoff].index
    fast_parameters.extend([parameter_population._data[k] for k in fast_models])
    fast_index.extend(["{},{}".format(ss_id, k) for k in fast_models])

print(len(fast_parameters))

# Saving stays best-effort, but the cause of a failure is reported, and
# KeyboardInterrupt / SystemExit are no longer swallowed by a bare except.
try:
    fast_parameters = ParameterValuePopulation(fast_parameters,
                                               kmodel=kmodel,
                                               index=fast_index)

    fast_parameters.save(path_to_stratified_params)
except Exception as err:
    print(f'Error occurred while saving picked parameters: {err!r}')