This is an expansion of example_optimize_simple.ipynb

In [51]:
import os

import matplotlib.pyplot as plt
import numpy as np
import polars as pl
import pandas as pd
import seaborn as sns

from delayed_reactant_labeling.predict import DRL
from delayed_reactant_labeling.optimize import RateConstantOptimizerTemplate
from delayed_reactant_labeling.visualize import VisualizeMultipleSolutions

In [52]:
# Reaction network A + cat <-> B -> C + cat.
# Every step is written as (name of the rate constant, reactants, products).
reactions_unlabeled = [
    ('k1', ['A', 'cat'], ['B']),
    ('k-1', ['B'], ['A', 'cat']),
    ('k2', ['B'], ['C', 'cat']),
]
# The labeled (-d10) species go through the same steps with the same rate constants;
# only the catalyst carries no label.
reactions = reactions_unlabeled + [
    (
        rate_name,
        [species if species == 'cat' else f'{species}-d10' for species in reactants],
        [species if species == 'cat' else f'{species}-d10' for species in products],
    )
    for rate_name, reactants, products in reactions_unlabeled
]

concentration_initial = dict(A=1, cat=1 / 5)
concentration_labeled = {'A-d10': 1}
dilution_factor = 1  # ideal situation: we achieve perfect mixing without dilution

# Time axes: 50 points over 0-10, then 400 points over 10-90.
time_pre = np.linspace(0, 10, num=50)
time_post = np.linspace(10, 90, num=8 * 50)

# Every rate constant of the simulated system is set to each of these values in turn.
rate_values = [0.1, 1, 10]

In [74]:
def explore_boundary(k1: float, kr1: float, k2: float) -> None:
    """Simulate a noisy experiment for one set of rate constants and fit it many times.

    The function
      1. creates ``optimization/example_model_boundaries/k1_{k1}_kr1_{kr1}_k2_{k2}/``,
      2. predicts the concentrations with the given ("real") rate constants and saves
         the plot as ``real_data.png``,
      3. adds multiplicative and additive gaussian noise to the second predicted
         DataFrame, scatters it in the same figure and saves that as ``fake_data.png``,
      4. runs ``optimize_multiple`` (500 runs) on the noisy data and stores the results
         in the ``multiple_guess/`` sub folder.

    It relies on the notebook-level ``reactions``, ``concentration_initial``,
    ``concentration_labeled``, ``dilution_factor``, ``time_pre`` and ``time_post``.

    Args:
        k1: value used for rate constant 'k1'.
        kr1: value used for rate constant 'k-1'.
        k2: value used for rate constant 'k2'.

    Raises:
        FileExistsError: if the output folder of this combination already exists.
    """
    path = f'optimization/example_model_boundaries/k1_{k1}_kr1_{kr1}_k2_{k2}/'
    # os.makedirs also creates the missing parent folders, which os.mkdir cannot do.
    # Without exist_ok it still raises for an existing folder, so an earlier run of the
    # same combination is never silently reused.
    os.makedirs(path)

    # "real" fake data
    rate_constants_real = {'k1': k1, 'k-1': kr1, 'k2': k2}
    drl_real = DRL(rate_constants=rate_constants_real, reactions=reactions)
    real_data_pre, real_data = drl_real.predict_concentration(
        t_eval_pre=time_pre,
        t_eval_post=time_post,
        dilution_factor=dilution_factor,
        initial_concentrations=concentration_initial,
        labeled_concentration=concentration_labeled)

    fig, axs = plt.subplots(1, 2, sharey='row', figsize=(10, 4), layout='tight', width_ratios=(1, 5))
    try:
        real_data_pre.to_pandas().plot('time', ax=axs[0])
        real_data.to_pandas().plot('time', ax=axs[1])
        fig.savefig(f'{path}/real_data.png', dpi=500)

        # add noise; the fixed seed makes the noise identical for every call
        rng = np.random.default_rng(42)
        ax = axs[1]
        fake_data = []
        for col in real_data.columns[:-1]:  # last column contains time array
            noise_dynamic = rng.normal(loc=1, scale=0.06, size=real_data.shape[0])  # multiplied with intensity of signal
            noise_static = rng.normal(loc=0, scale=0.01, size=real_data.shape[0])  # base noise
            fake_col = real_data[col] * noise_dynamic + noise_static

            fake_col[fake_col < 1e-10] = 1e-10  # no negative intensity
            fake_data.append(fake_col)
            ax.scatter(real_data['time'], fake_col, marker='.')

        # the time axis itself is noise free and stays the last column
        fake_data.append(real_data['time'])
        fake_data = pl.DataFrame(fake_data, real_data.columns)
        fig.savefig(f'{path}/fake_data.png', dpi=500)
    finally:
        # The caller catches exceptions and continues with the next combination,
        # so close the figure on failure too, otherwise open figures pile up.
        plt.close(fig)

    class RateConstantOptimizer(RateConstantOptimizerTemplate):
        """Optimizer for this model, built on the names of the enclosing notebook scope."""

        @staticmethod
        def create_prediction(x: np.ndarray, x_description: list[str]) -> pl.DataFrame:
            """Predict the concentrations for the rate constants ``x`` named by ``x_description``.

            Only the second DataFrame returned by ``predict_concentration`` is returned;
            the first one is discarded.
            """
            rate_constants = pd.Series(x, x_description)
            drl = DRL(reactions=reactions, rate_constants=rate_constants)
            _, pred_labeled = drl.predict_concentration(
                t_eval_pre=time_pre,
                t_eval_post=time_post,
                initial_concentrations=concentration_initial,
                labeled_concentration=concentration_labeled,
                dilution_factor=dilution_factor,
                rtol=1e-8,
                atol=1e-8, )
            return pred_labeled

        @staticmethod
        def calculate_curves(data: pl.DataFrame) -> dict[str, pl.Series]:
            """Return, for A, B and C, the ratio unlabeled / (unlabeled + labeled)."""
            curves = {}
            for chemical in ['A', 'B', 'C']:
                # NOTE(review): `sum(axis=1)` depends on the installed polars version;
                # newer releases may require `sum_horizontal()` - confirm.
                chemical_sum = data[[chemical, f'{chemical}-d10']].sum(axis=1)
                curves[f'ratio_{chemical}'] = data[chemical] / chemical_sum
            return curves

    def METRIC(y_true: np.ndarray, y_pred: np.ndarray) -> float:
        """Mean absolute error between the measured and the predicted curve."""
        return np.average(np.abs(y_pred - y_true), axis=0)

    RCO = RateConstantOptimizer(raw_weights={}, experimental=fake_data, metric=METRIC)

    dimension_description = ['k1', 'k-1', 'k2']
    bounds = [(1e-9, 100),    # k1
              (0,    100),    # k-1 / kr1 as input to the func.
              (1e-9, 100),]   # k2

    RCO.optimize_multiple(path=f'{path}/multiple_guess/', n_runs=500, x_bounds=bounds, x_description=dimension_description, n_jobs=-2, maxiter=1000)


In [76]:
# Explore every combination of the candidate rate constants.
for k1 in rate_values:
    for kr1 in rate_values:
        for k2 in rate_values:
            try:
                explore_boundary(k1, kr1, k2)
            except Exception as e:
                # Best effort: a failing combination must not stop the remaining ones,
                # but report which combination failed and with what kind of error.
                print(f'k1={k1}, kr1={kr1}, k2={k2} failed: {type(e).__name__}: {e}')

[Parallel(n_jobs=-2)]: Using backend LokyBackend with 5 concurrent workers.
[Parallel(n_jobs=-2)]: Done   1 tasks      | elapsed:    4.2s
[Parallel(n_jobs=-2)]: Done   2 tasks      | elapsed:    5.5s
[Parallel(n_jobs=-2)]: Done   3 tasks      | elapsed:    5.5s
[Parallel(n_jobs=-2)]: Done   4 tasks      | elapsed:    6.4s
[Parallel(n_jobs=-2)]: Done   5 tasks      | elapsed:    7.0s
[Parallel(n_jobs=-2)]: Done   6 tasks      | elapsed:    7.5s
[Parallel(n_jobs=-2)]: Done   7 tasks      | elapsed:    7.9s
[Parallel(n_jobs=-2)]: Done   8 tasks      | elapsed:    8.4s
[Parallel(n_jobs=-2)]: Done   9 tasks      | elapsed:    8.5s
[Parallel(n_jobs=-2)]: Done  10 tasks      | elapsed:    8.9s
[Parallel(n_jobs=-2)]: Done  11 tasks      | elapsed:    9.3s
[Parallel(n_jobs=-2)]: Done  12 tasks      | elapsed:    9.5s
[Parallel(n_jobs=-2)]: Done  13 tasks      | elapsed:   10.3s
[Parallel(n_jobs=-2)]: Done  14 tasks      | elapsed:   10.4s
[Parallel(n_jobs=-2)]: Done  15 tasks      | elapsed:   

KeyboardInterrupt: 

In [42]:
from delayed_reactant_labeling.optimize import OptimizerProgress

    # NOTE(review): this indented fragment has no enclosing `def` in this cell and uses
    # `VMS`, `base_error`, `path` and `rate_constants_real`, none of which are assigned
    # before this point in the cell. Presumably it is the body of a plotting helper whose
    # header was lost - confirm before running.

    # error / run
    fig, ax = VMS.show_error_all_runs()
    ax.set_ylabel("error")
    # runs whose error lies within 0.5 % of the lowest error found
    eq = VMS.complete_found_error < 1.005 * VMS.complete_found_error.min()
    # the inset shows the sorted errors of only those runs
    ax_ins = ax.inset_axes([0.15, 0.5, 0.4, 0.4])
    ax_ins.scatter(np.arange(sum(eq)), sorted(VMS.complete_found_error[eq]))
    ax.indicate_inset_zoom(ax_ins, edgecolor='black')
    ax.set_title(f"Error using real rate constants: {base_error:.4f}")
    fig.savefig(f'{path}/error_per_run.png', dpi=500)
    plt.close(fig)

    # k values for best runs
    # one histogram per column of the optimal X, again limited to runs within 0.5 % of the best error
    fig, axs = plt.subplots(3, 1, layout='tight', figsize=(8, 6))
    for i in range(3):
        ax = axs[i]
        eq = np.where(VMS.complete_found_error < VMS.complete_found_error.min()*1.005)
        best_X = VMS.complete_optimal_X[eq]
        sns.histplot(best_X[:, i], ax=ax)
        # remember the limits so that the vertical "true" line does not rescale the axis
        yl, yu = ax.get_ylim()
        # relies on the insertion order of rate_constants_real matching the column order of the optimal X
        k = list(rate_constants_real.values())[i]
        ax.plot([k, k], [yl, yu], label='true', color="tab:orange")
        ax.set_ylim(yl, yu)
        ax.set_title(VMS.x_description[i])
    axs[0].legend()
    fig.savefig(f'{path}/best_ks.png', dpi=500)
    plt.close(fig)

    # rate constants of the runs within 1 % of the best error, with the true values drawn on top
    fig, ax = VMS.show_rate_constants(max_error=VMS.complete_found_error.min()*1.01, index_constant_values=None)
    ax.set_yscale("linear")
    # NOTE(review): x positions 1, 2, 3 presumably match the positions used by show_rate_constants - confirm
    ax.scatter([1, 2, 3], list(rate_constants_real.values()), label="true")
    ax.legend()
    fig.savefig(f'{path}/rate_constants_boxplot.png', dpi=500)
    plt.close(fig)

# One summary row per combination of real rate constants; the rows are collected in a
# plain list so the DataFrame can be built from them in a single step.
data = []

for k1 in rate_values:
    for kr1 in rate_values:
        for k2 in rate_values:
            path = f'optimization/example_model_boundaries/k1_{k1}_kr1_{kr1}_k2_{k2}/multiple_guess/'

            try:
                VMS = VisualizeMultipleSolutions(path)
            except Exception as e:
                # Best effort: report the combination that could not be loaded and go on.
                print(path)
                print(e)
                continue

            # real rate constants, used to express the found ones as found / real
            k_real = np.array([k1, kr1, k2])

            best_run = VMS.complete_found_error.argmin()
            best_error = VMS.complete_found_error.min()
            # "good" runs: error within 1 % of the lowest error found
            eq = VMS.complete_found_error < best_error * 1.01

            data.append([
                k1,
                kr1,
                k2,
                best_error,
                *VMS.complete_optimal_X[best_run] / k_real,  # k found / k real
                VMS.complete_found_error[eq].mean(),
                VMS.complete_found_error[eq].std(),
                sum(eq),
                *VMS.complete_optimal_X[eq].mean(axis=0) / k_real,  # mean k of the good runs / k real
            ])

500it [00:06, 71.53it/s]
500it [00:07, 68.87it/s]
500it [00:06, 71.47it/s]
500it [00:06, 73.56it/s]
500it [00:07, 71.30it/s]
500it [00:07, 70.06it/s]
500it [00:06, 73.48it/s]
500it [00:06, 72.01it/s]
500it [00:07, 70.84it/s]
500it [00:07, 69.18it/s]
500it [00:08, 62.32it/s]
500it [00:08, 59.26it/s]
500it [00:07, 69.19it/s]
500it [00:07, 66.32it/s]
500it [00:08, 62.45it/s]
500it [00:07, 69.35it/s]
500it [00:07, 70.24it/s]
500it [00:07, 63.28it/s]
500it [00:07, 69.67it/s]
500it [00:07, 70.52it/s]
500it [00:07, 70.38it/s]
500it [00:07, 69.08it/s]
500it [00:11, 43.97it/s]
500it [00:07, 70.03it/s]
500it [00:07, 70.95it/s]
500it [00:07, 64.29it/s]
500it [00:07, 69.07it/s]


In [44]:
# Column labels of the summary table, one label per entry of a row in `data`.
summary_columns = [
    'k1', 'k-1', 'k2',
    'best error',
    'best k1/real', 'best k-1/real', 'best k2/real',
    'mean_good_error', 'std_good_error', '#good runs',
    'good k1/real', 'good k-1/real', 'good k2/real',
]
df = pd.DataFrame(data, columns=summary_columns)

In [50]:
# Show the real rate constants together with the statistics of the "good" runs only.
df[[
    'k1', 'k-1', 'k2',
    'mean_good_error', 'std_good_error', '#good runs',
    'good k1/real', 'good k-1/real', 'good k2/real',
]]

Unnamed: 0,k1,k-1,k2,mean_good_error,std_good_error,#good runs,good k1/real,good k-1/real,good k2/real
0,0.1,0.1,0.1,0.114259,0.0001408731,106,0.488479,0.064254,1.938381
1,0.1,0.1,1.0,0.099282,0.0001597231,155,42.852323,2.4502,0.811151
2,0.1,0.1,10.0,0.095606,0.0001635809,171,93.821364,143.196559,4.351374
3,0.1,1.0,0.1,0.117287,0.0003823947,441,4.883236,0.589848,5.527144
4,0.1,1.0,1.0,0.106535,4.788973e-05,134,51.750906,0.534493,1.359461
5,0.1,1.0,10.0,0.097067,0.0001166605,157,72.178929,16.321849,4.354885
6,0.1,10.0,0.1,0.117817,5.93536e-05,460,19.894981,1.491178,136.032819
7,0.1,10.0,1.0,0.116052,0.0002364129,460,21.278619,1.299486,14.133602
8,0.1,10.0,10.0,0.105427,2.692828e-05,135,131.132748,3.500255,7.518519
9,1.0,0.1,0.1,0.103298,0.000149456,146,2.71125,0.275056,1.772637
