In [1]:
!pip install -q pyDOE2
!pip install -q definitive-screening-design
!git clone -q https://github.com/DataHow/analytics-course-scripts.git

  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for pyDOE2 (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m926.7/926.7 kB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for definitive-screening-design (setup.py) ... [?25l[?25hdone


In [2]:
#@title Import libraries
import importlib
import ipywidgets as widgets
from ipywidgets import interact,interactive,fixed
from google.colab import output
output.enable_custom_widget_manager()

import numpy as np
import pandas as pd
import os
import importlib
import scipy.integrate
import scipy.stats
import plotly.io as pio
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
pio.templates.default = "plotly_white"

# Script 0
from pyDOE2 import fullfact, ff2n, ccdesign
import definitive_screening_design as dsd

# The cloned repository folder name contains hyphens, which are not valid in a
# Python identifier, so the module cannot be named in a plain `import`
# statement; it is resolved from its dotted path string instead.
# Requires the clone from the first cell to be importable from the kernel.
_PROCESS_MODULE_PATH = "analytics-course-scripts.interactions.modules.process_introduction"
process = importlib.import_module(_PROCESS_MODULE_PATH)

In [3]:
#@title Functions for the script

def _make_range_slider(name, value, lower, upper, step, readout_format=''):
    """Build one horizontal range slider for a manipulated process variable.

    Args:
        name: Text shown next to the slider (the `description`).
        value: Initially selected ``[lower limit, upper limit]`` pair.
        lower: Smallest value the slider allows.
        upper: Largest value the slider allows.
        step: Increment between two selectable values.
        readout_format: Number format of the value readout.

    Returns:
        The configured ``widgets.FloatRangeSlider``.
    """
    return widgets.FloatRangeSlider(
        value=value,
        min=lower,
        max=upper,
        step=step,
        description=name,
        disabled=False,
        # Report a new value only once the handle is released, not while dragging.
        continuous_update=False,
        orientation='horizontal',
        readout=True,
        readout_format=readout_format,
    )


# Exploration ranges of the design of experiments; each slider selects the
# lower and upper limit of one manipulated variable.
FEED_START = _make_range_slider('feed_start', [1, 4], 0, 6, 1)
FEED_END = _make_range_slider('feed_end', [8, 12], 6, 14, 1)
FEED_RATE = _make_range_slider('feed_rate', [5, 20], 1, 30, 0.5)
GLC_0 = _make_range_slider('glc_0', [10, 40], 1, 80, 0.5, readout_format='.1f')
VCD_0 = _make_range_slider('vcd_0', [0.1, 1.0], 0.1, 2.0, 0.1, readout_format='.1f')
# Number of simulated runs requested from the design generator.
NUM_RUNS = widgets.IntSlider(
    description='num_runs',
    value=40,
    min=1,
    max=100,
    step=1,
    orientation='horizontal',
    readout=True,
    readout_format='d',
    continuous_update=False,
    disabled=False,
)

# Available design types; the first entry is the one selected initially.
_DOE_DESIGN_OPTIONS = [
    'Latin-Hypercube Sampling',
    '2-level Full-Factorial',
    '3-level Full-Factorial',
    'Central-Composite',
    'Definitive-Screening',
]
DOE_DESIGN = widgets.Select(
    description='doe_design',
    options=_DOE_DESIGN_OPTIONS,
    value=_DOE_DESIGN_OPTIONS[0],
    # Show every option without scrolling.
    rows=len(_DOE_DESIGN_OPTIONS),
    disabled=False,
)



# Run ids offered by the plotting controls: 0 .. _RUN_ID_COUNT - 1.
# NOTE(review): this range is fixed and independent of the `num_runs` slider,
# so ids beyond the generated runs can be picked — confirm how the plotting
# helpers handle that.
_RUN_ID_COUNT = 100

# Multi-selection of the runs whose time evolution is plotted.
SELECT_RUNS = widgets.SelectMultiple(
    description='select_runs',
    options=list(range(_RUN_ID_COUNT)),
    value=[0, 1],
    rows=10,
    disabled=False,
)

# Single run to emphasise in the coloured overview plot.
HIGHLIGHT_RUN = widgets.IntSlider(
    description='highlight run',
    value=0,
    min=0,
    max=_RUN_ID_COUNT - 1,
    step=1,
    orientation='horizontal',
    readout=True,
    readout_format='d',
    continuous_update=False,
    disabled=False,
)

# Quantity used to colour the runs in the overview plot.
_COLOR_BY_OPTIONS = [
    "run id",
    "titer_14",
    "feed_start",
    "feed_end",
    "feed_rate",
    "glc_0",
    "vcd_0",
]
SELECT_COLOR = widgets.Select(
    description='select color',
    options=_COLOR_BY_OPTIONS,
    value="titer_14",
    # Show every option without scrolling.
    rows=len(_COLOR_BY_OPTIONS),
    disabled=False,
)


# Introduction to Cell Culture Fed-Batch Process Simulator

The simulator is aimed at providing in-silico data to test some of the machine learning tools discussed during the course. It mimics the behavior of a fed-batch cell culture process, in which only a few components are present:


*   The cells, which are responsible for producing the product; they consume glucose to sustain their metabolism and produce lactate as a by-product. They are indicated by VCD (viable cell density), typically expressed in million cells / mL.
*   Glucose (Glc) is consumed by the cells and is continuously fed to the process (F_glc). Too low glucose concentrations slow down cell growth and product formation. Too high glucose concentrations poison the system and accelerate cell death.
*   Lactate (Lac) is a by-product of the cells and poisons them, so that too high lactate concentrations slow down cell metabolism and accelerate cell death.
*   Product (Titer) is produced by the cells. The faster the cells grow, the less product they produce.

The simulator's results are based on the following equations, which are used to create the in-silico data:

### Model Equation Parameters

- Balance on VCD:  $\frac{dVCD}{dt}$ = (μ<sub>g</sub> - μ<sub>d</sub>)VCD
- Balance on Glucose: $\frac{dGlc}{dt}$ = $-k_{Glc} \frac{Glc}{Glc + 0.05}$ VCD + F<sub>Glc</sub>
- Balance on Lactate: $\frac{dLac}{dt}$ = k<sub>Lac</sub> VCD
- Balance on Product (Titer): $\frac{dProd}{dt}$ = k<sub>Prod</sub>$\frac{Glc}{Glc + K_{g, Glc} }$ ($\frac{μ_{g}}{μ_{g,max}}$)<sup>2</sup> VCD $-2 \frac{dAggr}{dt}$

Where:
- Growth rate: $μ_{g} =  μ_{g,max}\frac{Glc}{Glc +K_{g, Glc}}\frac{K_{i, Lac}}{Lac+K_{i, Lac}} $
- Death rate: $μ_{d} = μ_{d,max}(1+\frac{φ}{1+φ})\frac{Lac}{Lac+K_{d, Lac}}$
- Glc saturation: φ = e<sup>0.1(Glc-75)</sup>

The user can change the different rates on the simulator, in order to change the process behavior (please use default values at the beginning)


### Process Parameters

Please insert the values of the process manipulated variables:

- Feed start (day): day at which Glc feed is started
- Feed end (day): day at which Glc feed is stopped
- Feed rate: mass rate (g/L/day) at which Glc is fed (continuous feed over 24 hours)
- Initial Glc concentration (g/L): Glc at time t = 0
- Initial VCD (10^6 cell/mL): VCD at time t = 0

In [4]:
# Single-run simulator: every (min, max) tuple is expanded by ipywidgets into a
# slider for the matching argument of `process.plot_process`.
interactive(
    process.plot_process,
    feed_start=(0, 6),
    feed_end=(7, 14),
    feed_rate=(5.0, 20.0),
    glc_0=(5, 20),
    vcd_0=(0.1, 1.0),
)

interactive(children=(IntSlider(value=3, description='feed_start', max=6), IntSlider(value=10, description='fe…

# Generate Design of Experiments

In order to show the complexity of the model, in spite of its apparent simplicity and small number of components, in this section the user can simulate the behavior of the process over a broad range of the process variables.

A number of simulations defined by "num_runs" will be generated. For this number of simulations, a Latin hypercube design (LHD) is created by default, to uniformly cover the 5-dimensional space of the variables.

### Manipulated Variables

For each of the manipulated variables defined in the section above, the user can define the limits of the exploration space (first value: lower limit; second value: upper limit).

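### Design Selection

Use `doe_design` to choose the type of design: Latin-Hypercube Sampling, 2-level Full-Factorial, 3-level Full-Factorial, Central-Composite or Definitive-Screening.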






In [5]:
# Keep a handle on the interactive DOE widget: later cells read the most recent
# return value of `process.plot_doe` from `generated_doe.result`.
generated_doe = interactive(
    process.plot_doe,
    feed_start=FEED_START,
    feed_end=FEED_END,
    feed_rate=FEED_RATE,
    glc_0=GLC_0,
    vcd_0=VCD_0,
    num_runs=NUM_RUNS,
    doe_design=DOE_DESIGN,
)
generated_doe

interactive(children=(FloatRangeSlider(value=(1.0, 4.0), continuous_update=False, description='feed_start', ma…

# Plot Experiments

Here we visualise the generated experiments, especially their evolution in time.

In [6]:
# Generate the run data from the design currently held by the DOE widget.
# `generated_doe.result` is read once, when this cell runs; re-run the cell
# after changing the DOE controls above.
owu_df, doe_df = process.generate_data(generated_doe.result)

# `fixed` passes the data frame through unchanged instead of creating a widget.
interactive(
    process.plot_data,
    select_runs=SELECT_RUNS,
    owu_df=fixed(owu_df),
)

interactive(children=(SelectMultiple(description='select_runs', index=(0, 1), options=(0, 1, 2, 3, 4, 5, 6, 7,…

## Analyse all experiments

Here you can visualize all runs from the generated dataset. By changing `select color` you decide on the coloring of the experiments. The options are:
* `run id` runs are colored by the order in which they appear in the dataset.
* `titer_14` runs are colored by the amount of Titer at day 14 of the experiments.
* `glc_0` runs are colored by the designed initial Glucose level
* `vcd_0` runs are colored by the designed initial VCD level
* `feed_start` runs are colored by the designed feeding start day
* `feed_end` runs are colored by the designed feeding end day
* `feed_rate` runs are colored by the designed Glucose feed rate

In [7]:
# The data are generated again from the current DOE result.
# NOTE(review): presumably repeated so this cell also works when the previous
# cell was not re-run — confirm, since it rebinds `owu_df` and `doe_df`.
owu_df, doe_df = process.generate_data(generated_doe.result)

# `fixed` passes both data frames through unchanged instead of creating widgets.
interactive(
    process.plot_data_color,
    highlight_run=HIGHLIGHT_RUN,
    select_color=SELECT_COLOR,
    owu_df=fixed(owu_df),
    doe_df=fixed(doe_df),
)

interactive(children=(IntSlider(value=0, continuous_update=False, description='highlight run', max=99), Select…

# Tasks

1. Identify which process parameters produce the highest level of titer in the simulated experiments.
2. Replicate that run in the single-experiment simulator above and try to modify it to reach an even higher level of Titer.