# Preprocessing
Original raw data files are located in `./raw_data`.

This notebook transforms the relevant data into Excel sheets (plus one `.h5` file for the cultivation dataset) in `./processed`, using the publicly available `bletl` package and our internal `retl` package.

In [1]:
import pandas
import pathlib

import bletl
import retl

import models
import preprocessing

# Input directory holding the original raw data files (read-only in this notebook).
DP_RAW = pathlib.Path("raw_data")
# Output directory that every cell below writes its processed files into.
DP_PROCESSED = pathlib.Path("processed")
# Create the output directory up front so that a fresh checkout survives
# "Restart Kernel -> Run All"; the call does nothing if it already exists.
DP_PROCESSED.mkdir(parents=True, exist_ok=True)

### Glucose calibration data

In [2]:
# Read the glucose calibration pairs (X, Y) from the dilution sheet and the
# reader output of run 8EXA1W.
X, Y = preprocessing.read_glucose_x_y(
    fp_dilutions=DP_RAW / "8EXA1W_dilution_factors_glc.xlsx",
    fp_rdata=DP_RAW / "8EXA1W_ReaderOutput_0_fresh.xml",
    stock_concentration=50.0,
)

# Tabulate both series side by side, then use the concentration as the index
# so it is written as the leading column of the Excel sheet.
df = pandas.DataFrame(data={"concentration": X, "absorbance": Y})
df = df.set_index("concentration")
df.to_excel(DP_PROCESSED / "glucose_calibration_data.xlsx")

### Biomass calibration data

In [3]:
# One (runid, independent, dependent) record per calibration data point,
# accumulated over all runs. Collecting plain tuples and building the
# DataFrame once avoids growing a DataFrame row by row inside the loop.
records = []

for runid in ["7MFD4H", "7N3HF5"]:
    # get stock CDW
    stock_mean, stock_sem = preprocessing.read_biomass_stock_concentration(
        DP_RAW / f"{runid}_weights_before.csv",
        DP_RAW / f"{runid}_weights_after.csv",
        eppi_from=7,
        eppi_to=12,
    )
    print(
        f"Run {runid} was performed with a stock of {stock_mean} ± {stock_sem} gCDW/L"
    )

    # and the dilution factors from this run
    df_dilutions = preprocessing.read_biomass_dilution_factors(
        DP_RAW / f"{runid}_dilution_factors_cdw.xlsx"
    )

    independent, dependent = preprocessing.read_biomass_x_and_y(
        fp_bldata=DP_RAW / f"{runid}_Pahpshmir.csv",
        df_dilutions=df_dilutions,
        rpm=1400,
        filterset="BS3",
        stock_concentration=stock_mean,
    )
    # collect the paired values of this run, tagged with the run they came from
    records.extend((runid, ind, dep) for ind, dep in zip(independent, dependent))

# Build the table in one step; the running row number becomes the
# "data_point" index, exactly as in the exported Excel sheet.
df_data = pandas.DataFrame(
    records, columns=["runid", "independent", "dependent"]
).rename_axis("data_point")

df_data.to_excel(DP_PROCESSED / "biomass_calibration_data.xlsx")

Run 7MFD4H was performed with a stock of 25.785416666666702 ± 0.15907992837284796 gCDW/L
Run 7N3HF5 was performed with a stock of 17.12916666666666 ± 0.17040596950938838 gCDW/L


### Cultivation dataset & parameter mapping

In [4]:
# Assemble the cultivation dataset via the project helper and store it
# alongside the other processed files.
dataset = preprocessing.create_cultivation_dataset()
fp_dataset = DP_PROCESSED / "cultivation_dataset.h5"
dataset.save(fp_dataset)
# Bare last expression: show the dataset's repr as the cell output.
dataset

Dataset([('A02', Replicate(Pahpshmir_1400_BS3_CgWT[:1], A365[:1])),
         ('A03', Replicate(Pahpshmir_1400_BS3_CgWT[:34], A365[:1])),
         ('A04', Replicate(Pahpshmir_1400_BS3_CgWT[:68], A365[:1])),
         ('A05', Replicate(Pahpshmir_1400_BS3_CgWT[:101], A365[:1])),
         ('A06', Replicate(Pahpshmir_1400_BS3_CgWT[:134], A365[:1])),
         ('A07', Replicate(Pahpshmir_1400_BS3_CgWT[:170], A365[:1])),
         ('A08', Replicate(Pahpshmir_1400_BS3_CgWT[:201], A365[:1])),
         ('B02', Replicate(Pahpshmir_1400_BS3_CgWT[:9], A365[:1])),
         ('B03', Replicate(Pahpshmir_1400_BS3_CgWT[:44], A365[:1])),
         ('B04', Replicate(Pahpshmir_1400_BS3_CgWT[:76], A365[:1])),
         ('B05', Replicate(Pahpshmir_1400_BS3_CgWT[:109], A365[:1])),
         ('B06', Replicate(Pahpshmir_1400_BS3_CgWT[:143], A365[:1])),
         ('B07', Replicate(Pahpshmir_1400_BS3_CgWT[:176], A365[:1])),
         ('B08', Replicate(Pahpshmir_1400_BS3_CgWT[:209], A365[:1])),
         ('C02', Replicate(P

In [5]:
model = models.MonodModel()

# Parameters that get their own name per replicate (suffixed with the
# replicate id). Every other model parameter shares one name across all
# replicates.
replicate_specific = {"X0"}

# Derive each row from the model's own parameter order instead of a
# hard-coded tuple, so columns and values cannot silently get out of sync.
parameter_names = list(model.parameter_names)
rids = list(dataset.keys())
mapping_rows = [
    tuple(
        f"{pname}_{rid}" if pname in replicate_specific else pname
        for pname in parameter_names
    )
    for rid in rids
]

# Build the table once rather than enlarging it row by row.
df_mapping = pandas.DataFrame(
    mapping_rows,
    index=pandas.Index(rids, name="rid"),
    columns=parameter_names,
)
df_mapping.to_excel(DP_PROCESSED / "full_parameter_mapping.xlsx")
df_mapping.head()

Unnamed: 0_level_0,S0,X0,mu_max,K_S,Y_XS
rid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
A02,S0,X0_A02,mu_max,K_S,Y_XS
A03,S0,X0_A03,mu_max,K_S,Y_XS
A04,S0,X0_A04,mu_max,K_S,Y_XS
A05,S0,X0_A05,mu_max,K_S,Y_XS
A06,S0,X0_A06,mu_max,K_S,Y_XS


In [6]:
# Record the execution environment (timestamp, Python/IPython versions,
# OS and hardware) below the results for provenance.
%load_ext watermark
%watermark

Last updated: 2021-12-15T14:48:32.054605+01:00

Python implementation: CPython
Python version       : 3.7.9
IPython version      : 7.24.1

Compiler    : MSC v.1916 64 bit (AMD64)
OS          : Windows
Release     : 10
Machine     : AMD64
Processor   : Intel64 Family 6 Model 158 Stepping 10, GenuineIntel
CPU cores   : 6
Architecture: 64bit

