In [8]:
#__version__ = 0.7.1

SyntaxError: invalid syntax (4152705385.py, line 1)

In [3]:
from __future__ import annotations

from collections import namedtuple
from typing import List, Tuple, Callable, Union
import warnings

import numpy as np
import pandas as pd
import pytz

from ipy_incyght.calculations.classes import CalculationWarning
from ipy_incyght.classes import Run, Variable, Parameter

# ---------------------------------------------------------------------------------------------------------
# Name constants. Every string below is the key under which a variable or parameter is looked up in
# (or written to) a run, so the literals have to match the names used by the data source exactly.
# ---------------------------------------------------------------------------------------------------------

# raw data inputs
# OnlineData
CO2_YIELD_X = 'CO2 yield (X)'
O2_YIELD_X = 'O2 yield (X)'
FEED_RATE_GRAVIMETRIC, FEED_AMOUNT = 'Feed 1 rate gravimetric', 'Feed amount'
FEED_VOLUME, FEED_RATE_VOLUMETRIC = 'Feed 1 volume', 'Feed 1 rate volumetric'
# single-element tuple unpacking: AIR_FLOW is the plain string 'Air flow', not a tuple
AIR_FLOW, = 'Air flow',
ACID_VOLUME, BASE_VOLUME = 'Acid volume', 'Base volume'
ACID_RATE, BASE_RATE = 'Acid rate', 'Base rate'
CO2, CO2_PERCENTAGE_INFLOW = 'Offgas CO2', 'Inlet air CO2'
O2, O2_PERCENTAGE_INFLOW = 'Offgas O2', 'Inlet air O2'
DILUTION_RATE = 'D'

# AnalyticalData
OD = 'OD'

# parameters (keys into the parameter object that the calculation classes receive as ``p``)
FEED_DENSITY = 'feed_density'
SAMPLE_DENSITY = 'sample_density'
START_VOLUME = 'start_volume'
BIOMASS_CONVERSION_FACTOR = 'biomass_conversion_factor'
R, T, P = 'R', 'T', 'P'
BIOMASS_INITIAL_OD = 'biomass_initial_od'
LEGACY = 'legacy'  # applies a variation of these calculations that was used in outdated xlsx example files
USE_SPECIFIC_NAMES_FOR_RESULTS = 'use_specific_names_for_results'
MOLAR_MASS_OF_BIOMASS = 'molar_mass_of_biomass'

# intermediate results
CO2_DECIMAL, O2_DECIMAL = 'CO2 decimal', 'O2 decimal'
CO2_Q, O2_Q = 'CO2 Q', 'O2 Q'
CO2_MOLAR, O2_MOLAR = 'CO2 molar', 'O2 molar'
CO2_ACCUMULATED, O2_ACCUMULATED = 'CO2 accumulated', 'O2 accumulated'
CO2_ACCUMULATED_DOWNSCALED, O2_ACCUMULATED_DOWNSCALED = 'CO2 accumulated downscaled', 'O2 accumulated downscaled'

O2_UPTAKE_RATE = 'O2 uptake rate'
CO2_EVOLUTION_RATE = 'CO2 evolution rate'
O2_SPECIFIC_UPTAKE_RATE = 'O2 specific uptake rate'
CO2_SPECIFIC_EVOLUTION_RATE = 'CO2 specific evolution rate'

O2_YIELD_C_MOLAR_S = 'O2 yield c-molar (S)'
CO2_YIELD_C_MOLAR_S = 'CO2 yield c-molar (S)'

BIOMASS = 'Biomass'
BIOMASS_FORMED = 'Biomass formed'
BIOMASS_FORMED_MOLAR = BIOMASS_FORMED + ' molar'
BIOMASS_AMOUNT = 'Biomass amount'
BIOMASS_AMOUNT_MOLAR = BIOMASS_AMOUNT + ' molar'

BIOMASS_FORMATION_RATE = 'Biomass formation rate'
BIOMASS_FORMATION_RATE_MOLAR = 'Biomass formation rate molar'

ACCUMULATED_SAMPLE_VOLUME = 'Accumulated sample volume'
LIQUID_VOLUME = 'Liquid volume'
LIQUID_VOLUME_DOWNSCALED = f'{LIQUID_VOLUME} downscaled'
LIQUID_VOLUME_WITHOUT_CORRECTED_SAMPLES = f'{LIQUID_VOLUME} (without corrected samples)'

SAMPLE_AMOUNT = 2  # mL -> only if sample volume variable is not provided

# components
# chained assignment: the list is bound to ALL_SUBSTRATES and unpacked into the individual names at once
SUBSTRATE_1, SUBSTRATE_2 = ALL_SUBSTRATES = [f'Substrate {i}' for i in range(1, 3)]
PRODUCT, BY_PRODUCT = 'Product', 'ByProduct'
COMPOUND_1, COMPOUND_2, COMPOUND_3, COMPOUND_4, COMPOUND_5, COMPOUND_6 = ALL_COMPOUNDS = \
    [f'Compound {i}' for i in range(1, 7)]

# NOTE(review): `_parameters` is not defined anywhere in this cell - presumably it is injected into the
# notebook namespace by the host environment before the cell runs; on a fresh kernel without it this line
# raises NameError. TODO confirm where it comes from.
# When the flag is set, the gas-flow signal is looked up under 'Nitrogen flow' instead of 'Air flow'.
if _parameters.air_or_nitrogen:
    AIR_FLOW = 'Nitrogen flow'


def replace_first(a: np.array) -> np.array:
    """
    Build a new 1-D array whose first entry is NaN, followed by every element of ``a`` except its first.

    The input is not modified. Because NaN is a float, the result is promoted to a float dtype.
    Note that an empty input still yields a one-element ``[nan]`` array.

    :param a: 1-D array (or sliceable sequence) of numbers
    :return: ``[nan, a[1], a[2], ...]``
    """
    nan_head = np.full(1, np.nan)
    remainder = a[1:]
    return np.concatenate((nan_head, remainder))


class DTUCalculationBase:
    """
    Common infrastructure shared by all DTU calculation steps.

    Holds the run being processed, the parameter object ``p`` and the time axis (``TIME_VEC``) of the step
    that is currently calculated. Offers helpers to import spreadsheet columns as run variables and to
    sample one variable at the timestamps of another.
    """
    TIME_VEC = None  # time axis of the step that is currently calculated (set by the subclasses)
    ONLINE_TIME_VEC = None  # time axis of the online signals (only set by steps that need both axes)
    VAR_HEADERS = ()  # spreadsheet column headers imported by ``add_variables_to_run_from_df``
    TIME_HEADER = None  # header of the spreadsheet column that holds the elapsed time in hours

    run = None
    test_file = None
    start_datetime = None

    def __init__(self, run=None, p=None):
        """
        :param run: optional run to work on; its ``first_timestamp`` (if set) is converted to UTC in place
        :param p: parameter object; a fresh ``DTUParameters()`` is created when omitted
        """
        if run is not None and run.first_timestamp is not None:
            run.first_timestamp = run.first_timestamp.astimezone(pytz.utc)
        self.run = run
        self.p = DTUParameters() if p is None else p

    def import_data(self, test_file, start_datetime) -> Run:
        """
        Remember the data source and start time, and create a default run if none was passed to ``__init__``.

        :param test_file: spreadsheet that is read later by ``standardized_df``
        :param start_datetime: timestamp that corresponds to elapsed time 0
        :return: the run that subsequent import steps should fill
        """
        self.test_file = test_file
        self.start_datetime = start_datetime
        if self.run is None:
            self.run = Run(name='Run 1', first_timestamp=self.start_datetime)
        return self.run

    def preprocess_variables(self, run: Run):
        """Hook for subclasses: bring the variables of ``run`` into the shape ``calculate`` expects."""
        raise NotImplementedError()

    @staticmethod
    def _unit_from_header(header: str) -> str:
        """Return the text between the first '(' and the following ')' of ``header``, or '-' if there is none."""
        opening = header.find('(')
        if opening == -1:
            return '-'
        closing = header.find(')', opening + 1)
        if closing == -1:
            return '-'
        return header[opening + 1:closing]

    def add_variables_to_run_from_df(self, run: Run, df: pd.DataFrame):
        """
        Add one variable per entry of ``VAR_HEADERS`` to ``run`` and set ``TIME_VEC`` from the time column.

        The variable name is the header text before ' (' and the unit is the text inside the parentheses
        ('-' when the header has none). 'Airflow' is always imported with the unit 'lpm'.

        :param run: run that receives the variables
        :param df: frame whose columns are named by the spreadsheet headers
        :return: the same ``run``
        :raises AssertionError: if a header from ``VAR_HEADERS`` is not a column of ``df``
        """
        self.TIME_VEC = pd.TimedeltaIndex(df[self.TIME_HEADER], unit='h') + self.start_datetime
        for online_var_header in self.VAR_HEADERS:
            if online_var_header not in df.columns:
                raise AssertionError(f'Invalid header "{online_var_header}"')
            name = online_var_header.split(' (')[0].strip()
            data = df[online_var_header]
            # str.find() reports a miss as -1 and never raises, so a missing unit has to be detected
            # explicitly; slicing with the raw find() results would turn the header itself into the unit
            unit = self._unit_from_header(online_var_header)
            if name == 'Airflow':
                unit = 'lpm'
            run.add_variable(Variable(name=name, unit=unit, time=self.TIME_VEC, data=data.astype(float)))
        return run

    def standardized_df(self, sheet_name: str, start_row: int, start_col: int, end_row: int = None) -> pd.DataFrame:
        """
        Read a sheet of ``test_file`` and promote the first row of the selected region to column headers.

        :param sheet_name: sheet to read
        :param start_row: first row (positional, after pandas consumed its own header row) of the region
        :param start_col: first column (positional) of the region
        :param end_row: positional end of the data rows within the region (exclusive); None keeps all rows
        :return: frame with the data rows only, columns named after the region's first row
        """
        df = pd.read_excel(self.test_file, sheet_name=sheet_name).iloc[start_row:, start_col:]
        header = df.iloc[0]
        if end_row is None:
            df = df[1:]
        else:
            df = df[1:end_row]
        df.columns = header
        return df

    def last_value_before_timestamps(self, variable: Variable, timestamps: pd.DatetimeIndex,
                                     method: Union[str, None] = 'pad') -> np.array:
        """
        Sample the first data column of ``variable`` at the given timestamps.

        :param variable: variable whose time-indexed frame ``variable.df`` is looked up; for any ``method``
            other than None the index has to be sorted, and it must not contain duplicate timestamps
        :param timestamps: timestamps to look up
        :param method: index matching strategy: 'pad' (last sample at or before the timestamp, the default),
            'backfill', 'nearest', or None to accept exact matches only
        :return: one value per timestamp, NaN where no matching sample exists
        """
        # ``Index.get_loc(key, method=...)`` was deprecated in pandas 1.4 and removed in pandas 2.0.
        # ``Index.get_indexer`` is the supported equivalent; it reports a miss as -1 instead of raising.
        positions = variable.df.index.get_indexer(pd.Index(timestamps), method=method)
        values = [np.nan if position == -1 else variable.df.iloc[position, 0] for position in positions]
        return np.array(values)

    def get_shortcuts(self, run: Run) -> Tuple[Callable, Callable, Callable, Callable]:
        """
        Build four small accessors bound to ``run`` that keep the calculation code compact.

        :return: ``(i, t, p, add_variable)`` - data vector of a variable, time axis of a variable,
            parameter value, and a function that stores a calculated variable in the run
        """
        def i(variable_name: str) -> np.array:
            """:return a numpy vector for the calculation input variable"""
            return run._variables[variable_name].data.m

        def t(variable_name: str):
            """:return the time axis of the variable"""
            return run._variables[variable_name].time

        def p(parameter_name: str):
            """:return a parameter float value"""
            return self.p[parameter_name]

        def add_variable(name, data, unit=None, time=None):
            """Store ``data`` as a calculated (non-raw) variable, replacing any variable of the same name."""
            if time is None:
                # read TIME_VEC at call time so that it follows the step that is being calculated
                time = self.TIME_VEC
            variable = Variable(name=name, unit=unit, data=data.astype(float), time=time)
            variable.raw_data = False
            run.add_variable(variable, replace=True)
            return variable

        return i, t, p, add_variable

    def fill_incomplete_var_with_nan(self, run: Run, variable: Variable):
        """
        Align ``variable`` with ``TIME_VEC``: timestamps it has no exact sample for are filled with NaN.

        Emits a ``CalculationWarning`` when the variable had to be re-aligned; does nothing otherwise.
        """
        if not variable.df.index.equals(self.TIME_VEC):
            warnings.warn(f'{run.name}: '
                          f'Sampling rate of "{variable.name}" differs from "{self.p.sample_volume_name}". '
                          f'Filling up missing values with "NaNs". The calculation results could be incomplete.',
                          CalculationWarning)

            # fill missing values with nan
            variable.df = pd.DataFrame(index=self.TIME_VEC, data=self.last_value_before_timestamps(
                variable=variable, timestamps=self.TIME_VEC, method=None
            ))


class DTUCalculationOnlineData(DTUCalculationBase):
    """
    Imports the DTU OnlineData and derives feed, liquid volume and off-gas variables from it. Assumptions:
     - All online raw variables have the same origin and there is no need for interpolation
    """
    TIME_HEADER = 'Time (h)'
    # NOTE: 'Airflow ' carries a trailing space; headers are matched literally against the sheet columns
    VAR_HEADERS = (
        'Airflow ', 'CO2 percentage inflow (%)', 'CO2 (%)', 'O2 percentage inflow (%)', 'Oxygen (%)', 'Base (mL)',
        'Acid (mL)', 'Feed rate (g/h)', 'Time (h)',)

    def preprocess_variables(self, run: Run):
        """
        Choose the online time axis, align late-starting variables with it and normalise units.

        ``TIME_VEC`` is taken from the gas-flow variable if present, otherwise from the off-gas CO2 variable.
        If the run has no ``first_timestamp`` yet, the first entry of ``TIME_VEC`` is used.
        """
        if AIR_FLOW in run._variables:
            self.TIME_VEC = run._variables[AIR_FLOW].df.index
        else:
            self.TIME_VEC = run._variables[CO2].df.index

        if run.first_timestamp is None:
            run.first_timestamp = self.TIME_VEC[0]

        # some variables start later -> fill missing values with zeros
        for variable_name in [FEED_VOLUME, O2, CO2, FEED_RATE_VOLUMETRIC, BASE_VOLUME]:
            if variable_name in run._variables:
                variable = run._variables[variable_name]
                variable.df = variable.df.reindex(self.TIME_VEC, fill_value=0.0)

        # ensure, variables have the correct unit
        if AIR_FLOW in run._variables:
            run._variables[AIR_FLOW].convert_data_to('L/h')
        if FEED_RATE_VOLUMETRIC in run._variables:
            run._variables[FEED_RATE_VOLUMETRIC].convert_data_to('L/h')
        if FEED_RATE_GRAVIMETRIC in run._variables:
            run._variables[FEED_RATE_GRAVIMETRIC].convert_data_to('kg/h')
        if ACID_VOLUME in run._variables:
            run._variables[ACID_VOLUME].convert_data_to('L')
        # unlike the variables above, the base volume is accessed without a presence check: it is required
        run._variables[BASE_VOLUME].convert_data_to('L')

    def import_data(self, *args, **kwargs) -> Run:
        """
        Read the 'OnlineData' sheet and add its columns to the run under the internal variable names.

        :param start_col: optional keyword, first sheet column of the data region (default 4); all other
            arguments are forwarded to ``DTUCalculationBase.import_data``
        :return: the run with the online variables added
        :raises AssertionError: if the unit of 'Feed rate' contains neither 'g/h' nor 'l/h'
        """
        start_col = kwargs.pop('start_col', 4)
        run = super().import_data(*args, **kwargs)
        df = self.standardized_df(sheet_name='OnlineData', start_row=0, start_col=start_col)
        test_run = self.add_variables_to_run_from_df(run, df)

        test_run.rename_variable('CO2 percentage inflow', CO2_PERCENTAGE_INFLOW)
        test_run.rename_variable('O2 percentage inflow', O2_PERCENTAGE_INFLOW)
        test_run.rename_variable('Oxygen', O2)
        test_run.rename_variable('CO2', CO2)
        test_run.rename_variable('Airflow', AIR_FLOW)
        test_run.rename_variable('Base', BASE_VOLUME)
        test_run.rename_variable('Acid', ACID_VOLUME)

        # the sheet has a single feed-rate column; its unit decides whether it is a mass or a volume rate
        FEED_RATE = 'Feed rate'
        feed_rate_unit = test_run._variables[FEED_RATE].unit.lower()
        if 'g/h' in feed_rate_unit:
            test_run.rename_variable(FEED_RATE, FEED_RATE_GRAVIMETRIC)
        elif 'l/h' in feed_rate_unit:
            test_run.rename_variable(FEED_RATE, FEED_RATE_VOLUMETRIC)
        else:
            raise AssertionError(f'Invalid unit "{feed_rate_unit}" for "{FEED_RATE}"')

        return test_run

    def calculate(self, runs: List[Run]):
        """
        Derive the online variables for every run: feed amount/volume, liquid volume (without sample
        correction), dilution rate, and the off-gas CO2/O2 flows with their time integrals.

        The statements are order-dependent: each ``add_variable`` call stores a result in the run that later
        ``i(...)`` calls read back.
        """
        # The three helpers below use ``i``, which is bound per run inside the loop further down.
        def mean_rate_between_times(variable_name):
            """Midpoint of each pair of consecutive samples (one element shorter than the variable)."""
            return i(variable_name)[:-1] + (np.diff(i(variable_name)) / 2)

        def cumulative_sum(variable_name, time_diff_h):
            """Running trapezoidal integral of the variable over time, starting at 0."""
            return np.cumsum(np.concatenate([np.array([0]), time_diff_h * mean_rate_between_times(variable_name)]))

        def partial_int(variable_name, time_diff_h, first_value=0):
            """Trapezoid area of every single interval, prefixed with ``first_value``."""
            return np.insert(time_diff_h * mean_rate_between_times(variable_name), 0, first_value)

        for run in runs:
            i, t, p, add_variable = self.get_shortcuts(run)
            self.preprocess_variables(run)
            # elapsed time in hours since the first timestamp of the run, and the step widths between samples
            time_h = (self.TIME_VEC - run.first_timestamp).total_seconds() / 3600
            time_diff_h = np.diff(time_h)

            # --- feed ---------------------------------------------------------------------------------
            if FEED_RATE_GRAVIMETRIC not in run._variables:
                add_variable(name=FEED_RATE_GRAVIMETRIC, unit='kg/h',
                             data=i(FEED_RATE_VOLUMETRIC) * p(FEED_DENSITY))

            add_variable(name=FEED_AMOUNT, unit='kg', data=cumulative_sum(FEED_RATE_GRAVIMETRIC, time_diff_h))

            if FEED_VOLUME not in run._variables:
                if p(LEGACY):
                    # legacy variant: the feed volume lags the feed amount by one sample
                    add_variable(name=FEED_VOLUME, unit='L',
                                 data=np.concatenate([np.array([0]), i(FEED_AMOUNT)[0:-1] / p(FEED_DENSITY)]))
                else:
                    add_variable(name=FEED_VOLUME, unit='L',
                                 data=i(FEED_AMOUNT) / p(FEED_DENSITY))
            else:
                run._variables[FEED_VOLUME].convert_data_to('L')

            if ACID_VOLUME not in run._variables or all(np.isnan(np.array(i(ACID_VOLUME)))):
                # if no acid volume variable was given, assume it is 0
                # if all acids are nan, also set them to 0
                add_variable(name=ACID_VOLUME, unit='L', data=i(BASE_VOLUME) * 0)

            # --- liquid volume and dilution rate --------------------------------------------------------
            add_variable(name=LIQUID_VOLUME_WITHOUT_CORRECTED_SAMPLES, unit='L',  # without corrected samples
                         data=i(ACID_VOLUME) + i(BASE_VOLUME) + p(START_VOLUME) + i(FEED_VOLUME))

            if FEED_RATE_VOLUMETRIC not in run._variables:
                add_variable(name=FEED_RATE_VOLUMETRIC, unit='L/h', data=i(FEED_RATE_GRAVIMETRIC) / p(FEED_DENSITY))
            else:
                run._variables[FEED_RATE_VOLUMETRIC].convert_data_to('L/h')

            # a provided liquid volume takes precedence over the one calculated above
            if LIQUID_VOLUME in run._variables:
                dilution_rate_data = i(FEED_RATE_VOLUMETRIC) / i(LIQUID_VOLUME)
            else:
                dilution_rate_data = i(FEED_RATE_VOLUMETRIC) / i(LIQUID_VOLUME_WITHOUT_CORRECTED_SAMPLES)
            add_variable(name=DILUTION_RATE, unit='1/h', data=dilution_rate_data)

            # offgas data
            # If the inlet concentration was not measured, the mean of the first three off-gas readings is
            # used as a constant. ``i(FEED_RATE_VOLUMETRIC) * 0`` only provides an array of the right length.
            # NOTE(review): NaN/inf entries in the feed rate propagate into that constant (NaN * 0 is NaN).
            if CO2_PERCENTAGE_INFLOW not in run._variables:
                add_variable(name=CO2_PERCENTAGE_INFLOW, unit='%',
                             data=i(FEED_RATE_VOLUMETRIC) * 0 + np.nanmean(i(CO2)[0:3]))

            if O2_PERCENTAGE_INFLOW not in run._variables:
                add_variable(name=O2_PERCENTAGE_INFLOW, unit='%',
                             data=i(FEED_RATE_VOLUMETRIC) * 0 + np.nanmean(i(O2)[0:3]))


            # Added to work around an error caused by variables of different length:
            # some variables start later -> fill missing values with zeros
            # NOTE(review): only the O2 inlet variable is re-aligned here; the CO2 inlet variable is not.
            for variable_name in [O2_PERCENTAGE_INFLOW]:
                if variable_name in run._variables:
                    variable = run._variables[variable_name]
                    variable.df = variable.df.reindex(self.TIME_VEC, fill_value=0.0)

            # concentration differences between off-gas and inlet as fractions (percent / 100);
            # note the opposite sign convention: CO2 is off-gas minus inlet, O2 is inlet minus off-gas
            add_variable(name=CO2_DECIMAL, data=(i(CO2) - i(CO2_PERCENTAGE_INFLOW)) / 100)
            add_variable(name=O2_DECIMAL, data=(i(O2_PERCENTAGE_INFLOW) - i(O2)) / 100)

            # volumetric flows: gas flow times the concentration difference
            add_variable(name=CO2_Q, unit='L/h', data=i(AIR_FLOW) * i(CO2_DECIMAL))
            add_variable(name=O2_Q, unit='L/h', data=i(AIR_FLOW) * i(O2_DECIMAL))

            # molar flows: volumetric flow divided by R * T / P
            add_variable(name=CO2_MOLAR, unit='mol/h', data=i(CO2_Q) / ((p(R) * p(T)) / p(P)))
            add_variable(name=O2_MOLAR, unit='mol/h', data=-i(O2_Q) / ((p(R) * p(T)) / p(P)))  # negative

            # per-interval and accumulated time integrals of the molar flows
            CO2_PARTIAL_INTEGRATION, O2_PARTIAL_INTEGRATION = 'CO2 partial integration', 'O2 partial integration'
            add_variable(name=CO2_PARTIAL_INTEGRATION, data=partial_int(CO2_MOLAR, time_diff_h))
            add_variable(name=O2_PARTIAL_INTEGRATION, data=partial_int(O2_MOLAR, time_diff_h))
            add_variable(name=CO2_ACCUMULATED, data=cumulative_sum(CO2_MOLAR, time_diff_h))
            add_variable(name=O2_ACCUMULATED, data=cumulative_sum(O2_MOLAR, time_diff_h))


class DTUCalculationAnalyticalData(DTUCalculationBase):
    """
    Imports the DTU AnalyticalData and derives amounts, sample losses and formed/consumed masses
    for biomass and every residual component. Assumptions:
     - All online data calculations were already done
    """
    TIME_HEADER = 'EFT (h)'
    VAR_HEADERS = (
        'Sample volume (mL)', 'OD (measured)', f'Residual {SUBSTRATE_1} concentration (g/L)',
        f'Residual {SUBSTRATE_2} concentration (g/L)', f'Residual {PRODUCT} concentration (g/L)',
        f'Residual {BY_PRODUCT} concentration (g/L)', f'Residual {COMPOUND_1} concentration (g/L)',
        f'Residual {COMPOUND_2} concentration (g/L)', f'Residual {COMPOUND_3} concentration (g/L)',
        f'Residual {COMPOUND_4} concentration (g/L)', f'Residual {COMPOUND_5} concentration (g/L)',
        f'Residual {COMPOUND_6} concentration (g/L)',
    )

    def __init__(self, **kwargs):
        """Keyword-only variant of the base constructor (accepts ``run`` and ``p``)."""
        super().__init__(**kwargs)

    def import_data(self, *args, **kwargs) -> Run:
        """
        Read the 'AnalyticalData' sheet and add its columns to the run.

        :param end_row: required keyword, positional end (exclusive) of the data rows; a missing keyword
            raises KeyError. All other arguments are forwarded to ``DTUCalculationBase.import_data``
        :return: the run with the analytical variables added
        """
        end_row = kwargs.pop('end_row')
        run = super().import_data(*args, **kwargs)
        df = self.standardized_df(sheet_name='AnalyticalData', start_row=0, start_col=3, end_row=end_row)
        test_run = self.add_variables_to_run_from_df(run, df)
        return test_run

    def preprocess_variables(self, run: Run):
        """
        Set up the sample time axis, rename the analytical variables to their internal names and
        (non-legacy mode only) complete them with an initial value and NaN padding.

        :raises AssertionError: if a required variable has no configured name
        """
        # the sample time axis is the one of the sample volume variable;
        # the online axis is taken from the gas flow if present, otherwise from the off-gas CO2
        self.TIME_VEC = run._variables[self.p.sample_volume_name].df.index
        if AIR_FLOW in run._variables:
            self.ONLINE_TIME_VEC = run._variables[AIR_FLOW].df.index
        else:
            self.ONLINE_TIME_VEC = run._variables[CO2].df.index

        # if sampling starts after the run start, prepend the first online timestamp to the sample axis
        if not self.p.legacy and self.TIME_VEC[0] > run.first_timestamp:
            self.TIME_VEC = self.TIME_VEC.insert(0, self.ONLINE_TIME_VEC[0])

        # rename analytical variables
        # (original name in the run, internal name, whether it is required, name of the parameter that
        #  holds the initial amount)
        vm = namedtuple('VariableMapper', ['original_name', 'internal_name', 'required', 'initial_p_name'])
        rename_list = [
            vm(self.p.substrate_1_name, f'Residual {SUBSTRATE_1} concentration', True, 'initial_substrate_1_amount'),
            vm(self.p.substrate_2_name, f'Residual {SUBSTRATE_2} concentration', False, 'initial_substrate_2_amount'),
            vm(self.p.product_name, f'Residual {PRODUCT} concentration', False, 'initial_product_amount'),
            vm(self.p.byproduct_name, f'Residual {BY_PRODUCT} concentration', False, None),
            vm(self.p.compound_1_name, f'Residual {COMPOUND_1} concentration', False, 'initial_compound_1_amount'),
            vm(self.p.compound_2_name, f'Residual {COMPOUND_2} concentration', False, 'initial_compound_2_amount'),
            vm(self.p.compound_3_name, f'Residual {COMPOUND_3} concentration', False, 'initial_compound_3_amount'),
            vm(self.p.compound_4_name, f'Residual {COMPOUND_4} concentration', False, 'initial_compound_4_amount'),
            vm(self.p.compound_5_name, f'Residual {COMPOUND_5} concentration', False, 'initial_compound_5_amount'),
            vm(self.p.compound_6_name, f'Residual {COMPOUND_6} concentration', False, 'initial_compound_6_amount'),
            vm(self.p.sample_volume_name, 'Sample volume', False, None),
            vm('OD', 'OD', True, BIOMASS_INITIAL_OD)
        ]
        # NOTE: from here on ``vm`` is rebound to the individual list entries (it shadows the namedtuple class)
        for vm in rename_list:
            if vm.required:
                if not vm.original_name:
                    raise AssertionError(f'"{vm.internal_name}" is required')
                run.rename_variable(vm.original_name, vm.internal_name)
            else:
                if vm.original_name in run._variables:
                    run.rename_variable(vm.original_name, vm.internal_name)

        # if the first timestamp is missing, use the one from the parameters given
        if not self.p.legacy:
            for vm in rename_list:
                if vm.internal_name not in run._variables:
                    continue
                if run._variables[vm.internal_name].time[0] > self.TIME_VEC[0]:
                    if vm.initial_p_name is None:
                        first_value = 0
                    else:
                        if vm.initial_p_name == BIOMASS_INITIAL_OD:
                            # OD is a "concentration already"
                            first_value = self.p.get(vm.initial_p_name)
                        else:
                            # initial amounts are converted to a concentration via the start volume
                            first_value = self.p.get(vm.initial_p_name) / self.p.get(START_VOLUME)
                    print(f'Setting initial value for {vm.internal_name} to {first_value}')
                    run._variables[vm.internal_name].append_sample(data=first_value, time=self.TIME_VEC[0])

                # ensure, all analytical values have the same shape
                print(f'Check size for {vm.internal_name}')
                self.fill_incomplete_var_with_nan(run, run._variables[vm.internal_name])

        # ensure the variables have the same unit
        # NOTE(review): the sample volume was mapped to 'Sample volume' in the rename list above, yet it is
        # looked up under self.p.sample_volume_name here (and in calculate) - presumably both names are
        # identical in practice; confirm, otherwise this lookup fails after the rename.
        run._variables[self.p.sample_volume_name].convert_data_to('mL')

    def calculate(self, runs: List[Run]):
        """
        For every run, calculate on the sample time axis: accumulated sample volume, sample-corrected
        liquid volume, biomass concentration, and per component the amount, the mass lost through
        sampling and the mass formed (consumed for substrates).

        One record per component is stored in ``run.meta['formation_consumption']``; its keys are listed
        where the record is built at the end of the component loop.
        """
        for run in runs:
            i, t, p, add_variable = self.get_shortcuts(run)
            self.TIME_VEC = t(self.p.sample_volume_name)
            self.preprocess_variables(run)

            # volume removed by all samples taken before the current one
            # (legacy variant: the first sample is skipped and the current sample is included)
            if p(LEGACY):
                add_variable(name=ACCUMULATED_SAMPLE_VOLUME, unit='mL',
                             data=np.concatenate([np.array([0]), np.cumsum(i(self.p.sample_volume_name)[1:])]))
            else:
                add_variable(name=ACCUMULATED_SAMPLE_VOLUME, unit='mL',
                             data=np.concatenate([np.array([0]), np.cumsum(i(self.p.sample_volume_name)[0:-1])]))

            # bring the online volumes onto the sample time axis
            liquid_volume_without_corrected_samples = self.last_value_before_timestamps(
                run._variables[LIQUID_VOLUME_WITHOUT_CORRECTED_SAMPLES], self.TIME_VEC
            )

            method = 'pad' if p(LEGACY) else 'nearest'
            feed_volume_downscaled = self.last_value_before_timestamps(
                run._variables[FEED_VOLUME], self.TIME_VEC, method=method
            )
            if LIQUID_VOLUME not in run._variables:
                # accumulated sample volume is in mL, liquid volume in L
                add_variable(name=LIQUID_VOLUME_DOWNSCALED, unit='L',
                             data=liquid_volume_without_corrected_samples - (i(ACCUMULATED_SAMPLE_VOLUME) / 1000))
            else:
                # ensure the liquid volume has the same value frequency as the samples;
                # a provided liquid volume is used as is, without subtracting the sample volume
                add_variable(name=LIQUID_VOLUME_DOWNSCALED, unit='L',
                             data=self.last_value_before_timestamps(run._variables[LIQUID_VOLUME], self.TIME_VEC))

            SAMPLE_MASS = 'Sample mass'
            add_variable(name=SAMPLE_MASS, unit='g', data=i(self.p.sample_volume_name) * p(SAMPLE_DENSITY), )

            BIOMASS_CONCENTRATION = 'Biomass concentration'
            add_variable(name=BIOMASS_CONCENTRATION, unit='g/L', data=i(OD) * p(BIOMASS_CONVERSION_FACTOR))

            if run.meta is None:
                run.meta = {}
            run.meta['formation_consumption'] = []

            # Residual values, last values and formed values
            for residual_var in self.p.get_available_residual_vars(run=run) + [BIOMASS_CONCENTRATION]:

                # the component type is derived from the variable name
                is_biomass = 'biomass' in residual_var.lower()
                is_substrate = 'substrate' in residual_var.lower()
                is_byproduct = 'byproduct' in residual_var.lower()
                is_product = 'product' in residual_var.lower() and 'byproduct' not in residual_var.lower()

                variable_name_amount = run._variables[residual_var].name.replace('concentration', 'amount')

                # amount in the reactor: concentration times liquid volume
                residual_data = i(residual_var) * i(LIQUID_VOLUME_DOWNSCALED)
                add_variable(name=variable_name_amount, unit='g', data=residual_data)

                variable_name_lost = run._variables[residual_var].name.replace('Residual ', '')
                variable_name_lost = variable_name_lost.replace(' concentration', '')
                variable_name_lost += ' lost in sample'
                # mass removed by all previous samples (sample volume is in mL, hence / 1000);
                # shifted by one entry so that the current sample is not counted yet
                lost_data = np.cumsum(i(residual_var) * i(self.p.sample_volume_name) / 1000)
                lost_data = np.concatenate([np.array([0]), lost_data[:-1]])
                add_variable(name=variable_name_lost, unit='g', data=lost_data)

                # bare component name, e.g. 'Residual Substrate 1 concentration' -> 'Substrate 1',
                # and its parameter-name form, e.g. 'substrate_1'
                vn_base = run._variables[residual_var].name.replace('Residual ', '').replace(
                    ' concentration', '')
                p_base = vn_base.lower().replace(' ', '_')
                if is_substrate:
                    variable_name_formed = f'{vn_base} consumed'
                    is_consumed = True
                else:
                    variable_name_formed = f'{vn_base} formed'
                    is_consumed = False

                # get names of parameters
                p_initial = f'initial_{p_base}_amount'
                p_molar_mass = f'molar_mass_of_{p_base}'
                p_no_c_in = f'no_c_in_{p_base}'
                p_dor_of = f'dor_of_{p_base}'
                p_feed_concentration = f'feed_{p_base}_concentration'

                # mass balance per component type
                if is_biomass:
                    # formed = present - (initial concentration * current volume) + lost through sampling
                    formed_data = residual_data - (p(BIOMASS_INITIAL_OD) * p(BIOMASS_CONVERSION_FACTOR)
                                                   * i(LIQUID_VOLUME_DOWNSCALED)) + lost_data
                elif is_substrate:
                    # consumed = initial + fed - present - lost through sampling
                    formed_data = p(p_initial) + (
                            p(p_feed_concentration) * feed_volume_downscaled) - residual_data - lost_data
                elif is_byproduct:
                    # byproducts have no initial amount
                    formed_data = residual_data + lost_data
                else:
                    # products and compounds: formed = present + lost through sampling - initial
                    formed_data = -p(p_initial) + residual_data + lost_data

                if p(LEGACY):
                    formed_data[0] = 0

                variable = add_variable(name=variable_name_formed, unit='g', data=formed_data)
                if is_biomass:
                    add_variable(name=BIOMASS_AMOUNT_MOLAR, unit='mol', data=residual_data / p(MOLAR_MASS_OF_BIOMASS))

                # record consumed by the later calculation steps (mols, rates, yields)
                run.meta['formation_consumption'].append(
                    {
                        'variable': variable,
                        'p_names': {
                            'molar_mass': p_molar_mass,
                            'no_c_in': p_no_c_in,
                            'dor_of': p_dor_of
                        },
                        'vn_base': vn_base,
                        'is_product': is_product,
                        'is_byproduct': is_byproduct,
                        'is_substrate': is_substrate,
                        'is_biomass': is_biomass,
                        'is_consumed': is_consumed
                    })


class DTUCalculationsMolsData(DTUCalculationBase):
    """
    Converts the formed/consumed masses into molar and c-molar amounts and derives the carbon balance
    and the degree-of-reduction (DoR) balance. Requires ``run.meta['formation_consumption']`` and the
    accumulated O2/CO2 variables to be present in the run.
    """

    def calculate(self, runs: List[Run]):
        """
        For every run, add '<name> molar' and '<name> c-molar' for each recorded component, the accumulated
        O2/CO2 amounts on the sample time axis, and the variables 'C balance' and 'DoR balance' in percent.
        """
        for run in runs:
            i, t, p, add_variable = self.get_shortcuts(run)
            self.TIME_VEC = run.meta['formation_consumption'][0]['variable'].time
            # "neg" collects the substrate side (denominator), "pos" everything else (numerator)
            carbon_balance_pos, carbon_balance_neg, dor_balance_pos, dor_balance_neg = 0, 0, 0, 0

            for variable_meta in run.meta['formation_consumption']:
                variable = variable_meta['variable']
                p_names = variable_meta['p_names']
                molar_mass = p(p_names['molar_mass'])

                # components without carbon count / degree of reduction parameters are skipped
                # NOTE(review): ``p`` indexes self.p by name; a plain mapping would raise KeyError rather
                # than IndexError for a missing name - confirm which exception the parameter object raises.
                try:
                    no_c_in = p(p_names['no_c_in'])
                except IndexError:
                    continue
                try:
                    dor_of = p(p_names['dor_of'])
                except IndexError:
                    continue

                # a molar mass that is complex or not positive cannot be used for the conversion
                if not np.isreal(molar_mass) or molar_mass <= 0:
                    continue

                # molar
                molar_data = i(variable.name) / molar_mass
                if all(np.isnan(molar_data)):
                    continue

                c_molar_data = molar_data * no_c_in
                add_variable(name=f'{variable.name} molar', unit='mol', data=molar_data)

                if 'substrate' in variable.name.lower():
                    # substrates go to the "neg" side of both balances
                    # NOTE(review): NaNs are zeroed for the DoR balance only, not for the carbon balance
                    carbon_balance_neg += c_molar_data
                    dor_balance_neg += np.nan_to_num(dor_of * c_molar_data)
                else:
                    # everything else goes to the "pos" side of both balances
                    carbon_balance_pos += c_molar_data
                    dor_balance_pos += np.nan_to_num(dor_of * c_molar_data)

                # c-molar
                add_variable(name=f'{variable.name} c-molar', unit='cmol', data=c_molar_data)

            # Carbon balance
            # ("pos" side incl. accumulated CO2 / substrate side) * 100
            o2_acc_data = self.last_value_before_timestamps(run._variables[O2_ACCUMULATED], self.TIME_VEC,
                                                            method='nearest')
            add_variable(name=O2_ACCUMULATED_DOWNSCALED, unit='mol', data=o2_acc_data)
            # accumulated O2 enters the substrate side of the DoR balance with inverted sign
            dor_balance_neg += np.nan_to_num(o2_acc_data) * p('dor_of_oxygen') * -1

            co2_acc_data = self.last_value_before_timestamps(run._variables[CO2_ACCUMULATED], self.TIME_VEC,
                                                             method='nearest')
            add_variable(name=CO2_ACCUMULATED_DOWNSCALED, unit='mol', data=co2_acc_data)
            carbon_balance_pos += np.nan_to_num(co2_acc_data)

            # NOTE(review): carbon_balance_neg is still the integer 0 if no substrate contributed
            add_variable(name='C balance', unit='%', data=(carbon_balance_pos / carbon_balance_neg) * 100)

            # DoR balance
            add_variable(name='DoR balance', unit='%', data=(dor_balance_pos / dor_balance_neg) * 100)


class DTUCalculationsRatesData(DTUCalculationBase):
    """
    Derives formation/uptake rates and biomass-specific rates from the formed/consumed amounts.
    Requires ``run.meta['formation_consumption']``, the '<name> molar' variables and the molar O2/CO2
    flows to be present in the run.
    """

    def calculate(self, runs: List[Run]):
        """
        For every run, add per component the rate in g/h, the molar rate and (except for biomass) the
        biomass-specific rates; then the O2/CO2 rates and the specific growth rate.

        All results live on a time axis that is one entry shorter than the sample time axis, and their
        first entry is always NaN (see ``replace_first``).
        """
        def diff_from_accumulated(variable, diff_window_size=1) -> np.array:
            """
            Difference between entries that are ``diff_window_size`` apart, prefixed with a single 0.

            :param variable: numpy array or Variable holding accumulated values
            :raises NotImplementedError: for any other input type
            """
            if isinstance(variable, np.ndarray):
                raw_data = variable
            elif isinstance(variable, Variable):
                raw_data = variable.data.m
            else:
                raise NotImplementedError('This variable type is not supported')
            # raw_diff = np.diff(raw_data) / time_diff_h
            raw_diff = raw_data[diff_window_size:] - raw_data[:-diff_window_size]
            return np.concatenate([np.array([0]), raw_diff])

        for run in runs:
            i, t, p, add_variable = self.get_shortcuts(run)

            self.TIME_VEC_RAW = run.meta['formation_consumption'][0]['variable'].time
            # results are stored on the sample axis without its last entry
            self.TIME_VEC = self.TIME_VEC_RAW[:-1]
            # time span in hours between the samples two positions apart, prefixed with 0; it has the same
            # layout as diff_from_accumulated(..., diff_window_size=2), so entry k (k >= 1) of their
            # quotient is the difference quotient across samples k-1 and k+1
            time_diff_h_2 = np.concatenate([np.array([0]), (self.TIME_VEC_RAW[2:] -
                                                            self.TIME_VEC_RAW[:-2]).total_seconds() / 3600])

            # Specific rates for other components
            for variable_meta in run.meta['formation_consumption']:
                variable = variable_meta['variable']
                variable_name_base = variable_meta['vn_base']
                p_names = variable_meta['p_names']
                molar_mass = p(p_names['molar_mass'])

                # rates in g/h
                if variable_meta['is_consumed']:
                    variable_name = f'{variable_name_base} uptake rate'
                    factor = -1
                else:
                    variable_name = f'{variable_name_base} formation rate'
                    factor = 1

                # nan_to_num zeroes NaN entries (the leading 0 / 0 among them) before the first entry is
                # set to NaN by replace_first
                rate_gram = replace_first(np.nan_to_num(factor * (diff_from_accumulated(variable, diff_window_size=2) /
                                                                  time_diff_h_2)))
                rate_molar = rate_gram / molar_mass
                variable_meta['rate'] = add_variable(name=variable_name, unit='g/h', data=rate_gram)

                # rates in mol/h (c-mol/h for biomass)
                if variable_meta['is_biomass']:
                    molar_rate_unit = 'c-mol/h'
                else:
                    molar_rate_unit = 'mol/h'

                # the '<name> molar' variable only exists for components the molar step did not skip;
                # without it no molar rate is stored for this component
                try:
                    variable_meta['molar_rate'] = add_variable(name=f'{variable_name} molar', unit=molar_rate_unit,
                                                               data=replace_first(factor * (diff_from_accumulated(
                                                                   i(f'{variable.name} molar'),
                                                                   diff_window_size=2) / time_diff_h_2)))
                except KeyError:
                    pass

                # specific productivities
                if not variable_meta['is_biomass']:
                    if variable_meta['is_consumed']:
                        variable_name = f'{variable_name_base} specific uptake rate'
                    else:
                        variable_name = f'{variable_name_base} specific formation rate'
                    # biomass amounts are cut by one entry to match the shortened time axis
                    add_variable(name=variable_name, unit='g/g(DW)*h',
                                 data=replace_first(rate_gram / i(BIOMASS_AMOUNT)[:-1]))

                    variable_name += ' molar'
                    variable_meta['specific_productivity'] = add_variable(name=variable_name, unit='mol/cmol(X)*h',
                                                                          data=replace_first(rate_molar /
                                                                                          i(BIOMASS_AMOUNT_MOLAR)[:-1]))

            # Rates for O2 and CO2
            # (the molar flows of the online data, sampled at the last online value at or before each sample time)
            add_variable(name=O2_UPTAKE_RATE, unit='mol/h',
                         data=replace_first(self.last_value_before_timestamps(
                             run._variables[O2_MOLAR], self.TIME_VEC
                         )))

            add_variable(name=CO2_EVOLUTION_RATE, unit='mol/h',
                         data=replace_first(self.last_value_before_timestamps(
                             run._variables[CO2_MOLAR], self.TIME_VEC
                         )))

            # Specific rates for O2 and CO2
            add_variable(name=O2_SPECIFIC_UPTAKE_RATE, unit='mol/c-mol(X)*h',
                         data=replace_first(self.last_value_before_timestamps(
                             run._variables[O2_MOLAR], self.TIME_VEC
                         ) / i(BIOMASS_AMOUNT_MOLAR)[:-1]))

            add_variable(name=CO2_SPECIFIC_EVOLUTION_RATE, unit='mol/c-mol(X)*h',
                         data=replace_first(self.last_value_before_timestamps(
                             run._variables[CO2_MOLAR], self.TIME_VEC
                         ) / i(BIOMASS_AMOUNT_MOLAR)[:-1]))

            # NOTE: this lookup raises KeyError if the molar biomass rate could not be stored above
            add_variable(name='Biomass specific growth rate µ', unit='1/h',
                         data=replace_first(i('Biomass formation rate molar') / i(BIOMASS_AMOUNT_MOLAR)[:-1]))


class DTUCalculationsYieldsData(DTUCalculationBase):
    """Calculation stage that adds yield variables and three balance variables to each run.

    For every entry in ``run.meta['formation_consumption']`` it adds yields with reference to
    biomass ("... yield (X)") and with reference to substrate ("... yield (S)"), then the O2/CO2
    yields, and finally 'DoR (from yields)', 'C balance (using c-mols)' and
    'Elemental C balance with reference to substrate'.

    Relies on variables and ``run.meta`` entries that are not created in this class (e.g. the
    O2/CO2 rates and the biomass formation rates), so it has to run after the stages providing them.
    """

    def calculate(self, runs: List[Run]):
        """Add the yield and balance variables to every run in ``runs`` (results are stored on the runs)."""

        def get_substrate_data(c_molar=False):
            # Collects the '<substrate> uptake rate molar' data of every substrate that has such a
            # variable. `i` and `p` are taken from the enclosing scope; they are bound per run in
            # the loop below, so this helper must only be called from inside that loop.
            substrates_data = []
            for sn in self.p.get_substrate_names(omit_residual_prefix=True):
                try:
                    to_add = i(f'{sn} uptake rate molar')
                except KeyError:
                    # no molar uptake rate for this substrate in the current run -> leave it out
                    continue
                if c_molar:
                    # scale by the parameter 'no_c_in_<substrate name, lower case, underscores>'
                    to_add = p('no_c_in_{}'.format(sn.replace(' ', '_').lower())) * to_add
                substrates_data.append(to_add)

            return substrates_data

        for run in runs:
            # i(name) reads the data of a run variable; p(...) is presumably the parameter lookup and
            # add_variable(...) stores a result on the run — both come from the base class (not visible here).
            i, t, p, add_variable = self.get_shortcuts(run)

            # Time axis is taken from the first formation/consumption variable; TIME_VEC drops its last entry.
            self.TIME_VEC_RAW = run.meta['formation_consumption'][0]['variable'].time
            self.TIME_VEC = self.TIME_VEC_RAW[:-1]

            # Negated element-wise sums of the substrate uptake rates (mass, molar and carbon-scaled molar).
            # Only substrates that actually have an uptake-rate variable in this run contribute.
            substrate_sum = -1 * np.array(
                [i(f'{sn} uptake rate') for sn in self.p.get_substrate_names(omit_residual_prefix=True, run=run) if
                 f'{sn} uptake rate' in run._variables]).sum(
                axis=0)
            substrate_sum_molar = -1 * np.array(get_substrate_data()).sum(axis=0)
            substrate_sum_c_molar = -1 * np.array(get_substrate_data(c_molar=True)).sum(axis=0)
            # Start values of the three balances that are accumulated below.
            # NOTE(review): presumably the 1 and -1 stand for the biomass and substrate reference terms;
            # dor_balance starts at 0 and biomass is skipped in the (X) branch below — confirm this is intended.
            carbon_balance, dor_balance, elemental_balance = 1, 0, -1,

            for variable_meta in run.meta['formation_consumption']:
                variable_name_base = variable_meta['vn_base']
                p_names = variable_meta['p_names']
                # consumed compounds get their rate sign flipped for the biomass-referenced yields
                factor = -1 if variable_meta['is_consumed'] else 1
                dor_of = p(p_names['dor_of'])

                # yield with reference to biomass
                if not variable_meta['is_biomass']:
                    # -- g/g(DW)
                    variable_name_yield = f'{variable_name_base} yield (X)'
                    variable_name_yield_molar = f'{variable_name_base} yield molar (X)'
                    variable_name_yield_c_molar = f'{variable_name_base} yield c-molar (X)'

                    # replace_first is defined elsewhere in this file; its exact effect is not visible here
                    add_variable(name=variable_name_yield, unit='g/g(DW)',
                                 data=replace_first(factor * variable_meta['rate'].data.m / i(BIOMASS_FORMATION_RATE)))

                    if 'molar_rate' in variable_meta:
                        # -- mol/c-mol(X)
                        molar_yield = replace_first(factor * variable_meta['molar_rate'].data.m / i(
                            BIOMASS_FORMATION_RATE_MOLAR))
                        add_variable(name=variable_name_yield_molar, unit='mol/c-mol(X)',
                                     data=molar_yield)

                        no_c_in = p(p_names['no_c_in'])
                        add_variable(name=variable_name_yield_c_molar, unit='c-mol/c-mol(X)',
                                     data=molar_yield * no_c_in)

                        # factor is re-bound here: for the balances the sign depends on is_substrate,
                        # not on is_consumed as above
                        factor = -1 if variable_meta['is_substrate'] else 1
                        # the c-molar yield that was just added is read back from the run
                        dor_balance += i(variable_name_yield_c_molar) * dor_of * factor
                        carbon_balance += i(variable_name_yield_c_molar) * factor

                # yield with reference to substrate
                if not variable_meta['is_substrate']:
                    # -- g/g substrate
                    variable_name_yield = f'{variable_name_base} yield (S)'
                    variable_name_yield_molar = f'{variable_name_base} yield molar (S)'
                    variable_name_yield_c_molar = f'{variable_name_base} yield c-molar (S)'

                    # unlike the (X) yields above, no sign factor is applied to the rate here
                    add_variable(name=variable_name_yield, unit='g/g',
                                 data=replace_first(variable_meta['rate'].data.m / substrate_sum))

                    if 'molar_rate' in variable_meta:
                        molar_yield = variable_meta['molar_rate'].data.m / substrate_sum_molar
                        add_variable(name=variable_name_yield_molar,
                                     unit='c-mol/mol(S)' if variable_meta['is_biomass'] else 'mol/mol(S)',
                                     data=replace_first(molar_yield))

                        no_c_in = p(p_names['no_c_in'])
                        add_variable(name=variable_name_yield_c_molar, unit='c-mol/c-mol(S)',
                                     data=replace_first(variable_meta['molar_rate'].data.m * no_c_in /
                                                        substrate_sum_c_molar))

                        elemental_balance += i(variable_name_yield_c_molar)

            # O2 and CO2 yield
            add_variable(name=O2_YIELD_X, unit='mol/c-mol(X)',
                         data=replace_first(i(O2_UPTAKE_RATE) / i(BIOMASS_FORMATION_RATE_MOLAR)))
            add_variable(name=CO2_YIELD_X, unit='mol/c-mol(X)',
                         data=replace_first(i(CO2_EVOLUTION_RATE) / i(BIOMASS_FORMATION_RATE_MOLAR)))

            add_variable(name='O2 yield (S)', unit='mol/mol(S)',
                         data=replace_first(i(O2_UPTAKE_RATE) / substrate_sum_molar))
            add_variable(name='CO2 yield (S)', unit='mol/mol(S)',
                         data=replace_first(i(CO2_EVOLUTION_RATE) / substrate_sum_molar))

            add_variable(name=O2_YIELD_C_MOLAR_S, unit='mol/c-mol(S)',
                         data=replace_first(i(O2_UPTAKE_RATE) / substrate_sum_c_molar))
            add_variable(name=CO2_YIELD_C_MOLAR_S, unit='mol/c-mol(S)',
                         data=replace_first(i(CO2_EVOLUTION_RATE) / substrate_sum_c_molar))

            # Gas yields are re-sampled onto TIME_VEC (method='nearest'); np.nan_to_num turns NaN
            # entries into 0 so that they do not propagate into the balances.
            o2_data = np.nan_to_num(self.last_value_before_timestamps(run._variables[O2_YIELD_X], self.TIME_VEC,
                                                                      method='nearest'))
            dor_balance += o2_data * p('dor_of_oxygen')

            co2_data = np.nan_to_num(
                self.last_value_before_timestamps(run._variables[CO2_YIELD_X], self.TIME_VEC,
                                                  method='nearest')
            )
            # NOTE(review): the CO2 yield enters the DoR balance unweighted (no 'dor_of_...' parameter is
            # applied, unlike O2 above and the compounds in the loop) — confirm this is intended.
            dor_balance += co2_data
            carbon_balance += co2_data
            elemental_balance += i(CO2_YIELD_C_MOLAR_S)

            # the balances are stored without a unit argument
            add_variable(name='DoR (from yields)', data=dor_balance)
            add_variable(name='C balance (using c-mols)', data=carbon_balance)
            add_variable(name='Elemental C balance with reference to substrate', data=elemental_balance)


class DTUGlobalPostProcessor:
    """Last calculation stage: drops raw variables and optionally applies user-facing names.

    For every run it resets ``run.meta``, removes the raw-data variables (unless disabled), prints
    the names of the calculated variables, and - when the ``USE_SPECIFIC_NAMES_FOR_RESULTS``
    parameter is set - replaces the internal generic names (substrate, product, by-product and
    compound placeholders) inside variable names with the cleaned names configured by the user.
    """

    # (internal generic name, specific replacement name) pair used while renaming result variables.
    _RepMap = namedtuple('ReplaceMapper', ['search_str', 'replace_str'])

    def __init__(self, run = None, p: DTUParameters = None):
        """
        :param run: unused; accepted so the class can be constructed like the other calculation classes.
        :param p: parameter object; it is subscripted with parameter names in ``calculate``.
        """
        self.p = p

    @staticmethod
    def _clean_name(s):
        """Strip the generic parts "Residual" / "concentration" from a name and capitalise the rest.

        ``None`` and strings shorter than two characters are returned unchanged. A name that
        consists only of the generic parts yields an empty string instead of raising IndexError.
        """
        if s is None:
            return None

        if len(s) < 2:
            return s

        word = s.replace('Residual', '').replace('concentration', '').strip()
        # slicing (instead of word[0]) keeps this safe when nothing is left after cleaning
        return word[:1].upper() + word[1:]

    def calculate(self, runs: List[Run], drop_raw_variables=True):
        """Post-process every run in ``runs`` in place.

        :param runs: runs to process; their variables are deleted / renamed directly.
        :param drop_raw_variables: when true, every variable flagged as raw data is removed from the run.
        """
        for run in runs:
            run.meta = None

            # Iterate over a snapshot: variables are deleted from the run inside the loop.
            # The calculated names are collected regardless of drop_raw_variables so that the
            # printed list is never misleadingly empty.
            var_names_to_save = []
            for variable in list(run):
                if not variable.raw_data:
                    var_names_to_save.append(variable.name)
                elif drop_raw_variables:
                    run.del_variable(variable.name)
            var_list = '\n'.join(var_names_to_save)
            print(f'Run\n-\n{run.name}\n\n\nSaving variables\n-\n{var_list}')

            if not self.p[USE_SPECIFIC_NAMES_FOR_RESULTS]:
                continue

            # The variable name strings are cleaned in case they contain generic information like
            # "Residual" or "concentration", like it is the case for some test datasets.
            byproduct_name = self._clean_name(self.p['byproduct_name'])
            replace_list = [
                self._RepMap(SUBSTRATE_1, self._clean_name(self.p['substrate_1_name'])),
                self._RepMap(SUBSTRATE_2, self._clean_name(self.p['substrate_2_name'])),
                self._RepMap(PRODUCT, self._clean_name(self.p['product_name'])),
                self._RepMap(COMPOUND_1, self._clean_name(self.p['compound_1_name'])),
                self._RepMap(COMPOUND_2, self._clean_name(self.p['compound_2_name'])),
                self._RepMap(COMPOUND_3, self._clean_name(self.p['compound_3_name'])),
                self._RepMap(COMPOUND_4, self._clean_name(self.p['compound_4_name'])),
                self._RepMap(COMPOUND_5, self._clean_name(self.p['compound_5_name'])),
                self._RepMap(COMPOUND_6, self._clean_name(self.p['compound_6_name'])),
            ]

            # Apply original names to calculated variables, i.e. replace the internal nomenclature.
            # Snapshot the names first because renaming modifies the run while we loop.
            for variable_name in list(run.variable_names):
                # The by-product is handled before the generic mappings and ends the search for
                # this variable, so that no other mapping is applied to a by-product variable.
                if BY_PRODUCT in variable_name:
                    if byproduct_name:
                        run.rename_variable(variable_name,
                                            variable_name.replace(BY_PRODUCT, byproduct_name))
                    continue

                for rep_map in replace_list:
                    if rep_map.search_str in variable_name:
                        # Without a configured specific name (None or empty) the generic name is
                        # kept; str.replace would fail on None and an empty replacement would
                        # strip the compound from the variable name.
                        if rep_map.replace_str:
                            run.rename_variable(variable_name,
                                                variable_name.replace(rep_map.search_str, rep_map.replace_str))
                        break


class DTUParameters(Parameter):
    """Parameter set of the DTU calculations, pre-filled with default values.

    ``__init__`` registers every parameter through ``add_item`` (default value first, then the
    parameter name). The ``step`` argument presumably selects the wizard page a parameter is shown
    on - confirm against ``Parameter``. The remaining methods build the "Residual ... concentration"
    variable names of substrates, product, by-product and compounds.
    """

    legacy = False

    def __init__(self):
        super().__init__()

        # online data and names of variables
        self.add_item('', 'run_names', type=Parameter.BATCH_LIST_N, step=1, description='Runs for Calculation')
        self.add_item('', 'substrate_1_name', type=Parameter.VAR_LIST, step=1)
        self.add_item('', 'substrate_2_name', type=Parameter.VAR_LIST, step=1)
        self.add_item('', 'product_name', type=Parameter.VAR_LIST, step=1)
        self.add_item('', 'byproduct_name', type=Parameter.VAR_LIST, step=1)

        self.add_item('', 'compound_1_name', type=Parameter.VAR_LIST, step=1)
        self.add_item('', 'compound_2_name', type=Parameter.VAR_LIST, step=1)
        self.add_item('', 'compound_3_name', type=Parameter.VAR_LIST, step=1)
        self.add_item('', 'compound_4_name', type=Parameter.VAR_LIST, step=1)
        self.add_item('', 'compound_5_name', type=Parameter.VAR_LIST, step=1)
        self.add_item('', 'compound_6_name', type=Parameter.VAR_LIST, step=1)

        # online parameters
        self.add_item('0.3', START_VOLUME, unit='L', step=2, type=Parameter.FLOAT)
        self.add_item('1.15', 'feed_density', unit='g/mL', step=2, type=Parameter.FLOAT)
        self.add_item('415', 'feed_substrate_1_concentration', unit='g/L', step=2, type=Parameter.FLOAT)
        self.add_item('0', 'feed_substrate_2_concentration', unit='g/L', step=2, type=Parameter.FLOAT)

        # analytical parameters  (first page)
        self.add_item('', 'sample_volume_name', type=Parameter.VAR_LIST, step=3)
        self.add_item('1', 'sample_density', unit='g/mL', step=3, type=Parameter.FLOAT)
        # NOTE(review): the description reads 'DO' while the parameter name and unit say OD - confirm wording
        self.add_item('0.2368421053', 'biomass_initial_od', unit='OD', step=3, description='Biomass initial DO',
                      type=Parameter.FLOAT)
        self.add_item('0.38', 'biomass_conversion_factor', unit='g/(L*OD)', step=3, type=Parameter.FLOAT)

        # initial amounts (there is no initial amount parameter for the by-product)
        self.add_item('4.549227', 'initial_substrate_1_amount', unit='g', step=3, type=Parameter.FLOAT)
        self.add_item('0', 'initial_substrate_2_amount', unit='g', step=3, type=Parameter.FLOAT)
        self.add_item('0', 'initial_product_amount', unit='g', step=3, type=Parameter.FLOAT)
        self.add_item('0', 'initial_compound_1_amount', unit='g', step=3, type=Parameter.FLOAT)
        self.add_item('0', 'initial_compound_2_amount', unit='g', step=3, type=Parameter.FLOAT)
        self.add_item('0', 'initial_compound_3_amount', unit='g', step=3, type=Parameter.FLOAT)
        self.add_item('0', 'initial_compound_4_amount', unit='g', step=3, type=Parameter.FLOAT)
        self.add_item('0', 'initial_compound_5_amount', unit='g', step=3, type=Parameter.FLOAT)
        self.add_item('0', 'initial_compound_6_amount', unit='g', step=3, type=Parameter.FLOAT)
        self.add_item('true', USE_SPECIFIC_NAMES_FOR_RESULTS, step=3, type=Parameter.BOOL)

        # molar and DoR data (second page)
        self.add_item('24.6', 'molar_mass_of_biomass', unit='g/mol', type=Parameter.FLOAT, step=4)
        self.add_item('180.156', 'molar_mass_of_substrate_1', unit='g/mol', type=Parameter.FLOAT, step=4)
        self.add_item('0', 'molar_mass_of_substrate_2', unit='g/mol', type=Parameter.FLOAT, step=4)
        self.add_item('232.28', 'molar_mass_of_product', unit='g/mol', type=Parameter.FLOAT, step=4)
        self.add_item('60.052', 'molar_mass_of_byproduct', unit='g/mol', type=Parameter.FLOAT, step=4)
        self.add_item('204.23', 'molar_mass_of_compound_1', unit='g/mol', type=Parameter.FLOAT, step=4)
        self.add_item('0', 'molar_mass_of_compound_2', unit='g/mol', type=Parameter.FLOAT, step=4)
        self.add_item('0', 'molar_mass_of_compound_3', unit='g/mol', type=Parameter.FLOAT, step=4)
        self.add_item('0', 'molar_mass_of_compound_4', unit='g/mol', type=Parameter.FLOAT, step=4)
        self.add_item('0', 'molar_mass_of_compound_5', unit='g/mol', type=Parameter.FLOAT, step=4)
        self.add_item('0', 'molar_mass_of_compound_6', unit='g/mol', type=Parameter.FLOAT, step=4)
        # number of carbon atoms per molecule; looked up by name as 'no_c_in_<compound>'
        self.add_item('1', 'no_c_in_biomass', type=Parameter.INT, step=4, description='No. of C in biomass')
        self.add_item('6', 'no_c_in_substrate_1', type=Parameter.INT, step=4, description='No. of C in substrate 1')
        self.add_item('0', 'no_c_in_substrate_2', type=Parameter.INT, step=4, description='No. of C in substrate 2')
        self.add_item('13', 'no_c_in_product', type=Parameter.INT, step=4, description='No. of C in product')
        self.add_item('2', 'no_c_in_byproduct', type=Parameter.INT, step=4, description='No. of C in byproduct')
        self.add_item('11', 'no_c_in_compound_1', type=Parameter.INT, step=4, description='No. of C in compound 1')
        self.add_item('0', 'no_c_in_compound_2', type=Parameter.INT, step=4, description='No. of C in compound 2')
        self.add_item('0', 'no_c_in_compound_3', type=Parameter.INT, step=4, description='No. of C in compound 3')
        self.add_item('0', 'no_c_in_compound_4', type=Parameter.INT, step=4, description='No. of C in compound 4')
        self.add_item('0', 'no_c_in_compound_5', type=Parameter.INT, step=4, description='No. of C in compound 5')
        self.add_item('0', 'no_c_in_compound_6', type=Parameter.INT, step=4, description='No. of C in compound 6')
        # degree of reduction (DoR) per compound
        self.add_item('4.2', 'dor_of_biomass', unit='1/cmol', type=Parameter.FLOAT, step=4,
                      description='DoR of biomass')
        self.add_item('4', 'dor_of_substrate_1', unit='1/cmol', type=Parameter.FLOAT, step=4,
                      description='DoR of substrate 1')
        self.add_item('0', 'dor_of_substrate_2', unit='1/cmol', type=Parameter.FLOAT, step=4,
                      description='DoR of substrate 2')
        self.add_item('4.461538462', 'dor_of_product', unit='1/cmol', type=Parameter.FLOAT, step=4,
                      description='DoR of product')
        self.add_item('4', 'dor_of_byproduct', unit='1/cmol', type=Parameter.FLOAT, step=4,
                      description='DoR of byproduct')
        self.add_item('4.181818182', 'dor_of_compound_1', unit='1/cmol', type=Parameter.FLOAT, step=4,
                      description='DoR of compound 1')
        self.add_item('0', 'dor_of_compound_2', unit='1/cmol', type=Parameter.FLOAT, step=4,
                      description='DoR of compound 2')
        self.add_item('0', 'dor_of_compound_3', unit='1/cmol', type=Parameter.FLOAT, step=4,
                      description='DoR of compound 3')
        self.add_item('0', 'dor_of_compound_4', unit='1/cmol', type=Parameter.FLOAT, step=4,
                      description='DoR of compound 4')
        self.add_item('0', 'dor_of_compound_5', unit='1/cmol', type=Parameter.FLOAT, step=4,
                      description='DoR of compound 5')
        self.add_item('0', 'dor_of_compound_6', unit='1/cmol', type=Parameter.FLOAT, step=4,
                      description='DoR of compound 6')
        self.add_item('-4', 'dor_of_oxygen', unit='1/mol', type=Parameter.FLOAT, step=4, description='DoR of Oxygen')

        # constants
        self.add_item('0.08314', 'R', unit='L*bar/K', step=5, type=Parameter.FLOAT)
        self.add_item('303.15', 'T', unit='K', step=5, type=Parameter.FLOAT)
        self.add_item('0.9885', 'P', unit='bar', step=5, type=Parameter.FLOAT)

    @staticmethod
    def _resid(name):
        """Return the residual-concentration variable name for the generic compound ``name``."""
        return f'Residual {name} concentration'

    def get_substrate_names(self, omit_residual_prefix=False, run: Run = None):
        """Return the substrate names, optionally restricted to the substrates present in ``run``.

        :param omit_residual_prefix: when true the bare generic names are returned, otherwise the
            full "Residual ... concentration" variable names.
        :param run: when given, only substrates whose residual-concentration variable exists in
            ``run._variables`` are returned (this filter used to be computed but then discarded).
        :return: ``ALL_SUBSTRATES`` itself when neither filtering nor wrapping applies, a new list otherwise.
        """
        substrate_names = ALL_SUBSTRATES
        if run is not None:
            substrate_names = [sn for sn in ALL_SUBSTRATES if self._resid(sn) in run._variables]
        if omit_residual_prefix:
            return substrate_names
        return [self._resid(sn) for sn in substrate_names]

    def get_product_name(self):
        """Return the residual-concentration variable name of the product."""
        return self._resid('Product')

    def get_byproduct_name(self):
        """Return the residual-concentration variable name of the by-product."""
        return self._resid('ByProduct')

    def get_compounds_name(self):
        """Return the residual-concentration variable names of all compounds (``None`` entries are skipped)."""
        return [self._resid(cp) for cp in ALL_COMPOUNDS if cp is not None]

    def get_all_residual_vars(self):
        """Return the residual variable names of substrates, product, by-product and compounds, in that order."""
        return self.get_substrate_names() \
               + [self.get_product_name()] \
               + [self.get_byproduct_name()] \
               + self.get_compounds_name()

    def get_available_residual_vars(self, run: Run):
        """Return those residual variable names that exist in ``run._variables``."""
        all_vars = self.get_all_residual_vars()
        return [av for av in all_vars if av in run._variables]


In [4]:
# Start from the DTU defaults, then overlay every setting coming from the calculation wizard:
# each wizard entry is registered as an item (replacing a default of the same name) and is
# additionally exposed as a plain attribute on the parameter object.
p = DTUParameters()
for key, value in _parameters.items().items():
    p.add_item(value, key, replace=True)
    setattr(p, key, value)

# Load the runs selected in the wizard (matched by name, fetched by id with deep=True) ...
runs = Run.list(
    id__in=[r.id for r in _dataset.runs.values() if r.name in _parameters.run_names],
    deep=True,
)
# ... and convert their first timestamp to UTC; runs without a first timestamp are left untouched.
for run in runs:
    if run.first_timestamp is None:
        continue
    run.first_timestamp = run.first_timestamp.astimezone(pytz.utc)

In [5]:
import traceback

# Calculation stages in execution order. Later stages read results of earlier ones (e.g. the
# yields use the O2/CO2 rates), and the post-processor has to come last because it drops the
# raw variables. The list does not depend on the run, so it is built once.
calc_classes = [DTUCalculationOnlineData, DTUCalculationAnalyticalData, DTUCalculationsMolsData,
                DTUCalculationsRatesData, DTUCalculationsYieldsData,
                DTUGlobalPostProcessor]

for run in runs:
    for calc_class in calc_classes:
        try:
            calc_instance = calc_class(run=None, p=p)
            calc_instance.calculate([run])
        except Exception as exc:
            # Report the failure as warnings (one per formatted traceback chunk, each prefixed with
            # the run name) instead of aborting the notebook. The traceback is formatted straight
            # from the exception, so no traceback object is left behind in the notebook globals
            # where it would keep the failed calculation's frames alive.
            for fe in traceback.format_exception(type(exc), exc, exc.__traceback__):
                warnings.warn(run.name + ': ' + fe, CalculationWarning)
            # skip the remaining stages of this run and continue with the next run
            break


OTO_PD_001_30L_40: Traceback (most recent call last):



OTO_PD_001_30L_40:   File "/tmp/ipykernel_6947/2637807605.py", line 10, in <cell line: 1>
    calc_instance.calculate([run])



OTO_PD_001_30L_40:   File "/tmp/ipykernel_6947/2797974089.py", line 274, in calculate
    data=i(FEED_RATE_VOLUMETRIC) * p(FEED_DENSITY))



OTO_PD_001_30L_40:   File "/tmp/ipykernel_6947/2797974089.py", line 163, in i
    return run._variables[variable_name].data.m



OTO_PD_001_30L_40: KeyError: 'Feed 1 rate volumetric'




In [6]:
# Persist the calculated variables, one bulk save per run; a failing save only produces a
# warning so that the remaining runs are still saved.
for run in runs:
    # Explicit comparison with False on purpose: a variable whose raw_data is None is not selected.
    calculated_variables = [
        v
        for v in run.variables
        if v.raw_data == False
    ]
    try:
        Variable.bulk_save(calculated_variables)
    except Exception as exc:
        warnings.warn(
            f'Could not save variables for run {run.name}: {str(exc)}',
            CalculationWarning,
        )