In [31]:
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import re
import petab
import bionetgen as bng
from petab import Problem
from petab.visualize import plot_problem

## Encoding hopf bifurcation model in petab

#### SBML file: Done
hopfBifurcation_k38_model_sbml.xml

### Conditions Table: NOT DONE
experimental_conditions.tsv

In [32]:
# define initial conditions
exp_conds = {

    # defines conditions 
    'conditionId': ['init_conds'],
    # human readable description
    'conditionName': ['initial conditions'],
    # intial X1 --- i just picked this value 
    'X1': [1]
}

In [33]:
## change from dictonary to dataframe 
experimental_conditions = pd.DataFrame(exp_conds)

## change from dataframe to .tsv file 
experimental_conditions.to_csv('experimental_conditions.tsv', sep='\t', index=False)

In [34]:
experimental_conditions

Unnamed: 0,conditionId,conditionName,X1
0,init_conds,initial conditions,1


### Observable Table: Done

observables.tsv

In [35]:
obs = {

    # name of observables to link to the measurements
    'observableId': ['X1', 'X2', 'X3'],
    # human readable description of the observables
    'observableName': ['X1', 'X2', 'X3'],
    # mathematical formula for how the model output is calculated 
    'observableFormula': ['(kA - k4)*X1 - k2*X1*X2', '-k3*X2 + k5*X3', 'k4*X1 - k5*X3'],
    'noiseFormula': ['', '', ''],
    'noiseDistribution': ['normal', 'normal', 'normal'],
}

In [36]:
## change from dictonary to dataframe 
observables = pd.DataFrame(obs)

## change from dataframe to .tsv file 
observables.to_csv('observables.tsv', sep='\t', index=False)

In [37]:
observables

Unnamed: 0,observableId,observableName,observableFormula,noiseFormula,noiseDistribution
0,X1,X1,(kA - k4)*X1 - k2*X1*X2,,normal
1,X2,X2,-k3*X2 + k5*X3,,normal
2,X3,X3,k4*X1 - k5*X3,,normal


### Measurement Table: NOT DONE

measurement_data.tsv

In [39]:
# import .csv file with the data

X_data = pd.read_csv('X_sim_data.csv')

In [40]:
X_data

Unnamed: 0,time,X1,X2,X3
0,0.0,1.0,0.0,0.0
1,1.666667,16.780671,4.242097,10.640809
2,3.333333,0.027356,6.137602,3.511148
3,5.0,0.003493,2.268323,0.667428
4,6.666667,0.042263,0.64346,0.140281
5,8.333333,2.321366,0.353979,0.692087
6,10.0,8.718655,5.30244,9.753623
7,11.666667,0.033815,4.974264,2.534796
8,13.333333,0.017957,1.745544,0.491058
9,15.0,0.357544,0.520644,0.205848


In [44]:
x1_data = X_data[['time','X1']]
x2_data = X_data[['time','X2']]
x3_data = X_data[['time','X3']]

In [45]:
x1_data = x1_data.rename(columns={'X1': 'measurement'})
x2_data = x2_data.rename(columns={'X2': 'measurement'})
x3_data = x3_data.rename(columns={'X3': 'measurement'})

In [50]:
measurement_info = {

    # references the observable ID from observable file
    'observableId': ['X1', 'X2', 'X3'],
    # references condition ID from the experimental condition file 
    'simulationConditionId': ['init_conds', 'init_conds', 'init_conds'],

}

In [71]:
measurement_info_X1 = {

    # references the observable ID from observable file
    'observableId': ['X1'],
    # references condition ID from the experimental condition file 
    'simulationConditionId': ['init_conds'],

}

# turn that into a df where they each get repeated 300 times 

X1_fordf = pd.DataFrame({key: value * 10 for key, value in measurement_info_X1.items()})

In [72]:
X1_fordf

Unnamed: 0,observableId,simulationConditionId
0,X1,init_conds
1,X1,init_conds
2,X1,init_conds
3,X1,init_conds
4,X1,init_conds
5,X1,init_conds
6,X1,init_conds
7,X1,init_conds
8,X1,init_conds
9,X1,init_conds


In [73]:
# merge nfkb and nfkb_data

measurements_X1 = [X1_fordf, x1_data]
measurements_X1

measurement_data_x1 = pd.concat(measurements_X1, axis=1)

In [74]:
measurements_X1_df = pd.concat(measurements_X1, axis=1)
measurements_X1_df

Unnamed: 0,observableId,simulationConditionId,time,measurement
0,X1,init_conds,0.0,1.0
1,X1,init_conds,1.666667,16.780671
2,X1,init_conds,3.333333,0.027356
3,X1,init_conds,5.0,0.003493
4,X1,init_conds,6.666667,0.042263
5,X1,init_conds,8.333333,2.321366
6,X1,init_conds,10.0,8.718655
7,X1,init_conds,11.666667,0.033815
8,X1,init_conds,13.333333,0.017957
9,X1,init_conds,15.0,0.357544


In [75]:
measurements_X1

[  observableId simulationConditionId
 0           X1            init_conds
 1           X1            init_conds
 2           X1            init_conds
 3           X1            init_conds
 4           X1            init_conds
 5           X1            init_conds
 6           X1            init_conds
 7           X1            init_conds
 8           X1            init_conds
 9           X1            init_conds,
         time  measurement
 0   0.000000     1.000000
 1   1.666667    16.780671
 2   3.333333     0.027356
 3   5.000000     0.003493
 4   6.666667     0.042263
 5   8.333333     2.321366
 6  10.000000     8.718655
 7  11.666667     0.033815
 8  13.333333     0.017957
 9  15.000000     0.357544]

In [76]:
measurement_info_X2 = {

    # references the observable ID from observable file
    'observableId': ['X2'],
    # references condition ID from the experimental condition file 
    'simulationConditionId': ['init_conds'],

}

# turn that into a df where they each get repeated 300 times 

X2_fordf = pd.DataFrame({key: value * 10 for key, value in measurement_info_X2.items()})

In [77]:
X2_fordf

Unnamed: 0,observableId,simulationConditionId
0,X2,init_conds
1,X2,init_conds
2,X2,init_conds
3,X2,init_conds
4,X2,init_conds
5,X2,init_conds
6,X2,init_conds
7,X2,init_conds
8,X2,init_conds
9,X2,init_conds


In [78]:
# merge nfkb and nfkb_data

measurements_X2 = [X2_fordf, x2_data]
measurements_X2

measurement_data_x2 = pd.concat(measurements_X2, axis=1)

In [79]:
measurement_data_x2

Unnamed: 0,observableId,simulationConditionId,time,measurement
0,X2,init_conds,0.0,0.0
1,X2,init_conds,1.666667,4.242097
2,X2,init_conds,3.333333,6.137602
3,X2,init_conds,5.0,2.268323
4,X2,init_conds,6.666667,0.64346
5,X2,init_conds,8.333333,0.353979
6,X2,init_conds,10.0,5.30244
7,X2,init_conds,11.666667,4.974264
8,X2,init_conds,13.333333,1.745544
9,X2,init_conds,15.0,0.520644


In [80]:
measurements_X2_df = pd.concat(measurements_X2, axis=1)
measurements_X2_df

Unnamed: 0,observableId,simulationConditionId,time,measurement
0,X2,init_conds,0.0,0.0
1,X2,init_conds,1.666667,4.242097
2,X2,init_conds,3.333333,6.137602
3,X2,init_conds,5.0,2.268323
4,X2,init_conds,6.666667,0.64346
5,X2,init_conds,8.333333,0.353979
6,X2,init_conds,10.0,5.30244
7,X2,init_conds,11.666667,4.974264
8,X2,init_conds,13.333333,1.745544
9,X2,init_conds,15.0,0.520644


In [81]:
measurement_info_X3 = {

    # references the observable ID from observable file
    'observableId': ['X3'],
    # references condition ID from the experimental condition file 
    'simulationConditionId': ['init_conds'],

}

# turn that into a df where they each get repeated 300 times 

X3_fordf = pd.DataFrame({key: value * 10 for key, value in measurement_info_X3.items()})

In [82]:
# merge nfkb and nfkb_data

measurements_X3 = [X3_fordf, x3_data]
measurements_X3

measurement_data_x3 = pd.concat(measurements_X3, axis=1)

In [83]:
measurements_X3_df = pd.concat(measurements_X3, axis=1)
measurements_X3_df

Unnamed: 0,observableId,simulationConditionId,time,measurement
0,X3,init_conds,0.0,0.0
1,X3,init_conds,1.666667,10.640809
2,X3,init_conds,3.333333,3.511148
3,X3,init_conds,5.0,0.667428
4,X3,init_conds,6.666667,0.140281
5,X3,init_conds,8.333333,0.692087
6,X3,init_conds,10.0,9.753623
7,X3,init_conds,11.666667,2.534796
8,X3,init_conds,13.333333,0.491058
9,X3,init_conds,15.0,0.205848


In [84]:
allX_data = pd.concat([measurements_X1_df, measurements_X2_df, measurements_X3_df], axis=0)

In [86]:
allX_data.reset_index(drop=True, inplace=True)

In [87]:
allX_data

Unnamed: 0,observableId,simulationConditionId,time,measurement
0,X1,init_conds,0.0,1.0
1,X1,init_conds,1.666667,16.780671
2,X1,init_conds,3.333333,0.027356
3,X1,init_conds,5.0,0.003493
4,X1,init_conds,6.666667,0.042263
5,X1,init_conds,8.333333,2.321366
6,X1,init_conds,10.0,8.718655
7,X1,init_conds,11.666667,0.033815
8,X1,init_conds,13.333333,0.017957
9,X1,init_conds,15.0,0.357544


In [88]:
# turn it into a .tsv file
## change from dataframe to .tsv file 
allX_data.to_csv('measurement_data.tsv', sep='\t', index=False)

### Parameter Table: Done

parameters.tsv

In [89]:
params = {

    # parameter name as defined in sbml
    'parameterId': ['kA', 'k2', 'k3', 'k4', 'k5'],
    # log10 (better for estimation) or lin (if the parameters can be negative)
    'parameterScale': ['lin', 'lin', 'lin', 'lin', 'lin'],
    # bounds 
    'lowerBound': [1E-5, 1E-5, 1E-5, 1E-5, 1E-5],
    'upperBound': [1E+5, 1E+5, 1E+5, 1E+5, 1E+5],
    # known values: keep empty if there are none
    'nominalValue': ['', '', '1', '1', '1'],
    # define if parameters are estimated
    ## 1: estimate
    ## 0: fixed to nominalValue
    'estimate': [1, 1, 0, 0, 0]

}

In [90]:
## change from dictonary to dataframe 
parameters = pd.DataFrame(params)

## change from dataframe to .tsv file 
parameters.to_csv('parameters.tsv', sep='\t', index=False)

In [91]:
parameters

Unnamed: 0,parameterId,parameterScale,lowerBound,upperBound,nominalValue,estimate
0,kA,lin,1e-05,100000.0,,1
1,k2,lin,1e-05,100000.0,,1
2,k3,lin,1e-05,100000.0,1.0,0
3,k4,lin,1e-05,100000.0,1.0,0
4,k5,lin,1e-05,100000.0,1.0,0


### Visualization Table: Done

visualization_specifications.tsv

In [95]:
viz = {

    # specifies plots: all lines with same plot ID combined into 1 plot
    'plotId': ['plot1', 'plot2'],
    # plotting style of measurement data
    'plotTypeData': ['Mean', 'Mean'],
    # label for x axis 
    'xLabel': ['Time', 'Time'],
    # defines what is plotted 
    'yValues': ['kA', 'k2'],
    # label for y axis
    'yLabel': ['kA value', 'k2 value']


}

In [96]:
## change from dictonary to dataframe 
visualization_specifications = pd.DataFrame(viz)

## change from dataframe to .tsv file 
visualization_specifications.to_csv('visualization_specifications.tsv', sep='\t', index=False)

In [97]:
visualization_specifications

Unnamed: 0,plotId,plotTypeData,xLabel,yValues,yLabel
0,plot1,Mean,Time,kA,kA value
1,plot2,Mean,Time,k2,k2 value
