## Coding the NF-kB model from Sanjana's thesis in PEtab

In [None]:
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import re
import petab
import bionetgen as bng
from petab import Problem
from petab.visualize import plot_problem

## Encoding NF-kB model in PEtab

In [None]:
## model class 

# Colors of matplotlib's configured property cycle; plot_comparison indexes
# into this list so that each observable gets its own color.
prop_cycle = plt.rcParams['axes.prop_cycle']
colors = [entry['color'] for entry in prop_cycle]

class Model:
    """Wrap a simulator object with helpers for simulating and scoring fits.

    Every entry of the ``opts`` dictionary passed to the constructor is copied
    onto the instance as an attribute.  The methods read these attributes:

    * ``rr`` - simulator object; it is indexed by parameter name and must
      provide ``resetAll()``, ``reset()`` and ``simulate(times=, selections=)``.
    * ``observables`` - list of observable names requested from the simulator.
    * ``params`` - names of the parameters that are varied.
    * ``p_true`` - default parameter values, in the order of ``params``.
    * ``lower_bnds`` / ``upper_bnds`` - per-parameter bounds used by the prior.
    * ``times`` - default output time points.
    * ``dose`` (optional) - default value written to the ``L0`` parameter.
    """

    def __init__(self, opts):
        """Copy all ``opts`` entries onto the instance and derive defaults."""
        for key, value in opts.items():
            setattr(self, key, value)
        # Default model output: time column followed by the observables
        self.defaultOutput = ['time'] + self.observables
        self.n_params = len(self.params)
        self.n_obs = len(self.observables)
        self.time_units = 'minutes'
        self.time_conv = 60  # conversion factor for seconds to minutes

    def __call__(self, theta_new):
        """Return the log-likelihood of ``theta_new`` (see log_likelihood)."""
        return self.log_likelihood(theta_new)

    def add_data(self, df):
        """Attach the data table used by log_likelihood and plot_comparison."""
        self.data = df

    def run(self, dose=None, times=None, params=None, output=None):
        """Simulate the model and return the simulator output.

        Args:
            dose: value assigned to the ``L0`` parameter before simulating.
                Defaults to ``self.dose`` when that attribute exists;
                otherwise ``L0`` is left untouched.
            times: output time points (default ``self.times``).  If the first
                entry is greater than zero, 0.0 is prepended.
            params: parameter values in the order of ``self.params``
                (default ``self.p_true``).
            output: simulator selections (default ``self.defaultOutput``).

        Returns:
            Whatever ``rr.simulate`` returns, or an empty list if setting the
            dose or running the simulation raised an exception.
        """
        rr = self.rr
        rr.resetAll()

        if params is None:
            params = self.p_true
        if dose is None:
            # Not every options dictionary defines a default dose; fall back
            # to the value already stored in the model instead of raising.
            dose = getattr(self, 'dose', None)
        if times is None:
            times = self.times
        # Make sure zero is the first time point
        if times[0] > 0.0:
            times = np.insert(times, 0, 0.0)
        if output is None:
            output = self.defaultOutput

        # Set parameter values
        for name, value in zip(self.params, params):
            rr[name] = float(value)
        rr.reset()  # Necessary to be able to update the initial conditions

        # NOTE: If you want to vary the initial conditions, do this by defining
        # a parameter in the model that sets the initial concentration of
        # species and then include this parameter in your list of parameters to
        # fit.
        try:
            if dose is not None:
                rr['L0'] = dose
            rr.reset()
            res = rr.simulate(times=times, selections=output)
        except Exception as e:
            # Best effort: report the failure and hand back an empty result so
            # that callers can penalise the parameter set instead of crashing.
            print(f"Simulation failed: {e}")
            return []

        return res

    def log_prior(self, theta_new):
        """Return 0.0 if every value lies within its bounds, else ``-inf``."""
        in_bounds = all(
            low <= value <= high
            for value, low, high in zip(theta_new, self.lower_bnds, self.upper_bnds)
        )
        if in_bounds:
            return 0.0
        return -np.inf

    def log_likelihood(self, params):
        """Score ``params`` against ``self.data`` (larger is better).

        The second output column is divided by its value at the first time
        point (fold change) and compared, from the second time point onwards,
        with the data column of the first observable.

        Returns:
            The negative sum of squared residuals, ``-1e11`` if the simulation
            failed, or ``-1e12`` if the output contains NaN.
        """
        y = self.run(params=params)
        if len(y) == 0:
            # Large (but not infinite) penalty if integration fails
            return -1e11
        if np.any(np.isnan(y)):
            return -1e12
        # Fold change (CAUTION: works only if there is one observable being fit)
        y = y[1:, 1] / y[0][1]

        # CAUTION: only the first observable is fitted; the first data row is
        # skipped to line up with the fold-change values above.
        obs = self.data[self.observables[0]].values[1:]
        sigma = 1
        return -np.sum(((y - obs) / sigma) ** 2)  # chi^2

    def calc_cost(self, params):
        """Sum ``cost_function`` over all dose-specific data sets.

        NOTE(review): this method reads the globals ``pSTAT1_dict``, ``doses``
        and ``cost_function``, none of which is defined in this class; confirm
        they exist before calling it.  Simulated and experimental time points
        are matched by exact equality, and the method expects the simulation
        output to have exactly two columns (time and STATp).
        """
        total_cost = 0

        for key, current_dose in zip(pSTAT1_dict.keys(), doses):
            # Experimental data for this dose
            mod_df = pSTAT1_dict[key]
            exp_times = pd.DataFrame(mod_df['time'])
            exp_pSTAT = pd.DataFrame(mod_df['STATp'])

            # Simulation for this dose
            res = pd.DataFrame(self.run(dose=current_dose, params=params))
            res.columns = ['time', 'STATp']

            # Keep only simulated rows whose time appears in the data
            predicted_matches = res[res['time'].isin(exp_times['time'])]
            pred_values = pd.DataFrame(predicted_matches['STATp'])
            pred_values = pred_values.rename(columns={'STATp': 'predicted'})
            # Re-index so the predictions line up with the data rows
            pred_values = pred_values.reset_index(drop=True)

            data_4_cost = pd.concat(
                [
                    exp_times.reset_index(drop=True),
                    exp_pSTAT.reset_index(drop=True),
                    pred_values,
                ],
                axis=1,
            )
            data_4_cost.columns = ['time', 'experimental', 'predicted']

            # Accumulate the cost for the current dose
            total_cost += cost_function(data_4_cost)

        return total_cost

    def plot_comparison(self, dose=None, params=None, data=None):
        """Plot simulated fold change against the data for each observable.

        Args:
            dose: dose forwarded to ``run`` for the plotted trajectory.
            params: parameter values (default ``self.p_true``).
            data: table with a ``time`` column and one column per observable
                (default ``self.data``).

        Returns:
            An empty tuple.
        """
        if params is None:
            params = self.p_true
        if data is None:
            data = self.data
        res = self.run(dose=dose, params=params)
        for i, o in enumerate(self.observables):
            # The simulation is normalised by its first value; the data is
            # plotted as is because it is already fold-change data.
            plt.plot(res[:, 0], res[:, i + 1] / res[0, i + 1], label=o, color=colors[i])
            plt.plot(data['time'].values, data[o].values, 'o', color=colors[i])
        cost = -self.log_likelihood(params)
        plt.yscale('linear')
        plt.title(f'cost:{cost:0.2e} {params}')
        plt.xlabel('time (minutes)')
        plt.ylabel('Fraction of molecules')
        return ()
    
    

In [None]:
# Load model from BNGL
model_name = "nfkb"
model = bng.bngmodel(model_name + ".bngl")
sim = model.setup_simulator()  # sim is a libroadrunner simulator object

# Extract model parameter names and values
# NOTE: Parameters ending with '0' are skipped to avoid fitting initial concentrations.

# Fitting all parameters NOT including the initial conditions
pnames = []
pvals = []
for p in model.parameters:
    # Skip internal BNG parameters (names starting with an underscore)
    if p.startswith('_'):
        continue
    # Skip initial concentration parameters. COMMENT OUT these two lines if
    # you don't want these skipped
    if p.endswith('0'):
        continue
    # Skip initial values of input parameters
    if p.endswith('_input'):
        continue
    pnames.append(p)
    # SECURITY NOTE: eval() executes the parameter's value string as Python
    # code; only load BNGL files from a trusted source.
    val = eval(model.parameters[p].value)
    pvals.append(val)

# Bounds span two orders of magnitude either side of the model value
lb = [0.01 * val for val in pvals]
ub = [100 * val for val in pvals]
print("All parameters:",pnames)
print(pvals)
print(f'{lb}')
print(f'{ub}')

# Set up Model object for more advanced tasks
# NOTE(review): 'STATp' is not among the NF-kB observables listed for the
# PEtab observables table - confirm nfkb.bngl defines it.
mod_opts = {
    'observables': ['STATp'],  # observable in model, must divide by initial value
    'rr': sim,
    'params': pnames,
    'p_true': np.array(pvals),
    'lower_bnds': lb,
    'upper_bnds': ub,
    'times': np.linspace(0, 100, 101),
}
# From here on `model` refers to the Model wrapper, not the BNG model above.
model = Model(mod_opts)
display(mod_opts['times'])

### SBML File: Done

nfkb_sbml.xml

### Conditions Table: Done
experimental_conditions.tsv

In [None]:
# Single simulation condition for the PEtab conditions table: an ID, a
# human-readable name, and one value each for TNFRin (initial TNFR
# concentration), IKKin (initial IKK concentration) and boundc (initial
# cytoplasmic bound).
exp_conds = dict(
    conditionId=['init_conds'],
    conditionName=['initial conditions'],
    TNFRin=[2.5257],
    IKKin=[4.7454],
    boundc=[6.5546],
)

In [None]:
## Convert the conditions dictionary to a DataFrame
experimental_conditions = pd.DataFrame.from_dict(exp_conds)

## Write the DataFrame to a tab-separated file, without the index column
experimental_conditions.to_csv(
    path_or_buf='experimental_conditions.tsv',
    sep='\t',
    index=False,
)

In [None]:
# Notebook cell output: show the conditions table for a visual check.
experimental_conditions

### Observable Table: Done

observables.tsv

In [None]:
# obs = {

#     # name of observables to link to the measurements
#     'observableId': ['TNF', 'TNFRi', 'TNFRa', 'IKKi', 'IKKa', 'cNFkB', 'nNFkB', 'cIkB', 'nIkB', 'cNFkB_IkB', 'nNFkB_IkB', 'A20'],
#     # human readable description of the observables
#     'observableName': ['TNF', 'TNFR(st~i)', 'TNFR(st~a)', 'IKK(s~I)', 'IKK(s~A)', 'NFkB(IkB,loc~c)', 'NFkB(IkB,loc~n)', 'IkB(NFkB,loc~c)', 'IkB(NFkB,loc~n)', 'NFkB(IkB!0,loc~c).IkB(NFkB!0,loc~c)', 'NFkB(IkB!0,loc~n).IkB(NFkB!0,loc~n)', 'A20'],
#     # mathematical formula for how the model output is calculated 
#     'observableFormula': ['TNF', 'TNFR(st~i)', 'TNFR(st~a)', 'IKK(s~I)', 'IKK(s~A)', 'NFkB(IkB,loc~c)', 'NFkB(IkB,loc~n)', 'IkB(NFkB,loc~c)', 'IkB(NFkB,loc~n)', 'NFkB(IkB!0,loc~c).IkB(NFkB!0,loc~c)', 'NFkB(IkB!0,loc~n).IkB(NFkB!0,loc~n)', 'A20']
#     ## can specify a noise formula and noise distribution
#     # 'noiseFormula': [],
#     # 'noiseDistribution': []

# }

In [None]:
# Observable table, built from one row per observable:
#   (short tag, BNGL pattern, model species ID)
# observableId is '<tag>_obs', observableName is the BNGL pattern,
# observableFormula is the species ID and noiseFormula is 1% of that species.
obs = {
    column: list(values)
    for column, values in zip(
        ('observableId', 'observableName', 'observableFormula', 'noiseFormula'),
        zip(*(
            (f'{tag}_obs', pattern, species, f'0.01 * {species}')
            for tag, pattern, species in (
                ('TNF', 'TNF', 'TNF'),
                ('TNFRi', 'TNFR(st~i)', 'TNFR_st_i'),
                ('TNFRa', 'TNFR(st~a)', 'TNFR_st_a'),
                ('IKKi', 'IKK(s~I)', 'IKK_s_I'),
                ('IKKa', 'IKK(s~A)', 'IKK_s_A'),
                ('cNFkB', 'NFkB(IkB,loc~c)', 'NFkB_IkB_loc_c'),
                ('nNFkB', 'NFkB(IkB,loc~n)', 'NFkB_IkB_loc_n'),
                ('cIkB', 'IkB(NFkB,loc~c)', 'IkB_NFkB_loc_c'),
                ('nIkB', 'IkB(NFkB,loc~n)', 'IkB_NFkB_loc_n'),
                ('cNFkB_IkB', 'NFkB(IkB!0,loc~c).IkB(NFkB!0,loc~c)', 'NFkB_IkB_0_loc_c_IkB_NFkB_0_loc_c'),
                ('nNFkB_IkB', 'NFkB(IkB!0,loc~n).IkB(NFkB!0,loc~n)', 'NFkB_IkB_0_loc_n_IkB_NFkB_0_loc_n'),
                ('A20', 'A20', 'A20'),
            )
        )),
    )
}


In [None]:
## Convert the observables dictionary to a DataFrame
observables = pd.DataFrame(data=obs)

## Write the DataFrame to a tab-separated file, without the index column
observables.to_csv(path_or_buf='observables.tsv', index=False, sep='\t')

In [None]:
# Notebook cell output: show the observables table for a visual check.
observables

### Measurement Table: Done

measurement_data.tsv

In [None]:
# Read the data table from NFkB_sim_data.csv (path relative to the working
# directory) into a DataFrame.
nfkb_data = pd.read_csv(filepath_or_buffer='NFkB_sim_data.csv')

In [None]:
# Rename the nNFkB column to measurement

In [None]:
# The 'nNFkB' data column becomes the 'measurement' column of the PEtab
# measurement table.
nfkb_data = nfkb_data.rename({'nNFkB': 'measurement'}, axis='columns')

In [None]:
# Notebook cell output: show the renamed data table for a visual check.
nfkb_data

In [None]:
# IDs attached to every measurement row.
measurement_info = {

    # references the observable ID from the observable file; it has to match
    # the 'nNFkB_obs' entry of the observables table exactly
    'observableId': ['nNFkB_obs'],
    # references condition ID from the experimental condition file
    'simulationConditionId': ['init_conds']

}

In [None]:
# Turn the IDs into a DataFrame in which each one is repeated once per data
# row, so that the ID columns and nfkb_data have the same number of rows.
nfkb = pd.DataFrame(
    {key: value * len(nfkb_data) for key, value in measurement_info.items()}
)

In [None]:
# Notebook cell output: show the repeated ID columns for a visual check.
nfkb

In [None]:
# Merge nfkb and nfkb_data: place the ID columns and the data columns side by
# side (rows are matched on the DataFrame index).
measurements = [nfkb, nfkb_data]

measurement_data = pd.concat(measurements, axis=1)

In [None]:
# Notebook cell output: show the merged measurement table for a visual check.
measurement_data

In [None]:
# Write the measurement table to a tab-separated file, without the index
# column.
measurement_data.to_csv(
    path_or_buf='measurement_data.tsv',
    index=False,
    sep='\t',
)

In [None]:
# Notebook cell output: show the measurement table that was written to file.
measurement_data

### Parameter Table: Done

parameters.tsv

In [None]:
# PEtab parameter table. Every parameter gets the same settings, so only the
# IDs are listed explicitly and the remaining columns are generated from them.
# NOTE(review): TNFRin, IKKin and boundc are also columns of the conditions
# table (experimental_conditions.tsv); the PEtab format does not allow a
# parameter to appear in both tables - confirm which one is intended.
params = {
    # parameter name as defined in sbml
    'parameterId': [
        'k_b', 'k_f', 'k_a', 'k_4', 'k_i1', 'k_e1', 'k_t2a', 'k_t1a',
        'k_i2', 'k_e2', 'k_e2a', 'c_4a', 'c_5a', 'c_1a', 'k_a1a', 'k_d1a',
        'c_3', 'c_1', 'k_ikk', 'k_tnfr', 'TNFRin', 'IKKin', 'boundc',
    ],
}
params.update({
    column: [setting] * len(params['parameterId'])
    for column, setting in (
        # log10 (better for estimation) or lin (if the parameters can be negative)
        ('parameterScale', 'log10'),
        # bounds
        ('lowerBound', 1E-5),
        ('upperBound', 1E+5),
        # known values: keep empty if there are none
        ('nominalValue', ''),
        # 1: estimate, 0: fixed to nominalValue
        ('estimate', 1),
    )
})

In [None]:
## Convert the parameter dictionary to a DataFrame
parameters = pd.DataFrame.from_dict(params)

## Write the DataFrame to a tab-separated file, without the index column
parameters.to_csv(path_or_buf='parameters.tsv', index=False, sep='\t')

In [None]:
# Notebook cell output: show the parameter table for a visual check.
parameters

### Visualization Table: 

visualization_specifications.tsv

In [None]:
# PEtab visualization table with a single plot of the nuclear NF-kB observable.
viz = {

    # specifies plots: all lines with same plot ID combined into 1 plot
    'plotId': ['plot1'],
    # plotting style of measurement data; PEtab accepts 'MeanAndSD',
    # 'MeanAndSEM', 'replicate' or 'provided'
    'plotTypeData': ['MeanAndSD'],
    # label for x axis
    'xLabel': ['Time'],
    # defines what is plotted: must be an observableId from the observables table
    'yValues': ['nNFkB_obs'],
    # label for y axis
    'yLabel': ['nNFkB Concentration']

}

In [None]:
## Convert the visualization dictionary to a DataFrame
visualization_specifications = pd.DataFrame(data=viz)

## Write the DataFrame to a tab-separated file, without the index column
visualization_specifications.to_csv(
    path_or_buf='visualization_specifications.tsv',
    index=False,
    sep='\t',
)

In [None]:
# Notebook cell output: show the visualization table for a visual check.
visualization_specifications