# Parametric Simulation: Weibull with interaction terms

Setup chunk:

In [1]:
import numpy as np
from scipy.stats import weibull_min # r weibull simulation
from scipy.stats import norm # for covariate simulation
from scipy.stats import gamma # for weibull shape parameter
from scipy.stats import bernoulli # for censoring
from scipy.stats import uniform
from scipy.stats.mstats import mquantiles
import pandas as pd

## Simulated data
In this section, we define and check a function to simulate survival data from a Weibull distribution using a linear predictor given by:
\begin{equation}
  t_i \sim \text{Weib}(\kappa,\lambda e^{x_i^T \beta});
\end{equation}
\begin{equation}
  \lambda = 1, \quad \kappa = 5;
\end{equation}
\begin{equation}
  \beta = (\beta_1,\beta_2,\beta_{12})^T \in \mathbb{R}^3 \text{ (chosen)};
\end{equation}
\begin{equation}
  x_{i,j} \sim \mathcal{N}(0,1) \quad \text{or} \quad x_{i,j} \sim \text{Bernoulli}(\pi_j) \text{ for } j = 1, 2.
\end{equation}
The mix of categorical and continuous covariates is chosen by the user (via `n_cat`, the number of categorical main effects).
Censoring is modelled by:
- *Simulate end of follow-up period:* Let $p_\text{censor}$ be the target overall proportion of censored individuals and $p_\text{dropout} = 0.2$. Define $q = \frac{1 - p_\text{censor}}{1 - p_\text{dropout} p_\text{censor}}$. If $\hat{F}(t_i) > q$, set $t_i \rightarrow t_i^\prime = \hat{F}^{-1}(q)$. (I.e. set all times above the $q$th quantile to the $q$th quantile.)
- *Simulate random drop-out:* If $\hat{F}(t_i) \leq q$, let $d_i \sim \text{Bern}(p_\text{dropout} p_\text{censor})$ be the indicator variable for dropping out. If $d_i = 1$, then $t_i \rightarrow t_i^\prime \sim \mathcal{U}(0,t_i)$.

Note that `scipy.stats.weibull_min` uses the following parameterisation of the Weibull distribution:
\begin{align}
f(t) = \frac{k}{l}(\frac{t}{l})^{k - 1} e^{-(t/l)^k}.
\end{align}
Hence, we set $k = \kappa$ and $l =  e^{-x_i^T \beta/k}$.

### Simulation function

In [2]:
def weibull_interaction(betas, n_cat, obs, censor_prop, show_beta = False, pi = 0.5, random_state = None):
    """Simulate right-censored Weibull survival data with one interaction term.

    Two main-effect covariates are drawn (standard normal and/or Bernoulli),
    their product is appended as an interaction column, and survival times are
    drawn from a Weibull distribution with shape 5 and scale
    ``exp(-X @ betas / 5)``. Censoring is then applied in two steps: every time
    above a quantile is truncated to that quantile ("end of follow-up"), and
    the remaining individuals may drop out at a uniform time before their
    event.

    Parameters
    ----------
    betas : array-like of length 3
        Coefficients for (covariate 0, covariate 1, interaction).
    n_cat : int
        Number of categorical (Bernoulli) main-effect covariates: 0, 1 or 2.
        The remaining ``2 - n_cat`` covariates are standard normal and are
        placed first in the design matrix.
    obs : int
        Number of observations to simulate.
    censor_prop : float
        Target overall proportion of censored individuals, in [0, 1].
    show_beta : bool, default False
        If True, print ``betas`` before returning (for testing the function).
    pi : float or array-like, default 0.5
        Success probability of the Bernoulli covariates. A scalar applies to
        every categorical covariate; a length-``n_cat`` sequence gives one
        probability per categorical covariate.
    random_state : None, int, or numpy Generator, default None
        Seed or generator used for every random draw. With None the draws use
        NumPy's global random state, as SciPy does by default.

    Returns
    -------
    pandas.DataFrame
        Columns ``0, 1, 2`` (covariates and interaction), ``"y"`` (uncensored
        time), ``"end_censor"``, ``"dropout"``, ``"end_censor_time"``,
        ``"time"`` (observed time) and ``"event"`` (True when the event was
        observed, i.e. neither censoring mechanism applied).

    Raises
    ------
    ValueError
        If ``n_cat`` is not 0, 1 or 2, or ``censor_prop`` is outside [0, 1].
    """
    if n_cat not in (0, 1, 2):
        raise ValueError("n_cat must be 0, 1 or 2 (number of categorical covariates)")
    if not 0 <= censor_prop <= 1:
        raise ValueError("censor_prop must lie in [0, 1]")
    n_cat = int(n_cat)
    n_cont = 2 - n_cat

    # One shared generator for all draws; seeding each call separately with the
    # same integer would make the draws depend on each other.
    rng = None if random_state is None else np.random.default_rng(random_state)

    # matrix of normal covariates
    X_norm = norm.rvs(size = (obs, n_cont), random_state = rng)
    # matrix of categorical covariates (pi broadcasts across the columns)
    X_cat = bernoulli.rvs(p = pi, size = (obs, n_cat), random_state = rng)
    X = np.hstack([X_norm, X_cat])
    # interaction between the two main effects
    X_interact = (X[:, 0]*X[:, 1]).reshape((obs, 1))
    X = np.hstack([X, X_interact])

    # shape parameter of weibull
    c = 5

    # calculating linear predictor
    lin_pred = np.matmul(X, betas)

    # creating a dataframe for the simulation
    sim_data = pd.DataFrame(X)
    # simulating survival times from weibull distribution
    y = weibull_min.rvs(c, scale = np.exp(-lin_pred/c), random_state = rng)
    sim_data["y"] = y

    # multiplier turning censor_prop into the per-individual dropout probability
    dropout_prop = 0.2
    # quantile level above which we censor; chosen so that end censoring plus
    # dropout together censor a fraction censor_prop of individuals
    q = (1 - censor_prop)/(1 - dropout_prop*censor_prop)
    # mquantiles returns a length-1 array; index it rather than calling float()
    # on the array, which NumPy deprecates for arrays with ndim > 0
    max_time = float(mquantiles(y, prob = [q])[0])

    # censoring indicator for individuals above the quantile
    end_censor = y > max_time
    # dropout indicator for individuals not censored by end censoring
    dropout_draw = bernoulli.rvs(p = dropout_prop*censor_prop, size = len(sim_data), random_state = rng) == 1
    dropout = np.where(end_censor, False, dropout_draw)
    # "end censoring" times
    end_censor_time = np.where(end_censor, max_time, y)
    # simulating the dropout time, uniform on (0, end_censor_time)
    dropout_time = uniform.rvs(scale = end_censor_time, random_state = rng)

    sim_data["end_censor"] = end_censor
    sim_data["dropout"] = dropout
    sim_data["end_censor_time"] = end_censor_time
    sim_data["time"] = np.where(dropout, dropout_time, end_censor_time)
    sim_data["event"] = ~(sim_data["dropout"] | sim_data["end_censor"])

    if show_beta:
        print(betas)
    return sim_data

In [3]:
# Quick check of the simulator: 10 observations, one categorical covariate,
# a target censoring proportion of 0.7, printing the coefficients used.
weibull_interaction(betas = [1, 1, 1], n_cat = 1, obs = 10, censor_prop = 0.7, show_beta = True, pi = 0.5)

[1, 1, 1]


Unnamed: 0,0,1,2,y,end_censor,dropout,end_censor_time,time,event
0,1.717084,1.0,1.717084,0.524468,False,False,0.524468,0.524468,True
1,-0.488694,1.0,-0.488694,0.971157,True,False,0.604878,0.604878,False
2,0.735216,1.0,0.735216,0.678897,True,False,0.604878,0.604878,False
3,1.755775,0.0,0.0,0.605806,True,False,0.604878,0.604878,False
4,0.899801,0.0,0.0,0.731583,True,False,0.604878,0.604878,False
5,-1.257388,0.0,-0.0,1.15838,True,False,0.604878,0.604878,False
6,-0.439255,0.0,-0.0,1.1061,True,False,0.604878,0.604878,False
7,1.187182,0.0,0.0,0.536971,False,False,0.536971,0.536971,True
8,0.598976,1.0,0.598976,0.583648,False,True,0.583648,0.52703,False
9,-0.374041,1.0,-0.374041,1.024351,True,False,0.604878,0.604878,False
