## Code to generate data for the simulation study
Note that the notebook is written using **R code**. Hence it requires an R kernel.

In [None]:
library(data.table)
library(tidyr)
library(truncnorm)
library(pbapply)
library(parallel)

# Number of worker processes used for parallelization.
# Parallelization is done over the number of datasets (one task per dataset);
# the value is handed to makeForkCluster() in Simulate.Survival.Time().
NUMBER_OF_CORES <- 100

# Base seed, used to generate the seeds for the simulation.
# NOTE(review): not referenced in the visible part of this file -- presumably
# passed as `seed` to Simulate.Survival.Time(); confirm against the caller.
BASE_SEED <- 1234L

In [2]:

# Read the fictive cohort data set (whitespace separated, with header).
Daten <- read.csv("../../data/dummy_data.txt", sep = "")

# Columns of the cohort data set:
# lung: Died of lung cancer? 0: no, 1: yes (6422 lung cancer deaths)
# yyborn: year of birth
# yyin: year the miner started working
# yyin_18: year the miner started working plus 180 days (Vroni says she never used it)
# yystop: year the miner stopped working
# yyout: year the miner dropped out (for miners with lung cancer this is the year of death)
# wlm: cumulated WLM over the working years
# w46 - w89: WLM in year 1946 to year 1989


# Year columns for which an additional full date variable is generated:
# "yyborn" -> "ymdborn" and "yystop" -> "ymdstop"
year <- c("yyborn", "yystop")

# Largest day that may be drawn for `month` in year `yr`.
# `months_31` lists the months that are allowed to have 31 days; a month that
# is in none of the lists (and is not a leap-year February) gets 28 days.
# Leap years are detected as "divisible by 4, but not 1900"; the general
# century rule is not applied.
last_day_of_month <- function(month, yr, months_31 = c(1, 3, 5, 7, 8, 10, 12)) {
  if (month %in% months_31) {
    31L
  } else if (month %in% c(4, 6, 9, 11)) {
    30L
  } else if (month == 2 && yr %% 4 == 0 && yr != 1900) {
    29L
  } else {
    28L
  }
}

# Collect the dates in plain character vectors and attach them once at the
# end: assigning Daten$col[i] inside the loop copies the data frame in every
# iteration. seq_len() also makes the loop a no-op for an empty data set
# (1:0 would iterate over 1 and 0).
n_miners <- nrow(Daten)
ymdborn <- character(n_miners)
ymdstop <- character(n_miners)

# The order of the sample() calls below (two months, birth day, stop day per
# miner) determines the generated dates and must not be changed.
set.seed(12345)
for (i in seq_len(n_miners)) {
  month <- sample(1:12, 2, replace = TRUE)

  # day of birth
  day <- sample(1:last_day_of_month(month[1], Daten[i, year[1]]), 1)
  ymdborn[i] <- paste(Daten[i, year[1]], "/", month[1], "/", day, sep = "")

  # day for stop (to avoid numerical problems, the 31st of December is not
  # possible for stop: December is left out of the 31-day months and therefore
  # falls through to the 28-day case)
  day <- sample(1:last_day_of_month(month[2], Daten[i, year[2]],
                                    months_31 = c(1, 3, 5, 7, 8, 10)), 1)
  ymdstop[i] <- paste(Daten[i, year[2]], "/", month[2], "/", day, sep = "")
}

Daten$ymdborn <- ymdborn
Daten$ymdstop <- ymdstop


###############################################################################################
###############################################################################################
###############################################################################################


# Run the simulation study for every value in `betas`.
#
# For each true beta, `numberofdatasets` datasets are generated in parallel
# (one task per dataset) with Survival.Time.Data(), written to
# "<write_path>/Data_<j>.csv" and analysed with two piecewise constant hazard
# models (pchreg from package eha, cuts at 0, 40, 55, 75, 104): one using the
# true and one using the observed cumulative exposure. The per-dataset results
# are then summarised per beta.
#
# Arguments:
#   numberofdatasets  number of datasets simulated per beta
#   write_path        directory the dataset CSV files are written to
#   seed              seed from which the per-dataset seeds are drawn
#   betas             vector of true effect sizes
#   censor.lambda     handed to Survival.Time.Data() as `censor`
#   measurementmodel, Berkson, BerksonONLY, Assignment, basehaz,
#   numberofminers, truncation, NVtrunc
#                     handed unchanged to Survival.Time.Data()
#
# Returns a data.frame with one row per beta: number of lung cancer cases and
# age at death (mean/median), elapsed time, the four baseline hazards of the
# true-exposure model (mean/median), the estimated coefficient of both models
# (mean/median/variance) and the coverage rate of their 95% Wald intervals.
#
# Side effects: writes one CSV file per dataset and starts/stops a fork cluster
# with NUMBER_OF_CORES workers per beta.
# NOTE(review): the CSV file name does not depend on the beta, so with several
# betas each pass overwrites the files of the previous one -- confirm intended.
Simulate.Survival.Time <- function(numberofdatasets, write_path, seed, 
                                   measurementmodel, Berkson=TRUE, BerksonONLY=FALSE, Assignment="no",
                                   basehaz, betas, numberofminers, 
                                   truncation=NULL, censor.lambda=NULL, 
                                   NVtrunc=FALSE){

  n_betas <- length(betas)

  # One summary value per beta, preallocated instead of grown inside the loop.
  # The variable names become the column names of the returned data.frame.
  time_betas <- numeric(n_betas)
  mean.lungcancer <- numeric(n_betas)
  median.lungcancer <- numeric(n_betas)
  mean.age <- numeric(n_betas)
  median.age <- numeric(n_betas)
  median.baselinehazard0 <- numeric(n_betas)
  mean.baselinehazard0 <- numeric(n_betas)
  median.baselinehazard40 <- numeric(n_betas)
  mean.baselinehazard40 <- numeric(n_betas)
  median.baselinehazard55 <- numeric(n_betas)
  mean.baselinehazard55 <- numeric(n_betas)
  median.baselinehazard75 <- numeric(n_betas)
  mean.baselinehazard75 <- numeric(n_betas)
  mean.beta_pchreg <- numeric(n_betas)
  median.beta_pchreg <- numeric(n_betas)
  mean.beta_obs_pchreg <- numeric(n_betas)
  median.beta_obs_pchreg <- numeric(n_betas)
  coverage.rate_pchreg <- numeric(n_betas)
  coverage.rate_obs_pchreg <- numeric(n_betas)
  var.beta_pchreg <- numeric(n_betas)
  var.beta_obs_pchreg <- numeric(n_betas)

  for(b in seq_along(betas)){

    true.beta <- betas[[b]]
    print(paste0("true beta: ", true.beta))

    # Per-dataset seeds; the same seeds are used for every beta.
    # At least 2000 seeds are drawn as before, but never fewer than the number
    # of datasets (otherwise seeds[j] is NA for j > 2000 and set.seed() fails).
    # runif() draws sequentially, so the first 2000 seeds are unchanged.
    set.seed(seed)
    seeds <- round(runif(max(2000, numberofdatasets), 1000, 999999), 0)

    # Worker: simulate, store and analyse dataset number j.
    parf <- function(j){

      data <- Survival.Time.Data(seed=seeds[j], 
                                 measurementmodel=measurementmodel, Berkson=Berkson, BerksonONLY=BerksonONLY,
                                 Assignment=Assignment,
                                 numberofminers=numberofminers, beta=true.beta, model="Cox", truncation=truncation, 
                                 censor=censor.lambda, basehaz=basehaz, NVtrunc=NVtrunc)

      write.csv(data, file=paste0(write_path, "/Data_", j, ".csv"))

      # number of events and the corresponding stop times
      lung <- sum(data$delta==1)
      age <- c(data$stop[data$delta==1])

      # loaded inside the worker process
      library(eha)

      model_pchreg <- pchreg(Surv(start, stop, delta) ~ true.cum.exposure, data = data, cuts = c(0,40,55,75,104))
      baseline.hazards <- model_pchreg$hazards

      model_obs_pchreg <- pchreg(Surv(start, stop, delta) ~ obs.cum.exposure, data = data, cuts = c(0,40,55,75,104))
      baseline.hazards_obs <- model_obs_pchreg$hazards

      coefs_pchreg <- coefficients(model_pchreg)
      coefs_obs_pchreg <- coefficients(model_obs_pchreg)

      # 95% Wald confidence intervals
      z <- qnorm(0.975)
      lower_pchreg <- coefs_pchreg - z*sqrt(model_pchreg$var)
      upper_pchreg <- coefs_pchreg + z*sqrt(model_pchreg$var)

      lower_obs_pchreg <- coefs_obs_pchreg - z*sqrt(model_obs_pchreg$var)
      upper_obs_pchreg <- coefs_obs_pchreg + z*sqrt(model_obs_pchreg$var)

      # "yes" if the true beta lies strictly inside the interval, else "no"
      in_ci <- function(lower, upper){
        if(lower < true.beta && true.beta < upper) "yes" else "no"
      }
      true.beta.in.CI_pchreg <- in_ci(lower_pchreg, upper_pchreg)
      true.beta.in.CI_obs_pchreg <- in_ci(lower_obs_pchreg, upper_obs_pchreg)

      # positions 1-10 are relied upon by the aggregation below
      return(list(lung, age, 
                  baseline.hazards[1], baseline.hazards[2], baseline.hazards[3], baseline.hazards[4],
                  coefs_pchreg, coefs_obs_pchreg, true.beta.in.CI_pchreg, true.beta.in.CI_obs_pchreg))
    }

    # Always shut the workers down again, even if a task fails.
    cl <- makeForkCluster(NUMBER_OF_CORES)
    time <- tryCatch(
      system.time(
        solution <- pblapply(seq_len(numberofdatasets), parf, cl = cl)
      ),
      finally = stopCluster(cl)
    )

    # k-th entry of every worker result, flattened into one vector
    pull <- function(k) unlist(lapply(solution, function(x) x[[k]]))

    time_betas[b] <- time[["elapsed"]]

    lung_counts <- pull(1)
    median.lungcancer[b] <- median(lung_counts)
    mean.lungcancer[b] <- mean(lung_counts)

    ages <- pull(2)
    median.age[b] <- median(ages)
    mean.age[b] <- mean(ages)

    hazard0 <- pull(3)
    median.baselinehazard0[b] <- median(hazard0)
    mean.baselinehazard0[b] <- mean(hazard0)

    hazard40 <- pull(4)
    median.baselinehazard40[b] <- median(hazard40)
    mean.baselinehazard40[b] <- mean(hazard40)

    hazard55 <- pull(5)
    median.baselinehazard55[b] <- median(hazard55)
    mean.baselinehazard55[b] <- mean(hazard55)

    hazard75 <- pull(6)
    median.baselinehazard75[b] <- median(hazard75)
    mean.baselinehazard75[b] <- mean(hazard75)

    beta_true_exposure <- pull(7)
    mean.beta_pchreg[b] <- mean(beta_true_exposure)
    median.beta_pchreg[b] <- median(beta_true_exposure)
    var.beta_pchreg[b] <- var(beta_true_exposure)

    beta_obs_exposure <- pull(8)
    mean.beta_obs_pchreg[b] <- mean(beta_obs_exposure)
    median.beta_obs_pchreg[b] <- median(beta_obs_exposure)
    var.beta_obs_pchreg[b] <- var(beta_obs_exposure)

    # share of datasets whose interval contains the true beta
    covered <- pull(9)
    coverage.rate_pchreg[b] <- sum(covered == "yes") / sum(covered %in% c("yes", "no"))

    covered_obs <- pull(10)
    coverage.rate_obs_pchreg[b] <- sum(covered_obs == "yes") / sum(covered_obs %in% c("yes", "no"))
  }

  Coverage <- data.frame(true.beta = betas,
                         mean.lungcancer, median.lungcancer, mean.age, median.age, time_betas,
                         mean.baselinehazard0, mean.baselinehazard40, mean.baselinehazard55,
                         mean.baselinehazard75, median.baselinehazard0, median.baselinehazard40,
                         median.baselinehazard55, median.baselinehazard75, mean.beta_pchreg, median.beta_pchreg,
                         mean.beta_obs_pchreg, median.beta_obs_pchreg, coverage.rate_pchreg, coverage.rate_obs_pchreg,
                         var.beta_pchreg, var.beta_obs_pchreg)

  return(Coverage)

}





################################################################################





# Simulate one dataset of miners with true and observed radon exposure and
# survival times.
#
# Steps: sample `numberofminers` miners from the global cohort `Daten`, assign
# each a random object (1..4, or 1..6 for measurement model
# "M1aM2M2_ExpertM3M4") and a random activity (1..3), draw the true values and
# measurement errors with U_truemeans(), compute true (E) and observed (Z)
# yearly exposures per miner with Calculate_E_Z(), simulate the miner's
# records with Survival.Time.Miner() and stack them. Finally the
# measurement-model label and several grouping columns are added.
#
# Arguments:
#   seed              seed for all sampling steps (also handed to
#                     U_truemeans() and Survival.Time.Miner())
#   measurementmodel  name of the measurement model
#   Berkson           if TRUE, the Berkson error components are handed on too
#   BerksonONLY, Assignment, NVtrunc
#                     handed to Calculate_E_Z() / U_truemeans(); `Assignment`
#                     "add" or "mult" additionally hands on `U_A`
#   numberofminers    number of miners to sample
#   beta, model, censor, truncation, basehaz
#                     handed to Survival.Time.Miner()
#
# Returns the stacked result of Survival.Time.Miner() over all sampled miners
# with the additional columns model, b_period, b_reference, cluster_C_Rn_old,
# cluster_C_Rn_obs_ref, tau_e_period and tau_e_no_error.
#
# Side effects: resets the global RNG via set.seed().
Survival.Time.Data <- function(seed, measurementmodel, Berkson=TRUE, BerksonONLY=FALSE, 
                               Assignment="no", numberofminers, beta, model, censor=NULL, 
                               truncation=NULL, basehaz, NVtrunc=FALSE){
  
  # choose out of all miners
  set.seed(seed)
  miners_ID <- sample(Daten$ID, numberofminers)

  # Choose arbitrary clusters of miners: number of objects
  n_o <- if(measurementmodel == "M1aM2M2_ExpertM3M4") 6 else 4

  Sample_Miners <- data.frame(ID=miners_ID, object=NA, activity=NA)
  # object (the seed is deliberately reset before every sampling step)
  set.seed(seed)
  Sample_Miners$object <- sample(1:n_o, size=numberofminers, replace=TRUE)
  
  # activity, drawn object by object
  set.seed(seed)
  for(o in 1:n_o){
    Sample_Miners[which(Sample_Miners$object==o),"activity"] <- sample(1:3, size=sum(Sample_Miners$object==o), replace=TRUE)
  }
  
  # generate the true mean values and all measurement errors
  # (the cohort data set is called `Daten`; U_truemeans() does not use `Data`,
  # so this only replaces a reference to an undefined object)
  Generate <- U_truemeans(seed=seed, measurementmodel=measurementmodel, 
                          Berkson=Berkson, Data=Daten, NVtrunc=NVtrunc, n_o=n_o)
  
  # ages at which the piecewise constant baseline hazard changes
  cutpoints <- c(40, 55, 75)
  
  # arguments that are only handed to Survival.Time.Miner() if Berkson==TRUE
  berkson_only_args <- c("U_w_B", "w_Berkson", "U_g_B", "g_Berkson",
                         "U_c_B", "c_Berkson", "U_f_B", "f_Berkson",
                         "U_b_B", "b_Berkson", "U_tau_e_B", "tau_e_Berkson")
  
  data <- NULL
  
  for(i in Sample_Miners$ID){
    
    miner_object <- Sample_Miners[Sample_Miners$ID==i,2]
    miner_activity <- Sample_Miners[Sample_Miners$ID==i,3]
      
    # miner's radon exposure from the fictive cohort data set
    # NOTE(review): the ID is used as row index of Daten and the column of
    # year 1946 is assumed to be column 9 -- confirm against the data file.
    X <- as.numeric(Daten[i, (Daten[i, "yyin"] - 1946 + 9):(Daten[i, "yystop"] - 1946 + 9)])
      
    # miner's age at first radon exposure (end of the year of entry)
    EndeEintrittsjahr <- paste(Daten[i,"yyin"],"/12/31", sep="")
    time.start <- length(seq.Date(as.Date(Daten[i,"ymdborn"]), as.Date(EndeEintrittsjahr), by="days"))/365.25
    # miner's age at last radon exposure
    time.stop <- length(seq.Date(as.Date(Daten[i,"ymdborn"]), as.Date(Daten[i,"ymdstop"]), by="days"))/365.25
      
    # age the miner received the exposures
    if(Daten[i,"yyin"]==Daten[i,"yystop"]){
      time.interval <- time.stop
    } else {
      time.interval <- c(time.start:time.stop, time.stop)
    }
      
    # year the miner received the exposures
    year <- Daten[i, "yyin"]:Daten[i, "yystop"]
    
    Calculate <- Calculate_E_Z(measurementmodel=measurementmodel, 
                               Berkson=Berkson, BerksonONLY=BerksonONLY, 
                               Assignment=Assignment, 
                               Generate=Generate, year=year, 
                               object=miner_object, 
                               activity=miner_activity,
                               X_fictive_cohort=X, NVtrunc=NVtrunc)
    
    E <- Calculate$E
    E.cum <- cumsum(E)
    Z <- Calculate$Z
    Z.cum <- cumsum(Z)
    
    # Append one record for every cut point the miner has not reached yet:
    # no further exposure (0), cumulative exposure carried forward, and
    # year 0 as marker for an appended record.
    pad_ages <- cutpoints[cutpoints > max(time.interval)]
    n_pad <- length(pad_ages)
    if(n_pad > 0){
      time.interval <- c(time.interval, pad_ages)
      E <- c(E, rep(0, n_pad))
      E.cum <- c(E.cum, rep(E.cum[length(E.cum)], n_pad))
      Z <- c(Z, rep(0, n_pad))
      Z.cum <- c(Z.cum, rep(Z.cum[length(Z.cum)], n_pad))
      year <- c(year, rep(0, n_pad))
    }
    
    # period indices of the working time factor (w) and the equilibrium
    # factor (g); only defined for objects 1-4
    if(miner_object %in% 1:4){
      w_period <- c(rep(1, sum(year<1959 & year!=0)), rep(2, sum(year>1958 & year<1966 & year!=0)),
                    rep(3, sum(year>1965 & year<1981 & year!=0)), rep(4, sum(year>1980)))
      
      g_period <- c(rep(1, sum(year<1953 & year!=0)), rep(2, sum(year>1952 & year<1958 & year!=0)),
                    rep(3, sum(year>1957 & year<1970 & year!=0)), rep(NA, sum(year>1969)))
    } else{
      w_period <- rep(NA, length(year))
      g_period <- rep(NA, length(year))      
    }
    
    # Full argument list for Survival.Time.Miner() (the Berkson case without
    # assignment error). list() keeps NULL entries, so components that
    # Calculate_E_Z() did not return are still handed on as NULL.
    miner_args <- list(
      seed=seed, Berkson=TRUE, basehaz=basehaz, beta=beta, time.interval=time.interval,
      true.X=E, true.X.cum=E.cum, obs.Z=Z, obs.Z.cum=Z.cum,
      censor=censor, truncation=truncation, measurementmodel=measurementmodel,
      model=model, ID=i, year=year,
      object=miner_object, activity=miner_activity,
      C_Rn_true=Calculate$C_Rn_true, U_Rn=Calculate$U_Rn, C_Rn_obs=Calculate$C_Rn_obs,
      C_Rn_old_true=Calculate$C_Rn_old_true, U_Rn_old=Calculate$U_Rn_old, C_Rn_old_obs=Calculate$C_Rn_old_obs,
      C_Rn_ref_true=Calculate$C_Rn_ref_true, U_Rn_ref=Calculate$U_Rn_ref, C_Rn_ref_obs=Calculate$C_Rn_ref_obs,
      w_period=w_period, w_true_mean=Calculate$w_true_mean, U_w_c=Calculate$U_w_c, w_classical=Calculate$w_classical,
      U_w_B=Calculate$U_w_B, w_Berkson=Calculate$w_Berkson,
      g_period=g_period, g_true_mean=Calculate$g_true_mean, U_g_c=Calculate$U_g_c, g_classical=Calculate$g_classical,
      U_g_B=Calculate$U_g_B, g_Berkson=Calculate$g_Berkson,
      c_true_mean=Calculate$c_true_mean, U_c_c=Calculate$U_c_c, c_classical=Calculate$c_classical,
      U_c_B=Calculate$U_c_B, c_Berkson=Calculate$c_Berkson,
      f_true_mean=Calculate$f_true_mean, U_f_c=Calculate$U_f_c, f_classical=Calculate$f_classical,
      U_f_B=Calculate$U_f_B, f_Berkson=Calculate$f_Berkson,
      b_true_mean=Calculate$b_true_mean, U_b_c=Calculate$U_b_c, b_classical=Calculate$b_classical,
      U_b_B=Calculate$U_b_B, b_Berkson=Calculate$b_Berkson,
      tau_e_true_mean=Calculate$tau_e_true_mean, U_tau_e_c=Calculate$U_tau_e_c, tau_e_classical=Calculate$tau_e_classical,
      U_tau_e_B=Calculate$U_tau_e_B, tau_e_Berkson=Calculate$tau_e_Berkson,
      A_ref=Calculate$A_ref, A_t_o=Calculate$A_t_o, r_t_o=Calculate$r_t_o)
    
    if(Berkson==TRUE){
      if(Assignment=="add" || Assignment=="mult"){
        # with assignment error the error U_A is handed on as well
        miner_args <- append(miner_args, list(U_A=Calculate$U_A),
                             after=match("activity", names(miner_args)))
      }
    } else{
      # classical error only: drop all Berkson components
      miner_args[["Berkson"]] <- FALSE
      miner_args <- miner_args[!(names(miner_args) %in% berkson_only_args)]
    }
    
    Miner <- do.call("Survival.Time.Miner", miner_args)
    
    # Stacked pairwise on purpose, so that row names are built exactly as
    # before (they end up in the CSV files written by the caller).
    data <- rbind(data, Miner)
  }
  
  # Measurement model per record. The order of the assignments matters:
  # later rules overwrite earlier ones.
  data$model <- "M1a"
  data$model[data$year>1954] <- "M2"
  data$model[data$year %in% c(1958, 1959, 1960)] <- "M2_Expert"
  data$model[data$year>1969] <- "M3"
  data$model[data$object %in% 5:6] <- "M4"
  data$model[data$true.exposure==0] <- "M0"
  data$model[data$year==0] <- NA
  
  # Grouping columns; they stay NA outside the object/model combinations
  # listed below.
  n_rows <- nrow(data)
  
  data$b_period <- rep(NA, n_rows)
  data$b_period[data$object==2 & data$model=="M1a"] <- 1
  data$b_period[data$object==3 & data$model=="M1a"] <- 2
  data$b_period[data$object==4 & data$model=="M1a"] <- 3
  data$b_reference <- rep(NA, n_rows)
  data$b_reference[data$model=="M1a"] <- 0
  data$cluster_C_Rn_old <- rep(NA, n_rows)
  data$cluster_C_Rn_old[data$object %in% c(2,3,4) & data$model=="M1a"] <- 1
  data$cluster_C_Rn_obs_ref <- rep(NA, n_rows)
  data$cluster_C_Rn_obs_ref[data$object %in% c(1,3,4) & data$model=="M1a"] <- 1
  data$tau_e_period <- rep(NA, n_rows)
  data$tau_e_period[data$object==3 & data$model=="M1a"] <- 1
  data$tau_e_period[data$object==4 & data$model=="M1a"] <- 2
  data$tau_e_no_error <- rep(NA, n_rows)
  data$tau_e_no_error[data$model=="M1a"] <- 0
  data$tau_e_no_error[data$object==1 & data$model=="M1a"] <- 1
  
  
  return(data)
}





################################################################################





# Draw the "true" values / true means of all exposure components and the
# corresponding measurement errors for one simulated dataset.
#
# Arguments:
#   seed              seed from which one seed per component is derived
#   measurementmodel  for "M1aM2M2_ExpertM3M4" two additional objects
#                     (columns 5 and 6) are generated for C_Rn_true and U_Rn
#   Berkson           if TRUE, the Berkson errors (U_*_B) are drawn and
#                     returned as well
#   Data              not used
#   NVtrunc           must be TRUE (truncated normal radon concentrations);
#                     the alternative is not implemented
#   n_o               number of objects (4 or 6)
#
# Returns a named list with the true values, the fixed constants (A_ref,
# A_t_o, r_t_o), the classical errors and, if Berkson==TRUE, the Berkson
# errors. Matrices indexed by year have the years as row names and the
# objects as column names.
#
# Side effects: resets the global RNG via set.seed() before every draw.
U_truemeans <- function (seed, measurementmodel, Berkson, Data, NVtrunc, n_o){
  
  set.seed(seed)
  seeds <- round(runif(25, 10000,99999),0)
  
  # Draw "true/true_mean" values from the respective distributions 
  if(NVtrunc == TRUE){
    set.seed(seeds[1]) 
    # rows are years 1955-1989 (3 + 3 + 9 + 20 years), columns are objects;
    # rows 4-6 (1958-1960) are drawn from a lognormal distribution
    C_Rn_true <- matrix(c(rtruncnorm(4*3, a=0, b=Inf, mean = 6, sd = 8), 
                          rlnorm(4*3, meanlog=1.78, sdlog=0.79),
                          rtruncnorm(4*9, a=0, b=Inf, mean = 6, sd = 8),
                          rtruncnorm(4*20, a=0, b=Inf, mean = 0.15, sd = 0.2)), 
                        ncol=4, nrow=35, byrow=TRUE) # 4 objects * 35 years
    # no values for the 9 years 1946-1954
    C_Rn_true <- rbind(matrix(rep(NA, 4*9), ncol=4, nrow=9), C_Rn_true)
    
    if(measurementmodel=="M1aM2M2_ExpertM3M4"){
      C_Rn_true <- cbind(C_Rn_true, matrix(rlnorm(2*44, meanlog=2, sdlog=0.8),
                                           ncol=2, nrow=44, byrow=TRUE)) # 2 additional objects * 44 years
    }  
  } else {
    # The non-truncated alternative was never completed; without this check
    # the function fails further down with "object 'C_Rn_true' not found".
    stop("U_truemeans: NVtrunc = FALSE is not implemented; call with NVtrunc = TRUE",
         call. = FALSE)
    # set.seed(seeds[1]) 
    # C_Rn_true <- matrix(rlnorm(4*35, meanlog = 1.2, sdlog = 1.8), 
    #                     ncol=4, nrow=35) # 4 objects * 35 years
  }
  
  row.names(C_Rn_true) <- 1946:1989
  colnames(C_Rn_true) <- 1:n_o 
  
  set.seed(seeds[2])
  w_true_mean <- c(0.88 + rbeta(n=4, shape1 = 3, shape2 = 3)*0.32) # 4 periods, min=0.88, max=1.2
  
  set.seed(seeds[3])    
  g_true_mean <- matrix((0.2 + rbeta(n=3*4, shape1=3, shape2=3)*0.4),
                        ncol=4, nrow=3) # 3 periods * 4 objects, min=0.2, max=0.6
  row.names(g_true_mean) <- 1:3
  colnames(g_true_mean) <- 1:4
  
  set.seed(seeds[4])    
  c_true_mean <- c(1.2 + rbeta(n=4, shape1=3, shape2=3)*0.3) # 4 objects, min=1.2, max=1.5
  
  set.seed(seeds[5])    
  f_true_mean <- matrix(rbeta(n=n_o*2, shape1=3, shape2=3),
                        ncol=n_o, nrow=2) # 4 or 6 objects * 2 activities except hewer
  row.names(f_true_mean) <- 2:3
  colnames(f_true_mean) <- 1:n_o
  
  set.seed(seeds[6])
  b_true_mean <- c(0, 0.17 + rbeta(n=3, shape1=3, shape2=3)*0.83) # first object is reference without old mining, 3 objects, min=0.17, max=1
  
  set.seed(seeds[7])  
  C_Rn_old_true_dist <- rtruncnorm(1, a=0, b=Inf, mean = 22.5, sd = 4)
  C_Rn_old_true <- c(0,rep(C_Rn_old_true_dist,3)) # first object is reference without old mining  
  
  set.seed(seeds[8])
  C_Rn_ref_true_dist <- rtruncnorm(1, a=0, b=Inf, mean = 27.3, sd = 4)
  C_Rn_ref_true <- c(C_Rn_ref_true_dist, 0, rep(C_Rn_ref_true_dist,2)) # second object is just old mining 
  
  A_ref <- c(1505.16, 1, 1505.16, 1505.16 ) # second object is just old mining, hence theoretical A_ref=0; to prevent division by 0, A_ref=1 is used
  
  set.seed(seeds[9])
  tau_e_true_mean <- c(1, 0, 0.46 + rbeta(n=2, shape1 = 3, shape2 = 3)*0.54) # first object is reference with tau_e=1, second is only old mining, 2 objects, min=0.46, max=1
  
  A_t_o <- cbind(c(30, 70, 100, 113.2, 442.24, 538.16, 680.98, 816.8, 1135.86),
                 rep(0,9), # second object is old mining, hence A_t_o=0
                 c(89,250,624,998,1425,1746,1960,2173,2364.24),
                 c(88,157,432,845,1120,1395,2129.82,2284,2587.18)) 
  row.names(A_t_o) <- 1946:1954
  colnames(A_t_o) <- 1:4 
  
  r_t_o <- cbind(rep(1,9), rep(0,9), # first obj is reference, r_t_o=1; second is only old mining, r_t_o=0
                 rep(0.649,9), c(rep(0.504,7),rep(0.475,2))) 
  row.names(r_t_o) <- 1946:1954
  colnames(r_t_o) <- 1:4 
  
  
  # Draw the classical measurement errors. Lognormal errors (package stats)
  # use meanlog=-sigma^2/2, sdlog=sigma.
  
  set.seed(seeds[10])  
  # same year layout as C_Rn_true: normal errors with mean 0, except for
  # rows 4-6 (1958-1960), which get lognormal errors
  U_Rn <- matrix(c(rnorm(4*3, mean = 0, sd = 0.59),
                   rlnorm(4*3, meanlog = -(0.936)^2/2, sdlog = 0.936),
                   rnorm(4*9, mean = 0, sd = 0.59),
                   rnorm(4*20, mean = 0, sd = 0.03)), 
                 ncol=4, nrow=35, byrow=TRUE)   # 4 objects * 35 years
  U_Rn <- rbind(matrix(rep(NA, 4*9), ncol=4, nrow=9), U_Rn)
  if(measurementmodel=="M1aM2M2_ExpertM3M4"){
    U_Rn <- cbind(U_Rn, matrix(rlnorm(2*44, meanlog = -(0.936)^2/2, sdlog = 0.936), 
                               ncol=2, nrow=44, byrow=TRUE)) # 2 additional objects * 44 years
  }
  row.names(U_Rn) <- 1946:1989
  colnames(U_Rn) <- 1:n_o
  
  set.seed(seeds[11])
  U_w_c <- rlnorm(4, meanlog = -(0.04)^2/2, sdlog = 0.04)  # 4 periods
  
  set.seed(seeds[12]) 
  U_g_c <- matrix(rlnorm(3*4, meanlog = -(0.23)^2/2, sdlog = 0.23), ncol=4, nrow=3)  # 3 periods * 4 objects
  rownames(U_g_c) <- 1:3
  colnames(U_g_c) <- 1:4
  
  set.seed(seeds[13])
  U_c_c <- rlnorm(4, meanlog = -(0.33)^2/2, sdlog = 0.33)
  
  set.seed(seeds[14])
  U_f_c <- matrix(rlnorm(n_o*2, meanlog = -(0.33)^2/2, sdlog = 0.33), ncol=n_o, nrow=2)  # 4/6 objects * 2 activities except hewer
  rownames(U_f_c) <- 2:3
  colnames(U_f_c) <- 1:n_o
  
  set.seed(seeds[15])
  U_b_c <- rlnorm(4, meanlog = -(0.33)^2/2, sdlog = 0.33)
  
  set.seed(seeds[16])
  U_Rn_old <- rnorm(1, mean = 0, sd = 6.56)
  U_Rn_old <- c(0, rep(U_Rn_old,3)) # additive error; 0 for the first object, one shared draw for objects 2-4
  
  set.seed(seeds[17])
  U_Rn_ref <- rnorm(1, mean = 0, sd = 5.29)
  U_Rn_ref <- c(U_Rn_ref, 0, rep(U_Rn_ref,2)) # additive error; 0 for the second object, one shared draw for the others
  
  set.seed(seeds[18])
  U_tau_e_c <- c(1, rlnorm(3, meanlog = -(0.37)^2/2, sdlog = 0.37)) # first obj is reference, hence, only error for tau_e=1
  
  
  # Draw the Berkson measurement errors
  
  if(Berkson==TRUE){ 
    # used values proposed by Raphael (sd = 1.45 generates numerically unstable results)
    sd_U_w_B <- 0.12
    sd_U_g_B <- 0.69
    sd_U_c_B <- 0.69
    sd_U_f_B <- 0.69
    sd_U_b_B <- 0.69
    sd_U_tau_e_B <- 0.33
    
    # old values/
    # sd_U_w_B <- 0.12
    # sd_U_g_B <- 0.69
    # sd_U_c_B <- 1.45
    # sd_U_f_B <- 0.69
    # sd_U_b_B <- 0.69
    # sd_U_tau_e_B <- 0.33
    
    set.seed(seeds[19])
    U_w_B <- matrix(rlnorm(4*44, meanlog = -(sd_U_w_B)^2/2, sdlog = sd_U_w_B), nrow=44, ncol=4)  # 4 objects *44 years
    row.names(U_w_B) <- 1946:1989
    colnames(U_w_B) <- 1:4
    
    set.seed(seeds[20])
    U_g_B <- matrix(rlnorm(4*44, meanlog = -(sd_U_g_B)^2/2, sdlog = sd_U_g_B), nrow=44, ncol=4)  # 4 objects *44 years
    row.names(U_g_B) <- 1946:1989
    colnames(U_g_B) <- 1:4  
    
    set.seed(seeds[21])
    U_c_B <- matrix(rlnorm(4*44, meanlog = -(sd_U_c_B)^2/2, sdlog = sd_U_c_B), nrow=44, ncol=4)  # 4 objects *44 years
    row.names(U_c_B) <- 1946:1989
    colnames(U_c_B) <- 1:4  
    
    set.seed(seeds[22])
    # one draw for both activities: first half -> activity 2, second half -> activity 3
    U_f_B <- rlnorm(n_o*44*2, meanlog = -(sd_U_f_B)^2/2, sdlog = sd_U_f_B)  # 4/6 objects *44 years * 2 activities except hewer
    U_f_B_2 <- matrix(U_f_B[1:(n_o*44)], nrow=44, ncol=n_o)
    row.names(U_f_B_2) <- 1946:1989
    colnames(U_f_B_2) <- 1:n_o
    U_f_B_3 <- matrix(U_f_B[(n_o*44+1):(n_o*44*2)], nrow=44, ncol=n_o)
    row.names(U_f_B_3) <- 1946:1989
    colnames(U_f_B_3) <- 1:n_o
    
    set.seed(seeds[23])
    U_b_B <- matrix(rlnorm(4*44, meanlog = -(sd_U_b_B)^2/2, sdlog = sd_U_b_B), nrow=44, ncol=4) # 4 objects *44 years
    row.names(U_b_B) <- 1946:1989
    colnames(U_b_B) <- 1:4  
    
    set.seed(seeds[24])
    U_tau_e_B <- matrix(rlnorm(3*44, meanlog = -(sd_U_tau_e_B)^2/2, sdlog = sd_U_tau_e_B), nrow=44, ncol=3)  # 3 objects *44 years
    U_tau_e_B <- cbind(rep(1,44), U_tau_e_B) # first is reference obj without error, tau_e=1
    row.names(U_tau_e_B) <- 1946:1989
    colnames(U_tau_e_B) <- 1:4  
    
    
    return(list(C_Rn_true=C_Rn_true, w_true_mean=w_true_mean, 
                g_true_mean=g_true_mean, c_true_mean=c_true_mean,
                f_true_mean=f_true_mean, b_true_mean=b_true_mean,
                C_Rn_old_true=C_Rn_old_true, C_Rn_ref_true=C_Rn_ref_true,
                A_ref=A_ref, tau_e_true_mean=tau_e_true_mean,
                A_t_o=A_t_o, r_t_o=r_t_o,
                U_Rn=U_Rn, U_Rn_old=U_Rn_old, U_Rn_ref=U_Rn_ref,
                U_w_c=U_w_c, U_w_B=U_w_B, 
                U_g_c=U_g_c, U_g_B=U_g_B, U_c_c=U_c_c, U_c_B=U_c_B,
                U_f_c=U_f_c, U_f_B_2=U_f_B_2, U_f_B_3=U_f_B_3,
                U_b_c=U_b_c, U_b_B=U_b_B, U_tau_e_c=U_tau_e_c, U_tau_e_B=U_tau_e_B))
  } else{
    # classical errors only
    return(list(C_Rn_true=C_Rn_true, w_true_mean=w_true_mean, 
                g_true_mean=g_true_mean, c_true_mean=c_true_mean,
                f_true_mean=f_true_mean, b_true_mean=b_true_mean,
                C_Rn_old_true=C_Rn_old_true, C_Rn_ref_true=C_Rn_ref_true,
                A_ref=A_ref, tau_e_true_mean=tau_e_true_mean,
                A_t_o=A_t_o, r_t_o=r_t_o,
                U_Rn=U_Rn, U_Rn_old=U_Rn_old, U_Rn_ref=U_Rn_ref,
                U_w_c=U_w_c, U_g_c=U_g_c, 
                U_c_c=U_c_c, U_f_c=U_f_c,
                U_b_c=U_b_c, U_tau_e_c=U_tau_e_c))
  }

  
}





################################################################################




Calculate_E_Z <- function(measurementmodel, Berkson, BerksonONLY, 
                          Assignment, Generate=NULL, year, 
                          object, activity, X_fictive_cohort, NVtrunc=NULL){
  
  # Calculate the classical components
  
  if(object %in% 1:4){
    C_Rn_true <- as.vector(Generate$C_Rn_true[row.names(Generate$C_Rn_true) %in% year, object])
    U_Rn <-  as.vector(Generate$U_Rn[row.names(Generate$U_Rn) %in% year, object])
    
    C_Rn_obs <- c(C_Rn_true[year<1958] + U_Rn[year<1958],
                  C_Rn_true[year %in% c(1958, 1959, 1960)] * U_Rn[year %in% c(1958, 1959, 1960)],
                  C_Rn_true[year>1960] + U_Rn[year>1960])
    
    C_Rn_old_true <- c(rep(Generate$C_Rn_old_true[object], sum(year<1955)),
                       rep(NA, sum(year>1954)))
    U_Rn_old <- c(rep(Generate$U_Rn_old[object], sum(year<1955)),
                  rep(NA, sum(year>1954)))
    C_Rn_old_obs <- C_Rn_old_true + U_Rn_old
    
    C_Rn_ref_true <- c(rep(Generate$C_Rn_ref_true[object], sum(year<1955)),
                       rep(NA, sum(year>1954)))
    U_Rn_ref <- c(rep(Generate$U_Rn_ref[object], sum(year<1955)),
                  rep(NA, sum(year>1954)))
    C_Rn_ref_obs <- C_Rn_ref_true + U_Rn_ref
    
    A_ref <- c(rep(Generate$A_ref[object], sum(year<1955)),
               rep(NA, sum(year>1954)))
    
    A_t_o <- c(as.vector(Generate$A_t_o[row.names(Generate$A_t_o) %in% year, object]),
               rep(NA, sum(year>1954)))
    
    r_t_o <- c(as.vector(Generate$r_t_o[row.names(Generate$r_t_o) %in% year, object]),
               rep(NA, sum(year>1954)))
    
    # First period of the working time factor assumed until 1958, second period 1959-1965, 
    # third period 1966-1980, fourth period 1981-1989 
    w_true_mean <- c(rep(Generate$w_true_mean[1], sum(year<1959)),
                     rep(Generate$w_true_mean[2], sum(year>1958 & year<1966)),
                     rep(Generate$w_true_mean[3], sum(year>1965 & year<1981)),
                     rep(Generate$w_true_mean[4], sum(year>1980)))
    U_w_c <- c(rep(Generate$U_w_c[1], sum(year<1959)),
               rep(Generate$U_w_c[2], sum(year>1958 & year<1966)),
               rep(Generate$U_w_c[3], sum(year>1965 & year<1981)),
               rep(Generate$U_w_c[4], sum(year>1980)))
    w_classical <- w_true_mean * U_w_c
    
    # First period of the equilibrium factor assumed until 1952, second 1953-1957, third from 1958
    g_true_mean <- c(rep(Generate$g_true_mean[1, object], sum(year<1953)),
                     rep(Generate$g_true_mean[2, object], sum(year>1952 & year<1958)),
                     rep(Generate$g_true_mean[3, object], sum(year>1957 & year<1970)),
                     rep(NA, sum(year>1969)))
    U_g_c <- c(rep(Generate$U_g_c[1, object], sum(year<1953)),
               rep(Generate$U_g_c[2, object], sum(year>1952 & year<1958)),
               rep(Generate$U_g_c[3, object], sum(year>1957 & year<1970)),
               rep(NA, sum(year>1969)))
    g_classical <- g_true_mean * U_g_c
    
    c_true_mean <- c(rep(NA, sum(year<1970)),
                     rep(Generate$c_true_mean[object], sum(year>1969)))
    U_c_c <- c(rep(NA, sum(year<1970)),
               rep(Generate$U_c_c[object], sum(year>1969)))
    c_classical <- c_true_mean * U_c_c
    
    b_true_mean <- c(rep(Generate$b_true_mean[object], sum(year<1955)),
                     rep(NA, sum(year>1954)))
    U_b_c <- c(rep(Generate$U_b_c[object], sum(year<1955)),
               rep(NA, sum(year>1954)))
    b_classical <- b_true_mean * U_b_c
    
    tau_e_true_mean <- c(rep(Generate$tau_e_true_mean[object], sum(year<1955)),
                         rep(NA, sum(year>1954)))
    U_tau_e_c <- c(rep(Generate$U_tau_e_c[object], sum(year<1955)),
                   rep(NA, sum(year>1954)))
    tau_e_classical <- tau_e_true_mean * U_tau_e_c
    
  } else if(object %in% 5:6){
    C_Rn_true <- as.vector(Generate$C_Rn_true[row.names(Generate$C_Rn_true) %in% year, object])
    U_Rn <-  as.vector(Generate$U_Rn[row.names(Generate$U_Rn) %in% year, object])
    C_Rn_obs <- C_Rn_true * U_Rn
    
    C_Rn_old_true <- NA
    U_Rn_old <- NA
    C_Rn_old_obs <- NA
    C_Rn_ref_true <- NA
    U_Rn_ref <- NA
    C_Rn_ref_obs <- NA
    A_ref <- NA
    A_t_o <- NA
    r_t_o <- NA
    w_true_mean <- NA
    U_w_c <- NA
    w_classical <- NA
    g_true_mean <- NA
    U_g_c <- NA
    g_classical <- NA
    c_true_mean <- NA
    U_c_c <- NA
    c_classical <- NA
    b_true_mean <- NA
    U_b_c <- NA
    b_classical <- NA
    tau_e_true_mean <- NA
    U_tau_e_c <- NA
    tau_e_classical <- NA
  }
  
  if(activity==1){
    f_true_mean <- rep(1, length(year))
    f_classical <- rep(1, length(year))
    U_f_c <- NA
  } else{
    f_true_mean <- rep(Generate$f_true_mean[activity-1, object], length(year))
    U_f_c <- rep(Generate$U_f_c[activity-1, object], length(year))
    f_classical <- f_true_mean * U_f_c
  }
  
  
  # Calculate the Berkson components
  
  if(Berkson==TRUE){
    
    if(object %in% 1:4){
      U_w_B <- as.vector(Generate$U_w_B[row.names(Generate$U_w_B) %in% year, object])
      w_Berkson <- w_true_mean * U_w_B
      
      U_g_B <- as.vector(Generate$U_g_B[row.names(Generate$U_g_B) %in% year, object])
      g_Berkson <- g_true_mean * U_g_B
      
      U_c_B <- as.vector(Generate$U_c_B[row.names(Generate$U_c_B) %in% year, object])
      c_Berkson <- c_true_mean * U_c_B
      
      U_b_B <- as.vector(Generate$U_b_B[row.names(Generate$U_b_B) %in% year, object])
      b_Berkson <- b_true_mean * U_b_B
      
      U_tau_e_B <- as.vector(Generate$U_tau_e_B[row.names(Generate$U_tau_e_B) %in% year, object])
      tau_e_Berkson <- tau_e_true_mean * U_tau_e_B
      
    } else{
      U_w_B <- NA
      w_Berkson <- NA
      U_g_B <- NA
      g_Berkson <- NA
      U_c_B <- NA
      c_Berkson <- NA
      U_b_B <- NA
      b_Berkson <- NA
      U_tau_e_B <- NA
      tau_e_Berkson <- NA
      
    }
    
    if(activity==1){
      f_Berkson <- rep(1, length(year))
      U_f_B <- NA
    } else if(activity==2){
      U_f_B <- as.vector(Generate$U_f_B_2[row.names(Generate$U_f_B_2) %in% year, object])
      f_Berkson <- f_true_mean * U_f_B
    } else {
      U_f_B <- as.vector(Generate$U_f_B_3[row.names(Generate$U_f_B_3) %in% year, object])
      f_Berkson <- f_true_mean * U_f_B
    }
    
    
    # Calculate the true exposure E 
    if(object %in% 1:4){ 
      E_M1a <- (C_Rn_old_true * b_Berkson +
                  r_t_o * (C_Rn_ref_true / A_ref) * tau_e_Berkson * A_t_o) *
        12 * w_Berkson * g_Berkson * f_Berkson
      E_M2 <- 12 * C_Rn_true * w_Berkson * g_Berkson * f_Berkson
      E_M3 <- 12 * C_Rn_true * w_Berkson * c_Berkson * f_Berkson
      E <- c(E_M1a[!is.na(E_M1a)], E_M2[!is.na(E_M2)], E_M3[!is.na(E_M3)])
    } else{
      E <- C_Rn_true * f_Berkson
    }
    
    
    if(Assignment=="add"){
      U_A <- rnorm(n=length(year), mean=0, sd=13.95)
      E <- E + U_A
    } else if(Assignment=="mult"){
      U_A <- rlnorm(n=length(year), meanlog=-2^2/2, sdlog=2)
      E <- E * U_A
    }
    if(min(X_fictive_cohort)==0){
      E[which(X_fictive_cohort==0)] <- 0
    }
    
  } else{
    
    # Calculate the true exposure E 
    if(object %in% 1:4){ 
      E_M1a <- (C_Rn_old_true * b_true_mean +
                  r_t_o * (C_Rn_ref_true / A_ref) * tau_e_true_mean * A_t_o) *
        12 * w_true_mean * g_true_mean * f_true_mean
      E_M2 <- 12 * C_Rn_true * w_true_mean * g_true_mean * f_true_mean
      E_M3 <- 12 * C_Rn_true * w_true_mean * c_true_mean * f_true_mean
      E <- c(E_M1a[!is.na(E_M1a)], E_M2[!is.na(E_M2)], E_M3[!is.na(E_M3)])
    } else{
      E <- C_Rn_true * f_true_mean
    }
    
    
    if(min(X_fictive_cohort)==0){
      E[which(X_fictive_cohort==0)] <- 0
    }
  }
  
  # Calculate the observed exposure Z
  
  if(BerksonONLY==TRUE){ 
    if(object %in% 1:4){
      Z_M1a <- (C_Rn_old_true * b_true_mean +
                  r_t_o * (C_Rn_ref_true / A_ref) * tau_e_true_mean * A_t_o) *
        12 * w_true_mean * g_true_mean * f_true_mean
      Z_M2 <- 12 * C_Rn_true * w_true_mean * g_true_mean * f_true_mean
      Z_M3 <- 12 * C_Rn_true * w_true_mean * c_true_mean * f_true_mean
      Z <- c(Z_M1a[!is.na(Z_M1a)], Z_M2[!is.na(Z_M2)], Z_M3[!is.na(Z_M3)])
    } else{
      Z <- C_Rn_true * f_true_mean
    }
    
    if(min(X_fictive_cohort)==0){
      Z[which(X_fictive_cohort==0)] <- 0
    }
  } else{
    if(object %in% 1:4){ 
      #Z <- 12 * C_Rn_true * w_classical * g_true_mean * f_true_mean
      Z_M1a <- (C_Rn_old_obs * b_classical +
                  r_t_o * (C_Rn_ref_obs / A_ref) * tau_e_classical * A_t_o) *
        12 * w_classical * g_classical * f_classical
      Z_M2 <- 12 * C_Rn_obs * w_classical * g_classical * f_classical
      Z_M3 <- 12 * C_Rn_obs * w_classical * c_classical * f_classical
      Z <- c(Z_M1a[!is.na(Z_M1a)], Z_M2[!is.na(Z_M2)], Z_M3[!is.na(Z_M3)])
    } else{
      Z <- C_Rn_obs * f_classical
    }
    
    if(min(X_fictive_cohort)==0){
      Z[which(X_fictive_cohort==0)] <- 0
    }
  }
  
  
  if(Berkson==TRUE){ 
    if(Assignment=="add" | Assignment=="mult"){
      return(list(E=E, Z=Z, U_A=U_A, 
                  C_Rn_true=C_Rn_true, U_Rn=U_Rn, C_Rn_obs=C_Rn_obs, 
                  C_Rn_old_true=C_Rn_old_true, U_Rn_old=U_Rn_old, C_Rn_old_obs=C_Rn_old_obs, 
                  C_Rn_ref_true=C_Rn_ref_true, U_Rn_ref=U_Rn_ref, C_Rn_ref_obs=C_Rn_ref_obs,
                  A_ref=A_ref, A_t_o=A_t_o, r_t_o=r_t_o,
                  w_true_mean=w_true_mean, U_w_c=U_w_c, w_classical=w_classical, U_w_B=U_w_B, w_Berkson=w_Berkson,
                  g_true_mean=g_true_mean, U_g_c=U_g_c, g_classical=g_classical, U_g_B=U_g_B, g_Berkson=g_Berkson,
                  c_true_mean=c_true_mean, U_c_c=U_c_c, c_classical=c_classical, U_c_B=U_c_B, c_Berkson=c_Berkson,
                  f_true_mean=f_true_mean, U_f_c=U_f_c, f_classical=f_classical, U_f_B=U_f_B, f_Berkson=f_Berkson,
                  b_true_mean=b_true_mean, U_b_c=U_b_c, b_classical=b_classical, U_b_B=U_b_B, b_Berkson=b_Berkson,
                  tau_e_true_mean=tau_e_true_mean, U_tau_e_c=U_tau_e_c, tau_e_classical=tau_e_classical, U_tau_e_B=U_tau_e_B, tau_e_Berkson=tau_e_Berkson))
    } else{ 
      return(list(E=E, Z=Z, 
                  C_Rn_true=C_Rn_true, U_Rn=U_Rn, C_Rn_obs=C_Rn_obs,
                  C_Rn_old_true=C_Rn_old_true, U_Rn_old=U_Rn_old, C_Rn_old_obs=C_Rn_old_obs, 
                  C_Rn_ref_true=C_Rn_ref_true, U_Rn_ref=U_Rn_ref, C_Rn_ref_obs=C_Rn_ref_obs,
                  A_ref=A_ref, A_t_o=A_t_o, r_t_o=r_t_o,
                  w_true_mean=w_true_mean, U_w_c=U_w_c, w_classical=w_classical, U_w_B=U_w_B, w_Berkson=w_Berkson,
                  g_true_mean=g_true_mean, U_g_c=U_g_c, g_classical=g_classical, U_g_B=U_g_B, g_Berkson=g_Berkson,
                  c_true_mean=c_true_mean, U_c_c=U_c_c, c_classical=c_classical, U_c_B=U_c_B, c_Berkson=c_Berkson,
                  f_true_mean=f_true_mean, U_f_c=U_f_c, f_classical=f_classical, U_f_B=U_f_B, f_Berkson=f_Berkson,
                  b_true_mean=b_true_mean, U_b_c=U_b_c, b_classical=b_classical, U_b_B=U_b_B, b_Berkson=b_Berkson,
                  tau_e_true_mean=tau_e_true_mean, U_tau_e_c=U_tau_e_c, tau_e_classical=tau_e_classical, U_tau_e_B=U_tau_e_B, tau_e_Berkson=tau_e_Berkson))
    }
    
  } else{ 
    return(list(E=E, Z=Z, 
                C_Rn_true=C_Rn_true, U_Rn=U_Rn, C_Rn_obs=C_Rn_obs,
                C_Rn_old_true=C_Rn_old_true, U_Rn_old=U_Rn_old, C_Rn_old_obs=C_Rn_old_obs, 
                C_Rn_ref_true=C_Rn_ref_true, U_Rn_ref=U_Rn_ref, C_Rn_ref_obs=C_Rn_ref_obs,
                A_ref=A_ref, A_t_o=A_t_o, r_t_o=r_t_o,
                w_true_mean=w_true_mean, U_w_c=U_w_c, w_classical=w_classical,
                g_true_mean=g_true_mean, U_g_c=U_g_c, g_classical=g_classical,
                c_true_mean=c_true_mean, U_c_c=U_c_c, c_classical=c_classical,
                f_true_mean=f_true_mean, U_f_c=U_f_c, f_classical=f_classical,
                b_true_mean=b_true_mean, U_b_c=U_b_c, b_classical=b_classical,
                tau_e_true_mean=tau_e_true_mean, U_tau_e_c=U_tau_e_c, tau_e_classical=tau_e_classical))
  }

}




################################################################################





# Simulate the survival time of one miner and return it in counting-process
# (start/stop) format, one row per interval that starts before the exit time.
#
# The hazard is piecewise constant on the grid c(0, time.interval). Its
# baseline is a step function of the grid time with break points 40, 55, 75
# (presumably attained age in years -- confirm against the caller). Event
# times are drawn on the transformed scale g.inv(t) = t * basehaz and mapped
# back with g(y) = y / basehaz, so on the original time scale all rates are
# effectively multiplied by basehaz.
#
# Arguments used by the simulation itself:
#   Berkson        selects which set of column names is applied to the result.
#   time.interval  start times of the intervals; its first element is the
#                  lower bound t.min for event and censoring times.
#   true.X, true.X.cum   per-interval and cumulative true exposure; the
#                  cumulative exposure drives the hazard.
#   obs.Z, obs.Z.cum     observed counterparts; if obs.Z.cum is NULL both are
#                  replaced by the true values.
#   model          "Cox"  -> rate = exp(beta * X.cum + log(baseline step))
#                  "EHR"  -> rate = baseline step * (1 + beta * X.cum)
#                  Anything else raises an error.
#   beta, basehaz  effect size and time-scale factor (see above).
#   censor         rate of exponential censoring (NULL = no censoring).
#   truncation     upper bound applied to the exit time (NULL = none); a miner
#                  whose event time exceeds it gets delta = 0.
#   ID, year, object, activity   copied into the output rows.
# `seed` and `measurementmodel` are accepted but not used in this function.
# All remaining arguments are per-interval vectors that are clipped to the
# number of output rows and appended as columns; arguments that are NULL
# contribute no column.
#
# NOTE(review): the column names assigned below are positional and assume a
# particular set of non-NULL optional arguments (they contain no names for
# the C_Rn_ref_0_*, C_Rn_ref_130_*, e_*, e2_*, d_t_o and U_combined inputs).
# Confirm against the callers. Some names also differ from the argument
# names: C_Rn_old_obs -> "C_Rn_old", C_Rn_ref_obs -> "C_Rn_obs_ref",
# b_classical -> "b", tau_e_classical -> "tau_e", A_t_o -> "A_calculated",
# r_t_o -> "r".
#
# Returns a data.frame whose last row carries delta = 1 if the event was
# observed (not censored, not truncated) and 0 otherwise.
Survival.Time.Miner <- function(seed, Berkson, time.interval, true.X, true.X.cum, obs.Z=NULL, obs.Z.cum=NULL, measurementmodel,
                                model, beta, basehaz, censor=NULL, truncation=NULL, 
                                ID, year, object, activity, U_A=NULL, C_Rn_true=NULL, U_Rn=NULL, C_Rn_obs=NULL,
                                C_Rn_old_true=NULL, U_Rn_old=NULL, C_Rn_old_obs=NULL, C_Rn_ref_true=NULL, U_Rn_ref=NULL, C_Rn_ref_obs=NULL,
                                C_Rn_ref_0_true=NULL, U_Rn_ref_0=NULL, C_Rn_ref_0_obs=NULL, C_Rn_ref_130_true=NULL, U_Rn_ref_130=NULL, C_Rn_ref_130_obs=NULL,
                                w_period=NULL, w_true_mean=NULL, U_w_c=NULL, w_classical=NULL, U_w_B=NULL, w_Berkson=NULL,
                                g_period=NULL, g_true_mean=NULL, U_g_c=NULL, g_classical=NULL, U_g_B=NULL, g_Berkson=NULL,
                                c_true_mean=NULL, U_c_c=NULL, c_classical=NULL, U_c_B=NULL, c_Berkson=NULL,
                                f_true_mean=NULL, U_f_c=NULL, f_classical=NULL, U_f_B=NULL, f_Berkson=NULL,
                                b_true_mean=NULL, U_b_c=NULL, b_classical=NULL, U_b_B=NULL, b_Berkson=NULL,
                                tau_e_true_mean=NULL, U_tau_e_c=NULL, tau_e_classical=NULL, U_tau_e_B=NULL, tau_e_Berkson=NULL,
                                e_period=NULL, e_true_mean=NULL, U_e_c=NULL, e_classical=NULL, U_e_B=NULL, e_Berkson=NULL,
                                e2_period=NULL, e2_true_mean=NULL, U_e2_c=NULL, e2_classical=NULL, U_e2_B=NULL, e2_Berkson=NULL,
                                A_ref=NULL, A_t_o=NULL, r_t_o=NULL, d_t_o=NULL,
                                U_combined=NULL){
  
  # Fail fast on an unsupported hazard model, before any package is attached
  # or any random number is drawn.
  if (length(model) != 1L || is.na(model) || !(model %in% c("Cox", "EHR"))) {
    stop("Use for model either 'Cox' or 'EHR'", call. = FALSE)
  }
  
  # library() errors immediately if a package is missing. survival is not
  # called in this function; it is still attached here so that the session
  # state after the call is the same as before this change.
  library(msm)
  library(survival)
  
  # Time transformation g() and its inverse g^-1()
  g <- function(x) x / basehaz
  g.inv <- function(x) x * basehaz
  
  # Bounds of the accepted event times
  t.max <- 500
  t.min <- time.interval[1]
  # Here you could also try a lag of 5 years
  # t.min <- time.interval[1]+5
  
  g.inv.t.max <- g.inv(t.max)
  g.inv.t.min <- g.inv(t.min)
  
  # Grid of change points, starting at 0 as rpexp() requires
  grid <- c(0, time.interval)
  g.inv.t <- g.inv(grid)
  X.cum <- c(0, true.X.cum)
  
  # Index (1..4) of the baseline step each grid point falls into:
  # t < 40 -> 1, 40 <= t < 55 -> 2, 55 <= t < 75 -> 3, t >= 75 -> 4
  I <- findInterval(grid, c(40, 55, 75)) + 1
  # Baseline hazard step belonging to each grid point
  basesteps <- c(0.015, 0.3, 1.15, 2.5)[I]
  
  if (is.null(obs.Z.cum)) {
    obs.Z <- true.X
    obs.Z.cum <- true.X.cum
  }
  
  # Piecewise constant rates; expressions kept exactly as before so that
  # seeded simulations reproduce the same numbers.
  if (model == "Cox") {
    lambda <- exp(beta*X.cum + log(basesteps))
  } else {
    lambda <- basesteps+beta*X.cum*basesteps
  }
  
  # Accept-reject sampling: draw from the piecewise exponential distribution
  # with rates lambda changing at g.inv.t until the draw lies strictly
  # between the transformed bounds.
  repeat {
    y <- msm::rpexp(1, lambda, g.inv.t)
    if (y > g.inv.t.min && y < g.inv.t.max) break
  }
  g.y <- g(y)
  
  # Exit time Y: event time, possibly cut short by exponential censoring
  # (redrawn until it exceeds t.min) ...
  if (is.null(censor)) {
    Y <- g.y
  } else {
    C <- rexp(1, rate = censor)
    while (C <= t.min) {
      C <- rexp(1, rate = censor)
    }
    Y <- min(C, g.y)
  }
  
  # ... and by the truncation bound
  if (!is.null(truncation)) {
    Y <- min(Y, truncation)
  }
  
  # Intervals that start before Y (the leading 0 of the grid is dropped)
  starts <- grid[-c(1, which(grid >= Y))]
  stops <- c(grid[-c(1, 2, which(grid >= Y))], Y)
  n_rows <- length(starts)
  if (n_rows < 1L) {
    stop("No interval of 'time.interval' starts before the exit time ", Y,
         " (ID ", ID, "); check 'truncation' against 'time.interval'",
         call. = FALSE)
  }
  idx <- seq_len(n_rows)
  # First n_rows entries of a per-interval vector; NULL stays NULL and is
  # therefore ignored by cbind()
  clip <- function(v) v[idx]
  
  # Assemble the data set; column names are assigned afterwards
  data <- data.frame(cbind(
    rep(ID, n_rows), clip(year), starts, stops,
    clip(true.X), X.cum[idx + 1], clip(obs.Z), clip(obs.Z.cum),
    rep(0, n_rows), rep(object, n_rows), rep(activity, n_rows),
    clip(U_A),
    clip(C_Rn_true), clip(U_Rn), clip(C_Rn_obs),
    clip(C_Rn_old_true), clip(U_Rn_old), clip(C_Rn_old_obs),
    clip(C_Rn_ref_true), clip(U_Rn_ref), clip(C_Rn_ref_obs),
    clip(C_Rn_ref_0_true), clip(U_Rn_ref_0), clip(C_Rn_ref_0_obs),
    clip(C_Rn_ref_130_true), clip(U_Rn_ref_130), clip(C_Rn_ref_130_obs),
    clip(w_period), clip(w_true_mean), clip(U_w_c), clip(w_classical),
    clip(U_w_B), clip(w_Berkson),
    clip(g_period), clip(g_true_mean), clip(U_g_c), clip(g_classical),
    clip(U_g_B), clip(g_Berkson),
    clip(c_true_mean), clip(U_c_c), clip(c_classical),
    clip(U_c_B), clip(c_Berkson),
    clip(f_true_mean), clip(U_f_c), clip(f_classical),
    clip(U_f_B), clip(f_Berkson),
    clip(b_true_mean), clip(U_b_c), clip(b_classical),
    clip(U_b_B), clip(b_Berkson),
    clip(tau_e_true_mean), clip(U_tau_e_c), clip(tau_e_classical),
    clip(U_tau_e_B), clip(tau_e_Berkson),
    clip(e_period), clip(e_true_mean), clip(U_e_c), clip(e_classical),
    clip(U_e_B), clip(e_Berkson),
    clip(e2_period), clip(e2_true_mean), clip(U_e2_c), clip(e2_classical),
    clip(U_e2_B), clip(e2_Berkson),
    clip(A_ref), clip(A_t_o), clip(r_t_o), clip(d_t_o),
    clip(U_combined),
    I[idx + 1],
    deparse.level = 0))
  
  # Column names, built from the pieces shared by all variants
  id_cols <- c("ID", "year", "start", "stop", "true.exposure", "true.cum.exposure", 
               "obs.exposure", "obs.cum.exposure", "delta", "object", "activity")
  radon_cols <- c("C_Rn_true", "U_Rn", "C_Rn_obs", 
                  "C_Rn_old_true", "U_Rn_old", "C_Rn_old", 
                  "C_Rn_ref_true", "U_Rn_ref", "C_Rn_obs_ref")
  tail_cols <- c("A_ref", "A_calculated", "r", "I")
  
  if (Berkson == TRUE) {
    factor_cols <- c("w_period",
                     "w_true_mean", "U_w_c", "w_classical", "U_w_B", "w_Berkson", "g_period",
                     "g_true_mean", "U_g_c", "g_classical", "U_g_B", "g_Berkson",
                     "c_true_mean", "U_c_c", "c_classical", "U_c_B", "c_Berkson",
                     "f_true_mean", "U_f_c", "f_classical", "U_f_B", "f_Berkson",
                     "b_true_mean", "U_b_c", "b", "U_b_B", "b_Berkson",
                     "tau_e_true_mean", "U_tau_e_c", "tau_e", "U_tau_e_B", "tau_e_Berkson")
    # The assignment error U_A only has a column if it was supplied
    colnames(data) <- c(id_cols, if (!is.null(U_A)) "U_A",
                        radon_cols, factor_cols, tail_cols)
  } else {
    factor_cols <- c("w_period",
                     "w_true_mean", "U_w_c", "w_classical", "g_period",
                     "g_true_mean", "U_g_c", "g_classical",
                     "c_true_mean", "U_c_c", "c_classical",
                     "f_true_mean", "U_f_c", "f_classical", 
                     "b_true_mean", "U_b_c", "b", 
                     "tau_e_true_mean", "U_tau_e_c", "tau_e")
    colnames(data) <- c(id_cols, radon_cols, factor_cols, tail_cols)
  }
  
  # Event indicator of the last row: 1 only if the event happened before the
  # censoring time and not after the truncation bound
  not_censored <- is.null(censor) || C > g.y
  not_truncated <- is.null(truncation) || !(g.y > truncation)
  if (not_censored && not_truncated) {
    data$delta[nrow(data)] <- 1
  }
  
  data
}





################################################################################
# bias functions for fast checking
# Relative deviation of column 15 of X from column 1 of X, row by row.
# NOTE(review): presumably column 1 holds the true value and column 15 an
# estimate -- confirm against the layout returned by Simulate.Survival.Time.
Bias <- function(X) {
  ref <- X[, 1]
  val <- X[, 15]
  (val - ref) / ref
}
# Relative deviation of column 17 of X from column 1 of X, row by row.
# NOTE(review): presumably column 17 is the estimate based on the observed
# exposure -- confirm against the layout returned by Simulate.Survival.Time.
Bias_obs <- function(X) {
  ref <- X[, 1]
  val <- X[, 17]
  (val - ref) / ref
}
# Relative deviation of columns 7 to 10 of X from the fixed reference values
# 6e-05, 0.0012, 0.0046 and 0.01. The four per-column results are
# concatenated into one vector (all rows of column 7 first, then column 8,
# and so on).
# NOTE(review): the reference values equal the baseline steps 0.015, 0.3,
# 1.15, 2.5 times 0.004; presumably columns 7-10 hold the estimated baseline
# hazards -- confirm against Simulate.Survival.Time.
Bias_baseline <- function(X) {
  reference <- c(6e-05, 0.0012, 0.0046, 0.01)
  per_column <- lapply(seq_along(reference), function(k) {
    (X[, 6 + k] - reference[k]) / reference[k]
  })
  do.call(c, per_column)
}




# Generate Data
- $\beta = 0.003$
- $5000$ miners

In [None]:
# Scenario: beta = 0.003, 100 data sets with 5000 miners each.
# PATH is handed to Simulate.Survival.Time as write_path (presumably the
# directory the generated data sets are written to).
PATH <- "../../data/M1M2M2_ExpertM3M4-b3-5"

dir.create(PATH)

# Start of the run, used for the elapsed-time report at the end of the cell
t <- Sys.time()

Results_S3_M1aM2M2_ExpertM3M4 <- Simulate.Survival.Time(
  numberofdatasets = 100,
  numberofminers = 5000,
  write_path = PATH,
  seed = BASE_SEED + 2,
  measurementmodel = "M1aM2M2_ExpertM3M4",
  Berkson = TRUE,
  Assignment = "no",
  NVtrunc = TRUE,
  basehaz = 0.004,
  betas = c(0.003),
  truncation = 104,
  censor.lambda = 0.005
)

# Quick checks: relative deviations computed from fixed columns of the result
Bias(Results_S3_M1aM2M2_ExpertM3M4)
Bias_obs(Results_S3_M1aM2M2_ExpertM3M4)
Bias_baseline(Results_S3_M1aM2M2_ExpertM3M4)

print("Done!")
print("Time:")
print(Sys.time() - t)

- $\beta = 0.006$
- $5000$ miners

In [None]:
# Scenario: beta = 0.006, 100 data sets with 5000 miners each.
# PATH is handed to Simulate.Survival.Time as write_path (presumably the
# directory the generated data sets are written to).
PATH <- "../../data/M1M2M2_ExpertM3M4-b6-5"

dir.create(PATH)

# Start of the run, used for the elapsed-time report at the end of the cell
t <- Sys.time()

Results_S3_M1aM2M2_ExpertM3M4 <- Simulate.Survival.Time(
  numberofdatasets = 100,
  numberofminers = 5000,
  write_path = PATH,
  seed = BASE_SEED + 4,
  measurementmodel = "M1aM2M2_ExpertM3M4",
  Berkson = TRUE,
  Assignment = "no",
  NVtrunc = TRUE,
  basehaz = 0.004,
  betas = c(0.006),
  truncation = 104,
  censor.lambda = 0.005
)

# Quick checks: relative deviations computed from fixed columns of the result
Bias(Results_S3_M1aM2M2_ExpertM3M4)
Bias_obs(Results_S3_M1aM2M2_ExpertM3M4)
Bias_baseline(Results_S3_M1aM2M2_ExpertM3M4)

print("Done!")
print("Time:")
print(Sys.time() - t)