In [11]:
rm(list = ls())
setwd("/home/creambbq/facu/Datos de panel/TP4")
library("haven"); library("plm"); library("dplyr"); library("pglm");
set.seed(1313)

In [12]:
get_data <- function(N, TT, model) {
  df <- df <- setNames(data.frame(matrix(0, ncol = 10, nrow = N*TT)), 
                       c("j", "t", "x", "z", "eps", "u", 
                         "psi_1", "psi_2", "psi_3", "psi_4"))
  aux <- 1
  for(j in 1:N){
    psi_2 <- rnorm(1,0,1)
    psi_3 <- rnorm(1,0,1)
    psi_4 <- rnorm(1,0,1)
    for(t in 1:TT){
      x <- rnorm(1,0,1)
      z <- rnorm(1,0,1)
      eps <- rnorm(1,0,1)
      psi_1 <- rnorm(1,0,1)
      u <- 0.6*eps + 0.8*psi_1
      df[aux, ] <- c(j, t, x, z, eps, u, psi_1, psi_2, psi_3, psi_4)
      aux <- aux + 1
    }
  }
  if (model == "A"){
    df <- df %>% mutate(alpha = psi_2 + psi_4, 
                        c = psi_3 + psi_4, 
                        s = case_when(x + z + alpha + eps > 0 ~ 1,
                                      TRUE ~ 0),
                        y = case_when(s == 1 ~ x + c + u,
                                      TRUE ~ NA_real_))
  } else if (model == "B"){
    df <- df %>% group_by(j) %>% mutate(alpha = psi_2 + sum(z)/2, 
                                        c = psi_3 + sum(x)/2) %>% 
      ungroup()
    df <- df %>% mutate(s = case_when(x + z + alpha + eps > 0 ~ 1,
                                      TRUE ~ 0),
                        y = case_when(s == 1 ~ x + c + u,
                                      TRUE ~ NA_real_))
  } else if (model == "C") { 
    df <- df %>% group_by(j) %>% mutate(alpha = psi_2 + sum(z)/2 + psi_4, 
                                        c = psi_3 + sum(x)/2 + psi_4) %>% 
      ungroup()
    df <- df %>% mutate(s = case_when(x + z + alpha + eps > 0 ~ 1,
                                      TRUE ~ 0),
                        y = case_when(s == 1 ~ x + c + u,
                                      TRUE ~ NA_real_))
    }
  df <- df[, c("j", "t", "y", "x", "s", "z", "alpha", "c")]
  return(df)
}
get_wooldridge <- function(df, boots){
  df <- df %>% group_by(j) %>% mutate(mean_x = mean(x), 
                                      mean_z = mean(z), 
                                      t1 = case_when(t == 1 ~ 1,
                                                     TRUE ~ 0), 
                                      t2 = case_when(t == 2 ~ 1, 
                                                     TRUE ~ 0)) %>% 
    ungroup()
  pdata <- pdata.frame(df, index = c("j", "t"))
  probit <- pglm(s ~ x + mean_x + z + mean_z -1, 
                 family = binomial("probit"), 
                 model = "pooling", 
                 method = "bfgs",
                 data = pdata)
  df <- df %>% mutate(pred = probit$estimate[1]*x + 
                        probit$estimate[2]*mean_x + 
                        probit$estimate[3]*z + 
                        probit$estimate[4]*mean_z, 
                      lambda = dnorm(pred)/pnorm(pred), 
                      lambda_1 = t1*lambda,
                      lambda_2 = t2*lambda)
  pdata <- pdata.frame(df, index = c("j", "t"))
  pOls <- plm(y ~ x + mean_x + lambda_1 + lambda_2 -1,
              fixed = c("j", "t"), 
              effect = "individual", 
              model = "pooling", 
              data = pdata)
  if (boots){
    return(c(pOls$coefficients[1], probit$estimate[1], probit$estimate[3],(df$s-df$pred)))
  } else {
    return(c(pOls$coefficients[1], probit$estimate[1], probit$estimate[3]))
  }
}
montecarlo <- function(S) {
  models <- c("A", "B", "C")
  Ns <- c(20,40,100)
  df <- df <- setNames(data.frame(matrix(0, ncol = 6, nrow = length(Ns)*S)),
                       c("N", "T", "model", "beta", "gamma_1", "gamma_2"))
  aux <- 1
  for(s in 1:S) {
    for(N in Ns){
      for(model in models){
        data <- get_data(N, 2, model)
        df[aux, ] <- c(N, 2, model, get_wooldridge(data, boots = FALSE))
        aux <- aux + 1
      }
    }
  }
  resultados <- df %>% mutate(across(!model, as.numeric)) %>% 
    group_by(N, model) %>% 
    summarise(sesgo_medio_beta = mean(beta) - 1, 
              sesgo_medio_gamma1 = mean(gamma_1) - 1, 
              sesgo_medio_gamma2 = mean(gamma_2) - 1, 
              sesgo_mediano_beta = median(beta) - 1,
              sesgo_mediano_gamma1 = median(gamma_1) - 1,
              sesgo_mediano_gamma2 = median(gamma_2) - 1,
              desvio_beta = sqrt((sum(beta - mean(beta))^2)/S), 
              desvio_gamma1 = sqrt((sum(gamma_1 - mean(gamma_1))^2)/S),
              desvio_gamma2 = sqrt((sum(gamma_2 - mean(gamma_2))^2)/S), 
              rmse_beta = sqrt(((sum(mean(beta)-1))^2)/S), 
              rmse_gamma_1 = sqrt(((sum(mean(gamma_1)-1))^2)/S), 
              rmse_gamma_2= sqrt(((sum(mean(gamma_2)-1))^2)/S), 
              desvio_medio_abs_beta = abs(sum(beta-mean(beta)))/S)
  return(resultados)
}
get_bt_iteration <- function(N, TT, model){
  df <- get_data(N, TT, model)
  wold <- get_wooldridge(df, boots = TRUE)
  beta_hat <- wold[1]
  gamma1_hat <- wold[2]
  gamma2_hat <- wold[3]
  eps_hat <- wold[4:(N*TT+3)]
  muestra <- eps_hat[sample.int(N*TT, N*TT, replace = TRUE)]
  df["eps_hat"] <- muestra
  df <- df %>% mutate(s = case_when(gamma1_hat*x + gamma2_hat*z + alpha + eps_hat > 0 ~ 1,
                                    TRUE ~ 0),
                      y = case_when(s == 1 ~ beta_hat*x + c,
                                    TRUE ~ NA_real_))
  return(get_wooldridge(df, boots = FALSE))
}
bootstrap <- function(B){
  models <- c("A", "B", "C")
  Ns <- c(20,40,100)
  res <- setNames(data.frame(matrix(0, ncol = 6, nrow = B*length(Ns))), 
                       c("N", "T", "model", "beta", "gamma1", "gamma2"))
  aux <- 1
  for (b in 1:B){
    for (N in Ns){
      for(model in models){
        res[aux, ] <- c(N, 10, model, get_bt_iteration(N, 10, model))
        aux <- aux + 1
      }
    }
  }
  res <- res %>% mutate(across(!model, as.numeric)) %>% 
    group_by(N, model) %>% 
    summarise(beta = paste(t.test(beta, conf.level = 0.95)$conf.int[1], 
                           t.test(beta, conf.level = 0.95)$conf.int[2], 
                                                    sep = " - "), 
              gamma1 = paste(t.test(gamma1, conf.level = 0.95)$conf.int[1], 
                             t.test(gamma1, conf.level = 0.95)$conf.int[2], 
                             sep = " - "), 
              gamma2 = paste(t.test(gamma2, conf.level = 0.95)$conf.int[1], 
                             t.test(gamma2, conf.level = 0.95)$conf.int[2], 
                             sep = " - "))
  return(res)
}

In [16]:
montecarlo(S = 1000)

[1m[22m`summarise()` has grouped output by 'N'. You can override using the `.groups`
argument.


N,model,sesgo_medio_beta,sesgo_medio_gamma1,sesgo_medio_gamma2,sesgo_mediano_beta,sesgo_mediano_gamma1,sesgo_mediano_gamma2,desvio_beta,desvio_gamma1,desvio_gamma2,rmse_beta,rmse_gamma_1,rmse_gamma_2,desvio_medio_abs_beta
<dbl>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
20,A,-0.0112970049,-0.3050114,-0.3062207,-0.013372874,-0.3573986,-0.3595108,1.711531e-15,1.629027e-15,1.860742e-16,0.0003572427,0.009645307,0.009683549,5.412337e-17
20,B,0.0072075208,0.181445,0.1373124,0.008717682,-0.1790091,-0.2120548,2.397899e-15,2.31715e-16,3.258053e-15,0.0002279218,0.005737794,0.004342199,7.582823e-17
20,C,0.0040653414,-0.2695462,-0.2482568,0.005883743,-0.3502289,-0.3381803,7.197209e-16,5.178479e-16,1.211238e-15,0.0001285574,0.008523799,0.00785057,2.2759570000000003e-17
40,A,-0.0056585971,-0.3682121,-0.3703239,-0.001791064,-0.386335,-0.3907765,1.55881e-15,5.757767e-16,3.756592e-16,0.0001789406,0.011643888,0.011710668,4.92939e-17
40,B,-0.0003603279,-0.2040229,-0.2117574,-0.008960586,-0.2317531,-0.2445701,5.26625e-16,1.026919e-15,3.58105e-16,1.139457e-05,0.006451771,0.006696358,1.665335e-17
40,C,0.0120098058,-0.373205,-0.3703607,0.001204926,-0.4030595,-0.3972771,3.149218e-15,4.967829e-16,1.084848e-15,0.0003797834,0.011801777,0.011711834,9.958701e-17
100,A,-0.0104292049,-0.4068716,-0.407684,-0.014904579,-0.4077285,-0.4156162,6.7408e-16,9.882996e-16,4.318325e-16,0.0003298004,0.012866411,0.0128921,2.1316280000000002e-17
100,B,-0.0013972155,-0.2660563,-0.2615729,0.005776863,-0.2696534,-0.2759581,1.025163e-15,6.653029e-16,1.744884e-15,4.418383e-05,0.008413438,0.008271663,3.241851e-17
100,C,-0.0037989806,-0.3956144,-0.4107918,-0.001283858,-0.4090418,-0.4119602,9.90055e-16,4.862504e-16,1.715042e-15,0.0001201343,0.012510427,0.012990378,3.1308290000000003e-17


In [15]:
bootstrap(B = 1000)

[1m[22m`summarise()` has grouped output by 'N'. You can override using the `.groups`
argument.


N,model,beta,gamma1,gamma2
<dbl>,<chr>,<chr>,<chr>,<chr>
20,A,0.942726726662758 - 0.974482112022787,0.386125019480433 - 0.403031088778777,0.382355251677562 - 0.399844054950864
20,B,0.983825601804865 - 1.00696275370419,0.4213169985221 - 0.439095522841933,0.414390225560934 - 0.432836033179865
20,C,0.97450692678223 - 1.0020143508821,0.326951818960078 - 0.344049703093458,0.329376065797398 - 0.34657992870442
40,A,0.962441259867782 - 0.983173414351192,0.359411627448733 - 0.371437740605436,0.359955157604336 - 0.37208840547083
40,B,0.987416317209452 - 1.00345752398281,0.411808109584478 - 0.424621621317865,0.413198934519474 - 0.4252195181738
40,C,0.975269889419827 - 0.99496729353996,0.322627867851751 - 0.334721296332131,0.321170714880369 - 0.332914296847212
100,A,0.963954964507596 - 0.977399854051773,0.354512251462243 - 0.361790500751028,0.353532539710579 - 0.360670011326123
100,B,0.988259456439461 - 0.998416239351141,0.40271729179876 - 0.410557879800307,0.404124409095184 - 0.412233967561336
100,C,0.986255820549675 - 0.998463534022802,0.313618671780353 - 0.321025346000642,0.316842028882268 - 0.324286437458612
