In [None]:
rm(list = ls())
setwd("/home/creambbq/facu/Datos de panel/TP4")
library("haven"); library("plm"); library("dplyr"); library("pglm");
set.seed(1313)

In [4]:
get_data <- function(N, TT, model) {
  df <- df <- setNames(data.frame(matrix(0, ncol = 10, nrow = N*TT)), 
                       c("j", "t", "x", "z", "eps", "u", 
                         "psi_1", "psi_2", "psi_3", "psi_4"))
  aux <- 1
  for(j in 1:N){
    psi_2 <- rnorm(1,0,1)
    psi_3 <- rnorm(1,0,1)
    psi_4 <- rnorm(1,0,1)
    for(t in 1:TT){
      x <- rnorm(1,0,1)
      z <- rnorm(1,0,1)
      eps <- rnorm(1,0,1)
      psi_1 <- rnorm(1,0,1)
      u <- 0.6*eps + 0.8*psi_1
      df[aux, ] <- c(j, t, x, z, eps, u, psi_1, psi_2, psi_3, psi_4)
      aux <- aux + 1
    }
  }
  if (model == "A"){
    df <- df %>% mutate(alpha = psi_2 + psi_4, 
                        c = psi_3 + psi_4, 
                        s = case_when(x + z + alpha + eps > 0 ~ 1,
                                      TRUE ~ 0),
                        y = case_when(s == 1 ~ x + c + u,
                                      TRUE ~ NA_real_))
  } else if (model == "B"){
    df <- df %>% group_by(j) %>% mutate(alpha = psi_2 + sum(z)/2, 
                                        c = psi_3 + sum(x)/2) %>% 
      ungroup()
    df <- df %>% mutate(s = case_when(x + z + alpha + eps > 0 ~ 1,
                                      TRUE ~ 0),
                        y = case_when(s == 1 ~ x + c + u,
                                      TRUE ~ NA_real_))
  } else if (model == "C") { 
    df <- df %>% group_by(j) %>% mutate(alpha = psi_2 + sum(z)/2 + psi_4, 
                                        c = psi_3 + sum(x)/2 + psi_4) %>% 
      ungroup()
    df <- df %>% mutate(s = case_when(x + z + alpha + eps > 0 ~ 1,
                                      TRUE ~ 0),
                        y = case_when(s == 1 ~ x + c + u,
                                      TRUE ~ NA_real_))
    }
  df <- df[, c("j", "t", "y", "x", "s", "z", "alpha", "c")]
  return(df)
}
get_wooldridge <- function(df, boots){
  df <- df %>% group_by(j) %>% mutate(mean_x = mean(x), 
                                      mean_z = mean(z), 
                                      t1 = case_when(t == 1 ~ 1,
                                                     TRUE ~ 0), 
                                      t2 = case_when(t == 2 ~ 1, 
                                                     TRUE ~ 0)) %>% 
    ungroup()
  pdata <- pdata.frame(df, index = c("j", "t"))
  probit <- pglm(s ~ x + mean_x + z + mean_z -1, 
                 family = binomial("probit"), 
                 model = "pooling", 
                 method = "bfgs",
                 data = pdata)
  df <- df %>% mutate(pred = probit$estimate[1]*x + 
                        probit$estimate[2]*mean_x + 
                        probit$estimate[3]*z + 
                        probit$estimate[4]*mean_z, 
                      lambda = dnorm(pred)/pnorm(pred), 
                      lambda_1 = t1*lambda,
                      lambda_2 = t2*lambda)
  pdata <- pdata.frame(df, index = c("j", "t"))
  pOls <- plm(y ~ x + mean_x + lambda_1 + lambda_2 -1,
              fixed = c("j", "t"), 
              effect = "individual", 
              model = "pooling", 
              data = pdata)
  if (boots){
    return(c(pOls$coefficients[1], probit$estimate[1], probit$estimate[3],(df$s-df$pred)))
  } else {
    return(c(pOls$coefficients[1], probit$estimate[1], probit$estimate[3]))
  }
}
montecarlo <- function(S) {
  models <- c("A", "B", "C")
  Ns <- c(20,40,100)
  df <- df <- setNames(data.frame(matrix(0, ncol = 6, nrow = length(Ns)*S)),
                       c("N", "T", "model", "beta", "gamma_1", "gamma_2"))
  aux <- 1
  for(s in 1:S) {
    for(N in Ns){
      for(model in models){
        data <- get_data(N, 2, model)
        df[aux, ] <- c(N, 2, model, get_wooldridge(data, boots = FALSE))
        aux <- aux + 1
      }
    }
  }
  resultados <- df %>% mutate(across(!model, as.numeric)) %>% 
    group_by(N, model) %>% 
    summarise(sesgo_medio_beta = mean(beta) - 1, 
              sesgo_medio_gamma1 = mean(gamma_1) - 1, 
              sesgo_medio_gamma2 = mean(gamma_2) - 1, 
              sesgo_mediano_beta = median(beta) - 1,
              sesgo_mediano_gamma1 = median(gamma_1) - 1,
              sesgo_mediano_gamma2 = median(gamma_2) - 1,
              desvio_beta = sqrt((sum(beta - mean(beta))^2)/S), 
              desvio_gamma1 = sqrt((sum(gamma_1 - mean(gamma_1))^2)/S),
              desvio_gamma2 = sqrt((sum(gamma_2 - mean(gamma_2))^2)/S), 
              rmse_beta = sqrt(((sum(mean(beta)-1))^2)/S), 
              rmse_gamma_1 = sqrt(((sum(mean(gamma_1)-1))^2)/S), 
              rmse_gamma_2= sqrt(((sum(mean(gamma_2)-1))^2)/S), 
              desvio_medio_abs_beta = abs(sum(beta-mean(beta)))/S, 
              desvio_medio_abs_gamma_1= abs(sum(gamma_1-mean(gamma_1)))/S,
              desvio_medio_abs_gamma_2 = abs(sum(gamma_2-mean(gamma_2)))/S)
  return(resultados)
}
get_bt_iteration <- function(N, TT, model){
  df <- get_data(N, TT, model)
  wold <- get_wooldridge(df, boots = TRUE)
  beta_hat <- wold[1]
  gamma1_hat <- wold[2]
  gamma2_hat <- wold[3]
  eps_hat <- wold[4:(N*TT+3)]
  muestra <- eps_hat[sample.int(N*TT, N*TT, replace = TRUE)]
  df["eps_hat"] <- muestra
  df <- df %>% mutate(s = case_when(gamma1_hat*x + gamma2_hat*z + alpha + eps_hat > 0 ~ 1,
                                    TRUE ~ 0),
                      y = case_when(s == 1 ~ beta_hat*x + c,
                                    TRUE ~ NA_real_))
  return(get_wooldridge(df, boots = FALSE))
}
bootstrap <- function(B){
  models <- c("A", "B", "C")
  Ns <- c(20,40,100)
  res <- setNames(data.frame(matrix(0, ncol = 6, nrow = B*length(Ns))), 
                       c("N", "T", "model", "beta", "gamma1", "gamma2"))
  aux <- 1
  for (b in 1:B){
    for (N in Ns){
      for(model in models){
        res[aux, ] <- c(N, 10, model, get_bt_iteration(N, 10, model))
        aux <- aux + 1
      }
    }
  }
  res <- res %>% mutate(across(!model, as.numeric)) %>% 
    group_by(N, model) %>% 
    summarise(beta = paste(t.test(beta, conf.level = 0.95)$conf.int[1], 
                           t.test(beta, conf.level = 0.95)$conf.int[2], 
                                                    sep = " - "), 
              gamma1 = paste(t.test(gamma1, conf.level = 0.95)$conf.int[1], 
                             t.test(gamma1, conf.level = 0.95)$conf.int[2], 
                             sep = " - "), 
              gamma2 = paste(t.test(gamma2, conf.level = 0.95)$conf.int[1], 
                             t.test(gamma2, conf.level = 0.95)$conf.int[2], 
                             sep = " - "))
  return(res)
}

In [5]:
montecarlo(S = 1000)

[1m[22m`summarise()` has grouped output by 'N'. You can override using the `.groups`
argument.


N,model,sesgo_medio_beta,sesgo_medio_gamma1,sesgo_medio_gamma2,sesgo_mediano_beta,sesgo_mediano_gamma1,sesgo_mediano_gamma2,desvio_beta,desvio_gamma1,desvio_gamma2,rmse_beta,rmse_gamma_1,rmse_gamma_2,desvio_medio_abs_beta,desvio_medio_abs_gamma_1,desvio_medio_abs_gamma_2
<dbl>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
20,A,0.0241694849,-0.3045436,-0.29505421,0.011263258,-0.3568761,-0.339911,1.225281e-15,1.697488e-15,9.619684e-16,0.0007643062,0.0096305146,0.009330433,3.874678e-17,5.3679280000000004e-17,3.042011e-17
20,B,0.0149022653,0.0232771,0.07729486,0.012612075,-0.206217,-0.1833151,3.054425e-16,8.074917000000001e-17,4.704517e-16,0.000471251,0.0007360866,0.002444278,9.658940000000001e-18,2.553513e-18,1.487699e-17
20,C,-0.0210872097,-0.2416354,-0.1788,-0.021408449,-0.3692672,-0.3419225,6.424825e-16,9.531913e-16,9.338817e-16,0.0006668361,0.0076411828,0.005654152,2.0317080000000002e-17,3.014256e-17,2.953193e-17
40,A,0.0007902622,-0.3747803,-0.37394857,0.010422218,-0.3940861,-0.3894861,1.604451e-15,1.741373e-15,7.934484e-16,2.499028e-05,0.0118515938,0.011825292,5.0737190000000006e-17,5.5067060000000006e-17,2.5091040000000002e-17
40,B,-0.0043876853,-0.2114296,-0.21662711,-0.004776185,-0.247197,-0.250653,1.730841e-15,1.683445e-15,5.108263e-16,0.0001387508,0.0066859916,0.006850351,5.4734000000000005e-17,5.3235190000000006e-17,1.615375e-17
40,C,-0.0007113491,-0.3681212,-0.36597157,0.010947011,-0.3925703,-0.383963,6.301946e-16,1.755417e-18,8.355784e-16,2.249483e-05,0.0116410143,0.011573037,1.9928500000000003e-17,5.551114999999999e-20,2.642331e-17
100,A,0.0015004521,-0.4054145,-0.4036448,-0.005822889,-0.4093798,-0.4086708,2.1065e-15,2.457583e-16,3.7917e-16,4.744846e-05,0.012820331,0.012764369,6.661338000000001e-17,7.771561e-18,1.199041e-17
100,B,0.0061080941,-0.2642993,-0.2672137,0.01312613,-0.2776974,-0.268393,3.514344e-15,4.915167e-17,5.617334e-17,0.0001931549,0.0083578788,0.008450039,1.111333e-16,1.554312e-18,1.776357e-18
100,C,-0.0073742081,-0.4052786,-0.40734989,-0.002892267,-0.4114613,-0.4065668,1.488593e-15,1.527213e-16,8.777084e-17,0.0002331929,0.0128160354,0.012881535,4.7073460000000006e-17,4.8294700000000004e-18,2.775558e-18


In [15]:
bootstrap(B = 1000)

[1m[22m`summarise()` has grouped output by 'N'. You can override using the `.groups`
argument.


N,model,beta,gamma1,gamma2
<dbl>,<chr>,<chr>,<chr>,<chr>
20,A,0.942726726662758 - 0.974482112022787,0.386125019480433 - 0.403031088778777,0.382355251677562 - 0.399844054950864
20,B,0.983825601804865 - 1.00696275370419,0.4213169985221 - 0.439095522841933,0.414390225560934 - 0.432836033179865
20,C,0.97450692678223 - 1.0020143508821,0.326951818960078 - 0.344049703093458,0.329376065797398 - 0.34657992870442
40,A,0.962441259867782 - 0.983173414351192,0.359411627448733 - 0.371437740605436,0.359955157604336 - 0.37208840547083
40,B,0.987416317209452 - 1.00345752398281,0.411808109584478 - 0.424621621317865,0.413198934519474 - 0.4252195181738
40,C,0.975269889419827 - 0.99496729353996,0.322627867851751 - 0.334721296332131,0.321170714880369 - 0.332914296847212
100,A,0.963954964507596 - 0.977399854051773,0.354512251462243 - 0.361790500751028,0.353532539710579 - 0.360670011326123
100,B,0.988259456439461 - 0.998416239351141,0.40271729179876 - 0.410557879800307,0.404124409095184 - 0.412233967561336
100,C,0.986255820549675 - 0.998463534022802,0.313618671780353 - 0.321025346000642,0.316842028882268 - 0.324286437458612
