In [8]:
# Make sure every required package is installed, then attach it.
paquetes <- c("glmnet", "stats", "MASS", "dplyr", "lmtest", "sandwich")

for (pkg in paquetes) {
  # requireNamespace() only checks availability; attaching is left to
  # library(), which stops with an error if the package still cannot be loaded.
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg, dependencies = TRUE)
  }
  library(pkg, character.only = TRUE)
}

# The check mark is written as an ASCII escape so the cell source contains no
# literal U+2713 (the character named in the IRkernel notice on Windows).
cat("\u2713 Todas las librerías están listas\n")

Your code contains a unicode char which cannot be displayed in your
current locale and R will silently convert it to an escaped form when the
R kernel executes this code. This can lead to subtle errors if you use
such chars to do comparisons. For more information, please see
https://github.com/IRkernel/repr/wiki/Problems-with-unicode-on-windows

<U+2713> Todas las librerías están listas


In [10]:
## ----- UTF-8 settings (intended to avoid the IRkernel/Windows notice) -----
# NOTE(review): the output recorded for this cell still shows the notice, so
# these two settings alone do not appear to silence it -- confirm whether
# they are still needed.
options(encoding = "UTF-8")
Sys.setenv(LANG = "en_US.UTF-8")

## ----- Reliable CRAN mirror -----
repos <- getOption("repos")
# repos["CRAN"] is NA when there is no CRAN entry (and repos may be NULL), so
# the comparison is guarded to keep `if` from failing on a missing value.
cran_repo <- if (is.null(repos)) NA_character_ else unname(repos["CRAN"])
if (is.na(cran_repo) || cran_repo == "@CRAN@") {
  # Only the CRAN entry is set; any other configured repositories are kept.
  repos["CRAN"] <- "https://cloud.r-project.org"
  options(repos = repos)
}

## ----- Helper: install only the packages that are missing -----
# Installs the packages in `pkgs` that are not already available.
#
# Args:
#   pkgs: character vector of package names (may be empty).
#
# Returns:
#   NULL, invisibly. Called for its side effect of installing packages.
safe_install <- function(pkgs) {
  # system.file() returns "" for a package that is not installed; this avoids
  # scanning the whole library with installed.packages().
  is_installed <- vapply(
    pkgs,
    function(p) nzchar(system.file(package = p)),
    logical(1)
  )
  missing_pkgs <- unique(pkgs[!is_installed])

  if (length(missing_pkgs) > 0) {
    # Prefer binaries where the platform offers them; fall back to source on
    # platforms whose native package type is "source".
    pkg_type <- if (.Platform$pkgType == "source") "source" else "binary"
    # dependencies = NA installs the hard dependencies (Depends, Imports,
    # LinkingTo) but not Suggests, so the installed packages can be loaded.
    install.packages(missing_pkgs, type = pkg_type, dependencies = NA)
  }
  invisible(NULL)
}

## 1) Install Matrix first (needed by glmnet)
safe_install(c("Matrix"))

## Check Matrix; if this fails, update R (4.3+ recommended)
if (!requireNamespace("Matrix", quietly = TRUE)) {
  stop("No se pudo instalar 'Matrix'. Actualiza R a 4.3+ y vuelve a ejecutar.")
}

## 2) Install the remaining packages
safe_install(c("glmnet","MASS","dplyr","lmtest","sandwich","stats"))

## 3) Attach packages
pkgs <- c("Matrix","glmnet","MASS","dplyr","lmtest","sandwich","stats")
# library() stops with an error when a package cannot be attached, instead of
# returning FALSE that would go unnoticed.
invisible(lapply(pkgs, library, character.only = TRUE))

## 4) Show versions
# The check mark is written as an ASCII escape so the cell source contains no
# literal U+2713 (the character named in the IRkernel notice on Windows).
cat("\u2713 Paquetes listos\n")
# vapply() guarantees a character vector with one version string per package.
print(vapply(pkgs, function(p) as.character(packageVersion(p)), character(1)))


Your code contains a unicode char which cannot be displayed in your
current locale and R will silently convert it to an escaped form when the
R kernel executes this code. This can lead to subtle errors if you use
such chars to do comparisons. For more information, please see
https://github.com/IRkernel/repr/wiki/Problems-with-unicode-on-windows

<U+2713> Paquetes listos
    Matrix     glmnet       MASS      dplyr     lmtest   sandwich      stats 
   "1.5.4"    "4.1.7" "7.3.58.3"    "1.1.2"   "0.9.40"    "3.1.1"    "4.1.3" 


In [12]:
# Sample size for the simulated experiment.
n <- 1000

set.seed(42)

# The draws below are made in a fixed order (X1, X3, X2, X4, D, epsilon) so
# that the seed reproduces the same data.
X1 <- rnorm(n)                            # continuous covariate, N(0, 1)
X3 <- rnorm(n)                            # continuous covariate, N(0, 1)
X2 <- rbinom(n, size = 1, prob = 0.5)     # binary covariate
X4 <- rbinom(n, size = 1, prob = 0.5)     # binary covariate, not used in Y
D <- rbinom(n, size = 1, prob = 0.5)      # treatment indicator
epsilon <- rnorm(n)                       # noise term, N(0, 1)

# Outcome: coefficient 2 on D plus linear terms in X1, X2 and X3.
Y <- 2 * D + 0.5 * X1 - 0.3 * X2 + 0.2 * X3 + epsilon


In [14]:
# Collect the simulated vectors into one data frame, one column per variable.
df <- data.frame(Y = Y, D = D, X1 = X1, X2 = X2, X3 = X3, X4 = X4)

# Section banner.
cat(strrep("=", 60), "\n")
cat("3.1 DATA SIMULATION - COMPLETADO\n")
cat(strrep("=", 60), "\n")

# Preview the first rows and the dimensions of the data.
head(df, n = 5)
dim(df)


3.1 DATA SIMULATION - COMPLETADO


Unnamed: 0_level_0,Y,D,X1,X2,X3,X4
Unnamed: 0_level_1,<dbl>,<int>,<dbl>,<int>,<dbl>,<int>
1,1.14092427,0,1.3709584,1,2.3250585,1
2,0.218681118,1,-0.5646982,1,0.5241222,1
3,0.26161029,0,0.3631284,0,0.9707334,0
4,-0.001136103,0,0.6328626,1,0.3769734,1
5,-0.119613144,0,0.4042683,0,-0.9959334,0


In [16]:
# Split the sample by treatment status (D == 1 treated, D == 0 control).
tratamiento <- df[which(df$D == 1), , drop = FALSE]
control <- df[which(df$D == 0), , drop = FALSE]

# Print a covariate balance report for one variable: group means, their
# difference, and the p-value of a two-sample t-test (t.test() defaults, i.e.
# unequal variances).
#
# Args:
#   variable:  name of the column to compare (single string).
#   treated:   data frame with the treated units; defaults to the global
#              `tratamiento`.
#   untreated: data frame with the control units; defaults to the global
#              `control`.
#
# Returns:
#   NULL, invisibly. Called for its printed output.
balance_check <- function(variable, treated = tratamiento, untreated = control) {
  stopifnot(
    is.character(variable),
    length(variable) == 1L,
    variable %in% names(treated),
    variable %in% names(untreated)
  )

  x_treated <- treated[[variable]]
  x_control <- untreated[[variable]]
  test_result <- t.test(x_treated, x_control)

  # Each mean is computed once and reused for the difference.
  mean_treated <- mean(x_treated)
  mean_control <- mean(x_control)

  cat(variable, ":\n")
  cat("  Media tratamiento:", mean_treated, "\n")
  cat("  Media control:", mean_control, "\n")
  cat("  Diferencia:", mean_treated - mean_control, "\n")
  cat("  p-valor:", test_result$p.value, "\n")
  cat(strrep("-", 40), "\n")
  invisible(NULL)
}

cat("\nBALANCE CHECK - Comparación de medias\n")
# The loop variable is named `covariate` rather than `cov` so that the global
# left behind by the loop does not shadow stats::cov.
for (covariate in c("X1", "X2", "X3", "X4")) {
  balance_check(covariate)
}



BALANCE CHECK - Comparación de medias
X1 :
  Media tratamiento: -0.02217622 
  Media control: -0.02945807 
  Diferencia: 0.007281841 
  p-valor: 0.9086493 
---------------------------------------- 
X2 :
  Media tratamiento: 0.5210421 
  Media control: 0.5269461 
  Diferencia: -0.005904024 
  p-valor: 0.8519081 
---------------------------------------- 
X3 :
  Media tratamiento: -0.02167226 
  Media control: 0.01097098 
  Diferencia: -0.03264324 
  p-valor: 0.6008966 
---------------------------------------- 
X4 :
  Media tratamiento: 0.4869739 
  Media control: 0.5209581 
  Diferencia: -0.03398414 
  p-valor: 0.2829707 
---------------------------------------- 


In [18]:
# OLS of the outcome on the treatment indicator only.
modelo_simple <- lm(Y ~ D, data = df)
cat("=== REGRESIÓN SIMPLE (Y ~ D) ===\n")
# Point estimate for D, rounded to three decimals.
cat("Coeficiente de D (ATE):", round(coef(modelo_simple)[["D"]], 3), "\n")
# Standard error: square root of the D diagonal entry of the coefficient
# covariance matrix.
cat("Error estándar:", round(sqrt(vcov(modelo_simple)["D", "D"]), 3), "\n\n")


=== REGRESIÓN SIMPLE (Y ~ D) ===
Coeficiente de D (ATE): 2.075 
Error estándar: 0.074 



In [20]:
# OLS of the outcome on the treatment indicator plus the four covariates.
modelo_completo <- lm(Y ~ D + X1 + X2 + X3 + X4, data = df)
cat("=== REGRESIÓN CON CONTROLES (Y ~ D + X1 + X2 + X3 + X4) ===\n")
# Point estimate for D, rounded to three decimals.
cat("Coeficiente de D (ATE):", round(coef(modelo_completo)[["D"]], 3), "\n")
# Standard error: square root of the D diagonal entry of the coefficient
# covariance matrix.
cat("Error estándar:", round(sqrt(vcov(modelo_completo)["D", "D"]), 3), "\n\n")


=== REGRESIÓN CON CONTROLES (Y ~ D + X1 + X2 + X3 + X4) ===
Coeficiente de D (ATE): 2.075 
Error estándar: 0.064 



In [22]:
# Compare the D estimate and its standard error between the two regressions
# (model with controls minus simple model).
cat("=== COMPARACIÓN ===\n")
cat(
  "Diferencia en ATE:",
  coef(modelo_completo)[["D"]] - coef(modelo_simple)[["D"]],
  "\n"
)
cat(
  "Diferencia en SE:",
  sqrt(vcov(modelo_completo)["D", "D"]) - sqrt(vcov(modelo_simple)["D", "D"]),
  "\n"
)


=== COMPARACIÓN ===
Diferencia en ATE: 0.0001191611 
Diferencia en SE: -0.009431047 


In [24]:
library(glmnet)

# Theoretical LASSO penalty level:
#   c_const * sigma * sqrt(n) * qnorm(1 - alpha / (2 * p)),
# with alpha = 1 - confidence.
#
# Args:
#   n:          sample size (positive number).
#   p:          number of candidate regressors (>= 1).
#   confidence: confidence level strictly between 0 and 1 (default 0.95).
#   c_const:    slack constant multiplying the penalty (default 1.1, the
#               value previously hard-coded).
#   sigma:      noise scale multiplying the penalty (default 1, which
#               reproduces the previous behaviour).
#
# Returns:
#   The penalty level as a single number. Elsewhere in this file the result is
#   divided by n before it is passed to glmnet().
chernozhukov_lambda <- function(n, p, confidence = 0.95,
                                c_const = 1.1, sigma = 1) {
  stopifnot(
    is.numeric(n), length(n) == 1L, n > 0,
    is.numeric(p), length(p) == 1L, p >= 1,
    is.numeric(confidence), length(confidence) == 1L,
    confidence > 0, confidence < 1,
    is.numeric(c_const), length(c_const) == 1L, c_const > 0,
    is.numeric(sigma), length(sigma) == 1L, sigma > 0
  )

  alpha <- 1 - confidence
  c_const * sigma * sqrt(n) * qnorm(1 - alpha / (2 * p))
}

# Covariate matrix (treatment excluded) and outcome vector for the LASSO fits.
X_cov <- as.matrix(df[c("X1", "X2", "X3", "X4")])
y <- df[["Y"]]

n <- nrow(df)
p <- ncol(X_cov)
lambda_ch <- chernozhukov_lambda(n, p)

cat("\n", strrep("=", 70), "\n")
cat("3.3 LASSO CON PARÁMETROS TEÓRICOS (Chernozhukov et al.)\n")
cat(strrep("=", 70), "\n")

cat("Tamaño de muestra (n):", n, "\n")
cat("Número de covariables (p):", p, "\n")
cat("Lambda teórico Chernozhukov:", lambda_ch, "\n")
cat("Lambda para glmnet:", lambda_ch / n, "\n\n")

# LASSO at the theoretical penalty; the value passed to glmnet is lambda_ch / n.
lasso_theoretical <- glmnet(X_cov, y, alpha = 1, lambda = lambda_ch / n)

# LASSO with the penalty chosen by cross-validation; the seed is reset right
# before the call so the fold assignment is reproducible.
set.seed(42)
lasso_cv <- cv.glmnet(X_cov, y, alpha = 1)

cat("=== COMPARACIÓN DE MÉTODOS ===\n")
cat("Lambda validación cruzada:", lasso_cv$lambda.min, "\n")
cat("Lambda Chernozhukov:", lambda_ch / n, "\n")
cat("Ratio (CV/Chernozhukov):", lasso_cv$lambda.min / (lambda_ch / n), "\n\n")

# Coefficients under each penalty choice.
coef_theoretical <- coef(lasso_theoretical)
coef_cv <- coef(lasso_cv, s = "lambda.min")

print(coef_theoretical)
print(coef_cv)



3.3 LASSO CON PARÁMETROS TEÓRICOS (Chernozhukov et al.)
Tamaño de muestra (n): 1000 
Número de covariables (p): 4 
Lambda teórico Chernozhukov: 86.88282 
Lambda para glmnet: 0.08688282 

=== COMPARACIÓN DE MÉTODOS ===
Lambda validación cruzada: 0.001836725 
Lambda Chernozhukov: 0.08688282 
Ratio (CV/Chernozhukov): 0.02114026 

5 x 1 sparse Matrix of class "dgCMatrix"
                     s0
(Intercept)  0.86600576
X1           0.44660428
X2          -0.09420331
X3           0.07360008
X4           .         
5 x 1 sparse Matrix of class "dgCMatrix"
                     s1
(Intercept)  1.00464016
X1           0.52911764
X2          -0.27630237
X3           0.16695877
X4          -0.08053006
