In [89]:
rm(list=ls())

library(DoubleML)
library(ggplot2)
library(mlr3)
library(xtable)
library(mlr3learners)
library(repr)
library(data.table)
library("readxl")
library(mvtnorm)
suppressMessages(library(tidyverse))
suppressMessages(library(parallel))
suppressMessages(library(ggplot2))
suppressMessages(library(doMC))
# Raise the mlr3 logger threshold so only warnings and errors are reported.
lgr::get_logger("mlr3")$set_threshold("warn")

# Register a multicore foreach backend using every detected core.
# detectCores() is documented to return NA when the number of cores cannot be
# determined; fall back to a single worker rather than registering NA cores.
numCores <- detectCores()
if (is.na(numCores)) {
  numCores <- 1L
}
registerDoMC(cores = numCores)

In [90]:
# Import the outcome, treatment and control variables used for Table 3,
# Column (3).
dataset <- read_excel("Table3.xlsx")

# All variables used in the Blau et al. (2020) Table 3, Column (3) regression.
# The list is written once and shared by both specifications below.
table3_columns <- c(
  "nchild", "lths", "scol", "cold", "sp_lths", "sp_scol", "sp_cold",
  "genrace", "sp_genrace", "region", "year", "age", "age2", "age3",
  "sp_age", "sp_age2", "sp_age3", "sp_imm", "yrsusa1", "yrsusa2",
  "sp_yrsusa1", "sp_yrsusa2", "igenfert", "ligengdp", "igenlfp",
  "igensr", "chld1", "girl_lfp", "girl_sr"
)

# Baseline ("unconfounded") specification.
data_unconfound <- data.table(dataset[, table3_columns])

# Same columns plus the endogenous control for mother's age at first birth
# ("birthage"), appended as the last column.
data_confound <- data.table(dataset[, c(table3_columns, "birthage")])

# DoubleML data backend for the baseline specification: "nchild" is the
# outcome column and "igenfert" the treatment column.
DML_data <- double_ml_data_from_data_frame(
  data_unconfound,
  y_col = "nchild",
  d_cols = "igenfert"
)

# Seed the RNG once here; the model cells that follow all draw from this stream.
set.seed(12)

In [91]:
# Random forest nuisance learners (ranger) with 100 trees, min.node.size = 2
# and max.depth = 5.
learner <- lrn(
  "regr.ranger",
  num.trees = 100,
  min.node.size = 2,
  max.depth = 5
)
g_hat <- learner$clone()  # first nuisance learner handed to DoubleMLPLR
m_hat <- learner$clone()  # second nuisance learner handed to DoubleMLPLR

# Partially linear DML model on the current DML_data. The learners are passed
# positionally on purpose, in the order g_hat, m_hat.
DML_PLR <- DoubleMLPLR$new(DML_data, g_hat, m_hat)
DML_PLR$fit()

# Keep the coefficient estimate and its standard error side by side.
params_rf_uc <- cbind(DML_PLR$coef, DML_PLR$se)

In [92]:
# LASSO nuisance learners: cross-validated glmnet, predicting at lambda.min,
# with standardized predictors.
learner <- lrn(
  "regr.cv_glmnet",
  s = "lambda.min",
  standardize = TRUE
)
g_hat <- learner$clone()  # first nuisance learner handed to DoubleMLPLR
m_hat <- learner$clone()  # second nuisance learner handed to DoubleMLPLR

# Partially linear DML model on the current DML_data. The learners are passed
# positionally on purpose, in the order g_hat, m_hat.
DML_PLR <- DoubleMLPLR$new(DML_data, g_hat, m_hat)
DML_PLR$fit()

# Keep the coefficient estimate and its standard error side by side.
params_lasso_uc <- cbind(DML_PLR$coef, DML_PLR$se)

In [93]:
# Regression tree nuisance learners (rpart) with the learner's default settings.
learner <- lrn("regr.rpart")
g_hat <- learner$clone()  # first nuisance learner handed to DoubleMLPLR
m_hat <- learner$clone()  # second nuisance learner handed to DoubleMLPLR

# Partially linear DML model on the current DML_data. The learners are passed
# positionally on purpose, in the order g_hat, m_hat.
DML_PLR <- DoubleMLPLR$new(DML_data, g_hat, m_hat)
DML_PLR$fit()

# Keep the coefficient estimate and its standard error side by side.
params_rt_uc <- cbind(DML_PLR$coef, DML_PLR$se)

In [94]:
# From here on the models use the specification that includes "birthage":
# rebuild the DoubleML data backend from data_confound, with the same outcome
# ("nchild") and treatment ("igenfert") columns.
DML_data <- double_ml_data_from_data_frame(
  data_confound,
  y_col = "nchild",
  d_cols = "igenfert"
)

# Random forest nuisance learners (ranger) with 100 trees, min.node.size = 2
# and max.depth = 5.
learner <- lrn(
  "regr.ranger",
  num.trees = 100,
  min.node.size = 2,
  max.depth = 5
)
g_hat <- learner$clone()  # first nuisance learner handed to DoubleMLPLR
m_hat <- learner$clone()  # second nuisance learner handed to DoubleMLPLR

# Partially linear DML model on the rebuilt DML_data. The learners are passed
# positionally on purpose, in the order g_hat, m_hat.
DML_PLR <- DoubleMLPLR$new(DML_data, g_hat, m_hat)
DML_PLR$fit()

# Keep the coefficient estimate and its standard error side by side.
params_rf_c <- cbind(DML_PLR$coef, DML_PLR$se)

In [95]:
# LASSO nuisance learners: cross-validated glmnet, predicting at lambda.min,
# with standardized predictors.
learner <- lrn(
  "regr.cv_glmnet",
  s = "lambda.min",
  standardize = TRUE
)
g_hat <- learner$clone()  # first nuisance learner handed to DoubleMLPLR
m_hat <- learner$clone()  # second nuisance learner handed to DoubleMLPLR

# Partially linear DML model on the current DML_data. The learners are passed
# positionally on purpose, in the order g_hat, m_hat.
DML_PLR <- DoubleMLPLR$new(DML_data, g_hat, m_hat)
DML_PLR$fit()

# Keep the coefficient estimate and its standard error side by side.
params_lasso_c <- cbind(DML_PLR$coef, DML_PLR$se)

In [96]:
# Regression tree nuisance learners (rpart) with the learner's default settings.
learner <- lrn("regr.rpart")
g_hat <- learner$clone()  # first nuisance learner handed to DoubleMLPLR
m_hat <- learner$clone()  # second nuisance learner handed to DoubleMLPLR

# Partially linear DML model on the current DML_data. The learners are passed
# positionally on purpose, in the order g_hat, m_hat.
DML_PLR <- DoubleMLPLR$new(DML_data, g_hat, m_hat)
DML_PLR$fit()

# Keep the coefficient estimate and its standard error side by side.
params_rt_c <- cbind(DML_PLR$coef, DML_PLR$se)

In [97]:
# Assemble the comparison table (estimate, standard error) for the benchmark
# and the three DML learners, then emit it as LaTeX via xtable.

# Benchmark estimate and standard error.
# NOTE(review): presumably taken from Blau et al. (2020), Table 3, Column (3)
# -- confirm against the paper.
blau <- matrix(c(0.1014, 0.0278), nrow = 1, ncol = 2, byrow = TRUE)
#blau <- matrix(c(-0.3701,0.1444),byrow = TRUE,1,2)

# Format numbers as fixed three-decimal strings. The table ends up as a
# character matrix anyway (because of the blank benchmark cells in row 2);
# relying on implicit coercion would drop trailing zeros (0.100 -> "0.1") and
# give inconsistent decimals in the LaTeX output. formatC() keeps the matrix
# dimensions and dimnames.
format_3dp <- function(x) {
  formatC(round(x, 3), format = "f", digits = 3)
}

# Row 1: benchmark plus the three learners on the baseline specification.
results_1 <- round(
  cbind(blau, params_rf_uc, params_lasso_uc, params_rt_uc),
  3
)
rownames(results_1) <- "Total Fertility"

# Row 2: the specification with age at first birth; there is no benchmark for
# it, so the first two cells are blank.
results_2 <- cbind(
  " ", " ",
  format_3dp(cbind(params_rf_c, params_lasso_c, params_rt_c))
)
rownames(results_2) <- "Total Fertility with Age at First Birth"

results <- rbind(format_3dp(results_1), results_2)

# Each method spans two columns: the estimate (labelled) and its standard
# error (blank header).
colnames(results) <- c(
  "Benchmark", " ", "Random Forest", " ",
  "LASSO", " ", "Regression Tree", " "
)

xtable(results) # results copied to latex

Unnamed: 0_level_0,Benchmark,Unnamed: 2_level_0,Random Forest,Unnamed: 4_level_0,LASSO,Unnamed: 6_level_0,Regression Tree,Unnamed: 8_level_0
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
Total Fertility,0.101,0.028,0.103,0.006,0.101,0.005,0.121,0.009
Total Fertility with Age at First Birth,,,0.091,0.006,0.083,0.004,0.088,0.008
