lectures/applications/applications.bib

% Kleinberg et al., QJE 133(1): machine-learning predictions compared with judges' bail decisions.
% NOTE(review): year/month are kept from the publisher export; confirm against the print issue date.
@article{kleinberg2017,
  author   = {Kleinberg, Jon and Lakkaraju, Himabindu and Leskovec, Jure and Ludwig, Jens and Mullainathan, Sendhil},
  title    = {Human Decisions and Machine Predictions},
  journal  = {The Quarterly Journal of Economics},
  volume   = {133},
  number   = {1},
  pages    = {237--293},
  year     = {2017},
  month    = aug,
  abstract = {Can machine learning improve human decision making? Bail decisions provide a good test case. Millions of times each year, judges make jail-or-release decisions that hinge on a prediction of what a defendant would do if released. The concreteness of the prediction task combined with the volume of data available makes this a promising machine-learning application. Yet comparing the algorithm to judges proves complicated. First, the available data are generated by prior judge decisions. We only observe crime outcomes for released defendants, not for those judges detained. This makes it hard to evaluate counterfactual decision rules based on algorithmic predictions. Second, judges may have a broader set of preferences than the variable the algorithm predicts; for instance, judges may care specifically about violent crimes or about racial inequities. We deal with these problems using different econometric strategies, such as quasi-random assignment of cases to judges. Even accounting for these concerns, our results suggest potentially large welfare gains: one policy simulation shows crime reductions up to 24.7\% with no change in jailing rates, or jailing rate reductions up to 41.9\% with no increase in crime rates. Moreover, all categories of crime, including violent crimes, show reductions; these gains can be achieved while simultaneously reducing racial disparities. These results suggest that while machine learning can be valuable, realizing this value requires integrating these tools into an economic framework: being clear about the link between predictions and decisions; specifying the scope of payoff functions; and constructing unbiased decision counterfactuals. JEL Codes: C10, C55, K40.},
  issn     = {0033-5533},
  doi      = {10.1093/qje/qjx032},
  url      = {https://doi.org/10.1093/qje/qjx032}
}


% Kleinberg, Ludwig, Mullainathan and Obermeyer, AER 105(5): "Prediction Policy Problems".
@article{kleinberg2015,
  author  = {Kleinberg, Jon and Ludwig, Jens and Mullainathan, Sendhil and Obermeyer, Ziad},
  title   = {Prediction Policy Problems},
  journal = {American Economic Review},
  volume  = {105},
  number  = {5},
  year    = {2015},
  month   = may,
  pages   = {491--495},
  doi     = {10.1257/aer.p20151023},
  url     = {http://www.aeaweb.org/articles?id=10.1257/aer.p20151023}
}

% Chernozhukov et al., Econometrics Journal 21(1): double/debiased machine learning (DML).
% NOTE(review): the two $N^{-1/2}$ terms in the abstract were restored where the export had dropped the math; confirm against the published abstract.
@article{chernozhukov2018,
  author   = {Chernozhukov, Victor and Chetverikov, Denis and Demirer, Mert and Duflo, Esther and Hansen, Christian and Newey, Whitney and Robins, James},
  title    = {Double/debiased machine learning for treatment and structural parameters},
  journal  = {The Econometrics Journal},
  volume   = {21},
  number   = {1},
  year     = {2018},
  pages    = {C1--C68},
  doi      = {10.1111/ectj.12097},
  url      = {https://onlinelibrary.wiley.com/doi/abs/10.1111/ectj.12097},
  eprint   = {https://onlinelibrary.wiley.com/doi/pdf/10.1111/ectj.12097},
  abstract = {We revisit the classic semi-parametric problem of inference on a low-dimensional parameter $\theta_0$ in the presence of high-dimensional nuisance parameters $\eta_0$. We depart from the classical setting by allowing for $\eta_0$ to be so high-dimensional that the traditional assumptions (e.g. Donsker properties) that limit complexity of the parameter space for this object break down. To estimate $\eta_0$, we consider the use of statistical or machine learning (ML) methods, which are particularly well suited to estimation in modern, very high-dimensional cases. ML methods perform well by employing regularization to reduce variance and trading off regularization bias with overfitting in practice. However, both regularization bias and overfitting in estimating $\eta_0$ cause a heavy bias in estimators of $\theta_0$ that are obtained by naively plugging ML estimators of $\eta_0$ into estimating equations for $\theta_0$. This bias results in the naive estimator failing to be $N^{-1/2}$ consistent, where $N$ is the sample size. We show that the impact of regularization bias and overfitting on estimation of the parameter of interest $\theta_0$ can be removed by using two simple, yet critical, ingredients: (1) using Neyman-orthogonal moments/scores that have reduced sensitivity with respect to nuisance parameters to estimate $\theta_0$; (2) making use of cross-fitting, which provides an efficient form of data-splitting. We call the resulting set of methods double or debiased ML (DML). We verify that DML delivers point estimators that concentrate in an $N^{-1/2}$-neighbourhood of the true parameter values and are approximately unbiased and normally distributed, which allows construction of valid confidence statements. The generic statistical theory of DML is elementary and simultaneously relies on only weak theoretical requirements, which will admit the use of a broad array of modern ML methods for estimating the nuisance parameters, such as random forests, lasso, ridge, deep neural nets, boosted trees, and various hybrids and ensembles of these methods. We illustrate the general theory by applying it to provide theoretical properties of the following: DML applied to learn the main regression parameter in a partially linear regression model; DML applied to learn the coefficient on an endogenous variable in a partially linear instrumental variables model; DML applied to learn the average treatment effect and the average treatment effect on the treated under unconfoundedness; DML applied to learn the local average treatment effect in an instrumental variables setting. In addition to these theoretical applications, we also illustrate the use of DML in three empirical examples.}
}

% Chernozhukov et al., AER 107(5): short paper on double/debiased ML for treatment effects.
@article{chernozhukov2017,
  author  = {Chernozhukov, Victor and Chetverikov, Denis and Demirer, Mert and Duflo, Esther and Hansen, Christian and Newey, Whitney},
  title   = {Double/Debiased/{Neyman} Machine Learning of Treatment Effects},
  journal = {American Economic Review},
  volume  = {107},
  number  = {5},
  year    = {2017},
  month   = may,
  pages   = {261--265},
  doi     = {10.1257/aer.p20171038},
  url     = {http://www.aeaweb.org/articles?id=10.1257/aer.p20171038}
}

% Chernozhukov, Demirer, Duflo and Fernandez-Val, NBER Working Paper 24678: generic ML inference on heterogeneous effects.
% The spelling "Heterogenous" in the title is kept as it appears in the source record.
@techreport{cddf2018,
  title       = {Generic Machine Learning Inference on Heterogenous Treatment Effects in Randomized Experiments},
  author      = {Chernozhukov, Victor and Demirer, Mert and Duflo, Esther and Fern{\'a}ndez-Val, Iv{\'a}n},
  institution = {National Bureau of Economic Research},
  type        = {Working Paper},
  series      = {Working Paper Series},
  number      = {24678},
  year        = {2018},
  month       = jun,
  doi         = {10.3386/w24678},
  url         = {http://www.nber.org/papers/w24678},
  abstract    = {We propose strategies to estimate and make inference on key features of heterogeneous effects in randomized experiments. These key features include best linear predictors of the effects using machine learning proxies, average effects sorted by impact groups, and average characteristics of most and least impacted units. The approach is valid in high dimensional settings, where the effects are proxied by machine learning methods. We post-process these proxies into the estimates of the key features. Our approach is generic, it can be used in conjunction with penalized methods, deep and shallow neural networks, canonical and new random forests, boosted trees, and ensemble methods. It does not rely on strong assumptions. In particular, we don't require conditions for consistency of the machine learning methods. Estimation and inference relies on repeated data splitting to avoid overfitting and achieve validity. For inference, we take medians of p-values and medians of confidence intervals, resulting from many different data splits, and then adjust their nominal level to guarantee uniform validity. This variational inference method is shown to be uniformly valid and quantifies the uncertainty coming from both parameter estimation and data splitting. An empirical application to the impact of micro-credit on economic development illustrates the use of the approach in randomized experiments.}
}

% Wager and Athey, JASA: heterogeneous treatment effect estimation and inference with random forests.
% NOTE(review): volume/number/pages replace the online-first placeholders (0 / 0 / 1-15); confirm against the publisher record for the DOI.
@article{wager2018,
  author    = {Wager, Stefan and Athey, Susan},
  title     = {Estimation and Inference of Heterogeneous Treatment Effects using Random Forests},
  journal   = {Journal of the American Statistical Association},
  volume    = {113},
  number    = {523},
  pages     = {1228--1242},
  year      = {2018},
  publisher = {Taylor \& Francis},
  doi       = {10.1080/01621459.2017.1319839},
  url       = {https://doi.org/10.1080/01621459.2017.1319839}
}

% Athey and Imbens, PNAS 113(27): recursive partitioning ("honest" trees) for heterogeneous causal effects.
@article{athey2016b,
  author    = {Athey, Susan and Imbens, Guido},
  title     = {Recursive partitioning for heterogeneous causal effects},
  journal   = {Proceedings of the National Academy of Sciences},
  publisher = {National Academy of Sciences},
  year      = {2016},
  volume    = {113},
  number    = {27},
  pages     = {7353--7360},
  issn      = {0027-8424},
  doi       = {10.1073/pnas.1510489113},
  url       = {http://www.pnas.org/content/113/27/7353},
  eprint    = {http://www.pnas.org/content/113/27/7353.full.pdf},
  abstract  = {In this paper we propose methods for estimating heterogeneity in causal effects in experimental and observational studies and for conducting hypothesis tests about the magnitude of differences in treatment effects across subsets of the population. We provide a data-driven approach to partition the data into subpopulations that differ in the magnitude of their treatment effects. The approach enables the construction of valid confidence intervals for treatment effects, even with many covariates relative to the sample size, and without {\textquotedblleft}sparsity{\textquotedblright} assumptions. We propose an {\textquotedblleft}honest{\textquotedblright} approach to estimation, whereby one sample is used to construct the partition and another to estimate treatment effects for each subpopulation. Our approach builds on regression tree methods, modified to optimize for goodness of fit in treatment effects and to account for honest estimation. Our model selection criterion anticipates that bias will be eliminated by honest estimation and also accounts for the effect of making additional splits on the variance of treatment effect estimates within each subpopulation. We address the challenge that the {\textquotedblleft}ground truth{\textquotedblright} for a causal effect is not observed for any individual unit, so that standard approaches to cross-validation must be modified. Through a simulation study, we show that for our preferred method honest estimation results in nominal coverage for 90\% confidence intervals, whereas coverage ranges between 74\% and 84\% for nonhonest approaches. Honest estimation requires estimating the model with a smaller sample size; the cost in terms of mean squared error of treatment effects for our preferred method ranges between 7{\textendash}22\%.}
}

% Alatas et al., World Bank report: impact evaluation of the Program Keluarga Harapan conditional cash transfer pilot.
@techreport{alatas2011,
  title       = {{Program Keluarga Harapan}: Impact Evaluation of {Indonesia's} Pilot Household Conditional Cash Transfer Program},
  author      = {Alatas, Vivi and Cahyadi, Nur and Ekasari, Elisabeth and Harmoun, Sarah and Hidayat, Budi and Janz, Edgar and Jellema, Jon and Tuhiman, H. and Wai-Poi, M.},
  institution = {World Bank},
  url         = {http://documents.worldbank.org/curated/en/589171468266179965/Program-Keluarga-Harapan-impact-evaluation-of-Indonesias-Pilot-Household-Conditional-Cash-Transfer-Program},
  year        = {2011}
}

% Triyana, AEJ: Economic Policy 8(4): provider responses to demand-side incentives in Indonesia.
@article{triyana2016,
  author  = {Triyana, Margaret},
  title   = {Do Health Care Providers Respond to Demand-Side Incentives? Evidence from {Indonesia}},
  journal = {American Economic Journal: Economic Policy},
  volume  = {8},
  number  = {4},
  year    = {2016},
  month   = nov,
  pages   = {255--288},
  doi     = {10.1257/pol.20140048},
  url     = {http://www.aeaweb.org/articles?id=10.1257/pol.20140048}
}

  % Chernozhukov, Hansen and Spindler, The R Journal 8(2): the hdm (High-Dimensional Metrics) package paper.
  % NOTE(review): the doi follows The R Journal's RJ-2016-040 article id seen in the url; confirm it resolves.
  @article{hdm,
  title   = {{hdm}: High-Dimensional Metrics},
  author  = {Chernozhukov, Victor and Hansen, Chris and Spindler, Martin},
  journal = {The R Journal},
  year    = {2016},
  volume  = {8},
  number  = {2},
  pages   = {185--199},
  doi     = {10.32614/RJ-2016-040},
  url     = {https://journal.r-project.org/archive/2016/RJ-2016-040/index.html}
}

% Dieterich, Mendoza and Brennan: vendor report on the accuracy equity and predictive parity of the COMPAS risk scales.
% NOTE(review): recorded as a company technical report rather than a journal article; confirm the institution spelling "Northpointe Inc.".
@techreport{dieterich2016,
  title       = {{COMPAS} risk scales: Demonstrating accuracy equity and predictive parity},
  author      = {Dieterich, William and Mendoza, Christina and Brennan, Tim},
  institution = {Northpointe Inc.},
  year        = {2016}
}

% Belloni and Chernozhukov: chapter (pp. 121--156) in an edited Springer volume on high dimensional sparse econometric models.
% Typed as a contribution to an edited collection, so both the chapter title and the book title are rendered.
@incollection{belloni2011,
  author    = {Belloni, Alexandre and Chernozhukov, Victor},
  editor    = {Alquier, Pierre and Gautier, Eric and Stoltz, Gilles},
  title     = {High Dimensional Sparse Econometric Models: An Introduction},
  booktitle = {Inverse Problems and High-Dimensional Estimation: Stats in the Ch{\^a}teau Summer School, August 31--September 4, 2009},
  year      = {2011},
  publisher = {Springer Berlin Heidelberg},
  address   = {Berlin, Heidelberg},
  pages     = {121--156},
  abstract  = {In this chapter we discuss conceptually high dimensional sparse econometric models as well as estimation of these models using $\ell_1$-penalization and post-$\ell_1$-penalization methods. Focusing on linear and nonparametric regression frameworks, we discuss various econometric examples, present basic theoretical results, and illustrate the concepts and methods with Monte Carlo simulations and an empirical application. In the application, we examine and confirm the empirical validity of the Solow-Swan model for international economic growth.},
  isbn      = {978-3-642-19989-9},
  doi       = {10.1007/978-3-642-19989-9_3},
  url       = {https://doi.org/10.1007/978-3-642-19989-9_3}
}

% Athey and Imbens: AEA Continuing Education webcast lectures on machine learning and econometrics.
@misc{athey2018,
  title        = {Machine learning and econometrics},
  author       = {Athey, Susan and Imbens, Guido},
  year         = {2018},
  howpublished = {AEA Continuing Education},
  url          = {https://www.aeaweb.org/conference/cont-ed/2018-webcasts}
}

% Athey and Imbens, JEP 31(2): survey of applied econometrics for causality and policy evaluation.
@article{athey2017,
  author  = {Athey, Susan and Imbens, Guido W.},
  title   = {The State of Applied Econometrics: Causality and Policy Evaluation},
  journal = {Journal of Economic Perspectives},
  volume  = {31},
  number  = {2},
  year    = {2017},
  month   = may,
  pages   = {3--32},
  doi     = {10.1257/jep.31.2.3},
  url     = {http://www.aeaweb.org/articles?id=10.1257/jep.31.2.3}
}


% "The Elements of Statistical Learning" textbook; the key is kept as friedman2008 so existing citations still resolve.
% NOTE(review): author order, subtitle, edition and the publisher/series split follow the published second edition; confirm against the book's title page.
@book{friedman2008,
  title     = {The Elements of Statistical Learning: Data Mining, Inference, and Prediction},
  author    = {Hastie, Trevor and Tibshirani, Robert and Friedman, Jerome},
  year      = {2009},
  edition   = {Second},
  series    = {Springer Series in Statistics},
  publisher = {Springer},
  address   = {New York},
  url       = {https://web.stanford.edu/~hastie/ElemStatLearn/}
}


% Efron and Hastie, "Computer Age Statistical Inference" (Cambridge University Press).
% NOTE(review): the series name was added so that "volume 5" has a referent, and the subtitle was added; confirm both against the publisher record.
@book{efron2016,
  title     = {Computer Age Statistical Inference: Algorithms, Evidence, and Data Science},
  author    = {Efron, Bradley and Hastie, Trevor},
  series    = {Institute of Mathematical Statistics Monographs},
  volume    = {5},
  year      = {2016},
  url       = {https://web.stanford.edu/~hastie/CASI/},
  publisher = {Cambridge University Press}
}


% Hornik, Stinchcombe and White, Neural Networks 2(5): universal approximation by multilayer feedforward networks.
@article{hornik1989,
  author   = {Hornik, Kurt and Stinchcombe, Maxwell and White, Halbert},
  title    = {Multilayer feedforward networks are universal approximators},
  journal  = {Neural Networks},
  volume   = {2},
  number   = {5},
  pages    = {359--366},
  year     = {1989},
  issn     = {0893-6080},
  doi      = {10.1016/0893-6080(89)90020-8},
  url      = {http://www.sciencedirect.com/science/article/pii/0893608089900208},
  keywords = {Feedforward networks, Universal approximation, Mapping networks, Network representation capability, Stone-Weierstrass Theorem, Squashing functions, Sigma-Pi networks, Back-propagation networks}
}