Skip to content

Commit

Permalink
Merge pull request #134 from DeclareDesign/lukesonnet/cran_final
Browse files Browse the repository at this point in the history
Final CRAN prep
  • Loading branch information
lukesonnet committed Feb 15, 2018
2 parents bdcbc7f + 9eae931 commit e92cdbd
Show file tree
Hide file tree
Showing 13 changed files with 29 additions and 49 deletions.
3 changes: 0 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,6 @@ Suggests:
RcppEigen,
fabricatr,
randomizr (>= 0.8),
knitr,
rmarkdown,
margins,
prediction
Enhances: broom, texreg
VignetteBuilder: knitr
22 changes: 12 additions & 10 deletions R/S3_print.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,33 +9,35 @@ print.lm_robust <-
#' @export
print.summary.lm_robust <-
function(
x,
digits = max(3L, getOption("digits") - 3L),
...) {

cat("\nCall:\n", paste(deparse(x$call, nlines = 5), sep = "\n", collapse = "\n"),
"\n\n", sep = "")
x,
digits = max(3L, getOption("digits") - 3L),
...) {
cat(
"\nCall:\n", paste(deparse(x$call, nlines = 5), sep = "\n", collapse = "\n"),
"\n\n", sep = ""
)
if (x$weighted) {
cat("Weighted, ")
}
cat("Standard error type = ", x$se_type, "\n")

if (x$rank < x$k) {
singularities <- x$k - x$rank
cat("\nCoefficients: (", singularities, " not defined because the design matrix is rank deficient)\n",
sep = "")
cat(
"\nCoefficients: (", singularities, " not defined because the design matrix is rank deficient)\n",
sep = ""
)
} else {
cat("\nCoefficients:\n")
}

print(x$coefficients, digits = digits)

if (!is.null(x$fstatistic)) {

cat(
"\nMultiple R-squared: ", formatC(x$r.squared, digits = digits),
",\tAdjusted R-squared: ", formatC(x$adj.r.squared, digits = digits),
"\nF-statistic:", formatC(x$fstatistic[1L],digits = digits),
"\nF-statistic:", formatC(x$fstatistic[1L], digits = digits),
"on", x$fstatistic[2L], "and", x$fstatistic[3L],
"DF, p-value:",
format.pval(pf(
Expand Down
2 changes: 1 addition & 1 deletion R/estimatr_lm_lin.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#'
#' @param formula an object of class formula, as in \code{\link{lm}}, such as
#' \code{Y ~ Z} with only one variable on the right-hand side, the treatment
#' @param covariates a right-sided formula with pre-treatment covaraites on
#' @param covariates a right-sided formula with pre-treatment covariates on
#' the right hand side, such as \code{ ~ x1 + x2 + x3}.
#' @param data A \code{data.frame}
#' @param weights the bare (unquoted) names of the weights variable in the
Expand Down
2 changes: 1 addition & 1 deletion man/lm_lin.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion tests/testthat/test-horvitz-thompson.R
Original file line number Diff line number Diff line change
Expand Up @@ -491,7 +491,6 @@ test_that("Works without variation in treatment", {
horvitz_thompson(y ~ t, condition_pr_mat = cpm),
NA
)

})

test_that("multi-valued treatments not allowed in declaration", {
Expand Down
12 changes: 5 additions & 7 deletions tests/testthat/test-lm-robust.R
Original file line number Diff line number Diff line change
Expand Up @@ -286,17 +286,16 @@ test_that("lm robust works with rank-deficient X", {
})

test_that("r squared is right", {

lmo <- summary(lm(mpg ~ hp, mtcars))
lmow <- summary(lm(mpg ~ hp, mtcars, weights = wt))
lmon <- summary(lm(mpg ~ hp-1, mtcars))
lmown <- summary(lm(mpg ~ hp-1, mtcars, weights = wt))
lmon <- summary(lm(mpg ~ hp - 1, mtcars))
lmown <- summary(lm(mpg ~ hp - 1, mtcars, weights = wt))

lmro <- lm_robust(mpg ~ hp, mtcars)
lmrow <- lm_robust(mpg ~ hp, mtcars, weights = wt)
lmron <- lm_robust(mpg ~ hp-1, mtcars)
lmrown <- lm_robust(mpg ~ hp-1, mtcars, weights = wt)
lmrclust <- lm_robust(mpg ~ hp-1, mtcars, weights = wt, clusters = carb) # for good measure
lmron <- lm_robust(mpg ~ hp - 1, mtcars)
lmrown <- lm_robust(mpg ~ hp - 1, mtcars, weights = wt)
lmrclust <- lm_robust(mpg ~ hp - 1, mtcars, weights = wt, clusters = carb) # for good measure

expect_equal(
c(lmo$r.squared, lmo$adj.r.squared, lmo$fstatistic),
Expand All @@ -323,4 +322,3 @@ test_that("r squared is right", {
c(lmrclust$r.squared, lmrclust$adj.r.squared, lmrclust$fstatistic)
)
})

17 changes: 4 additions & 13 deletions tests/testthat/test-lm-robust_margins.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,7 @@ library(margins)

mv <- c("AME", "SE", "z", "p")

test_that("lm robust can work with margins",{

test_that("lm robust can work with margins", {
x <- lm(mpg ~ cyl * hp + wt, data = mtcars)
lmr <- lm_robust(mpg ~ cyl * hp + wt, data = mtcars)

Expand Down Expand Up @@ -51,11 +50,9 @@ test_that("lm robust can work with margins",{
expect_true(!any(is.na(lmrc)))
lmrc <- summary(margins(lmr_class, vce = "simulation", iterations = 10L))
expect_true(!any(is.na(lmrc)))

})

test_that("lm robust + weights can work with margins",{

test_that("lm robust + weights can work with margins", {
x <- lm(mpg ~ cyl * hp, data = mtcars, weights = wt)
x2 <- lm_robust(mpg ~ cyl * hp, data = mtcars, weights = wt, se_type = "classical")
expect_equal(marginal_effects(x), marginal_effects(x2))
Expand All @@ -71,10 +68,9 @@ test_that("lm robust + weights can work with margins",{
)
# Have to round quite a bit!
expect_equal(lmc, lmr)

})

test_that("lm robust + cluster can work with margins",{
test_that("lm robust + cluster can work with margins", {

# works but throws a lot of warnings
x <- lm(mpg ~ cyl * hp + wt, data = mtcars)
Expand All @@ -92,12 +88,10 @@ test_that("lm robust + cluster can work with margins",{
expect_true(
!any(lmc[, 2] == lmr[, 2])
)

})


test_that("lm lin can work with margins",{

test_that("lm lin can work with margins", {
data("alo_star_men")
lml <- lm_lin(GPA_year1 ~ ssp, ~ gpa0, data = alo_star_men, se_type = "classical")

Expand All @@ -112,7 +106,4 @@ test_that("lm lin can work with margins",{
round(lml_sum[, 4], 5),
round(lmo_sum[, 4], 5)
)


})

1 change: 0 additions & 1 deletion tests/testthat/test-na-omit-details.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ test_that("Row names are set correctly", {
})

test_that("Logic for nested dfs and lists holds", {

df$X <- list(x = c(NA, 2:nrow(df)))
df$Xmat <- matrix(rep(c(1, NA, 3:nrow(df)), 2), nrow(df))

Expand Down
1 change: 0 additions & 1 deletion tests/testthat/test-s3-methods.R
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,6 @@ test_that("coef and confint work", {
nobs(summary(lmro))
)
)

})

test_that("predict works", {
Expand Down
4 changes: 0 additions & 4 deletions tests/testthat/test-texreg.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,4 @@ test_that("texreg extension works", {
treg,
"texreg"
)


})


3 changes: 1 addition & 2 deletions tests/testthat/test-zzz.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,9 @@ test_that("onLoad makes generics if texreg is present", {

e <- environment(.onLoad)

environment(.onLoad) <- new.env(parent=parent.env(e))
environment(.onLoad) <- new.env(parent = parent.env(e))
environment(.onLoad)$extract.lm_robust <- e$extract.lm_robust

expect_null(.onLoad("estimatr", "estimatr"))
environment(.onLoad) <- e

})
8 changes: 4 additions & 4 deletions vignettes/mathematical-notes.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ The default estimators have been selected for efficiency in large samples and lo
\widehat{\beta} =(\mathbf{X}^{\top}\mathbf{X})^{-1}\mathbf{X}^{\top}\mathbf{y}
\]

Our algorithm solves the least squares problem using a rank-revealing column-pivoting QR factorization that eliminates the need to invert $(\mathbf{X}^{\top}\mathbf{X})^{-1}$ explicitly and behaves much like the default `lm` function in R. However, when $\mathbf{X}$ is rank deficient, there are certain conditions under which the QR factorization algorithm we use, from the Eigen C++ library, drops different coefficients from the output than the default `lm` function. In general, users should avoid specifing models with rank-deficiencies. In fact, if users are certain their data are not rank deficient, they can improve the speed of `lm_robust` by setting `try_cholesky = TRUE`. This replaces the QR factorization with a Cholesky factorization that is only guaranteed to work $\mathbf{X}$ is of full rank.
Our algorithm solves the least squares problem using a rank-revealing column-pivoting QR factorization that eliminates the need to invert $(\mathbf{X}^{\top}\mathbf{X})^{-1}$ explicitly and behaves much like the default `lm` function in R. However, when $\mathbf{X}$ is rank deficient, there are certain conditions under which the QR factorization algorithm we use, from the Eigen C++ library, drops different coefficients from the output than the default `lm` function. In general, users should avoid specifying models with rank-deficiencies. In fact, if users are certain their data are not rank deficient, they can improve the speed of `lm_robust` by setting `try_cholesky = TRUE`. This replaces the QR factorization with a Cholesky factorization that is only guaranteed to work $\mathbf{X}$ is of full rank.

#### Weights

Expand Down Expand Up @@ -97,7 +97,7 @@ For cluster-robust inference, we provide several estimators that are essentially
* $\mathbf{e}_s$ is the elements of the residual matrix $\mathbf{e}$ in cluster $s$, or $\mathbf{e}_s = \mathbf{y}_s - \mathbf{X}_s \widehat{\beta}$
* $\mathbf{A}_s$ and $\mathbf{p}$ are defined in the notes below

**Further notes on CR2:** The variance estimator we implement is shown in equations (4) and (5) in @pustejovskytipton2016 and equation (11), where we set $\mathbf{\Phi}$ to be $I$, following @bellmccaffrey2002. Furthernote that the @pustejovskytipton2016 CR2 estimator and the @bellmccaffrey2002 estimator are identical when $\mathbf{B_s}$ is full rank. It could be rank-deficient if there were dummy variables, or fixed effects, that were also your clusters. In this case, the original @bellmccaffrey2002 estimator could not be computed. You can see the simpler @bellmccaffrey2002 estimator written up plainly on page 709 of @imbenskolesar2016 along with the degrees of freedom denoted as $K_{BM}$.
**Further notes on CR2:** The variance estimator we implement is shown in equations (4) and (5) in @pustejovskytipton2016 and equation (11), where we set $\mathbf{\Phi}$ to be $I$, following @bellmccaffrey2002. Further note that the @pustejovskytipton2016 CR2 estimator and the @bellmccaffrey2002 estimator are identical when $\mathbf{B_s}$ is full rank. It could be rank-deficient if there were dummy variables, or fixed effects, that were also your clusters. In this case, the original @bellmccaffrey2002 estimator could not be computed. You can see the simpler @bellmccaffrey2002 estimator written up plainly on page 709 of @imbenskolesar2016 along with the degrees of freedom denoted as $K_{BM}$.

In the CR2 variance calculation, we get $\mathbf{A}_s$ as follows:

Expand Down Expand Up @@ -268,7 +268,7 @@ $$

**Clustered designs**

For clustered designs, we use the following collpased estimator by setting `collapsed = TRUE`. Here, $M$ is the total number of clusters, $y_k$ is the total of the outcomes $y_i$ for all $i$ units in cluster $k$, $\pi_zk$ is the marginal probability of cluster $k$ being in condition $z \in \{0, 1\}$, and $z_k$ and $\pi_{zkwl}$ are defined analogously. Warning! If one passes `condition_pr_mat` to `horvitz_thompson` for a clustered design, but not `clusters`, the function will not use the collapsed estimator the the variance estimate will be innacurate.
For clustered designs, we use the following collapsed estimator by setting `collapsed = TRUE`. Here, $M$ is the total number of clusters, $y_k$ is the total of the outcomes $y_i$ for all $i$ units in cluster $k$, $\pi_zk$ is the marginal probability of cluster $k$ being in condition $z \in \{0, 1\}$, and $z_k$ and $\pi_{zkwl}$ are defined analogously. Warning! If one passes `condition_pr_mat` to `horvitz_thompson` for a clustered design, but not `clusters`, the function will not use the collapsed estimator the the variance estimate will be inaccurate.

$$
\begin{aligned}
Expand Down Expand Up @@ -301,6 +301,6 @@ Theory on hypothesis testing with the Horvitz-Thompson estimator is yet to be de
\mathrm{CI}^{1-\alpha} = \left(\widehat{\tau} + z_{\alpha/2} \sqrt{\widehat{\mathbb{V}}[\widehat{\tau}]}, \widehat{\tau} + z_{1 - \alpha/2} \sqrt{\widehat{\mathbb{V}}[\widehat{\tau}]}\right)
\]

The associated p-values for a two-sided null hypothesis test are computed using a normal disribution and the aforementioned significance level $\alpha$.
The associated p-values for a two-sided null hypothesis test are computed using a normal distribution and the aforementioned significance level $\alpha$.

# References
2 changes: 1 addition & 1 deletion vignettes/simulations-ols-variance.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ The function
\[
\epsilon_i \overset{i.i.d.}{\sim} N(0, \sigma^2),
\]
encodes the assumption of homoskedaticity. Because of these homoskedastic errors, we know that the true variance of the coefficients with fixed covariates is
encodes the assumption of homoskedasticity. Because of these homoskedastic errors, we know that the true variance of the coefficients with fixed covariates is

\[
\mathbb{V}[\widehat{\beta}] = \sigma^2 (\mathbf{X}^\top \mathbf{X})^{-1},
Expand Down

0 comments on commit e92cdbd

Please sign in to comment.