Merge pull request #134 from DeclareDesign/lukesonnet/cran_final

Final CRAN prep
DeclareDesign · Feb 15, 2018 · e92cdbd · e92cdbd
2 parents bdcbc7f + 9eae931
commit e92cdbd
Show file tree

Hide file tree

Showing 13 changed files with 29 additions and 49 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -27,9 +27,6 @@ Suggests:
     RcppEigen,
     fabricatr,
     randomizr (>= 0.8),
-    knitr,
-    rmarkdown,
     margins,
     prediction
 Enhances: broom, texreg
-VignetteBuilder: knitr
diff --git a/R/S3_print.R b/R/S3_print.R
@@ -9,33 +9,35 @@ print.lm_robust <-
 #' @export
 print.summary.lm_robust <-
   function(
-    x,
-    digits = max(3L, getOption("digits") - 3L),
-    ...) {
-
-    cat("\nCall:\n", paste(deparse(x$call, nlines = 5), sep = "\n", collapse = "\n"),
-        "\n\n", sep = "")
+           x,
+           digits = max(3L, getOption("digits") - 3L),
+           ...) {
+    cat(
+      "\nCall:\n", paste(deparse(x$call, nlines = 5), sep = "\n", collapse = "\n"),
+      "\n\n", sep = ""
+    )
     if (x$weighted) {
       cat("Weighted, ")
     }
     cat("Standard error type = ", x$se_type, "\n")
 
     if (x$rank < x$k) {
       singularities <- x$k - x$rank
-      cat("\nCoefficients: (", singularities, " not defined because the design matrix is rank deficient)\n",
-          sep = "")
+      cat(
+        "\nCoefficients: (", singularities, " not defined because the design matrix is rank deficient)\n",
+        sep = ""
+      )
     } else {
       cat("\nCoefficients:\n")
     }
 
     print(x$coefficients, digits = digits)
 
     if (!is.null(x$fstatistic)) {
-
       cat(
         "\nMultiple R-squared: ", formatC(x$r.squared, digits = digits),
         ",\tAdjusted R-squared: ", formatC(x$adj.r.squared, digits = digits),
-        "\nF-statistic:", formatC(x$fstatistic[1L],digits = digits),
+        "\nF-statistic:", formatC(x$fstatistic[1L], digits = digits),
         "on", x$fstatistic[2L], "and", x$fstatistic[3L],
         "DF,  p-value:",
         format.pval(pf(

diff --git a/R/estimatr_lm_lin.R b/R/estimatr_lm_lin.R
@@ -6,7 +6,7 @@
 #'
 #' @param formula an object of class formula, as in \code{\link{lm}}, such as
 #' \code{Y ~ Z} with only one variable on the right-hand side, the treatment
-#' @param covariates a right-sided formula with pre-treatment covaraites on
+#' @param covariates a right-sided formula with pre-treatment covariates on
 #' the right hand side, such as \code{ ~ x1 + x2 + x3}.
 #' @param data A \code{data.frame}
 #' @param weights the bare (unquoted) names of the weights variable in the

diff --git a/man/lm_lin.Rd b/man/lm_lin.Rd
diff --git a/tests/testthat/test-horvitz-thompson.R b/tests/testthat/test-horvitz-thompson.R
@@ -491,7 +491,6 @@ test_that("Works without variation in treatment", {
     horvitz_thompson(y ~ t, condition_pr_mat = cpm),
     NA
   )
-
 })
 
 test_that("multi-valued treatments not allowed in declaration", {

diff --git a/tests/testthat/test-lm-robust.R b/tests/testthat/test-lm-robust.R
@@ -286,17 +286,16 @@ test_that("lm robust works with rank-deficient X", {
 })
 
 test_that("r squared is right", {
-
   lmo <- summary(lm(mpg ~ hp, mtcars))
   lmow <- summary(lm(mpg ~ hp, mtcars, weights = wt))
-  lmon <- summary(lm(mpg ~ hp-1, mtcars))
-  lmown <- summary(lm(mpg ~ hp-1, mtcars, weights = wt))
+  lmon <- summary(lm(mpg ~ hp - 1, mtcars))
+  lmown <- summary(lm(mpg ~ hp - 1, mtcars, weights = wt))
 
   lmro <- lm_robust(mpg ~ hp, mtcars)
   lmrow <- lm_robust(mpg ~ hp, mtcars, weights = wt)
-  lmron <- lm_robust(mpg ~ hp-1, mtcars)
-  lmrown <- lm_robust(mpg ~ hp-1, mtcars, weights = wt)
-  lmrclust <- lm_robust(mpg ~ hp-1, mtcars, weights = wt, clusters = carb) # for good measure
+  lmron <- lm_robust(mpg ~ hp - 1, mtcars)
+  lmrown <- lm_robust(mpg ~ hp - 1, mtcars, weights = wt)
+  lmrclust <- lm_robust(mpg ~ hp - 1, mtcars, weights = wt, clusters = carb) # for good measure
 
   expect_equal(
     c(lmo$r.squared, lmo$adj.r.squared, lmo$fstatistic),
@@ -323,4 +322,3 @@ test_that("r squared is right", {
     c(lmrclust$r.squared, lmrclust$adj.r.squared, lmrclust$fstatistic)
   )
 })
-
diff --git a/tests/testthat/test-lm-robust_margins.R b/tests/testthat/test-lm-robust_margins.R
@@ -9,8 +9,7 @@ library(margins)
 
 mv <- c("AME", "SE", "z", "p")
 
-test_that("lm robust can work with margins",{
-
+test_that("lm robust can work with margins", {
   x <- lm(mpg ~ cyl * hp + wt, data = mtcars)
   lmr <- lm_robust(mpg ~ cyl * hp + wt, data = mtcars)
 
@@ -51,11 +50,9 @@ test_that("lm robust can work with margins",{
   expect_true(!any(is.na(lmrc)))
   lmrc <- summary(margins(lmr_class, vce = "simulation", iterations = 10L))
   expect_true(!any(is.na(lmrc)))
-
 })
 
-test_that("lm robust + weights can work with margins",{
-
+test_that("lm robust + weights can work with margins", {
   x <- lm(mpg ~ cyl * hp, data = mtcars, weights = wt)
   x2 <- lm_robust(mpg ~ cyl * hp, data = mtcars, weights = wt, se_type = "classical")
   expect_equal(marginal_effects(x), marginal_effects(x2))
@@ -71,10 +68,9 @@ test_that("lm robust + weights can work with margins",{
   )
   # Have to round quite a bit!
   expect_equal(lmc, lmr)
-
 })
 
-test_that("lm robust + cluster can work with margins",{
+test_that("lm robust + cluster can work with margins", {
 
   # works but throws a lot of warnings
   x <- lm(mpg ~ cyl * hp + wt, data = mtcars)
@@ -92,12 +88,10 @@ test_that("lm robust + cluster can work with margins",{
   expect_true(
     !any(lmc[, 2] == lmr[, 2])
   )
-
 })
 
 
-test_that("lm lin can work with margins",{
-
+test_that("lm lin can work with margins", {
   data("alo_star_men")
   lml <- lm_lin(GPA_year1 ~ ssp, ~  gpa0, data = alo_star_men, se_type = "classical")
 
@@ -112,7 +106,4 @@ test_that("lm lin can work with margins",{
     round(lml_sum[, 4], 5),
     round(lmo_sum[, 4], 5)
   )
-
-
 })
-
diff --git a/tests/testthat/test-na-omit-details.R b/tests/testthat/test-na-omit-details.R
@@ -24,7 +24,6 @@ test_that("Row names are set correctly", {
 })
 
 test_that("Logic for nested dfs and lists holds", {
-
   df$X <- list(x = c(NA, 2:nrow(df)))
   df$Xmat <- matrix(rep(c(1, NA, 3:nrow(df)), 2), nrow(df))
 

diff --git a/tests/testthat/test-s3-methods.R b/tests/testthat/test-s3-methods.R
@@ -227,7 +227,6 @@ test_that("coef and confint work", {
       nobs(summary(lmro))
     )
   )
-
 })
 
 test_that("predict works", {

diff --git a/tests/testthat/test-texreg.R b/tests/testthat/test-texreg.R
@@ -14,8 +14,4 @@ test_that("texreg extension works", {
     treg,
     "texreg"
   )
-
-
 })
-
-
diff --git a/tests/testthat/test-zzz.R b/tests/testthat/test-zzz.R
@@ -5,10 +5,9 @@ test_that("onLoad makes generics if texreg is present", {
 
   e <- environment(.onLoad)
 
-  environment(.onLoad) <- new.env(parent=parent.env(e))
+  environment(.onLoad) <- new.env(parent = parent.env(e))
   environment(.onLoad)$extract.lm_robust <- e$extract.lm_robust
 
   expect_null(.onLoad("estimatr", "estimatr"))
   environment(.onLoad) <- e
-
 })
diff --git a/vignettes/mathematical-notes.Rmd b/vignettes/mathematical-notes.Rmd
@@ -45,7 +45,7 @@ The default estimators have been selected for efficiency in large samples and lo
 \widehat{\beta} =(\mathbf{X}^{\top}\mathbf{X})^{-1}\mathbf{X}^{\top}\mathbf{y}
 \]
 
-Our algorithm solves the least squares problem using a rank-revealing column-pivoting QR factorization that eliminates the need to invert $(\mathbf{X}^{\top}\mathbf{X})^{-1}$ explicitly and behaves much like the default `lm` function in R. However, when $\mathbf{X}$ is rank deficient, there are certain conditions under which the QR factorization algorithm we use, from the Eigen C++ library, drops different coefficients from the output than the default `lm` function. In general, users should avoid specifing models with rank-deficiencies. In fact, if users are certain their data are not rank deficient, they can improve the speed of `lm_robust` by setting `try_cholesky = TRUE`. This replaces the QR factorization with a Cholesky factorization that is only guaranteed to work $\mathbf{X}$ is of full rank.
+Our algorithm solves the least squares problem using a rank-revealing column-pivoting QR factorization that eliminates the need to invert $(\mathbf{X}^{\top}\mathbf{X})^{-1}$ explicitly and behaves much like the default `lm` function in R. However, when $\mathbf{X}$ is rank deficient, there are certain conditions under which the QR factorization algorithm we use, from the Eigen C++ library, drops different coefficients from the output than the default `lm` function. In general, users should avoid specifying models with rank-deficiencies. In fact, if users are certain their data are not rank deficient, they can improve the speed of `lm_robust` by setting `try_cholesky = TRUE`. This replaces the QR factorization with a Cholesky factorization that is only guaranteed to work $\mathbf{X}$ is of full rank.
 
 #### Weights
 
@@ -97,7 +97,7 @@ For cluster-robust inference, we provide several estimators that are essentially
 * $\mathbf{e}_s$ is the elements of the residual matrix $\mathbf{e}$ in cluster $s$, or $\mathbf{e}_s = \mathbf{y}_s - \mathbf{X}_s \widehat{\beta}$
 * $\mathbf{A}_s$ and $\mathbf{p}$ are defined in the notes below
 
-**Further notes on CR2:** The variance estimator we implement is shown in equations (4) and (5) in @pustejovskytipton2016 and equation (11), where we set $\mathbf{\Phi}$ to be $I$, following @bellmccaffrey2002. Furthernote that the @pustejovskytipton2016 CR2 estimator and the  @bellmccaffrey2002 estimator are identical when $\mathbf{B_s}$ is full rank. It could be rank-deficient if there were dummy variables, or fixed effects, that were also your clusters. In this case, the original @bellmccaffrey2002 estimator could not be computed. You can see the simpler @bellmccaffrey2002 estimator written up plainly on page 709 of @imbenskolesar2016 along with the degrees of freedom denoted as $K_{BM}$.
+**Further notes on CR2:** The variance estimator we implement is shown in equations (4) and (5) in @pustejovskytipton2016 and equation (11), where we set $\mathbf{\Phi}$ to be $I$, following @bellmccaffrey2002. Further note that the @pustejovskytipton2016 CR2 estimator and the  @bellmccaffrey2002 estimator are identical when $\mathbf{B_s}$ is full rank. It could be rank-deficient if there were dummy variables, or fixed effects, that were also your clusters. In this case, the original @bellmccaffrey2002 estimator could not be computed. You can see the simpler @bellmccaffrey2002 estimator written up plainly on page 709 of @imbenskolesar2016 along with the degrees of freedom denoted as $K_{BM}$.
 
 In the CR2 variance calculation, we get $\mathbf{A}_s$ as follows:
 
@@ -268,7 +268,7 @@ $$
 
 **Clustered designs**
 
-For clustered designs, we use the following collpased estimator by setting `collapsed = TRUE`. Here, $M$ is the total number of clusters, $y_k$ is the total of the outcomes $y_i$ for all $i$ units in cluster $k$, $\pi_zk$ is the marginal probability of cluster $k$ being in condition $z \in \{0, 1\}$, and $z_k$ and $\pi_{zkwl}$ are defined analogously. Warning! If one passes `condition_pr_mat` to `horvitz_thompson` for a clustered design, but not `clusters`, the function will not use the collapsed estimator the the variance estimate will be innacurate.
+For clustered designs, we use the following collapsed estimator by setting `collapsed = TRUE`. Here, $M$ is the total number of clusters, $y_k$ is the total of the outcomes $y_i$ for all $i$ units in cluster $k$, $\pi_zk$ is the marginal probability of cluster $k$ being in condition $z \in \{0, 1\}$, and $z_k$ and $\pi_{zkwl}$ are defined analogously. Warning! If one passes `condition_pr_mat` to `horvitz_thompson` for a clustered design, but not `clusters`, the function will not use the collapsed estimator the the variance estimate will be inaccurate.
 
 $$
 \begin{aligned}
@@ -301,6 +301,6 @@ Theory on hypothesis testing with the Horvitz-Thompson estimator is yet to be de
 \mathrm{CI}^{1-\alpha} = \left(\widehat{\tau} + z_{\alpha/2} \sqrt{\widehat{\mathbb{V}}[\widehat{\tau}]}, \widehat{\tau} + z_{1 - \alpha/2} \sqrt{\widehat{\mathbb{V}}[\widehat{\tau}]}\right)
 \]
 
-The associated p-values for a two-sided null hypothesis test are computed using a normal disribution and the aforementioned significance level $\alpha$.
+The associated p-values for a two-sided null hypothesis test are computed using a normal distribution and the aforementioned significance level $\alpha$.
 
 # References
diff --git a/vignettes/simulations-ols-variance.Rmd b/vignettes/simulations-ols-variance.Rmd
@@ -55,7 +55,7 @@ The function
 \[
 \epsilon_i \overset{i.i.d.}{\sim} N(0, \sigma^2),
 \]
-encodes the assumption of homoskedaticity. Because of these homoskedastic errors, we know that the true variance of the coefficients with fixed covariates is
+encodes the assumption of homoskedasticity. Because of these homoskedastic errors, we know that the true variance of the coefficients with fixed covariates is
 
 \[
 \mathbb{V}[\widehat{\beta}] = \sigma^2 (\mathbf{X}^\top \mathbf{X})^{-1},