Skip to content

Commit

Permalink
Merge pull request #169 from DeclareDesign/ls/weight_lin
Browse files Browse the repository at this point in the history
Fixes weighting for lm_lin and blocked difference_in_means
  • Loading branch information
lukesonnet committed Mar 17, 2018
2 parents cd4f7b4 + 33e3cd7 commit f73db70
Show file tree
Hide file tree
Showing 4 changed files with 84 additions and 15 deletions.
14 changes: 11 additions & 3 deletions R/estimatr_difference_in_means.R
Expand Up @@ -238,7 +238,10 @@ difference_in_means <-
stringsAsFactors = FALSE
)
data$cluster <- model_data$cluster
data$weights <- model_data$weights
# rescale weights for convenience
if (is.numeric(model_data$weights)) {
data$weights <- model_data$weights / mean(model_data$weights)
}
data$block <- model_data$block

if (!is.null(data$weights) && length(unique(data$weights)) == 1
Expand Down Expand Up @@ -355,7 +358,7 @@ difference_in_means <-
## matches lm_lin, two estimates per block
if (is.null(data$cluster)) {
design <- "Blocked"
df <- N_overall - 2 * n_blocks
df <- nrow(data) - 2 * n_blocks
} else {
design <- "Block-clustered"
# Also matches lm_lin for even sized clusters, should be conservative
Expand Down Expand Up @@ -520,10 +523,15 @@ difference_in_means_internal <-
data.frame(
coefficients = diff,
se = se,
N = N,
df = df,
stringsAsFactors = FALSE
)

if (is.numeric(data$weights)) {
return_frame$N <- sum(data$weights)
} else {
return_frame$N <- N
}

return(return_frame)
}
26 changes: 16 additions & 10 deletions R/estimatr_lm_lin.R
Expand Up @@ -253,16 +253,22 @@ lm_lin <- function(formula,
# Center and interact variables
# ----------

# Initialize as non-demeaned
demeaned_covars <-
scale(
design_matrix[
,
setdiff(colnames(design_matrix), c(design_mat_treatment, "(Intercept)")),
drop = FALSE
],
center = TRUE,
scale = FALSE
)
design_matrix[
,
setdiff(colnames(design_matrix), c(design_mat_treatment, "(Intercept)")),
drop = FALSE
]

# Choose what to center on!
if (is.numeric(weights)) {
center <- apply(demeaned_covars, 2, weighted.mean, weights)
} else {
center <- colMeans(demeaned_covars)
}

demeaned_covars <- sweep(demeaned_covars, 2, center)

original_covar_names <- colnames(demeaned_covars)

Expand Down Expand Up @@ -338,7 +344,7 @@ lm_lin <- function(formula,
formula = formula
)

return_list[["scaled_center"]] <- attr(demeaned_covars, "scaled:center")
return_list[["scaled_center"]] <- center
setNames(return_list[["scaled_center"]], original_covar_names)

return_list[["call"]] <- match.call()
Expand Down
2 changes: 0 additions & 2 deletions tests/testthat/test-difference-in-means.R
Expand Up @@ -581,8 +581,6 @@ test_that("DIM matches lm_robust under certain conditions", {
# With weights now, identical to lm_robust, HC2 by force! except for matched pairs which fails
dat$w <- runif(nrow(dat))



# simple W
expect_equivalent(
tidy(lm_robust(Y ~ z, data = dat, weights = w))[2, ],
Expand Down
57 changes: 57 additions & 0 deletions tests/testthat/test-lm-lin.R
Expand Up @@ -311,3 +311,60 @@ test_that("Test LM Lin", {
c("z", "X1_c", "z:X1_c")
)
})

test_that("lm_lin same as sampling perspective", {

# Unweighted matches sampling view
lmo <- lm_lin(mpg ~ am, ~ hp, data = mtcars)
m_hp <- mean(mtcars$hp)
areg <- lm(mpg ~ hp, data = mtcars, subset = am == 1)
breg <- lm(mpg ~ hp, data = mtcars, subset = am == 0)
ate <-
with(mtcars[mtcars$am == 1, ],
mean(mpg) + (m_hp - mean(hp)) * areg$coefficients[2]) -
with(mtcars[mtcars$am == 0, ],
mean(mpg) + (m_hp - mean(hp)) * breg$coefficients[2])

expect_equivalent(
ate,
lmo$coefficients["am"]
)
})

test_that("weighted lm_lin same as with one covar sampling view", {

# Weighted matches (one covar)
lmwo <- lm_lin(mpg ~ am, ~ hp, weights = wt, data = mtcars)
hp_wmean <- weighted.mean(mtcars$hp, mtcars$wt)
wareg <- lm(mpg ~ hp, data = mtcars, subset = am == 1, weights = wt)
wbreg <- lm(mpg ~ hp, data = mtcars, subset = am == 0, weights = wt)
wate <-
with(mtcars[mtcars$am == 1, ],
weighted.mean(mpg, wt) + (hp_wmean - weighted.mean(hp, wt)) * wareg$coefficients[2]) -
with(mtcars[mtcars$am == 0, ],
weighted.mean(mpg, wt) + (hp_wmean - weighted.mean(hp, wt)) * wbreg$coefficients[2])

expect_equivalent(
wate,
lmwo$coefficients["am"]
)
})

test_that("weighted lm_lin same as with two covar sampling view", {

# Weighted matches (two covars)
lmw2o <- lm_lin(mpg ~ am, ~ hp + cyl, weights = wt, data = mtcars)
hpcyl_wmean <- apply(mtcars[, c("hp", "cyl")], 2, weighted.mean, mtcars$wt)
w2areg <- lm(mpg ~ hp + cyl, data = mtcars, subset = am == 1, weights = wt)
w2breg <- lm(mpg ~ hp + cyl, data = mtcars, subset = am == 0, weights = wt)
w2ate <-
with(mtcars[mtcars$am == 1, ],
weighted.mean(mpg, wt) + (hpcyl_wmean - apply(cbind(hp, cyl), 2, weighted.mean, wt)) %*% w2areg$coefficients[2:3]) -
with(mtcars[mtcars$am == 0, ],
weighted.mean(mpg, wt) + (hpcyl_wmean - apply(cbind(hp, cyl), 2, weighted.mean, wt)) %*% w2breg$coefficients[2:3])

expect_equivalent(
w2ate,
lmw2o$coefficients["am"]
)
})

0 comments on commit f73db70

Please sign in to comment.