/
stats-glm-tidiers.R
193 lines (171 loc) · 5.25 KB
/
stats-glm-tidiers.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
#' @templateVar class glm
#' @template title_desc_tidy
#'
#' @param x A `glm` object returned from [stats::glm()].
#' @template param_confint
#' @template param_exponentiate
#' @template param_unused_dots
#'
#' @export
#' @family lm tidiers
#' @seealso [stats::glm()]
tidy.glm <- function(x, conf.int = FALSE, conf.level = .95,
exponentiate = FALSE, ...) {
warn_on_appropriated_glm_class(x)
warn_on_subclass(x, "tidy")
ret <- as_tibble(summary(x)$coefficients, rownames = "term")
colnames(ret) <- c("term", "estimate", "std.error", "statistic", "p.value")
coefs <- stats::coef(x)
if (length(coefs) != nrow(ret)) {
# summary(x)$coefficients misses rank deficient rows (i.e. coefs that
# summary.lm() sets to NA), catch them here and add them back. This join is
# costly, so only do it when necessary.
coefs <- tibble::enframe(coefs, name = "term", value = "estimate")
ret <- left_join(coefs, ret, by = c("term", "estimate"))
}
if (conf.int) {
ci <- broom_confint_terms(x, level = conf.level)
ret <- dplyr::left_join(ret, ci, by = "term")
}
if (exponentiate) {
ret <- exponentiate(ret)
}
ret
}
#' @templateVar class glm
#' @template title_desc_augment
#'
#' @param x A `glm` object returned from [stats::glm()].
#' @template param_data
#' @template param_newdata
#' @param type.predict Passed to [stats::predict.glm()] `type`
#' argument. Defaults to `"link"`.
#' @param type.residuals Passed to [stats::residuals.glm()] and
#' to [stats::rstandard.glm()] `type` arguments. Defaults to `"deviance"`.
#' @template param_se_fit
#' @template param_unused_dots
#'
#' @evalRd return_augment(
#' ".se.fit",
#' ".hat",
#' ".sigma",
#' ".std.resid",
#' ".cooksd"
#' )
#'
#' @details If the weights for any of the observations in the model
#' are 0, then columns ".infl" and ".hat" in the result will be 0
#' for those observations.
#'
#' A `.resid` column is not calculated when data is specified via
#' the `newdata` argument.
#'
#' @export
#' @family lm tidiers
#' @seealso [stats::glm()]
#' @include stats-lm-tidiers.R
augment.glm <- function(x,
data = model.frame(x),
newdata = NULL,
type.predict = c("link", "response", "terms"),
type.residuals = c("deviance", "pearson"),
se_fit = FALSE, ...) {
warn_on_appropriated_glm_class(x)
warn_on_subclass(x, "augment")
type.predict <- rlang::arg_match(type.predict)
type.residuals <- rlang::arg_match(type.residuals)
df <- if (is.null(newdata)) data else newdata
df <- as_augment_tibble(df)
# don't use augment_newdata here; don't want raw/response residuals in .resid
if (se_fit) {
pred_obj <- predict(x, newdata, type = type.predict, se.fit = TRUE)
df$.fitted <- pred_obj$fit %>% unname()
df$.se.fit <- pred_obj$se.fit %>% unname()
} else {
df$.fitted <- predict(x, newdata, type = type.predict) %>% unname()
}
if (is.null(newdata)) {
tryCatch(
{
infl <- influence(x, do.coef = FALSE)
df$.resid <- residuals(x, type = type.residuals) %>% unname()
df <- add_hat_sigma_cols(df, x, infl)
df$.std.resid <- rstandard(x, infl = infl, type = type.residuals) %>%
unname()
df$.cooksd <- cooks.distance(x, infl = infl) %>% unname()
},
error = data_error
)
}
df
}
#' @templateVar class glm
#' @template title_desc_glance
#'
#' @param x A `glm` object returned from [stats::glm()].
#' @template param_unused_dots
#'
#' @evalRd return_glance(
#' "null.deviance",
#' "df.null",
#' "logLik",
#' "AIC",
#' "BIC",
#' "deviance",
#' "df.residual",
#' "nobs"
#' )
#'
#' @examples
#'
#' g <- glm(am ~ mpg, mtcars, family = "binomial")
#' glance(g)
#' @export
#' @family lm tidiers
#' @seealso [stats::glm()]
glance.glm <- function(x, ...) {
warn_on_appropriated_glm_class(x)
warn_on_subclass(x, "glance")
as_glance_tibble(
null.deviance = x$null.deviance,
df.null = x$df.null,
logLik = as.numeric(stats::logLik(x)),
AIC = stats::AIC(x),
BIC = stats::BIC(x),
deviance = stats::deviance(x),
df.residual = stats::df.residual(x),
nobs = stats::nobs(x),
na_types = "rirrrrii"
)
}
warn_on_appropriated_glm_class <- function(x) {
warn_on_glm2(x)
warn_on_stanreg(x)
invisible(TRUE)
}
# the output of glm2::glm2 has the same class as objects outputted
# by stats::glm2. glm2 outputs are currently not supported (intentionally)
# so warn that output is not maintained.
warn_on_glm2 <- function(x) {
if (!is.null(x$method) & is.character(x$method)) {
if (x$method == "glm.fit2") {
warning(
"The supplied model object seems to be outputted from the glm2 ",
"package. Tidiers for glm2 output are currently not ",
"maintained; please use caution in interpreting broom output."
)
}
}
invisible(TRUE)
}
# stanreg objects subclass glm, glm tidiers error out (uninformatively),
# and the maintained stanreg tidiers live in broom.mixed.
warn_on_stanreg <- function(x) {
if (!is.null(x$stan_function)) {
stop(
"The supplied model object seems to be outputted from the rstanarm ",
"package. Tidiers for mixed model output now live in the broom.mixed package."
)
}
invisible(TRUE)
}