/
paired_test_continuous.R
210 lines (177 loc) · 6.11 KB
/
paired_test_continuous.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
#' Paired test for continuous variables
#'
#' Statistical tests for paired continuous variable.
#'
#' @details If the test is requested for two paired groups, the
#' \code{\link[stats]{t.test}} is used.
#'
#' If the test is requested for more than two groups, the test based on
#' ANOVA for repeated measures is used (powered by
#' \code{\link[stats]{aov}})
#'
#' @note This function could be used as `conTest` option in the
#' \code{\link[Hmisc]{summary.formula}} with method `reverse`.
#'
#'
#' @param group (fct) vector of groups
#' @param x (num) vector of observations. Note: length of `x` is
#' considered equal to the number of subjects by the number of
#' groups. Observation must be provided by subject
#' (e.g. c(a1, b1, c1, a2, b2, c2, a3, b3, c3, a4, b4, c4), where
#' the letters, a, b, c, and d represents the groups and the
#' numbers represents the patients' ids). Note only patient with
#' observation in all the levels considered will be used.
#'
#' @return A list with components
#' `P` (the computed P-value),
#' `stat` (the test statistic, either t or F),
#' `df` (degrees of freedom),
#' `testname` (test name),
#' `statname` (statistic name),
#' `namefun` ("paired_tstat", "rep_aov"),
#' `latexstat` (LaTeX representation of statname),
#' `plotmathstat` (for R - the plotmath representation of
#' `statname`, as a character string),
#' `note` (contains a character string note about the test).
#' @export
#'
#' @importFrom rlang .data
#'
#' @examples
#' \donttest{
#' library(Hmisc)
#'
#' ## two groups
#' summary(Species ~ .,
#' data = iris[iris[["Species"]] != "setosa", ],
#' method = "reverse",
#' test = TRUE,
#' conTest = paired_test_continuous
#' )
#'
#' ## more than two groups
#' summary(Species ~ .,
#' data = iris,
#' method = "reverse",
#' test = TRUE,
#' conTest = paired_test_continuous
#' )
#'
#' ## without Hmisc
#' two_obs <- iris[["Sepal.Length"]][iris[["Species"]] != "setosa"]
#' two_groups <- iris[["Species"]][iris[["Species"]] != "setosa"]
#' paired_test_continuous(two_groups, two_obs)
#'
#' obs <- iris[["Sepal.Length"]]
#' many_groups <- iris[["Species"]]
#' paired_test_continuous(many_groups, obs)
#' }
paired_test_continuous <- function(group, x) {
# Imput adjustment and checks -------------------------------------
len_g <- length(group)
len_x <- length(x)
n_lev <- length(levels(group))
if (len_g != len_x) {
ui_stop(paste(
"The length of the variable group has to be the same of",
"the length of the variable x. They aren't equal."
))
}
if (!is.factor(group)) {
ui_warn(
"Grouping variable converted to factor to compute test."
)
# explicit set levels to avoid reordering
group <- factor(group, levels = unique(group))
}
# main constants --------------------------------------------------
original_levels <- levels(group)
# Recreate ids (if possible) --------------------------------------
rle_g <- rle(as.integer(group))[["lengths"]]
ids <- vector("integer", len_g)
id <- 0L
if (length(rle_g) == length(original_levels)) {
# this means that observation are sorted by group
if (diff(range(rle_g)) >= .Machine[["double.eps"]]^0.5) {
ui_warn(paste(
"Data passed by groups and incomplete:\n",
" not same umber of observation among the groups.\n",
"P returned is the standard F statistics.\n",
"(9L is added to this P to identify the cases).\n\n"
))
res <- Hmisc::conTestkw(group, x)
res[["P"]] <- res[["P"]] + 9L
return(res)
}
# observation sorted by groups with the same length
ids <- rep(seq_len(rle_g[[1L]]), length(rle_g))
} else {
# this means observation are sorted by ids
for (i in seq_along(group)) {
actual_lev <- which(original_levels == group[[i]])
is_new_id <- (i == 1L) ||
(group[[i - 1L]] %in% original_levels[actual_lev:n_lev])
id <- id + is_new_id
ids[[i]] <- id
}
}
# main data frame creation ----------------------------------------
data_db <- dplyr::tibble(ids, x, group) %>%
dplyr::distinct() %>%
tidyr::spread("group", "x") %>%
ggplot2::remove_missing() %>%
tidyr::gather("group", "x", -ids) %>%
dplyr::mutate(group = factor(group,
levels = original_levels[original_levels %in% unique(group)]
)) %>%
dplyr::arrange(ids, group)
group_names <- levels(data_db[["group"]])
group_n <- length(group_names)
n_subjects <- length(unique(data_db[["ids"]]))
# Less Than Two groups --------------------------------------------
if (group_n < 2L || n_subjects <= group_n) return(fake_h_group_test())
# Two groups ------------------------------------------------------
if (group_n == 2L) {
data_two <- data_db %>%
tidyr::spread("group", "x")
test_out <- stats::t.test(data_two[[2L]], data_two[[3L]],
paired = TRUE,
var.equal = TRUE
)
# `return()` exits from the function here!
return(list(
# values (mandatory)
P = c(P = test_out[["p.value"]]),
stat = test_out[["statistic"]],
df = test_out[["parameter"]],
# names (mandatory)
testname = "Paired t-test",
statname = "t",
namefun = "paired_tstat",
# special labels (optional)
latexstat = "t_{df}",
plotmathstat = "t[df]",
note = "Two groups: t-test for paired values is done."
))
}
# More than two groups --------------------------------------------
test_out <- summary(
stats::aov(x ~ group + Error(ids / group), data = data_db)
)[["Error: Within"]][[1L]]
list(
# values (mandatory)
P = stats::setNames(test_out[1L, "Pr(>F)"], "P"),
stat = stats::setNames(test_out[1L, "F value"], "F"),
df = stats::setNames(test_out[1L, "Df"], "df"),
# names (mandatory)
testname = "Repeated-measure AOV",
statname = "F",
namefun = "rep_aov",
# special labels (optional)
latexstat = "F_{df}",
plotmathstat = "F[df]",
note = {
"More than two groups: ANOVA for repeated measure is used."
}
)
}