This repository has been archived by the owner on Oct 16, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 17
/
ge_reg.R
367 lines (359 loc) · 13.6 KB
/
ge_reg.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
#' Eberhart and Russell's regression model
#' @description
#' `r badge('stable')`
#'
#' Regression-based stability analysis using the Eberhart and Russell (1966) model.
#'
#' @param .data The dataset containing the columns related to Environments, Genotypes,
#' replication/block and response variable(s)
#' @param env The name of the column that contains the levels of the
#' environments.
#' @param gen The name of the column that contains the levels of the genotypes.
#' @param rep The name of the column that contains the levels of the
#' replications/blocks
#' @param resp The response variable(s). To analyze multiple variables in a
#' single procedure use, for example, `resp = c(var1, var2, var3)`.
#' @param verbose Logical argument. If `verbose = FALSE` the code will run
#' silently.
#' @return An object of class `ge_reg` with the folloing items for each
#' variable:
#' * data: The data with means for genotype and environment combinations and the
#' environment index
#' * anova: The analysis of variance for the regression model.
#' * regression: A data frame with the following columns: `GEN`, the genotypes;
#' `b0` and `b1` the intercept and slope of the regression, respectively;
#' `t(b1=1)` the calculated t-value; `pval_t` the p-value for the t test; `s2di`
#' the deviations from the regression (stability parameter); `F(s2di=0)` the
#' F-test for the deviations; `pval_f` the p-value for the F test; `RMSE` the
#' root-mean-square error; `R2` the determination coefficient of the regression.
#' * b0_variance: The variance of b0.
#' * b1_variance: The variance of b1.
#' @md
#' @seealso [superiority()], [ecovalence()], [ge_stats()]
#' @author Tiago Olivoto, \email{tiagoolivoto@@gmail.com}
#' @export
#' @examples
#' \donttest{
#' library(metan)
#'reg <- ge_reg(data_ge2,
#' env = ENV,
#' gen = GEN,
#' rep = REP,
#' resp = PH)
#'plot(reg)
#'
#'}
#' @references Eberhart, S.A., and W.A. Russell. 1966. Stability parameters for comparing Varieties.
#' Crop Sci. 6:36-40. \doi{10.2135/cropsci1966.0011183X000600010011x}
ge_reg = function(.data,
env,
gen,
rep,
resp,
verbose = TRUE){
factors <-
.data %>%
select({{env}}, {{gen}}, {{rep}}) %>%
mutate(across(everything(), as.factor))
vars <-
.data %>%
select({{resp}}, -names(factors)) %>%
select_numeric_cols()
factors %<>% set_names("ENV", "GEN", "REP")
listres <- list()
nvar <- ncol(vars)
if (verbose == TRUE) {
pb <- progress(max = nvar, style = 4)
}
for (var in 1:nvar) {
data <-
factors %>%
mutate(Y = vars[[var]])
if(has_na(data)){
data <- remove_rows_na(data)
has_text_in_num(data)
}
data2 <-
data %>%
mean_by(ENV, GEN) %>%
as.data.frame()
model1 <- lm(Y ~ GEN + ENV + ENV/REP + ENV * GEN, data = data)
modav <- anova(model1)
mydf <-
data %>%
mean_by(GEN, ENV)
iamb <-
data %>%
mean_by(ENV) %>%
add_cols(IndAmb = Y - mean(Y)) %>%
select_cols(ENV, IndAmb)
iamb2 <-
data %>%
mean_by(ENV, GEN) %>%
left_join(iamb, by = "ENV")
meandf <- make_mat(mydf, GEN, ENV, Y) %>% rownames_to_column("GEN")
matx <- make_mat(mydf, GEN, ENV, Y) %>% as.matrix()
iij <- apply(matx, 2, mean, na.rm = TRUE) - mean(matx, na.rm = TRUE)
sumij2 <- sum((iij)^2)
YiIj <- matx %*% iij
if(has_na(matx)){
missing <- which(apply(is.na(matx), 1, function(x){any(x) == TRUE}) == TRUE)
YiIj_complete <- NULL
for(i in seq_along(missing)){
YiIj_complete[i] <- matx[missing[i],][!is.na(matx[missing[i],])] %*% iij[!is.na(matx[missing[i],])]
}
YiIj[which(is.na(YiIj))] <- YiIj_complete
warning("Genotypes ", paste(names(missing), collapse = ", "), " missing in some environments", call. = FALSE)
warning("Regression parameters computed after removing missing values", call. = FALSE)
}
bij <- YiIj/sumij2
svar <- (apply(matx^2, 1, sum, na.rm = TRUE)) - (((apply(matx, 1, sum, na.rm = TRUE))^2)/ncol(matx))
bYijIj <- bij * YiIj
dij <- svar - bYijIj
pred <- apply(matx, 1, mean, na.rm = TRUE) + bij %*% iij
gof <- function(x, y){
R2 = NULL
RMSE = NULL
for (i in 1:nrow(x)){
R2[i] = cor(x[i, ], y[i, ], use = "complete.obs")^2
RMSE[i] = sqrt(sum((x[i, ] - y[i, ])^2, na.rm = TRUE)/ncol(x))
}
return(list(R2 = R2, RMSE = RMSE))
}
gof <- gof(pred, matx)
S2e <- modav$"Mean Sq"[5]
nrep <- length(levels(data$REP))
en <- length(levels(data$ENV))
ge <- length(levels(mydf$GEN))
S2di <- (dij/(en - 2)) - (S2e/nrep)
amod2 <- anova(lm(Y ~ GEN + ENV, data = data2))
# amod2 <- anova(model2)
SSL <- amod2$"Sum Sq"[2]
SSGxL <- amod2$"Sum Sq"[3]
SS.L.GxL <- SSL + SSGxL
SSL.Linear <- (1/length(levels(data$GEN))) * (colSums(matx, na.rm = TRUE) %*% iij)^2/sum(iij^2)
SS.L.GxL.linear <- sum(bYijIj) - SSL.Linear
Df <- c(en * ge - 1,
ge - 1,
ge * (en - 1),
1,
ge - 1,
ge * (en - 2),
replicate(length(dij), en - 2),
en*(nrep - 1) * (ge - 1))
poolerr <- modav$"Sum Sq"[5]/nrep
sigma2 <- modav$"Mean Sq"[5]
dferr <- modav$"Df"[5]
vbo <- sigma2 / (en * nrep)
vb1 <- sigma2 / (nrep * sumij2)
tcal <- (bij - 1) / sqrt(vb1)
ptcal <- 2 * pt(abs(tcal), dferr, lower.tail = FALSE)
SSS <- c(sum(amod2$"Sum Sq"),
amod2$"Sum Sq"[1],
SSL + SSGxL,
SSL.Linear,
SS.L.GxL.linear,
sum(dij),
dij,
poolerr) * nrep
MSSS <- (SSS/Df)
FVAL <- c(NA,
MSSS[2]/MSSS[6],
NA,
NA,
MSSS[5]/MSSS[6],
NA,
MSSS[7:(length(MSSS) - 1)]/MSSS[length(MSSS)],
NA)
PLINES <- 1 - pf(FVAL[7:(length(MSSS) - 1)], Df[7], Df[length(Df)])
pval <- c(NA,
1 - pf(FVAL[2], Df[2], Df[6]),
NA,
NA,
1 - pf(FVAL[5], Df[5], Df[6]),
NA,
PLINES,
NA)
anovadf <- data.frame(Df,
`Sum Sq` = SSS,
`Mean Sq` = MSSS,
`F value` = FVAL,
`Pr(>F)` = pval,
check.names = FALSE)
rownames(anovadf) <- c("Total", "GEN", "ENV + (GEN x ENV)", "ENV (linear)",
" GEN x ENV (linear)", "Pooled deviation",
levels(data$GEN), "Pooled error")
temp <- structure(list(data = iamb2,
anova = as_tibble(rownames_to_column(anovadf, "SV")),
regression = tibble(GEN = levels(mydf$GEN),
b0 = apply(matx, 1, mean, na.rm = TRUE),
b1 = as.numeric(bij),
`t(b1=1)` = tcal,
pval_t = ptcal,
s2di = as.numeric(S2di),
`F(s2di=0)` = FVAL[7:(length(FVAL) - 1)],
pval_f = PLINES,
RMSE = gof$RMSE,
R2 = gof$R2),
bo_variance = vbo,
b1_variance = vb1),
class = "ge_reg")
if (verbose == TRUE) {
run_progress(pb,
actual = var,
text = paste("Evaluating trait", names(vars[var])))
}
listres[[paste(names(vars[var]))]] <- temp
}
return(structure(listres, class = "ge_reg"))
}
#' Plot an object of class ge_reg
#'
#' Plot the regression model generated by the function `ge_reg`.
#'
#'
#' @param x An object of class `ge_factanal`
#' @param var The variable to plot. Defaults to `var = 1` the first
#' variable of `x`.
#' @param type The type of plot to show. `type = 1` produces a plot with
#' the environmental index in the x axis and the genotype mean yield in the y
#' axis. `type = 2` produces a plot with the response variable in the x
#' axis and the slope/deviations of the regression in the y axis.
#' @param plot_theme The graphical theme of the plot. Default is
#' `plot_theme = theme_metan()`. For more details, see
#' [ggplot2::theme()].
#' @param x.lim The range of x-axis. Default is `NULL` (maximum and minimum
#' values of the data set). New arguments can be inserted as `x.lim =
#' c(x.min, x.max)`.
#' @param x.breaks The breaks to be plotted in the x-axis. Default is
#' `authomatic breaks`. New arguments can be inserted as `x.breaks =
#' c(breaks)`
#' @param x.lab The label of x-axis. Each plot has a default value. New
#' arguments can be inserted as `x.lab = "my label"`.
#' @param y.lim The range of x-axis. Default is `NULL`. The same arguments
#' than `x.lim` can be used.
#' @param y.breaks The breaks to be plotted in the x-axis. Default is
#' `authomatic breaks`. The same arguments than `x.breaks` can be
#' used.
#' @param y.lab The label of y-axis. Each plot has a default value. New
#' arguments can be inserted as `y.lab = "my label"`.
#' @param leg.position The position of the legend.
#' @param size.tex.lab The size of the text in the axes text and labels. Default
#' is `12`.
#' @param ... Currently not used..
#' @author Tiago Olivoto \email{tiagoolivoto@@gmail.com}
#' @seealso [ge_factanal()]
#' @method plot ge_reg
#' @return An object of class `gg, ggplot`.
#' @export
#' @examples
#' \donttest{
#' library(metan)
#' model <- ge_reg(data_ge2, ENV, GEN, REP, PH)
#' plot(model)
#' }
#'
plot.ge_reg <- function(x,
var = 1,
type = 1,
plot_theme = theme_metan(),
x.lim = NULL,
x.breaks = waiver(),
x.lab = NULL,
y.lim = NULL,
y.breaks = waiver(),
y.lab = NULL,
leg.position = "right",
size.tex.lab = 12,
...){
x <- x[[var]]
if(!type %in% c(1, 2)){
stop("Argument 'type' must be either 1 or 2", call. = FALSE)
}
if(type == 1){
y.lab <- ifelse(missing(y.lab), "Response variable", y.lab)
x.lab <- ifelse(missing(x.lab), "Environmental index", x.lab)
p <-
ggplot(x$data, aes(x = IndAmb, y = Y))+
geom_point(aes(colour = GEN), size = 1.5)+
geom_smooth(aes(colour = GEN), method = "lm", formula = y ~ x, se = FALSE)+
theme_bw()+
labs(x = x.lab, y = y.lab)+
plot_theme %+replace%
theme(axis.text = element_text(size = size.tex.lab, colour = "black"),
axis.title = element_text(size = size.tex.lab, colour = "black"),
axis.ticks = element_line(color = "black"),
axis.ticks.length = unit(.2, "cm"),
legend.position = leg.position)
return(p)
}
if(type == 2){
y.lab <- ifelse(missing(y.lab), "Slope of the regression", y.lab)
x.lab <- ifelse(missing(x.lab), "Response variable", x.lab)
p <-
ggplot(x$regression, aes(x = b0, y = b1))+
geom_point(size = 1.5)+
geom_hline(yintercept = mean(x$regression$b1))+
geom_text_repel(aes(label = GEN))+
labs(x = x.lab, y = y.lab) +
plot_theme
p2 <-
ggplot(x$regression, aes(x = b0, y = s2di))+
geom_point(size = 1.5)+
geom_hline(yintercept = mean(x$regression$s2di))+
geom_text_repel(aes(label = GEN))+
labs(x = x.lab, y = "Deviations from the regression") +
plot_theme
p + p2
}
}
#' Print an object of class ge_reg
#'
#' Print the `ge_reg` object in two ways. By default, the results are shown
#' in the R console. The results can also be exported to the directory into a
#' *.txt file.
#'
#' @param x An object of class `ge_reg`.
#' @param export A logical argument. If `TRUE`, a *.txt file is exported to
#' the working directory.
#' @param file.name The name of the file if `export = TRUE`
#' @param digits The significant digits to be shown.
#' @param ... Options used by the tibble package to format the output. See
#' [`tibble::print()`][tibble::formatting] for more details.
#' @author Tiago Olivoto \email{tiagoolivoto@@gmail.com}
#' @method print ge_reg
#' @export
#' @examples
#' \donttest{
#'
#' library(metan)
#' model <- ge_reg(data_ge2, ENV, GEN, REP, PH)
#' print(model)
#' }
print.ge_reg <- function(x, export = FALSE, file.name = NULL, digits = 3, ...) {
opar <- options(pillar.sigfig = digits)
on.exit(options(opar))
if (export == TRUE) {
file.name <- ifelse(is.null(file.name) == TRUE, "ge_reg print", file.name)
sink(paste0(file.name, ".txt"))
}
for (i in 1:length(x)) {
var <- x[[i]]
cat("Variable", names(x)[i], "\n")
cat("---------------------------------------------------------------------------\n")
cat("Joint-regression Analysis of variance\n")
cat("---------------------------------------------------------------------------\n")
print(var$anova)
cat("---------------------------------------------------------------------------\n")
cat("Regression parameters\n")
cat("---------------------------------------------------------------------------\n")
print(var$regression)
cat("---------------------------------------------------------------------------\n")
cat("Variance of b0:", var[["bo_variance"]], "\n")
cat("Variance of b1:", var[["b1_variance"]], "\n")
cat("\n\n\n")
}
if (export == TRUE) {
sink()
}
}