-
Notifications
You must be signed in to change notification settings - Fork 11
/
hits_by_groupings.R
172 lines (155 loc) · 6.24 KB
/
hits_by_groupings.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
#' Biological hits per category
#'
#' The \code{hits_by_groupings_DT} (DT option) and
#' \code{hits_by_groupings} (data frame option) functions create tables
#' with one row per category("Biological", "Chemical", or "Chemical Class").
#' The columns indicate the "Biological" groupings. The values in the table
#' signify how many sites have samples with EARs that exceeded the hit_threshold
#' for that particular "Biological"/category combination. If the user chooses
#' "Biological" as the category, it is a simple 2-column table of "Biological"
#' groupings and number of sites (nSites).
#'
#' The tables result in slightly different results for a single site, displaying
#' the number of samples with hits rather than the number of sites.
#'
#' @param chemical_summary Data frame from \code{\link{get_chemical_summary}}.
#' @param mean_logic Logical. \code{TRUE} displays the mean sample from each site,
#' \code{FALSE} displays the maximum sample from each site.
#' @param sum_logic Logical. \code{TRUE} sums the EARs in a specified grouping,
#' \code{FALSE} does not. \code{FALSE} may be better for traditional benchmarks as
#' opposed to ToxCast benchmarks.
#' @param category Character. Either "Biological", "Chemical Class", or "Chemical".
#' @param hit_threshold Numeric threshold defining a "hit".
#' @export
#' @return data frame with one row per category, and one column per Biological grouping.
#' @rdname hits_by_groupings_DT
#' @examples
#' # This is the example workflow:
#' path_to_tox <- system.file("extdata", package = "toxEval")
#' file_name <- "OWC_data_fromSup.xlsx"
#'
#' full_path <- file.path(path_to_tox, file_name)
#'
#' tox_list <- create_toxEval(full_path)
#'
#' ACC <- get_ACC(tox_list$chem_info$CAS)
#' ACC <- remove_flags(ACC)
#'
#' cleaned_ep <- clean_endPoint_info(end_point_info)
#' filtered_ep <- filter_groups(cleaned_ep)
#' chemical_summary <- get_chemical_summary(tox_list, ACC, filtered_ep)
#'
#' site_df <- hits_by_groupings(chemical_summary, category = "Biological")
#'
#' hits_by_groupings_DT(chemical_summary, category = "Biological")
#' hits_by_groupings_DT(chemical_summary, category = "Chemical Class")
#' hits_by_groupings_DT(chemical_summary, category = "Chemical")
#'
hits_by_groupings_DT <- function(chemical_summary,
category = "Biological",
mean_logic = FALSE,
sum_logic = TRUE,
hit_threshold = 0.1) {
match.arg(category, c("Biological", "Chemical Class", "Chemical"))
tableData <- hits_by_groupings(
chemical_summary = chemical_summary,
category = category,
mean_logic = mean_logic,
sum_logic = sum_logic,
hit_threshold = hit_threshold
)
cuts <- seq(0, max(as.matrix(tableData), na.rm = TRUE), length.out = 8)
colors <- RColorBrewer::brewer.pal(9, "Blues") # "RdYlBu"
tableData1 <- DT::datatable(tableData,
extensions = "Buttons",
rownames = TRUE,
options = list(
scrollX = TRUE,
dom = "Bfrtip",
buttons = list("colvis"),
order = list(list(1, "desc"))
)
)
if (category != "Biological") {
for (i in 1:ncol(tableData)) {
tableData1 <- DT::formatStyle(tableData1,
columns = names(tableData)[i],
backgroundColor = DT::styleInterval(cuts = cuts, values = colors),
color = DT::styleInterval(0.75 * max(tableData, na.rm = TRUE), values = c("black", "white")),
`font-size` = "17px"
)
}
}
return(tableData1)
}
#' @export
#' @rdname hits_by_groupings_DT
hits_by_groupings <- function(chemical_summary,
category,
mean_logic = FALSE,
sum_logic = TRUE,
hit_threshold = 0.1) {
match.arg(category, c("Biological", "Chemical Class", "Chemical"))
if (category == "Biological") {
chemical_summary$category <- chemical_summary$Bio_category
} else if (category == "Chemical Class") {
chemical_summary$category <- chemical_summary$Class
} else {
chemical_summary$category <- chemical_summary$chnm
}
if (length(unique(chemical_summary$site)) > 1) {
if (!sum_logic) {
tableData <- chemical_summary %>%
dplyr::group_by(site, Bio_category, category) %>%
dplyr::summarize(meanEAR = ifelse(mean_logic, mean(EAR), max(EAR))) %>%
dplyr::group_by(Bio_category, category) %>%
dplyr::summarize(nSites = sum(meanEAR > hit_threshold)) %>%
data.frame()
} else {
tableData <- chemical_summary %>%
dplyr::group_by(site, Bio_category, category, date) %>%
dplyr::summarize(sumEAR = sum(EAR)) %>%
dplyr::group_by(site, Bio_category, category) %>%
dplyr::summarize(meanEAR = ifelse(mean_logic, mean(sumEAR), max(sumEAR))) %>%
dplyr::group_by(Bio_category, category) %>%
dplyr::summarize(nSites = sum(meanEAR > hit_threshold)) %>%
data.frame()
}
} else {
if (!sum_logic) {
tableData <- chemical_summary %>%
dplyr::group_by(Bio_category, category) %>%
dplyr::summarise(nSites = sum(EAR > hit_threshold)) %>%
data.frame()
} else {
tableData <- chemical_summary %>%
dplyr::group_by(Bio_category, category, date) %>%
dplyr::summarise(sumEAR = sum(EAR)) %>%
data.frame() %>%
dplyr::group_by(Bio_category, category) %>%
dplyr::summarise(nSites = sum(sumEAR > hit_threshold)) %>%
data.frame()
}
}
if (category != "Biological") {
tableData <- tableData %>%
tidyr::spread(Bio_category, nSites)
sumOfColumns <- colSums(tableData[-1], na.rm = TRUE)
if (!all(sumOfColumns == 0)) {
orderData <- order(sumOfColumns, decreasing = TRUE)
orderData <- orderData[sumOfColumns[orderData] != 0] + 1
tableData <- tableData[, c(1, orderData)]
}
groups <- tableData$category
tableData <- tableData[!is.na(groups), -1, drop = FALSE]
rownames(tableData) <- groups[!is.na(groups)]
} else {
tableData <- dplyr::select(tableData, Bio_category, nSites)
rownames(tableData) <- tableData$Bio_category
tableData <- tableData[, -1, drop = FALSE]
}
if (length(unique(chemical_summary$site)) == 1) {
names(tableData)[names(tableData) == "nSites"] <- "nSamples"
}
return(tableData)
}