-
Notifications
You must be signed in to change notification settings - Fork 1
/
filter_by.R
241 lines (204 loc) · 8.82 KB
/
filter_by.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
#' @title Filter using the slope filter
#' @description This function is used to improve the quality of the hits found.
#' It computes a regression line in the different samples ad uses
#' the slope of this line to see the trend
#' @importFrom stats lm
#' @importFrom dplyr rename
#' @param screenR_Object The ScreenR object obtained using the
#' \code{\link{create_screenr_object}}
#' @param genes The genes for which the slope as to be computed. Those genes
#' are the result of the three statistical methods selection
#' @param slope_treatment A value used as threshold for the treatment slope
#' @param slope_control A value used as threshold for the control slope
#' @param group_var_treatment The variable to use as independent variable (x)
#' for the linear model of the treatment
#' @param group_var_control The variable to use as independent variable (x)
#' for the linear model of the the control
#' @return A data frame with the slope for the treatment and the control
#' for each gene
#' @concept filter
#' @export
#' @examples
#' object <- get0("object", envir = asNamespace("ScreenR"))
#'
#' filter_by_slope(
#' screenR_Object = object, genes = c("Gene_1", "Gene_2"),
#' group_var_treatment = c("T1", "T2", "TRT"),
#' group_var_control = c("T1", "T2", "Time3", "Time4"),
#' slope_control = 0.5, slope_treatment = 1
#' )
#'
filter_by_slope <- function(screenR_Object, genes, group_var_treatment,
group_var_control, slope_control = NULL, slope_treatment = NULL) {
# Compute the slope of the hits in the treatment Samples
slope_trt <- ScreenR::compute_slope(screenR_Object, genes,
group_var = group_var_treatment
)
# Compute the slope of the hits in the control Samples
slope_contr <- ScreenR::compute_slope(screenR_Object, genes,
group_var = group_var_control
)
data <- screenR_Object@data_table
data <- dplyr::left_join(data, slope_trt, by = "Gene")
data <- dplyr::rename(data, slope_treatment = .data$Slope)
data <- dplyr::left_join(data, slope_contr, by = "Gene")
data <- dplyr::rename(data, slope_control = .data$Slope)
if(!is.null(slope_control)){
print(unique(data$slope_control))
data <- dplyr::filter(data, abs(.data$slope_control) <=
abs(slope_control))
}
if(!is.null(slope_treatment)){
data <- dplyr::filter(data, abs(.data$slope_treatment) >=
abs(slope_treatment))
}
data <- dplyr::distinct(data, .data$Gene,
.data$slope_control, .data$slope_treatment)
# The treatment has more effect than the control
data <- dplyr::mutate(data, keep = dplyr::case_when(
.data$slope_control <= 0 & .data$slope_treatment <= 0 &
.data$slope_treatment < .data$slope_control ~ TRUE,
.data$slope_control >= 0 & .data$slope_treatment >= 0 &
.data$slope_treatment >= .data$slope_control ~ FALSE,
.data$slope_control >= 0 & .data$slope_treatment <= 0 ~ TRUE)
)
data <- dplyr::filter(data, .data$keep)
data$keep <- NULL
return(data)
}
#' @title Compute Slope of a Gene
#' @description This function is used to compute the slope of the gene passed
#' as input
#' @importFrom rlang .data
#' @importFrom dplyr ungroup
#' @param group_var The variable to use as independent variable (x)
#' for the linear model
#' @param screenR_Object The ScreenR object obtained using the
#' \code{\link{create_screenr_object}}
#' @param genes The genes for which the slope as to be computed. Those genes
#' are the result of the three statistical methods selection
#' @return A tibble containing in each row the gene and the corresponding Slope
#' @export
#' @concept compute
#' @examples
#' object <- get0("object", envir = asNamespace("ScreenR"))
#'
#' compute_slope(object,
#' genes = c("Gene_42", "Gene_24"),
#' group_var = c("T1", "T2", "TRT")
#' )
compute_slope <- function(screenR_Object, genes, group_var) {
data <- screenR_Object@data_table
data <- dplyr::filter(data, .data$Gene %in% genes)
data <- dplyr::filter(data, .data$Treatment %in% group_var)
# in order to perform the linear regression as samples are a time series
# the ordered Sample are encoded as number
data <- dplyr::group_by(data, .data$Gene)
data <- dplyr::mutate(data, encode = seq(1, n(), 1))
data <- dplyr::ungroup(data)
data <- dplyr::mutate(dplyr::nest_by(data, .data$Gene),
Slope = lm(Frequency ~ encode, data = data)$coefficients["encode"]
)
slope_tibble <- select(data, .data$Gene, .data$Slope)
return(slope_tibble)
}
#' @title Filter using the variance filter
#' @description This function is used to improve the quality of the hits.
#' It compute the variance among the hits and filter the one with
#' a value greater than the threshold set
#'
#' @param screenR_Object The ScreenR object obtained using the
#' \code{\link{create_screenr_object}}
#' @param genes The genes for which the variance as to be computed.
#' Those genes are the result of the three statistical
#' methods selection
#' @param variance The maximum value of variance accepted
#' @param contrast The variable to use as X for the linear model
#' for the Treatment
#' @param matrix_model a matrix created using \code{\link[stats]{model.matrix}}
#' @return A data frame with the variance for the treatment and the control
#' for each gene
#' @export
#' @concept filter
#' @examples
#' object <- get0("object", envir = asNamespace("ScreenR"))
#' matrix_model <- model.matrix(~ slot(object, "groups"))
#' colnames(matrix_model) <- c("Control", "T1_T2", "Treated")
#' contrast <- limma::makeContrasts(Treated - Control, levels = matrix_model)
#'
#' data <- filter_by_variance(
#' screenR_Object = object, genes = c("Gene_42"),
#' matrix_model = matrix_model, contrast = contrast
#' )
#' head(data)
#'
filter_by_variance <- function(screenR_Object, genes, matrix_model,
variance = 0.5, contrast) {
# Save the data_table in a temporary variable
data <- screenR_Object@data_table
# Create the edgeR object and computing the LogFC
DGEList <- create_edger_obj(screenR_Object)
xglm <- edgeR::estimateDisp(DGEList, matrix_model)
fit <- edgeR::glmFit(xglm, matrix_model)
lrt <- edgeR::glmLRT(fit, contrast = contrast)
lrt <- data.frame(lrt$table)
lrt$Gene <- fit$genes$Gene
lrt <- dplyr::filter(lrt, .data$Gene %in% genes)
lrt <- dplyr::group_by(lrt, .data$Gene)
lrt <- dplyr::mutate(lrt, variance = stats::var(.data$logFC))
lrt <- dplyr::summarise(lrt,
Gene = unique(.data$Gene),
variance = mean(.data$variance), .groups = "drop"
)
# Bind the temporary data_table to the table with the fold change
data <- dplyr::left_join(data, lrt, by = "Gene")
data <- dplyr::filter(data, .data$variance <= variance)
data <- dplyr::rename(data, Variance = .data$variance)
return(data)
}
#' @title Remove rows that have zero count in all samples
#' @description This function removes the rows that have zero count in
#' all samples. It takes care of updating both count_table and
#' annotation_table.
#' Very_Important: It has to be performed before the
#' data normalization.
#'
#' @param screenR_Object The ScreenR object obtained using the
#' \code{\link{create_screenr_object}}
#' @importFrom dplyr if_all
#' @importFrom tidyselect vars_select_helpers
#' @return The ScreenR object with the count_table and the annotation_table
#' filtered.
#' @export
#' @concept compute
#' @examples
#' object <- get0("object", envir = asNamespace("ScreenR"))
#' counts <- get_count_table(object)
#' nrow(counts)
#' object <- remove_all_zero_row(object)
#' counts <- get_count_table(object)
#' nrow(counts)
remove_all_zero_row <- function(screenR_Object) {
counts <- screenR_Object@count_table
# First of all all the row with zero counts are founded
counts_zeros <- filter(
counts,
if_all(
vars_select_helpers$where(is.numeric),
~ .x == 0
)
)
# Then there are filtered
counts <- filter(counts, !.data$Barcode %in% counts_zeros$Barcode)
screenR_Object@count_table <- counts
# The same procedure as to be done for the annotation_table table
# in order to keep thigs working
anno_table <- screenR_Object@annotation_table
anno_table <-
filter(
anno_table,
!.data$Barcode %in% counts_zeros$Barcode
)
screenR_Object@annotation_table <- anno_table
return(screenR_Object)
}