-
Notifications
You must be signed in to change notification settings - Fork 9
/
searchAnalysis.R
224 lines (220 loc) · 8.33 KB
/
searchAnalysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
#' Look up analysis accession IDs for one or more study or sample accessions
#'
#' @details
#' Retrieve analysis accession IDs associated with the supplied study or
#' sample accession.
#'
#' @param x A \code{MgnifyClient} object.
#'
#' @param type A single character value specifying a type of
#' accession IDs specified by \code{accession}. Must be "studies" or "samples".
#'
#' @param accession A single character value or a vector of character values
#' specifying study or sample accession IDs that are used to retrieve analyses
#' IDs.
#'
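#' @param ... Optional arguments passed on to the internal helper functions,
#' for example \code{use.cache} and \code{show.messages} (single boolean
#' values).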
#'
#' @return vector of analysis accession IDs.
#'
#' @examples
#' # Create a client object
#' mg <- MgnifyClient(useCache = FALSE)
#'
#' # Retrieve analysis ids from study MGYS00005058
#' result <- searchAnalysis(mg, "studies", c("MGYS00005058"))
#'
#' \dontrun{
#' # Retrieve all analysis ids from samples
#' result <- searchAnalysis(
#' mg, "samples", c("SRS4392730", "SRS4392743"))
#' }
#'
#' @name searchAnalysis
NULL
#' @rdname searchAnalysis
#' @importFrom plyr llply
#' @include AllClasses.R AllGenerics.R MgnifyClient.R utils.R
#' @export
setMethod("searchAnalysis", signature = c(x = "MgnifyClient"), function(
        x, type, accession, ...){
    # ---- Argument validation ----
    # 'type' must be exactly one value, and that value must name one of the
    # two supported accession kinds.
    if( length(type) != 1 || !(type %in% c("samples", "studies")) ){
        stop("'type' must be 'samples' or 'studies'.", call. = FALSE)
    }
    # 'accession' is validated by the package-level character check.
    if( !.is_non_empty_character(accession) ){
        stop(
            "'accession' must be a single character value or vector of ",
            "character values specifying the MGnify accession identifier.",
            call. = FALSE)
    }
    # ---- Dispatch ----
    # After validation 'type' is either "studies" or "samples", so anything
    # that is not a study lookup is a sample lookup. Extra arguments are
    # forwarded untouched to the chosen helper.
    if( type == "studies" ){
        return(.mgnify_analyses_from_studies(
            client = x, accession = accession, ...))
    }
    .mgnify_analyses_from_samples(client = x, accession = accession, ...)
})
################################ HELP FUNCTIONS ################################
# Collect the analysis accession IDs linked to each supplied study accession.
#
# Arguments:
#   client         Client object handed unchanged to the request helpers.
#   accession      Study accession IDs; one lookup is made per element.
#   use.cache      Single boolean, forwarded to the request helpers.
#   show.messages  Single boolean; when TRUE a status message is emitted and
#                  llply() is asked for a text progress bar.
#   ...            Forwarded to .mgnify_get_x_for_y() and
#                  .mgnify_retrieve_json().
#
# Returns the unlist()-ed analysis IDs. The per-study results are named by
# study accession before flattening. A study without an analyses URL
# contributes nothing and raises a warning.
.mgnify_analyses_from_studies <- function(
        client, accession, use.cache = useCache(client),
        show.messages = verbose(client), ...){
    # Both flags must be single boolean values
    if( !.is_a_bool(use.cache) ){
        stop("'use.cache' must be a single boolean value", call. = FALSE)
    }
    if( !.is_a_bool(show.messages) ){
        stop("'show.messages' must be a single boolean value.", call. = FALSE)
    }
    # Translate the flag into the progress-bar name expected by llply()
    progress_type <- if( show.messages ) "text" else "none"
    if( progress_type == "text" ){
        message("Fetching analyses...")
    }
    # Worker that resolves a single study accession to its analysis IDs
    fetch_for_study <- function(study_id){
        # Ask for the URL that lists the analyses of this study
        analyses_url <- .mgnify_get_x_for_y(
            client, study_id, "studies", "analyses",
            use.cache = use.cache, ...)
        # Nothing to fetch: warn and contribute no IDs for this study
        if( is.null(analyses_url) ){
            warning(
                "\nAnalyses not found for studies ", study_id, call. = FALSE)
            return(NULL)
        }
        payload <- .mgnify_retrieve_json(
            client, complete_url = analyses_url, use.cache = use.cache,
            max.hits = NULL, ...)
        # Only the accession ID of each analysis is of interest
        lapply(payload, function(entry) entry$id)
    }
    per_study <- llply(
        as.list(accession), fetch_for_study, .progress = progress_type)
    # Label each result with its study accession, then flatten
    names(per_study) <- accession
    unlist(per_study)
}
# Collect the analysis accession IDs linked to each supplied sample accession.
#
# Arguments:
#   client         Client object handed unchanged to the request helpers.
#   accession      Sample accession IDs; one lookup is made per element.
#   use.cache      Single boolean, forwarded to the request helpers.
#   show.messages  Single boolean; when TRUE a status message is emitted and
#                  llply() is asked for a text progress bar.
#   ...            Forwarded to the request helpers.
#
# Returns the unlist()-ed analysis IDs. The per-sample results are named by
# sample accession before flattening.
.mgnify_analyses_from_samples <- function(
        client, accession, use.cache = useCache(client),
        show.messages = verbose(client), ...){
    # Both flags must be single boolean values
    if( !.is_a_bool(use.cache) ){
        stop("'use.cache' must be a single boolean value", call. = FALSE)
    }
    if( !.is_a_bool(show.messages) ){
        stop("'show.messages' must be a single boolean value.", call. = FALSE)
    }
    # Translate the flag into the progress-bar name expected by llply()
    progress_type <- if( show.messages ) "text" else "none"
    if( progress_type == "text" ){
        message("Fetching analyses...")
    }
    # Worker that resolves a single sample accession to its analysis IDs
    fetch_for_sample <- function(sample_id){
        analyses_url <- .mgnify_get_x_for_y(
            client, sample_id, "samples", "analyses",
            use.cache = use.cache, ...)
        # Direct route: the sample exposes an analyses URL
        if( !is.null(analyses_url) ){
            payload <- .mgnify_retrieve_json(
                client, complete_url = analyses_url,
                use.cache = use.cache, ...)
            # Only the accession ID of each analysis is of interest
            return(lapply(payload, function(entry) entry$id))
        }
        # Indirect route: some samples only reach their analyses through
        # their runs (open question for the API maintainers as to why).
        .mgnify_analyses_from_samples_based_on_runs(
            client, sample_id, use.cache, ...)
    }
    per_sample <- llply(
        as.list(accession), fetch_for_sample, .progress = progress_type)
    # Label each result with its sample accession, then flatten
    names(per_sample) <- accession
    unlist(per_sample)
}
# Get analysis accessions for one sample by going sample -> runs -> analyses,
# falling back to run -> assemblies -> analyses when a run has no analyses.
#
# Arguments:
#   client     Client object handed unchanged to the request helpers.
#   x          A sample accession ID.
#   use.cache  Single boolean, forwarded to the request helpers.
#   ...        Forwarded to .mgnify_get_x_for_y() and
#              .mgnify_retrieve_json().
#
# Returns the unlist()-ed analysis IDs collected over all runs of the sample.
# NULL is returned (with a warning) when the sample has no runs URL, and the
# result is also NULL when none of the runs leads to an analysis.
.mgnify_analyses_from_samples_based_on_runs <- function(
        client, x, use.cache = useCache(client), ...){
    # Input check
    if( !.is_a_bool(use.cache) ){
        stop("'use.cache' must be a single boolean value", call. = FALSE)
    }
    # Get URL for the runs of this sample
    runurl <- .mgnify_get_x_for_y(
        client, x, "samples", "runs", use.cache = use.cache, ...)
    if( is.null(runurl) ){
        warning("\nAnalyses not found for samples ", x, call. = FALSE)
        return(NULL)
    }
    # If found, get data for runs
    jsondat <- .mgnify_retrieve_json(
        client, complete_url = runurl, use.cache = use.cache, ...)
    # Get accession ID for the runs
    run_accs <- lapply(jsondat, function(y) y$id)
    # Loop through runs
    analyses_accessions <- lapply(run_accs, function(z){
        # Get data url of related analyses
        accurl <- .mgnify_get_x_for_y(
            client, z, "runs", "analyses", use.cache = use.cache, ...)
        # If no data was found, end the searching for this run.
        if( is.null(accurl) ){
            return(NULL)
        }
        # Get data of those analyses
        jsondat <- .mgnify_retrieve_json(
            client, complete_url = accurl, use.cache = use.cache, ...)
        # An empty result means no analysis was found for this run. This is
        # known to occur when an assembly has been harvested (there may be
        # other cases as well), so look for an assembly -> analysis entry
        # instead.
        if( length(jsondat) == 0 ){
            # Get url address for assemblies based on the run
            assemurl <- .mgnify_get_x_for_y(
                client, z, "runs", "assemblies", use.cache = use.cache, ...)
            # NOTE: every other lookup in this file checks the URL for NULL
            # before calling .mgnify_retrieve_json(); what that helper does
            # with a NULL URL is not visible here, so the fallback is
            # guarded in the same way.
            assemids <- list()
            if( !is.null(assemurl) ){
                assemdat <- .mgnify_retrieve_json(
                    client, complete_url = assemurl,
                    use.cache = use.cache, ...)
                # Get accession IDs for assemblies
                assemids <- lapply(assemdat, function(a) a$id)
            }
            # Assumes that there is only one assembly ID per run, so only
            # the first assembly is followed.
            assem_accurl <- NULL
            if( length(assemids) > 0 ){
                assem_accurl <- .mgnify_get_x_for_y(
                    client, assemids[[1]], "assemblies", "analyses",
                    use.cache = use.cache, ...)
            }
            if( !is.null(assem_accurl) ){
                # Get the data on analyses
                jsondat <- .mgnify_retrieve_json(
                    client, complete_url = assem_accurl,
                    use.cache = use.cache, ...)
            } else{
                # Nothing left to try: warn and contribute no IDs for this
                # run ('jsondat' is still the empty analyses result).
                warning(
                    "\nFailed to find an analysis for sample ", x,
                    call. = FALSE)
            }
        }
        # Get analyses IDs (lapply() over NULL or an empty list yields an
        # empty list, which unlist() drops below)
        lapply(jsondat, function(a) a$id)
    })
    # Return the flattened IDs visibly
    unlist(analyses_accessions)
}