/
atlas_counts.R
94 lines (93 loc) · 3.68 KB
/
atlas_counts.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
#' Return a count of records
#'
#' Prior to downloading data it is often valuable to have some estimate of how
#' many records are available, both for deciding if the query is feasible,
#' and for estimating how long it will take to download. Alternatively, for
#' some kinds of reporting, the count of observations may be all that is
#' required, for example for understanding how observations are growing or
#' shrinking in particular locations, or for particular taxa. To this end,
#' `atlas_counts()` takes arguments in the same format as
#' [atlas_occurrences()], and provides either a total count of records
#' matching the criteria, or a `data.frame` of counts matching the criteria
#' supplied to the `group_by` argument.
#'
#' @param request optional `data_request` object: generated by a call to
#' [galah_call()].
#' @param identify `data.frame`: generated by a call to
#' [galah_identify()].
#' @param filter `data.frame`: generated by a call to
#' [galah_filter()]
#' @param geolocate `string`: generated by a call to
#' [galah_geolocate()]
#' @param data_profile `string`: generated by a call to
#' [galah_apply_profile()]
#' @param group_by `data.frame`: An object of class `galah_group_by`,
#' as returned by [galah_group_by()]. Alternatively a vector of field
#' names (see `search_all(fields)` and `show_all(fields)`).
#' @param limit `numeric`: maximum number of categories to return. Defaults to
#' `NULL`. If limit is NULL, all results are returned. For some categories
#' this will take a while.
#' @param type `string`: one of `c("occurrences", "species")`.
#' Defaults to `"occurrences"`, which returns the number of records
#' that match the selected criteria; alternatively returns the number of
#' species. The formerly accepted value `"records"` is deprecated but
#' remains functional, and is treated as `"occurrences"`.
#' @return
#' An object of class `tbl_df` and `data.frame` (aka a tibble) returning:
#'  * A single number, if `group_by` is not specified or,
#'  * A summary of counts grouped by field(s), if `group_by` is specified
#'
#' @examples \dontrun{
#' # classic syntax:
#' galah_call() |>
#'   galah_filter(year == 2015) |>
#'   atlas_counts()
#'
#' # synonymous with:
#' request_data() |>
#'   filter(year == 2015) |>
#'   count() |>
#'   collect()
#' }
#' @export
atlas_counts <- function(request = NULL,
                         identify = NULL,
                         filter = NULL,
                         geolocate = NULL,
                         data_profile = NULL,
                         group_by = NULL,
                         limit = NULL,
                         type = c("occurrences", "species")
                         ) {
  # capture supplied arguments; this has to happen before any other local
  # variable is created, otherwise that variable would end up in `args` too
  args <- as.list(environment())
  # legacy spellings are documented as still functional, but `match.arg()`
  # would reject them, so translate them to the current name first
  if (is.character(type) && length(type) == 1L &&
      type %in% c("record", "records")) {
    type <- "occurrences"
  }
  args$type <- match.arg(type)
  dr <- check_atlas_inputs(args) # convert to `data_request` object
  # check for outdated naming conventions on the resulting object;
  # `isTRUE()` keeps this from erroring when `dr$type` is NULL
  if (isTRUE(dr$type %in% c("record", "records"))) {
    dr$type <- "occurrences"
  }
  # pass to collect etc
  dr |>
    count() |>
    slice_head(n = limit) |>
    collect()
}
#' @rdname atlas_counts
#' @param x An object of class `data_request`, created using [galah_call()]
#' @param ... currently ignored
#' @param wt currently ignored
#' @param sort currently ignored
#' @param name currently ignored
#' @importFrom dplyr count
#' @export
count.data_request <- function(x, ..., wt, sort, name) {
  # Translate the request type into its count-specific equivalent.
  # `switch()` only evaluates the branch that matches, so `abort()` is
  # reached solely for "media" or for a type with no named branch.
  count_type <- switch(
    x$type,
    "occurrences" = "occurrences-count",
    "species" = "species-count",
    "media" = abort("type = 'media' is not supported by `count()`"),
    abort("`count()` only supports `type = 'occurrences' or` `'species'`")
  )
  # store the translated type on the request and hand the request back
  x$type <- count_type
  x
}