-
Notifications
You must be signed in to change notification settings - Fork 19
Commit
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
#' Process eBird bar chart data
#'
#' eBird bar charts show the frequency of detection for each week for all
#' species within a region. These can be accessed by visiting any region or
#' hotspot page and clicking the "Bar Charts" link in the left column. As an
#' example, these [bar charts for
#' Guatemala](https://ebird.org/barchart?r=GT&yr=all&m=) list all the species
#' (as well as non-species taxa) that have been observed in eBird in Guatemala
#' and, for each species, the width of the green bar reflects the frequency of
#' detections on eBird checklists within the region (referred to as detection
#' frequency). Detection frequency is provided for each of 4 "weeks" of each
#' month (although these are not technically 7 day weeks since months have more
#' than 28 days). The data underlying the bar charts can be downloaded via a
#' link at the bottom right of the page; however, the text file that's
#' downloaded is in a challenging format to work with. This function is designed
#' to read these text files and return a nicely formatted data frame for use in
#' R.
#'
#' @param filename character; path to a single bar chart data text file
#'   downloaded from the eBird website.
#'
#' @return This function returns a data frame in long format where each row
#'   provides data for one species in one week. `detection_frequency` gives the
#'   proportion of checklists in the region that reported the species in the
#'   given week and `n_detections` gives the number of detections, estimated as
#'   the detection frequency multiplied by the number of checklists and rounded
#'   to the nearest integer. Only taxa whose common name matches an entry in
#'   [ebird_taxonomy] are returned. The total number of checklists in each week
#'   used to estimate detection frequency is provided as a data frame stored in
#'   the `sample_sizes` attribute.
#'
#' @export
#' @family helpers
#' @examples
#' # example bar chart data for svalbard
#' f <- system.file("extdata/barchart-sample.txt", package = "auk")
#' # import and process barchart data
#' barchart <- process_barcharts(f)
#' head(barchart)
#'
#' # the sample sizes for each week can be accessed with
#' attr(barchart, "sample_sizes")
process_barcharts <- function(filename) {
  # exactly one existing file is required; readLines() takes a single path
  stopifnot(is.character(filename), length(filename) == 1,
            file.exists(filename))

  # read the raw text and drop the blank separator lines
  l <- readLines(filename)
  l <- l[l != ""]

  # column headers: 12 months x 4 "weeks" per month = 48 weekly columns
  month_week <- tidyr::expand_grid(month = tolower(month.abb),
                                   week = seq_len(4))
  week_vars <- paste(month_week$month, month_week$week, sep = "_")

  # number of checklists per week
  ss_row <- which(stringr::str_detect(l, "Sample Size:\t"))
  # there must be exactly one sample size row and it must be followed by at
  # least one data row, otherwise seq(ss_row + 1, length(l)) would count
  # backwards and pick up the wrong lines
  if (length(ss_row) != 1 || ss_row >= length(l)) {
    stop("The barchart data is in an unexpected format and cannot be read. ",
         "This function can only process unmodified data downloaded directly ",
         "from the eBird website.")
  }
  ss <- stringr::str_remove(l[ss_row], "Sample Size:\t")
  # keep only the 48 weekly values, ignoring any trailing empty field
  ss <- as.integer(stringr::str_split_1(ss, "\t")[seq_len(48)])
  ss <- dplyr::bind_cols(month_week, n_checklists = ss)

  # detection frequency: every line after the sample size row
  detfrq <- l[seq(ss_row + 1, length(l))]
  # each line is read as a name, 48 frequencies, and one extra trailing
  # column that is parsed as character and then discarded
  cn <- c("common_name", week_vars, "blank")
  ct <- c("c", rep("d", times = length(cn) - 2), "c")
  ct <- paste(ct, collapse = "")
  # I() tells readr to treat the character vector as literal data
  detfrq <- readr::read_tsv(I(detfrq), col_names = cn, col_types = ct)
  detfrq$blank <- NULL
  # transform to long; the weekly column names land in a column called "name"
  detfrq <- tidyr::pivot_longer(detfrq, cols = -"common_name",
                                values_to = "detection_frequency")
  # split e.g. "jan_1" into month and week; separate() drops "name" itself
  detfrq <- tidyr::separate(detfrq, col = "name", into = c("month", "week"))
  detfrq$week <- as.integer(detfrq$week)

  # add in species codes; rows without a matching common name are dropped
  tax <- auk::ebird_taxonomy
  tax <- tax[, c("species_code", "common_name", "scientific_name")]
  detfrq <- dplyr::inner_join(tax, detfrq, by = "common_name")

  # add in num detections, estimated from frequency and weekly sample size
  detfrq <- dplyr::inner_join(detfrq, ss, by = c("month", "week"))
  detfrq$n_detections <- round(detfrq$n_checklists * detfrq$detection_frequency)
  detfrq$n_checklists <- NULL
  detfrq <- dplyr::as_tibble(detfrq)

  attr(detfrq, "sample_sizes") <- ss
  detfrq
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
# Copy a raw eBird bar chart download into the package as the sample file
# that the process_barcharts() example reads via system.file().
# NOTE(review): the source path is specific to the original author's machine.
f_src <- "~/data/ebird/auk/ebird_SJ__2014_2024_1_12_barchart.txt"
f_dst <- "inst/extdata/barchart-sample.txt"
# file.copy() reports failure only through its return value and refuses to
# replace an existing file by default, so overwrite explicitly and fail loudly
if (!file.copy(f_src, f_dst, overwrite = TRUE)) {
  stop("Failed to copy ", f_src, " to ", f_dst, call. = FALSE)
}
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.