Skip to content

Commit

Permalink
Merge pull request epiforecasts#453 from epiforecasts/bug-449-tidyr-c…
Browse files Browse the repository at this point in the history
…omplete

Bug epiforecasts#449 tidyr complete
  • Loading branch information
seabbs committed Feb 5, 2022
2 parents 20c280c + 2f8c4d7 commit 1ebcc74
Show file tree
Hide file tree
Showing 12 changed files with 84 additions and 46 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
@@ -1,6 +1,6 @@
Package: covidregionaldata
Title: Subnational Data for COVID-19 Epidemiology
Version: 0.9.2.3000
Version: 0.9.2.4000
Authors@R:
c(person(given = "Joseph",
family = "Palmer",
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Expand Up @@ -61,6 +61,7 @@ importFrom(dplyr,filter)
importFrom(dplyr,full_join)
importFrom(dplyr,group_by)
importFrom(dplyr,group_by_at)
importFrom(dplyr,group_vars)
importFrom(dplyr,if_else)
importFrom(dplyr,lag)
importFrom(dplyr,last_col)
Expand Down
1 change: 1 addition & 0 deletions NEWS.md
Expand Up @@ -15,6 +15,7 @@ This release is currently under development

## Bug fixes

- Fixed a bug in `fill_empty_dates_with_na()` caused by changes made in version `1.2.0` of `tidyr`.
- Fixed a bug in the data sourced from Germany so that instead of treating it as a line list of individuals it is treated as a relatively finely resolved count data which needs to be summed up (by @sbfnk).

# covidregionaldata 0.9.2
Expand Down
13 changes: 9 additions & 4 deletions R/processing.R
Expand Up @@ -51,16 +51,21 @@ set_negative_values_to_zero <- function(data) {
#' @family compulsory_processing
#' @concept compulsory_processing
#' @importFrom tidyr complete full_seq nesting
#' @importFrom dplyr starts_with
#' @importFrom dplyr starts_with group_by ungroup group_vars
#' @importFrom rlang !!! syms
fill_empty_dates_with_na <- function(data) {
regions <- select(data, starts_with("level_")) %>%
names()

groups <- group_vars(data)

data <- data %>%
ungroup() %>%
complete(
date = full_seq(data$date, period = 1),
nesting(!!!syms(regions))
)
) %>%
group_by(across(.cols = all_of(groups)))
return(data)
}

Expand Down Expand Up @@ -258,9 +263,9 @@ process_internal <- function(clean_data, level, group_vars,
"hosp_new", "hosp_total", "tested_new", "tested_total"
)),
everything()
) %>%
arrange(.data$date, all_of(group_vars_standard[1]))
)
}

dat <- ungroup(dat)

if (localise) {
Expand Down
3 changes: 2 additions & 1 deletion R/shared-methods.R
Expand Up @@ -640,7 +640,8 @@ CountryDataClass <- R6::R6Class("CountryDataClass",
if (!is.null(self$target_regions)) {
self$target_regions <- countryname(
self$target_regions,
destination = "country.name.en"
destination = "country.name.en",
warn = FALSE
)
if (all(is.na(self$target_regions))) {
stop("No countries found with target names")
Expand Down
Binary file modified tests/testthat/custom_data/ecdc.rds
Binary file not shown.
Binary file modified tests/testthat/custom_data/mexico_level_1_snap.rds
Binary file not shown.
Binary file modified tests/testthat/custom_data/mexico_level_2_snap.rds
Binary file not shown.
Binary file modified tests/testthat/custom_data/who.rds
Binary file not shown.
18 changes: 14 additions & 4 deletions tests/testthat/custom_tests/mock_data.R
Expand Up @@ -207,12 +207,16 @@ get_expected_data_for_fill_empty_dates_with_na_test <- function() {
# full data is data with all dates/regions + some NAs in the cases column
expected_data <- data.frame(expand.grid(dates, regions))
colnames(expected_data) <- c("date", "level_1_region")

expected_data$date <- as.Date(expected_data$date)
expected_data$level_1_region <- as.character(expected_data$level_1_region)
expected_data <- expected_data %>%
dplyr::arrange(date, level_1_region) %>%
dplyr::left_join(region_codes, by = c("level_1_region" = "region"))
expected_data$cases <- c(1:5, rep(NA, 4), 10:12)
expected_data <- dplyr::select(
expected_data, date, level_1_region, level_1_region_code, everything()
)
return(dplyr::tibble(expected_data))
}

Expand All @@ -234,10 +238,16 @@ get_expected_data_for_complete_cumulative_columns_test <- function() {
partial_data <- expected_data[-c(6:9), ]

# manually add cumulative cases to get expected data
full_data_with_cum_cases_filled <- covidregionaldata:::fill_empty_dates_with_na(partial_data)
full_data_with_cum_cases_filled <- dplyr::arrange(full_data_with_cum_cases_filled, level_1_region, date)
full_data_with_cum_cases_filled <- cbind(full_data_with_cum_cases_filled, as.integer(c(1, 5, 5, 15, 2, 7, 7, 18, 3, 3, 3, 15)))
full_data_with_cum_cases_filled <- partial_data %>%
covidregionaldata:::fill_empty_dates_with_na()
full_data_with_cum_cases_filled <-
dplyr::arrange(full_data_with_cum_cases_filled, level_1_region, date)
full_data_with_cum_cases_filled <-
cbind(
full_data_with_cum_cases_filled,
as.integer(c(1, 5, 5, 15, 2, 7, 7, 18, 3, 3, 3, 15))
)
colnames(full_data_with_cum_cases_filled)[5] <- "cases_total"

return(dplyr::tibble(full_data_with_cum_cases_filled))
return(full_data_with_cum_cases_filled)
}
17 changes: 13 additions & 4 deletions tests/testthat/test-processing.R
Expand Up @@ -80,19 +80,28 @@ test_that("fill_empty_dates_with_na fills empty dates with NA", {
expected_data <- get_expected_data_for_fill_empty_dates_with_na_test()
# partial data deletes some rows (i.e. gets rid of some dates - all the ones
# with NA in cases)
expected_data <- expected_data

partial_data <- expected_data[-c(6:9), ]
expect_equal(fill_empty_dates_with_na(partial_data), expected_data)
expect_equal(
fill_empty_dates_with_na(partial_data),
expected_data
)
expected_data <- dplyr::mutate(
expected_data,
level_2_region = level_1_region,
level_2_region_code = level_1_region_code
) %>%
group_by(level_2_region, level_2_region_code) %>%
select(
date, level_2_region, level_2_region_code,
level_1_region, level_1_region_code, cases
date, level_1_region, level_1_region_code,
level_2_region, level_2_region_code, cases
)
partial_data <- expected_data[-c(6:9), ]
expect_equal(fill_empty_dates_with_na(partial_data), expected_data)
expect_equal(
fill_empty_dates_with_na(partial_data),
expected_data
)
})

test_that("complete_cumulative_columns works", {
Expand Down
75 changes: 43 additions & 32 deletions tests/testthat/test-regional-datasets.R
Expand Up @@ -19,40 +19,51 @@ if (identical(Sys.getenv("NOT_CRAN"), "true")) {
download <- getOption("testDownload")
}

# get datasets for testing
sources <- get_available_datasets() %>%
dplyr::filter(.data$type %in%
c("national", "regional")) %>%
dplyr::select(
source = class,
level_1_region, level_2_region, level_3_region
) %>%
tidyr::pivot_longer(
cols = -source,
names_to = "level",
values_to = "regions"
) %>%
dplyr::mutate(
level = stringr::str_split(level, "_"),
level = purrr::map_chr(level, ~ .[2])
) %>%
tidyr::drop_na(regions)

# filter out target datasets
if (!is.null(source_of_interest)) {
sources <- sources %>%
dplyr::filter(source %in% source_of_interest)
test_regions <- TRUE
}else{
test_regions <- FALSE
}
if (!is.null(getOption("testRegions"))) {
test_regions <- getOption("testRegions")
}

# apply tests to each data source in turn
sources %>%
dplyr::rowwise() %>%
dplyr::group_split() %>%
purrr::walk(
~ test_regional_dataset(
source = .$source[[1]],
level = .$level[[1]],
download = download
if (test_regions) {
# get datasets for testing
sources <- get_available_datasets() %>%
dplyr::filter(.data$type %in%
c("national", "regional")) %>%
dplyr::select(
source = class,
level_1_region, level_2_region, level_3_region
) %>%
tidyr::pivot_longer(
cols = -source,
names_to = "level",
values_to = "regions"
) %>%
dplyr::mutate(
level = stringr::str_split(level, "_"),
level = purrr::map_chr(level, ~ .[2])
) %>%
tidyr::drop_na(regions)

# filter out target datasets
if (!is.null(source_of_interest)) {
sources <- sources %>%
dplyr::filter(source %in% source_of_interest)
}

# apply tests to each data source in turn
sources %>%
dplyr::rowwise() %>%
dplyr::group_split() %>%
purrr::walk(
~ test_regional_dataset(
source = .$source[[1]],
level = .$level[[1]],
download = download
)
)
)
}
}

0 comments on commit 1ebcc74

Please sign in to comment.