Skip to content

Commit

Permalink
Change data source for Vietnam
Browse files Browse the repository at this point in the history
  • Loading branch information
Vang Le-Quy committed Sep 13, 2021
1 parent a66539f commit 0f0cafb
Showing 1 changed file with 55 additions and 28 deletions.
83 changes: 55 additions & 28 deletions R/Vietnam.R
Expand Up @@ -31,18 +31,21 @@ Vietnam <- R6::R6Class("Vietnam",
#' @field common_data_urls List of named links to raw data.
# nolint start
common_data_urls = list(
"main" = "https://docs.google.com/spreadsheets/d/1_d7oK-SKj-7KrWAW7DbGYEad2JO4TyR7ApsUAuoiH5g/export?format=csv&gid=0"
"case_by_time" = 'https://covid.ncsc.gov.vn/api/v3/covid/provinces?filter_type=case_by_time',
"death_by_time" = 'https://covid.ncsc.gov.vn/api/v3/covid/provinces?filter_type=death_by_time',
"recovered_by_time" = 'https://covid.ncsc.gov.vn/api/v3/covid/provinces?filter_type=recovered_by_time'

),
# nolint end
#' @field source_data_cols existing columns within the raw data
source_data_cols = c(
"cases_new", "deaths_total"
"cases_total", "deaths_total", "recovered_total"
),
#' @field source_text Plain text description of the source of the data
source_text = "Public COVID-19 data curated by 5F team",
#' @field source_url Website address for explanation/introduction of the
#' data
source_url = "https://datastudio.google.com/u/0/reporting/1cc8d45e-2c74-4084-af70-cbbe60f1660e/page/bLUVC", # nolint
source_url = "https://covid.ncsc.gov.vn", # nolint

#' @description Set up a table of region codes for clean data
#' @importFrom tibble tibble
Expand All @@ -55,32 +58,56 @@ Vietnam <- R6::R6Class("Vietnam",
#' @param ... pass additional arguments
#'
#' @importFrom dplyr filter select mutate rename
#' @importFrom tidyr replace_na
#' @importFrom lubridate dmy
#' @importFrom tidyr replace_na drop_na
#' @importFrom lubridate dmy
#' @importFrom jsonlite fromJSON
clean_common = function() {
self$data$clean <- self$data$raw[["main"]] %>%
`colnames<-`(c('date', 'region', 'cases_new',
'case_group', 'deaths_date',
'deaths_region', 'deaths_total')) %>%
select(
date, region, cases_new, deaths_date,
deaths_region, deaths_total
) %>%
mutate(date = dmy(date),
deaths_date = dmy(deaths_date),
cases_new = as.numeric(cases_new),
deaths_total = as.numeric(deaths_total)) %>%
full_join(x = select(., date, region, cases_new)%>%
group_by(date, region)%>%
mutate(cases_new=sum(cases_new))%>%distinct(),
y=select(., deaths_date, deaths_region, deaths_total) %>%
group_by(deaths_date, deaths_region)%>%
mutate(deaths_total=first(deaths_total))%>%distinct(),
by=c("date" = "deaths_date",
"region" = "deaths_region"
)) %>%
tidyr::drop_na(date, region) %>%
rename(level_1_region = region) %>%
Sys.setenv("VROOM_CONNECTION_SIZE" = 131072*4) # Fix VROOM error
provines_url = 'https://covid.ncsc.gov.vn/api/v3/covid/provinces'
bundles = names(self$data$raw)
provines_data = jsonlite::fromJSON(provines_url)

get_bundles_data = function(bundles){
bundles_data = list()
for (bundle in bundles){
url = paste0('https://covid.ncsc.gov.vn/api/v3/covid/provinces?filter_type=', bundle)
data = jsonlite::fromJSON(url)
bundles_data = c(bundles_data, setNames(list(data), bundle))
}
bundles_data
}

bundles_data = get_bundles_data(bundles)

get_province = function(id, data){
row_dat = provines_data[(id=id),]
death_by_time= do.call(cbind, data$death_by_time[id])
case_by_time=do.call(cbind, data$case_by_time[id])
recovered_by_time=do.call(cbind, data$recovered_by_time[id])
if (!identical(row.names(death_by_time), row.names(death_by_time))) {
stop("Dates on case_by_time and death_by_time do not match!")
}
df = dplyr::tibble(date= lubridate::dmy(row.names(case_by_time)),
id = row_dat$id,
name = row_dat$name,
case_by_time= case_by_time,
death_by_time= death_by_time,
recovered_by_time= recovered_by_time)
df
}

df = do.call(rbind, lapply(provines_data$id, function(id){get_province(id, bundles_data)}))
names(df) <- c("date", "id", "region_name", "cases_total", "deaths_total", "recovered_total")

self$data$clean <- df %>%
select( date, region_name, cases_total, deaths_total, recovered_total) %>%
mutate(cases_total = as.numeric(cases_total),
deaths_total = as.numeric(deaths_total),
recovered_total = as.numeric(recovered_total),
region_name = stringr::str_replace_all(region_name, 'TP HCM', 'Hochiminh'),
) %>%
tidyr::drop_na(date, region_name) %>%
rename(level_1_region = region_name) %>%
mutate(
level_1_region = stringi::stri_trans_general(level_1_region, "latin-ascii"),
level_1_region = stringi::stri_trim_both(level_1_region),
Expand Down

0 comments on commit 0f0cafb

Please sign in to comment.