Skip to content

Commit

Permalink
Merge pull request #53 from PublicHealthEngland/DEVELOPMENT_MASTER
Browse files Browse the repository at this point in the history
Development master
  • Loading branch information
PHEgeorginaanderson committed Apr 11, 2020
2 parents cd54581 + 83398cc commit 210ff4b
Show file tree
Hide file tree
Showing 18 changed files with 142 additions and 70 deletions.
7 changes: 4 additions & 3 deletions DESCRIPTION
@@ -1,6 +1,6 @@
Package: PHEindicatormethods
Type: Package
Version: 1.3.0
Version: 1.3.1
Title: Common Public Health Statistics and their Confidence Intervals
Description: Functions to calculate commonly used public health statistics and
their confidence intervals using methods approved for use in the production
Expand Down Expand Up @@ -36,11 +36,12 @@ Imports:
broom,
tidyr,
purrr,
stats
stats,
tibble
Suggests:
knitr,
readxl,
rmarkdown,
testthat
RoxygenNote: 7.0.2
RoxygenNote: 7.1.0
VignetteBuilder: knitr
1 change: 1 addition & 0 deletions NAMESPACE
Expand Up @@ -20,6 +20,7 @@ importFrom(rlang,sym)
importFrom(stats,lm)
importFrom(stats,qnorm)
importFrom(stats,rnorm)
importFrom(tibble,as_tibble)
importFrom(tidyr,nest)
importFrom(tidyr,spread)
importFrom(tidyr,unnest)
9 changes: 9 additions & 0 deletions NEWS.md
@@ -1,3 +1,12 @@
## PHEindicatormethods v1.3.1

`phe_proportion`, `phe_rate`, `phe_quantile`, `phe_life_expectancy`, `phe_sii`:
Functions amended to ensure continued compatibility with dplyr when v1.0.0 is released.
These changes will not be noticeable to end users.

`phe_life_expectancy`: dropped population and death columns from output as these are no longer applicable to the final LE statistic. Added pops_used and dths_used columns to output when type = 'full' which reflect the cumulative populations and deaths used in each LE calculation (ie the pops and deaths for all ages equal to or above the Life_Expectancy_At age)


## PHEindicatormethods v1.3.0
* `phe_sii function` updated to be able to output multiple confidence intervals
* `phe_life_expectancy()` function previously calculated life expectancy and confidence levels inaccurately when the number of deaths in all age groups except the final one was 40% or more of the size of the population in any age group (apart from the 1–4 age group where it’s 50%). It had an affect in very few scenarios, but was inaccurate nonetheless. This has now been corrected to agree with the published methodology
Expand Down
50 changes: 39 additions & 11 deletions R/LifeExpectancy.R
Expand Up @@ -20,6 +20,9 @@
#' added details on the calculation within the dataframe); quoted
#' string; default full
#' @inheritParams phe_dsr
#' @return returns a data frame containing the life expectancies and confidence intervals
#' for each le_age requested. When type = 'full' additionally returns the cumulative
#' populations and deaths used in each LE calculation and metadata indicating parameters passed.
#' @details This function aligns with the methodology in Public Health England's
#' \href{https://fingertips.phe.org.uk/documents/PHE\%20Life\%20Expectancy\%20Calculator.xlsm}{Life Expectancy Excel Tool}.
#'
Expand Down Expand Up @@ -70,6 +73,7 @@
#' @inheritParams phe_dsr
#' @import dplyr
#' @importFrom purrr map_chr
#' @importFrom tibble as_tibble
#' @examples
#' library(dplyr)
#'
Expand Down Expand Up @@ -195,11 +199,14 @@ phe_life_expectancy <- function(data, deaths, population, startage,
mutate_at(vars(grouping_factors), as.character) %>% #stops warning in cases where filters result in 0 records
group_by_at(group_vars(data))
}
negative_deaths <- negative_deaths %>%

negative_deaths <- as_tibble(negative_deaths) %>%
group_by_at(group_vars(data)) %>%
filter({{ deaths }} < 0) %>%
count() %>%
filter(n != 0) %>%
select(-n)

if (nrow(negative_deaths) > 0) {
warning("some age bands have negative deaths; outputs have been suppressed to NAs")
if (length(group_vars(data)) > 0) {
Expand All @@ -216,10 +223,8 @@ phe_life_expectancy <- function(data, deaths, population, startage,
select(-startage_2b_removed)
return(data)
}



}

# check for less than or equal to zero pops
negative_pops <- data
if (length(group_vars(data)) > 0) {
Expand All @@ -228,11 +233,14 @@ phe_life_expectancy <- function(data, deaths, population, startage,
mutate_at(vars(grouping_factors), as.character) %>% #stops warning in cases where filters result in 0 records
group_by_at(group_vars(data))
}
negative_pops <- negative_pops %>%

negative_pops <- as_tibble(negative_pops) %>%
group_by_at(group_vars(data)) %>%
filter({{ population }} <= 0) %>%
count() %>%
filter(n != 0) %>%
select(-n)

if (nrow(negative_pops) > 0) {
warning("some age bands have a zero or less population; outputs have been suppressed to NAs")
if (length(group_vars(data)) > 0) {
Expand All @@ -249,13 +257,12 @@ phe_life_expectancy <- function(data, deaths, population, startage,
select(-startage_2b_removed)
return(data)
}



}

# check for all rows per group
number_age_bands <- 20 #length(age_contents)
incomplete_areas <- data %>%
incomplete_areas <- as_tibble(data) %>%
group_by_at(group_vars(data)) %>%
count()
if (length(group_vars(data)) > 0) {
incomplete_areas <- incomplete_areas %>%
Expand Down Expand Up @@ -296,11 +303,13 @@ phe_life_expectancy <- function(data, deaths, population, startage,
mutate_at(vars(grouping_factors), as.character) %>% #stops warning in cases where filters result in 0 records
group_by_at(group_vars(data))
}
deaths_more_than_pops <- deaths_more_than_pops %>%
deaths_more_than_pops <- as_tibble(deaths_more_than_pops) %>%
group_by_at(group_vars(data)) %>%
filter({{ deaths }} > {{ population }}) %>%
count() %>%
filter(n != 0) %>%
select(-n)

if (nrow(deaths_more_than_pops) > 0) {
warning("some age bands have more deaths than population; outputs have been suppressed to NAs")
if (length(group_vars(data)) > 0) {
Expand Down Expand Up @@ -440,6 +449,21 @@ phe_life_expectancy <- function(data, deaths, population, startage,

if (nrow(suppressed_data) > 0) data <- bind_rows(data, suppressed_data)

# calculate cumulative pops and deaths used in each calc (sum for all startages >= startage)
cumdata <- data %>%
arrange(desc(startage_2b_removed)) %>%
select(startage_2b_removed, {{population}}, {{deaths}}) %>%
mutate(pops_used = cumsum({{population}}),
dths_used = cumsum({{deaths}})) %>%
select(-{{population}}, -{{deaths}}) %>%
arrange(startage_2b_removed)

# join cumulative deaths and pops to data frame and drop original deaths and pops
join_vars <- c(group_vars(data), "startage_2b_removed")
data <- data %>%
left_join(cumdata, by = join_vars) %>%
select(-{{population}}, -{{deaths}})

data <- data %>%
select(-ends_with("_2b_removed"))
if (length(le_age) == 1) {
Expand All @@ -465,7 +489,11 @@ phe_life_expectancy <- function(data, deaths, population, startage,
mutate(confidence = paste0(confidence * 100, "%", collapse = ", "),
statistic = paste("life expectancy at", {{ startage }}),
method = "Chiang, using Silcocks et al for confidence limits")
} else {
data <- data %>%
select(-pops_used, -dths_used)
}
return(data)

return(as.data.frame(data))

}
6 changes: 6 additions & 0 deletions R/PHEindicatormethods.R
@@ -1,6 +1,9 @@
#' PHEindicatormethods: A package for performing standard statistics for public
#' health indicators
#'
#' A package for performing standard statistics for public
#' health indicators.
#'
#' @docType package
#' @name PHEindicatormethods
NULL
Expand All @@ -10,6 +13,7 @@ globalVariables(c("a_vals",
"b_sqrt_a",
"b_vals",
"CI_params",
"dths_used",
"ei",
"esp2013",
"estimate",
Expand All @@ -25,10 +29,12 @@ globalVariables(c("a_vals",
"method",
"model",
"naflag",
"na_flag",
"nrefpop_calc",
"observed",
"p_2b_removed",
"p.value",
"pops_used",
"qchisq",
"qi_2b_removed",
"qinverted",
Expand Down
2 changes: 1 addition & 1 deletion R/Proportions.R
Expand Up @@ -83,7 +83,7 @@ phe_proportion <- function(data, x, n, type="full", confidence=0.95, multiplier=


# if data is grouped then summarise
if(!is.null(groups(data))) {
if(!n_groups(data) == 1) {
data <- data %>%
summarise({{ x }} := sum({{ x }}),
{{ n }} := sum({{ n }}))
Expand Down
11 changes: 6 additions & 5 deletions R/Quantiles.R
Expand Up @@ -90,22 +90,23 @@ phe_quantile <- function(data, values, highergeog = NULL, nquantiles=10L,
# error handling for valid data types and values
if (!(is.numeric(pull(data, {{ values }})))) {
stop("values argument must be a numeric field from data")

}

#check all invert values are identical within groups
} else if (nrow(count(data,invert_calc)) != nrow(count(data))) {
if (!n_groups(data) == nrow(unique(select(data,invert_calc)))) {
stop("invert field values must take the same logical value for each data grouping set")
}


# assign quantiles
phe_quantile <- data %>%
add_count(naflag = is.na({{ values }})) %>%
mutate(naflag = if_else(is.na({{ values }}),0,1)) %>%
add_count(name = "na_flag", wt = naflag) %>%
mutate(adj_value = if_else(invert_calc == TRUE, max({{ values }}, na.rm=TRUE)-{{ values }},{{ values }}),
rank = rank(adj_value, ties.method="min", na.last = "keep"),
quantile = floor((nquantiles+1)-ceiling(((n+1)-rank)/(n/nquantiles))),
quantile = floor((nquantiles+1)-ceiling(((na_flag+1)-rank)/(na_flag/nquantiles))),
quantile = if_else(quantile == 0,1,quantile)) %>%
select(-naflag,-n,-adj_value, -rank) %>%
select(-naflag, -na_flag, -adj_value, -rank) %>%
mutate(nquantiles= nquantiles,
groupvars = paste0(group_vars(data),collapse = ", "),
qinverted = if_else(invert_calc == TRUE,"lowest quantile represents highest values",
Expand Down
2 changes: 1 addition & 1 deletion R/Rates.R
Expand Up @@ -75,7 +75,7 @@ phe_rate <- function(data,x, n, type = "full", confidence = 0.95, multiplier = 1


# if data is grouped then summarise
if(!is.null(groups(data))) {
if(!n_groups(data) == 1) {
data <- data %>%
summarise({{ x }} := sum({{ x }}),
{{ n }} := sum({{ n }}))
Expand Down

0 comments on commit 210ff4b

Please sign in to comment.