Skip to content

Commit

Permalink
style changes for utilities. Update nameMetric to avoid leaking globals
Browse files Browse the repository at this point in the history
  • Loading branch information
Protonk committed Oct 11, 2013
1 parent ffcbd7a commit 60cbf7f
Showing 1 changed file with 65 additions and 39 deletions.
104 changes: 65 additions & 39 deletions R/util.R
@@ -1,6 +1,6 @@
#####
###
### External Utility functions for handling name data
### External utility functions for handling name data
###
#####

Expand Down Expand Up @@ -29,28 +29,40 @@ byNameCount <- function(data) {
# restrict to gender passed by argument
years <- count(data[data[, gender] > 0, ], vars = c("Name", gender))
# count gender-year
name.count <- with(years, rowsum(freq * get(gender), Name, reorder = FALSE))
name.count <- with(years,
rowsum(freq * get(gender), Name, reorder = FALSE)
)
# year only
year.count <- with(years, rowsum(freq, Name, reorder = FALSE))
year.df <- data.frame(Name = rownames(year.count),
count = name.count,
Appearances = year.count,
stringsAsFactors = FALSE)
names(year.df) <- c("Name",
paste0("Count", substitute(gender)),
paste0("Sumyears", substitute(gender)))
year.count <- with(years,
rowsum(freq, Name, reorder = FALSE)
)
year.df <- data.frame(
Name = rownames(year.count),
count = name.count,
Appearances = year.count,
stringsAsFactors = FALSE
)
names(year.df) <- c(
"Name",
paste0("Count", substitute(gender)),
paste0("Sumyears", substitute(gender))
)

return(unrowname(year.df))
}

# populate name column quickly
data.out <- data.frame(Name = sort(unique(data[, "Name"])),
freq = count(data, "Name")[, 2],
stringsAsFactors = FALSE)
data.out <- data.frame(
Name = sort(unique(data[, "Name"])),
freq = count(data, "Name")[, 2],
stringsAsFactors = FALSE
)

data.out <- cbind(data.out, merge(countYears(data, "M"),
countYears(data, "F"),
by = "Name", all = TRUE)[, -1])
data.out <- cbind(data.out,
merge(countYears(data, "M"),
countYears(data, "F"),
by = "Name", all = TRUE)[, -1]
)


# cleanup NAs generated from merging countYears()
Expand Down Expand Up @@ -82,13 +94,17 @@ byNameCount <- function(data) {

yearBirths <- function(data, bounds = NULL) {
countBy <- function(x = c("Male", "Female")) {
# Contingency table for births
births <- with(data, rowsum(get(x), group = Year))
out <- data.frame(Year = as.numeric(rownames(births)),
Births = unname(births),
Gender = match.arg(x))
return(out)
}
# Contingency table for births
births <- with(data,
rowsum(get(x), group = Year)
)
out <- data.frame(
Year = as.numeric(rownames(births)),
Births = unname(births),
Gender = match.arg(x)
)
return(out)
}
data.out <- rbind(countBy("M"), countBy("F"))
# bounds checking if provided
if (length(bounds) == 2) {
Expand All @@ -107,29 +123,39 @@ yearBirths <- function(data, bounds = NULL) {
#' @param data A data frames with columns for Name, M, F, and Year
#' e.g. one returned by \code{\link{usnames}}
#' @param names A character vector of names (potentially of length 1)
#' @param range An (optional) numeric vector of length 2
#' @param bounds An (optional) numeric vector of length 2
#' with the start and end years inclusive
#' @param metric A character vector of length 1. "Male", "Female" or "Neutral"
#' @return A single data frame with columns for the metric, number of births
#' the years and the name.
#' @export
nameMetric <- function(data, names, bounds = NULL,
metric) {
data <- subset(data, Name %in% names)
nameMetric <- function(data, names, bounds = NULL, metric) {
matches <- data[, "Name"] %in% names
if(all(matches == 0)) {
stop("Names not found")
}
data <- data[matches, ]

metric <- match.arg(metric, choices = c("Male", "Female", "Neutral"))


nameTotal <- function(name.single) {
singleton <- subset(data, Name == name.single)
tot <- with(singleton, rowsum((M + F), group = Year))
metric <- match.arg(metric, choices = c("Male", "Female", "Neutral"))
singleton <- data[data[, "Name"] == name.single, ]
tot <- with(singleton,
rowsum((M + F), group = Year)
)
metFun <- switch(metric,
Male = singleton[, "M"] / tot,
Female = singleton[, "F"] / tot,
Neutral = 1 - abs(0.5 - singleton[, "M"] / tot) * 2
)
single.df <- data.frame(Proportion = metFun,
Metric = metric,
Births = tot,
Year = as.numeric(rownames(tot)),
Name = name.single)
Male = singleton[, "M"] / tot,
Female = singleton[, "F"] / tot,
Neutral = 1 - abs(0.5 - singleton[, "M"] / tot) * 2
)
single.df <- data.frame(
Name = name.single,
Births = tot,
Year = as.numeric(rownames(tot)),
Metric = metric,
Proportion = metFun
)
return(single.df)
}
names.list <- lapply(names, nameTotal)
Expand Down

0 comments on commit 60cbf7f

Please sign in to comment.