Skip to content

Commit

Permalink
Bugfix pass pre-CRAN (#60)
Browse files Browse the repository at this point in the history
* Added test to minor draw_binary_icc case.

* Changes to error handling for draw_binary_icc and draw_normal_icc; added file to Rbuildignore

* Split of cross_level to cross_levels and link_levels. Documentation pass. Bugfixes. Fixing a broken test on R-develpp

* Webpage doc push and vignette updates.

* Another quick pass.
  • Loading branch information
aaronrudkin committed Jan 24, 2018
1 parent 775eb4b commit b00c024
Show file tree
Hide file tree
Showing 34 changed files with 747 additions and 512 deletions.
1 change: 1 addition & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,4 @@ _pkgdown.yml
cran-comments.md
^NEWS\.md$
^_pkgdown\.yml$
fabricatr_*.tar.gz
3 changes: 2 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

export(ALL)
export(add_level)
export(cross_level)
export(cross_levels)
export(draw_binary)
export(draw_binary_icc)
export(draw_binomial)
Expand All @@ -14,6 +14,7 @@ export(draw_ordered)
export(fabricate)
export(join)
export(level)
export(link_levels)
export(modify_level)
export(nest_level)
export(resample_data)
Expand Down
14 changes: 7 additions & 7 deletions R/cross_classify_helpers.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
panel_dfs <- function(dfs) {
# Error handling
if (is.data.frame(dfs) || length(dfs) < 2) {
stop("You must specify at least two data frames in a `cross_level()` call.")
stop("You must specify at least two data frames in a `cross_levels()` call.")
}

# Do repeated merges
Expand All @@ -16,13 +16,13 @@ panel_dfs <- function(dfs) {
join_dfs <- function(dfs, variables, N, sigma=NULL, rho=0) {
# Error handling
if (is.data.frame(dfs)) {
stop("You must specify at least two data frames in a `cross_level()` call.")
stop("You must specify at least two data frames in a `link_levels()` call.")
}
if (length(dfs) != length(variables)) {
stop("You must define which variables to join in a `cross_level()` call.")
stop("You must define which variables to join in a `link_levels()` call.")
}
if (length(variables) < 2) {
stop("You must define at least two variables to join on in a `cross_level()` call.")
stop("You must define at least two variables to join on in a `link_levels()` call.")
}

# Create the data list -- the subset from the dfs of the variables we're
Expand Down Expand Up @@ -73,7 +73,7 @@ joint_draw_ecdf <- function(data_list, N, ndim=length(data_list),

# Error handling for N
if (is.null(N) || is.na(N) || !is.atomic(N) || length(N) > 1 || N <= 0) {
stop("N for `cross_level()` calls must be a single integer that is positive.")
stop("N for `link_levels()` calls must be a single integer that is positive.")
}

# Error handling for rho, if specified
Expand All @@ -100,7 +100,7 @@ joint_draw_ecdf <- function(data_list, N, ndim=length(data_list),
diag(sigma) <- 1
} else {
stop(
"If `rho` is specified in a `cross_level()` call, it must be a single ",
"If `rho` is specified in a `link_levels()` call, it must be a single ",
"number"
)
}
Expand Down Expand Up @@ -139,7 +139,7 @@ joint_draw_ecdf <- function(data_list, N, ndim=length(data_list),
) %*% right_chol

message(
"`cross_level()` calls are faster if the `mvnfast` package is ",
"`link_levels()` calls are faster if the `mvnfast` package is ",
"installed."
)
} else {
Expand Down
65 changes: 42 additions & 23 deletions R/cross_level.R → R/cross_levels.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,15 @@
#' panel <- fabricate(
#' countries = add_level(N = 20, country_shock = runif(N, 1, 10)),
#' years = add_level(N = 20, year_shock = runif(N, 1, 10), nest=FALSE),
#' obs = cross_level(by=join(countries, years), GDP_it = country_shock + year_shock)
#' obs = cross_levels(by=join(countries, years), GDP_it = country_shock + year_shock)
#' )
#'
#' # Include an "N" argument to allow for cross-classified
#' # data.
#' students <- fabricate(
#' primary_school = add_level(N = 20, ps_quality = runif(N, 1, 10)),
#' secondary_school = add_level(N = 15, ss_quality = runif(N, 1, 10), nest=FALSE),
#' students = cross_level(N = 500, by = join(primary_school, secondary_school))
#' students = link_levels(N = 500, by = join(primary_school, secondary_school))
#' )
#' head(students)
#'
Expand All @@ -43,13 +43,35 @@
#' students <- fabricate(
#' primary_school = add_level(N = 20, ps_quality = runif(N, 1, 10)),
#' secondary_school = add_level(N = 15, ss_quality = runif(N, 1, 10), nest=FALSE),
#' students = cross_level(N = 500, by = join(ps_quality, ss_quality, rho = 0.5))
#' students = link_levels(N = 500, by = join(ps_quality, ss_quality, rho = 0.5))
#' )
#' cor(students$ps_quality, students$ss_quality)
#'
#' @importFrom rlang quos
#' @export
cross_levels <- function(by = NULL,
...) {
data_arguments <- quos(...)
if ("N" %in% names(data_arguments) ||
!is.null(by$sigma) || by$rho) {
stop(
"`cross_levels()` calls are used to create full panels and cannot take ",
"`N` arguments or correlation structures."
)
}

link_levels(
N = NULL,
by = by,
...
)
}

#' @importFrom rlang quos get_expr
#'
#' @rdname cross_levels
#' @export
cross_level <- function(N = NULL,
link_levels <- function(N = NULL,
by = NULL,
...) {
data_arguments <- quos(...)
Expand All @@ -60,26 +82,29 @@ cross_level <- function(N = NULL,
# This happens if either an add_level call is run external to a fabricate
# call OR if add_level is the only argument to a fabricate call and
# the data argument tries to resolve an add_level call.
stop("`cross_level()` calls must be run inside `fabricate()` calls.")
stop(
"`cross_levels()` and `link_levels()` calls must be run inside ",
"`fabricate()` calls."
)
}
if ("ID_label" %in% names(data_arguments)) {
ID_label <- get_expr(data_arguments[["ID_label"]])
data_arguments[["ID_label"]] <- NULL
}

return(cross_level_internal(
return(cross_levels_internal(
N = N, ID_label = ID_label, by = by,
working_environment_ = working_environment_,
data_arguments = data_arguments
))
}

#' @importFrom rlang quo_text eval_tidy
cross_level_internal <- function(N = NULL,
ID_label = NULL,
working_environment_ = NULL,
by = NULL,
data_arguments = NULL) {
cross_levels_internal <- function(N = NULL,
ID_label = NULL,
working_environment_ = NULL,
by = NULL,
data_arguments = NULL) {
if (any(!c("data_frame_output_", "shelved_df") %in%
names(working_environment_))) {
stop(
Expand Down Expand Up @@ -139,7 +164,7 @@ cross_level_internal <- function(N = NULL,
stop(
"The variable name ",
variable_names[i],
" that you specified as part of your `cross_level()` join was not ",
" that you specified as part of your `cross_levels()` join was not ",
"found in any of the level hierarchies"
)
}
Expand All @@ -158,15 +183,6 @@ cross_level_internal <- function(N = NULL,
simplify = FALSE
)

if (is.null(N) && (!is.null(by$sigma) || by$rho)) {
stop(
"When `N` is null in a `cross_level()` call, the data generated is a ",
"complete panel of all observations in each data frame and cannot have ",
"a specified correlation structure. Please remove the correlation structure ",
"from the `by` argument."
)
}

# Do the join.
if (!is.null(N)) {
out <- join_dfs(data_frame_objects, variable_names, N, by$sigma, by$rho)
Expand Down Expand Up @@ -200,13 +216,16 @@ cross_level_internal <- function(N = NULL,
}

#' Helper function handling specification of which variables to join a
#' cross-classified data on, and what kind of correlation structure needed
#' cross-classified data on, and what kind of correlation structure needed.
#' Correlation structures can only be provided if the underlying call is
#' a `link_levels()` call.
#'
#' @param ... A series of two or more variable names, unquoted, to join on in
#' order to create cross-classified data.
#' @param rho A fixed (Spearman's rank) correlation coefficient between the
#' variables being joined on: note that if it is not possible to make a
#' correlation matrix from this coefficient (e.g. if you are joining on three
#' or more variables and rho is negative) then the \code{cross_level()} call
#' or more variables and rho is negative) then the \code{cross_levels()} call
#' will fail. Do not provide \code{rho} if making panel data.
#' @param sigma A matrix with dimensions equal to the number of variables you
#' are joining on, specifying the correlation for the resulting joined data.
Expand Down
8 changes: 8 additions & 0 deletions R/draw_binary_icc.R
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,13 @@ draw_binary_icc <- function(prob = 0.5, N = NULL, clusters, ICC = 0) {
}

# Let's not worry about how clusters are provided
if(!is.null(dim(clusters))) {
stop(
"You must provide cluster IDs for draw_normal_icc as a vector, not a ",
"higher dimensional object like a data frame or similar."
)
}

tryCatch({
clusters <- as.numeric(as.factor(clusters))
}, error = function(e) {
Expand All @@ -47,6 +54,7 @@ draw_binary_icc <- function(prob = 0.5, N = NULL, clusters, ICC = 0) {
"argument is numeric, factor, or can be coerced into being a factor."
)
})

number_of_clusters <- length(unique(clusters))

# Sanity check N
Expand Down
7 changes: 7 additions & 0 deletions R/draw_normal_icc.R
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,13 @@ draw_normal_icc <- function(mean = 0,
ICC = NULL) {

# Let's not worry about how clusters are provided
if(!is.null(dim(clusters))) {
stop(
"You must provide cluster IDs for draw_normal_icc as a vector, not a ",
"higher dimensional object like a data frame or similar."
)
}

tryCatch({
clusters <- as.numeric(as.factor(clusters))
}, error = function(e) {
Expand Down
15 changes: 8 additions & 7 deletions R/fabricate.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@
#' \code{N}. Create hierarchical data with multiple levels of data such as
#' citizens within cities within states using \code{add_level()} or modify
#' existing hierarchical data using \code{modify_level()}. You can use any R
#' function to create each variable. Use \code{cross_level()} to make more
#' complex designs such as panel or cross-classified data.
#' function to create each variable. Use \code{cross_levels()} and
#' \code{link_levels()} to make more complex designs such as panel or
#' cross-classified data.
#'
#' We also provide several built-in options to easily create variables, including
#' \code{\link{draw_binary}}, \code{\link{draw_count}}, \code{\link{draw_likert}},
Expand Down Expand Up @@ -75,14 +76,14 @@
#'
#' # fabricatr can also make panel or cross-classified data. For more
#' # information about syntax for this functionality please read our vignette
#' # or check documentation for \code{cross_level}:
#' # or check documentation for \code{link_levels}:
#' cross_classified <- fabricate(
#' primary_schools = add_level(N = 50, ps_quality = runif(N, 0, 10)),
#' secondary_schools = add_level(N = 100, ss_quality = runif(N, 0, 10), nest=FALSE),
#' students = cross_level(N = 2000,
#' by=join(ps_quality, ss_quality, rho = 0.5),
#' student_quality = ps_quality + 3*ss_quality + rnorm(N)))
#' @seealso [cross_level()]
#' students = link_levels(N = 2000,
#' by=join(ps_quality, ss_quality, rho = 0.5),
#' student_quality = ps_quality + 3*ss_quality + rnorm(N)))
#' @seealso [link_levels()]
#' @importFrom rlang quos quo_name eval_tidy lang_name lang_modify lang_args
#' is_lang get_expr
#'
Expand Down
3 changes: 2 additions & 1 deletion R/helper_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -383,7 +383,8 @@ check_all_levels <- function(options) {
"add_level",
"nest_level",
"modify_level",
"cross_level"
"cross_levels",
"link_levels"
)
})

Expand Down
14 changes: 8 additions & 6 deletions README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -37,22 +37,24 @@ Once you have installed **fabricatr**, you can easily import your own data or ge
**fabricatr** is easy to learn and easy to read. Consider this example which generates data modeling the United States House of Representatives:

```{r}
set.seed(1)
library(fabricatr)
house_members <- fabricate(
party_id = add_level(
N = 2, party_names = c("Republican", "Democrat"), party_ideology = c(0.5, -0.5),
in_power = c(1, 0), party_incumbents = c(241, 194)),
N = 2, party_names = c("Republican", "Democrat"), party_ideology = c(0.5, -0.5),
in_power = c(1, 0), party_incumbents = c(241, 194)
),
rep_id = add_level(
N = party_incumbents, member_ideology = rnorm(N, party_ideology, sd = 0.5),
terms_served = draw_count(N = N, mean = 4),
female = draw_binary(N = N, prob = 0.198))
terms_served = draw_count(N = N, mean = 4),
female = draw_binary(N = N, prob = 0.198)
)
)
```

```{r echo=FALSE}
set.seed(19861108)
knitr::kable(house_members[sample.int(nrow(house_members), 5, replace=FALSE), c(2, 3, 4, 7, 8, 9)], row.names = FALSE)
knitr::kable(house_members[sample.int(nrow(house_members), 5, replace = FALSE), c(2, 3, 4, 7, 8, 9)], row.names = FALSE)
```

### Next Steps
Expand Down
21 changes: 12 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,26 +26,29 @@ Once you have installed **fabricatr**, you can easily import your own data or ge
**fabricatr** is easy to learn and easy to read. Consider this example which generates data modeling the United States House of Representatives:

``` r
set.seed(1)
library(fabricatr)

house_members <- fabricate(
party_id = add_level(
N = 2, party_names = c("Republican", "Democrat"), party_ideology = c(0.5, -0.5),
in_power = c(1, 0), party_incumbents = c(241, 194)),
N = 2, party_names = c("Republican", "Democrat"), party_ideology = c(0.5, -0.5),
in_power = c(1, 0), party_incumbents = c(241, 194)
),
rep_id = add_level(
N = party_incumbents, member_ideology = rnorm(N, party_ideology, sd = 0.5),
terms_served = draw_count(N = N, mean = 4),
female = draw_binary(N = N, prob = 0.198))
terms_served = draw_count(N = N, mean = 4),
female = draw_binary(N = N, prob = 0.198)
)
)
```

| party\_names | party\_ideology| in\_power| member\_ideology| terms\_served| female|
|:-------------|----------------:|----------:|-----------------:|--------------:|-------:|
| Democrat | -0.5| 0| 0.11| 3| 0|
| Republican | 0.5| 1| -0.37| 1| 0|
| Republican | 0.5| 1| 0.71| 2| 1|
| Democrat | -0.5| 0| -1.05| 3| 0|
| Republican | 0.5| 1| 0.24| 4| 0|
| Democrat | -0.5| 0| -1.21| 4| 1|
| Democrat | -0.5| 0| -0.82| 3| 0|
| Democrat | -0.5| 0| -0.42| 6| 1|
| Republican | 0.5| 1| -0.23| 7| 0|
| Republican | 0.5| 1| 0.40| 2| 0|

### Next Steps

Expand Down
9 changes: 6 additions & 3 deletions cran-comments.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
## Test environments
* local OS X install, R 3.4.0
* OS X install (on travis-ci), R 3.4.0
* local OS X install, R 3.4.2
* OS X install (on travis-ci), R 3.4.2
* OS X install (on travis-ci), R 3.3.3
* ubuntu 12.04 (on travis-ci), R 3.1.2
* ubuntu 14.04 (on travis-ci), R 3.4.2
* ubuntu 14.04 (on travis-ci), R 3.3.3
* windows server 2012 R2 X64 (on appveyor), R 3.3.3
* windows server 2012 R2 X64 (on appveyor), R 3.4.3
* win-builder (devel and release)

## R CMD check results
Expand Down
Loading

0 comments on commit b00c024

Please sign in to comment.