Skip to content

Commit

Permalink
Fixed syntax consistency and use of unexported dplyr commands with tb…
Browse files Browse the repository at this point in the history
…l_pd methods, added all remaining dplyr verbs as tbl_pd methods, and considerably sped up tlag. Plus minor error fixes.
  • Loading branch information
NickCH-K committed Jul 24, 2019
1 parent b5d6fd4 commit 108e3c6
Show file tree
Hide file tree
Showing 14 changed files with 865 additions and 173 deletions.
92 changes: 92 additions & 0 deletions NAMESPACE
Expand Up @@ -10,18 +10,43 @@ S3method(bind_cols,tbl_pd)
S3method(distinct,tbl_pd)
S3method(full_join,tbl_pd)
S3method(group_by,tbl_pd)
S3method(group_by_all,tbl_pd)
S3method(group_by_at,tbl_pd)
S3method(group_by_if,tbl_pd)
S3method(inner_join,tbl_pd)
S3method(intersect,tbl_pd)
S3method(left_join,tbl_pd)
S3method(mutate,tbl_pd)
S3method(mutate_all,tbl_pd)
S3method(mutate_at,tbl_pd)
S3method(mutate_if,tbl_pd)
S3method(nest_join,tbl_pd)
S3method(rename,tbl_pd)
S3method(rename_all,tbl_pd)
S3method(rename_at,tbl_pd)
S3method(rename_if,tbl_pd)
S3method(right_join,tbl_pd)
S3method(select,tbl_pd)
S3method(select_all,tbl_pd)
S3method(select_at,tbl_pd)
S3method(select_if,tbl_pd)
S3method(semi_join,tbl_pd)
S3method(setdiff,tbl_pd)
S3method(summarise,tbl_pd)
S3method(summarise_all,tbl_pd)
S3method(summarise_at,tbl_pd)
S3method(summarise_if,tbl_pd)
S3method(summarize,tbl_pd)
S3method(summarize_all,tbl_pd)
S3method(summarize_at,tbl_pd)
S3method(summarize_if,tbl_pd)
S3method(transmute,tbl_pd)
S3method(transmute_all,tbl_pd)
S3method(transmute_at,tbl_pd)
S3method(transmute_if,tbl_pd)
S3method(ungroup,tbl_pd)
S3method(union,tbl_pd)
S3method(union_all,tbl_pd)
export("%>%")
export(anti_join.tbl_pd)
export(as_pdeclare)
Expand All @@ -33,6 +58,9 @@ export(fixed_check)
export(fixed_force)
export(full_join.tbl_pd)
export(group_by.tbl_pd)
export(group_by_all.tbl_pd)
export(group_by_at.tbl_pd)
export(group_by_if.tbl_pd)
export(id_variable)
export(inexact_anti_join)
export(inexact_full_join)
Expand All @@ -42,26 +70,90 @@ export(inexact_nest_join)
export(inexact_right_join)
export(inexact_semi_join)
export(inner_join.tbl_pd)
export(intersect.tbl_pd)
export(is_pdeclare)
export(left_join.tbl_pd)
export(mutate.tbl_pd)
export(mutate_all.tbl_pd)
export(mutate_at.tbl_pd)
export(mutate_cascade)
export(mutate_if.tbl_pd)
export(mutate_subset)
export(nest_join.tbl_pd)
export(panel_fill)
export(panel_locf)
export(pdeclare)
export(rename.tbl_pd)
export(rename_all.tbl_pd)
export(rename_at.tbl_pd)
export(rename_if.tbl_pd)
export(right_join.tbl_pd)
export(select.tbl_pd)
export(select_all.tbl_pd)
export(select_at.tbl_pd)
export(select_if.tbl_pd)
export(semi_join.tbl_pd)
export(setdiff.tbl_pd)
export(summarise.tbl_pd)
export(summarise_all.tbl_pd)
export(summarise_at.tbl_pd)
export(summarise_if.tbl_pd)
export(summarize.tbl_pd)
export(summarize_all.tbl_pd)
export(summarize_at.tbl_pd)
export(summarize_if.tbl_pd)
export(time_variable)
export(tlag)
export(transmute.tbl_pd)
export(transmute_all.tbl_pd)
export(transmute_at.tbl_pd)
export(transmute_if.tbl_pd)
export(ungroup.tbl_pd)
export(union.tbl_pd)
export(union_all.tbl_pd)
export(within_i)
importFrom(dplyr,anti_join)
importFrom(dplyr,bind_cols)
importFrom(dplyr,distinct)
importFrom(dplyr,full_join)
importFrom(dplyr,group_by)
importFrom(dplyr,group_by_all)
importFrom(dplyr,group_by_at)
importFrom(dplyr,group_by_if)
importFrom(dplyr,inner_join)
importFrom(dplyr,intersect)
importFrom(dplyr,left_join)
importFrom(dplyr,mutate)
importFrom(dplyr,mutate_all)
importFrom(dplyr,mutate_at)
importFrom(dplyr,mutate_if)
importFrom(dplyr,nest_join)
importFrom(dplyr,rename)
importFrom(dplyr,rename_all)
importFrom(dplyr,rename_at)
importFrom(dplyr,rename_if)
importFrom(dplyr,right_join)
importFrom(dplyr,select)
importFrom(dplyr,select_all)
importFrom(dplyr,select_at)
importFrom(dplyr,select_if)
importFrom(dplyr,semi_join)
importFrom(dplyr,setdiff)
importFrom(dplyr,summarise)
importFrom(dplyr,summarise_all)
importFrom(dplyr,summarise_at)
importFrom(dplyr,summarise_if)
importFrom(dplyr,summarize)
importFrom(dplyr,summarize_all)
importFrom(dplyr,summarize_at)
importFrom(dplyr,summarize_if)
importFrom(dplyr,transmute)
importFrom(dplyr,transmute_all)
importFrom(dplyr,transmute_at)
importFrom(dplyr,transmute_if)
importFrom(dplyr,ungroup)
importFrom(dplyr,union)
importFrom(dplyr,union_all)
importFrom(lubridate,"%m-%")
importFrom(magrittr,"%>%")
importFrom(rlang,"%@%")
Expand Down
16 changes: 13 additions & 3 deletions R/inexact_join.R
Expand Up @@ -33,18 +33,28 @@
#' )
#' # I want to match the most recent unemployment data I have to each college
#' Scorecard <- Scorecard %>%
#' inexact_left_join(unemp_data, method = "last", var = "year", jvar = "unemp_year")
#' inexact_left_join(unemp_data,
#' method = "last",
#' var = "year",
#' jvar = "unemp_year")
#'
#' # Or perhaps I want to find the most recent lagged value (i.e. no exact matches, only recent ones)
#' data(Scorecard)
#' Scorecard <- Scorecard %>%
#' inexact_left_join(unemp_data, method = "last", var = "year", jvar = "unemp_year", exact = FALSE)
#' inexact_left_join(unemp_data,
#' method = "last",
#' var = "year",
#' jvar = "unemp_year",
#' exact = FALSE)
#'
#' # Another way to do the same thing would be to specify the range of unemp_years I want exactly
#' data(Scorecard)
#' unemp_data$unemp_year2 <- unemp_data$unemp_year + 2
#' Scorecard <- Scorecard %>%
#' inexact_left_join(unemp_data, method = "between", var = "year", jvar = c("unemp_year", "unemp_year2"))
#' inexact_left_join(unemp_data,
#' method = "between",
#' var = "year",
#' jvar = c("unemp_year", "unemp_year2"))
NULL

#' @rdname inexact_join
Expand Down
19 changes: 9 additions & 10 deletions R/major_mutate_variations.R
Expand Up @@ -53,7 +53,7 @@ mutate_cascade <- function(.df, ..., .skip = TRUE, .backwards = FALSE, .group_i
.df <- as_pdeclare(.df, .i = .i, .t = .t, .d = .d, .uniqcheck = .uniqcheck)

# .d might be unspecified and so inp$d is NA, but now .d is 1 from as_pdeclare default
inp$d <- df %@% ".d"
inp$d <- .df %@% ".d"
}

if (.group_i == TRUE & (min(is.na(inp$i)) == 0)) {
Expand All @@ -72,7 +72,7 @@ mutate_cascade <- function(.df, ..., .skip = TRUE, .backwards = FALSE, .group_i
dplyr::mutate_at(inp$t, max))[[inp$t]]
}
} else {
.df[, .ncol(df) + 1] <- min(.df[[inp$t]]) - 1
.df[, ncol(.df) + 1] <- min(.df[[inp$t]]) - 1
}

indexnames <- names(.df)[(ncol(.df) - 1):ncol(.df)]
Expand All @@ -99,9 +99,9 @@ mutate_cascade <- function(.df, ..., .skip = TRUE, .backwards = FALSE, .group_i

# If it wants the original panel setting back, do that
if (.setpanel == FALSE) {
.df %@% ".i" <- inp$orig_i
.df %@% ".t" <- inp$orig_t
.df %@% ".d" <- inp$orig_d
attr(.df,".i") <- inp$orig_i
attr(.df,".t") <- inp$orig_t
attr(.df,".d") <- inp$orig_d
}
return(.df)
}
Expand Down Expand Up @@ -186,8 +186,7 @@ mutate_subset <- function(.df, ..., .filter, .group_i = TRUE, .i = NA, .t = NA,
dplyr::select(-dplyr::one_of(notgroups))))
suppressWarnings(.df <- .df %>%
dplyr::bind_cols(summdf))
}
else {
} else {
suppressWarnings(try(.df <- .df %>%
dplyr::select(-dplyr::one_of(notgroups))))
suppressWarnings(.df <- .df %>%
Expand All @@ -196,9 +195,9 @@ mutate_subset <- function(.df, ..., .filter, .group_i = TRUE, .i = NA, .t = NA,

# If it wants the original panel setting back, do that
if (.setpanel == FALSE) {
.df %@% ".i" <- inp$orig_i
.df %@% ".t" <- inp$orig_t
.df %@% ".d" <- inp$orig_d
attr(.df,".i") <- inp$orig_i
attr(.df,".t") <- inp$orig_t
attr(.df,".d") <- inp$orig_d
}

return(.df)
Expand Down
17 changes: 12 additions & 5 deletions R/panel_consistency.R
Expand Up @@ -41,13 +41,20 @@
#'
#' # We can deal with the inconsistent-gaps problem by creating new obs to fill in
#' # this version will fill in the new obs with the most recently observed data, and flag them
#' Scorecard_filled <- panel_fill(Scorecard, .i = "unitid", .t = "year", .flag = "new")
#' Scorecard_filled <- panel_fill(Scorecard,
#' .i = "unitid",
#' .t = "year",
#' .flag = "new")
#'
#' # Or maybe we want those observations in there but don't want to treat them as real data
#' # so instead of filling them in, just leave all the data in the new obs blank
#' # (note this sets EVERYTHING not in .i or .t to NA - if you only want some variables NA,
#' # make .set_NA a character vector of those variable names)
#' Scorecard_filled <- panel_fill(Scorecard, .i = "unitid", .t = "year", .flag = "new", .set_NA = TRUE)
#' Scorecard_filled <- panel_fill(Scorecard,
#' .i = "unitid",
#' .t = "year",
#' .flag = "new",
#' .set_NA = TRUE)
#'
#' # Perhaps we want a perfectly balanced panel. So let's set .max and .min to the start and end
#' # of the data, and it will fill in everything.
Expand Down Expand Up @@ -263,9 +270,9 @@ panel_fill <- function(.df, .set_NA = FALSE, .min = NA, .max = NA, .backwards =

# If it wants the original panel setting back, do that
if (.setpanel == FALSE) {
.df %@% ".i" <- inp$orig_i
.df %@% ".t" <- inp$orig_t
.df %@% ".d" <- inp$orig_d
attr(.df,".i") <- inp$orig_i
attr(.df,".t") <- inp$orig_t
attr(.df,".d") <- inp$orig_d
}

return(.df)
Expand Down
4 changes: 2 additions & 2 deletions R/pdeclare.R
Expand Up @@ -37,7 +37,7 @@
#' pd2 <- pdeclare(
#' i = c(1, 1, 1, 2, 2, 2),
#' seconds = c(123, 456, 789, 103, 234, 238),
#' .i = ".i",
#' .i = "i",
#' .t = "seconds",
#' .d = 0
#' )
Expand Down Expand Up @@ -171,7 +171,7 @@ as_pdeclare.list <- as_pdeclare.tbl_df
#' @keywords internal
#' @export
as_pdeclare.NULL <- function(x, ...) {
abort("A pdeclare must not be NULL.")
stop("A pdeclare must not be NULL.")
}

#' Low-level constructor for a pdeclare object
Expand Down

0 comments on commit 108e3c6

Please sign in to comment.