Fixed syntax consistency and use of unexported dplyr commands with tb…

…l_pd methods, added all remaining dplyr verbs as tbl_pd methods, and considerably sped up tlag. Plus minor error fixes.
NickCH-K · Jul 24, 2019 · 108e3c6 · 108e3c6
1 parent b5d6fd4
commit 108e3c6
Show file tree

Hide file tree

Showing 14 changed files with 865 additions and 173 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -10,18 +10,43 @@ S3method(bind_cols,tbl_pd)
 S3method(distinct,tbl_pd)
 S3method(full_join,tbl_pd)
 S3method(group_by,tbl_pd)
+S3method(group_by_all,tbl_pd)
+S3method(group_by_at,tbl_pd)
+S3method(group_by_if,tbl_pd)
 S3method(inner_join,tbl_pd)
+S3method(intersect,tbl_pd)
 S3method(left_join,tbl_pd)
 S3method(mutate,tbl_pd)
+S3method(mutate_all,tbl_pd)
+S3method(mutate_at,tbl_pd)
+S3method(mutate_if,tbl_pd)
 S3method(nest_join,tbl_pd)
 S3method(rename,tbl_pd)
+S3method(rename_all,tbl_pd)
+S3method(rename_at,tbl_pd)
+S3method(rename_if,tbl_pd)
 S3method(right_join,tbl_pd)
 S3method(select,tbl_pd)
+S3method(select_all,tbl_pd)
+S3method(select_at,tbl_pd)
+S3method(select_if,tbl_pd)
 S3method(semi_join,tbl_pd)
+S3method(setdiff,tbl_pd)
 S3method(summarise,tbl_pd)
+S3method(summarise_all,tbl_pd)
+S3method(summarise_at,tbl_pd)
+S3method(summarise_if,tbl_pd)
 S3method(summarize,tbl_pd)
+S3method(summarize_all,tbl_pd)
+S3method(summarize_at,tbl_pd)
+S3method(summarize_if,tbl_pd)
 S3method(transmute,tbl_pd)
+S3method(transmute_all,tbl_pd)
+S3method(transmute_at,tbl_pd)
+S3method(transmute_if,tbl_pd)
 S3method(ungroup,tbl_pd)
+S3method(union,tbl_pd)
+S3method(union_all,tbl_pd)
 export("%>%")
 export(anti_join.tbl_pd)
 export(as_pdeclare)
@@ -33,6 +58,9 @@ export(fixed_check)
 export(fixed_force)
 export(full_join.tbl_pd)
 export(group_by.tbl_pd)
+export(group_by_all.tbl_pd)
+export(group_by_at.tbl_pd)
+export(group_by_if.tbl_pd)
 export(id_variable)
 export(inexact_anti_join)
 export(inexact_full_join)
@@ -42,26 +70,90 @@ export(inexact_nest_join)
 export(inexact_right_join)
 export(inexact_semi_join)
 export(inner_join.tbl_pd)
+export(intersect.tbl_pd)
 export(is_pdeclare)
 export(left_join.tbl_pd)
 export(mutate.tbl_pd)
+export(mutate_all.tbl_pd)
+export(mutate_at.tbl_pd)
 export(mutate_cascade)
+export(mutate_if.tbl_pd)
 export(mutate_subset)
 export(nest_join.tbl_pd)
 export(panel_fill)
 export(panel_locf)
 export(pdeclare)
 export(rename.tbl_pd)
+export(rename_all.tbl_pd)
+export(rename_at.tbl_pd)
+export(rename_if.tbl_pd)
 export(right_join.tbl_pd)
 export(select.tbl_pd)
+export(select_all.tbl_pd)
+export(select_at.tbl_pd)
+export(select_if.tbl_pd)
 export(semi_join.tbl_pd)
+export(setdiff.tbl_pd)
 export(summarise.tbl_pd)
+export(summarise_all.tbl_pd)
+export(summarise_at.tbl_pd)
+export(summarise_if.tbl_pd)
 export(summarize.tbl_pd)
+export(summarize_all.tbl_pd)
+export(summarize_at.tbl_pd)
+export(summarize_if.tbl_pd)
 export(time_variable)
 export(tlag)
 export(transmute.tbl_pd)
+export(transmute_all.tbl_pd)
+export(transmute_at.tbl_pd)
+export(transmute_if.tbl_pd)
 export(ungroup.tbl_pd)
+export(union.tbl_pd)
+export(union_all.tbl_pd)
 export(within_i)
+importFrom(dplyr,anti_join)
+importFrom(dplyr,bind_cols)
+importFrom(dplyr,distinct)
+importFrom(dplyr,full_join)
+importFrom(dplyr,group_by)
+importFrom(dplyr,group_by_all)
+importFrom(dplyr,group_by_at)
+importFrom(dplyr,group_by_if)
+importFrom(dplyr,inner_join)
+importFrom(dplyr,intersect)
+importFrom(dplyr,left_join)
+importFrom(dplyr,mutate)
+importFrom(dplyr,mutate_all)
+importFrom(dplyr,mutate_at)
+importFrom(dplyr,mutate_if)
+importFrom(dplyr,nest_join)
+importFrom(dplyr,rename)
+importFrom(dplyr,rename_all)
+importFrom(dplyr,rename_at)
+importFrom(dplyr,rename_if)
+importFrom(dplyr,right_join)
+importFrom(dplyr,select)
+importFrom(dplyr,select_all)
+importFrom(dplyr,select_at)
+importFrom(dplyr,select_if)
+importFrom(dplyr,semi_join)
+importFrom(dplyr,setdiff)
+importFrom(dplyr,summarise)
+importFrom(dplyr,summarise_all)
+importFrom(dplyr,summarise_at)
+importFrom(dplyr,summarise_if)
+importFrom(dplyr,summarize)
+importFrom(dplyr,summarize_all)
+importFrom(dplyr,summarize_at)
+importFrom(dplyr,summarize_if)
+importFrom(dplyr,transmute)
+importFrom(dplyr,transmute_all)
+importFrom(dplyr,transmute_at)
+importFrom(dplyr,transmute_if)
+importFrom(dplyr,ungroup)
+importFrom(dplyr,union)
+importFrom(dplyr,union_all)
 importFrom(lubridate,"%m-%")
 importFrom(magrittr,"%>%")
 importFrom(rlang,"%@%")

diff --git a/R/inexact_join.R b/R/inexact_join.R
@@ -33,18 +33,28 @@
 #' )
 #' # I want to match the most recent unemployment data I have to each college
 #' Scorecard <- Scorecard %>%
-#'   inexact_left_join(unemp_data, method = "last", var = "year", jvar = "unemp_year")
+#'   inexact_left_join(unemp_data,
+#'                    method = "last",
+#'                    var = "year",
+#'                    jvar = "unemp_year")
 #'
 #' # Or perhaps I want to find the most recent lagged value (i.e. no exact matches, only recent ones)
 #' data(Scorecard)
 #' Scorecard <- Scorecard %>%
-#'   inexact_left_join(unemp_data, method = "last", var = "year", jvar = "unemp_year", exact = FALSE)
+#'   inexact_left_join(unemp_data,
+#'                     method = "last",
+#'                     var = "year",
+#'                     jvar = "unemp_year",
+#'                     exact = FALSE)
 #'
 #' # Another way to do the same thing would be to specify the range of unemp_years I want exactly
 #' data(Scorecard)
 #' unemp_data$unemp_year2 <- unemp_data$unemp_year + 2
 #' Scorecard <- Scorecard %>%
-#'   inexact_left_join(unemp_data, method = "between", var = "year", jvar = c("unemp_year", "unemp_year2"))
+#'   inexact_left_join(unemp_data,
+#'                    method = "between",
+#'                    var = "year",
+#'                    jvar = c("unemp_year", "unemp_year2"))
 NULL
 
 #' @rdname inexact_join

diff --git a/R/major_mutate_variations.R b/R/major_mutate_variations.R
@@ -53,7 +53,7 @@ mutate_cascade <- function(.df, ..., .skip = TRUE, .backwards = FALSE, .group_i
     .df <- as_pdeclare(.df, .i = .i, .t = .t, .d = .d, .uniqcheck = .uniqcheck)
 
     # .d might be unspecified and so inp$d is NA, but now .d is 1 from as_pdeclare default
-    inp$d <- df %@% ".d"
+    inp$d <- .df %@% ".d"
   }
 
   if (.group_i == TRUE & (min(is.na(inp$i)) == 0)) {
@@ -72,7 +72,7 @@ mutate_cascade <- function(.df, ..., .skip = TRUE, .backwards = FALSE, .group_i
         dplyr::mutate_at(inp$t, max))[[inp$t]]
     }
   } else {
-    .df[, .ncol(df) + 1] <- min(.df[[inp$t]]) - 1
+    .df[, ncol(.df) + 1] <- min(.df[[inp$t]]) - 1
   }
 
   indexnames <- names(.df)[(ncol(.df) - 1):ncol(.df)]
@@ -99,9 +99,9 @@ mutate_cascade <- function(.df, ..., .skip = TRUE, .backwards = FALSE, .group_i
 
   # If it wants the original panel setting back, do that
   if (.setpanel == FALSE) {
-    .df %@% ".i" <- inp$orig_i
-    .df %@% ".t" <- inp$orig_t
-    .df %@% ".d" <- inp$orig_d
+    attr(.df,".i") <- inp$orig_i
+    attr(.df,".t") <- inp$orig_t
+    attr(.df,".d") <- inp$orig_d
   }
   return(.df)
 }
@@ -186,8 +186,7 @@ mutate_subset <- function(.df, ..., .filter, .group_i = TRUE, .i = NA, .t = NA,
       dplyr::select(-dplyr::one_of(notgroups))))
     suppressWarnings(.df <- .df %>%
       dplyr::bind_cols(summdf))
-  }
-  else {
+  } else {
     suppressWarnings(try(.df <- .df %>%
       dplyr::select(-dplyr::one_of(notgroups))))
     suppressWarnings(.df <- .df %>%
@@ -196,9 +195,9 @@ mutate_subset <- function(.df, ..., .filter, .group_i = TRUE, .i = NA, .t = NA,
 
   # If it wants the original panel setting back, do that
   if (.setpanel == FALSE) {
-    .df %@% ".i" <- inp$orig_i
-    .df %@% ".t" <- inp$orig_t
-    .df %@% ".d" <- inp$orig_d
+    attr(.df,".i") <- inp$orig_i
+    attr(.df,".t") <- inp$orig_t
+    attr(.df,".d") <- inp$orig_d
   }
 
   return(.df)

diff --git a/R/panel_consistency.R b/R/panel_consistency.R
@@ -41,13 +41,20 @@
 #'
 #'   # We can deal with the inconsistent-gaps problem by creating new obs to fill in
 #'   # this version will fill in the new obs with the most recently observed data, and flag them
-#'   Scorecard_filled <- panel_fill(Scorecard, .i = "unitid", .t = "year", .flag = "new")
+#'   Scorecard_filled <- panel_fill(Scorecard,
+#'                                  .i = "unitid",
+#'                                  .t = "year",
+#'                                  .flag = "new")
 #'
 #'   # Or maybe we want those observations in there but don't want to treat them as real data
 #'   # so instead of filling them in, just leave all the data in the new obs blank
 #'   # (note this sets EVERYTHING not in .i or .t to NA - if you only want some variables NA,
 #'   # make .set_NA a character vector of those variable names)
-#'   Scorecard_filled <- panel_fill(Scorecard, .i = "unitid", .t = "year", .flag = "new", .set_NA = TRUE)
+#'   Scorecard_filled <- panel_fill(Scorecard,
+#'                                  .i = "unitid",
+#'                                  .t = "year",
+#'                                  .flag = "new",
+#'                                  .set_NA = TRUE)
 #'
 #'   # Perhaps we want a perfectly balanced panel. So let's set .max and .min to the start and end
 #'   # of the data, and it will fill in everything.
@@ -263,9 +270,9 @@ panel_fill <- function(.df, .set_NA = FALSE, .min = NA, .max = NA, .backwards =
 
   # If it wants the original panel setting back, do that
   if (.setpanel == FALSE) {
-    .df %@% ".i" <- inp$orig_i
-    .df %@% ".t" <- inp$orig_t
-    .df %@% ".d" <- inp$orig_d
+    attr(.df,".i") <- inp$orig_i
+    attr(.df,".t") <- inp$orig_t
+    attr(.df,".d") <- inp$orig_d
   }
 
   return(.df)

diff --git a/R/pdeclare.R b/R/pdeclare.R
@@ -37,7 +37,7 @@
 #' pd2 <- pdeclare(
 #'   i = c(1, 1, 1, 2, 2, 2),
 #'   seconds = c(123, 456, 789, 103, 234, 238),
-#'   .i = ".i",
+#'   .i = "i",
 #'   .t = "seconds",
 #'   .d = 0
 #' )
@@ -171,7 +171,7 @@ as_pdeclare.list <- as_pdeclare.tbl_df
 #' @keywords internal
 #' @export
 as_pdeclare.NULL <- function(x, ...) {
-  abort("A pdeclare must not be NULL.")
+  stop("A pdeclare must not be NULL.")
 }
 
 #' Low-level constructor for a pdeclare object