drifter v0.2 uses ingredients instead of ceterisParibus2

ModelOriented · May 19, 2019 · 9c40286 · 9c40286
1 parent 17f40a7
commit 9c40286
Show file tree

Hide file tree

Showing 13 changed files with 106 additions and 107 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,21 +1,21 @@
 Package: drifter
 Title: Concept Drift and Concept Shift Detection for Predictive Models
-Version: 0.1
+Version: 0.2
 Authors@R: person("Przemyslaw", "Biecek", email = "przemyslaw.biecek@gmail.com", role = c("aut", "cre"))
 Description: Concept drift refers to the change in the data distribution or 
-    in the relationships between variables over time.
-    'drifter' calculates distances between variable distributions or 
-    variable relations. 
-    'drifter' is a part of 'DrWhy' universe: tools for Explanation, Exploration and Visualisation for Predictive Models.
+  in the relationships between variables over time.
+  'drifter' calculates distances between variable distributions or 
+  variable relations and identifies both types of drift. 
+  'drifter' is a part of the 'DrWhy.AI' universe (Biecek 2018) <arXiv:1806.08915>.
 Depends: R (>= 3.1)
 License: GPL
 Encoding: UTF-8
 LazyData: true
 Imports: 
-    DALEX2,
+    DALEX,
     dplyr,
     tidyr,
-    ceterisParibus2
+    ingredients
 Suggests:
     testthat,
     ranger

diff --git a/NAMESPACE b/NAMESPACE
@@ -8,10 +8,11 @@ export(calculate_model_drift)
 export(calculate_residuals_drift)
 export(check_drift)
 export(compare_two_profiles)
-importFrom(ceterisParibus2,individual_variable_profile)
+importFrom(DALEX,explain)
 importFrom(dplyr,filter)
 importFrom(dplyr,group_by)
 importFrom(dplyr,summarise)
+importFrom(ingredients,partial_dependency)
 importFrom(stats,predict)
 importFrom(stats,sd)
 importFrom(tidyr,spread)
diff --git a/NEWS.md b/NEWS.md
@@ -1,6 +1,12 @@
+drifter 0.2
+----------------------------------------------------------------
+* `DALEX2` is replaced by `DALEX`
+* `ceterisParibus2` is replaced by `ingredients`
+
 drifter 0.1
 ----------------------------------------------------------------
 * `calculate_covariate_drift()` calculates 1d inverse intersection distances between two datasets
 * `calculate_residuals_drift()` calculates 1d inverse intersection distances between two residuals calculated on old and new data
 * `calculate_model_drift()` calculates distances between PDP curves calculated for new and old model
+* `check_drift()` executes all tests for drift
 
diff --git a/R/calculate_covariate_drift.R b/R/calculate_covariate_drift.R
@@ -12,7 +12,7 @@
 #' @export
 #'
 #' @examples
-#' library("DALEX2")
+#' library("DALEX")
 #' # here we do not have any drift
 #' d <- calculate_covariate_drift(apartments, apartments_test)
 #' d
@@ -71,7 +71,7 @@ calculate_distance <- function(variable_old, variable_new, bins = 20) {
 #' @export
 #'
 #' @examples
-#' library("DALEX2")
+#' library("DALEX")
 #' # here we do not have any drift
 #' d <- calculate_covariate_drift(apartments, apartments_test)
 #' d

diff --git a/R/calculate_model_drift.R b/R/calculate_model_drift.R
@@ -14,20 +14,20 @@
 #' @importFrom dplyr filter group_by summarise
 #' @importFrom tidyr spread
 #' @importFrom stats predict sd
-#' @importFrom ceterisParibus2 individual_variable_profile
+#' @importFrom ingredients partial_dependency
+#' @importFrom DALEX explain
 #' @export
 #'
 #' @examples
-#'  library("DALEX2")
-#'  \dontrun{
+#'  library("DALEX")
+#'  \donttest{
 #'  library("ranger")
 #'  predict_function <- function(m,x,...) predict(m, x, ...)$predictions
 #'  model_old <- ranger(m2.price ~ ., data = apartments)
 #'  model_new <- ranger(m2.price ~ ., data = apartments_test)
 #'  calculate_model_drift(model_old, model_new,
 #'                   apartments_test,
 #'                   apartments_test$m2.price,
-#'                   max_obs = 1000,
 #'                   predict_function = predict_function)
 #'
 #'  # here we compare model created on male data
@@ -41,50 +41,48 @@
 #'  calculate_model_drift(model_old, model_new,
 #'                   HR_test,
 #'                   HR_test$status == "fired",
-#'                   max_obs = 1000,
 #'                   predict_function = predict_function)
 #'
 #'  # plot it
-#'  library("ceterisParibus2")
-#'  prof_old <- individual_variable_profile(model_old,
-#'                                      data = data_new,
-#'                                      new_observation = data_new[1:1000,],
+#'  library("ingredients")
+#'  prof_old <- partial_dependency(model_old,
+#'                                      data = data_new[1:500,],
 #'                                      label = "model_old",
-#'                                      predict_function = predict_function)
-#'  prof_new <- individual_variable_profile(model_new,
-#'                                      data = data_new,
-#'                                      new_observation = data_new[1:1000,],
+#'                                      predict_function = predict_function,
+#'                                      grid_points = 101,
+#'                                      variable_splits = NULL)
+#'  prof_new <- partial_dependency(model_new,
+#'                                      data = data_new[1:500,],
 #'                                      label = "model_new",
-#'                                      predict_function = predict_function)
-#'  plot(prof_old, prof_new,
-#'       selected_variables = "age", aggregate_profiles = mean,
-#'       show_observations = FALSE, color = "_label_")
+#'                                      predict_function = predict_function,
+#'                                      grid_points = 101,
+#'                                      variable_splits = NULL)
+#'  plot(prof_old, prof_new, color = "_label_")
 #' }
 #'
 calculate_model_drift <- function(model_old, model_new,
                                   data_new,
                                   y_new,
                                   predict_function = predict,
-                                  max_obs = -1,
+                                  max_obs = 100,
                                   scale = sd(y_new, na.rm = TRUE)) {
   #
   # test of model structure
-  if (max_obs > 0) {
-    data_new_small <- data_new[sample(1:nrow(data_new), max_obs),]
-  } else {
-    data_new_small <- data_new
+  if (max_obs <= 0) {
+    max_obs = nrow(data_new)
   }
 
-  prof_old <- individual_variable_profile(model_old,
-                                          data = data_new,
-                                          new_observation = data_new_small,
-                                          label = "model_old",
-                                          predict_function = predict_function)
-  prof_new <- individual_variable_profile(model_new,
-                                          data = data_new,
-                                          new_observation = data_new_small,
-                                          label = "model_new",
-                                          predict_function = predict_function)
+  explainer_old <- explain(model_old,
+                           data = data_new,
+                           label = "model_old",
+                           predict_function = predict_function)
+  explainer_new <- explain(model_new,
+                           data = data_new,
+                           label = "model_new",
+                           predict_function = predict_function)
+
+  prof_old <- partial_dependency(explainer_old, N = max_obs)
+  prof_new <- partial_dependency(explainer_new, N = max_obs)
   # for all variables
   vars <- as.character(unique(prof_old$`_vname_`))
 
@@ -109,8 +107,8 @@ calculate_model_drift <- function(model_old, model_new,
 #' @export
 #'
 #' @examples
-#'  library("DALEX2")
-#'  \dontrun{
+#'  library("DALEX")
+#'  \donttest{
 #'  library("ranger")
 #'  predict_function <- function(m,x,...) predict(m, x, ...)$predictions
 #'  model_old <- ranger(m2.price ~ ., data = apartments)
@@ -162,8 +160,8 @@ compare_two_profiles <- function(cpprofile_old, cpprofile_new, variables, scale
     var <- variables[i]
     selected_var_old <- filter(cpprofile_old, `_vname_` == var)
     selected_var_new <- filter(cpprofile_new, `_vname_` == var)
-    selected_var_old <- selected_var_old[,c(var, "_yhat_", "_label_")]
-    selected_var_new <- selected_var_new[,c(var, "_yhat_", "_label_")]
+    selected_var_old <- selected_var_old[,c("_x_", "_yhat_", "_label_")]
+    selected_var_new <- selected_var_new[,c("_x_", "_yhat_", "_label_")]
     selected_var <- rbind(selected_var_old, selected_var_new)
     colnames(selected_var)[1] <- "x"
 
@@ -190,16 +188,15 @@ compare_two_profiles <- function(cpprofile_old, cpprofile_new, variables, scale
 #' @export
 #'
 #' @examples
-#'  library("DALEX2")
-#'  \dontrun{
+#'  library("DALEX")
+#'  \donttest{
 #'  library("ranger")
 #'  predict_function <- function(m,x,...) predict(m, x, ...)$predictions
 #'  model_old <- ranger(m2.price ~ ., data = apartments)
 #'  model_new <- ranger(m2.price ~ ., data = apartments_test)
 #'  calculate_model_drift(model_old, model_new,
 #'                   apartments_test,
 #'                   apartments_test$m2.price,
-#'                   max_obs = 1000,
 #'                   predict_function = predict_function)
 #'
 #'  # here we compare model created on male data
@@ -213,24 +210,23 @@ compare_two_profiles <- function(cpprofile_old, cpprofile_new, variables, scale
 #'  calculate_model_drift(model_old, model_new,
 #'                   HR_test,
 #'                   HR_test$status == "fired",
-#'                   max_obs = 1000,
 #'                   predict_function = predict_function)
 #'
 #'  # plot it
-#'  library("ceterisParibus2")
-#'  prof_old <- individual_variable_profile(model_old,
-#'                                      data = data_new,
-#'                                      new_observation = data_new[1:1000,],
+#'  library("ingredients")
+#'  prof_old <- partial_dependency(model_old,
+#'                                      data = data_new[1:1000,],
 #'                                      label = "model_old",
-#'                                      predict_function = predict_function)
-#'  prof_new <- individual_variable_profile(model_new,
-#'                                      data = data_new,
-#'                                      new_observation = data_new[1:1000,],
+#'                                      predict_function = predict_function,
+#'                                      grid_points = 101,
+#'                                      variable_splits = NULL)
+#'  prof_new <- partial_dependency(model_new,
+#'                                      data = data_new[1:1000,],
 #'                                      label = "model_new",
-#'                                      predict_function = predict_function)
-#'  plot(prof_old, prof_new,
-#'       selected_variables = "age", aggregate_profiles = mean,
-#'       show_observations = FALSE, color = "_label_")
+#'                                      predict_function = predict_function,
+#'                                      grid_points = 101,
+#'                                      variable_splits = NULL)
+#'  plot(prof_old, prof_new, color = "_label_")
 #'
 #' }
 #'

diff --git a/R/run_drift_checks.R b/R/run_drift_checks.R
@@ -17,8 +17,8 @@
 #' @export
 #'
 #' @examples
-#'  library("DALEX2")
-#'  \dontrun{
+#'  library("DALEX")
+#'  \donttest{
 #'  library("ranger")
 #'  predict_function <- function(m,x,...) predict(m, x, ...)$predictions
 #'  model_old <- ranger(m2.price ~ ., data = apartments)
@@ -32,7 +32,7 @@ check_drift <- function(model_old, model_new,
                              data_old, data_new,
                              y_old, y_new,
                              predict_function = predict,
-                             max_obs = 500,
+                             max_obs = 100,
                              bins = 20,
                              scale = sd(y_new, na.rm = TRUE)) {
 

diff --git a/_pkgdown.yml b/_pkgdown.yml
@@ -1,3 +1,3 @@
 template:
-  package: MI2template
+  package: DrWhyTemplate
   default_assets: false
diff --git a/man/calculate_covariate_drift.Rd b/man/calculate_covariate_drift.Rd
diff --git a/man/calculate_model_drift.Rd b/man/calculate_model_drift.Rd
diff --git a/man/calculate_residuals_drift.Rd b/man/calculate_residuals_drift.Rd
diff --git a/man/check_drift.Rd b/man/check_drift.Rd
diff --git a/man/print.covariate_drift.Rd b/man/print.covariate_drift.Rd