Skip to content

Commit

Permalink
drifter v0.2 uses ingredients instead of ceterisParibus2
Browse files Browse the repository at this point in the history
  • Loading branch information
pbiecek committed May 19, 2019
1 parent 17f40a7 commit 9c40286
Show file tree
Hide file tree
Showing 13 changed files with 106 additions and 107 deletions.
14 changes: 7 additions & 7 deletions DESCRIPTION
@@ -1,21 +1,21 @@
Package: drifter
Title: Concept Drift and Concept Shift Detection for Predictive Models
Version: 0.1
Version: 0.2
Authors@R: person("Przemyslaw", "Biecek", email = "przemyslaw.biecek@gmail.com", role = c("aut", "cre"))
Description: Concept drift refers to the change in the data distribution or
in the relationships between variables over time.
'drifter' calculates distances between variable distributions or
variable relations.
'drifter' is a part of 'DrWhy' universe: tools for Explanation, Exploration and Visualisation for Predictive Models.
in the relationships between variables over time.
'drifter' calculates distances between variable distributions or
variable relations and identifies both types of drift.
'drifter' is a part of the 'DrWhy.AI' universe (Biecek 2018) <arXiv:1806.08915>.
Depends: R (>= 3.1)
License: GPL
Encoding: UTF-8
LazyData: true
Imports:
DALEX2,
DALEX,
dplyr,
tidyr,
ceterisParibus2
ingredients
Suggests:
testthat,
ranger
Expand Down
3 changes: 2 additions & 1 deletion NAMESPACE
Expand Up @@ -8,10 +8,11 @@ export(calculate_model_drift)
export(calculate_residuals_drift)
export(check_drift)
export(compare_two_profiles)
importFrom(ceterisParibus2,individual_variable_profile)
importFrom(DALEX,explain)
importFrom(dplyr,filter)
importFrom(dplyr,group_by)
importFrom(dplyr,summarise)
importFrom(ingredients,partial_dependency)
importFrom(stats,predict)
importFrom(stats,sd)
importFrom(tidyr,spread)
6 changes: 6 additions & 0 deletions NEWS.md
@@ -1,6 +1,12 @@
drifter 0.2
----------------------------------------------------------------
* `DALEX2` is replaced by `DALEX`
* `ceterisParibus2` is replaced by `ingredients`

drifter 0.1
----------------------------------------------------------------
* `calculate_covariate_drift()` calculates 1d inverse intersection distances between two datasets
* `calculate_residuals_drift()` calculates 1d inverse intersection distances between two residuals calculated on old and new data
* `calculate_model_drift()` calculates distances between PDP curves calculated for new and old model
* `check_drift()` executes all tests for drift

4 changes: 2 additions & 2 deletions R/calculate_covariate_drift.R
Expand Up @@ -12,7 +12,7 @@
#' @export
#'
#' @examples
#' library("DALEX2")
#' library("DALEX")
#' # here we do not have any drift
#' d <- calculate_covariate_drift(apartments, apartments_test)
#' d
Expand Down Expand Up @@ -71,7 +71,7 @@ calculate_distance <- function(variable_old, variable_new, bins = 20) {
#' @export
#'
#' @examples
#' library("DALEX2")
#' library("DALEX")
#' # here we do not have any drift
#' d <- calculate_covariate_drift(apartments, apartments_test)
#' d
Expand Down
100 changes: 48 additions & 52 deletions R/calculate_model_drift.R
Expand Up @@ -14,20 +14,20 @@
#' @importFrom dplyr filter group_by summarise
#' @importFrom tidyr spread
#' @importFrom stats predict sd
#' @importFrom ceterisParibus2 individual_variable_profile
#' @importFrom ingredients partial_dependency
#' @importFrom DALEX explain
#' @export
#'
#' @examples
#' library("DALEX2")
#' \dontrun{
#' library("DALEX")
#' \donttest{
#' library("ranger")
#' predict_function <- function(m,x,...) predict(m, x, ...)$predictions
#' model_old <- ranger(m2.price ~ ., data = apartments)
#' model_new <- ranger(m2.price ~ ., data = apartments_test)
#' calculate_model_drift(model_old, model_new,
#' apartments_test,
#' apartments_test$m2.price,
#' max_obs = 1000,
#' predict_function = predict_function)
#'
#' # here we compare model created on male data
Expand All @@ -41,50 +41,48 @@
#' calculate_model_drift(model_old, model_new,
#' HR_test,
#' HR_test$status == "fired",
#' max_obs = 1000,
#' predict_function = predict_function)
#'
#' # plot it
#' library("ceterisParibus2")
#' prof_old <- individual_variable_profile(model_old,
#' data = data_new,
#' new_observation = data_new[1:1000,],
#' library("ingredients")
#' prof_old <- partial_dependency(model_old,
#' data = data_new[1:500,],
#' label = "model_old",
#' predict_function = predict_function)
#' prof_new <- individual_variable_profile(model_new,
#' data = data_new,
#' new_observation = data_new[1:1000,],
#' predict_function = predict_function,
#' grid_points = 101,
#' variable_splits = NULL)
#' prof_new <- partial_dependency(model_new,
#' data = data_new[1:500,],
#' label = "model_new",
#' predict_function = predict_function)
#' plot(prof_old, prof_new,
#' selected_variables = "age", aggregate_profiles = mean,
#' show_observations = FALSE, color = "_label_")
#' predict_function = predict_function,
#' grid_points = 101,
#' variable_splits = NULL)
#' plot(prof_old, prof_new, color = "_label_")
#' }
#'
calculate_model_drift <- function(model_old, model_new,
data_new,
y_new,
predict_function = predict,
max_obs = -1,
max_obs = 100,
scale = sd(y_new, na.rm = TRUE)) {
#
# test of model structure
if (max_obs > 0) {
data_new_small <- data_new[sample(1:nrow(data_new), max_obs),]
} else {
data_new_small <- data_new
if (max_obs <= 0) {
max_obs = nrow(data_new)
}

prof_old <- individual_variable_profile(model_old,
data = data_new,
new_observation = data_new_small,
label = "model_old",
predict_function = predict_function)
prof_new <- individual_variable_profile(model_new,
data = data_new,
new_observation = data_new_small,
label = "model_new",
predict_function = predict_function)
explainer_old <- explain(model_old,
data = data_new,
label = "model_old",
predict_function = predict_function)
explainer_new <- explain(model_new,
data = data_new,
label = "model_new",
predict_function = predict_function)

prof_old <- partial_dependency(explainer_old, N = max_obs)
prof_new <- partial_dependency(explainer_new, N = max_obs)
# for all variables
vars <- as.character(unique(prof_old$`_vname_`))

Expand All @@ -109,8 +107,8 @@ calculate_model_drift <- function(model_old, model_new,
#' @export
#'
#' @examples
#' library("DALEX2")
#' \dontrun{
#' library("DALEX")
#' \donttest{
#' library("ranger")
#' predict_function <- function(m,x,...) predict(m, x, ...)$predictions
#' model_old <- ranger(m2.price ~ ., data = apartments)
Expand Down Expand Up @@ -162,8 +160,8 @@ compare_two_profiles <- function(cpprofile_old, cpprofile_new, variables, scale
var <- variables[i]
selected_var_old <- filter(cpprofile_old, `_vname_` == var)
selected_var_new <- filter(cpprofile_new, `_vname_` == var)
selected_var_old <- selected_var_old[,c(var, "_yhat_", "_label_")]
selected_var_new <- selected_var_new[,c(var, "_yhat_", "_label_")]
selected_var_old <- selected_var_old[,c("_x_", "_yhat_", "_label_")]
selected_var_new <- selected_var_new[,c("_x_", "_yhat_", "_label_")]
selected_var <- rbind(selected_var_old, selected_var_new)
colnames(selected_var)[1] <- "x"

Expand All @@ -190,16 +188,15 @@ compare_two_profiles <- function(cpprofile_old, cpprofile_new, variables, scale
#' @export
#'
#' @examples
#' library("DALEX2")
#' \dontrun{
#' library("DALEX")
#' \donttest{
#' library("ranger")
#' predict_function <- function(m,x,...) predict(m, x, ...)$predictions
#' model_old <- ranger(m2.price ~ ., data = apartments)
#' model_new <- ranger(m2.price ~ ., data = apartments_test)
#' calculate_model_drift(model_old, model_new,
#' apartments_test,
#' apartments_test$m2.price,
#' max_obs = 1000,
#' predict_function = predict_function)
#'
#' # here we compare model created on male data
Expand All @@ -213,24 +210,23 @@ compare_two_profiles <- function(cpprofile_old, cpprofile_new, variables, scale
#' calculate_model_drift(model_old, model_new,
#' HR_test,
#' HR_test$status == "fired",
#' max_obs = 1000,
#' predict_function = predict_function)
#'
#' # plot it
#' library("ceterisParibus2")
#' prof_old <- individual_variable_profile(model_old,
#' data = data_new,
#' new_observation = data_new[1:1000,],
#' library("ingredients")
#' prof_old <- partial_dependency(model_old,
#' data = data_new[1:1000,],
#' label = "model_old",
#' predict_function = predict_function)
#' prof_new <- individual_variable_profile(model_new,
#' data = data_new,
#' new_observation = data_new[1:1000,],
#' predict_function = predict_function,
#' grid_points = 101,
#' variable_splits = NULL)
#' prof_new <- partial_dependency(model_new,
#' data = data_new[1:1000,],
#' label = "model_new",
#' predict_function = predict_function)
#' plot(prof_old, prof_new,
#' selected_variables = "age", aggregate_profiles = mean,
#' show_observations = FALSE, color = "_label_")
#' predict_function = predict_function,
#' grid_points = 101,
#' variable_splits = NULL)
#' plot(prof_old, prof_new, color = "_label_")
#'
#' }
#'
Expand Down
6 changes: 3 additions & 3 deletions R/run_drift_checks.R
Expand Up @@ -17,8 +17,8 @@
#' @export
#'
#' @examples
#' library("DALEX2")
#' \dontrun{
#' library("DALEX")
#' \donttest{
#' library("ranger")
#' predict_function <- function(m,x,...) predict(m, x, ...)$predictions
#' model_old <- ranger(m2.price ~ ., data = apartments)
Expand All @@ -32,7 +32,7 @@ check_drift <- function(model_old, model_new,
data_old, data_new,
y_old, y_new,
predict_function = predict,
max_obs = 500,
max_obs = 100,
bins = 20,
scale = sd(y_new, na.rm = TRUE)) {

Expand Down
2 changes: 1 addition & 1 deletion _pkgdown.yml
@@ -1,3 +1,3 @@
template:
package: MI2template
package: DrWhyTemplate
default_assets: false
2 changes: 1 addition & 1 deletion man/calculate_covariate_drift.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

34 changes: 16 additions & 18 deletions man/calculate_model_drift.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions man/calculate_residuals_drift.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions man/check_drift.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/print.covariate_drift.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 9c40286

Please sign in to comment.