/
run_drift_checks.R
66 lines (62 loc) · 2.9 KB
/
run_drift_checks.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#' This function executes all tests for drift between two datasets / models
#'
#' Currently three checks are implemented, covariate drift, residual drift and model drift.
#'
#' @param model_old model created on historical / `old`data
#' @param model_new model created on current / `new`data
#' @param data_old data frame with historical / `old` data
#' @param data_new data frame with current / `new` data
#' @param y_old true values of target variable for historical / `old` data
#' @param y_new true values of target variable for current / `new` data
#' @param predict_function function that takes two arguments: model and new data and returns numeric vector with predictions, by default it's `predict`
#' @param max_obs if negative, them all observations are used for calculation of PDP, is positive, then only `max_obs` are used for calculation of PDP
#' @param bins continuous variables are discretized to `bins` intervals of equal sizes
#' @param scale scale parameter for calculation of scaled drift
#'
#' @return This function is executed for its side effects, all checks are being printed on the screen. Additionaly it returns list with particualr checks.
#' @export
#'
#' @examples
#' library("DALEX")
#' \donttest{
#' library("ranger")
#' predict_function <- function(m,x,...) predict(m, x, ...)$predictions
#' model_old <- ranger(m2.price ~ ., data = apartments)
#' model_new <- ranger(m2.price ~ ., data = apartments_test)
#' check_drift(model_old, model_new,
#' apartments, apartments_test,
#' apartments$m2.price, apartments_test$m2.price,
#' predict_function = predict_function)
#' }
check_drift <- function(model_old, model_new,
data_old, data_new,
y_old, y_new,
predict_function = predict,
max_obs = 100,
bins = 20,
scale = sd(y_new, na.rm = TRUE)) {
# check covariate drift
dc <- calculate_covariate_drift(data_old, data_new, bins = bins)
cat(" -------------------------------------\n")
print(dc)
# check residual drift
dr <- calculate_residuals_drift(model_old,
data_old, data_new,
y_old, y_new,
predict_function = predict_function,
bins = bins)
cat(" -------------------------------------\n")
print(dr)
# check model drift
dm <- calculate_model_drift(model_old, model_new,
data_new,
y_new,
predict_function = predict_function,
max_obs = max_obs,
scale = scale)
cat(" -----------------------------------------------\n")
print(dm)
invisible(list(covariate_drift = dc,
residual_drift = dr,
model_drift = dm))
}