diff --git a/DESCRIPTION b/DESCRIPTION index 2d7567d..ec3e8c9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -41,8 +41,10 @@ Suggests: h2o, mljar, mlr, + mlr3, randomForest, rmarkdown, + rpart, xgboost, testthat URL: https://ModelOriented.github.io/DALEXtra/, https://github.com/ModelOriented/DALEXtra diff --git a/NAMESPACE b/NAMESPACE index ca16e6e..4aa289a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,6 +4,14 @@ S3method(aspect_importance,default) S3method(aspect_importance,explainer) S3method(aspect_importance_single,default) S3method(aspect_importance_single,explainer) +S3method(model_info,H2OBinomialModel) +S3method(model_info,H2ORegressionModel) +S3method(model_info,LearnerClassif) +S3method(model_info,LearnerRegr) +S3method(model_info,WrappedModel) +S3method(model_info,keras) +S3method(model_info,mljar_model) +S3method(model_info,scikitlearn_model) S3method(plot,aspect_importance) S3method(plot,funnel_measure) S3method(plot,overall_comparison) @@ -16,6 +24,8 @@ S3method(triplot,default) S3method(triplot,explainer) S3method(yhat,H2OBinomialModel) S3method(yhat,H2ORegressionModel) +S3method(yhat,LearnerClassif) +S3method(yhat,LearnerRegr) S3method(yhat,WrappedModel) S3method(yhat,keras) S3method(yhat,mljar_model) @@ -28,6 +38,7 @@ export(explain_h2o) export(explain_keras) export(explain_mljar) export(explain_mlr) +export(explain_mlr3) export(explain_scikitlearn) export(funnel_measure) export(get_sample) diff --git a/R/explain_h2o.R b/R/explain_h2o.R index a16d3b4..656e1fb 100644 --- a/R/explain_h2o.R +++ b/R/explain_h2o.R @@ -8,12 +8,15 @@ #' @param model object - a model to be explained #' @param data data.frame or matrix - data that was used for fitting. If not provided then will be extracted from the model. Data should be passed without target column (this shall be provided as the \code{y} argument). NOTE: If target variable is present in the \code{data}, some of the functionalities my not work properly. #' @param y numeric vector with outputs / scores. 
If provided then it shall have the same size as \code{data} +#' @param weights numeric vector with sampling weights. By default it's \code{NULL}. If provided then it shall have the same length as \code{data} #' @param predict_function function that takes two arguments: model and new data and returns numeric vector with predictions #' @param residual_function function that takes three arguments: model, data and response vector y. It should return a numeric vector with model residuals for given data. If not provided, response residuals (\eqn{y-\hat{y}}) are calculated. -#' @param ... other parameters (passed for example to predict function) +#' @param ... other parameters #' @param label character - the name of the model. By default it's extracted from the 'class' attribute of the model #' @param verbose if TRUE (default) then diagnostic messages will be printed -#' @param precalculate if TRUE (default) then 'predicted_values' and 'residuals' are calculated when explainer is created. This will happenn also if 'verbose' is TRUE. +#' @param precalculate if TRUE (default) then 'predicted_values' and 'residuals' are calculated when explainer is created. This will happenn also if 'verbose' is TRUE +#' @param colorize if TRUE (default) then \code{WARNINGS}, \code{ERRORS} and \code{NOTES} are colorized. Will work only in the R console. +#' @param model_info a named list (\code{package}, \code{version}, \code{type}) containg information about model. If \code{NULL}, \code{DALEX} will seek for information on it's own. 
#' #' @return explainer object (\code{\link[DALEX]{explain}}) ready to work with DALEX #' @@ -50,12 +53,15 @@ explain_h2o <- function(model, data = NULL, y = NULL, + weights = NULL, predict_function = NULL, residual_function = NULL, ..., label = NULL, verbose = TRUE, - precalculate = TRUE) { + precalculate = TRUE, + colorize = TRUE, + model_info = NULL) { if (class(y) == "H2OFrame") { y <- as.numeric(as.vector(y)) } @@ -65,11 +71,14 @@ explain_h2o <- model, data = data, y = y, + weights = weights, predict_function = predict_function, residual_function = residual_function, ..., label = label, verbose = verbose, - precalculate = precalculate + precalculate = precalculate, + colorize = colorize, + model_info = model_info ) } diff --git a/R/explain_keras.R b/R/explain_keras.R index 04384a2..0048255 100644 --- a/R/explain_keras.R +++ b/R/explain_keras.R @@ -11,12 +11,15 @@ #' @param env A path to python virtual environment. #' @param data test data set that will be passed to \code{\link[DALEX]{explain}}. #' @param y vector that will be passed to \code{\link[DALEX]{explain}}. +#' @param weights numeric vector with sampling weights. By default it's \code{NULL}. If provided then it shall have the same length as \code{data} #' @param predict_function predict function that will be passed into \code{\link[DALEX]{explain}}. If NULL, default will be used. #' @param residual_function residual function that will be passed into \code{\link[DALEX]{explain}}. If NULL, default will be used. #' @param ... other parameters #' @param label label that will be passed into \code{\link[DALEX]{explain}}. If NULL, default will be used. #' @param verbose bool that will be passed into \code{\link[DALEX]{explain}}. If NULL, default will be used. -#' @param precalculate if TRUE (default) then 'predicted_values' and 'residuals' are calculated when \code{\link[DALEX]{explain}} is created. This will happenn also if 'verbose' is TRUE. 
+#' @param precalculate if TRUE (default) then 'predicted_values' and 'residuals' are calculated when explainer is created. This will happenn also if 'verbose' is TRUE. +#' @param colorize if TRUE (default) then \code{WARNINGS}, \code{ERRORS} and \code{NOTES} are colorized. Will work only in the R console. +#' @param model_info a named list (\code{package}, \code{version}, \code{type}) containg information about model. If \code{NULL}, \code{DALEX} will seek for information on it's own. #' #' #' @author Szymon Maksymiuk @@ -84,26 +87,32 @@ explain_keras <- env = NULL, data = NULL, y = NULL, + weights = NULL, predict_function = NULL, residual_function = NULL, ..., label = NULL, verbose = TRUE, - precalculate = TRUE) { + precalculate = TRUE, + colorize = TRUE, + model_info = NULL) { prepeare_env(yml, condaenv, env) model <- dalex_load_object(path, "keras") explain( - model = model, + model, data = data, y = y, + weights = weights, predict_function = predict_function, residual_function = residual_function, ..., label = label, verbose = verbose, - precalculate = precalculate + precalculate = precalculate, + colorize = colorize, + model_info = model_info ) } diff --git a/R/explain_mljar.R b/R/explain_mljar.R index ca6cfab..4cf15d7 100644 --- a/R/explain_mljar.R +++ b/R/explain_mljar.R @@ -9,12 +9,15 @@ #' @param project_title character - a name of project_title in which model was built. Without it predictions are unreachable. #' @param data data.frame or matrix - data that was used for fitting. If not provided then will be extracted from the model. Data should be passed without target column (this shall be provided as the \code{y} argument). NOTE: If target variable is present in the \code{data}, some of the functionalities my not work properly. #' @param y numeric vector with outputs / scores. If provided then it shall have the same size as \code{data} +#' @param weights numeric vector with sampling weights. By default it's \code{NULL}. 
If provided then it shall have the same length as \code{data} #' @param predict_function function that takes two arguments: model and new data and returns numeric vector with predictions #' @param residual_function function that takes three arguments: model, data and response vector y. It should return a numeric vector with model residuals for given data. If not provided, response residuals (\eqn{y-\hat{y}}) are calculated. #' @param ... other parameters #' @param label character - the name of the model. By default it's extracted from the 'class' attribute of the model -#' @param verbose if TRUE (default) then diagnostic messages will be printed -#' @param precalculate if TRUE (default) then 'predicted_values' and 'residuals' are calculated when explainer is created. This will happenn also if 'verbose' is TRUE. +#' @param verbose if TRUE (default) then diagnostic messages will be printed. +#' @param precalculate if TRUE (default) then 'predicted_values' and 'residuals' are calculated when explainer is created. This will happenn also if 'verbose' is TRUE +#' @param colorize if TRUE (default) then \code{WARNINGS}, \code{ERRORS} and \code{NOTES} are colorized. Will work only in the R console. +#' @param model_info a named list (\code{package}, \code{version}, \code{type}) containg information about model. If \code{NULL}, \code{DALEX} will seek for information on it's own. 
#' #' @return explainer object (\code{\link[DALEX]{explain}}) ready to work with DALEX #' @@ -47,12 +50,15 @@ explain_mljar <- project_title, data = NULL, y = NULL, + weights = NULL, predict_function = NULL, residual_function = NULL, ..., label = NULL, verbose = TRUE, - precalculate = TRUE) { + precalculate = TRUE, + colorize = TRUE, + model_info = NULL) { if (!"MLJAR_TOKEN" %in% names(Sys.getenv())) { stop( @@ -85,11 +91,14 @@ explain_mljar <- model, data = data, y = y, + weights = weights, predict_function = predict_function, residual_function = residual_function, ..., label = label, verbose = verbose, - precalculate = precalculate + precalculate = precalculate, + colorize = colorize, + model_info = model_info ) } diff --git a/R/explain_mlr.R b/R/explain_mlr.R index e3981a5..46357b7 100644 --- a/R/explain_mlr.R +++ b/R/explain_mlr.R @@ -7,13 +7,16 @@ #' #' @param model object - a model to be explained #' @param data data.frame or matrix - data that was used for fitting. If not provided then will be extracted from the model. Data should be passed without target column (this shall be provided as the \code{y} argument). NOTE: If target variable is present in the \code{data}, some of the functionalities my not work properly. -#' @param y numeric vector with outputs / scores. If provided then it shall have the same size as \code{data} +#' @param y numeric vector with outputs / scores. If provided then it shall have the same size as \code{data}. +#' @param weights numeric vector with sampling weights. By default it's \code{NULL}. If provided then it shall have the same length as \code{data} #' @param predict_function function that takes two arguments: model and new data and returns numeric vector with predictions #' @param residual_function function that takes three arguments: model, data and response vector y. It should return a numeric vector with model residuals for given data. If not provided, response residuals (\eqn{y-\hat{y}}) are calculated. #' @param ... 
other parameters #' @param label character - the name of the model. By default it's extracted from the 'class' attribute of the model #' @param verbose if TRUE (default) then diagnostic messages will be printed #' @param precalculate if TRUE (default) then 'predicted_values' and 'residuals' are calculated when explainer is created. This will happenn also if 'verbose' is TRUE +#' @param colorize if TRUE (default) then \code{WARNINGS}, \code{ERRORS} and \code{NOTES} are colorized. Will work only in the R console. +#' @param model_info a named list (\code{package}, \code{version}, \code{type}) containing information about model. If \code{NULL}, \code{DALEX} will seek for information on its own. #' #' @return explainer object (\code{\link[DALEX]{explain}}) ready to work with DALEX #' @@ -55,22 +58,28 @@ explain_mlr <- function(model, data = NULL, y = NULL, + weights = NULL, predict_function = NULL, residual_function = NULL, ..., label = NULL, verbose = TRUE, - precalculate = TRUE) { + precalculate = TRUE, + colorize = TRUE, + model_info = NULL) { explain( model, data = data, y = y, + weights = weights, predict_function = predict_function, residual_function = residual_function, ..., label = label, verbose = verbose, - precalculate = precalculate + precalculate = precalculate, + colorize = colorize, + model_info = model_info ) diff --git a/R/explain_mlr3.R b/R/explain_mlr3.R new file mode 100644 index 0000000..ff3aaea --- /dev/null +++ b/R/explain_mlr3.R @@ -0,0 +1,78 @@ +#' Create explainer from your mlr3 model +#' +#' DALEX is designed to work with various black-box models like tree ensembles, linear models, neural networks etc. +#' Unfortunately R packages that create such models are very inconsistent. Different tools use different interfaces to train, validate and use models. +#' One of those tools, and one of the most popular ones, is the mlr3 package. We would like to present a dedicated explain function for it. 
+#' +#' +#' @param model object - a fitted learner created with \code{mlr3}. +#' @param data data.frame or matrix - data that was used for fitting. If not provided then it will be extracted from the model. Data should be passed without target column (this shall be provided as the \code{y} argument). NOTE: If target variable is present in the \code{data}, some of the functionalities may not work properly. +#' @param y numeric vector with outputs / scores. If provided then it shall have the same size as \code{data} +#' @param weights numeric vector with sampling weights. By default it's \code{NULL}. If provided then it shall have the same length as \code{data} +#' @param predict_function function that takes two arguments: model and new data and returns numeric vector with predictions +#' @param residual_function function that takes three arguments: model, data and response vector y. It should return a numeric vector with model residuals for given data. If not provided, response residuals (\eqn{y-\hat{y}}) are calculated. +#' @param ... other parameters +#' @param label character - the name of the model. By default it's extracted from the 'class' attribute of the model +#' @param verbose if TRUE (default) then diagnostic messages will be printed. +#' @param precalculate if TRUE (default) then 'predicted_values' and 'residuals' are calculated when explainer is created. This will happen also if 'verbose' is TRUE +#' @param colorize if TRUE (default) then \code{WARNINGS}, \code{ERRORS} and \code{NOTES} are colorized. Will work only in the R console. +#' @param model_info a named list (\code{package}, \code{version}, \code{type}) containing information about model. If \code{NULL}, \code{DALEX} will seek for information on its own. 
+#' +#' @return explainer object (\code{\link[DALEX]{explain}}) ready to work with DALEX +#' +#' @import DALEX +#' @importFrom stats predict +#' @importFrom DALEX yhat +#' +#' @rdname explain_mlr3 +#' @export +#' @examples +#'library("DALEXtra") +#' library(mlr3) +#' titanic_imputed$survived <- as.factor(titanic_imputed$survived) +#' task_classif <- TaskClassif$new(id = "1", backend = titanic_imputed, target = "survived") +#' learner_classif <- lrn("classif.rpart", predict_type = "prob") +#' learner_classif$train(task_classif) +#' explain_mlr3(learner_classif, data = titanic_imputed, +#' y = as.numeric(as.character(titanic_imputed$survived))) +#' +#' +#' task_regr <- TaskRegr$new(id = "2", backend = apartments, target = "m2.price") +#' learner_regr <- lrn("regr.rpart") +#' learner_regr$train(task_regr) +#' explain_mlr3(learner_regr, data = apartments, apartments$m2.price) +#' + + +explain_mlr3 <- + function(model, + data = NULL, + y = NULL, + weights = NULL, + predict_function = NULL, + residual_function = NULL, + ..., + label = NULL, + verbose = TRUE, + precalculate = TRUE, + colorize = TRUE, + model_info = NULL + ) { + explain( + model, + data = data, + y = y, + weights = weights, + predict_function = predict_function, + residual_function = residual_function, + ..., + label = label, + verbose = verbose, + precalculate = precalculate, + colorize = colorize, + model_info = model_info + ) + + + + } diff --git a/R/explain_scikitlearn.R b/R/explain_scikitlearn.R index cbf4577..223e37c 100644 --- a/R/explain_scikitlearn.R +++ b/R/explain_scikitlearn.R @@ -11,12 +11,15 @@ #' @param env A path to python virtual environment. #' @param data test data set that will be passed to \code{\link[DALEX]{explain}}. #' @param y vector that will be passed to \code{\link[DALEX]{explain}}. +#' @param weights numeric vector with sampling weights. By default it's \code{NULL}. 
If provided then it shall have the same length as \code{data} #' @param predict_function predict function that will be passed into \code{\link[DALEX]{explain}}. If NULL, default will be used. #' @param residual_function residual function that will be passed into \code{\link[DALEX]{explain}}. If NULL, default will be used. #' @param ... other parameters #' @param label label that will be passed into \code{\link[DALEX]{explain}}. If NULL, default will be used. #' @param verbose bool that will be passed into \code{\link[DALEX]{explain}}. If NULL, default will be used. -#' @param precalculate if TRUE (default) then 'predicted_values' and 'residuals' are calculated when \code{\link[DALEX]{explain}} is created. This will happenn also if 'verbose' is TRUE. +#' @param precalculate if TRUE (default) then 'predicted_values' and 'residuals' are calculated when explainer is created. This will happenn also if 'verbose' is TRUE. +#' @param colorize if TRUE (default) then \code{WARNINGS}, \code{ERRORS} and \code{NOTES} are colorized. Will work only in the R console. +#' @param model_info a named list (\code{package}, \code{version}, \code{type}) containg information about model. If \code{NULL}, \code{DALEX} will seek for information on it's own. 
#' #' #' @author Szymon Maksymiuk @@ -97,12 +100,15 @@ explain_scikitlearn <- env = NULL, data = NULL, y = NULL, + weights = NULL, predict_function = NULL, residual_function = NULL, ..., label = NULL, verbose = TRUE, - precalculate = TRUE) { + precalculate = TRUE, + colorize = TRUE, + model_info = NULL) { prepeare_env(yml, condaenv, env) model <- dalex_load_object(path, "scikitlearn_model") @@ -145,17 +151,20 @@ explain_scikitlearn <- class(params) <- "scikitlearn_set" - explainer <- explain( - model = model, - data = data, - y = y, - predict_function = predict_function, - residual_function = residual_function, - ..., - label = label, - verbose = verbose, - precalculate = precalculate - ) + explainer <- explain( + model, + data = data, + y = y, + weights = weights, + predict_function = predict_function, + residual_function = residual_function, + ..., + label = label, + verbose = verbose, + precalculate = precalculate, + colorize = colorize, + model_info = model_info + ) explainer$param_set <- params explainer } diff --git a/R/model_info.R b/R/model_info.R new file mode 100644 index 0000000..3dacd31 --- /dev/null +++ b/R/model_info.R @@ -0,0 +1,143 @@ +#' Exract info from model +#' +#' This generic function let user extract base information about model. The function returns a named list of class \code{model_info} that +#' contain about package of model, version and task type. For wrappers like \code{mlr} or \code{caret} both, package and wrapper inforamtion +#' are stored +#' +#' @param model - model object +#' @param ... 
- other arguments +#' +#' Currently supported packages are: +#' \itemize{ +#' \item \code{mlr} models created with \code{mlr} package +#' \item \code{h2o} models created with \code{h2o} package +#' \item \code{scikit-learn} models created with \code{scikit-learn} Python library and accessed via \code{reticulate} +#' \item \code{keras} models created with \code{keras} Python library and accessed via \code{reticulate} +#' \item \code{mljar} models created with \code{mljar} API and accessed via \code{mljar} R package +#' \item \code{mlr3} models created with \code{mlr3} package +#' } +#' +#' @return A named list of class \code{model_info} + + +#' @rdname model_info +#' @export +model_info.WrappedModel <- function(model, ...) { + switch(model$task.desc$type, + "classif" = { + type <- "classification" + }, + "regr" = { + type <- "regression" + }, + stop("Model is not explainable mlr object")) + package_wrapper <- "mlr" + ver_wrapper <- as.character(utils::packageVersion("mlr")) + package <- model$learner$package + ver <- as.character(utils::packageVersion(package)) + model_info <- list(package = c(wrapper = package_wrapper, package = package), ver = c(wrapper = ver_wrapper, package = ver), type = type) + class(model_info) <- "model_info" + model_info +} + +model_info.h2o <- function(model, ...) 
{ + switch( + class(model), + "H2ORegressionModel" = { + type <- "regression" + }, + "H2OBinomialModel" = { + type <- "classification" + }, + stop("Model is not explainable h2o object") + ) + package_wrapper <- "h2o" + ver_wrapper <- as.character(utils::packageVersion("h2o")) + package <- model@algorithm + ver <- "unkown" + model_info <- list(package = c(wrapper = package_wrapper, package = package), ver = c(wrapper = ver_wrapper, package = ver), type = type) + class(model_info) <- "model_info" + model_info +} + +#' @rdname model_info +#' @export +model_info.H2ORegressionModel <- model_info.h2o + +#' @rdname model_info +#' @export +model_info.H2OBinomialModel <- model_info.h2o + +#' @rdname model_info +#' @export +model_info.scikitlearn_model <- function(model, ...) { + if ("predict_proba" %in% names(model)) { + type <- "classification" + } else { + type <- "regression" + } + package_wrapper <- "reticulate" + ver_wrapper <- as.character(utils::packageVersion("reticulate")) + package <- "scikit-learn" + ver <- "unkown" + model_info <- list(package = c(wrapper = package_wrapper, package = package), ver = c(wrapper = ver_wrapper, package = ver), type = type) + class(model_info) <- "model_info" + model_info +} + +#' @rdname model_info +#' @export +model_info.keras <- function(model, ...) { + if ("predict_proba" %in% names(model)) { + type <- "classification" + } else { + type <- "regression" + } + package_wrapper <- "reticulate" + ver_wrapper <- as.character(utils::packageVersion("reticulate")) + package <- "keras" + ver <- "unkown" + model_info <- list(package = c(wrapper = package_wrapper, package = package), ver = c(wrapper = ver_wrapper, package = ver), type = type) + class(model_info) <- "model_info" + model_info +} + +#' @rdname model_info +#' @export +model_info.mljar_model <- function(model, ...) 
{ + type <- "regression" + package_wrapper <- "mljar" + ver_wrapper <- as.character(utils::packageVersion("mljar")) + package <- "mljar" + ver <- "unkown" + model_info <- list(package = c(wrapper = package_wrapper, package = package), ver = c(wrapper = ver_wrapper, package = ver), type = type) + class(model_info) <- "model_info" + model_info +} + +#' @rdname model_info +#' @export +model_info.LearnerRegr <- function(model, ...) { + type <- "regression" + package_wrapper <- "mlr3" + ver_wrapper <- as.character(utils::packageVersion("mlr3")) + package <- model$packages + ver <- as.character(utils::packageVersion(package)) + model_info <- list(package = c(wrapper = package_wrapper, package = package), ver = c(wrapper = ver_wrapper, package = ver), type = type) + class(model_info) <- "model_info" + model_info +} + +#' @rdname model_info +#' @export +model_info.LearnerClassif <- function(model, ...) { + type <- "classification" + package_wrapper <- "mlr3" + ver_wrapper <- as.character(utils::packageVersion("mlr3")) + package <- model$packages + ver <- as.character(utils::packageVersion(package)) + model_info <- list(package = c(wrapper = package_wrapper, package = package), ver = c(wrapper = ver_wrapper, package = ver), type = type) + class(model_info) <- "model_info" + model_info +} + diff --git a/R/yhat.R b/R/yhat.R index 0541477..34f79f8 100644 --- a/R/yhat.R +++ b/R/yhat.R @@ -6,11 +6,12 @@ #' #' Currently supported packages are: #' \itemize{ -#' \item `mlr` see more in \code{\link{explain_mlr}} -#' \item `h2o` see more in \code{\link{explain_h2o}} -#' \item `scikit-learn` see more in \code{\link{explain_scikitlearn}} -#' \item `keras` see more in \code{\link{explain_keras}} -#' \item `mljar` see more in \code{\link{explain_mljar}} +#' \item \code{mlr} see more in \code{\link{explain_mlr}} +#' \item \code{h2o} see more in \code{\link{explain_h2o}} +#' \item \code{scikit-learn} see more in \code{\link{explain_scikitlearn}} +#' \item \code{keras} see more in 
\code{\link{explain_keras}} +#' \item \code{mljar} see more in \code{\link{explain_mljar}} +#' \item \code{mlr3} see more in \code{\link{explain_mlr3}} #' } #' #' @param X.model object - a model to be explained @@ -105,3 +106,15 @@ yhat.keras <- function(X.model, newdata, ...) { yhat.mljar_model <- function(X.model, newdata, ...) { unlist(mljar::mljar_predict(model = X.model, x_pred = newdata, project_title = X.model$project), use.names = FALSE) } + +#' @rdname yhat +#' @export +yhat.LearnerRegr <- function(X.model, newdata, ...) { + predict(X.model, newdata = newdata, ...) +} + +#' @rdname yhat +#' @export +yhat.LearnerClassif <- function(X.model, newdata, ...) { + predict(X.model, newdata = newdata, predict_type = "prob", ...)[,1] +} diff --git a/man/explain_h2o.Rd b/man/explain_h2o.Rd index e8771d8..e6e5a8f 100644 --- a/man/explain_h2o.Rd +++ b/man/explain_h2o.Rd @@ -4,9 +4,10 @@ \alias{explain_h2o} \title{Create explainer from your h2o model} \usage{ -explain_h2o(model, data = NULL, y = NULL, predict_function = NULL, - residual_function = NULL, ..., label = NULL, verbose = TRUE, - precalculate = TRUE) +explain_h2o(model, data = NULL, y = NULL, weights = NULL, + predict_function = NULL, residual_function = NULL, ..., + label = NULL, verbose = TRUE, precalculate = TRUE, + colorize = TRUE, model_info = NULL) } \arguments{ \item{model}{object - a model to be explained} @@ -15,17 +16,23 @@ explain_h2o(model, data = NULL, y = NULL, predict_function = NULL, \item{y}{numeric vector with outputs / scores. If provided then it shall have the same size as \code{data}} +\item{weights}{numeric vector with sampling weights. By default it's \code{NULL}. If provided then it shall have the same length as \code{data}} + \item{predict_function}{function that takes two arguments: model and new data and returns numeric vector with predictions} \item{residual_function}{function that takes three arguments: model, data and response vector y. 
It should return a numeric vector with model residuals for given data. If not provided, response residuals (\eqn{y-\hat{y}}) are calculated.} -\item{...}{other parameters (passed for example to predict function)} +\item{...}{other parameters} \item{label}{character - the name of the model. By default it's extracted from the 'class' attribute of the model} \item{verbose}{if TRUE (default) then diagnostic messages will be printed} -\item{precalculate}{if TRUE (default) then 'predicted_values' and 'residuals' are calculated when explainer is created. This will happenn also if 'verbose' is TRUE.} +\item{precalculate}{if TRUE (default) then 'predicted_values' and 'residuals' are calculated when explainer is created. This will happenn also if 'verbose' is TRUE} + +\item{colorize}{if TRUE (default) then \code{WARNINGS}, \code{ERRORS} and \code{NOTES} are colorized. Will work only in the R console.} + +\item{model_info}{a named list (\code{package}, \code{version}, \code{type}) containg information about model. If \code{NULL}, \code{DALEX} will seek for information on it's own.} } \value{ explainer object (\code{\link[DALEX]{explain}}) ready to work with DALEX diff --git a/man/explain_keras.Rd b/man/explain_keras.Rd index 8d02c34..8199c71 100644 --- a/man/explain_keras.Rd +++ b/man/explain_keras.Rd @@ -5,9 +5,9 @@ \title{Wrapper for Python Keras Models} \usage{ explain_keras(path, yml = NULL, condaenv = NULL, env = NULL, - data = NULL, y = NULL, predict_function = NULL, + data = NULL, y = NULL, weights = NULL, predict_function = NULL, residual_function = NULL, ..., label = NULL, verbose = TRUE, - precalculate = TRUE) + precalculate = TRUE, colorize = TRUE, model_info = NULL) } \arguments{ \item{path}{a path to the pickle file. 
Can be used without other arguments if you are sure that active Python version match pickle version.} @@ -22,6 +22,8 @@ explain_keras(path, yml = NULL, condaenv = NULL, env = NULL, \item{y}{vector that will be passed to \code{\link[DALEX]{explain}}.} +\item{weights}{numeric vector with sampling weights. By default it's \code{NULL}. If provided then it shall have the same length as \code{data}} + \item{predict_function}{predict function that will be passed into \code{\link[DALEX]{explain}}. If NULL, default will be used.} \item{residual_function}{residual function that will be passed into \code{\link[DALEX]{explain}}. If NULL, default will be used.} @@ -32,7 +34,11 @@ explain_keras(path, yml = NULL, condaenv = NULL, env = NULL, \item{verbose}{bool that will be passed into \code{\link[DALEX]{explain}}. If NULL, default will be used.} -\item{precalculate}{if TRUE (default) then 'predicted_values' and 'residuals' are calculated when \code{\link[DALEX]{explain}} is created. This will happenn also if 'verbose' is TRUE.} +\item{precalculate}{if TRUE (default) then 'predicted_values' and 'residuals' are calculated when explainer is created. This will happenn also if 'verbose' is TRUE.} + +\item{colorize}{if TRUE (default) then \code{WARNINGS}, \code{ERRORS} and \code{NOTES} are colorized. Will work only in the R console.} + +\item{model_info}{a named list (\code{package}, \code{version}, \code{type}) containg information about model. If \code{NULL}, \code{DALEX} will seek for information on it's own.} } \value{ An object of the class 'explainer'. 
diff --git a/man/explain_mljar.Rd b/man/explain_mljar.Rd index 67e82b3..e267e7e 100644 --- a/man/explain_mljar.Rd +++ b/man/explain_mljar.Rd @@ -5,8 +5,9 @@ \title{Create explainer from your mljar model} \usage{ explain_mljar(model, project_title, data = NULL, y = NULL, - predict_function = NULL, residual_function = NULL, ..., - label = NULL, verbose = TRUE, precalculate = TRUE) + weights = NULL, predict_function = NULL, residual_function = NULL, + ..., label = NULL, verbose = TRUE, precalculate = TRUE, + colorize = TRUE, model_info = NULL) } \arguments{ \item{model}{object - a mljar model to be explained} @@ -17,6 +18,8 @@ explain_mljar(model, project_title, data = NULL, y = NULL, \item{y}{numeric vector with outputs / scores. If provided then it shall have the same size as \code{data}} +\item{weights}{numeric vector with sampling weights. By default it's \code{NULL}. If provided then it shall have the same length as \code{data}} + \item{predict_function}{function that takes two arguments: model and new data and returns numeric vector with predictions} \item{residual_function}{function that takes three arguments: model, data and response vector y. It should return a numeric vector with model residuals for given data. If not provided, response residuals (\eqn{y-\hat{y}}) are calculated.} @@ -25,9 +28,13 @@ explain_mljar(model, project_title, data = NULL, y = NULL, \item{label}{character - the name of the model. By default it's extracted from the 'class' attribute of the model} -\item{verbose}{if TRUE (default) then diagnostic messages will be printed} +\item{verbose}{if TRUE (default) then diagnostic messages will be printed.} + +\item{precalculate}{if TRUE (default) then 'predicted_values' and 'residuals' are calculated when explainer is created. This will happenn also if 'verbose' is TRUE} + +\item{colorize}{if TRUE (default) then \code{WARNINGS}, \code{ERRORS} and \code{NOTES} are colorized. 
Will work only in the R console.} -\item{precalculate}{if TRUE (default) then 'predicted_values' and 'residuals' are calculated when explainer is created. This will happenn also if 'verbose' is TRUE.} +\item{model_info}{a named list (\code{package}, \code{version}, \code{type}) containg information about model. If \code{NULL}, \code{DALEX} will seek for information on it's own.} } \value{ explainer object (\code{\link[DALEX]{explain}}) ready to work with DALEX diff --git a/man/explain_mlr.Rd b/man/explain_mlr.Rd index 3ac8fcd..39c5a58 100644 --- a/man/explain_mlr.Rd +++ b/man/explain_mlr.Rd @@ -4,16 +4,19 @@ \alias{explain_mlr} \title{Create explainer from your mlr model} \usage{ -explain_mlr(model, data = NULL, y = NULL, predict_function = NULL, - residual_function = NULL, ..., label = NULL, verbose = TRUE, - precalculate = TRUE) +explain_mlr(model, data = NULL, y = NULL, weights = NULL, + predict_function = NULL, residual_function = NULL, ..., + label = NULL, verbose = TRUE, precalculate = TRUE, + colorize = TRUE, model_info = NULL) } \arguments{ \item{model}{object - a model to be explained} \item{data}{data.frame or matrix - data that was used for fitting. If not provided then will be extracted from the model. Data should be passed without target column (this shall be provided as the \code{y} argument). NOTE: If target variable is present in the \code{data}, some of the functionalities my not work properly.} -\item{y}{numeric vector with outputs / scores. If provided then it shall have the same size as \code{data}} +\item{y}{numeric vector with outputs / scores. If provided then it shall have the same size as \code{data}.} + +\item{weights}{numeric vector with sampling weights. By default it's \code{NULL}. 
If provided then it shall have the same length as \code{data}} \item{predict_function}{function that takes two arguments: model and new data and returns numeric vector with predictions} @@ -26,6 +29,10 @@ explain_mlr(model, data = NULL, y = NULL, predict_function = NULL, \item{verbose}{if TRUE (default) then diagnostic messages will be printed} \item{precalculate}{if TRUE (default) then 'predicted_values' and 'residuals' are calculated when explainer is created. This will happenn also if 'verbose' is TRUE} + +\item{colorize}{if TRUE (default) then \code{WARNINGS}, \code{ERRORS} and \code{NOTES} are colorized. Will work only in the R console.} + +\item{model_info}{a named list (\code{package}, \code{version}, \code{type}) containing information about the model. If \code{NULL}, \code{DALEX} will look for the information on its own.} } \value{ explainer object (\code{\link[DALEX]{explain}}) ready to work with DALEX diff --git a/man/explain_mlr3.Rd b/man/explain_mlr3.Rd new file mode 100644 index 0000000..09d6840 --- /dev/null +++ b/man/explain_mlr3.Rd @@ -0,0 +1,61 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/explain_mlr3.R +\name{explain_mlr3} +\alias{explain_mlr3} +\title{Create explainer from your mlr3 model} +\usage{ +explain_mlr3(model, data = NULL, y = NULL, weights = NULL, + predict_function = NULL, residual_function = NULL, ..., + label = NULL, verbose = TRUE, precalculate = TRUE, + colorize = TRUE, model_info = NULL) +} +\arguments{ +\item{model}{object - a fitted learner created with \code{mlr3}.} + +\item{data}{data.frame or matrix - data that was used for fitting. If not provided then will be extracted from the model. Data should be passed without target column (this shall be provided as the \code{y} argument). NOTE: If target variable is present in the \code{data}, some of the functionalities may not work properly.} + +\item{y}{numeric vector with outputs / scores.
If provided then it shall have the same size as \code{data}} + +\item{weights}{numeric vector with sampling weights. By default it's \code{NULL}. If provided then it shall have the same length as \code{data}} + +\item{predict_function}{function that takes two arguments: model and new data and returns numeric vector with predictions} + +\item{residual_function}{function that takes three arguments: model, data and response vector y. It should return a numeric vector with model residuals for given data. If not provided, response residuals (\eqn{y-\hat{y}}) are calculated.} + +\item{...}{other parameters} + +\item{label}{character - the name of the model. By default it's extracted from the 'class' attribute of the model} + +\item{verbose}{if TRUE (default) then diagnostic messages will be printed.} + +\item{precalculate}{if TRUE (default) then 'predicted_values' and 'residuals' are calculated when explainer is created. This will also happen if 'verbose' is TRUE} + +\item{colorize}{if TRUE (default) then \code{WARNINGS}, \code{ERRORS} and \code{NOTES} are colorized. Will work only in the R console.} + +\item{model_info}{a named list (\code{package}, \code{version}, \code{type}) containing information about the model. If \code{NULL}, \code{DALEX} will look for the information on its own.} +} +\value{ +explainer object (\code{\link[DALEX]{explain}}) ready to work with DALEX +} +\description{ +DALEX is designed to work with various black-box models like tree ensembles, linear models, neural networks etc. +Unfortunately R packages that create such models are very inconsistent. Different tools use different interfaces to train, validate and use models. +One of those tools, and one of the most popular ones, is the mlr3 package. We would like to present a dedicated explain function for it.
+} +\examples{ +library("DALEXtra") +library(mlr3) +titanic_imputed$survived <- as.factor(titanic_imputed$survived) +task_classif <- TaskClassif$new(id = "1", backend = titanic_imputed, target = "survived") +learner_classif <- lrn("classif.rpart", predict_type = "prob") +learner_classif$train(task_classif) +explain_mlr3(learner_classif, data = titanic_imputed, + y = as.numeric(as.character(titanic_imputed$survived))) + + +task_regr <- TaskRegr$new(id = "2", backend = apartments, target = "m2.price") +learner_regr <- lrn("regr.rpart") +learner_regr$train(task_regr) +explain_mlr3(learner_regr, data = apartments, apartments$m2.price) + +} diff --git a/man/explain_scikitlearn.Rd b/man/explain_scikitlearn.Rd index fefd686..a8bc93c 100644 --- a/man/explain_scikitlearn.Rd +++ b/man/explain_scikitlearn.Rd @@ -5,9 +5,9 @@ \title{Wrapper for Python Scikit-Learn Models} \usage{ explain_scikitlearn(path, yml = NULL, condaenv = NULL, env = NULL, - data = NULL, y = NULL, predict_function = NULL, + data = NULL, y = NULL, weights = NULL, predict_function = NULL, residual_function = NULL, ..., label = NULL, verbose = TRUE, - precalculate = TRUE) + precalculate = TRUE, colorize = TRUE, model_info = NULL) } \arguments{ \item{path}{a path to the pickle file. Can be used without other arguments if you are sure that active Python version match pickle version.} @@ -22,6 +22,8 @@ explain_scikitlearn(path, yml = NULL, condaenv = NULL, env = NULL, \item{y}{vector that will be passed to \code{\link[DALEX]{explain}}.} +\item{weights}{numeric vector with sampling weights. By default it's \code{NULL}. If provided then it shall have the same length as \code{data}} + \item{predict_function}{predict function that will be passed into \code{\link[DALEX]{explain}}. If NULL, default will be used.} \item{residual_function}{residual function that will be passed into \code{\link[DALEX]{explain}}. 
If NULL, default will be used.} @@ -32,7 +34,11 @@ explain_scikitlearn(path, yml = NULL, condaenv = NULL, env = NULL, \item{verbose}{bool that will be passed into \code{\link[DALEX]{explain}}. If NULL, default will be used.} -\item{precalculate}{if TRUE (default) then 'predicted_values' and 'residuals' are calculated when \code{\link[DALEX]{explain}} is created. This will happenn also if 'verbose' is TRUE.} +\item{precalculate}{if TRUE (default) then 'predicted_values' and 'residuals' are calculated when explainer is created. This will also happen if 'verbose' is TRUE.} + +\item{colorize}{if TRUE (default) then \code{WARNINGS}, \code{ERRORS} and \code{NOTES} are colorized. Will work only in the R console.} + +\item{model_info}{a named list (\code{package}, \code{version}, \code{type}) containing information about the model. If \code{NULL}, \code{DALEX} will look for the information on its own.} } \value{ An object of the class 'explainer'. It has additional field param_set when user can check parameters of scikitlearn model diff --git a/man/model_info.Rd b/man/model_info.Rd new file mode 100644 index 0000000..036b94e --- /dev/null +++ b/man/model_info.Rd @@ -0,0 +1,52 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/model_info.R +\name{model_info.WrappedModel} +\alias{model_info.WrappedModel} +\alias{model_info.H2ORegressionModel} +\alias{model_info.H2OBinomialModel} +\alias{model_info.scikitlearn_model} +\alias{model_info.keras} +\alias{model_info.mljar_model} +\alias{model_info.LearnerRegr} +\alias{model_info.LearnerClassif} +\title{Extract info from model} +\usage{ +\method{model_info}{WrappedModel}(model, ...) + +\method{model_info}{H2ORegressionModel}(model, ...) + +\method{model_info}{H2OBinomialModel}(model, ...) + +\method{model_info}{scikitlearn_model}(model, ...) + +\method{model_info}{keras}(model, ...) + +\method{model_info}{mljar_model}(model, ...) + +\method{model_info}{LearnerRegr}(model, ...)
+ +\method{model_info}{LearnerClassif}(model, ...) +} +\arguments{ +\item{model}{- model object} + +\item{...}{- other arguments + +Currently supported packages are: +\itemize{ +\item \code{mlr} models created with \code{mlr} package +\item \code{h2o} models created with \code{h2o} package +\item \code{scikit-learn} models created with \code{scikit-learn} Python library and accessed via \code{reticulate} +\item \code{keras} models created with \code{keras} Python library and accessed via \code{reticulate} +\item \code{mljar} models created with \code{mljar} API and accessed via \code{mljar} R package +\item \code{mlr3} models created with \code{mlr3} package +}} +} +\value{ +A named list of class \code{model_info} +} +\description{ +This generic function lets the user extract basic information about a model. The function returns a named list of class \code{model_info} that +contains information about the package of the model, its version and task type. For wrappers like \code{mlr} or \code{caret}, both package and wrapper information +are stored +} diff --git a/man/yhat.Rd b/man/yhat.Rd index 6cbb09e..a06711e 100644 --- a/man/yhat.Rd +++ b/man/yhat.Rd @@ -7,6 +7,8 @@ \alias{yhat.scikitlearn_model} \alias{yhat.keras} \alias{yhat.mljar_model} +\alias{yhat.LearnerRegr} +\alias{yhat.LearnerClassif} \title{Wrapper over the predict function} \usage{ \method{yhat}{WrappedModel}(X.model, newdata, ...) @@ -20,6 +22,10 @@ \method{yhat}{keras}(X.model, newdata, ...) \method{yhat}{mljar_model}(X.model, newdata, ...) + +\method{yhat}{LearnerRegr}(X.model, newdata, ...) + +\method{yhat}{LearnerClassif}(X.model, newdata, ...)
} \arguments{ \item{X.model}{object - a model to be explained} @@ -39,10 +45,11 @@ Those functions are very important since informations from many models have to b \details{ Currently supported packages are: \itemize{ -\item `mlr` see more in \code{\link{explain_mlr}} -\item `h2o` see more in \code{\link{explain_h2o}} -\item `scikit-learn` see more in \code{\link{explain_scikitlearn}} -\item `keras` see more in \code{\link{explain_keras}} -\item `mljar` see more in \code{\link{explain_mljar}} +\item \code{mlr} see more in \code{\link{explain_mlr}} +\item \code{h2o} see more in \code{\link{explain_h2o}} +\item \code{scikit-learn} see more in \code{\link{explain_scikitlearn}} +\item \code{keras} see more in \code{\link{explain_keras}} +\item \code{mljar} see more in \code{\link{explain_mljar}} +\item \code{mlr3} see more in \code{\link{explain_mlr3}} } } diff --git a/tests/testthat/test_ml3_explain.R b/tests/testthat/test_ml3_explain.R new file mode 100644 index 0000000..dbbaa93 --- /dev/null +++ b/tests/testthat/test_ml3_explain.R @@ -0,0 +1,27 @@ +context("explain_mlr") + +library("DALEXtra") +library("mlr3") + +test_that("creating explainer classif", { + titanic_imputed$survived <- as.factor(titanic_imputed$survived) + task_classif <- TaskClassif$new(id = "1", backend = titanic_imputed, target = "survived") + learner_classif <- lrn("classif.rpart", predict_type = "prob") + learner_classif$train(task_classif) + explainer_classif <- explain_mlr3(learner_classif, data = titanic_imputed, y = as.numeric(as.character(titanic_imputed$survived))) + expect_is(explainer_classif, "explainer") + expect_is(explainer_classif$y_hat, "numeric") + +}) + +test_that("creating explainer regr", { + + task_regr <- TaskRegr$new(id = "2", backend = apartments, target = "m2.price") + learner_regr <- lrn("regr.rpart") + learner_regr$train(task_regr) + explainer_regr <- explain_mlr3(learner_regr, data = apartments, apartments$m2.price) + expect_is(explainer_regr, "explainer") + 
expect_is(explainer_regr$y_hat, "numeric") + +}) +