Issues #24&#21 fix (#25)

ModelOriented · Nov 6, 2019 · 87e0a98 · 87e0a98
1 parent 53da0bc
commit 87e0a98
Show file tree

Hide file tree

Showing 19 changed files with 529 additions and 57 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -41,8 +41,10 @@ Suggests:
   h2o,
   mljar,
   mlr,
+  mlr3,
   randomForest,
   rmarkdown,
+  rpart,
   xgboost,
   testthat
 URL: https://ModelOriented.github.io/DALEXtra/, https://github.com/ModelOriented/DALEXtra

diff --git a/NAMESPACE b/NAMESPACE
@@ -4,6 +4,14 @@ S3method(aspect_importance,default)
 S3method(aspect_importance,explainer)
 S3method(aspect_importance_single,default)
 S3method(aspect_importance_single,explainer)
+S3method(model_info,H2OBinomialModel)
+S3method(model_info,H2ORegressionModel)
+S3method(model_info,LearnerClassif)
+S3method(model_info,LearnerRegr)
+S3method(model_info,WrappedModel)
+S3method(model_info,keras)
+S3method(model_info,mljar_model)
+S3method(model_info,scikitlearn_model)
 S3method(plot,aspect_importance)
 S3method(plot,funnel_measure)
 S3method(plot,overall_comparison)
@@ -16,6 +24,8 @@ S3method(triplot,default)
 S3method(triplot,explainer)
 S3method(yhat,H2OBinomialModel)
 S3method(yhat,H2ORegressionModel)
+S3method(yhat,LearnerClassif)
+S3method(yhat,LearnerRegr)
 S3method(yhat,WrappedModel)
 S3method(yhat,keras)
 S3method(yhat,mljar_model)
@@ -28,6 +38,7 @@ export(explain_h2o)
 export(explain_keras)
 export(explain_mljar)
 export(explain_mlr)
+export(explain_mlr3)
 export(explain_scikitlearn)
 export(funnel_measure)
 export(get_sample)

diff --git a/R/explain_h2o.R b/R/explain_h2o.R
@@ -8,12 +8,15 @@
 #' @param model object - a model to be explained
 #' @param data data.frame or matrix - data that was used for fitting. If not provided then will be extracted from the model. Data should be passed without target column (this shall be provided as the \code{y} argument). NOTE: If target variable is present in the \code{data}, some of the functionalities my not work properly.
 #' @param y numeric vector with outputs / scores. If provided then it shall have the same size as \code{data}
+#' @param weights numeric vector with sampling weights. By default it's \code{NULL}. If provided then it shall have the same length as \code{data}
 #' @param predict_function function that takes two arguments: model and new data and returns numeric vector with predictions
 #' @param residual_function function that takes three arguments: model, data and response vector y. It should return a numeric vector with model residuals for given data. If not provided, response residuals (\eqn{y-\hat{y}}) are calculated.
-#' @param ... other parameters (passed for example to predict function)
+#' @param ... other parameters
 #' @param label character - the name of the model. By default it's extracted from the 'class' attribute of the model
 #' @param verbose if TRUE (default) then diagnostic messages will be printed
-#' @param precalculate if TRUE (default) then 'predicted_values' and 'residuals' are calculated when explainer is created. This will happenn also if 'verbose' is TRUE.
+#' @param precalculate if TRUE (default) then 'predicted_values' and 'residuals' are calculated when explainer is created. This will happen also if 'verbose' is TRUE
+#' @param colorize if TRUE (default) then \code{WARNINGS}, \code{ERRORS} and \code{NOTES} are colorized. Will work only in the R console.
+#' @param model_info a named list (\code{package}, \code{version}, \code{type}) containing information about the model. If \code{NULL}, \code{DALEX} will seek for information on its own.
 #'
 #' @return explainer object (\code{\link[DALEX]{explain}}) ready to work with DALEX
 #'
@@ -50,12 +53,15 @@ explain_h2o <-
   function(model,
            data = NULL,
            y = NULL,
+           weights = NULL,
            predict_function = NULL,
            residual_function = NULL,
            ...,
            label = NULL,
            verbose = TRUE,
-           precalculate = TRUE) {
+           precalculate = TRUE,
+           colorize = TRUE,
+           model_info = NULL) {
     if (class(y) == "H2OFrame") {
       y <- as.numeric(as.vector(y))
     }
@@ -65,11 +71,14 @@ explain_h2o <-
       model,
       data = data,
       y = y,
+      weights = weights,
       predict_function = predict_function,
       residual_function = residual_function,
       ...,
       label = label,
       verbose = verbose,
-      precalculate = precalculate
+      precalculate = precalculate,
+      colorize = colorize,
+      model_info = model_info
     )
   }
diff --git a/R/explain_keras.R b/R/explain_keras.R
@@ -11,12 +11,15 @@
 #' @param env A path to python virtual environment.
 #' @param data test data set that will be passed to \code{\link[DALEX]{explain}}.
 #' @param y vector that will be passed to \code{\link[DALEX]{explain}}.
+#' @param weights numeric vector with sampling weights. By default it's \code{NULL}. If provided then it shall have the same length as \code{data}
 #' @param predict_function predict function that will be passed into \code{\link[DALEX]{explain}}. If NULL, default will be used.
 #' @param residual_function residual function that will be passed into \code{\link[DALEX]{explain}}. If NULL, default will be used.
 #' @param ... other parameters
 #' @param label label that will be passed into \code{\link[DALEX]{explain}}. If NULL, default will be used.
 #' @param verbose bool that will be passed into \code{\link[DALEX]{explain}}. If NULL, default will be used.
-#' @param precalculate if TRUE (default) then 'predicted_values' and 'residuals' are calculated when \code{\link[DALEX]{explain}} is created. This will happenn also if 'verbose' is TRUE.
+#' @param precalculate if TRUE (default) then 'predicted_values' and 'residuals' are calculated when explainer is created. This will happen also if 'verbose' is TRUE.
+#' @param colorize if TRUE (default) then \code{WARNINGS}, \code{ERRORS} and \code{NOTES} are colorized. Will work only in the R console.
+#' @param model_info a named list (\code{package}, \code{version}, \code{type}) containing information about the model. If \code{NULL}, \code{DALEX} will seek for information on its own.
 #'
 #'
 #' @author Szymon Maksymiuk
@@ -84,26 +87,32 @@ explain_keras <-
            env = NULL,
            data = NULL,
            y = NULL,
+           weights = NULL,
            predict_function = NULL,
            residual_function = NULL,
            ...,
            label = NULL,
            verbose = TRUE,
-           precalculate = TRUE) {
+           precalculate = TRUE,
+           colorize = TRUE,
+           model_info = NULL) {
 
     prepeare_env(yml, condaenv, env)
 
     model <- dalex_load_object(path, "keras")
 
     explain(
-      model = model,
+      model,
       data = data,
       y = y,
+      weights = weights,
       predict_function = predict_function,
       residual_function = residual_function,
       ...,
       label = label,
       verbose = verbose,
-      precalculate = precalculate
+      precalculate = precalculate,
+      colorize = colorize,
+      model_info = model_info
     )
   }
diff --git a/R/explain_mljar.R b/R/explain_mljar.R
@@ -9,12 +9,15 @@
 #' @param project_title character - a name of project_title  in which model was built. Without it predictions are unreachable.
 #' @param data data.frame or matrix - data that was used for fitting. If not provided then will be extracted from the model. Data should be passed without target column (this shall be provided as the \code{y} argument). NOTE: If target variable is present in the \code{data}, some of the functionalities my not work properly.
 #' @param y numeric vector with outputs / scores. If provided then it shall have the same size as \code{data}
+#' @param weights numeric vector with sampling weights. By default it's \code{NULL}. If provided then it shall have the same length as \code{data}
 #' @param predict_function function that takes two arguments: model and new data and returns numeric vector with predictions
 #' @param residual_function function that takes three arguments: model, data and response vector y. It should return a numeric vector with model residuals for given data. If not provided, response residuals (\eqn{y-\hat{y}}) are calculated.
 #' @param ... other parameters
 #' @param label character - the name of the model. By default it's extracted from the 'class' attribute of the model
-#' @param verbose if TRUE (default) then diagnostic messages will be printed
-#' @param precalculate if TRUE (default) then 'predicted_values' and 'residuals' are calculated when explainer is created. This will happenn also if 'verbose' is TRUE.
+#' @param verbose if TRUE (default) then diagnostic messages will be printed.
+#' @param precalculate if TRUE (default) then 'predicted_values' and 'residuals' are calculated when explainer is created. This will happen also if 'verbose' is TRUE
+#' @param colorize if TRUE (default) then \code{WARNINGS}, \code{ERRORS} and \code{NOTES} are colorized. Will work only in the R console.
+#' @param model_info a named list (\code{package}, \code{version}, \code{type}) containing information about the model. If \code{NULL}, \code{DALEX} will seek for information on its own.
 #'
 #' @return explainer object (\code{\link[DALEX]{explain}}) ready to work with DALEX
 #'
@@ -47,12 +50,15 @@ explain_mljar <-
            project_title,
            data = NULL,
            y = NULL,
+           weights = NULL,
            predict_function = NULL,
            residual_function = NULL,
            ...,
            label = NULL,
            verbose = TRUE,
-           precalculate = TRUE) {
+           precalculate = TRUE,
+           colorize = TRUE,
+           model_info = NULL) {
 
     if (!"MLJAR_TOKEN" %in% names(Sys.getenv())) {
       stop(
@@ -85,11 +91,14 @@ explain_mljar <-
       model,
       data = data,
       y = y,
+      weights = weights,
       predict_function = predict_function,
       residual_function = residual_function,
       ...,
       label = label,
       verbose = verbose,
-      precalculate = precalculate
+      precalculate = precalculate,
+      colorize = colorize,
+      model_info = model_info
     )
   }
diff --git a/R/explain_mlr.R b/R/explain_mlr.R
@@ -7,13 +7,16 @@
 #'
 #' @param model object - a model to be explained
 #' @param data data.frame or matrix - data that was used for fitting. If not provided then will be extracted from the model. Data should be passed without target column (this shall be provided as the \code{y} argument). NOTE: If target variable is present in the \code{data}, some of the functionalities my not work properly.
-#' @param y numeric vector with outputs / scores. If provided then it shall have the same size as \code{data}
+#' @param y numeric vector with outputs / scores. If provided then it shall have the same size as \code{data}.
+#' @param weights numeric vector with sampling weights. By default it's \code{NULL}. If provided then it shall have the same length as \code{data}
 #' @param predict_function function that takes two arguments: model and new data and returns numeric vector with predictions
 #' @param residual_function function that takes three arguments: model, data and response vector y. It should return a numeric vector with model residuals for given data. If not provided, response residuals (\eqn{y-\hat{y}}) are calculated.
 #' @param ... other parameters
 #' @param label character - the name of the model. By default it's extracted from the 'class' attribute of the model
 #' @param verbose if TRUE (default) then diagnostic messages will be printed
 #' @param precalculate if TRUE (default) then 'predicted_values' and 'residuals' are calculated when explainer is created. This will happenn also if 'verbose' is TRUE
+#' @param colorize if TRUE (default) then \code{WARNINGS}, \code{ERRORS} and \code{NOTES} are colorized. Will work only in the R console.
+#' @param model_info a named list (\code{package}, \code{version}, \code{type}) containing information about the model. If \code{NULL}, \code{DALEX} will seek for information on its own.
 #'
 #' @return explainer object (\code{\link[DALEX]{explain}}) ready to work with DALEX
 #'
@@ -55,22 +58,28 @@ explain_mlr <-
   function(model,
            data = NULL,
            y = NULL,
+           weights = NULL,
            predict_function = NULL,
            residual_function = NULL,
            ...,
            label = NULL,
            verbose = TRUE,
-           precalculate = TRUE) {
+           precalculate = TRUE,
+           colorize = TRUE,
+           model_info = NULL) {
     explain(
       model,
       data = data,
       y = y,
+      weights = weights,
       predict_function = predict_function,
       residual_function = residual_function,
       ...,
       label = label,
       verbose = verbose,
-      precalculate = precalculate
+      precalculate = precalculate,
+      colorize = colorize,
+      model_info = model_info
     )
 
 

diff --git a/R/explain_mlr3.R b/R/explain_mlr3.R
@@ -0,0 +1,78 @@
+#' Create explainer from your mlr3 model
+#'
+#' DALEX is designed to work with various black-box models like tree ensembles, linear models, neural networks etc.
+#' Unfortunately R packages that create such models are very inconsistent. Different tools use different interfaces to train, validate and use models.
+#' One of those tools, and one of the most popular, is the mlr3 package. We would like to present a dedicated explain function for it.
+#'
+#'
+#' @param model object - a fitted learner created with \code{mlr3}.
+#' @param data data.frame or matrix - data that was used for fitting. If not provided then will be extracted from the model. Data should be passed without target column (this shall be provided as the \code{y} argument). NOTE: If target variable is present in the \code{data}, some of the functionalities may not work properly.
+#' @param y numeric vector with outputs / scores. If provided then it shall have the same size as \code{data}
+#' @param weights numeric vector with sampling weights. By default it's \code{NULL}. If provided then it shall have the same length as \code{data}
+#' @param predict_function function that takes two arguments: model and new data and returns numeric vector with predictions
+#' @param residual_function function that takes three arguments: model, data and response vector y. It should return a numeric vector with model residuals for given data. If not provided, response residuals (\eqn{y-\hat{y}}) are calculated.
+#' @param ... other parameters
+#' @param label character - the name of the model. By default it's extracted from the 'class' attribute of the model
+#' @param verbose if TRUE (default) then diagnostic messages will be printed.
+#' @param precalculate if TRUE (default) then 'predicted_values' and 'residuals' are calculated when explainer is created. This will happen also if 'verbose' is TRUE
+#' @param colorize if TRUE (default) then \code{WARNINGS}, \code{ERRORS} and \code{NOTES} are colorized. Will work only in the R console.
+#' @param model_info a named list (\code{package}, \code{version}, \code{type}) containing information about the model. If \code{NULL}, \code{DALEX} will seek for information on its own.
+#'
+#' @return explainer object (\code{\link[DALEX]{explain}}) ready to work with DALEX
+#'
+#' @import DALEX
+#' @importFrom stats predict
+#' @importFrom DALEX yhat
+#'
+#' @rdname explain_mlr3
+#' @export
+#' @examples
+#' library("DALEXtra")
+#' library(mlr3)
+#' titanic_imputed$survived <- as.factor(titanic_imputed$survived)
+#' task_classif <- TaskClassif$new(id = "1", backend = titanic_imputed, target = "survived")
+#' learner_classif <- lrn("classif.rpart", predict_type = "prob")
+#' learner_classif$train(task_classif)
+#' explain_mlr3(learner_classif, data = titanic_imputed,
+#'              y = as.numeric(as.character(titanic_imputed$survived)))
+#'
+#'
+#' task_regr <- TaskRegr$new(id = "2", backend = apartments, target = "m2.price")
+#' learner_regr <- lrn("regr.rpart")
+#' learner_regr$train(task_regr)
+#' explain_mlr3(learner_regr, data = apartments, apartments$m2.price)
+#'
+
+
+explain_mlr3 <-
+  function(model,
+           data = NULL,
+           y = NULL,
+           weights = NULL,
+           predict_function = NULL,
+           residual_function = NULL,
+           ...,
+           label = NULL,
+           verbose = TRUE,
+           precalculate = TRUE,
+           colorize = TRUE,
+           model_info = NULL
+           ) {
+    explain(
+      model,
+      data = data,
+      y = y,
+      weights = weights,
+      predict_function = predict_function,
+      residual_function = residual_function,
+      ...,
+      label = label,
+      verbose = verbose,
+      precalculate = precalculate,
+      colorize = colorize,
+      model_info = model_info
+    )
+
+
+
+  }
diff --git a/R/explain_scikitlearn.R b/R/explain_scikitlearn.R
@@ -11,12 +11,15 @@
 #' @param env A path to python virtual environment.
 #' @param data test data set that will be passed to \code{\link[DALEX]{explain}}.
 #' @param y vector that will be passed to \code{\link[DALEX]{explain}}.
+#' @param weights numeric vector with sampling weights. By default it's \code{NULL}. If provided then it shall have the same length as \code{data}
 #' @param predict_function predict function that will be passed into \code{\link[DALEX]{explain}}. If NULL, default will be used.
 #' @param residual_function residual function that will be passed into \code{\link[DALEX]{explain}}. If NULL, default will be used.
 #' @param ... other parameters
 #' @param label label that will be passed into \code{\link[DALEX]{explain}}. If NULL, default will be used.
 #' @param verbose bool that will be passed into \code{\link[DALEX]{explain}}. If NULL, default will be used.
-#' @param precalculate if TRUE (default) then 'predicted_values' and 'residuals' are calculated when \code{\link[DALEX]{explain}} is created. This will happenn also if 'verbose' is TRUE.
+#' @param precalculate if TRUE (default) then 'predicted_values' and 'residuals' are calculated when explainer is created. This will happen also if 'verbose' is TRUE.
+#' @param colorize if TRUE (default) then \code{WARNINGS}, \code{ERRORS} and \code{NOTES} are colorized. Will work only in the R console.
+#' @param model_info a named list (\code{package}, \code{version}, \code{type}) containing information about the model. If \code{NULL}, \code{DALEX} will seek for information on its own.
 #'
 #'
 #' @author Szymon Maksymiuk
@@ -97,12 +100,15 @@ explain_scikitlearn <-
            env = NULL,
            data = NULL,
            y = NULL,
+           weights = NULL,
            predict_function = NULL,
            residual_function = NULL,
            ...,
            label = NULL,
            verbose = TRUE,
-           precalculate = TRUE) {
+           precalculate = TRUE,
+           colorize = TRUE,
+           model_info = NULL) {
     prepeare_env(yml, condaenv, env)
 
     model <- dalex_load_object(path, "scikitlearn_model")
@@ -145,17 +151,20 @@ explain_scikitlearn <-
 
 
     class(params) <- "scikitlearn_set"
-    explainer <-  explain(
-      model = model,
-      data = data,
-      y = y,
-      predict_function = predict_function,
-      residual_function = residual_function,
-      ...,
-      label = label,
-      verbose = verbose,
-      precalculate = precalculate
-    )
+    explainer <- explain(
+                          model,
+                          data = data,
+                          y = y,
+                          weights = weights,
+                          predict_function = predict_function,
+                          residual_function = residual_function,
+                          ...,
+                          label = label,
+                          verbose = verbose,
+                          precalculate = precalculate,
+                          colorize = colorize,
+                          model_info = model_info
+                        )
     explainer$param_set <- params
     explainer
   }