code and doc maintenance

ModelOriented · Sep 28, 2020 · 2ea9c4e · 2ea9c4e
1 parent 91c9d39
commit 2ea9c4e
Show file tree

Hide file tree

Showing 77 changed files with 423 additions and 345 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,12 +1,12 @@
 Package: ingredients
 Title: Effects and Importances of Model Ingredients
-Version: 2.0
+Version: 2.0.1
 Authors@R: c(person("Przemyslaw", "Biecek", email = "przemyslaw.biecek@gmail.com",
                   role = c("aut", "cre"),
                   comment = c(ORCID = "0000-0001-8423-1823")),
               person("Hubert", "Baniecki", role = "aut",
                   comment = c(ORCID = "0000-0001-6661-5364")),
-              person("Adam", "Izdebski", role = "aut"))
+              person("Adam", "Izdebski", role = "ctb"))
 Description: Collection of tools for assessment of feature importance and feature effects.
     Key functions are:
     feature_importance() for assessment of global level feature importance,
@@ -32,7 +32,7 @@ Imports:
 Suggests:
     DALEX,
     gower,
-    randomForest,
+    ranger,
     testthat,
     r2d3,
     jsonlite,

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,7 @@
+ingredients 2.0.1
+--------------------------------------------------------------
+* code and documentation maintenance [#130](https://github.com/ModelOriented/ingredients/issues/130)
+
 ingredients 2.0
 --------------------------------------------------------------
 * `plot.ceteris_paribus_explainer` now plots profiles by default for categorical variables (instead of lines, the previous default, or bars)

diff --git a/R/accumulated_dependence.R b/R/accumulated_dependence.R
@@ -24,12 +24,13 @@
 #' If "categorical" then only categorical variables will be calculated.
 #'
 #' @references ALEPlot: Accumulated Local Effects (ALE) Plots and Partial Dependence (PD) Plots \url{https://cran.r-project.org/package=ALEPlot},
-#' Explanatory Model Analysis. Explore, Explain and Examine Predictive Models. \url{https://pbiecek.github.io/ema}
+#' Explanatory Model Analysis. Explore, Explain, and Examine Predictive Models. \url{https://pbiecek.github.io/ema/}
 #'
 #' @return an object of the class \code{aggregated_profiles_explainer}
 #'
 #' @examples
 #' library("DALEX")
+#' library("ingredients")
 #'
 #' model_titanic_glm <- glm(survived ~ gender + age + fare,
 #'                          data = titanic_imputed, family = "binomial")
@@ -45,9 +46,9 @@
 #' plot(adp_glm)
 #'
 #' \donttest{
-#' library("randomForest")
+#' library("ranger")
 #'
-#' model_titanic_rf <- randomForest(survived ~.,  data = titanic_imputed)
+#' model_titanic_rf <- ranger(survived ~., data = titanic_imputed, probability = TRUE)
 #'
 #' explain_titanic_rf <- explain(model_titanic_rf,
 #'                               data = titanic_imputed[,-8],

diff --git a/R/aggregate_profiles.R b/R/aggregate_profiles.R
@@ -17,17 +17,18 @@
 #' @param variable_type a character. If \code{numerical} then only numerical variables will be calculated.
 #' If \code{categorical} then only categorical variables will be calculated.
 #'
-#' @references Explanatory Model Analysis. Explore, Explain and Examine Predictive Models. \url{https://pbiecek.github.io/ema}
+#' @references Explanatory Model Analysis. Explore, Explain, and Examine Predictive Models. \url{https://pbiecek.github.io/ema/}
 #'
 #' @importFrom stats na.omit quantile weighted.mean dnorm
 #'
 #' @return an object of the class \code{aggregated_profiles_explainer}
 #'
 #' @examples
 #' library("DALEX")
-#' library("randomForest")
+#' library("ingredients")
+#' library("ranger")
 #'
-#' model_titanic_rf <- randomForest(survived ~ .,  data = titanic_imputed)
+#' model_titanic_rf <- ranger(survived ~.,  data = titanic_imputed, probability = TRUE)
 #'
 #' explain_titanic_rf <- explain(model_titanic_rf,
 #'                               data = titanic_imputed[,-8],
@@ -107,7 +108,7 @@ aggregate_profiles <- function(x, ...,
   }
   dfl <- c(list(x), elist)
   mean_prediction <-
-    mean(do.call(rbind, lapply(dfl, function(x){ attr(x, "observation")}))$`_yhat_`, na.rm = TRUE)
+    mean(do.call(rbind, lapply(dfl, function(x){ attr(x, "observations")}))$`_yhat_`, na.rm = TRUE)
 
   all_profiles <- do.call(rbind, dfl)
   class(all_profiles) <- "data.frame"

diff --git a/R/bind_plots.R b/R/bind_plots.R
@@ -14,6 +14,8 @@
 #' @examples
 #' \donttest{
 #' library("DALEX")
+#' library("ingredients")
+#'
 #' titanic_glm <- glm(survived ~ gender + age + fare,
 #'                    data = titanic_imputed, family = "binomial")
 #'

diff --git a/R/calculate_oscillations.R b/R/calculate_oscillations.R
@@ -7,13 +7,14 @@
 #' @param sort a logical value. If \code{TRUE} then rows are sorted along the oscillations
 #' @param ... other arguments
 #'
-#' @references Explanatory Model Analysis. Explore, Explain and Examine Predictive Models. \url{https://pbiecek.github.io/ema}
+#' @references Explanatory Model Analysis. Explore, Explain, and Examine Predictive Models. \url{https://pbiecek.github.io/ema/}
 #'
 #' @return an object of the class \code{ceteris_paribus_oscillations}
 #'
 #' @examples
 #' library("DALEX")
-#' # smaller data, quicker example
+#' library("ingredients")
+#'
 #' titanic_small <- select_sample(titanic_imputed, n = 500, seed = 1313)
 #'
 #' # build a model
@@ -29,9 +30,9 @@
 #' calculate_oscillations(cp_rf)
 #'
 #' \donttest{
-#' library("randomForest")
+#' library("ranger")
 #'
-#' apartments_rf_model <- randomForest(m2.price ~ construction.year + surface + floor +
+#' apartments_rf_model <- ranger(m2.price ~ construction.year + surface + floor +
 #'                                     no.rooms + district, data = apartments)
 #'
 #' explainer_rf <- explain(apartments_rf_model,

diff --git a/R/calculate_variable_profile.R b/R/calculate_variable_profile.R
@@ -12,7 +12,7 @@
 #' @param model a model that will be passed to the \code{predict_function}
 #' @param ... other parameters that will be passed to the \code{predict_function}
 #'
-#' @references Explanatory Model Analysis. Explore, Explain and Examine Predictive Models. \url{https://pbiecek.github.io/ema}
+#' @references Explanatory Model Analysis. Explore, Explain, and Examine Predictive Models. \url{https://pbiecek.github.io/ema/}
 #'
 #' @return a data frame with profiles for selected variables and selected observations
 #'

diff --git a/R/ceteris_paribus.R b/R/ceteris_paribus.R
@@ -25,13 +25,13 @@
 #' @param variable_splits_type how variable grids shall be calculated? Use "quantiles" (default) for percentiles or "uniform" to get a uniform grid of points
 #' @param variable_splits_with_obs if \code{TRUE} then all values in \code{new_observation} will be included in \code{variable_splits}
 #'
-#' @references Explanatory Model Analysis. Explore, Explain and Examine Predictive Models. \url{https://pbiecek.github.io/ema}
+#' @references Explanatory Model Analysis. Explore, Explain, and Examine Predictive Models. \url{https://pbiecek.github.io/ema/}
 #'
 #' @return an object of the class \code{ceteris_paribus_explainer}.
 #'
 #' @examples
 #' library("DALEX")
-#' # smaller data, quicker example
+#' library("ingredients")
 #' titanic_small <- select_sample(titanic_imputed, n = 500, seed = 1313)
 #'
 #' # build a model
@@ -41,23 +41,21 @@
 #'
 #' explain_titanic_glm <- explain(model_titanic_glm,
 #'                                data = titanic_small[,-8],
-#'                                y = titanic_small[,8],
-#'                                verbose = FALSE)
+#'                                y = titanic_small[,8])
 #'
 #' cp_rf <- ceteris_paribus(explain_titanic_glm, titanic_small[1,])
 #' cp_rf
 #'
 #' plot(cp_rf, variables = "age")
 #'
 #' \donttest{
-#' library("randomForest")
-#' model_titanic_rf <- randomForest(survived ~.,  data = titanic_imputed)
+#' library("ranger")
+#' model_titanic_rf <- ranger(survived ~., data = titanic_imputed, probability = TRUE)
 #'
 #'
 #' explain_titanic_rf <- explain(model_titanic_rf,
 #'                               data = titanic_imputed[,-8],
 #'                               y = titanic_imputed[,8],
-#'                               label = "Random Forest v7",
 #'                               verbose = FALSE)
 #'
 #' # select a few passengers

diff --git a/R/ceteris_paribus_2d.R b/R/ceteris_paribus_2d.R
@@ -10,8 +10,11 @@
 #'
 #' @return an object of the class \code{ceteris_paribus_2d_explainer}.
 #'
+#' @references Explanatory Model Analysis. Explore, Explain, and Examine Predictive Models. \url{https://pbiecek.github.io/ema/}
+#'
 #' @examples
 #' library("DALEX")
+#' library("ingredients")
 #'
 #' model_titanic_glm <- glm(survived ~ age + fare,
 #'                        data = titanic_imputed, family = "binomial")
@@ -27,10 +30,10 @@
 #'
 #' plot(cp_rf)
 #'
-#' library("randomForest")
+#' library("ranger")
 #' set.seed(59)
 #'
-#' apartments_rf_model <- randomForest(m2.price ~., data = apartments)
+#' apartments_rf_model <- ranger(m2.price ~., data = apartments)
 #'
 #' explainer_rf <- explain(apartments_rf_model,
 #'                         data = apartments_test[,-1],

diff --git a/R/cluster_profiles.R b/R/cluster_profiles.R
@@ -14,14 +14,15 @@
 #' @param variable_type a character. If \code{numerical} then only numerical variables will be computed.
 #' If \code{categorical} then only categorical variables will be computed.
 #'
-#' @references Explanatory Model Analysis. Explore, Explain and Examine Predictive Models. \url{https://pbiecek.github.io/ema}
+#' @references Explanatory Model Analysis. Explore, Explain, and Examine Predictive Models. \url{https://pbiecek.github.io/ema/}
 #'
 #' @importFrom stats as.dist cutree hclust
 #'
 #' @return an object of the class \code{aggregated_profiles_explainer}
 #'
 #' @examples
 #' library("DALEX")
+#' library("ingredients")
 #'
 #' selected_passangers <- select_sample(titanic_imputed, n = 100)
 #' model_titanic_glm <- glm(survived ~ gender + age + fare,
@@ -36,14 +37,13 @@
 #' plot(clust_rf)
 #'
 #' \donttest{
-#' library("randomForest")
-#' model_titanic_rf <- randomForest(survived ~.,  data = titanic_imputed)
-#' model_titanic_rf
+#' library("ranger")
+#' model_titanic_rf <- ranger(survived ~., data = titanic_imputed, probability = TRUE)
 #'
 #' explain_titanic_rf <- explain(model_titanic_rf,
 #'                               data = titanic_imputed[,-8],
 #'                               y = titanic_imputed[,8],
-#'                               label = "Random Forest v7")
+#'                               verbose = FALSE)
 #'
 #' cp_rf <- ceteris_paribus(explain_titanic_rf, selected_passangers)
 #' cp_rf
@@ -63,12 +63,13 @@
 #' head(clust_rf)
 #' }
 #' @export
-cluster_profiles <- function(x, ...,
-                       aggregate_function = mean,
-                       variable_type = "numerical",
-                       center = FALSE,
-                       k = 3,
-                       variables = NULL) {
+cluster_profiles <- function(x,
+                             ...,
+                             aggregate_function = mean,
+                             variable_type = "numerical",
+                             center = FALSE,
+                             k = 3,
+                             variables = NULL) {
 
   check_variable_type(variable_type)
 

diff --git a/R/conditional_dependence.R b/R/conditional_dependence.R
@@ -21,30 +21,30 @@
 #' @param variable_type a character. If \code{numerical} then only numerical variables will be calculated.
 #' If \code{categorical} then only categorical variables will be calculated.
 #'
-#' @references Explanatory Model Analysis. Explore, Explain and Examine Predictive Models. \url{https://pbiecek.github.io/ema}
+#' @references Explanatory Model Analysis. Explore, Explain, and Examine Predictive Models. \url{https://pbiecek.github.io/ema/}
 #'
 #' @return an object of the class \code{aggregated_profiles_explainer}
 #'
 #' @examples
 #' library("DALEX")
+#' library("ingredients")
 #'
 #' model_titanic_glm <- glm(survived ~ gender + age + fare,
 #'                          data = titanic_imputed, family = "binomial")
 #'
 #' explain_titanic_glm <- explain(model_titanic_glm,
 #'                                data = titanic_imputed[,-8],
-#'                                y = titanic_imputed[,8],
-#'                                verbose = FALSE)
+#'                                y = titanic_imputed[,8])
 #'
 #' cdp_glm <- conditional_dependence(explain_titanic_glm,
 #'                                   N = 150, variables = c("age", "fare"))
 #' head(cdp_glm)
 #' plot(cdp_glm)
 #'
 #' \donttest{
-#' library("randomForest")
+#' library("ranger")
 #'
-#' model_titanic_rf <- randomForest(survived ~.,  data = titanic_imputed)
+#' model_titanic_rf <- ranger(survived ~., data = titanic_imputed, probability = TRUE)
 #'
 #' explain_titanic_rf <- explain(model_titanic_rf,
 #'                               data = titanic_imputed[,-8],
@@ -55,7 +55,7 @@
 #' plot(cdp_rf)
 #'
 #' cdp_rf <- conditional_dependence(explain_titanic_rf, N = 200, variable_type = "categorical")
-#' plotD3(cdp_rf, label_margin = 80, scale_plot = TRUE)
+#' plotD3(cdp_rf, label_margin = 100, scale_plot = TRUE)
 #' }
 #'
 #' @export

diff --git a/R/describe_aggregated_profiles.R b/R/describe_aggregated_profiles.R
@@ -8,16 +8,16 @@
 #' three most important variable values are displayed, while \code{display_numbers = FALSE} displays
 #' all the important variables, however without further details.
 #'
-#'
 #' @importFrom graphics plot
 #' @importFrom stats quantile
 #'
 #' @examples
 #' library("DALEX")
-#' library("randomForest")
+#' library("ingredients")
+#' library("ranger")
 #'
 #' \donttest{
-#' model_titanic_rf <- randomForest(survived ~.,  data = titanic_imputed)
+#' model_titanic_rf <- ranger(survived ~., data = titanic_imputed, probability = TRUE)
 #'
 #' explain_titanic_rf <- explain(model_titanic_rf,
 #'                               data = titanic_imputed[,-8],

diff --git a/R/describe_ceteris_paribus.R b/R/describe_ceteris_paribus.R
@@ -25,10 +25,11 @@
 #'
 #' @examples
 #' library("DALEX")
-#' library("randomForest")
+#' library("ingredients")
+#' library("ranger")
 #'
 #' \donttest{
-#' model_titanic_rf <- randomForest(survived ~.,  data = titanic_imputed)
+#' model_titanic_rf <- ranger(survived ~.,  data = titanic_imputed, probability = TRUE)
 #'
 #' explain_titanic_rf <- explain(model_titanic_rf,
 #'                               data = titanic_imputed[,-8],

diff --git a/R/describe_feature_importance.R b/R/describe_feature_importance.R
@@ -7,8 +7,11 @@
 #' @importFrom graphics plot
 #' @importFrom stats quantile
 #'
+#' @references Explanatory Model Analysis. Explore, Explain, and Examine Predictive Models. \url{https://pbiecek.github.io/ema/}
+#'
 #' @examples
 #' library("DALEX")
+#' library("ingredients")
 #'
 #' lm_model <- lm(m2.price~., data = apartments)
 #' explainer_lm <- explain(lm_model, data = apartments[,-1], y = apartments[,1])