diff --git a/.travis.yml b/.travis.yml index 9cc6c05d..51732c25 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,6 +3,19 @@ sudo: false cache: packages dist: trusty +r: + - release + - devel + +os: + - linux + - osx + +matrix: + exclude: + - os: osx + r: devel + env: global: - R_CHECK_ARGS="--no-build-vignettes --no-manual --timings --run-donttest" @@ -10,14 +23,19 @@ env: notifications: email: false +#before_install: +# - sudo apt-get install --yes udunits-bin libproj-dev libgeos-dev libgdal-dev libgdal1-dev libudunits2-dev before_install: - - sudo apt-get install --yes udunits-bin libproj-dev libgeos-dev libgdal-dev libgdal1-dev libudunits2-dev - + - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install llvm && + export PATH="/usr/local/opt/llvm/bin:$PATH" && + export LDFLAGS="-L/usr/local/opt/llvm/lib" && + export CFLAGS="-I/usr/local/opt/llvm/include"; fi + r_packages: - ggplot2 - covr -r_github_packages: modelOriented/DALEX +#r_github_packages: modelOriented/DALEX after_success: - Rscript -e 'library(covr); codecov()' diff --git a/DESCRIPTION b/DESCRIPTION index 0bbdbd24..b8868fbd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,13 +1,12 @@ Package: ingredients Title: Effects and Importances of Model Ingredients -Version: 0.4.1 +Version: 0.4.2 Authors@R: c(person("Przemyslaw", "Biecek", email = "przemyslaw.biecek@gmail.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0001-8423-1823")), person("Hubert", "Baniecki", role = "aut", comment = c(ORCID = "0000-0001-6661-5364")), - person("Adam", "Izdebski", role = "aut"), - person("Katarzyna", "Pekala", role = "aut")) + person("Adam", "Izdebski", role = "aut")) Description: Collection of tools for assessment of feature importance and feature effects. Key functions are: feature_importance() for assessment of global level feature importance, diff --git a/NEWS.md b/NEWS.md index a2212179..e2053e44 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,13 @@ +ingredients 0.4.2 +--------------------------------------------------------------- +* change `variable_type` and `variables` arguments usage in the +`aggregate_profiles`, `plot.ceteris_paribus` and `plotD3.ceteris_paribus` +* remove `variable_type` argument from `plotD3.aggregated_profiles` +(now the same as in `plot.aggregated_profiles`) +* Kasia Pekala is removed from the contributors as `aspect_importance` is moved to `DALEXtra` +([See v0.3.12 changelog](https://modeloriented.github.io/ingredients/news/index.html#ingredients-0-3-12)) +* added Travis-CI for OSX + ingredients 0.4.1 --------------------------------------------------------------- * fixed rounding problem in the describe function ([#76](https://github.com/ModelOriented/ingredients/issues/76)) @@ -31,7 +41,7 @@ cluster_profiles(), plot() and others, as requested in #15 ingredients 0.3.8 ---------------------------------------------------------------- -* Natural language description generated with `describe()` function for `ceteris_paribus()`, `feature_importance()` and `aggregated_profiles()` explanations. +* Natural language description generated with `describe()` function for `ceteris_paribus()`, `feature_importance()` and `aggregate_profiles()` explanations. ingredients 0.3.7 diff --git a/R/aggregate_profiles.R b/R/aggregate_profiles.R index 16f2adf1..32c1d8bd 100644 --- a/R/aggregate_profiles.R +++ b/R/aggregate_profiles.R @@ -104,16 +104,38 @@ aggregate_profiles <- function(x, ..., if (length(all_variables_intersect) == 0) stop(paste0("parameter variables do not overlap with ", paste(all_variables, collapse = ", "))) all_variables <- all_variables_intersect } + # only numerical or only factors? is_numeric <- sapply(all_profiles[, all_variables, drop = FALSE], is.numeric) + if (variable_type == "numerical") { vnames <- names(which(is_numeric)) - if (length(vnames) == 0) stop("There are no numerical variables") all_profiles$`_x_` <- 0 + + # there are no numerical variables + if (length(vnames) == 0) { + # change to categorical + variable_type <- "categorical" + all_profiles$`_x_` <- "" + # send message + message("'variable_type' changed to 'categorical' due to lack of numerical variables.") + # take all + vnames <- all_variables + } else if (!is.null(variables) && length(vnames) != length(variables)) { + message("Non-numerical variables (from the 'variables' argument) are rejected.") + } } else { vnames <- names(which(!is_numeric)) - if (length(vnames) == 0) stop("There are no non-numerical variables") all_profiles$`_x_` <- "" + + # there are variables selected + if (!is.null(variables)) { + # take all + vnames <- all_variables + } else if (length(vnames) == 0) { + # there were no variables selected and there are no categorical variables + stop("There are no non-numerical variables.") + } } # select only suitable variables @@ -132,7 +154,7 @@ aggregate_profiles <- function(x, ..., if (variable_type == "categorical") { all_profiles$`_x_` <- as.character(apply(all_profiles, 1, function(x) x[x["_vname_"]])) } - + if (!is.null(groups) && ! groups %in% colnames(all_profiles)) { stop("groups parameter is not a name of any column") } diff --git a/R/plotD3_aggregated_profiles.R b/R/plotD3_aggregated_profiles.R index 172df334..6ee63e9b 100644 --- a/R/plotD3_aggregated_profiles.R +++ b/R/plotD3_aggregated_profiles.R @@ -12,8 +12,6 @@ #' @param color a character. Set line/bar color #' @param size a numeric. Set width of lines #' @param alpha a numeric between \code{0} and \code{1}. Opacity of lines -#' @param variable_type a character. If "numerical" then only numerical variables will be plotted. -#' If "categorical" then only categorical variables will be plotted. #' @param facet_ncol number of columns for the \code{\link[ggplot2]{facet_wrap}} #' @param scale_plot a logical. If \code{TRUE}, the height of plot scales with window size. By default it's \code{FALSE} #' @param variables if not \code{NULL} then only \code{variables} will be presented @@ -49,25 +47,22 @@ #' pdp_rf_a <- aggregate_profiles(cp_rf, type = "accumulated", variable_type = "numerical") #' pdp_rf_a$`_label_` <- "RF_accumulated" #' -#' plotD3(pdp_rf_p, pdp_rf_c, pdp_rf_a, variable_type = "numerical", scale_plot = TRUE) +#' plotD3(pdp_rf_p, pdp_rf_c, pdp_rf_a, scale_plot = TRUE) #' #' pdp <- aggregate_profiles(cp_rf, type = "partial", variable_type = "categorical") #' pdp$`_label_` <- "RF_partial" #' -#' plotD3(pdp, variables = c("gender","class"), variable_type = "categorical", label_margin = 70) +#' plotD3(pdp, variables = c("gender","class"), label_margin = 70) #' #' @export #' @rdname plotD3_aggregated_profiles plotD3.aggregated_profiles_explainer <- function(x, ..., size = 2, alpha = 1, color = "#46bac2", - variable_type = "numerical", facet_ncol = 2, scale_plot = FALSE, variables = NULL, chart_title = "Aggregated Profiles", label_margin = 60) { - check_variable_type(variable_type) - # if there is more explainers, they should be merged into a single data frame dfl <- c(list(x), list(...)) aggregated_profiles <- do.call(rbind, dfl) @@ -77,35 +72,13 @@ plotD3.aggregated_profiles_explainer <- function(x, ..., size = 2, alpha = 1, if (!is.null(variables)) { all_variables <- intersect(all_variables, variables) if (length(all_variables) == 0) stop(paste0("variables do not overlap with ", paste(all_variables, collapse = ", "))) - } - hl <- split(aggregated_profiles, f = as.character(aggregated_profiles$`_vname_`), drop = FALSE)[all_variables] - - # only numerical or only factor? - is_numeric <- unlist(lapply(hl, function(x){ - is.numeric(x$`_x_`) - })) - - if (variable_type == "numerical") { - vnames <- names(which(is_numeric)) - - if (length(vnames) == 0) { - # but `variables` are selected, then change to factor - if (length(variables) > 0) { - variable_type <- "categorical" - vnames <- variables - } else { - stop("There are no numerical variables") - } - } - } else { - vnames <- names(which(!is_numeric)) - # there are no numerical features - if (length(vnames) == 0) stop("There are no non-numerical variables") + aggregated_profiles <- aggregated_profiles[aggregated_profiles$`_vname_` %in% all_variables, ] } + is_x_numeric <- is.numeric(aggregated_profiles$`_x_`) + # prepare profiles data - aggregated_profiles <- aggregated_profiles[aggregated_profiles$`_vname_` %in% vnames, ] aggregated_profiles$`_vname_` <- droplevels(aggregated_profiles$`_vname_`) rownames(aggregated_profiles) <- NULL @@ -118,7 +91,7 @@ plotD3.aggregated_profiles_explainer <- function(x, ..., size = 2, alpha = 1, min_max_list <- ymean <- label_names <- NULL # line plot or bar plot? - if (variable_type == "numerical") { + if (is_x_numeric) { aggregated_profiles_list <- lapply(aggregated_profiles_list, function(x){ ret <- x[, c('_x_', "_yhat_", "_vname_", "_label_")] colnames(ret) <- c("xhat", "yhat", "vname", "label") @@ -150,12 +123,12 @@ plotD3.aggregated_profiles_explainer <- function(x, ..., size = 2, alpha = 1, ymean <- round(attr(x, "mean_prediction"),3) } - options <- list(variableNames = as.list(vnames), - n = length(vnames), c = length(list(...)) + 1, + options <- list(variableNames = as.list(all_variables), + n = length(all_variables), c = length(list(...)) + 1, yMax = ymax + ymargin, yMin = ymin - ymargin, yMean = ymean, labelNames = label_names, size = size, alpha = alpha, color = color, - onlyNumerical = variable_type == "numerical", + onlyNumerical = is_x_numeric, facetNcol = facet_ncol, scalePlot = scale_plot, chartTitle = chart_title, labelMargin = label_margin) diff --git a/R/plotD3_ceteris_paribus.R b/R/plotD3_ceteris_paribus.R index b778c41f..06bf1217 100644 --- a/R/plotD3_ceteris_paribus.R +++ b/R/plotD3_ceteris_paribus.R @@ -85,20 +85,32 @@ plotD3.ceteris_paribus_explainer <- function(x, ..., size = 2, alpha = 1, if (variable_type == "numerical") { vnames <- names(which(is_numeric)) + all_profiles$`_x_` <- 0 + # there are no numerical variables if (length(vnames) == 0) { - # but `variables` are selected, then change to factor - if (length(variables) > 0) { - variable_type <- "categorical" - vnames <- variables - } else { - stop("There are no numerical variables") - } + # change to categorical + variable_type <- "categorical" + all_profiles$`_x_` <- "" + # send message + message("'variable_type' changed to 'categorical' due to lack of numerical variables.") + # take all + vnames <- all_variables + } else if (!is.null(variables) && length(vnames) != length(variables)) { + message("Non-numerical variables (from the 'variables' argument) are rejected.") } } else { vnames <- names(which(!is_numeric)) - # there are no numerical features - if (length(vnames) == 0) stop("There are no non-numerical variables") + all_profiles$`_x_` <- "" + + # there are variables selected + if (!is.null(variables)) { + # take all + vnames <- all_variables + } else if (length(vnames) == 0) { + # there were no variables selected and there are no categorical variables + stop("There are no non-numerical variables.") + } } # prepare clean observations data for tooltips diff --git a/R/plot_ceteris_paribus.R b/R/plot_ceteris_paribus.R index eeda8efb..b585b147 100644 --- a/R/plot_ceteris_paribus.R +++ b/R/plot_ceteris_paribus.R @@ -102,29 +102,38 @@ plot.ceteris_paribus_explainer <- function(x, ..., } # is color a variable or literal? is_color_a_variable <- color %in% c(all_variables, "_label_", "_vname_", "_ids_") + # only numerical or only factors? is_numeric <- sapply(all_profiles[, all_variables, drop = FALSE], is.numeric) + if (variable_type == "numerical") { vnames <- names(which(is_numeric)) all_profiles$`_x_` <- 0 + # there are no numerical variables if (length(vnames) == 0) { - # but `variables` are selected, then change to factor - if (length(variables) > 0) { - variable_type <- "categorical" - vnames <- variables - all_profiles$`_x_` <- "" - } else { - stop("There are no numerical variables") - } + # change to categorical + variable_type <- "categorical" + all_profiles$`_x_` <- "" + # send message + message("'variable_type' changed to 'categorical' due to lack of numerical variables.") + # take all + vnames <- all_variables + } else if (!is.null(variables) && length(vnames) != length(variables)) { + message("Non-numerical variables (from the 'variables' argument) are rejected.") } - } else { vnames <- names(which(!is_numeric)) - # there are no numerical features - if (length(vnames) == 0) stop("There are no non-numerical variables") - all_profiles$`_x_` <- "" + + # there are variables selected + if (!is.null(variables)) { + # take all + vnames <- all_variables + } else if (length(vnames) == 0) { + # there were no variables selected and there are no categorical variables + stop("There are no non-numerical variables.") + } } # how to plot profiles diff --git a/docs/404.html b/docs/404.html index 71ab905e..eec5693e 100644 --- a/docs/404.html +++ b/docs/404.html @@ -36,12 +36,12 @@ + - @@ -80,7 +80,7 @@
part of the DrWhy.AI developed by the MI^2 DataLab - 0.4 + 0.4.2
@@ -146,7 +146,7 @@

Page not found (404)