Skip to content

Commit

Permalink
candidate fix for #79
Browse files Browse the repository at this point in the history
  • Loading branch information
pbiecek committed Dec 10, 2019
1 parent 88c89cc commit d112f69
Show file tree
Hide file tree
Showing 29 changed files with 142 additions and 312 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ importFrom(scales,trans_new)
importFrom(stats,aggregate)
importFrom(stats,as.dist)
importFrom(stats,cutree)
importFrom(stats,dnorm)
importFrom(stats,hclust)
importFrom(stats,lm)
importFrom(stats,model.frame)
Expand Down
3 changes: 2 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
ingredients 0.4.2
---------------------------------------------------------------
* `aggregate_profiles` now uses Gaussian kernel smoothing. Use the `span` argument for fine control over the kernel's standard deviation ([#79](https://github.com/ModelOriented/ingredients/issues/79))
* change `variable_type` and `variables` arguments usage in the
`aggregate_profiles`, `plot.ceteris_paribus` and `plotD3.ceteris_paribus`
* remove `variable_type` argument from `plotD3.aggregated_profiles`
(now the same as in `plot.aggregated_profiles`)
* Kasia Pekala is removed from the contributors as `aspect_importance` is moved to `DALEXtra`
* Kasia Pekala is now listed as a contributor to `DALEXtra`, as `aspect_importance` has been moved to `DALEXtra` as well
([See v0.3.12 changelog](https://modeloriented.github.io/ingredients/news/index.html#ingredients-0-3-12))
* added Travis-CI for OSX

Expand Down
2 changes: 1 addition & 1 deletion R/accumulated_dependency.R
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ accumulated_dependency.default <- function(x,
variable_splits = variable_splits,
label = label, ...)

aggregate_profiles(cp, variables = variables, type = "accumulated", variable_type = variable_type)
aggregate_profiles(cp, variables = variables, type = "accumulated", variable_type = variable_type, ...)
}


Expand Down
40 changes: 26 additions & 14 deletions R/aggregate_profiles.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,13 @@
#' @param type either \code{partial/conditional/accumulated} for partial dependence, conditional profiles or accumulated local effects
#' @param groups a variable name that will be used for grouping.
#' By default \code{NULL} which means that no groups shall be calculated
#' @param span smoothing coefficient, by default 0.25. It is the standard deviation of the Gaussian kernel applied to differences scaled by the range of the variable
#' @param variable_type a character. If \code{numerical} then only numerical variables will be calculated.
#' If \code{categorical} then only categorical variables will be calculated.
#'
#' @references Predictive Models: Visual Exploration, Explanation and Debugging \url{https://pbiecek.github.io/PM_VEE}
#'
#' @importFrom stats na.omit quantile weighted.mean
#' @importFrom stats na.omit quantile weighted.mean dnorm
#'
#' @return an object of the class \code{aggregated_profiles_explainer}
#'
Expand Down Expand Up @@ -85,7 +86,8 @@ aggregate_profiles <- function(x, ...,
variable_type = "numerical",
groups = NULL,
type = "partial",
variables = NULL) {
variables = NULL,
span = 0.25) {

check_variable_type(variable_type)
check_type(type)
Expand Down Expand Up @@ -167,12 +169,12 @@ aggregate_profiles <- function(x, ...,
"partial_dependency_explainer", "data.frame")
}
if (type == "conditional") {
aggregated_profiles <- aggregated_profiles_conditional(all_profiles, groups)
aggregated_profiles <- aggregated_profiles_conditional(all_profiles, groups, span = span)
class(aggregated_profiles) <- c("aggregated_profiles_explainer",
"conditional_dependency_explainer", "data.frame")
}
if (type == "accumulated") {
aggregated_profiles <- aggregated_profiles_accumulated(all_profiles, groups)
aggregated_profiles <- aggregated_profiles_accumulated(all_profiles, groups, span = span)
class(aggregated_profiles) <- c("aggregated_profiles_explainer",
"accumulated_dependency_explainer", "data.frame")
}
Expand All @@ -185,7 +187,7 @@ aggregate_profiles <- function(x, ...,
}


aggregated_profiles_accumulated <- function(all_profiles, groups = NULL) {
aggregated_profiles_accumulated <- function(all_profiles, groups = NULL, span = 0.25) {
observations <- attr(all_profiles, "observations")
# just initialisation
if (is.numeric(all_profiles$`_x_`)) {
Expand Down Expand Up @@ -217,10 +219,15 @@ aggregated_profiles_accumulated <- function(all_profiles, groups = NULL) {

if (is.numeric(split_profile$`_x_`)) {
# for continuous variables we will calculate weighted average
# where weights depends on square distance between points
diffs <- (split_profile$`_orginal_` - split_profile$`_x_`)^2
diffsd <- sqrt(mean(diffs^2))
split_profile$`_w_` <- diffs/ifelse(diffsd > 0, diffsd, 1)
# where weights come from gaussian kernel and distance between points

# scaling factor: the range of `_x_`, or 1 if that range is 0
range_x <- diff(range(split_profile$`_x_`))
if (range_x == 0) range_x <- 1

# scaled differences
diffs <- (split_profile$`_orginal_` - split_profile$`_x_`) /range_x
split_profile$`_w_` <- dnorm(diffs, sd = span)
} else {
# for categorical variables we will calculate weighted average
# but weights are 0-1, 1 if it's the same level and 0 otherwise
Expand Down Expand Up @@ -291,7 +298,7 @@ aggregated_profiles_partial <- function(all_profiles, groups = NULL) {
aggregated_profiles
}

aggregated_profiles_conditional <- function(all_profiles, groups = NULL) {
aggregated_profiles_conditional <- function(all_profiles, groups = NULL, span = 0.25) {

observations <- attr(all_profiles, "observations")
# just initialisation
Expand Down Expand Up @@ -324,10 +331,15 @@ aggregated_profiles_conditional <- function(all_profiles, groups = NULL) {

if (is.numeric(split_profile$`_x_`)) {
# for continuous variables we will calculate weighted average
# where weights depends on square distance between points
diffs <- (split_profile$`_orginal_` - split_profile$`_x_`)^2
diffsd <- sqrt(mean(diffs^2))
split_profile$`_w_` <- diffs/ifelse(diffsd > 0, diffsd, 1)
# where weights come from gaussian kernel and distance between points

# scaling factor: the range of `_x_`, or 1 if that range is 0
range_x <- diff(range(split_profile$`_x_`))
if (range_x == 0) range_x <- 1

# scaled differences
diffs <- (split_profile$`_orginal_` - split_profile$`_x_`) /range_x
split_profile$`_w_` <- dnorm(diffs, sd = span)
} else {
# for categorical variables we will calculate weighted average
# but weights are 0-1, 1 if it's the same level and 0 otherwise
Expand Down
2 changes: 1 addition & 1 deletion R/conditional_dependency.R
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ conditional_dependency.default <- function(x,
variable_splits = variable_splits,
label = label, ...)

conditional_dependency.ceteris_paribus_explainer(cp, variables = variables, variable_type = variable_type)
conditional_dependency.ceteris_paribus_explainer(cp, variables = variables, variable_type = variable_type, ...)
}


Expand Down
35 changes: 11 additions & 24 deletions man/accumulated_dependency.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 4 additions & 8 deletions man/aggregate_profiles.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

18 changes: 4 additions & 14 deletions man/calculate_variable_profile.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 4 additions & 2 deletions man/calculate_variable_split.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

29 changes: 7 additions & 22 deletions man/ceteris_paribus.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion man/ceteris_paribus_2d.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 3 additions & 9 deletions man/cluster_profiles.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

35 changes: 11 additions & 24 deletions man/conditional_dependency.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

29 changes: 9 additions & 20 deletions man/describe.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit d112f69

Please sign in to comment.