Skip to content

Commit

Permalink
DS-3136 Replace sum and variants with verbs equivalents (#5)
Browse files Browse the repository at this point in the history
* DS-3136 Replace sum with verbs::Sum

* DS-3136 Replace row/colSums with verb equivalents

* DS-3136 Fix typo in DESCRIPTION

* DS-3136 Update NAMESPACE

* DS-3136 Skip dim removal in SumRows/Cols calls [revdep skip]

* DS-3136 Empty commit to trigger build for speed test [revdep skip]
  • Loading branch information
jrwishart committed Feb 26, 2021
1 parent d73086e commit ead93b5
Show file tree
Hide file tree
Showing 7 changed files with 46 additions and 29 deletions.
10 changes: 6 additions & 4 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: flipTrees
Type: Package
Title: Tools for classification and regression trees
Version: 1.0.2
Version: 1.0.3
Author: Displayr <opensource@displayr.com>
Maintainer: Displayr <opensource@displayr.com>
Description: Wrappers for classification and regression trees using the
Expand All @@ -19,11 +19,13 @@ Imports: rpart,
hash,
colorspace,
rhtmlSankeyTree,
stats
stats,
verbs
Remotes: Displayr/flipData,
Displayr/flipFormat,
Displayr/flipRegression,
Displayr/flipU,
Displayr/rhtmlSankeyTree
RoxygenNote: 7.0.2
Displayr/rhtmlSankeyTree,
Displayr/verbs
RoxygenNote: 7.1.1
Encoding: UTF-8
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -65,3 +65,6 @@ importFrom(stats,quantile)
importFrom(stats,terms)
importFrom(stats,xtabs)
importFrom(utils,capture.output)
importFrom(verbs,Sum)
importFrom(verbs,SumColumns)
importFrom(verbs,SumRows)
3 changes: 2 additions & 1 deletion R/cart.R
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,7 @@ getNodeHash <- function(xlevels)
result = list(features.hash, xlevels.hash)
}

#' @importFrom verbs Sum
getShortenedLevels <- function(lvls)
{
.appendNum <- function(text, text.hash, c) {
Expand All @@ -218,7 +219,7 @@ getShortenedLevels <- function(lvls)
node.texts <- rep("", length(lvls))
for (j in 1:length(lvls)) {
text <- lvls[j]
text.len <- sapply(gregexpr("[[:alnum:]]+", text), function(x) sum(x > 0)) # count number of words
text.len <- sapply(gregexpr("[[:alnum:]]+", text), function(x) Sum(x > 0, remove.missing = FALSE)) # count number of words
if (text.len == 0) {
node.text <- "X"
} else if (text.len == 1) {
Expand Down
37 changes: 22 additions & 15 deletions R/chaid.R
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ chaid_control <- function(alpha2 = 0.05,
#' @param height current height of the tree
#' @return an object of class partynode representing the root node of the tree
#' @importFrom partykit nodeids kidids_split
#' @importFrom verbs Sum
#' @noRd
step5 <- function(id = 1L,
response,
Expand All @@ -158,7 +159,7 @@ step5 <- function(id = 1L,
# data, the sum of the weights is the effective number of data points. Here,
# we don't split the data further if we have less (effective) data points
# than the threshold.
if (sum(weights) < ctrl$minsplit)
if (Sum(weights, remove.missing = FALSE) < ctrl$minsplit)
return(partynode(id = id))

# Don't go deeper than one level if this is meant to be a stump
Expand Down Expand Up @@ -259,6 +260,7 @@ step1 <- function(response, xvars, weights, indices = NULL, ctrl) {
#' @note this does not do the actual merging, we save that for when we've
#' decided what the best thing to merge is.
#' @importFrom stats aggregate
#' @importFrom verbs Sum
#' @noRd
step1internal <- function(response, x, weights, index = NULL, ctrl)
{
Expand Down Expand Up @@ -288,8 +290,8 @@ step1internal <- function(response, x, weights, index = NULL, ctrl)
break()

# is step 3 necessary?
runstep3 <- sum(mlev[1] == index) > 1 ||
sum(mlev[2] == index) > 1
runstep3 <- Sum(mlev[1] == index, remove.missing = FALSE) > 1 ||
Sum(mlev[2] == index, remove.missing = FALSE) > 1
runstep3 <- runstep3 && (alpha3 > 0)

# Merge levels by giving all grouped levels the same group number
Expand Down Expand Up @@ -333,6 +335,7 @@ step1internal <- function(response, x, weights, index = NULL, ctrl)
#' state of merging. \code{NULL} is no suitable merge could be found (all
#' levels are too different to each other).
#' @importFrom stats xtabs
#' @importFrom verbs Sum SumRows SumColumns
#' @noRd
step2 <- function(response,
x,
Expand Down Expand Up @@ -388,10 +391,10 @@ step2 <- function(response,
c(pos %% nrow(logpmaxs), as.integer(pos / nrow(logpmaxs)) + 1)

# sample size stopping criteria
nmin <- min(c(ceiling(ctrl$minprob * sum(weights)), ctrl$minbucket))
nmin <- min(c(ceiling(ctrl$minprob * Sum(weights, remove.missing = FALSE)), ctrl$minbucket))

if (exp(logpmax) > ctrl$alpha2 || any(rowSums(xytab) < nmin)) {
xytab[min(levindx), ] <- colSums(xytab[levindx, ])
if (exp(logpmax) > ctrl$alpha2 || any(SumRows(xytab, remove.columns = NULL, remove.missing = FALSE) < nmin)) {
xytab[min(levindx), ] <- SumColumns(xytab[levindx, ], remove.rows = NULL, remove.missing = FALSE)
mergedx[mergedx == rownames(xytab)[max(levindx)]] <-
rownames(xytab)[min(levindx)]
xytab <- xytab[-max(levindx), ]
Expand Down Expand Up @@ -424,10 +427,11 @@ step2 <- function(response,

#' Splits two merged groups of levels within a predictor apart
#' @return the new mapping of levels to groups after the split
#' @importFrom verbs Sum
#' @noRd
step3 <- function(x, y, weights, alpha3 = 0.049, index, kat) {
split_indx <- index
if (sum(index == kat) > 2) {
if (Sum(index == kat, remove.missing = FALSE) > 2) {
sp <- step3intern(x, y, weights, alpha3, index, kat)
# compute minimum p-value and split
if (!is.null(sp))
Expand Down Expand Up @@ -530,6 +534,7 @@ step4 <- function(response,
}

#' Calculates adjusted p-value for a particular way of splitting the tree
#' @importFrom verbs Sum
#' @noRd
step4internal <- function(response, x, weights, index, ctrl) {
if (nlevels(response[, drop = TRUE]) < 2)
Expand All @@ -541,7 +546,7 @@ step4internal <- function(response, x, weights, index, ctrl) {
mx <- state$mergedx

nmin <-
min(c(ceiling(ctrl$minprob * sum(weights)), ctrl$minbucket))
min(c(ceiling(ctrl$minprob * Sum(weights, remove.missing = FALSE)), ctrl$minbucket))
if (any(table(mx[weights > 0]) < nmin))
return(0)

Expand All @@ -568,8 +573,8 @@ step4internal <- function(response, x, weights, index, ctrl) {
ret <- logp + lchoose(c_levels - 1, r_levels - 1)
} else {
i <- 0:(r_levels - 1) # formula (3.2)
fact <- sum((-1) ^ i * ((r_levels - i) ^ c_levels) /
(factorial(i) * factorial(r_levels - i)))
fact <- Sum((-1) ^ i * ((r_levels - i) ^ c_levels) /
(factorial(i) * factorial(r_levels - i)), remove.missing = FALSE)
ret <- logp + log(fact)
}
attr(logp, "Chisq") <- attr(logp, "Chisq")
Expand All @@ -579,13 +584,14 @@ step4internal <- function(response, x, weights, index, ctrl) {
#' Performs a (log) chi-squared test on a given crosstab
#' @return the log of the p-value and the chi-squared value for the crosstab
#' @importFrom stats chisq.test pchisq
#' @importFrom verbs Sum SumRows SumColumns
#' @noRd
logchisq.test <- function(x) {
cs <- colSums(x) > 0
rs <- rowSums(x) > 0
if (sum(cs) < 2 || sum(rs) < 2)
cs <- SumColumns(x, remove.rows = NULL, remove.missing = FALSE) > 0
rs <- SumRows(x, remove.columns = NULL,remove.missing = FALSE) > 0
if (Sum(cs, remove.missing = FALSE) < 2 || Sum(rs, remove.missing = FALSE) < 2)
return(0)
if (min(x) < 10 && sum(x) < 100) {
if (min(x) < 10 && Sum(x, remove.missing = FALSE) < 100) {
ctest <- chisq.test(
x[rs, cs],
correct = FALSE,
Expand Down Expand Up @@ -628,9 +634,10 @@ mergelevels <- function(index, merge) {
}

#' split a merged group into two groups
#' @importFrom verbs Sum
#' @noRd
splitlevels <- function(index, level, split) {
stopifnot(sum(index == level) > length(split))
stopifnot(Sum(index == level, remove.missing = FALSE) > length(split))

# levels must be relabeled
gr <- index > level
Expand Down
3 changes: 2 additions & 1 deletion R/partytree.R
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
#' @importFrom partykit party
#' @importFrom flipU OutcomeName
#' @importFrom stats formula
#' @importFrom verbs Sum
treeFrameToParty <- function(frame, xlevels, model, terms, labels)
{
df <- data.frame()

node.hash <- getNodeHash(xlevels)

not.leaf <- frame$var != "<leaf>"
n.splits <- sum(not.leaf)
n.splits <- Sum(not.leaf, remove.missing = FALSE)
non.leaf.indices <- (1:nrow(frame))[not.leaf]
var.names <- as.character(frame$var[non.leaf.indices])
numeric.breaks <- rep(NA, n.splits)
Expand Down
13 changes: 7 additions & 6 deletions R/sankey.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#' @importFrom flipFormat FormatAsReal FormatAsPercent
#' @importFrom colorspace diverge_hsv
#' @importFrom grDevices rgb rgb2hsv col2rgb hsv
#' @importFrom verbs Sum
#'
treeFrameToList <- function(frame, xlevels, model, assigned, labels, max.tooltip.length = 150,
numeric.distribution = TRUE, custom.color = "default", num.color.div = 101,
Expand Down Expand Up @@ -122,8 +123,8 @@ treeFrameToList <- function(frame, xlevels, model, assigned, labels, max.tooltip
#node.descriptions <- paste0("<br>",node.descriptions)

node.color <- rep("0", nrow(frame))
l.na = sum(is.na(custom.color))
l.col = sum(.areColors(custom.color))
l.na = Sum(is.na(custom.color), remove.missing = FALSE)
l.col = Sum(.areColors(custom.color), remove.missing = FALSE)
if (custom.color == "default" || (l.na == 0 && l.col == length(custom.color)))
{
if (custom.color == "default" || l.col < 2) {
Expand Down Expand Up @@ -189,13 +190,13 @@ treeFrameToList <- function(frame, xlevels, model, assigned, labels, max.tooltip
bins.breaks[1] = xmin - abs(xmin)/100
bins.breaks[length(bins.breaks)] = xmax + xmax/100
overall.distribution = hist(outcome.variable, breaks = bins.breaks, plot = FALSE)$counts
overall.distribution = overall.distribution/sum(overall.distribution)
overall.distribution = overall.distribution/Sum(overall.distribution, remove.missing = FALSE)

for (i in 1:nrow(frame)) {
this.node.values = nodes.distribution.temp[i,!is.na(nodes.distribution.temp[i,])]
if (length(this.node.values) > 0){
nodes.hist = hist(this.node.values, breaks = bins.breaks, plot = FALSE)$counts
nodes.hist = nodes.hist/sum(nodes.hist)
nodes.hist = nodes.hist/Sum(nodes.hist, remove.missing = FALSE)
} else {
nodes.hist = 0
}
Expand Down Expand Up @@ -229,8 +230,8 @@ treeFrameToList <- function(frame, xlevels, model, assigned, labels, max.tooltip
}

node.color <- rep("0", nrow(frame))
l.na = sum(is.na(custom.color))
l.col = sum(.areColors(custom.color))
l.na = Sum(is.na(custom.color), remove.missing = FALSE)
l.col = Sum(.areColors(custom.color), remove.missing = FALSE)
if (custom.color == "default" || (l.na == 0 && l.col == length(custom.color)))
{
if (custom.color == "default" || l.col < 2) {
Expand Down
6 changes: 4 additions & 2 deletions man/spam7.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit ead93b5

Please sign in to comment.