Skip to content

Commit

Permalink
Update partial cor and vif plots
Browse files Browse the repository at this point in the history
  • Loading branch information
KelvynBladen committed Oct 12, 2023
1 parent fff5a38 commit a523135
Show file tree
Hide file tree
Showing 6 changed files with 75 additions and 26 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ importFrom(gbm,relative.influence)
importFrom(ggeasy,easy_center_title)
importFrom(ggeasy,easy_plot_legend_size)
importFrom(ggplot2,aes)
importFrom(ggplot2,element_text)
importFrom(ggplot2,facet_grid)
importFrom(ggplot2,facet_wrap)
importFrom(ggplot2,geom_line)
Expand Down
24 changes: 13 additions & 11 deletions R/mtry_compare.R
Original file line number Diff line number Diff line change
Expand Up @@ -68,17 +68,6 @@ mtry_compare <- function(formula, data = NULL, scale = FALSE, sqrt = TRUE,
}
}

ifelse(!missing(num_var),
num_var <- ifelse(dplyr::between(num_var, 0, num_preds),
ifelse(num_var < 1,
round(num_var * num_preds),
round(num_var)
),
num_preds
),
num_var <- num_preds
)

for (i in mvec) {
x <- paste0("srf", i)
eval(call("<-", as.name(x), randomForest(
Expand Down Expand Up @@ -139,7 +128,20 @@ mtry_compare <- function(formula, data = NULL, scale = FALSE, sqrt = TRUE,
yl <- "Misclassification Rate"
)

num_var <- ifelse(!missing(num_var), num_var, num_preds)

if (!missing(num_var)) {
num_var <- ifelse(num_var > 0,
ifelse(num_var <= num_preds,
ifelse(num_var < 1,
round(num_var * num_preds),
round(num_var)
),
num_preds
),
num_preds
)

d <- sd %>%
group_by(names) %>%
summarise(mean = mean(get(colnames(sd)[1]))) %>%
Expand Down
39 changes: 32 additions & 7 deletions R/partial_cor.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#' @importFrom randomForest randomForest
#' @importFrom stats lm model.frame cor model.matrix predict
#' @importFrom ggplot2 ggplot aes geom_point xlim ylim geom_line ggtitle
#' @importFrom ggeasy easy_center_title
#' @importFrom dplyr %>% arrange desc
#' @importFrom minerva mine
#' @description A list of data.frames and useful plots for user evaluations of
Expand All @@ -26,6 +27,7 @@
#' pcs$plot_y_part_cors
#' @export

# fix xlim for all MI plots!!!
# fix cor for factors, remove or adjust
# unsupervised forest MI

Expand All @@ -40,6 +42,8 @@ partial_cor <- function(formula, data = NULL, model = lm, num_var, ...) {
mfi <- model.frame(formula, data = data)
mf <- as.data.frame(cbind(mfi[1], mm))

res <- gsub("\\)", "", gsub(".*\\(", "", colnames(mf)[1]))

m <- ncol(mf) - 1
if (!missing(num_var)) {
num_var <- ifelse(num_var > m | num_var <= 0, m,
Expand Down Expand Up @@ -91,13 +95,19 @@ partial_cor <- function(formula, data = NULL, model = lm, num_var, ...) {
g <- cd %>% ggplot(aes(y = var, x = cor)) +
geom_point() +
xlim(-1, 1) +
ylab(NULL) +
xlab("Correlation") +
geom_vline(xintercept = 0, color = "blue") +
ggtitle("Correlations Between Predictor Variables and Response")
ggtitle(paste0("Correlation with ", res)) +
easy_center_title()

g1 <- cd %>% ggplot(aes(y = var, x = abs(cor))) +
geom_point() +
xlim(0, 1) +
ggtitle("Absolute Value of Correlations")
ylab(NULL) +
xlab("abs(Correlation)") +
ggtitle(paste0("Absolute Value of Correlation with ", res)) +
easy_center_title()

cd <- cd %>% arrange(desc(abs(cor)))

Expand Down Expand Up @@ -129,13 +139,19 @@ partial_cor <- function(formula, data = NULL, model = lm, num_var, ...) {
g2 <- cdf %>% ggplot(aes(y = var, x = part_cor)) +
geom_point() +
xlim(-1, 1) +
ylab(NULL) +
xlab("Partial Correlation") +
geom_vline(xintercept = 0, color = "blue") +
ggtitle("Partial Correlations")
ggtitle(paste0("Partial Correlation with ", res)) +
easy_center_title()

g3 <- cdf %>% ggplot(aes(y = var, x = abs(part_cor))) +
geom_point() +
xlim(0, 1) +
ggtitle("Absolute Value of Partial Correlations")
ylab(NULL) +
xlab("abs(Partial Correlation)") +
ggtitle(paste0("Absolute Value of Partial Correlation with ", res)) +
easy_center_title()

cdf <- cdf %>% arrange(desc(abs(part_cor)))

Expand All @@ -159,7 +175,10 @@ partial_cor <- function(formula, data = NULL, model = lm, num_var, ...) {
g4 <- mdfm %>% ggplot(aes(y = var, x = Mutual_Info)) +
geom_point() +
xlim(0, 1) +
ggtitle("Mutual Information Between Predictor Variables and Response")
ylab(NULL) +
xlab("Mutual Information") +
ggtitle(paste0("Mutual Information with ", res)) +
easy_center_title()

mdf <- mdf[do.call(base::order, as.list(mdf[3])), ]
mdf$var <- factor(mdf$var, levels = mdf$var)
Expand All @@ -181,7 +200,10 @@ partial_cor <- function(formula, data = NULL, model = lm, num_var, ...) {
ifelse(any(mdf$urfAccuracy < 0), min(mdf$urfAccuracy), 0),
max(mdf$urfAccuracy)
) +
ggtitle("URF Accuracy Mutual Information")
ylab(NULL) +
xlab("Mutual Information") +
ggtitle(paste0("URF Accuracy Mutual Information with ", res)) +
easy_center_title()

mdf <- mdf[do.call(base::order, as.list(mdf[4])), ]
mdf$var <- factor(mdf$var, levels = mdf$var)
Expand All @@ -203,7 +225,10 @@ partial_cor <- function(formula, data = NULL, model = lm, num_var, ...) {
ifelse(any(mdf$urfPurity < 0), min(mdf$urfPurity), 0),
max(mdf$urfPurity)
) +
ggtitle("URF Purity Mutual Information")
ylab(NULL) +
xlab("Mutual Information") +
ggtitle(paste0("URF Purity Mutual Information with ", res)) +
easy_center_title()

mdf <- mdf %>% arrange(desc(Mutual_Info))

Expand Down
2 changes: 1 addition & 1 deletion R/pdp_compare.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#' @importFrom dplyr arrange desc filter select %>%
#' summarise group_by left_join case_when
#' @importFrom ggplot2 ggplot geom_point geom_line xlab theme theme_bw
#' scale_y_continuous aes facet_wrap guides geom_smooth
#' scale_y_continuous aes facet_wrap guides geom_smooth element_text
#' @importFrom gridExtra grid.arrange
#' @importFrom stats model.frame getCall mad sd
#' @importFrom pdp partial
Expand Down
33 changes: 27 additions & 6 deletions R/robust_vifs.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#' @importFrom stats lm model.frame
#' @importFrom ggplot2 ggplot geom_point xlim ylim aes
#' geom_line ggtitle geom_vline
#' @importFrom ggeasy easy_center_title
#' @importFrom dplyr %>% arrange desc
#' @importFrom car vif
#' @description A list of data.frames and useful plots for user evaluations of
Expand Down Expand Up @@ -45,6 +46,8 @@ robust_vifs <- function(formula, data, model = randomForest,
mf <- model.frame(formula, data = data)
m <- ncol(mf) - 1

res <- gsub("\\)", "", gsub(".*\\(", "", colnames(mf)[1]))

if (!missing(num_var)) {
num_var <- ifelse(num_var > m | num_var <= 0, m,
ifelse(num_var < 1, round(num_var * m), round(num_var))
Expand Down Expand Up @@ -104,20 +107,29 @@ robust_vifs <- function(formula, data, model = randomForest,
g <- vdfl %>% ggplot(aes(y = var, x = lm_vif)) +
geom_point() +
xlim(0, max(c(vdf$Log10_lm_vif, 10))) +
ggtitle("Linear VIFs") +
ylab(NULL) +
xlab("VIFs") +
ggtitle(paste0("Linear VIFs for ", res)) +
easy_center_title() +
geom_vline(xintercept = 10, color = "blue")
} else {
g <- vdfl %>% ggplot(aes(y = var, x = Log10_lm_vif)) +
geom_point() +
xlim(0, max(c(vdf$Log10_lm_vif, 1))) +
ggtitle("Log10 Linear VIFs") +
ylab(NULL) +
xlab("log10(VIFs)") +
ggtitle(paste0("Log10 Linear VIFs for ", res)) +
easy_center_title() +
geom_vline(xintercept = 1, color = "blue")
}

g1 <- vdfl %>% ggplot(aes(y = var, x = lm_r2)) +
geom_point() +
xlim(0, 1) +
ggtitle("Linear R2 for Modeling each Predictor on all Others") +
ylab(NULL) +
xlab("Linear R2") +
ggtitle("Linear R2: Variable ~ Other Predictors") +
easy_center_title() +
geom_vline(xintercept = 0.9, color = "blue")

vdf <- vdf[do.call(base::order, as.list(vdf[4])), ]
Expand All @@ -138,20 +150,29 @@ robust_vifs <- function(formula, data, model = randomForest,
g2 <- vdfm %>% ggplot(aes(y = var, x = model_vif)) +
geom_point() +
xlim(0, max(c(vdf$model_vif, 10))) +
ggtitle("Non-linear VIFs") +
ylab(NULL) +
xlab("VIFs") +
ggtitle(paste0("Non-Linear VIFs for ", res)) +
easy_center_title() +
geom_vline(xintercept = 10, color = "blue")
} else {
g2 <- vdfm %>% ggplot(aes(y = var, x = Log10_model_vif)) +
geom_point() +
xlim(0, max(c(vdf$Log10_model_vif, 1))) +
ggtitle("Log10 Non-Linear VIFs") +
ylab(NULL) +
xlab("log10(VIFs)") +
ggtitle(paste0("Log10 Non-Linear VIFs for ", res)) +
easy_center_title() +
geom_vline(xintercept = 1, color = "blue")
}

g3 <- vdfm %>% ggplot(aes(y = var, x = model_R2)) +
geom_point() +
xlim(0, 1) +
ggtitle("Non-linear R2 for Modeling each Predictor on all Others") +
ylab(NULL) +
xlab("Non-Linear R2") +
ggtitle("Non-Linear R2: Variable ~ Other Predictors") +
easy_center_title() +
geom_vline(xintercept = 0.9, color = "blue")

if (log10 != TRUE) {
Expand Down
2 changes: 1 addition & 1 deletion man/mtry_compare.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit a523135

Please sign in to comment.