# Visualize metapath performance against permutation

In [1]:
# Attach dplyr (provides %>% and the verbs used below) without printing the
# name-conflict messages. The argument is spelled out in full rather than
# relying on partial matching of `warn`.
library(dplyr, warn.conflicts = FALSE)

In [2]:
# Metaedge membership table: after dropping `position` and de-duplicating,
# each remaining row is a distinct combination of the other columns.
# `length` is stored as character so it can be matched against string keys
# (e.g. in a manual discrete scale).
metaedge_df = readr::read_tsv('data/metaedge-in-metapath.tsv') %>%
  dplyr::select(-position) %>%
  dplyr::distinct() %>%
  dplyr::mutate(length = as.character(length))

# Every metaedge abbreviation present in the membership table
all_abbrevs = unique(metaedge_df$metaedge_abbrev)

# Per-feature performance, repeated once for every metaedge row that shares
# its metapath. The join key is given explicitly so that a column later added
# to both files cannot silently change which rows match.
fsp_df = readr::read_tsv('data/feature-performance/auroc.tsv') %>%
  dplyr::inner_join(metaedge_df, by = 'metapath')

# Collapse to one row per metaedge holding the maximum of each AUROC column
# over the joined rows. NOTE: this overwrites the membership table above.
# Rows are sorted by max_delta_auroc ascending, ties broken by descending
# abbreviation.
metaedge_df = fsp_df %>%
  dplyr::group_by(metaedge_abbrev, metaedge) %>%
  dplyr::summarize(
    max_dwpc_auroc = max(dwpc_auroc),
    max_delta_auroc = max(delta_auroc),
    max_rdwpc_auroc = max(rdwpc_auroc)
  ) %>%
  dplyr::ungroup() %>%
  dplyr::arrange(max_delta_auroc, desc(metaedge_abbrev))

Joining by: "metapath"


In [3]:
#abbrev_to_metaedge = setNames(metaedge_df$metaedge_abbrev, metaedge_df$metaedge)

In [4]:
# Hex fill color for each `length` value; names are the lengths as strings
length_to_color = setNames(
  c('#b2df8a', '#1f78b4', '#33a02c', '#a6cee3'),
  c('1', '2', '3', '4')
)

In [7]:
# Figure size, shared by the notebook display options and the saved PNG
w = 7; h = 5.5
options(repr.plot.width=w, repr.plot.height=h)
# geom_jitter draws random offsets; fix the seed so the figure is reproducible
set.seed(0)
# One point per row of fsp_df: x is the feature's delta_auroc, y is the
# metaedge, and fill encodes `length`.
gg_fsp = fsp_df %>%
  # Constant column, used only so the single facet strip acts as a plot title
  dplyr::mutate(title = 'Feature Performance by Metaedge') %>%
  ggplot2::ggplot(ggplot2::aes(x = delta_auroc, y = metaedge)) +
  # Reference line at zero delta
  ggplot2::geom_vline(xintercept = 0, linetype='dashed', color='gray') +
  # `height` spreads points vertically within each metaedge row
  ggplot2::geom_jitter(ggplot2::aes(fill = length), height=0.4, alpha=1, shape = 21, size=2, color= '#6C6C6C', stroke=0.7) +
  ggplot2::facet_grid(. ~ title) +
  ggplot2::scale_fill_manual(name = 'Length', values=length_to_color) +
  ggplot2::scale_x_continuous(labels=scales::percent) +
  # y-axis order follows the row order of metaedge_df
  ggplot2::scale_y_discrete(limits = metaedge_df$metaedge) +
  ggplot2::xlab('Δ AUROC of DWPCs Due to Permutation') + ggplot2::ylab(NULL) +
  hetior::theme_dhimmel() +
  # Anchor the legend's bottom-right corner to the panel's bottom-right corner
  ggplot2::theme(
    legend.justification=c(1, 0), legend.position=c(1, 0))

# Pass the plot explicitly instead of relying on ggsave's default of
# last_plot(), so the file always contains the figure built above.
ggplot2::ggsave('./data/feature-performance/delta-auroc.png', plot = gg_fsp, dpi=300, width = w, height = h)

In [6]:
# Fraction of features whose delta-AUROC p-value falls below 0.01 under three
# treatments of multiple testing. Each expression prints its own value.

# Bonferroni-adjusted p-values
fsp_df$pval_delta_auroc %>%
  p.adjust(method = 'bonferroni') %>%
  { . < 0.01 } %>%
  mean()

# FDR-adjusted p-values
fsp_df$pval_delta_auroc %>%
  p.adjust(method = 'fdr') %>%
  { . < 0.01 } %>%
  mean()

# Unadjusted p-values
mean(fsp_df$pval_delta_auroc < 0.01)