# Plot and comparisons

In [None]:
library(tidyverse)
library(ggpubr)

## Functions

In [None]:
# Write a plot to disk once per output format (PDF, PNG and SVG).
#
# p:  plot object handed to ggsave().
# fn: output path without extension; each extension is appended to it.
# w:  width passed to ggsave() (default 7).
# h:  height passed to ggsave() (default 6).
save_plot <- function(p, fn, w=7, h=6){
    extensions <- c(".pdf", ".png", ".svg")
    for(target in paste0(fn, extensions)){
        ggsave(filename=target, plot=p, width=w, height=h)
    }
}


# Summarise the per-fold metrics stored in one model results file.
#
# fn: path to a delimited file read with data.table::fread(). It must contain
#     the columns `fold`, `n_features` and `n_redundant`; any column whose
#     name starts with "test_score_r2" is summarised as well.
#
# Returns one row per metric (column `name`) holding the Mean, Median and Std
# of that metric over all folds. The result is still grouped by `name`.
get_ml_summary <- function(fn){
    per_fold <- data.table::fread(fn) %>%
        mutate(fold = as.character(fold)) %>%
        select(fold, n_features, n_redundant, starts_with("test_score_r2"))
    # Long format: one row per (fold, metric) pair.
    long_metrics <- pivot_longer(per_fold, -fold)
    long_metrics %>%
        group_by(name) %>%
        summarise(Mean=mean(value), Median=median(value), Std=sd(value),
                  .groups = "keep")
}


# Collect the get_ml_summary() tables of every file matching a glob pattern.
#
# filename: glob pattern expanded with Sys.glob(); one results file per match.
# tissue:   value stored in the `Tissue` column of the combined table.
#
# Each file's summary is tagged with a `Geneid` taken from the first
# "ENSG<digits>_<digits>" match in its path. Summaries are stored by gene ID,
# so when several files share an ID only the last one read is kept.
get_metrics <- function(filename, tissue){
    paths <- Sys.glob(filename)
    gene_ids <- str_extract(paths, "ENSG\\d+_\\d+")
    per_gene <- list()
    for(idx in seq_along(paths)){
        gene_summary <- get_ml_summary(paths[idx])
        gene_summary["Geneid"] <- gene_ids[idx]
        per_gene[[gene_ids[idx]]] <- gene_summary
    }
    combined <- bind_rows(per_gene)
    combined["Tissue"] <- tissue
    combined
}

## Load metrics

### Random forest

In [None]:
# Random forest: summarise `test_score_r2` over folds for every
# (tissue, feature) pair and tag the rows with model = "Random Forest".
rf <- data.table::fread("../../rf/summary_10Folds_allTissues.tsv") %>%
    as.data.frame() %>%
    mutate(across(where(is.character), as.factor)) %>%
    # `fold` is an identifier, not a measurement: keep it as text.
    mutate(fold = as.character(fold)) %>%
    select(tissue, feature, fold, n_features, starts_with("test_score_r2")) %>%
    pivot_longer(-c(tissue, feature, fold), names_to="metric", values_to="score") %>%
    # Only the R2 metric is reported, so drop the other metrics before
    # summarising instead of computing statistics that are then discarded.
    filter(metric == "test_score_r2") %>%
    group_by(tissue, feature, metric) %>%
    # .groups = "drop" returns an ungrouped table; otherwise every later
    # bind_rows()/mutate() would run once per one-row group.
    summarise(Mean=mean(score), Median=median(score), Std=sd(score),
              .groups = "drop") %>%
    mutate(model = "Random Forest")
dim(rf)
head(rf, 2)

### Elastic net

In [None]:
# Elastic net: summarise `test_score_r2` over folds for every
# (tissue, feature) pair and tag the rows with model = "Elastic Net".
enet <- data.table::fread("../../enet/summary_10Folds_allTissues.tsv") %>%
    as.data.frame() %>%
    mutate(across(where(is.character), as.factor)) %>%
    # `fold` is an identifier, not a measurement: keep it as text.
    mutate(fold = as.character(fold)) %>%
    select(tissue, feature, fold, n_features, starts_with("test_score_r2")) %>%
    pivot_longer(-c(tissue, feature, fold), names_to="metric", values_to="score") %>%
    # Only the R2 metric is reported, so drop the other metrics before
    # summarising instead of computing statistics that are then discarded.
    filter(metric == "test_score_r2") %>%
    group_by(tissue, feature, metric) %>%
    # .groups = "drop" returns an ungrouped table; otherwise every later
    # bind_rows()/mutate() would run once per one-row group.
    summarise(Mean=mean(score), Median=median(score), Std=sd(score),
              .groups = "drop") %>%
    mutate(model = "Elastic Net")
dim(enet)
head(enet, 2)

## Annotate

In [None]:
# Differential splicing summary reduced to unique (gene_name, Tissue) pairs.
dtu <- data.table::fread(paste0("../../../../../differential_analysis/tissue_comparison/",
                                "ds_summary/_m/diffSplicing_ancestry_FDR05_4regions.tsv")) %>%
    select(gene_name = gene, Tissue) %>%
    distinct()

# Annotated features whose gene also appears in `dtu` for the same tissue.
# Every row that survives the inner join is labelled DTU = "DTU".
degs <- data.table::fread("../../../../_m/degs_annotation.txt") %>%
    select(Feature = V1, ensemblID, gene_name, Tissue) %>%
    distinct() %>%
    inner_join(dtu, by=c("Tissue", "gene_name")) %>%
    rename(tissue = Tissue) %>%
    mutate(DTU = "DTU")

In [None]:
# Stack both model summaries and attach the DTU annotation.
# Underscores in the model feature IDs are turned into dots to build the
# `Feature` join key (presumably the ID format used in `degs` -- confirm).
# Features without a match in `degs` are labelled "DEG".
df <- bind_rows(rf, enet) %>%
    mutate(Feature = gsub("_", ".", as.character(feature), fixed = TRUE)) %>%
    left_join(degs, by=c("tissue", "Feature")) %>%
    as.data.frame() %>%
    replace_na(list(DTU = "DEG")) %>%
    mutate(across(where(is.character), as.factor))
dim(df)
head(df, 2)

## Merge and plot

In [None]:
# Median R2 per model, one facet per tissue, coloured by DTU/DEG label.
# To show DTU features only, pass `filter(df, DTU == "DTU")` instead of `df`.
ggboxplot(df, x="model", y="Median", color="DTU", add="jitter",
          facet.by="tissue", palette="npg", ylim=c(-0.75, 1),
          ylab="Median R2", xlab="",
          panel.labs.font=list(face='bold'), ncol=4,
          ggtheme=theme_pubr(base_size=15, border=TRUE)) +
    rotate_x_text(45) +
    font("xy.title", face="bold")

In [None]:
# Median R2 per tissue, one facet per model, coloured by DTU/DEG label.
# legend="none" is the documented ggpubr value for hiding the legend.
bxp <- ggboxplot(df, x="tissue", y="Median", color="DTU", add="jitter",
                 facet.by="model", palette="npg", ylim=c(-0.75, 1),
                 ylab="Median R2", xlab="", legend="none",
                 panel.labs.font=list(face='bold', size = 14)) +
    rotate_x_text(45) + font("xy.title", size=18, face="bold") +
    font("xy.text", size=16) + font("legend.text", size=16)
# Saved under its own base name: the tissue-filled boxplot drawn further down
# writes to "summary_boxplots_r2_2methods" and would otherwise overwrite
# these files.
save_plot(bxp, "summary_boxplots_r2_2methods_byDTU", 9, 6)
bxp

In [None]:
# Both model summaries stacked, without the DTU annotation.
df2 <- bind_rows(rf, enet)
head(df2, 2)

In [None]:
# Median R2 per tissue, one facet per model, boxes filled by tissue.
# legend="none" is the documented ggpubr value for hiding the legend.
bxp <- ggboxplot(df2, x="tissue", y="Median", fill="tissue", add="jitter",
                 facet.by="model", palette="npg", ylim=c(-0.75, 1),
                 ylab="Median R2", xlab="", legend="none",
                 panel.labs.font=list(face='bold', size = 14)) +
    rotate_x_text(45) + font("xy.title", size=18, face="bold") +
    font("xy.text", size=16) + font("legend.text", size=16)
save_plot(bxp, "summary_boxplots_r2_2methods", 6, 5)
bxp

## Reproducibility Information

In [None]:
# Timestamp of this run.
Sys.time()
# CPU and elapsed time used by the R process so far.
proc.time()
# Widen printed output so the session report below wraps less.
options(width = 120)
# R version, platform and package versions used for this analysis.
sessioninfo::session_info()