# Plot and comparisons

In [None]:
library(tidyverse)
library(ggpubr)

## Functions

In [None]:
# Write plot `p` to disk once per output format (PDF, PNG and SVG).
#   p    : plot object handed to ggsave()
#   fn   : output path without file extension
#   w, h : width and height forwarded to ggsave()
save_plot <- function(p, fn, w=7, h=6){
    extensions <- c(".pdf", ".png", ".svg")
    for(suffix in extensions){
        outfile <- paste0(fn, suffix)
        ggsave(filename=outfile, plot=p, width=w, height=h)
    }
}


# Summarise the per-fold test R2 of one model across folds.
#   fn    : path to the per-fold summary table
#   model : model name stored in the `model` column
#   label : gene-set label stored in the `Type` column
# Returns one row per tissue/feature with the Mean, Median and Std of
# `test_score_r2`; the result stays grouped by tissue, feature and metric.
get_metrics <- function(fn, model, label){
    # Character columns become factors; `fold` is then forced to character.
    folds <- as.data.frame(data.table::fread(fn)) %>%
        mutate_if(is.character, as.factor) %>%
        mutate_at("fold", as.character)
    # Long format: one row per tissue/feature/fold/metric.
    long <- folds %>%
        select(tissue, feature, fold, n_features, starts_with("test_score_r2")) %>%
        pivot_longer(-c(tissue, feature, fold), names_to="metric", values_to="score")
    long %>%
        group_by(tissue, feature, metric) %>%
        summarise(Mean=mean(score), Median=median(score), Std=sd(score), .groups = "keep") %>%
        # Only the plain test R2 is kept; other pivoted columns are dropped here.
        filter(metric == "test_score_r2") %>%
        mutate("model"=model) %>%
        mutate(Type = label)
}

## Genetic variation prediction for expression of ancestry DE genes and random genes

### Load data

In [None]:
# Within each tissue, rank the annotated DE genes by adjusted p-value and
# keep the 100 smallest. Columns are renamed to the join keys used later
# (`tissue`, `Feature`).
top100 <- data.table::fread("../../../_m/degs_annotation.txt") %>%
    group_by(Tissue) %>%
    mutate(rank = row_number(adj.P.Val)) %>%
    filter(rank <= 100) %>%
    select(V1, ensemblID, gene_name, Tissue) %>%
    distinct() %>%
    rename("Feature"="V1") %>%
    rename("tissue"="Tissue")

#### Annotate and merge data

In [None]:
# Unique gene/tissue pairs listed in the differential-splicing table.
dtu <- paste0("../../../../differential_analysis/tissue_comparison/",
              "ds_summary/_m/diffSplicing_ancestry_FDR05_4regions.tsv") %>%
    data.table::fread() %>%
    select(gene, Tissue) %>%
    distinct() %>%
    rename("gene_name"="gene")

# DE genes that also appear in `dtu` for the same tissue, flagged as "DTU".
degs <- data.table::fread("../../../_m/degs_annotation.txt") %>%
    select(V1, ensemblID, gene_name, Tissue) %>%
    distinct() %>%
    rename("Feature"="V1") %>%
    inner_join(dtu, by=c("Tissue", "gene_name")) %>%
    rename("tissue"="Tissue") %>%
    mutate("DTU"="DTU")

# Random genes that also appear in `dtu` for the same tissue, flagged as "DTU".
random <- data.table::fread("../../../_m/randomGenes_annotation.txt") %>%
    select(V1, ensemblID, gene_name, Tissue) %>%
    distinct() %>%
    rename("Feature"="V1") %>%
    inner_join(dtu, by=c("Tissue", "gene_name")) %>%
    rename("tissue"="Tissue") %>%
    mutate("DTU"="DTU")

#### DE genes

In [None]:
# Per-gene test R2 summaries of both models for the DE gene set.
rf <- get_metrics("../../de_genes/rf/summary_10Folds_allTissues.tsv",
                  "Random Forest", "DE")
enet <- get_metrics("../../de_genes/enet/summary_10Folds_allTissues.tsv",
                    "Elastic Net", "DE")
de <- bind_rows(rf, enet) %>%
    # "_" is replaced by "." to build the key joined against `degs$Feature`
    mutate(Feature=gsub("_", ".", feature)) %>%
    left_join(degs, by=c("tissue", "Feature")) %>%
    as.data.frame() %>%
    # New_Type is "DE DTU" for genes matched in `degs`, otherwise "DE "
    # (note the trailing space left by paste())
    mutate(New_Type = paste(Type, replace_na(DTU, ""))) %>%
    mutate_if(is.character, as.factor)
head(de)
unique(de$Type)

#### Top 100 DE genes

In [None]:
# Restrict the DE-gene summaries to the top 100 DE genes per tissue.
# NOTE: relies on `rf`/`enet` still holding the DE-gene results; run this
# before they are reassigned for the random genes.
de100 <- bind_rows(rf, enet) %>%
    mutate(Feature=gsub("_", ".", feature)) %>%
    inner_join(top100, by=c("tissue", "Feature"))
dim(de100)

#### Random genes

In [None]:
# Per-gene test R2 summaries of both models for the random gene set.
# `rf` and `enet` are reassigned here and no longer hold the DE results.
rf <- get_metrics("../../random_genes/rf/summary_10Folds_allTissues.tsv",
                  "Random Forest", "Random")
enet <- get_metrics("../../random_genes/enet/summary_10Folds_allTissues.tsv",
                    "Elastic Net", "Random")
rand <- bind_rows(rf, enet) %>%
    mutate(Feature=gsub("_", ".", feature)) %>%
    left_join(random, by=c("tissue", "Feature")) %>%
    as.data.frame() %>%
    # New_Type is "Random DTU" for genes matched in `random`, otherwise
    # "Random " (note the trailing space left by paste())
    mutate(New_Type = paste(Type, replace_na(DTU, ""))) %>%
    mutate_if(is.character, as.factor)
head(rand)
unique(rand$Type)

#### Merge data

In [None]:
# Stack the DE and random gene summaries into one table.
df <- bind_rows(de, rand)
dim(df)
head(df, 2)
unique(df$Type)

### Summarize

In [None]:
# Mean and median of the per-gene median R2 by tissue, gene set and model.
df %>%
    group_by(tissue, Type, model) %>%
    summarise(Mean=mean(Median), Median=median(Median), .groups = "keep")

In [None]:
# Same summary, showing the Elastic Net rows only.
df %>%
    group_by(tissue, Type, model) %>%
    summarise(Mean=mean(Median), Median=median(Median), .groups = "keep") %>%
    filter(model == "Elastic Net")

In [None]:
# Same summary, showing the Random Forest rows only.
df %>%
    group_by(tissue, Type, model) %>%
    summarise(Mean=mean(Median), Median=median(Median), .groups = "keep") %>%
    filter(model == "Random Forest")

In [None]:
# Summary restricted to genes flagged as DTU, by tissue, New_Type and model.
df %>%
    filter(DTU == "DTU") %>%
    group_by(tissue, New_Type, model) %>%
    summarise(Mean=mean(Median), Median=median(Median), .groups = "keep")

In [None]:
# DTU-only summary, showing the Elastic Net rows only.
df %>%
    filter(DTU == "DTU") %>%
    group_by(tissue, New_Type, model) %>%
    summarise(Mean=mean(Median), Median=median(Median), .groups = "keep") %>%
    filter(model == "Elastic Net")

In [None]:
# Top-100 DE genes: mean of the per-gene median R2, one column per model.
de100 %>%
    group_by(tissue, model) %>%
    summarise(Mean=mean(Median), .groups = "keep") %>%
    as.data.frame() %>%
    pivot_wider(names_from="model", values_from="Mean")

In [None]:
# Top-100 DE genes: median of the per-gene median R2, one column per model.
de100 %>%
    group_by(tissue, model) %>%
    summarise(Median=median(Median), .groups = "keep") %>%
    as.data.frame() %>%
    pivot_wider(names_from="model", values_from="Median")

#### Test if DE genes are significantly more predictive than random genes

In [None]:
# One-sided t-test per tissue (t.test() default, i.e. unequal variances):
# is the per-gene median R2 of DE genes greater than that of random genes?
# Prints one p-value per tissue.
#
# The loop variable must not be named `tissue`: inside filter() that name
# resolves to the `tissue` column on both sides of `==`, so the condition is
# always TRUE and every iteration would compare the full, unfiltered tables.
for(region in c("Caudate", "Dentate Gyrus", "DLPFC", "Hippocampus")){
    xx <- de %>% filter(tissue == region)
    yy <- rand %>% filter(tissue == region)
    tt <- t.test(xx$Median, yy$Median, alternative = "greater")$p.value
    print(tt)
}

In [None]:
# Per tissue, regress the per-gene median R2 on gene set (`Type`), keep the
# non-intercept terms and add Bonferroni-adjusted p-values.
df %>%
    group_by(tissue) %>%
    do(fit = broom::tidy(lm(Median ~ Type, data=.))) %>%
    unnest(fit) %>%
    filter(term != '(Intercept)') %>%
    mutate(p.bonf = p.adjust(p.value, "bonf"))

In [None]:
# DE genes only: per tissue, regress the median R2 on `New_Type`
# (DTU vs non-DTU) and add Bonferroni-adjusted p-values.
df %>%
    filter(Type == "DE") %>%
    group_by(tissue) %>%
    do(fit = broom::tidy(lm(Median ~ New_Type, data=.))) %>%
    unnest(fit) %>%
    filter(term != '(Intercept)') %>%
    mutate(p.bonf = p.adjust(p.value, "bonf"))

In [None]:
# Random genes only: per tissue, regress the median R2 on `New_Type`
# (DTU vs non-DTU) and add Bonferroni-adjusted p-values.
df %>%
    filter(Type == "Random") %>%
    group_by(tissue) %>%
    do(fit = broom::tidy(lm(Median ~ New_Type, data=.))) %>%
    unnest(fit) %>%
    filter(term != '(Intercept)') %>%
    mutate(p.bonf = p.adjust(p.value, "bonf"))

### Plot

#### Boxplots

In [None]:
# Top-100 DE genes: median test R2 per model, one panel per tissue.
bxp <- de100 %>%
    ggboxplot(
        x="model", y="Median", color="model", add="jitter",
        facet.by="tissue", ncol=4, legend="none", palette="npg",
        ylim=c(-0.5, 1), xlab="", ylab="Test Score\n(Median R2)",
        panel.labs.font=list(face='bold'),
        ggtheme=theme_pubr(base_size=15, border=TRUE)
    ) +
    rotate_x_text(45) +
    font("xy.title", face="bold")
save_plot(bxp, "summary_boxplots_r2_top100", 9, 4)
bxp

In [None]:
# DTU-flagged genes only: median R2 per model, coloured by New_Type.
bxp <- df %>%
    filter(DTU == "DTU") %>%
    ggboxplot(
        x="model", y="Median", color="New_Type", add="jitter",
        facet.by="tissue", ncol=4, palette="npg",
        ylim=c(-1, 1), xlab="", ylab="Median R2",
        panel.labs.font=list(face='bold'),
        ggtheme=theme_pubr(base_size=15, border=TRUE)
    ) +
    rotate_x_text(45) +
    font("xy.title", face="bold")
save_plot(bxp, "summary_boxplots_r2_byType_dtu", 10, 5)
bxp

In [None]:
# Genes without a DTU flag only. The trailing space in "DE " / "Random "
# comes from paste(Type, "") when New_Type was built.
bxp <- df %>%
    filter(New_Type %in% c("DE ", "Random ")) %>%
    ggboxplot(
        x="model", y="Median", color="New_Type", add="jitter",
        facet.by="tissue", ncol=4, palette="npg",
        ylim=c(-1, 1), xlab="", ylab="Median R2",
        panel.labs.font=list(face='bold'),
        ggtheme=theme_pubr(base_size=15, border=TRUE)
    ) +
    rotate_x_text(45) +
    font("xy.title", face="bold")
save_plot(bxp, "summary_boxplots_r2_byType_de", 10, 5)
bxp

In [None]:
# All genes: median R2 per New_Type, coloured by model.
bxp <- df %>%
    ggboxplot(
        x="New_Type", y="Median", color="model", add="jitter",
        facet.by="tissue", ncol=4, palette="npg",
        ylim=c(-1, 1), xlab="", ylab="Median R2",
        panel.labs.font=list(face='bold'),
        ggtheme=theme_pubr(base_size=15, border=TRUE)
    ) +
    rotate_x_text(45) +
    font("xy.title", face="bold")
save_plot(bxp, "summary_boxplots_r2_byModel", 10, 5)
bxp

In [None]:
# All genes: median R2 per model, coloured by New_Type.
bxp <- df %>%
    ggboxplot(
        x="model", y="Median", color="New_Type", add="jitter",
        facet.by="tissue", ncol=4, palette="npg",
        ylim=c(-1, 1), xlab="", ylab="Median R2",
        panel.labs.font=list(face='bold'),
        ggtheme=theme_pubr(base_size=15, border=TRUE)
    ) +
    rotate_x_text(45) +
    font("xy.title", face="bold")
save_plot(bxp, "summary_boxplots_r2_byType", 10, 5)
bxp

In [None]:
# DE genes only: median R2 per model, coloured by New_Type.
bxp <- df %>%
    filter(Type == "DE") %>%
    ggboxplot(
        x="model", y="Median", color="New_Type", add="jitter",
        facet.by="tissue", ncol=4, palette="npg",
        ylim=c(-1, 1), xlab="", ylab="Median R2",
        panel.labs.font=list(face='bold'),
        ggtheme=theme_pubr(base_size=15, border=TRUE)
    ) +
    rotate_x_text(45) +
    font("xy.title", face="bold")
save_plot(bxp, "summary_boxplots_r2_deg", 10, 5)
bxp

In [None]:
# Random genes only: median R2 per model, coloured by New_Type.
# The y-axis lower limit is -1.2 here, unlike the other panels.
bxp <- df %>%
    filter(Type == "Random") %>%
    ggboxplot(
        x="model", y="Median", color="New_Type", add="jitter",
        facet.by="tissue", ncol=4, palette="npg",
        ylim=c(-1.2, 1), xlab="", ylab="Median R2",
        panel.labs.font=list(face='bold'),
        ggtheme=theme_pubr(base_size=15, border=TRUE)
    ) +
    rotate_x_text(45) +
    font("xy.title", face="bold")
save_plot(bxp, "summary_boxplots_r2_random", 10, 5)
bxp

#### Distribution

In [None]:
# Density of the median R2 for genes without a DTU flag ("DE " vs "Random "),
# one panel per model/tissue combination.
freq <- df %>%
    filter(New_Type %in% c("DE ", "Random ")) %>%
    ggdensity(
        x = "Median", add = "mean", rug = TRUE,
        color = "New_Type", fill = "New_Type", palette="npg",
        facet.by=c("model", "tissue"),
        xlab="Median R2", ylab="",
        panel.labs.font=list(face='bold'),
        ggtheme=theme_pubr(base_size=15, border=TRUE)
    )
save_plot(freq, "summary_distribution_r2_de", 10, 5)
freq

In [None]:
# Density of the median R2 for DTU-flagged genes, per model and tissue.
freq <- df %>%
    filter(DTU == "DTU") %>%
    ggdensity(
        x = "Median", add = "mean", rug = TRUE,
        color = "New_Type", fill = "New_Type", palette="npg",
        facet.by=c("model", "tissue"),
        xlab="Median R2", ylab="",
        panel.labs.font=list(face='bold'),
        ggtheme=theme_pubr(base_size=15, border=TRUE)
    )
save_plot(freq, "summary_distribution_r2_dtu", 10, 5)
freq

In [None]:
# Density of the median R2 for DE genes, split by New_Type.
freq <- df %>%
    filter(Type == "DE") %>%
    ggdensity(
        x = "Median", add = "mean", rug = TRUE,
        color = "New_Type", fill = "New_Type", palette="npg",
        facet.by=c("model", "tissue"),
        xlab="Median R2", ylab="",
        panel.labs.font=list(face='bold'),
        ggtheme=theme_pubr(base_size=15, border=TRUE)
    )
save_plot(freq, "summary_distribution_r2_degs", 10, 5)
freq

In [None]:
# Density of the median R2 for random genes, split by New_Type.
freq <- df %>%
    filter(Type == "Random") %>%
    ggdensity(
        x = "Median", add = "mean", rug = TRUE,
        color = "New_Type", fill = "New_Type", palette="npg",
        facet.by=c("model", "tissue"),
        xlab="Median R2", ylab="",
        panel.labs.font=list(face='bold'),
        ggtheme=theme_pubr(base_size=15, border=TRUE)
    )
save_plot(freq, "summary_distribution_r2_random", 10, 5)
freq

## Explained variance with partial r2

### Load data

In [None]:
# Partial R2 metrics for the DE genes, annotated with the DTU flag from `degs`.
de2 <- data.table::fread("../../de_genes/partial_r2/enet_partial_r2_metrics.tsv") %>%
    mutate(Type = "DE") %>%
    left_join(degs, by=c("Tissue"="tissue", "Geneid"="Feature")) %>%
    as.data.frame() %>%
    # New_Type is "DE DTU" for genes matched in `degs`, otherwise "DE "
    mutate(New_Type = paste(Type, replace_na(DTU, ""))) %>%
    mutate_if(is.character, as.factor)
head(de2, 2)

In [None]:
# Partial R2 metrics restricted to the top 100 DE genes per tissue.
de100_v2 <- data.table::fread("../../de_genes/partial_r2/enet_partial_r2_metrics.tsv") %>%
    mutate(Type = "DE") %>%
    inner_join(top100, by=c("Tissue"="tissue", "Geneid"="Feature"))
dim(de100_v2)
head(de100_v2, 2)

In [None]:
# Partial R2 metrics for the random genes, annotated with the DTU flag
# from `random`.
# NOTE(review): this reads the `rf_` metrics file while the DE gene set
# reads the `enet_` one; confirm the two sets are meant to be compared
# across different files.
rand2 <- data.table::fread("../../random_genes/partial_r2/rf_partial_r2_metrics.tsv") %>%
    mutate(Type = "Random") %>%
    left_join(random, by=c("Tissue"="tissue", "Geneid"="Feature")) %>%
    as.data.frame() %>%
    # New_Type is "Random DTU" for genes matched in `random`, otherwise "Random "
    mutate(New_Type = paste(Type, replace_na(DTU, ""))) %>%
    mutate_if(is.character, as.factor)
head(rand2, 2)

In [None]:
# Stack the DE and random partial R2 tables.
df2 <- bind_rows(de2, rand2)
head(df2, 2)

### Summary

In [None]:
# Mean, median and SD of the partial R2 by tissue and gene set.
df2 %>%
    group_by(Tissue, Type) %>%
    summarise(
        Mean=mean(Partial_R2),
        Median=median(Partial_R2),
        Std=sd(Partial_R2),
        .groups = "keep"
    )

In [None]:
# Mean, median and SD of the partial R2 by tissue for the top-100 DE genes.
de100_v2 %>%
    group_by(Tissue) %>%
    summarise(
        Mean=mean(Partial_R2),
        Median=median(Partial_R2),
        Std=sd(Partial_R2),
        .groups = "keep"
    )

In [None]:
# Mean, median and SD of the partial R2 by tissue and New_Type.
df2 %>%
    group_by(Tissue, New_Type) %>%
    summarise(
        Mean=mean(Partial_R2),
        Median=median(Partial_R2),
        Std=sd(Partial_R2),
        .groups = "keep"
    )

### Significance

In [None]:
## Test whether DE genes are significantly more predictive than random genes
## (one-sided t-test on the partial R2, one p-value printed per tissue).
## The filtered column is `Tissue` (capital T); `tissue` is the loop variable.
for(tissue in c("Caudate", "Dentate Gyrus", "DLPFC", "Hippocampus")){
    xx <- de2 %>%
        filter(Tissue == tissue)
    yy <- rand2 %>%
        filter(Tissue == tissue)
    tt <- t.test(xx$Partial_R2, yy$Partial_R2, alternative = "greater")$p.value
    print(tt)
}

In [None]:
# Per tissue, regress the partial R2 on gene set (`Type`), keep the
# non-intercept terms and add Bonferroni-adjusted p-values.
df2 %>%
    group_by(Tissue) %>%
    do(fit = broom::tidy(lm(Partial_R2 ~ Type, data=.))) %>%
    unnest(fit) %>%
    filter(term != '(Intercept)') %>%
    mutate(p.bonf = p.adjust(p.value, "bonf"))

In [None]:
# DE genes only: per tissue, regress the partial R2 on `New_Type`
# and add Bonferroni-adjusted p-values.
df2 %>%
    filter(Type == "DE") %>%
    group_by(Tissue) %>%
    do(fit = broom::tidy(lm(Partial_R2 ~ New_Type, data=.))) %>%
    unnest(fit) %>%
    filter(term != '(Intercept)') %>%
    mutate(p.bonf = p.adjust(p.value, "bonf"))

In [None]:
# Random genes only: per tissue, regress the partial R2 on `New_Type`
# and add Bonferroni-adjusted p-values.
df2 %>%
    filter(Type == "Random") %>%
    group_by(Tissue) %>%
    do(fit = broom::tidy(lm(Partial_R2 ~ New_Type, data=.))) %>%
    unnest(fit) %>%
    filter(term != '(Intercept)') %>%
    mutate(p.bonf = p.adjust(p.value, "bonf"))

### Plot

#### Boxplots

In [None]:
# Top-100 DE genes: partial R2 per tissue.
bxp <- de100_v2 %>%
    ggboxplot(
        x="Tissue", y="Partial_R2", fill="Tissue", add="jitter",
        palette="npg", legend="none", ncol=4,
        ylim=c(0, 1), xlab="", ylab="Explained Variance",
        panel.labs.font=list(face='bold'),
        ggtheme=theme_pubr(base_size=15, border=TRUE)
    ) +
    rotate_x_text(45) +
    font("xy.title", face="bold")
save_plot(bxp, "explained_variance_boxplots_top100", 6, 5)
bxp

In [None]:
# All genes (DE and random): partial R2 per tissue, coloured by New_Type.
bxp <- df2 %>%
    ggboxplot(
        x="Tissue", y="Partial_R2", color="New_Type", add="jitter",
        palette="npg", ncol=4,
        ylim=c(0, 1), xlab="Brain Region", ylab="Explained Variance",
        ggtheme=theme_pubr(base_size=15, border=TRUE)
    ) +
    rotate_x_text(45) +
    font("xy.title", face="bold")
save_plot(bxp, "explained_variance_boxplot_all", 7, 5.5)
bxp

In [None]:
# DE genes only: partial R2 per tissue, coloured by New_Type.
bxp <- df2 %>%
    filter(Type == "DE") %>%
    ggboxplot(
        x="Tissue", y="Partial_R2", color="New_Type", add="jitter",
        palette="npg", ncol=4,
        ylim=c(0, 1), xlab="Brain Region", ylab="Explained Variance",
        ggtheme=theme_pubr(base_size=15, border=TRUE)
    ) +
    rotate_x_text(45) +
    font("xy.title", face="bold")
save_plot(bxp, "explained_variance_boxplot_de", 6, 5.5)
bxp

In [None]:
# Random genes only: partial R2 per tissue, coloured by New_Type.
bxp <- df2 %>%
    filter(Type == "Random") %>%
    ggboxplot(
        x="Tissue", y="Partial_R2", color="New_Type", add="jitter",
        palette="npg", ncol=4,
        ylim=c(0, 1), xlab="Brain Region", ylab="Explained Variance",
        ggtheme=theme_pubr(base_size=15, border=TRUE)
    ) +
    rotate_x_text(45) +
    font("xy.title", face="bold")
save_plot(bxp, "explained_variance_boxplot_random", 6, 5.5)
bxp

In [None]:
# DTU-flagged genes only: partial R2 per tissue, coloured by Type
# (not New_Type, unlike the neighbouring plots).
bxp <- df2 %>%
    filter(DTU == "DTU") %>%
    ggboxplot(
        x="Tissue", y="Partial_R2", color="Type", add="jitter",
        palette="npg", ncol=4,
        ylim=c(0, 1), xlab="Brain Region", ylab="Explained Variance",
        ggtheme=theme_pubr(base_size=15, border=TRUE)
    ) +
    rotate_x_text(45) +
    font("xy.title", face="bold")
save_plot(bxp, "explained_variance_boxplot_dtu", 6, 5.5)
bxp

In [None]:
# Genes without a DTU flag only. The trailing space in "DE " / "Random "
# comes from paste(Type, "") when New_Type was built.
bxp <- df2 %>%
    filter(New_Type %in% c("DE ", "Random ")) %>%
    ggboxplot(
        x="Tissue", y="Partial_R2", color="New_Type", add="jitter",
        palette="npg", ncol=4,
        ylim=c(0, 1), xlab="Brain Region", ylab="Explained Variance",
        ggtheme=theme_pubr(base_size=15, border=TRUE)
    ) +
    rotate_x_text(45) +
    font("xy.title", face="bold")
save_plot(bxp, "explained_variance_boxplot", 6, 5.5)
bxp

#### Distribution

In [None]:
# Density of the partial R2 for genes without a DTU flag, one panel per tissue.
freq <- df2 %>%
    filter(New_Type %in% c("DE ", "Random ")) %>%
    ggdensity(
        x = "Partial_R2", add = "mean", rug = TRUE,
        color = "New_Type", fill = "New_Type", palette="npg",
        facet.by=c("Tissue"), ncol=4,
        xlab="Explained Variance", ylab="",
        panel.labs.font=list(face='bold'),
        ggtheme=theme_pubr(base_size=15, border=TRUE)
    ) +
    font("xy.title", face="bold")
save_plot(freq, "explained_variance_distribution", 10, 4)
freq

In [None]:
# Density of the partial R2 for DTU-flagged genes, one panel per tissue.
freq <- df2 %>%
    filter(DTU == "DTU") %>%
    ggdensity(
        x = "Partial_R2", add = "mean", rug = TRUE,
        color = "New_Type", fill = "New_Type", palette="npg",
        facet.by=c("Tissue"), ncol=4,
        xlab="Explained Variance", ylab="",
        panel.labs.font=list(face='bold'),
        ggtheme=theme_pubr(base_size=15, border=TRUE)
    ) +
    font("xy.title", face="bold")
save_plot(freq, "explained_variance_distribution_dtu", 10, 4)
freq

In [None]:
# Density of the partial R2 for DE genes, split by New_Type.
freq <- df2 %>%
    filter(Type == "DE") %>%
    ggdensity(
        x = "Partial_R2", add = "mean", rug = TRUE,
        color = "New_Type", fill = "New_Type", palette="npg",
        facet.by=c("Tissue"), ncol=4,
        xlab="Explained Variance", ylab="",
        panel.labs.font=list(face='bold'),
        ggtheme=theme_pubr(base_size=15, border=TRUE)
    ) +
    font("xy.title", face="bold")
save_plot(freq, "explained_variance_distribution_de", 10, 4)
freq

In [None]:
# Density of the partial R2 for random genes, split by New_Type.
freq <- df2 %>%
    filter(Type == "Random") %>%
    ggdensity(
        x = "Partial_R2", add = "mean", rug = TRUE,
        color = "New_Type", fill = "New_Type", palette="npg",
        facet.by=c("Tissue"), ncol=4,
        xlab="Explained Variance", ylab="",
        panel.labs.font=list(face='bold'),
        ggtheme=theme_pubr(base_size=15, border=TRUE)
    ) +
    font("xy.title", face="bold")
save_plot(freq, "explained_variance_distribution_random", 10, 4)
freq

## Reproducibility Information

In [None]:
# Record the current time and the time used so far by this R process.
Sys.time()
proc.time()
# Widen printed output before listing the session and package details.
options(width = 120)
sessioninfo::session_info()