# eQTL boxplot

This script was ported from Python to work around an unexplained plotting error.

In [None]:
suppressPackageStartupMessages({
    library(tidyverse)
    library(ggpubr)
})

## Functions

In [None]:
# Feature level analysed by this notebook. It is read as a global by the
# loaders below when they build their input paths, and by the GWAS table filter.
feature <- "exons"

### Cached functions

In [None]:
# Read the `Brainseq_LIBD.genes.txt.gz` table found under the
# fastqtl_permutation directory of the current global `feature`, keep the
# association columns used downstream and sort the rows by ascending q-value.
get_eqtl_df <- function(){
    egenes_path <- paste0(
        '/ceph/projects/v4_phase3_paper/analysis/eqtl_analysis/all/',
        feature, '/expression_gct/prepare_expression/fastqtl_permutation/',
        '_m/Brainseq_LIBD.genes.txt.gz'
    )
    egenes_tbl <- data.table::fread(egenes_path)
    egenes_tbl <- select(egenes_tbl, gene_id, variant_id, maf, slope,
                         slope_se, pval_nominal, qval)
    arrange(egenes_tbl, qval)
}
# Memoised accessor: the file is read on the first call only.
memEQTL <- memoise::memoise(get_eqtl_df)

# Read the residualized expression CSV for the current global `feature` and
# move its `gene_id` column into the row names, so a feature can be looked up
# by row name.
get_residualized_df <- function(){
    expr_path <- paste0(
        "/ceph/projects/v4_phase3_paper/analysis/eqtl_analysis/all/",
        feature, "/expression_gct/covariates/residualized_expression/_m/",
        feature, "_residualized_expression.csv"
    )
    expr_tbl <- data.table::fread(expr_path)
    column_to_rownames(expr_tbl, "gene_id")
}
# Memoised accessor: the file is read on the first call only.
memRES <- memoise::memoise(get_residualized_df)

# Read the transposed PLINK (.traw) genotype table and shorten every column
# name to the text before its first underscore.
get_genotypes <- function(){
    traw_path <- paste0(
        "/ceph/projects/brainseq/genotype/download/topmed/convert2plink/",
        "filter_maf_01/a_transpose/_m/LIBD_Brain_TopMed.traw"
    )
    # Drops everything from the first "_" onwards in a column name.
    strip_after_underscore <- function(col_names) gsub('\\_.*', '', col_names)
    genotypes <- data.table::fread(traw_path)
    rename_with(genotypes, strip_after_underscore)
}
# Memoised accessor: the file is read on the first call only.
memSNPs <- memoise::memoise(get_genotypes)

### Simple functions

In [None]:
# Translate a feature key ("genes", "transcripts", "exons", "junctions") into
# its capitalised singular label. An unknown key yields NULL.
feature_map <- function(feature){
    display_names <- list(
        "genes"="Gene",
        "transcripts"= "Transcript",
        "exons"= "Exon",
        "junctions"= "Junction"
    )
    display_names[[feature]]
}

# Variant annotation columns (CHR, SNP, POS, COUNTED, ALT) of the cached
# genotype table.
get_geno_annot <- function(){
    genotypes <- memSNPs()
    select(genotypes, CHR, SNP, POS, COUNTED, ALT)
}

# The SNP identifier column plus every column whose name starts with "Br",
# taken from the cached genotype table.
get_snps_df <- function(){
    genotypes <- memSNPs()
    select(genotypes, "SNP", starts_with("Br"))
}

# Build a genotype label from an allele count.
#
# number: how many copies of `a0` are carried (0, 1 or 2); NA returns NA.
# a0, a1: the two allele strings.
#
# Returns a single string made of `number` copies of `a0` followed by
# `2 - number` copies of `a1`. Single-character alleles are concatenated
# directly ("AG"); when either allele is longer than one character the copies
# are separated by a space ("AT A") so the genotype stays readable.
letter_snp <- function(number, a0, a1){
    if(is.na(number)){ return(NA) }
    a0 <- as.character(a0)
    a1 <- as.character(a1)
    # nchar(), not length(): length() of a single string is always 1, so a
    # length()-based test can never detect a multi-character allele.
    single_base <- isTRUE(all(nchar(a0) == 1, nchar(a1) == 1))
    separator <- if(single_base) "" else " "
    # Collapsing one combined vector always yields exactly one label and
    # avoids a stray leading/trailing separator when number is 0 or 2.
    alleles <- c(rep(a0, number), rep(a1, 2 - number))
    paste(alleles, collapse = separator)
}

# Assemble the per-sample table used for one eQTL boxplot.
#
# variant_id: value matched against the SNP column of the genotype table.
# gene_id:    row name looked up in the residualized expression table.
#
# Returns one row per sample (BrNum) present in both tables, holding the
# allele count (SNP), the COUNTED/ALT alleles, the genotype label (LETTER), a
# two-line axis label (ID) and the expression values in a column named after
# `gene_id`. Character columns are converted to factors.
get_snp_df <- function(variant_id, gene_id){
    snp_annot <- get_geno_annot() %>% filter(SNP == variant_id)
    # Transpose the single genotype row so that samples become rows.
    dosage <- get_snps_df() %>% filter(SNP == variant_id) %>%
        column_to_rownames("SNP") %>% t %>% as.data.frame %>%
        rownames_to_column("BrNum")
    dosage <- dosage %>%
        mutate(COUNTED=snp_annot$COUNTED, ALT=snp_annot$ALT) %>%
        rename("SNP"=all_of(variant_id))
    expression <- memRES()[gene_id, ] %>% t %>% as.data.frame %>%
        rownames_to_column("BrNum")
    ## Annotated SNPs: one genotype label per sample
    genotype_labels <- unlist(lapply(seq_along(dosage$COUNTED), function(ii){
        letter_snp(dosage$SNP[ii], dosage$COUNTED[ii], dosage$ALT[ii])
    }))
    dosage <- dosage %>%
        mutate(LETTER=genotype_labels, ID=paste(SNP, LETTER, sep="\n"))
    merged <- inner_join(dosage, expression, by="BrNum")
    merged %>% mutate_if(is.character, as.factor)
}
# Memoised accessor keyed on (variant_id, gene_id).
memDF <- memoise::memoise(get_snp_df)

# Write plot `p` to `fn` with each of the .pdf, .png and .svg extensions,
# using width `w` and height `h` for every file.
save_ggplots <- function(fn, p, w, h){
    extensions <- c('.pdf', '.png', '.svg')
    for(idx in seq_along(extensions)){
        out_path <- paste0(fn, extensions[idx])
        ggsave(out_path, plot=p, width=w, height=h)
    }
}

# Read the BioMart annotation table from ../_h/biomart.csv.
# The table is returned explicitly, like the other loaders in this file,
# instead of relying on the invisible value of a trailing assignment to an
# otherwise unused local.
get_biomart_df <- function(){
    return(data.table::fread("../_h/biomart.csv"))
}
# Memoised accessor: the file is read on the first call only.
memMART <- memoise::memoise(get_biomart_df)

# Look up the gene symbol for an Ensembl gene ID.
#
# gene_id: Ensembl ID; everything from the first "." onwards (the version
#          suffix) is removed before matching.
#
# Returns the `external_gene_name` value(s) of the matching BioMart rows, or
# "" when no row matches.
# NOTE(review): if several rows match, a vector with more than one symbol is
# returned - confirm callers expect that.
get_gene_symbol <- function(gene_id){
    ensembl_id <- gsub("\\..*", "", gene_id)
    # `!!` injects the already-computed value, so a column of the BioMart table
    # that happens to be called `gene_id` or `ensembl_id` cannot shadow it.
    hits <- memMART() %>% filter(ensembl_gene_id == !!ensembl_id)
    if(nrow(hits) == 0){
        return("")
    }
    hits$external_gene_name
}

# Draw, print and save a genotype-vs-expression boxplot for one pair.
#
# fn:         output file name without extension (passed to save_ggplots).
# gene_id:    feature identifier; also the name of the y column.
# variant_id: variant identifier; also used as the x-axis title.
# eqtl_annot: text placed on the last line of the plot title.
# prefix:     text placed above `eqtl_annot` in the plot title.
plot_simple_eqtl <- function(fn, gene_id, variant_id, eqtl_annot, prefix){
    plot_df <- memDF(variant_id, gene_id)
    plot_title <- paste(prefix, eqtl_annot, sep='\n')
    bxp <- ggboxplot(plot_df, x="ID", y=gene_id, fill="red", add="jitter",
                     xlab=variant_id, ylab="Residualized Expression",
                     outlier.shape=NA, add.params=list(alpha=0.5), alpha=0.4,
                     ggtheme=theme_pubr(base_size=20, border=TRUE))
    bxp <- bxp + font("xy.title", face="bold")
    bxp <- bxp + ggtitle(plot_title)
    bxp <- bxp + theme(plot.title = element_text(hjust = 0.5, face="bold"))
    print(bxp)
    # Saved as 7 x 7 in every format handled by save_ggplots.
    save_ggplots(fn, bxp, 7, 7)
}

### GWAS plots

In [None]:
# Read the PGC2 significant-pairs summary table, keep the rows whose `Type`
# equals the label of the current global `feature`, restrict it to the GWAS
# columns used below, drop duplicate rows and sort by ascending P.
get_gwas_snps <- function(){
    gwas_path <- paste0('../../summary_table/_m/Brainseq_LIBD_caudate',
                        '_4features_PGC2.signifpairs.txt.gz')
    gwas_tbl <- data.table::fread(gwas_path)
    gwas_tbl <- filter(gwas_tbl, Type == feature_map(feature))
    gwas_tbl <- select(gwas_tbl,
                       c("variant_id", "gene_id", "rsid", "hg38chrc", "OR",
                         "SE", "P", "A1", "A2",
                         "pgc2_a1_same_as_our_counted", "is_index_snp"))
    gwas_tbl <- distinct(gwas_tbl)
    arrange(gwas_tbl, P)
}
# Memoised accessor: the file is read on the first call only.
memGWAS <- memoise::memoise(get_gwas_snps)

# Rows of the cached GWAS table whose `variant_id` equals `variant`.
get_gwas_snp <- function(variant){
    gwas_tbl <- memGWAS()
    filter(gwas_tbl, variant_id == variant)
}

# Return the GWAS risk allele of a variant: A1 when OR > 1, otherwise A2.
#
# variant: value matched against `variant_id` in the cached GWAS table.
#
# The GWAS table holds one row per distinct variant/feature record, so a
# variant may come back with several rows. Those rows are collapsed to their
# distinct (OR, A1, A2) combinations first; a length > 1 `if` condition is an
# error in R >= 4.2. Stops with an explicit message when the variant is absent
# or when its rows disagree on OR/alleles.
get_risk_allele <- function(variant){
    gwas_snp <- get_gwas_snp(variant) %>% distinct(OR, A1, A2)
    if(nrow(gwas_snp) == 0){
        stop("No GWAS record found for variant: ", variant, call. = FALSE)
    }
    if(nrow(gwas_snp) > 1){
        stop("Conflicting GWAS OR/alleles for variant: ", variant,
             call. = FALSE)
    }
    if(gwas_snp$OR > 1){
        ra <- gwas_snp$A1
    }else{
        ra <- gwas_snp$A2
    }
    return(ra)
}

# Inner join of the cached eQTL table with the cached GWAS table on
# `variant_id`.
# NOTE(review): both tables also carry `gene_id`, which is not part of the
# join key - confirm that joining on the variant alone is intended.
get_eqtl_gwas_df <- function(){
    eqtl_tbl <- memEQTL()
    gwas_tbl <- memGWAS()
    inner_join(eqtl_tbl, gwas_tbl, by="variant_id")
}

# Per-sample table for a variant/feature pair, with the allele count flipped
# (SNP -> 2 - SNP) and the ID label rebuilt when either
#   * pgc2_a1_same_as_our_counted is true and OR < 1, or
#   * pgc2_a1_same_as_our_counted is false and OR > 1.
# In every other case the cached table is returned unchanged.
get_gwas_ordered_snp_df <- function(variant_id, gene_id, pgc2_a1_same_as_our_counted, OR){
    df <- memDF(variant_id, gene_id)
    needs_flip <- if(pgc2_a1_same_as_our_counted) OR < 1 else OR > 1
    if(needs_flip){
        # ID is rebuilt from the already-flipped SNP; LETTER is left as is.
        df <- mutate(df, SNP = 2-SNP, ID=paste(SNP, LETTER, sep="\n"))
    }
    df
}

# Draw, print and save a boxplot from the table returned by
# get_gwas_ordered_snp_df for one variant/feature pair.
#
# fn:         output file name without extension (passed to save_ggplots).
# gene_id:    feature identifier; also the name of the y column.
# variant_id: variant identifier; also used as the x-axis title.
# eqtl_annot: accepted but not used by this function.
# pgc2_a1_same_as_our_counted, OR: forwarded to get_gwas_ordered_snp_df.
# title:      plot title.
plot_gwas_eqtl <- function(fn, gene_id, variant_id, eqtl_annot, 
                           pgc2_a1_same_as_our_counted, OR, title){
    plot_df <- get_gwas_ordered_snp_df(variant_id, gene_id,
                                       pgc2_a1_same_as_our_counted, OR)
    # A flipped table carries ID as character; make it a factor again.
    plot_df <- mutate_if(plot_df, is.character, as.factor)
    bxp <- ggboxplot(plot_df, x="ID", y=gene_id, fill="red", add="jitter",
                     xlab=variant_id, ylab="Residualized Expression",
                     outlier.shape=NA, add.params=list(alpha=0.5), alpha=0.4,
                     ggtheme=theme_pubr(base_size=20, border=TRUE))
    bxp <- bxp + font("xy.title", face="bold")
    bxp <- bxp + ggtitle(title)
    bxp <- bxp + theme(plot.title = element_text(hjust = 0.5, face="bold"))
    print(bxp)
    save_ggplots(fn, bxp, 7, 7)
}

## Plot eQTL

In [None]:
# Map a BrainSeq exon identifier to its DRD2 exon label.
# An identifier that is not in the table yields NULL.
get_drd2_exon_annotation <- function(brainseq_exon_id){
    exon_labels <- list(
        'e667152'= 'DRD2 exon 1S',
        'e667153'= 'DRD2 exon 2',
        'e667154'= 'DRD2 exon 3',
        'e667155'= 'DRD2 exon 4',
        'e667156'= 'DRD2 exon 5',
        'e667157'= 'DRD2 exon 7',
        'e667158'= 'DRD2 exon 8',
        'e667159'= 'DRD2 exon 1L',
        'e667164'= 'DRD2 exon 6'
    )
    exon_labels[[brainseq_exon_id]]
}

### DRD2 plot

In [None]:
# Transcript IDs of the two DRD2 transcripts used to select exons.
drd2_short <- "ENST00000346454.7"
drd2_long <- "ENST00000362072.7"
# Exons belonging to either transcript, one row per BrainSeq exon ID.
exons_df <- data.table::fread("../../../../DE_gwas_eqtl_integration/_h/exons.csv")
exons_df <- filter(exons_df, transcript_id %in% c(drd2_short, drd2_long))
exons_df <- select(exons_df, gene_id, gene_name, exon_id, brainseq_exon_id)
exons_df <- distinct(exons_df, brainseq_exon_id, .keep_all=TRUE)
all_drd2_exons <- exons_df$brainseq_exon_id
# eQTL rows whose feature ID is one of those exons.
drd2_df <- filter(memEQTL(), gene_id %in% all_drd2_exons)
drd2_df

In [None]:
# One boxplot per row of drd2_df, titled with the exon label, the exon ID(s)
# found in exons_df and the eQTL q-value.
for(row_idx in seq_len(nrow(drd2_df))){
    target_exon <- drd2_df$gene_id[row_idx]
    anno <- get_drd2_exon_annotation(target_exon)
    # File name: the label with spaces replaced by underscores.
    fn <- paste("drd2_eqtl", gsub(" ", "_", anno), sep="_")
    exon_ids <- exons_df %>% filter(brainseq_exon_id == target_exon) %>%
        pull(exon_id)
    prefix <- paste(anno, exon_ids, sep='\n')
    eqtl_annot <- paste("eQTL q-value:", signif(drd2_df$qval[row_idx], 2))
    plot_simple_eqtl(fn, target_exon, drd2_df$variant_id[row_idx],
                     eqtl_annot, prefix)
}

### GWAS association

In [None]:
# GWAS rows whose feature ID is one of the DRD2 exons (printed as cell output).
filter(memGWAS(), gene_id %in% all_drd2_exons)

## Session Info

In [None]:
# Reproducibility footer: current time, elapsed process time, then the
# session information.
Sys.time()
proc.time()
# Set the console width option before the session report is printed.
options(width = 120)
sessioninfo::session_info()