# eQTL boxplot

This script was ported from Python to work around an unidentified plotting error.

In [None]:
suppressPackageStartupMessages({
    library(tidyverse)
    library(ggpubr)
})

## Functions

In [None]:
# Brain region analysed by this notebook. Its lower-cased form is used to
# build the eQTL input path and the output directory name.
tissue <- "DLPFC"

### Cached functions

In [None]:
# Read the residualized expression table and move its `gene_id` column into
# the row names, so that a gene can be looked up by row name.
#
# Returns: a data frame with gene IDs as row names.
get_residualized_df <- function() {
    expression <- data.table::fread("../../_m/genes_residualized_expression.csv")
    column_to_rownames(expression, "gene_id")
}
# Memoised wrapper: the file is read on the first call and cached afterwards.
memRES <- memoise::memoise(get_residualized_df)

# Read the biomart annotation table used to map Ensembl gene IDs to symbols.
#
# Returns: the table read from "../_h/biomart.csv". The value is returned
# directly (and visibly) instead of relying on the invisible value of an
# assignment to an otherwise unused local.
get_biomart_df <- function() {
    data.table::fread("../_h/biomart.csv")
}
# Memoised wrapper: the file is read on the first call and cached afterwards.
memMART <- memoise::memoise(get_biomart_df)

# Read the merged phenotype table from its fixed location on disk.
#
# Returns: the table as read by data.table::fread().
get_pheno_df <- function() {
    pheno_path <- paste0('/ceph/projects/v4_phase3_paper/inputs/',
                         'phenotypes/_m/merged_phenotypes.csv')
    data.table::fread(pheno_path)
}
# Memoised wrapper: the file is read on the first call and cached afterwards.
memPHENO <- memoise::memoise(get_pheno_df)

# Read the transposed PLINK genotype table (.traw) and shorten every column
# name by dropping the first underscore and everything after it.
#
# Returns: the genotype table with the shortened column names.
get_genotypes <- function() {
    genotype_path <- paste0(
        "/ceph/projects/brainseq/genotype/download/topmed/convert2plink/",
        "filter_maf_01/a_transpose/_m/LIBD_Brain_TopMed.traw"
    )
    # Applied to the full vector of column names by rename_with().
    strip_suffix <- function(column_names) gsub('\\_.*', '', column_names)
    rename_with(data.table::fread(genotype_path), strip_suffix)
}
# Memoised wrapper: the file is read on the first call and cached afterwards.
memSNPs <- memoise::memoise(get_genotypes)

# Select gene-level mashr eQTL results for genes that appear in the DLPFC
# rows but in neither the Caudate nor the Hippocampus rows of the summary
# table.
#
# The summary table is read and filtered to `Type == "Gene"` once and then
# reused; the previous version read the gzipped file a second time to apply
# the very same filter.
#
# Returns: a table with the columns gene_id, variant_id and lfsr.
get_tissue_eqtls <- function() {
    mashr_file <- paste0("../../../mashr/summary_table/_m/",
                         "BrainSeq_sexGenotypes_4features_3regions.txt.gz")
    gene_eqtls <- data.table::fread(mashr_file) %>% filter(Type == "Gene")
    # Gene IDs present in the rows of one brain region.
    genes_in <- function(region) {
        gene_eqtls$gene_id[gene_eqtls$Tissue %in% region]
    }
    # setdiff() already de-duplicates and keeps the order of its first argument.
    dlpfc_only <- setdiff(genes_in("DLPFC"),
                          union(genes_in("Caudate"), genes_in("Hippocampus")))
    gene_eqtls %>%
        filter(gene_id %in% dlpfc_only) %>%
        select(gene_id, variant_id, lfsr)
}
# Memoised wrapper: the file is read on the first call and cached afterwards.
memTISSUE <- memoise::memoise(get_tissue_eqtls)

# Read the FastQTL results for the region named by the global `tissue`,
# keep the genes returned by memTISSUE(), and sort the rows by the `BF`
# column in ascending order.
#
# NOTE(review): the memoised wrapper presumably keeps returning the result
# for the `tissue` value seen on the first call -- confirm before changing
# `tissue` within a session.
#
# Returns: the filtered, sorted table.
get_eqtl_df <- function() {
    nominal_path <- paste0("../../../../../prep_eqtl_analysis/", tolower(tissue),
                           "/genes/prepare_expression/fastqtl_nominal/",
                           "multiple_corrections/_m/Brainseq_LIBD.txt.gz")
    nominal <- data.table::fread(nominal_path)
    kept <- filter(nominal, gene_id %in% unique(memTISSUE()$gene_id))
    arrange(kept, BF)
}
# Memoised wrapper: the file is read on the first call and cached afterwards.
memEQTL <- memoise::memoise(get_eqtl_df)

# Build the per-sample plotting table for one variant/gene pair.
#
# Arguments:
#   variant_id  value of the `SNP` column identifying the variant.
#   gene_id     row name of the gene in the residualized expression table.
#
# Steps:
#   1. Look up the variant's annotation (COUNTED / ALT alleles) and stop with
#      a clear message unless exactly one row matches. Previously a missing or
#      duplicated variant only failed later with an unrelated-looking error.
#   2. Transpose the variant's genotype row into one row per `Br*` column
#      (stored as `BrNum`) and add a genotype-letter label (`LETTER`) and a
#      combined "<dosage>\n<letters>" label (`ID`).
#   3. Transpose the gene's expression row into one row per `RNum` and join
#      the phenotype table on `RNum`.
#   4. Join genotype and expression tables on `BrNum` and convert every
#      character column to a factor.
#
# Returns: the joined data frame.
get_snp_df <- function(variant_id, gene_id) {
    annot <- get_geno_annot() %>% filter(SNP == variant_id)
    if (nrow(annot) != 1) {
        stop("Expected exactly one genotype row for variant '", variant_id,
             "', found ", nrow(annot), call. = FALSE)
    }
    geno <- get_snps_df() %>% filter(SNP == variant_id) %>%
        column_to_rownames("SNP") %>% t %>% as.data.frame %>%
        rownames_to_column("BrNum") %>%
        mutate(COUNTED = annot$COUNTED, ALT = annot$ALT) %>%
        rename("SNP" = all_of(variant_id))
    ## Annotated SNPs
    # One label per sample, built without growing a vector inside a loop.
    # as.character() keeps the result a character vector even when
    # letter_snp() returns a plain NA for a missing genotype.
    geno$LETTER <- vapply(
        seq_len(nrow(geno)),
        function(ii) {
            as.character(letter_snp(geno$SNP[ii], geno$COUNTED[ii], geno$ALT[ii]))
        },
        character(1)
    )
    geno <- geno %>% mutate(ID = paste(SNP, LETTER, sep = "\n"))
    expr <- memRES()[gene_id, ] %>% t %>% as.data.frame %>%
        rownames_to_column("RNum") %>% inner_join(memPHENO(), by = "RNum")
    inner_join(geno, expr, by = "BrNum") %>%
        mutate(across(where(is.character), as.factor))
}
# Memoised wrapper: results are cached per (variant_id, gene_id) pair.
memDF <- memoise::memoise(get_snp_df)

### Simple functions

In [None]:
# Variant annotation columns (CHR, SNP, POS, COUNTED, ALT) of the cached
# genotype table.
get_geno_annot <- function() {
    genotypes <- memSNPs()
    select(genotypes, CHR, SNP, POS, COUNTED, ALT)
}

# The `SNP` column plus every column whose name starts with "Br", taken from
# the cached genotype table.
get_snps_df <- function() {
    genotypes <- memSNPs()
    select(genotypes, "SNP", starts_with("Br"))
}

# Turn an allele count into a two-allele genotype label.
#
# Arguments:
#   number  how many copies of `a0` the genotype carries (0, 1 or 2), or NA.
#   a0      allele repeated `number` times.
#   a1      allele repeated `2 - number` times.
#
# Returns: NA when `number` is NA; otherwise a single string. When both
# alleles are one character long the alleles are concatenated ("AG"); when
# either is longer they are separated by a space ("AT A").
#
# The previous version tested `length(a0) == 1`, which is always TRUE for a
# scalar string, so multi-character alleles were never separated. Its
# fallback branch also used `collapse = NULL`, which would have returned one
# string per allele copy instead of a single label.
letter_snp <- function(number, a0, a1) {
    if (is.na(number)) {
        return(NA)
    }
    a0 <- as.character(a0)
    a1 <- as.character(a1)
    # isTRUE() keeps the condition a plain TRUE/FALSE even if an allele is NA.
    single_base <- isTRUE(nchar(a0) == 1 && nchar(a1) == 1)
    alleles <- c(rep(a0, number), rep(a1, 2 - number))
    paste(alleles, collapse = if (single_base) "" else " ")
}

# Write one plot to disk three times: as PDF, PNG and SVG.
#
# Arguments:
#   fn  output path without file extension.
#   p   plot object passed to ggsave().
#   w   width passed to ggsave().
#   h   height passed to ggsave().
save_ggplots <- function(fn, p, w, h) {
    suffixes <- c('.pdf', '.png', '.svg')
    for (suffix in suffixes) {
        target <- paste0(fn, suffix)
        ggsave(filename = target, plot = p, width = w, height = h)
    }
}

# Look up the gene symbol for a (possibly versioned) Ensembl gene ID.
#
# Arguments:
#   gene_id  gene ID; the first "." and everything after it are removed
#            before matching against `ensembl_gene_id`.
#
# Returns: a single string -- the `external_gene_name` of the first matching
# biomart row, or "" when no row matches.
#
# Changes from the previous version: the stripped ID is computed once and
# actually used (it used to be assigned to an unused local and recomputed);
# the comparison is done on plain vectors so a biomart column that happens to
# be called `gene_id` cannot mask the argument; and several matching rows now
# yield one symbol instead of a vector, which the plot title cannot use.
get_gene_symbol <- function(gene_id) {
    ensembl_id <- gsub("\\..*", "", gene_id)
    mart <- memMART()
    # which() drops NA comparisons instead of propagating them.
    hits <- which(mart$ensembl_gene_id == ensembl_id)
    if (length(hits) == 0) {
        return("")
    }
    as.character(mart$external_gene_name[hits[1]])
}

# Draw, print and save a genotype-by-expression boxplot for one eQTL.
#
# Arguments:
#   fn          output path without extension, passed to save_ggplots().
#   gene_id     gene to plot; also the name of the y column in the data.
#   variant_id  variant to plot; used as the x-axis label.
#
# The plot is faceted by `Region` (with "HIPPO" spelled out as
# "Hippocampus"), coloured by `Sex`, and titled with the gene symbol above
# the gene ID. It is saved at 10 x 6 via save_ggplots().
plot_simple_eqtl <- function(fn, gene_id, variant_id) {
    plot_df <- mutate(memDF(variant_id, gene_id),
                      Region = gsub("HIPPO", "Hippocampus", Region))
    boxes <- ggboxplot(
        plot_df,
        x = "ID", y = gene_id,
        fill = "Sex", color = "Sex",
        add = "jitter", add.params = list(alpha = 0.5),
        facet.by = "Region", panel.labs.font = list(face = 'bold'),
        xlab = variant_id, ylab = "Residualized Expression",
        outlier.shape = NA, alpha = 0.4,
        legend = "bottom", palette = "npg",
        ggtheme = theme_pubr(base_size = 20, border = TRUE)
    )
    plot_title <- paste(get_gene_symbol(gene_id), gene_id, sep = '\n')
    bxp <- boxes +
        font("xy.title", face = "bold") +
        ggtitle(plot_title) +
        theme(plot.title = element_text(hjust = 0.5, face = "bold"))
    print(bxp)
    save_ggplots(fn, bxp, 10, 6)
}


## Plot eQTL

In [None]:
# For every gene keep the mashr row with the smallest lfsr, attach the
# FastQTL columns for that gene (dropping FastQTL's own variant_id), and sort
# by lfsr so the strongest effects come first.
#
# Uses the memoised memEQTL() rather than get_eqtl_df(), so the FastQTL file
# is not re-read on every run of this cell, and ungroups the result so later
# verbs do not silently operate per gene.
#
# NOTE(review): the join is on gene_id only. If the FastQTL table has more
# than one row per gene, each gene will appear several times here -- confirm
# it is one row per gene.
eqtl_df <- memTISSUE() %>%
    arrange(lfsr) %>%
    group_by(gene_id) %>%
    slice(1) %>%
    ungroup() %>%
    inner_join(select(memEQTL(), -variant_id), by = "gene_id") %>%
    arrange(lfsr)
eqtl_df %>% head(5)

### Top 5 eQTLs

In [None]:
# Create the output directory only if it is missing, so re-running the
# notebook does not emit an "already exists" warning while genuine creation
# failures are still reported.
if (!dir.exists(tolower(tissue))) {
    dir.create(tolower(tissue))
}

In [None]:
# Plot the first (up to) five rows of `eqtl_df`. The bound is capped at the
# number of available rows: a fixed 1:5 would index past the end and pass NA
# IDs to the plotting function when fewer than five eQTLs exist.
for (num in seq_len(min(5, nrow(eqtl_df)))) {
    variant_id <- eqtl_df$variant_id[num]
    gene_id <- eqtl_df$gene_id[num]
    #eqtl_annot = paste("eQTL q-value:", signif(memEQTL()$BF[num], 2))
    fn <- paste0(tolower(tissue), "/top_", num, "_interacting_eqtl")
    plot_simple_eqtl(fn, gene_id, variant_id)
}

## Session Info

In [None]:
# Reproducibility footer: record when the notebook finished, how much
# CPU/elapsed time the R process has used, and the package versions.
Sys.time()
proc.time()
# Widen the console so the session-info table below is not wrapped.
options(width = 120)
sessioninfo::session_info()