In [1]:
library(scales)
library(stringr)
library(RColorBrewer)

### LCL caQTLs from the RASQUAL paper

In [2]:
# Load the published LCL caQTL table (RASQUAL output) with read.table defaults.
caqt <- read.table(
  file = "/nfs/lab/projects/pbmc_snATAC/data/publicdata/rasqual_paper/rasqual_atac_1M"
)

In [3]:
# Attach the 25 column names to the LCL caQTL table, in column order.
names(caqt) <- c(
  "Feature", "rsID", "Chromosome", "position", "Ref", "Alt", "Af",
  "HWEChi_square", "IA", "Log10_qval", "Chi_square", "Effect_size",
  "Sequencing_mapping_error_rate", "Ref_allele_bias", "Overdispersion",
  "SNPid_within_region", "No_fSNPs", "No_tested_SNPs",
  "No_iterations_for_H0", "No_iterations_for_H1", "ties",
  "Log_likelihood_H0", "Convergence_status", "r2_fSNPs", "r2_rSNP"
)

In [4]:
# Upper-tail chi-square p-value (1 d.f.) from column 11 of the table.
caqt$P_VAl <- pchisq(caqt[[11]], df = 1, lower.tail = FALSE)

# Variant identifier of the form chr<Chromosome>:<position>:<Ref>:<Alt>.
caqt$id <- with(
  caqt,
  paste0("chr", Chromosome, ":", position, ":", Ref, ":", Alt)
)

# Features listed in pid.fdr10.txt are flagged in `fdr_10`.
caq <- readLines(
  "/nfs/lab/projects/pbmc_snATAC/data/publicdata/rasqual_paper/pid.fdr10.txt"
)
caqt$fdr_10 <- is.element(caqt$Feature, caq)

In [5]:
# Cell-type labels iterated over in the LCL comparison.
celltypes <- c("bulk", "mono", "t", "nk", "b")

In [6]:
# Directory holding the per-cell-type "<celltype>_intersected.bed" files.
dir <- "/nfs/lab/projects/pbmc_snATAC/analysis_v2/compare_lcls/"

In [7]:
# Directory holding the summarized caQTL tables (run3).
summarydir <- "/nfs/lab/projects/pbmc_snATAC/analysis_v2/summarized_caqtls/run3/"

In [8]:
# Lead caQTL table; later cells subset it by its `cell` column.
longf <- read.table(
  file = paste0(summarydir, "EUR_caqtl_leads_run3.tsv"),
  header = TRUE
)

In [9]:
# Compare the snATAC caQTLs of every cell type with the published LCL caQTLs.
#
# For each entry of `celltypes` this cell
#   1. reads the summary statistics and the pre-computed peak intersection,
#   2. runs Fisher's exact test on peak-level overlap between FDR10 snATAC
#      caQTL peaks and LCL caQTL peaks (collected into `fimatall`: p-value,
#      odds ratio, then the four 2x2 counts in column-major order),
#   3. draws one scatter panel of effect sizes for variant/peak pairs tested
#      in both studies, into a single multi-panel PDF.
#
# Reads globals: celltypes, summarydir, dir, longf, caq, caqt.
#options(repr.plot.width=11, repr.plot.height=6)
pdf("/nfs/lab/projects/pbmc_snATAC/analysis_v2/figures/compare_lcl.pdf")
par(mfrow = c(3, 3), mar = c(2, 4, 4, 2))

# Category code is "<LCL fdr_10><snATAC flag_fdr10>" with 1 = significant.
# The levels are fixed so that colours and legend labels stay aligned even
# when a category is absent for a cell type.
cat_levels <- c("01", "10", "11")
col3 <- alpha(c("darkorchid4", "gray60", "black"), 0.7)
#col3 = alpha(c( "red","purple","black"), 0.7)
#col3 = alpha(c( "red",rainbow(25)[16],"black"), 0.7)
#col3 = alpha(c( brewer.pal(9, "Purples")[8],'gray',"black"), 0.7)

# One result vector per cell type; bound into a data frame after the loop
# instead of growing `fimatall` with rbind() on every iteration.
fisher_rows <- vector("list", length(celltypes))
names(fisher_rows) <- celltypes

for (ct in celltypes) {
  tabname <- paste0(summarydir, ct, "_caqtl_sumstats_run3.tsv")
  res_all <- read.table(tabname, header = TRUE, stringsAsFactors = FALSE)
  res <- longf[which(longf$cell == ct), ]
  res_all$flag_fdr10 <- res_all$Feature %in%
    res$Feature[res$flag_fdr10 == TRUE & res$exclude == FALSE]
  out <- paste0(dir, ct, "_intersected.bed")

  # How the intersection file was produced (kept for reference, not run):
  # res$chrom = as.numeric(substring(res$Chromosome, 4))
  # coo = str_split_fixed(res$Feature, "\\:",2)[,2]
  # coo = str_split_fixed(coo, "\\-",2)
  # a   = paste0(dir, ct, "_tested_peaks.bed")
  # write.table(cbind(res$chrom, coo),a , sep="\t", quote=F, col.names=F, row.names=F)
  # b   = '/nfs/lab/projects/pbmc_snATAC/data/publicdata/rasqual_paper/peaks.bed'
  #    system(paste("bedtools intersect -a", a, '-b' , b,  "-wo >", out))

  inter <- read.table(out)
  inter$peak <- paste0("chr", inter[, 1], ":", inter[, 2], "-", inter[, 3])
  # Join on the first column of res_all and the `peak` string; V7 is matched
  # against the LCL FDR10 feature list below.
  res_int <- merge(res_all, inter[, c("V7", "peak")], by.x = 1, by.y = 2)
  res_int$paperqtl <- res_int$V7 %in% caq
  res_int_uni <- subset(res_int, !duplicated(res_int$Feature))

  # Build the 2x2 table once, with both levels always present, so the four
  # counts keep the same order for every cell type.
  overlap_tab <- table(
    factor(res_int_uni$flag_fdr10, levels = c(FALSE, TRUE)),
    factor(res_int_uni$paperqtl, levels = c(FALSE, TRUE))
  )
  ft <- fisher.test(overlap_tab)
  fisher_rows[[ct]] <- c(ft$p.value, unname(ft$estimate), as.vector(overlap_tab))

  # Variant/peak pairs tested in both studies (matched on variant id and
  # LCL feature); Effect_size.x is snATAC, Effect_size.y is LCL.
  mm1 <- merge(
    res_int[, c("varID", "V7", "flag_fdr10", "Effect_size", "P_value")],
    caqt[, c("id", "Feature", "fdr_10", "Effect_size")],
    by = 1:2
  )

  # Vectorised on the logical columns: apply() would first coerce the data
  # frame to a character matrix and compare the flags as formatted strings.
  mm1$cat1 <- paste0(as.integer(mm1$fdr_10), as.integer(mm1$flag_fdr10))
  # Keep the row with the smallest snATAC P_value per LCL feature.
  mm1 <- mm1[order(mm1$P_value), ]
  mm1 <- subset(mm1, !duplicated(mm1$V7))

  # Pairs significant in at least one of the two studies.
  mms <- subset(mm1, cat1 != "00")
  mms$cat1 <- factor(mms$cat1, levels = cat_levels)

  if (nrow(mms) == 0) {
    # Nothing to plot: keep the panel slot so the layout stays aligned.
    warning("no caQTL pairs to plot for cell type ", ct, call. = FALSE)
    plot.new()
    next
  }

  # Discordant = effect sizes on opposite sides of 0.5.
  disc <- sum((mms$Effect_size.x > 0.5 & mms$Effect_size.y < 0.5) |
                (mms$Effect_size.x < 0.5 & mms$Effect_size.y > 0.5))
  conc <- round(1 - (disc / nrow(mms)), 2) * 100

  plot(mms$Effect_size.x, mms$Effect_size.y, pch = 19, cex = 0.7,
       col = col3[as.integer(mms$cat1)],
       ylab = "LCLs caQTL effect", xlab = paste(ct, "snATAC-caQTLs effect"))
  # Shade the two concordant quadrants.
  rect(0, 0, 0.5, 0.5, col = alpha("gray", 0.2))
  rect(0.5, 0.5, 1, 1, col = alpha("gray", 0.2))
  mtext(paste0(" ", conc, "% concord."), line = -1.5, adj = 0, cex = 0.7)
  mtext(paste0(nrow(mms), " caQTLs/ ", nrow(mm1), " total sites"),
        line = 2, cex = 0.7)
  # Legend order follows cat_levels: snATAC only, LCL only, both.
  legend("bottom", pch = 19, col = col3,
         legend = paste(c(ct, "LCLs", "both"), table(mms$cat1), sep = "="),
         text.width = 0.18, cex = 1.3,
         inset = c(0, 1), xpd = TRUE, horiz = TRUE, bty = "n")
}

fimatall <- as.data.frame(do.call(rbind, fisher_rows))
dev.off()

In [10]:
# Label the Fisher summary: one row per cell type; columns are p-value,
# odds ratio and the four contingency counts ("1".."4").
row.names(fimatall) <- celltypes
names(fimatall) <- c("pv", "or", as.character(1:4))

In [11]:
# Show the per-cell-type Fisher results. Columns 1-4 are the counts of
# table(flag_fdr10, paperqtl) unlisted column-major, i.e. (when both levels
# are present) FALSE/FALSE, TRUE/FALSE, FALSE/TRUE, TRUE/TRUE.
fimatall

Unnamed: 0_level_0,pv,or,1,2,3,4
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
bulk,0.0,15.71482,62552,1401,1537,541
mono,1.525503e-189,12.9916,48259,976,1153,303
t,2.0015249999999997e-229,16.12184,45333,889,1050,332
nk,2.927821e-110,16.7227,32048,360,809,152
b,2.0242950000000002e-206,28.48255,34370,307,927,236


In [12]:
# bulk row of fimatall: column 4 / (column 4 + column 2), i.e. the share of
# bulk FDR10 snATAC caQTL peaks that are also LCL caQTL peaks.
541/ (541+1401)

In [21]:
# bulk row of fimatall: column 4 + column 2 (all rows with flag_fdr10 TRUE).
541+1401

In [11]:
# NOTE(review): 524 and 1920 do not appear in the fimatall table printed
# above; presumably left over from an earlier run -- confirm or remove.
524/ 1920

### Compare with Blueprint

In [13]:
# The Blueprint tables below are read with relative file names, so the
# working directory is switched to the folder that contains them.
setwd("/nfs/lab/projects/pbmc_snATAC/data/publicdata/blueprint_qtls/")

In [14]:
# Column names applied to each Blueprint table before it is used.
cc <- c(
  "varID", "snpId", "phenotypeID", "p.value",
  "beta", "Bonfe.p", "FDR", "alt.AF"
)

# Read the four Blueprint tables (H3K27ac peak and gene files for mono and
# tcel) from the current working directory, in the order used downstream.
li <- lapply(
  c(
    "mono.H3K27ac_peak_WASP_CHT_all_Jun28.sub.txt",
    "mono.gene_WASP_CHT_all_Jun28.sub.txt",
    "tcel.H3K27ac_peak_WASP_CHT_all_Jun28.sub.txt",
    "tcel.gene_WASP_CHT_all_Jun28.sub.txt"
  ),
  read.table
)

# Individual handles to the same tables.
h3k_mono <- li[[1]]
gene_mono <- li[[2]]
h3k_tcel <- li[[3]]
gene_tcel <- li[[4]]

In [15]:
# snATAC cell types compared against the Blueprint tables.
celltypes_macro <- c("mono", "t")

In [16]:
# Enrichment of snATAC caQTLs among Blueprint QTLs.
#
# Rows of PC / OR / PV are the entries of `celltypes_macro`; columns are the
# tables in `li`. For every pair:
#   PC = fraction of lead variants (also present in the Blueprint table)
#        whose variant ID is Blueprint-significant at FDR < 0.1,
#   OR / PV = odds ratio and p-value of Fisher's exact test on
#        (snATAC FDR10 and not excluded) x (Blueprint FDR < 0.1).
# Cells stay NA when the contingency table is not 2x2.
#
# Reads globals: li, cc, celltypes_macro, longf.
PC <- OR <- PV <- matrix(NA, ncol = length(li), nrow = length(celltypes_macro))

for (b in seq_along(li)) {
  bpr <- li[[b]]
  colnames(bpr) <- cc
  # Convert "<chrom>_<pos>_..." style IDs to the "chr<chrom>:<pos>:..." form
  # used by longf$varID.
  bpr$varID <- paste0("chr", gsub("_", ":", bpr$varID))
  # Blueprint-significant variant IDs; does not depend on the cell type, so
  # it is computed once per table.  ## their threshold was 5%
  sig_ids <- bpr$varID[which(bpr$FDR < 0.1)]

  for (k in seq_along(celltypes_macro)) {
    cl <- celltypes_macro[k]
    leads <- subset(longf, cell == cl)
    leads <- subset(leads, varID %in% bpr$varID)
    leads$test1 <- leads$varID %in% sig_ids
    PC[k, b] <- mean(leads$test1)

    m <- merge(bpr, leads, by = "varID")
    tb <- table(m$flag_fdr10 & (m$exclude == FALSE), m$FDR < 0.1)
    if (!identical(dim(tb), c(2L, 2L))) {
      # fisher.test() needs both levels on both margins; leave NA in place
      # rather than aborting the remaining comparisons.
      warning("contingency table is not 2x2 for cell type ", cl,
              " vs Blueprint table ", b, "; OR/PV left as NA", call. = FALSE)
      next
    }
    ft <- fisher.test(tb)
    OR[k, b] <- ft$estimate
    PV[k, b] <- ft$p.value
  }
}

In [17]:
# Label all three result matrices: rows = snATAC cell types, columns =
# Blueprint tables in the order they were stored in `li`.
dimnames(PC) <- dimnames(OR) <- dimnames(PV) <- list(
  celltypes_macro,
  c("H3K27ac.mono", "eQTL.mono", "H3K27ac.tcel", "eQTL.tcel")
)

In [18]:
# Fisher odds ratios: rows = snATAC cell type, columns = Blueprint table.
OR

Unnamed: 0,H3K27ac.mono,eQTL.mono,H3K27ac.tcel,eQTL.tcel
mono,2.262146,1.243598,1.542288,1.150429
t,1.649647,1.194598,3.094242,1.374615


In [19]:
# Fisher p-values, same layout as OR.
PV

Unnamed: 0,H3K27ac.mono,eQTL.mono,H3K27ac.tcel,eQTL.tcel
mono,1.891995e-119,1.56559e-20,4.372042e-15,7.479878e-08
t,2.130746e-22,9.343541e-10,5.998643e-117,6.118169000000001e-28


In [20]:
# Bar plots of the Blueprint odds ratios: first panel = H3K27ac QTL tables
# (OR columns 1 and 3), second panel = eQTL tables (OR columns 2 and 4).
# Within each cell-type group the gray bar is the mono Blueprint table and
# the black bar the tcel one.
pdf("/nfs/lab/projects/pbmc_snATAC/analysis_v2/figures/Compare_with_Blueprint.pdf")
par(xpd = TRUE)
#layout(matrix(c(1,1,3,2,2,4,5,5,5), ncol=3, byrow=T))
par(mfcol = c(2, 2))

# NOTE(review): PC is not plotted here; the row names are still cleared as
# in the original cell in case later code relies on it -- confirm.
rownames(PC) <- NULL

par(pin = c(0.8, 1))

# Y-axis range: keep the intended default top, but extend it when a bar is
# taller. Because xpd = TRUE disables clipping, a bar above the axis limit
# would otherwise be drawn past the end of the axis.
or_ylim <- function(values, default_top) {
  finite_values <- values[is.finite(values)]
  if (length(finite_values) > 0 && max(finite_values) > default_top) {
    c(0, max(finite_values) * 1.05)
  } else {
    c(0, default_top)
  }
}

h3k_or <- t(OR[, c(1, 3)])
eqtl_or <- t(OR[, c(2, 4)])

barplot(h3k_or, beside = TRUE, col = c("gray", "black"),
        ylab = "Odds ratio", space = c(0.1, 0.4),
        ylim = or_ylim(h3k_or, 3), las = 1)
barplot(eqtl_or, beside = TRUE, col = c("gray", "black"),
        ylab = "Odds ratio", space = c(0.1, 0.4),
        ylim = or_ylim(eqtl_or, 1.5), las = 1)
dev.off()