In [4]:
library(tidyr)
library(stringr)
library(scales)

In [5]:
# All relative paths used below ('broad/...', 'fine/...', '../figures/...')
# are resolved against this directory.
setwd('/nfs/lab/projects/pbmc_snATAC/analysis_v2/rasqual_eur/')

In [6]:
# Cell type labels; each one is used further down as a sub-directory name
# under 'broad/' (macro) or 'fine/' (micro).
celltypes_macro <- c("b", "t", "mono", "nk")
celltypes_micro <- c(
  "act_cd4_t", "adaptive_NK", "cDC", "cMono", "cyto_cd8_t",
  "cyto_nk", "iMono", "mem_b", "mem_cd8_t", "mkc",
  "naive_b", "naive_cd4_t", "naive_cd8_t", "ncMono", "tReg"
)

In [19]:
# Build the comparison table for the broad ("macro") cell types.
#
# For every cell type three tables are read from broad/<cell>/:
#   Results_pop1_all.tsv, Results_pop1_lead.tsv and Results_run3_lead.tsv.
# The run3 lead rows with flag_fdr10 set are inner-joined on (Feature, rsID)
# with the pop1 "all" table, so after the merge Effect_size.x is the run3
# value and Effect_size.y is the pop1 value. `caQTL` marks features that are
# flagged (flag_fdr10) in the pop1 lead table.
#
# np1 stores, per cell type, the unique pop1 lead features with flag_fdr10.
# The per-cell-type tables are collected in a list and bound once at the end
# instead of growing a data frame with rbind() on every iteration.
np1 <- list()
macro_parts <- list()
for (cc in celltypes_macro) {
  cell_dir <- file.path("broad", cc)
  res <- read.table(file.path(cell_dir, "Results_pop1_all.tsv"),
                    header = TRUE, stringsAsFactors = FALSE)
  reslead <- read.table(file.path(cell_dir, "Results_pop1_lead.tsv"),
                        header = TRUE, stringsAsFactors = FALSE)
  res2 <- read.table(file.path(cell_dir, "Results_run3_lead.tsv"),
                     header = TRUE, stringsAsFactors = FALSE)

  np1[[cc]] <- unique(reslead$Feature[reslead$flag_fdr10])

  res$caQTL <- res$Feature %in% reslead$Feature[reslead$flag_fdr10]
  # Rows whose rsID is the literal string "SKIPPED" are dropped before merging.
  res <- subset(res, rsID != "SKIPPED")
  res2 <- subset(res2, flag_fdr10 == TRUE)
  res2 <- merge(res2, res[, c("Feature", "rsID", "Effect_size", "caQTL")],
                by = c("Feature", "rsID"))
  res2$cell <- cc
  macro_parts[[cc]] <- res2
}
# unname() matters: rbind() on a *named* list would prefix the row names
# with the list names, which the incremental rbind() never did.
rr <- if (length(macro_parts) > 0) {
  do.call(rbind, unname(macro_parts))
} else {
  data.frame()
}
macro <- rr

In [20]:
# QC filter: keep rows with Ref_allele_bias strictly between 0.2 and 0.8
# and Sequencing_mapping_error_rate below 0.1.
macro <- subset(
  macro,
  Ref_allele_bias > 0.2 &
    Ref_allele_bias < 0.8 &
    Sequencing_mapping_error_rate < 0.1
)

In [21]:
# Number of distinct features among the macro rows flagged as caQTL.
with(macro, length(unique(Feature[caQTL == TRUE])))

In [22]:
# Count of caQTL-flagged rows per broad cell type.
aggregate(caQTL ~ cell, data = macro, FUN = sum)

cell,caQTL
<chr>,<int>
b,108
mono,392
nk,16
t,51


In [23]:
# Build the comparison table for the fine-grained ("micro") cell types.
#
# For every cell type three tables are read from fine/<cell>/:
#   Results_pop1_all.tsv, Results_pop1_lead.tsv and Results_run3_lead.tsv.
# The run3 lead rows with flag_fdr10 set are inner-joined on (Feature, rsID)
# with the pop1 "all" table, so after the merge Effect_size.x is the run3
# value and Effect_size.y is the pop1 value. `caQTL` marks features that are
# flagged (flag_fdr10) in the pop1 lead table.
#
# np2 stores, per cell type, the unique pop1 lead features with flag_fdr10.
# The per-cell-type tables are collected in a list and bound once at the end
# instead of growing a data frame with rbind() on every iteration.
np2 <- list()
micro_parts <- list()
for (cc in celltypes_micro) {
  cell_dir <- file.path("fine", cc)
  res <- read.table(file.path(cell_dir, "Results_pop1_all.tsv"),
                    header = TRUE, stringsAsFactors = FALSE)
  reslead <- read.table(file.path(cell_dir, "Results_pop1_lead.tsv"),
                        header = TRUE, stringsAsFactors = FALSE)
  res2 <- read.table(file.path(cell_dir, "Results_run3_lead.tsv"),
                     header = TRUE, stringsAsFactors = FALSE)

  np2[[cc]] <- unique(reslead$Feature[reslead$flag_fdr10])

  res$caQTL <- res$Feature %in% reslead$Feature[reslead$flag_fdr10]
  # Rows whose rsID is the literal string "SKIPPED" are dropped before merging.
  res <- subset(res, rsID != "SKIPPED")
  res2 <- subset(res2, flag_fdr10 == TRUE)
  res2 <- merge(res2, res[, c("Feature", "rsID", "Effect_size", "caQTL")],
                by = c("Feature", "rsID"))
  res2$cell <- cc
  micro_parts[[cc]] <- res2
}
# unname() matters: rbind() on a *named* list would prefix the row names
# with the list names, which the incremental rbind() never did.
rr <- if (length(micro_parts) > 0) {
  do.call(rbind, unname(micro_parts))
} else {
  data.frame()
}
micro <- rr

In [24]:
# QC filter: keep rows with Ref_allele_bias strictly between 0.2 and 0.8
# and Sequencing_mapping_error_rate below 0.1.
micro <- subset(
  micro,
  Ref_allele_bias > 0.2 &
    Ref_allele_bias < 0.8 &
    Sequencing_mapping_error_rate < 0.1
)

In [25]:
# Number of distinct features among the micro rows flagged as caQTL.
with(micro, length(unique(Feature[caQTL == TRUE])))

In [39]:
# Count of caQTL-flagged rows per fine-grained cell type.
aggregate(caQTL ~ cell, data = micro, FUN = sum)

cell,caQTL
<chr>,<int>
act_cd4_t,25
adaptive_NK,3
cDC,0
cMono,447
cyto_cd8_t,44
cyto_nk,10
iMono,4
mem_b,38
mem_cd8_t,28
mkc,4


In [26]:
# One data frame per broad cell type, named by the value of `cell`.
sp1 <- split(macro, f = macro$cell)

In [27]:
# One data frame per fine-grained cell type, named by the value of `cell`.
sp2 <- split(micro, f = micro$cell)

In [28]:
# Number of "discordant" rows: exactly one of the two effect sizes exceeds 0.5.
# xor() on the two columns is the vectorized equivalent of the row-wise
# apply(..., function(x) sum(x > 0.5) == 1) and avoids coercing the data frame
# to a matrix.
disctot1 <- sum(xor(macro$Effect_size.x > 0.5, macro$Effect_size.y > 0.5))
disctot2 <- sum(xor(micro$Effect_size.x > 0.5, micro$Effect_size.y > 0.5))
  


In [29]:
# Overall discordant fraction across the macro and micro tables combined.
n_rows_total <- nrow(macro) + nrow(micro)
fract <- (disctot1 + disctot2) / n_rows_total

In [30]:
# Show the current value of fract.
fract

In [36]:
# Per broad cell type: fraction of rows where exactly one of the two effect
# sizes exceeds 0.5. vapply() guarantees a numeric vector even when sp1 is
# empty; xor() replaces the row-wise apply() over the data frame.
vapply(
  sp1,
  function(tab) {
    sum(xor(tab$Effect_size.x > 0.5, tab$Effect_size.y > 0.5)) / nrow(tab)
  },
  numeric(1)
)

In [38]:
# Per fine-grained cell type: fraction of rows where exactly one of the two
# effect sizes exceeds 0.5. vapply() guarantees a numeric vector even when sp2
# is empty; xor() replaces the row-wise apply() over the data frame.
vapply(
  sp2,
  function(tab) {
    sum(xor(tab$Effect_size.x > 0.5, tab$Effect_size.y > 0.5)) / nrow(tab)
  },
  numeric(1)
)

In [46]:
# Draw one scatter panel of Effect_size.y against Effect_size.x for a single
# cell type.
#   tab         data frame with Effect_size.x, Effect_size.y and caQTL columns
#   panel_title text used as the panel's main title
# Points are coloured by caQTL (FALSE -> colour 1, TRUE -> colour 2). The
# lower-left and upper-right quadrants around (0.5, 0.5) are shaded, and the
# percentage of rows where exactly one effect size exceeds 0.5 is written in
# the panel. All intermediate values stay local to the function, so the
# global `fract` is no longer overwritten by plotting.
plot_effect_panel <- function(tab, panel_title) {
  plot(Effect_size.y ~ Effect_size.x, tab,
       pch = 19, main = panel_title, cex = 0.5, xlab = NA, ylab = NA,
       xlim = c(0.1, 0.9), ylim = c(0.1, 0.9), col = tab$caQTL + 1)
  mtext("Population only effect", side = 2, cex = 0.7, line = 2)
  mtext("Combined population-ASE effect", side = 1, cex = 0.7, line = 2)

  rect(0, 0, 0.5, 0.5, col = alpha("gray", 0.2))
  rect(0.5, 0.5, 1, 1, col = alpha("gray", 0.2))

  n_discordant <- sum(xor(tab$Effect_size.x > 0.5, tab$Effect_size.y > 0.5))
  frac_discordant <- n_discordant / nrow(tab)
  mtext(paste(" ", round(frac_discordant, 4) * 100, "% \n discordant"),
        line = -2, adj = 0, cex = 0.7)
}

# One panel per cell type: broad cell types first, then fine-grained ones.
# Iterating over the names of sp1/sp2 (instead of fixed 1:4 and 1:15) keeps
# the loop valid when a cell type has no rows left after filtering.
# The device is closed in `finally` so a failing panel does not leave the PDF
# open.
pdf("../figures/Comparison_with_population_only_caQTL_all.pdf", height = 10, width = 8)
invisible(tryCatch(
  {
    par(mfrow = c(5, 4), mar = c(4, 4, 2, 2))
    for (cell_name in names(sp1)) {
      plot_effect_panel(sp1[[cell_name]], cell_name)
    }
    for (cell_name in names(sp2)) {
      plot_effect_panel(sp2[[cell_name]], cell_name)
    }
  },
  finally = dev.off()
))
                  

In [50]:
# Stack the macro and micro tables into a single data frame.
tot <- rbind(macro, micro)

In [53]:
# Number of rows in tot with Ref_allele_bias below 0.25.
with(tot, sum(Ref_allele_bias < 0.25))

In [54]:
# Fraction of rows in tot with Ref_allele_bias below 0.25, computed from the
# data instead of a pasted count so it cannot go stale when upstream filters
# change. NOTE(review): the original literal 176 was presumably the printed
# result of sum(tot$Ref_allele_bias < 0.25) — confirm.
sum(tot$Ref_allele_bias < 0.25) / nrow(tot)