In [1]:
library(stringr)
library(parallel)
library(tidyr)

In [2]:
# Per-trait BED file names: each trait label plus the shared
# ".99credset.PPA.bed" suffix.
# NOTE(review): the visible finrich loop iterates over list.files(fmdir),
# not over this vector, and `traits` is reassigned before plotting.
traits <- paste0(
  c(
    "Asthma_child_onset",
    "Atopic_dermatitis",
    "Autoimmune_vitiligo",
    "Basophil_count",
    "Crohns_disease",
    "Eosinophil_count",
    "Gout",
    "Lymphocyte_count",
    "Monocyte_count",
    "Neutrophil_count",
    "Primary_sclerosing_cholangitis",
    "Rheumatoid_arthritis",
    "Selective_IgA_deficiency",
    "Systemic_lupus_erythematosus",
    "Type_1_diabetes",
    "Ulcerative_colitis"
  ),
  ".99credset.PPA.bed"
)

In [3]:
# Directory whose files are passed, one at a time, as the first argument
# to finrich.
fmdir <- "/nfs/lab/projects/pbmc_snATAC/data/credible_sets/"

In [4]:
# Output location for the 99credset immune run.
# NOTE(review): not referenced by any other visible code.
out <- "/nfs/lab/projects/pbmc_snATAC/analysis_v2/finrich/99credset_immune/"

In [5]:
# Relative paths used below (the caQTL table, "figures/...") resolve
# against this directory.
setwd("/nfs/lab/projects/pbmc_snATAC/analysis_v2")

In [6]:
# Lead caQTL table; later code reads its `cell`, `Feature`, `flag_fdr10`
# and `exclude` columns.
results <- data.table::fread(
  "summarized_caqtls/run3/EUR_caqtl_leads_run3.tsv",
  header = TRUE
)

In [7]:
# Every distinct value of results$cell except "bulk".
celltypes <- local({
  cells <- unique(results$cell)
  cells[cells != "bulk"]
})

In [8]:
# Directory that receives the per-cell-type BED files.
outdir <- "/nfs/lab/projects/pbmc_snATAC/analysis_v2/finrich/bedfiles/"

In [9]:
# For each cell type, write two three-column BED files into `outdir`:
#   <cell>.caqtl_peaks.bed      rows with flag_fdr10 == TRUE and exclude == FALSE
#   <cell>.non_caqtl_peaks.bed  rows with flag_fdr10 == FALSE
# Coordinates come from splitting `Feature` on the first ":" and then on
# the first "-".
# NOTE(review): the non-caQTL set does not filter on `exclude` -- confirm
# that asymmetry is intended.
for (cell_type in celltypes) {
  cell_results <- subset(results, cell == cell_type)
  coords <- str_split_fixed(cell_results$Feature, "\\:", 2)
  bed <- cbind(coords[, 1], str_split_fixed(coords[, 2], "\\-", 2))

  # which() discards NA flags instead of letting them become all-NA rows.
  caqtl_rows <- which(cell_results$flag_fdr10 == TRUE &
                        cell_results$exclude == FALSE)
  non_caqtl_rows <- which(cell_results$flag_fdr10 == FALSE)

  # drop = FALSE keeps a single selected row as a 1 x 3 matrix; otherwise
  # it collapses to a vector and is written as three one-column lines.
  write.table(bed[caqtl_rows, , drop = FALSE],
              paste0(outdir, cell_type, ".caqtl_peaks.bed"),
              sep = "\t", quote = FALSE, col.names = FALSE, row.names = FALSE)
  write.table(bed[non_caqtl_rows, , drop = FALSE],
              paste0(outdir, cell_type, ".non_caqtl_peaks.bed"),
              sep = "\t", quote = FALSE, col.names = FALSE, row.names = FALSE)
}


In [10]:
# Build index-aligned vectors of caQTL (positive) and non-caQTL (negative)
# BED paths: element i of each vector belongs to the same cell type.
# The positive list is selected by its own suffix rather than as "every
# other file in outdir", and is reordered to follow the negative list, so
# stray files or sort-order quirks cannot silently mis-pair the inputs.
local({
  bed_files <- list.files(outdir)
  neg_files <- grep(".non_caqtl_peaks.bed", bed_files, fixed = TRUE, value = TRUE)
  pos_files <- grep(".caqtl_peaks.bed", bed_files, fixed = TRUE, value = TRUE)
  if (length(neg_files) == 0) {
    stop("no *.non_caqtl_peaks.bed files found in ", outdir, call. = FALSE)
  }

  neg_cells <- sub(".non_caqtl_peaks.bed", "", neg_files, fixed = TRUE)
  pos_cells <- sub(".caqtl_peaks.bed", "", pos_files, fixed = TRUE)
  pos_index <- match(neg_cells, pos_cells)
  if (anyNA(pos_index)) {
    stop("no caQTL BED file for: ",
         paste(neg_cells[is.na(pos_index)], collapse = ", "), call. = FALSE)
  }
  unpaired <- setdiff(pos_cells, neg_cells)
  if (length(unpaired) > 0) {
    warning("caQTL BED files without a non-caQTL partner are ignored: ",
            paste(unpaired, collapse = ", "), call. = FALSE)
  }

  beds_pos <<- paste0(outdir, pos_files[pos_index])
  beds_neg <<- paste0(outdir, neg_files)
})

In [11]:
# Directory where the finrich JSON outputs are written and later read back.
resdir <- "/nfs/lab/projects/pbmc_snATAC/analysis_v2/finrich/results/"

In [12]:
# One output-path prefix per non-caQTL BED file: `resdir` followed by the
# part of the file name before its first ".".
outfiles <- local({
  bed_names <- list.files(outdir)
  neg_names <- bed_names[grepl(".non_caqtl_peaks.bed", bed_names)]
  name_parts <- str_split_fixed(neg_names, "\\.", 2)
  paste0(resdir, name_parts[, 1])
})

In [13]:
# Run finrich for every file in `fmdir` against every positive/negative
# BED pair, redirecting stdout to "<outfiles[x]>.<tr>.json".
# NOTE(review): up to 32 workers each ask finrich for 8 processes --
# confirm the host can take that load.
for (tr in list.files(fmdir)) {
  # seq_along() yields nothing for empty `outfiles`; 1:length() would
  # iterate over 1 and 0.
  statuses <- mclapply(seq_along(outfiles), function(x) {
    # shQuote() keeps paths containing spaces or shell metacharacters intact.
    system(paste0(
      "finrich ", shQuote(paste0(fmdir, tr)), " ",
      shQuote(beds_pos[x]), " ", shQuote(beds_neg[x]),
      " --permutations 10000 --processes 8 > ",
      shQuote(paste0(outfiles[x], ".", tr, ".json"))
    ))
  }, mc.cores = 32)

  # Report failed runs instead of discarding the exit statuses.
  succeeded <- vapply(
    statuses,
    function(s) is.numeric(s) && length(s) == 1 && s == 0,
    logical(1)
  )
  if (!all(succeeded)) {
    warning("finrich failed for ", tr, " with: ",
            paste(outfiles[!succeeded], collapse = ", "), call. = FALSE)
  }
}

#### Read in the results

In [14]:
library('rjson')

In [15]:
# Result files in `resdir` whose name contains "PPA".
infiles <- list.files(resdir, pattern = "PPA")

In [16]:
# Collect one row per result file into `m`:
#   columns 1-4  pval, logOR, conf_lower, conf_upper
#   columns 5-6  the first two "."-separated pieces of the file name
# Empty files leave columns 1-4 as NA; files containing "Infinity" are
# recorded as c(1, 0, 0, 0) and are not parsed.
# Storing the name pieces turns the whole matrix into character.
m <- matrix(NA, nrow = length(infiles), ncol = 6)
# seq_along() skips the loop when there are no files; 1:length() would
# visit indices 1 and 0.
for (rr in seq_along(infiles)) {
  file <- infiles[rr]
  path <- paste0(resdir, file)
  test <- readLines(path)
  if (length(test) == 0) {
    m[rr, 1:4] <- NA
  } else if (any(grepl("Infinity", test))) {
    # any(): readLines() returns one element per line, and a vector
    # condition in if() is an error in R >= 4.2.
    m[rr, 1:4] <- c(1, 0, 0, 0)
  } else {
    js <- fromJSON(file = path)
    m[rr, 1:4] <- unlist(js)[c("pval", "logOR", "conf_lower", "conf_upper")]
  }
  m[rr, 5:6] <- str_split_fixed(file, "\\.", 6)[, 1:2]
}

In [17]:
# Name the six columns of m; it carries no row names.
dimnames(m) <- list(
  NULL,
  c("pval", "logOR", "conf_lower", "conf_upper", "cell", "trait")
)

In [18]:
# Data-frame copy of m. The statistics are not numeric yet, because m
# stores every value alongside the cell/trait strings.
m2 <- data.frame(m)

In [19]:
# Split the observed cell labels into the four listed labels
# ("t", "b", "nk", "mono") and everything else.
macro <- as.character(unique(
  m2$cell[is.element(m2$cell, c("t", "b", "nk", "mono"))]
))
micro <- as.character(unique(
  m2$cell[!is.element(m2$cell, c("t", "b", "nk", "mono"))]
))

In [20]:
# Keep only rows with no missing value in any column.
m2 <- m2[complete.cases(m2), ]

In [21]:
# Reshape to one row per trait and one column per cell label.
wideor <- spread(m2[, c("trait", "cell", "logOR")], cell, logOR)
widepv <- spread(m2[, c("trait", "cell", "pval")], cell, pval)

# Trait/cell combinations with no result count as no effect:
# logOR 0 and p-value 1.
wideor[is.na(wideor)] <- 0
widepv[is.na(widepv)] <- 1

# Convert the value columns to numeric matrices. drop = FALSE plus
# as.matrix() keep the 2-d shape when there is a single trait row or a
# single cell column; apply(..., 2, as.numeric) returned a plain vector
# or failed in those cases.
or <- as.matrix(wideor[, -1, drop = FALSE], rownames.force = FALSE)
pv <- as.matrix(widepv[, -1, drop = FALSE], rownames.force = FALSE)
storage.mode(or) <- "double"
storage.mode(pv) <- "double"

In [22]:
# Show the smallest strictly positive p-value.
min(pv[pv > 0])

In [23]:
# Replace exact zeros with the smallest positive p-value so that
# -log10(pv) stays finite.
pv <- replace(pv, pv == 0, min(pv[pv > 0]))

In [24]:
# Label the rows of both matrices with the trait names.
rownames(or) <- widepv$trait
rownames(pv) <- widepv$trait

# -log10(p), carrying the sign of the log odds ratio.
signedpv <- -log10(pv)
signedpv <- sign(or) * signedpv

In [25]:
library(pheatmap)

In [26]:
# Significance labels with the same shape and dimnames as pv:
#   p < 0.0005 -> "X", p < 0.005 -> "**", p < 0.05 -> "*", otherwise "".
# NOTE(review): "X" breaks the star pattern -- confirm it is intentional.
ix <- pv
ix[] <- as.character(
  ifelse(pv < 0.0005, "X",
         ifelse(pv < 0.005, "**",
                ifelse(pv < 0.05, "*", "")))
)

# Trait labels in the row order used to index the matrices for the
# heatmaps (this replaces the earlier file-name vector of the same name).
traits <- c(
  "Monocyte_count",
  "Eosinophil_count",
  "Lymphocyte_count",
  "Asthma_child_onset",
  "Crohns_disease",
  "Neutrophil_count",
  "Ulcerative_colitis",
  "Rheumatoid_arthritis",
  "Type_1_diabetes",
  "Autoimmune_vitiligo",
  "Basophil_count",
  "Systemic_lupus_erythematosus",
  "Gout",
  "Atopic_dermatitis",
  "Primary_sclerosing_cholangitis",
  "Selective_IgA_deficiency"
)


In [27]:
# Print the labels joined by "', '" so they can be pasted into a c(...)
# call.
cat(paste(micro, collapse = "', '"))

act_cd4_t', 'adaptive_NK', 'cDC', 'cMono', 'cyto_cd8_t', 'cyto_nk', 'iMono', 'mem_b', 'mem_cd8_t', 'mkc', 'naive_b', 'naive_cd4_t', 'naive_cd8_t', 'ncMono', 'tReg

In [28]:
# Cell labels in the column order used for the subtype heatmaps.
micro <- c(
  "naive_cd4_t", "act_cd4_t",
  "naive_cd8_t", "cyto_cd8_t", "mem_cd8_t",
  "tReg",
  "adaptive_NK", "cyto_nk",
  "cMono", "ncMono", "iMono",
  "cDC",
  "mem_b", "naive_b",
  "mkc"
)


In [29]:
library(RColorBrewer)

# Reversed PuOr palette interpolated to 50 colours. The RdBu palette that
# used to be assigned first was overwritten before any use and is gone.
col <- rev(colorRampPalette(brewer.pal(9, "PuOr"))(50))

# Write one heatmap of `mat` to `filename`, without clustering.
#   mat       numeric matrix to colour
#   labels    matrix of the same shape, printed inside the cells
#   filename  output path handed to pheatmap
#   palette   colour vector
# Breaks are symmetric about zero and span the largest absolute value,
# so strongly negative cells stay inside the colour scale, and there is
# one more break than colours so every colour is used.
plot_enrichment_heatmap <- function(mat, labels, filename, palette = col) {
  limit <- max(abs(mat))
  pheatmap(mat,
           show_colnames = TRUE, angle_col = 45, fontsize_col = 8,
           show_rownames = TRUE, cluster_rows = FALSE, cluster_cols = FALSE,
           color = palette,
           breaks = seq(-limit, limit, length.out = length(palette) + 1),
           cellheight = 8, cellwidth = 10, fontsize_row = 8,
           display_numbers = labels,
           filename = filename)
}

# Subtype columns (micro): log odds ratios, then signed -log10 p-values.
mat <- as.matrix(or[traits, micro])
plot_enrichment_heatmap(mat, ix[traits, micro], "figures/Heatmap_OR_subtypes.pdf")

mat <- as.matrix(signedpv[traits, micro])
plot_enrichment_heatmap(mat, ix[traits, micro], "figures/Heatmap_PV_subtypes.pdf")

# Columns listed in macro: the same two plots.
mat <- as.matrix(or[traits, macro])
plot_enrichment_heatmap(mat, ix[traits, macro], "figures/Heatmap_OR.pdf")

mat <- as.matrix(signedpv[traits, macro])
plot_enrichment_heatmap(mat, ix[traits, macro], "figures/Heatmap_PV.pdf")


In [30]:
# Display the log-odds-ratio values in heatmap row/column order.
as.matrix(or[traits, micro])

Unnamed: 0,naive_cd4_t,act_cd4_t,naive_cd8_t,cyto_cd8_t,mem_cd8_t,tReg,adaptive_NK,cyto_nk,cMono,ncMono,iMono,cDC,mem_b,naive_b,mkc
Monocyte_count,-3.60012874,1.2391766,-2.96890879,-0.7289019,-2.8512349,-1.6534208,0.0,-4.5041606,1.0007816,0.4258095,0.0,-2.0116908,-1.634849,-3.2676063,1.175222
Eosinophil_count,1.7753694,2.0043113,0.01802934,-0.5095369,0.281357,0.6231183,0.0,-0.3342661,-0.7489404,-2.8488269,0.0,0.0,0.0,0.06589922,0.0
Lymphocyte_count,2.03223963,1.4994188,2.16015761,1.3525184,2.0510174,3.2244201,0.0,-0.4282907,0.6300371,2.7852329,0.0,3.3086488,2.6420576,2.49008187,-0.7110517
Asthma_child_onset,-0.59388761,1.3894306,-0.02541565,1.9034175,2.3881408,1.2716449,0.0,0.3924459,1.2255258,-0.9091417,0.0,0.4224073,3.2793533,-2.80279922,0.0
Crohns_disease,0.16981604,0.9483895,0.67495068,0.9640044,-0.1156697,-1.3154639,0.0,0.5458632,1.5894775,0.5199925,-0.2811783,0.0,0.0,-2.53745757,0.0
Neutrophil_count,-0.83481738,2.3699483,-0.96676064,2.3121126,0.0,0.0,0.0,2.5532727,1.9078642,1.6861801,2.2788806,0.0,0.0,0.0,0.0
Ulcerative_colitis,-0.05451528,-0.5980586,-0.65556886,-1.3863797,3.4831972,0.0,0.0,-1.0281177,2.3756144,1.0851989,1.2124441,0.0,2.4568387,0.0,0.0
Rheumatoid_arthritis,0.0,2.3368733,0.3588592,1.4794507,3.1600465,0.0,0.0,-0.6131536,0.5331701,-3.2064921,0.0,0.0,-0.7392045,0.0,0.0
Type_1_diabetes,1.25229679,1.6202938,2.07627693,1.517907,-1.6752181,-0.6804159,0.0,0.9188091,0.902912,-0.5174353,-2.6486752,0.0,0.0,0.93113,0.0
Autoimmune_vitiligo,2.51308289,1.3655552,1.31409587,0.8783528,1.7607671,2.563735,-0.801763,-1.0793436,1.3396148,0.0,1.9073338,0.0,0.0,-2.46347655,0.0


In [31]:
# Display the p-values in heatmap row/column order.
as.matrix(pv[traits, micro])

Unnamed: 0,naive_cd4_t,act_cd4_t,naive_cd8_t,cyto_cd8_t,mem_cd8_t,tReg,adaptive_NK,cyto_nk,cMono,ncMono,iMono,cDC,mem_b,naive_b,mkc
Monocyte_count,0.6992,0.0596,0.4547,0.3892,0.4006,0.1589,1.0,0.8299,0.0608,0.2257,1.0,0.1998,0.2468,0.5385,0.0598
Eosinophil_count,0.0283,0.0029,0.224,0.3886,0.1918,0.0996,1.0,0.3803,0.6232,0.5694,1.0,1.0,1.0,0.2468,1.0
Lymphocyte_count,0.013,0.0168,0.0137,0.0515,0.0184,0.0045,1.0,0.3924,0.1559,0.0023,1.0,0.0157,0.0098,0.0127,0.0732
Asthma_child_onset,0.3239,0.054,0.1665,0.0283,0.0168,0.0648,1.0,0.1925,0.0623,0.2723,1.0,0.0705,0.0057,0.5357,1.0
Crohns_disease,0.2314,0.1131,0.1389,0.1288,0.2014,0.1921,1.0,0.1889,0.0067,0.1599,0.0906,1.0,1.0,0.6009,1.0
Neutrophil_count,0.3099,0.0034,0.2342,0.0201,1.0,1.0,1.0,0.004,0.0024,0.0627,0.0193,1.0,1.0,1.0,1.0
Ulcerative_colitis,0.2453,0.4422,0.2405,0.4701,0.0072,1.0,1.0,0.4174,0.0008,0.0871,0.0367,1.0,0.0252,1.0,1.0
Rheumatoid_arthritis,1.0,0.0016,0.1527,0.0778,0.0009,1.0,1.0,0.3131,0.2218,0.3391,1.0,1.0,0.1427,1.0,1.0
Type_1_diabetes,0.0882,0.0176,0.0309,0.0647,0.4345,0.1604,1.0,0.1164,0.1091,0.2861,0.1356,1.0,1.0,0.0996,1.0
Autoimmune_vitiligo,0.0174,0.0692,0.0537,0.1049,0.0286,0.0132,0.0752,0.4254,0.0637,1.0,0.0269,1.0,1.0,0.4641,1.0
