**Question**: are differentially accessible sites after cytokine stimulation enriched for differentially expressed genes under the same conditions?

In [4]:
suppressPackageStartupMessages(library(stringr))
suppressPackageStartupMessages(library(RColorBrewer))
suppressPackageStartupMessages(library(tidyr))

In [5]:
outdir = '/nfs/lab/projects/islet_cytok/analysis/enrichment_genes'

In [6]:
setwd(outdir)

In [7]:
# RNA differential-expression table. Its row names are matched against gene
# ids, and its `padj` / `log2FoldChange` columns are read, further down.
rna = read.table(
    file = "../deseq_analysis/g.highcyt3.deseq_results_cytok.tsv",
    stringsAsFactors = FALSE
)

In [8]:
# Number of genes with padj < 0.1: all (a), positive fold change (b) and
# negative fold change (c). Rows with a missing padj are never counted.
sig_rna = !is.na(rna$padj) & rna$padj < 0.1
a = sum(sig_rna)
b = sum(sig_rna & rna$log2FoldChange > 0, na.rm = TRUE)
c = sum(sig_rna & rna$log2FoldChange < 0, na.rm = TRUE)

In [9]:
#writeLines(rownames(rna)[rna$padj<0.1& rna$log2FoldChange >0], "Up_genes_hicyt.txt")
#writeLines(rownames(rna)[rna$padj<0.1& rna$log2FoldChange <0], "Down_genes_hicyt.txt")

In [10]:
# Differential-accessibility table for the ATAC peaks; its `padj` and
# `log2FoldChange` columns and its row names are used by fishtab().
atac = read.table('../deseq_ATAC_NEW/cytokine_24hr_Hi.dec16.filtered.txt')

# Strip the "peak-" prefix so the row names can be compared with the
# "chr-start-end" peak ids built from the overlap tables.
rownames(atac) = gsub("peak-", "",rownames(atac) )

- *Method 1* : assign genes to peaks based on promoter proximity (5KB)

In [9]:
# Overlap the bulk ATAC peaks (-a) with the gencode v19 transcript windows (-b);
# -wa -wb keeps the columns of both files on each output line.
# NOTE(review): the annotation file name says 20kb while the output file is
# called "genes_within_10kb" — confirm which window size is intended.
bedtools_cmd = 'bedtools intersect -a Islet_bulkATAC.bed -b /nfs/lab/publicdata/gencode_v19/gencode.v19.20kb_all_possible_transcripts.bed  -wa -wb > genes_within_10kb'
status = system(bedtools_cmd)

# Stop here rather than silently reading a stale or truncated file left over
# from a previous run when bedtools is missing or fails.
if (status != 0) {
    stop("bedtools intersect failed with exit status ", status)
}
intersec = read.table("genes_within_10kb", sep="\t")

# Midpoints of the interval in columns 2-3 and of the interval in columns 5-6;
# `dist` is the absolute distance between the two midpoints.
cent1 = intersec[,2] + ((intersec[,3]-intersec[,2])/2)
cent2 = intersec[,5] + ((intersec[,6]-intersec[,5])/2)
dist = abs(cent1 - cent2)


In [10]:
# Peak id in "chr-start-end" form, built from the first three columns.
intersec$peak = with(intersec, paste(V1, V2, V3, sep = "-"))

# Peak-to-gene map: column 8 (the peak id just added) and column 7 (gene),
# plus the midpoint distance computed for the same rows.
int = setNames(intersec[, c(8, 7)], c("peak", "gene"))
int$dist = dist

In [12]:
# Build the 2x2 contingency table used to test whether differentially
# accessible peaks are linked to differentially expressed genes.
#
# Arguments
#   int      data frame with columns `peak`, `gene` (first two columns) and `dist`.
#   atac     peak-level results; row names are peak ids, columns `padj` and
#            `log2FoldChange` are used.
#   rna      gene-level results; row names are gene ids, same two columns used.
#   distance keep only peak-gene pairs with dist <= distance.
#   FC       which genes count as differential: "both", "up" or "down".
#   atac_FC  which peaks count as differential: "both", "up" or "down".
#   fdr      padj cutoff applied to both tables.
#
# Returns a table with rows = peak is differential (FALSE/TRUE) and columns =
# peak has at least one linked differential gene (FALSE/TRUE). Both levels are
# always present, so `tab[, 2]` and fisher.test() work even when a category is
# empty.
fishtab = function (int, atac, rna, distance = 100000, FC="both", atac_FC = "both", fdr=0.1) {
    directions = c("both", "up", "down")
    # Reject unknown directions up front instead of failing later on a
    # missing column.
    FC      = match.arg(FC, directions)
    atac_FC = match.arg(atac_FC, directions)

    # Row names of `res` passing the padj cutoff in the requested direction;
    # rows with a missing padj (or fold change, when a direction is requested)
    # are never selected.
    significant_ids = function(res, direction) {
        keep = !is.na(res$padj) & res$padj < fdr
        if (direction == "up") {
            keep = keep & !is.na(res$log2FoldChange) & res$log2FoldChange > 0
        }
        if (direction == "down") {
            keep = keep & !is.na(res$log2FoldChange) & res$log2FoldChange < 0
        }
        rownames(res)[keep]
    }

    # which() drops pairs with a missing distance instead of creating NA rows.
    int = int[which(int$dist <= distance), ]
    int = int[!duplicated(int[,1:2]), ]

    int$gene_diff = int$gene %in% significant_ids(rna, FC)

    # One entry per peak: does it have any linked differential gene, and is the
    # peak itself differential?
    peaks     = unique(as.character(int$peak[!is.na(int$peak)]))
    has_deg   = peaks %in% as.character(int$peak[int$gene_diff])
    peak_diff = peaks %in% significant_ids(atac, atac_FC)

    yes_no = c(FALSE, TRUE)
    tab = table(factor(peak_diff, levels = yes_no),
                factor(has_deg,   levels = yes_no))
    return(tab)
}

In [12]:
sum(rownames(atac) %in% int$peak)

In [13]:
int_sub = int
atac_sub = atac
genes_sub = rna

In [14]:
# Promoter-proximity enrichment: contingency tables for peak-gene pairs whose
# midpoints are at most `max_dist` apart. The same cutoff is now also applied
# to the overall table at the end (it previously fell back to fishtab's
# 100 kb default although the figure is labelled "within 10kb").
max_dist = 10000

# Peaks with increased accessibility vs. all / up / down DEGs.
tab       = fishtab(int=int_sub, atac=atac_sub, rna=genes_sub, FC="both", atac_FC="up",distance =max_dist)
tab_up    = fishtab(int=int_sub, atac=atac_sub, rna=genes_sub, FC="up",   atac_FC="up",distance =max_dist)
tab_down  = fishtab(int=int_sub, atac=atac_sub, rna=genes_sub, FC="down", atac_FC="up",distance =max_dist)

# li keeps the tables for fisher.test(); pl holds, per table, the fraction of
# peaks in each row (row 1 = peak not differential, row 2 = differential) that
# have at least one linked DEG.
li = list(tab, tab_up, tab_down)
pl = cbind(tab[,2]/rowSums(tab) , tab_up[,2]/rowSums(tab_up) , tab_down[,2]/rowSums(tab_down))
colnames(pl) = c('All DEGs', 'Up-reg.', 'Down-reg.')

li_up = li
pl_up = pl

# Same three tables for peaks with decreased accessibility.
tab       = fishtab(int=int_sub, atac=atac_sub, rna=genes_sub, FC="both", atac_FC="down",distance =max_dist)
tab_up    = fishtab(int=int_sub, atac=atac_sub, rna=genes_sub, FC="up",   atac_FC="down",distance =max_dist)
tab_down  = fishtab(int=int_sub, atac=atac_sub, rna=genes_sub, FC="down", atac_FC="down",distance =max_dist)
li = list(tab, tab_up, tab_down)
pl = cbind(tab[,2]/rowSums(tab) , tab_up[,2]/rowSums(tab_up) , tab_down[,2]/rowSums(tab_down))
colnames(pl) = c('All DEGs', 'Up-reg.', 'Down-reg.')

li_down = li
pl_down = pl

# Overall table: any differential peak vs. any DEG, same distance cutoff.
tab       = fishtab(int=int_sub, atac=atac_sub, rna=genes_sub, FC="both", atac_FC="both", distance =max_dist)

# Rows: down peaks, non-differential peaks (row 1 of pl_up), up peaks;
# columns: up- and down-regulated genes.
mm = rbind(pl_down[2,], pl_up)
mm = mm[,2:3]
rownames(mm) = c("down", "no.diff", "up")


In [15]:
# Gene counts printed as "n=" under the bars. They use the same padj cutoff as
# the enrichment tables (fishtab's fdr = 0.1, matching the "fdr10" output file)
# instead of 0.05, so the labels describe the gene sets that were tested.
fdr_cut = 0.1
a=sum(genes_sub$padj<fdr_cut, na.rm=TRUE)
b=sum(genes_sub$padj<fdr_cut& genes_sub$log2FoldChange >0, na.rm=TRUE)
c=sum(genes_sub$padj<fdr_cut& genes_sub$log2FoldChange <0, na.rm=TRUE)

In [16]:
# Two-colour palette for the non-differential / differential peak bars.
# (The earlier rainbow(2) assignment was overwritten immediately and is dropped.)
col = c("#fdb863", "#b2abd2")

In [17]:
# Four-panel summary of the promoter-proximity enrichment, written to one PDF.
pdf("Enrichment_results_degs_fdr10.pdf")
par(mfrow=c(2,2))

# Panel 1: peaks with increased accessibility. Bars are the fractions in
# pl_up; Fisher p-value and odds ratio of each table in li_up are printed above.
bp <- barplot(pl_up, beside=T, ylab = "frac. peaks with DEGs within 10kb", las=1,  
              legend=TRUE, ylim = c(0,0.5),col=col,
        args.legend =list(x = "topright", legend =  c('no.diff', 'diff.peak.up'), bty="n"))

txt = paste0("p=", sapply(li_up, function(x) signif(fisher.test(x)$p.value,2)))
txt2 = paste0("OR=", sapply(li_up, function(x) signif(fisher.test(x)$estimate,2)))
# x = 2, 5, 8 are hard-coded label positions (bp is not used for them).
text(x=c(2,5,8), y=pl_up[2,]+0.05, labels = paste(txt, txt2, sep="\n") )    
mtext(text = paste0("n=", c(a,b,c)), at = c(2,5,8), side =1, cex=0.8, line = 2)      



# Panel 2: same layout for peaks with decreased accessibility.
bp <- barplot(pl_down, beside=T, ylab = "frac. peaks with DEGs within 10kb", las=1,  
              legend=TRUE, ylim = c(0,0.5),col=col,
        args.legend =list(x = "topright", legend =  c('no.diff', 'diff.peak.down'), bty="n"))

txt = paste0("p=", sapply(li_down, function(x) signif(fisher.test(x)$p.value,2)))
txt2 = paste0("OR=", sapply(li_down, function(x) signif(fisher.test(x)$estimate,2)))
text(x=c(2,5,8), y=pl_down[2,]+0.05, labels = paste(txt, txt2, sep="\n") )    
mtext(text = paste0("n=", c(a,b,c)), at = c(2,5,8), side =1, cex=0.8, line = 2)      

                            
# Panel 3: overall table (`tab` as last assigned before this cell).
bp <- barplot(tab[,2]/rowSums(tab), beside=T, ylab = "frac. peaks with DEGs within 10kb", las=1,  
              legend=TRUE, ylim = c(0,0.5),col=col,
        args.legend =list(x = "topright", legend =  c('no.diff', 'diff.peak'), bty="n"))

txt = paste0("p=", signif(fisher.test(tab)$p.value,2))
txt2 = paste0("OR=", signif(fisher.test(tab)$estimate,2))
text(x=1, y=0.45, labels = paste(txt, txt2, sep="\n") )    
mtext("overall")
                            
# Panel 4: the `mm` matrix (down / no.diff / up peaks).
# NOTE(review): the y label says "coacc DEGs" although `mm` here comes from the
# promoter-proximity tables — confirm the label.
 bp <- barplot(mm, beside=T, ylab = "frac. peaks with coacc DEGs", las=1,  
              legend=TRUE, ylim = c(0,0.3),
        args.legend =list(x = "topright", legend =  rownames(mm), bty="n"))

                           dev.off()

In [18]:
# Keep only the 'Up-reg.' and 'Down-reg.' columns/tables, first for peaks with
# increased accessibility and then for peaks with decreased accessibility.
pl_al = cbind(pl_up[,2:3], pl_down[,2:3])
li_al = list(li_up[[2]], li_up[[3]],li_down[[2]], li_down[[3]] )

# Four-colour 'Paired' palette from RColorBrewer.
col = brewer.pal(4, 'Paired')

In [19]:
# Promoter-proximity figure: one bar pair (non-differential vs differential
# peaks) per combination of peak direction and gene direction.
pdf("../cytokine_figures/Enrichemnt_DAC_DEG_promoters.pdf")
par(mfrow=c(2,2))

# Colours 1-2 are used for the first two bar pairs, colours 3-4 for the last two.
bp <- barplot(pl_al, beside=T, ylab = "frac. peaks with DEGs within 10kb TSS", las=1, 
               ylim = c(0,0.5),col=col[c(1,2,1,2,3,4,3,4)], space=c(0,0.5), 
              names.arg = c('Up', 'Down', 'Up', 'Down') )
        
# Horizontal centre of each bar pair, used to place the annotations.
xpos = bp[1,]+((bp[2,]-bp[1,])/2)
txt = paste0("p=", sapply(li_al, function(x) signif(fisher.test(x)$p.value,2)))
txt2 = paste0("OR=", sapply(li_al, function(x) signif(fisher.test(x)$estimate,2)))
text(x=xpos, y=pl_al[2,]+0.07, labels = paste(txt, txt2, sep="\n") )    
mtext(text = paste0(rep("genes",4)), at = xpos, side =1, cex=0.8, line = 2)      


# The legend is drawn in its own (otherwise empty) panel.
plot.new()
legend("top", legend =  c( 'Not.DAC.up', 'DAC.up', 'Not.DAC.down', 'DAC.down'), pch=22, pt.bg=col, pt.cex=2)

                        
 dev.off()

- *Method 2* : assign genes to peaks based on coaccessibility

In [20]:
# Co-accessibility tables intersected with the ATAC sites, one per condition
# (untreated, cytokine, and — judging by the file name — the two combined).
coacc1 = read.table('../snATAC/coaccess/untreated.beta.coacc.intersect.alsites')
coacc2 = read.table('../snATAC/coaccess/cytokine.beta.coacc.intersect.alsites')
coacc3 = read.table('../snATAC/coaccess/cyt_unt.beta.coacc.intersect.alsites')

In [61]:
###Used in the paper
# HiChIP tables (hichip1 = "unt", hichip2 = "cyt" per the file names); they
# are converted to peak-gene maps with convert_coacc().
hichip1 = read.table('../snATAC/coaccess/A471.EndoC_unt.pchich.promlab.fdr10.intersect.alsites')
hichip2 = read.table('../snATAC/coaccess/A471.EndoC_cyt.pchich.promlab.fdr10.intersect.alsites')

In [13]:
###new Hichip
#hichip1 = read.table('../snATAC/coaccess/EndoB_HiChIP_UNT_1_01102022_S51.pchich.promlab.intersect.alsites')
#hichip2 = read.table('../snATAC/coaccess/EndoB_HiChIP_HiCy_1_01102022_S53.pchich.promlab.intersect.alsites')

In [14]:
# Turn a promoter-labelled connection table into a peak -> gene map.
#
# Arguments
#   coacc    data frame with positional columns V1..V13 as read by read.table:
#            V2 / V6 are used to compute the span, V7 ("A"/"B") and V8 ("CP")
#            select rows, columns 9 and 10 hold gene labels ("." = none, several
#            genes separated by commas), V11-V13 give the ATAC peak coordinates.
#   genes    gene ids to keep; defaults to the row names of the global `rna`
#            table (evaluated lazily, so existing one-argument calls still work).
#   min_dist keep only links with dist > min_dist.
#
# Returns a data frame with columns peak ("chr-start-end"), gene, dist, one row
# per distinct peak/gene/dist combination.
#
# Unlike the previous loop over 0:4 commas, rows listing more than five genes
# are no longer dropped: every comma-separated gene is expanded.
convert_coacc = function(coacc, genes = rownames(rna), min_dist = 10000){
    coacc = subset(coacc, V8=="CP")
    coacc$peak = paste(coacc$V11,  coacc$V12,  coacc$V13, sep="-")
    coacc$dist = coacc$V6 - coacc$V2
    colnames(coacc)[9]  = 'gene'
    colnames(coacc)[10] = 'gene2'

    # Keep rows where the end designated by V7 carries no gene label.
    distal = (coacc$gene=="." & coacc$V7=="A") | (coacc$gene2=="." & coacc$V7=="B")
    coacc  = coacc[which(distal), ]

    # Stack both gene columns, then drop duplicates and the "." placeholder.
    map = data.frame(peak = c(coacc$peak, coacc$peak),
                     dist = c(coacc$dist, coacc$dist),
                     gene = c(as.character(coacc$gene), as.character(coacc$gene2)),
                     stringsAsFactors = FALSE)
    map = map[!duplicated(map), ]
    map = subset(map, gene != ".")

    # One output row per comma-separated gene, whatever the number of genes.
    gene_list = strsplit(as.character(map$gene), ",", fixed = TRUE)
    n_genes   = lengths(gene_list)
    MAP = data.frame(peak = rep(map$peak, n_genes),
                     gene = as.character(unlist(gene_list)),
                     dist = rep(map$dist, n_genes),
                     stringsAsFactors = FALSE)

    MAP = MAP[!duplicated(MAP), c('peak','gene','dist')]
    MAP = subset(MAP, dist > min_dist)
    map_sub = MAP[MAP$gene %in% genes, ]
    return(map_sub)
}

In [15]:
map_unt = convert_coacc(hichip1)
map_cyt = convert_coacc(hichip2)

In [20]:
int_sub   = map_cyt
atac_sub  = atac
genes_sub = rna

In [21]:
# Distal (HiChIP-linked) enrichment: contingency tables for peak-gene links up
# to `max_dist` apart. The same cutoff is now also applied to the overall table
# at the end (it previously fell back to fishtab's 100 kb default).
max_dist = 1000000

# Peaks with increased accessibility vs. all / up / down DEGs.
tab       = fishtab(int=int_sub, atac=atac_sub, rna=genes_sub, FC="both", atac_FC="up",distance =max_dist)
tab_up    = fishtab(int=int_sub, atac=atac_sub, rna=genes_sub, FC="up",   atac_FC="up",distance =max_dist)
tab_down  = fishtab(int=int_sub, atac=atac_sub, rna=genes_sub, FC="down", atac_FC="up",distance =max_dist)

# li keeps the tables for fisher.test(); pl holds, per table, the fraction of
# peaks in each row (row 1 = peak not differential, row 2 = differential) that
# have at least one linked DEG.
li = list(tab, tab_up, tab_down)
pl = cbind(tab[,2]/rowSums(tab) , tab_up[,2]/rowSums(tab_up) , tab_down[,2]/rowSums(tab_down))
colnames(pl) = c('All DEGs', 'Up-reg.', 'Down-reg.')

li_up = li
pl_up = pl

# Same three tables for peaks with decreased accessibility.
tab       = fishtab(int=int_sub, atac=atac_sub, rna=genes_sub, FC="both", atac_FC="down",distance =max_dist)
tab_up    = fishtab(int=int_sub, atac=atac_sub, rna=genes_sub, FC="up",   atac_FC="down",distance =max_dist)
tab_down  = fishtab(int=int_sub, atac=atac_sub, rna=genes_sub, FC="down", atac_FC="down",distance =max_dist)
li = list(tab, tab_up, tab_down)
pl = cbind(tab[,2]/rowSums(tab) , tab_up[,2]/rowSums(tab_up) , tab_down[,2]/rowSums(tab_down))
colnames(pl) = c('All DEGs', 'Up-reg.', 'Down-reg.')

li_down = li
pl_down = pl

# Overall table: any differential peak vs. any DEG, same distance cutoff.
tab       = fishtab(int=int_sub, atac=atac_sub, rna=genes_sub, FC="both", atac_FC="both", distance =max_dist)

# Rows: down peaks, non-differential peaks (row 1 of pl_up), up peaks;
# columns: up- and down-regulated genes.
mm = rbind(pl_down[2,], pl_up)
mm = mm[,2:3]
rownames(mm) = c("down", "no.diff", "up")



In [22]:
# Keep only the 'Up-reg.' and 'Down-reg.' columns/tables, first for peaks with
# increased accessibility and then for peaks with decreased accessibility.
pl_al = cbind(pl_up[,2:3], pl_down[,2:3])
li_al = list(li_up[[2]], li_up[[3]],li_down[[2]], li_down[[3]] )

# Four-colour 'Paired' palette from RColorBrewer.
col = brewer.pal(4, 'Paired')

In [23]:
# Distal-link figure. The commented-out pdf() calls are alternative output
# names used for other link sets; only the last one is active.
#pdf("../cytokine_figures/Enrichemnt_DAC_DEG_coacc_untr.pdf")
#pdf("../cytokine_figures/Enrichemnt_DAC_DEG_coacc_cyt.pdf")
#pdf("../cytokine_figures/Enrichemnt_DAC_DEG_coacc_cyt_unt.pdf")
#pdf("../cytokine_figures/Enrichemnt_DAC_DEG_Hichip_cyt.pdf")
pdf("../cytokine_figures/Enrichemnt_DAC_DEG_Hichip_cyt_newbatchdf10.pdf")
# xpd=TRUE lets the annotations be drawn outside the plot region.
par(mfrow=c(2,2), xpd=TRUE)

# Colours 1-2 are used for the first two bar pairs, colours 3-4 for the last two.
bp <- barplot(pl_al, beside=T, ylab = "frac. peaks with coacces. DEGs 10-1000kb", las=1, 
               ylim = c(0,0.6),col=col[c(1,2,1,2,3,4,3,4)], space=c(0,0.5), 
              names.arg = c('Up', 'Down', 'Up', 'Down') )
        
# Horizontal centre of each bar pair, used to place the annotations.
xpos = bp[1,]+((bp[2,]-bp[1,])/2)
txt = paste0("p=", sapply(li_al, function(x) signif(fisher.test(x)$p.value,2)))
txt2 = paste0("OR=", sapply(li_al, function(x) signif(fisher.test(x)$estimate,2)))
text(x=xpos, y=pl_al[2,]+0.07, labels = paste(txt, txt2, sep="\n") )    
mtext(text = paste0(rep("genes",4)), at = xpos, side =1, cex=0.8, line = 2)      


# The legend is drawn in its own (otherwise empty) panel.
plot.new()
legend("top", legend =  c( 'Not.DAC.up', 'DAC.up', 'Not.DAC.down', 'DAC.down'), pch=22, pt.bg=col, pt.cex=2)

                        
 dev.off()

### Find examples that are validated by HiChiP

In [101]:
suppressPackageStartupMessages(library(cicero))
suppressPackageStartupMessages(library(parallel))

In [None]:
### Here is a list of co-accessible gene / distal ATAC peak pairs that have concordant differential expression and accessibility, and that show both co-accessibility AND HiChIP validation in only one of the conditions (candidate examples).

In [165]:
# Expand a comma-separated column into one row per value.
#
# Arguments
#   map     data frame.
#   spl     name of the column holding comma-separated values.
#   colkeep names of the columns to carry over to every expanded row.
#
# Returns a data frame with the columns c(colkeep, spl), rows in input order
# (each input row repeated once per value). Rows whose `spl` value is NA or
# empty produce no output row. The column set is now the same whether or not
# any value contains a comma, and an empty input yields an empty result instead
# of an error.
split_commas = function(map, spl="gene" ,colkeep = c('peak','dist')){
    values = as.character(map[, spl])
    pieces = strsplit(values, ",", fixed = TRUE)
    # strsplit() maps NA to NA; treat missing values as "no genes".
    pieces[is.na(values)] = list(character(0))
    n_pieces = lengths(pieces)

    # Repeat each input row once per value, then attach the values themselves.
    MAP = map[rep(seq_len(nrow(map)), n_pieces), colkeep, drop = FALSE]
    MAP[[spl]] = as.character(unlist(pieces))
    rownames(MAP) = NULL
    return(MAP)
}

In [103]:
# HiChIP interaction calls (bedpe with header) for the two samples.
cyt = read.table('/nfs/lab/projects/islet_cytok/analysis/hichip/cyt/A471_Arima_EndoC_Lo_S3_L002.5k.2.sig3Dinteractions.bedpe', header=TRUE)
unt = read.table('/nfs/lab/projects/islet_cytok/analysis/hichip/unt/A471_Arima_EndoB_Lo_S1_L002.5k.2.sig3Dinteractions.bedpe', header=TRUE)

# Reduce every interaction to two "chr_start_end" anchor ids (columns 1-3 and
# 4-6) and keep each anchor pair once.
cyt = unique(data.frame(Peak1 = paste(cyt[,1], cyt[,2], cyt[,3], sep="_"),
                        Peak2 = paste(cyt[,4], cyt[,5], cyt[,6], sep="_")))

unt = unique(data.frame(Peak1 = paste(unt[,1], unt[,2], unt[,3], sep="_"),
                        Peak2 = paste(unt[,4], unt[,5], unt[,6], sep="_")))

In [113]:
# Untreated: add the "chr-start-end" peak id and the V6 - V2 span, then keep
# "CP" rows where the end designated by V7 has no gene label (".").
coacc1$peak = with(coacc1, paste(V11, V12, V13, sep="-"))
coacc1$dist = with(coacc1, V6 - V2)
coac_untr   = subset(coacc1, V8=="CP" & ((V9=="." & V7=="A") | (V10=="." & V7=="B")))

# Cytokine: same columns and same filter.
coacc2$peak = with(coacc2, paste(V11, V12, V13, sep="-"))
coacc2$dist = with(coacc2, V6 - V2)
coac_cyt    = subset(coacc2, V8=="CP" & ((V9=="." & V7=="A") | (V10=="." & V7=="B")))

In [114]:
# Rebuild both tables as: Peak1 / Peak2 anchor ids in "chr_start_end" form
# (columns 1-3 and 4-6) followed by columns 9, 10, 14 and 15.
# presumably 14 and 15 are the `peak` and `dist` columns added earlier —
# this holds only if the input files have exactly 13 columns; TODO confirm.
coac_cyt = data.frame(Peak1 = paste(coac_cyt[,1], coac_cyt[,2], coac_cyt[,3], sep="_"),
                 Peak2 = paste(coac_cyt[,4], coac_cyt[,5], coac_cyt[,6], sep="_"), coac_cyt[,c(9,10,14,15)] )

coac_untr = data.frame(Peak1 = paste(coac_untr[,1], coac_untr[,2], coac_untr[,3], sep="_"),
                 Peak2 = paste(coac_untr[,4], coac_untr[,5], coac_untr[,6], sep="_"), coac_untr[,c(9,10,14,15)] )


In [116]:
# Flag each cytokine co-accessibility link by whether a matching connection
# (anchors within 1 kb, maxgap=1000) exists in the cytokine HiChIP set, the
# untreated HiChIP set, and the untreated co-accessibility set.
coac_cyt$hichip_cyt = compare_connections(coac_cyt[,1:2], cyt, maxgap=1000)
coac_cyt$hichip_unt = compare_connections(coac_cyt[,1:2], unt, maxgap=1000)
coac_cyt$coac_unt = compare_connections(coac_cyt[,1:2], coac_untr[,1:2], maxgap=1000)

# Cytokine-specific links: found in cytokine HiChIP only and not co-accessible
# in untreated cells.
coac_cyt_uni = subset(coac_cyt,hichip_cyt==T & hichip_unt==F &coac_unt==F)

coac_cyt_uni = coac_cyt_uni[!duplicated(coac_cyt_uni),]

# Gene label = whichever of columns 3-4 is not ".".
# NOTE(review): this assumes exactly one of the two is non-"." in every row;
# otherwise apply() does not return a plain vector.
coac_cyt_uni$gene = apply(coac_cyt_uni[,3:4], 1, function(x) x[x!="."])

# One row per gene for comma-separated gene labels.
ccu = split_commas(coac_cyt_uni, colkeep=c('Peak1','Peak2', 'peak','dist','hichip_cyt','hichip_unt','coac_unt'))

In [172]:
# Attach columns 1, 2 and 6 of the ATAC results (suffix .x after the second
# merge) and of the RNA results (suffix .y), matched on row names.
ccu = merge(ccu, atac[,c(1,2,6)], by.x="peak", by.y="row.names")
ccu = merge(ccu, rna[,c(1,2,6)], by.x="gene", by.y="row.names")

# Concordant "up" examples: peak and gene both increased with padj < 0.1,
# ordered by gene padj and then peak padj.
fctab_up = subset(ccu, log2FoldChange.x>0 & log2FoldChange.y>0 & padj.x<0.1 &padj.y<0.1)
fctab_up = fctab_up[with(fctab_up, order(padj.y,padj.x)),]

In [190]:
write.csv(fctab_up, "Coaccessible_concordant_validated_examples_up.csv")

In [180]:
sur = readLines('/nfs/lab/projects/islet_cytok/analysis/crispr_screen/Down_genes_cyt.txt')

In [181]:
fctab_up$screen = fctab_up$gene %in% sur

In [185]:
# Flag each untreated co-accessibility link by whether a matching connection
# (anchors within 1 kb, maxgap=1000) exists in the cytokine HiChIP set, the
# untreated HiChIP set, and the cytokine co-accessibility set.
coac_untr$hichip_cyt = compare_connections(coac_untr[,1:2], cyt, maxgap=1000)
coac_untr$hichip_unt = compare_connections(coac_untr[,1:2], unt, maxgap=1000)
coac_untr$coac_cyt = compare_connections(coac_untr[,1:2], coac_cyt[,1:2], maxgap=1000)

# Untreated-specific links: found in untreated HiChIP only and not
# co-accessible in cytokine-treated cells.
coac_untr_uni = subset(coac_untr,hichip_cyt==F & hichip_unt==T &coac_cyt==F)
coac_untr_uni = coac_untr_uni[!duplicated(coac_untr_uni),]

# Gene label = whichever of columns 3-4 is not ".".
# NOTE(review): this assumes exactly one of the two is non-"." in every row;
# otherwise apply() does not return a plain vector.
coac_untr_uni$gene = apply(coac_untr_uni[,3:4], 1, function(x) x[x!="."])

# One row per gene for comma-separated gene labels.
cuu = split_commas(coac_untr_uni, colkeep=c('Peak1','Peak2', 'peak','dist','hichip_cyt','hichip_unt','coac_cyt'))

In [186]:
# Attach columns 1, 2 and 6 of the ATAC results (suffix .x after the second
# merge) and of the RNA results (suffix .y), matched on row names.
cuu = merge(cuu, atac[,c(1,2,6)], by.x="peak", by.y="row.names")
cuu = merge(cuu, rna[,c(1,2,6)], by.x="gene", by.y="row.names")

# Concordant "down" examples: peak and gene both decreased with padj < 0.1,
# ordered by gene padj and then peak padj.
# (The closing parenthesis of with(...) was missing, which made this line a
# syntax error.)
fctab_do = subset(cuu, log2FoldChange.x<0 & log2FoldChange.y<0 & padj.x<0.1 &padj.y<0.1)
fctab_do = fctab_do[with(fctab_do, order(padj.y,padj.x)),]

In [187]:
fctab_do$screen = fctab_do$gene %in% readLines('/nfs/lab/projects/islet_cytok/analysis/crispr_screen/Up_genes_cyt.txt')

In [191]:
write.csv(fctab_do, "Coaccessible_concordant_validated_examples_down.csv")

### Plot Connection examples 

In [4]:
suppressPackageStartupMessages(library(data.table))

In [5]:
# Gene model table passed to plot_connections(); only rows whose `feature`
# column equals 'protein_coding' are kept.
gene_annotation <- read.table('/home/joshchiou/references/gencode.v19.cicero_gene_annotation.txt', header=T, stringsAsFactors=FALSE)
gene_annotation <- gene_annotation[gene_annotation[['feature']]=='protein_coding',]

In [6]:
cells = c('untreated','cytokine')

In [7]:
coacc_dir = '/nfs/lab/projects/islet_cytok/analysis/snATAC/coaccess'

In [205]:
# For every entry of `cells`, read its connection table, keep connections with
# coaccess >= 0.05, and store the first three columns in a global variable
# named after the entry (i.e. `untreated` and `cytokine`).
# Note: `file`, `conns`, `coacc` and `cic` are overwritten on every iteration
# and keep the values of the last one.
for (cell in cells){

file = paste(coacc_dir, cell,  '5kb_all/promLabel/beta.cicero_conns_dedup.promLabelled.txt', sep="/")
conns = fread(file, data.table = F)
coacc = subset(conns, coaccess >= 0.05)
cic = coacc[,1:3]
assign(cell, cic)
}

In [208]:
# Distinct Peak1/Peak2 pairs of the filtered co-accessibility tables.
cp_cyt = coac_cyt[!duplicated(coac_cyt[,1:2]),1:2]
cp_unt = coac_untr[!duplicated(coac_untr[,1:2]),1:2]

In [209]:
# Join each pair with the connection table loaded for the same condition,
# matching on the first two columns of both tables; this brings in the third
# column of `cytokine` / `untreated`.
cp_cyt = merge(cp_cyt, cytokine, by=1:2) 
cp_unt = merge(cp_unt, untreated, by=1:2) 

In [216]:
cyt$coaccess=0.5
unt$coaccess=0.5

In [275]:
# chr7:156781418-157016341, restricted to connections of the given viewpoint;
# untreated links as main track, cytokine links as comparison track.
pdf("MNX_untcyt_prom_cp_new.pdf")
plot_connections(cp_unt, "chr7", 156781418,157016341, alpha_by_coaccess = TRUE,
                 viewpoint = 'chr7_156781418_156803890',
                gene_model = gene_annotation, 
                 comparison_track = cp_cyt)
dev.off()

In [379]:
# Same region and tracks as the MNX promoter plot, but with the viewpoint
# chr7_157001443_157002210.
pdf("MNX_untcyt_enh_cp_new.pdf")
plot_connections(cp_unt, "chr7", 156781418,157016341, alpha_by_coaccess = TRUE,
                 viewpoint = 'chr7_157001443_157002210',
                gene_model = gene_annotation, 
                 comparison_track = cp_cyt)
dev.off()

"No comparison connections in viewpoint"
"no non-missing arguments to max; returning -Inf"
"comparison_ymax calc failed"
"Nothing to plot"


In [232]:
# chr16:11340000-11420000, viewpoint chr16_11401017_11401743; cytokine links
# as main track, untreated links as comparison track.
pdf("SOCS1_untcyt_enh_cp_new.pdf")
plot_connections(cp_cyt, "chr16", 11340000,11420000,alpha_by_coaccess = TRUE,
                 viewpoint = 'chr16_11401017_11401743',
                gene_model = gene_annotation,  
                 comparison_track = cp_unt )
dev.off()

"no non-missing arguments to max; returning -Inf"
"comparison_ymax calc failed"
"Nothing to plot"


In [269]:
# Same region and tracks as the SOCS1 enhancer plot, but with the viewpoint
# chr16_11340000_11352536.
pdf("SOCS1_untcyt_prom_cp_new.pdf")
plot_connections(cp_cyt, "chr16", 11340000,11420000,alpha_by_coaccess = TRUE,
                 viewpoint = 'chr16_11340000_11352536',
                gene_model = gene_annotation,  
                 comparison_track = cp_unt )
dev.off()

"no non-missing arguments to max; returning -Inf"
"comparison_ymax calc failed"
"Nothing to plot"


In [262]:

# pdf("Examples_validated_coacc_up.pdf")
# for (i in 1:12){
# coord = str_split(fctab_up$peak[i],"-", 3,simplify = TRUE)
# cr = coord[1]
# start = as.numeric(coord[2])- fctab_up$dist[i]-10000
# end = as.numeric(coord[2])+ fctab_up$dist[i]+10000
# vp = gsub("-", "_",fctab_up$peak[i] )
#  plot_connections(cp_cyt, cr, start,end,alpha_by_coaccess = TRUE,
#                  viewpoint = vp,
#                 gene_model = gene_annotation,  
#                  comparison_track = cp_unt )
    
# }
# dev.off()


In [286]:
fctab_up[fctab_up$gene=="BCL6",]

Unnamed: 0_level_0,gene,peak,Peak1,Peak2,dist,hichip_cyt,hichip_unt,coac_unt,baseMean.x,log2FoldChange.x,padj.x,baseMean.y,log2FoldChange.y,padj.y,screen
Unnamed: 0_level_1,<chr>,<chr>,<fct>,<fct>,<int>,<lgl>,<lgl>,<lgl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<lgl>
19,BCL6,chr3-187724421-187725031,chr3_187463067_187463728,chr3_187724371_187725011,261944,True,False,False,230.8912,0.7279962,4.606726e-05,1019.504,0.9964662,3.351261e-24,False


In [375]:
# chr3:187410164-187750682, viewpoint chr3_187724421_187725031 (the `peak` of
# the BCL6 row in fctab_up); cytokine links as main track, untreated links as
# comparison track.
pdf("BCL6_untcyt_enh_cp_new.pdf")
plot_connections(cp_cyt, 'chr3', 187410164, 187750682, alpha_by_coaccess = TRUE,
                 viewpoint = 'chr3_187724421_187725031',
                gene_model = gene_annotation,  
                 comparison_track = cp_unt )
dev.off()

"no non-missing arguments to max; returning -Inf"
"comparison_ymax calc failed"
"Nothing to plot"


In [376]:
# Same region and tracks as the BCL6 enhancer plot, but with the viewpoint
# chr3_187463067_187463728 (Peak1 of the BCL6 row in fctab_up).
pdf("BCL6_untcyt_prom_cp_new.pdf")
plot_connections(cp_cyt, 'chr3', 187410164, 187750682, alpha_by_coaccess = TRUE,
                 viewpoint = 'chr3_187463067_187463728',
                gene_model = gene_annotation,  
                 comparison_track = cp_unt )
dev.off()

"no non-missing arguments to max; returning -Inf"
"comparison_ymax calc failed"
"Nothing to plot"


In [378]:
fctab_do[fctab_do$gene=="MNX1",]

Unnamed: 0_level_0,gene,peak,Peak1,Peak2,dist,hichip_cyt,hichip_unt,coac_cyt,baseMean.x,log2FoldChange.x,padj.x,baseMean.y,log2FoldChange.y,padj.y,screen
Unnamed: 0_level_1,<chr>,<chr>,<fct>,<fct>,<int>,<lgl>,<lgl>,<lgl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<lgl>
94,MNX1,chr7-157001443-157002210,chr7_156803006_156803890,chr7_157001442_157002194,199188,False,True,False,401.1572,-1.1237771,4.409097e-17,422.1425,-0.8731248,1.737599e-13,False
93,MNX1,chr7-156921910-156922208,chr7_156803006_156803890,chr7_156921914_156922153,119147,False,True,False,32.71712,-0.8025611,0.0954501,422.1425,-0.8731248,1.737599e-13,False


### Plot also the virtual 4C as arcs

In [8]:
files = list.files("bedgraph")

In [1]:
# Two-column lookup filled column-wise: the first six values (locus labels)
# form column 1 and the last six (viewpoint peak ids, in the same order) form
# column 2.
mat = matrix(c(  'MNX1_prom' , 'MNX1_enh'  ,'BCL6_prom', 'BCL6_enh', 'SOCS1_prom', 'SOCS1_enh',
                'chr7_156781418_156803890', 'chr7_157001443_157002210',
                'chr3_187463067_187463728', 'chr3_187724421_187725031',
                'chr16_11340000_11352536',  'chr16_11401017_11401743'), ncol=2)

In [2]:
colnames(mat) = c("locus", "Peak1")

In [12]:
nm = str_split_fixed(files, "\\.",3)

In [13]:
# Read every bedgraph file listed in `files` and stack them into one table.
# Per row: Peak2 = "chr_start_end" from V1-V3, score = V4, and locus /
# condition taken from the 1st / 2nd dot-separated field of the file name (nm).
# seq_along() makes an empty `files` a no-op (1:length(files) would iterate
# over 1 and 0), and the tables are bound once instead of growing v4c in a loop.
v4c_parts = lapply(seq_along(files), function(x) {
    f1 = read.table(paste0("bedgraph/", files[x]))
    f1$Peak2     = paste(f1$V1,f1$V2,f1$V3, sep="_" )
    f1$score     = f1$V4
    f1$locus     = nm[x,1]
    f1$condition = nm[x,2]
    f1
})
v4c = do.call(rbind, v4c_parts)
# do.call(rbind, list()) is NULL; keep the original empty-data-frame result.
if (is.null(v4c)) {
    v4c = data.frame()
}

In [14]:
v4c = merge(mat,v4c, by="locus" )

In [17]:
# Linearly rescale x so that its minimum maps to 0 and its maximum to 1.
range01 <- function(x) {
    lo <- min(x)
    hi <- max(x)
    (x - lo) / (hi - lo)
}

In [18]:
sp = split(v4c, v4c$locus)

In [19]:
# Scale the scores to [0, 1] within each locus. ave() writes every value back
# to its own row, so the result no longer depends on v4c being sorted by locus
# (unlist(lapply(split(...))) returns values grouped by locus, which only lines
# up with the rows when the table is already in that order).
v4c$coaccess = ave(v4c$score, v4c$locus, FUN = range01)

In [454]:
# Virtual-4C tracks for the MNX1 locus: enhancer viewpoint (a = untreated,
# b = cytokine) and promoter viewpoint (c = untreated, d = cytokine).
# `b` previously selected the SOCS1_enh rows, which lie on a different
# chromosome than the MNX1 window it is plotted against as comparison track.
a = v4c[ v4c$locus =='MNX1_enh' & v4c$condition =='unt'  , c('Peak1', 'Peak2', 'coaccess')]
b = v4c[ v4c$locus =='MNX1_enh' & v4c$condition =='cyt'  , c('Peak1', 'Peak2', 'coaccess')]
c = v4c[ v4c$locus =='MNX1_prom' & v4c$condition =='unt'  , c('Peak1', 'Peak2', 'coaccess')]
d = v4c[ v4c$locus =='MNX1_prom' & v4c$condition =='cyt'  , c('Peak1', 'Peak2', 'coaccess')]

In [390]:
# Virtual-4C arcs at chr7:156781418-157016341, viewpoint
# chr7_157001443_157002210: `a` as main track, `b` as comparison track, both
# drawn in forestgreen.
pdf("MNX_untcyt_enh_cp_v4c.pdf")
plot_connections(a, "chr7", 156781418,157016341, alpha_by_coaccess = TRUE,
                 viewpoint = 'chr7_157001443_157002210',
                  connection_color="forestgreen",comparison_connection_color="forestgreen",
                gene_model = gene_annotation, comparison_track = b)
dev.off()

"no non-missing arguments to max; returning -Inf"
"comparison_ymax calc failed"
"Nothing to plot"


In [360]:
# Virtual-4C arcs at chr7:156781418-157016341, viewpoint
# chr7_156781418_156803890: `c` as main track, `d` as comparison track.
pdf("MNX_untcyt_prom_cp_v4c.pdf")
plot_connections(c, "chr7", 156781418,157016341, alpha_by_coaccess = TRUE,
                 viewpoint = 'chr7_156781418_156803890',
                gene_model = gene_annotation, 
                 connection_color="forestgreen",comparison_connection_color="forestgreen",
                 comparison_track = d)
dev.off()

In [437]:
# Virtual-4C tracks for the SOCS1 locus: enhancer viewpoint (a = cytokine,
# b = untreated) and promoter viewpoint (c = cytokine, d = untreated).
a = v4c[ v4c$locus =='SOCS1_enh' & v4c$condition =='cyt'  , c('Peak1', 'Peak2', 'coaccess')]
b = v4c[ v4c$locus =='SOCS1_enh' & v4c$condition =='unt'  , c('Peak1', 'Peak2', 'coaccess')]
c = v4c[ v4c$locus =='SOCS1_prom' & v4c$condition =='cyt'  , c('Peak1', 'Peak2', 'coaccess')]
d = v4c[ v4c$locus =='SOCS1_prom' & v4c$condition =='unt'  , c('Peak1', 'Peak2', 'coaccess')]

In [366]:
# Virtual-4C arcs at chr16:11340000-11420000, viewpoint
# chr16_11401017_11401743: `a` as main track, `b` as comparison track.
pdf("SOCS1_untcyt_enh_cp_v4c.pdf")
plot_connections(a, "chr16", 11340000,11420000,alpha_by_coaccess = TRUE,
                 viewpoint = 'chr16_11401017_11401743',
                gene_model = gene_annotation,  
                  connection_color="forestgreen",comparison_connection_color="forestgreen",
                 comparison_track = b )
dev.off()

"no non-missing arguments to max; returning -Inf"
"comparison_ymax calc failed"
"Nothing to plot"


In [367]:
# Virtual-4C arcs at chr16:11340000-11420000, viewpoint
# chr16_11340000_11352536: `c` as main track, `d` as comparison track.
pdf("SOCS1_untcyt_prom_cp_v4c.pdf")
plot_connections(c, "chr16", 11340000,11420000,alpha_by_coaccess = TRUE,
                 viewpoint = 'chr16_11340000_11352536',
                gene_model = gene_annotation,  
                  connection_color="forestgreen",comparison_connection_color="forestgreen",
                 comparison_track = d )
dev.off()

In [451]:
# Virtual-4C tracks for the BCL6 locus: enhancer viewpoint (a = cytokine,
# b = untreated) and promoter viewpoint (c = cytokine, d = untreated).
a = v4c[ v4c$locus =='BCL6_enh' & v4c$condition =='cyt'  , c('Peak1', 'Peak2', 'coaccess')]
b = v4c[ v4c$locus =='BCL6_enh' & v4c$condition =='unt'  , c('Peak1', 'Peak2', 'coaccess')]
c = v4c[ v4c$locus =='BCL6_prom' & v4c$condition =='cyt'  , c('Peak1', 'Peak2', 'coaccess')]
d = v4c[ v4c$locus =='BCL6_prom' & v4c$condition =='unt'  , c('Peak1', 'Peak2', 'coaccess')]

In [373]:
# Virtual-4C arcs at chr3:187410164-187750682, viewpoint
# chr3_187724421_187725031: `a` as main track, `b` as comparison track.
pdf("BCL6_untcyt_enh_cp_v4c.pdf")
plot_connections(a, 'chr3', 187410164, 187750682, alpha_by_coaccess = TRUE,
                 viewpoint = 'chr3_187724421_187725031',
                gene_model = gene_annotation,  
                 connection_color="forestgreen",comparison_connection_color="forestgreen",
                 comparison_track = b )
dev.off()

In [374]:
# Virtual-4C arcs at chr3:187410164-187750682, viewpoint
# chr3_187463067_187463728: `c` as main track, `d` as comparison track.
pdf("BCL6_untcyt_prom_cp_v4c.pdf")
plot_connections(c, 'chr3', 187410164, 187750682, alpha_by_coaccess = TRUE,
                 viewpoint = 'chr3_187463067_187463728',
                gene_model = gene_annotation,  
                 connection_color="forestgreen",comparison_connection_color="forestgreen",
                 comparison_track = d )
dev.off()

In [452]:
# Tracks for the combined BCL6 figure, returned as lists instead of being drawn.
# A / B: virtual-4C tracks `a` and `b`; only A carries the gene model.
A=plot_connections(a, 'chr3', 187410164, 187750682, alpha_by_coaccess = TRUE, connection_color=4,return_as_list = TRUE,gene_model = gene_annotation)
B=plot_connections(b, 'chr3', 187410164, 187750682, alpha_by_coaccess = TRUE, connection_color=4,return_as_list = TRUE)

# CY / UN: co-accessibility links in cytokine-treated and untreated cells.
# UN is now built from cp_unt; it was built from cp_cyt, which made the
# "untreated" track a copy of the cytokine one.
CY=plot_connections(cp_cyt, 'chr3', 187410164, 187750682, alpha_by_coaccess = TRUE,return_as_list = TRUE)
UN=plot_connections(cp_unt, 'chr3', 187410164, 187750682, alpha_by_coaccess = TRUE, return_as_list = TRUE)

# First element of each returned list, plus the third element of A.
plot_list= list(CY[[1]], UN[[1]], A[[1]], B[[1]],  A[[3]])

In [453]:
# Draw the tracks collected in plot_list for chr3:187410164-187750682 into a
# 5 x 2.5 inch PDF.
pdf("BCL6.pdf",width = 5, height = 2.5)

Gviz::plotTracks(plot_list,
               # sizes = c(2,2,2,2,1,1),
                from = 187410164, to = 187750682, chromosome = "chr3", 
                transcriptAnnotation = "symbol",
                col.axis = "black", cex.axis=1 ,
                fontsize.group = 6,
                fontcolor.legend = "black",
                title.width = 2, background.title = "transparent", 
                col.border.title = "transparent")
               dev.off()

In [441]:
# Track lists for the SOCS1 window (chr16:11340000-11420000): A from the
# virtual-4C table `a` (with gene model), CY from the cytokine co-accessibility
# links. `a` is whatever was last assigned to it before this cell runs.
A=plot_connections(a, "chr16", 11340000,11420000, alpha_by_coaccess = TRUE, connection_color=4,return_as_list = TRUE,gene_model = gene_annotation)

CY=plot_connections(cp_cyt, "chr16", 11340000,11420000, alpha_by_coaccess = TRUE,return_as_list = TRUE)


In [444]:
# CY[[1]] and A[[1]] are each listed twice.
# NOTE(review): presumably this keeps the same five-track layout as the BCL6
# figure when only one condition is shown — confirm the duplication is intended.
plot_list= list(CY[[1]], CY[[1]], A[[1]], A[[1]],  A[[3]])

In [449]:
# Draw the tracks collected in plot_list for chr16:11340000-11420000 into a
# 5 x 2.5 inch PDF.
pdf("SOCS1.pdf", width = 5, height = 2.5)

Gviz::plotTracks(plot_list,
                #sizes = c(2,2,2,2,1,1),
                from = 11340000, to = 11420000, chromosome = "chr16", 
                transcriptAnnotation = "symbol",
                col.axis = "black", cex.axis=1 ,
                fontsize.group = 6,
                fontcolor.legend = "black",
                title.width = 2, background.title = "transparent", 
                col.border.title = "transparent")
               dev.off()

In [455]:
# Track lists for the MNX1 window (chr7:156781418-157016341): A from the
# virtual-4C table `a` (with gene model), UN from the untreated
# co-accessibility links. `a` is whatever was last assigned to it before this
# cell runs.
A=plot_connections(a, "chr7", 156781418,157016341, alpha_by_coaccess = TRUE, connection_color=4,return_as_list = TRUE,gene_model = gene_annotation)

UN=plot_connections(cp_unt, "chr7", 156781418,157016341, alpha_by_coaccess = TRUE,return_as_list = TRUE)


In [456]:
# UN[[1]] and A[[1]] are each listed twice.
# NOTE(review): presumably this keeps the same five-track layout as the BCL6
# figure when only one condition is shown — confirm the duplication is intended.
plot_list= list(UN[[1]], UN[[1]], A[[1]], A[[1]],  A[[3]])

In [457]:

# Draw the tracks collected in plot_list for chr7:156781418-157016341 into a
# 5 x 2.5 inch PDF.
pdf("MNX1.pdf", width = 5, height = 2.5)

Gviz::plotTracks(plot_list,
                #sizes = c(2,2,2,2,1,1),
                from = 156781418, to = 157016341, chromosome = "chr7", 
                transcriptAnnotation = "symbol",
                col.axis = "black", cex.axis=1 ,
                fontsize.group = 6,
                fontcolor.legend = "black",
                title.width = 2, background.title = "transparent", 
                col.border.title = "transparent")
               dev.off()



### Socs1 value for paper


In [36]:
# Per-chromosome table from the MAPS output directory of the cytokine sample;
# x selects the chromosome number used in the file name.
x=16
cyt_dir = '/nfs/lab/mokino/SP21/endoC_HiChIP/17may21_cyt_newpeaks/MAPS_output/A471_Arima_EndoC_Lo_S3_L002_current/'
ori = read.table(paste0(cyt_dir, 'reg_raw.chr', x,'.A471_Arima_EndoC_Lo_S3_L002.5k.and.MAPS2_pospoisson'))

# Bin pairs whose first bin lies after 11340000 and whose second bin lies
# before 11402000 (the SOCS1 window plotted earlier starts at 11340000).
# Note: `sub` shadows base::sub as a variable name from here on.
sub = subset(ori, bin1_mid> 11340000 & bin2_mid < 11402000)

In [37]:
sub$Peak = paste("chr16", sub$bin1_mid , sub$bin1_mid + 5000, sep="_")

In [45]:
head(sub)

Unnamed: 0_level_0,bin1_mid,bin2_mid,count,X1D_peak_bin1,X1D_peak_bin2,effective_length1,gc1,mappability1,short_count1,effective_length2,⋯,logShortCount,chr,expected,p_val,expected2,ratio2,p_val_reg2,p_bonferroni,fdr,Peak
Unnamed: 0_level_1,<dbl>,<dbl>,<int>,<int>,<int>,<int>,<dbl>,<dbl>,<int>,<int>,⋯,<dbl>,<fct>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>
420515,11345000,11390000,13,1,1,5520,0.5669,0.9765,104,5081,⋯,-2.723959,chr16,12.46823,0.368709668,11.549802,1.1255604,0.2719631555,891663.025,0.801573409,chr16_11345000_11350000
420614,11350000,11390000,51,1,1,5154,0.5414,0.9828,400,5081,⋯,-1.383958,chr16,33.04529,0.001385866,29.538099,1.7265837,0.0001169019,383.2765,0.008351577,chr16_11350000_11355000
420715,11370000,11390000,30,1,1,5118,0.5238,0.8978,91,5081,⋯,-2.856131,chr16,21.66115,0.034239708,20.488465,1.4642385,0.0180766183,59266.3079,0.320198765,chr16_11370000_11375000
426815,11345000,11400000,11,1,1,5520,0.5669,0.9765,104,5627,⋯,-3.427415,chr16,6.6932,0.040732043,6.310895,1.7430175,0.0280974804,92120.8768,0.408896388,chr16_11345000_11350000
426915,11350000,11400000,28,1,1,5154,0.5414,0.9828,400,5627,⋯,-2.087414,chr16,17.42389,0.006855663,15.837987,1.7679015,0.0018963322,6217.347,0.068979462,chr16_11350000_11355000
427012,11370000,11400000,7,1,1,5118,0.5238,0.8978,91,5627,⋯,-3.559587,chr16,10.09395,0.788155949,9.68398,0.7228433,0.750002261,2458970.163,0.842282792,chr16_11370000_11375000


In [39]:
m = merge(v4c, sub, by.x="Peak2", by.y="Peak")

In [46]:
head(v4c)

Unnamed: 0_level_0,locus,Peak1,V1,V2,V3,V4,Peak2,score,condition,coaccess
Unnamed: 0_level_1,<fct>,<fct>,<fct>,<int>,<int>,<int>,<chr>,<int>,<chr>,<dbl>
1,BCL6_enh,chr3_187724421_187725031,chr3,187465000,187470000,7,chr3_187465000_187470000,7,cyt,0.33333333
2,BCL6_enh,chr3_187724421_187725031,chr3,188010000,188015000,3,chr3_188010000_188015000,3,cyt,0.11111111
3,BCL6_enh,chr3_187724421_187725031,chr3,188010000,188015000,3,chr3_188010000_188015000,3,unt,0.11111111
4,BCL6_enh,chr3_187724421_187725031,chr3,188150000,188155000,2,chr3_188150000_188155000,2,cyt,0.05555556
5,BCL6_enh,chr3_187724421_187725031,chr3,187870000,187875000,2,chr3_187870000_187875000,2,cyt,0.05555556
6,BCL6_enh,chr3_187724421_187725031,chr3,187465000,187470000,8,chr3_187465000_187470000,8,unt,0.38888889


In [43]:
m= subset(m, locus=="SOCS1_enh" & coaccess >0.1 & bin2_mid > 11397000)

In [44]:
m[,]

Unnamed: 0,2,5,12,15
Peak2,chr16_11345000_11350000,chr16_11350000_11355000,chr16_11370000_11375000,chr16_11390000_11395000
locus,SOCS1_enh,SOCS1_enh,SOCS1_enh,SOCS1_enh
Peak1,chr16_11401017_11401743,chr16_11401017_11401743,chr16_11401017_11401743,chr16_11401017_11401743
V1,chr16,chr16,chr16,chr16
V2,11345000,11350000,11370000,11390000
V3,11350000,11355000,11375000,11395000
V4,11,28,7,48
score,11,28,7,48
condition,cyt,cyt,cyt,cyt
coaccess,0.2127660,0.5744681,0.1276596,1.0000000


In [58]:
m[,c(11:13,16,19,20,23, 31:35,37)]

Unnamed: 0_level_0,bin1_mid,bin2_mid,count,effective_length1,short_count1,effective_length2,short_count2,expected,p_val,expected2,ratio2,p_val_reg2,fdr
Unnamed: 0_level_1,<dbl>,<dbl>,<int>,<int>,<int>,<int>,<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
2,11345000,11400000,11,5520,104,5627,192,6.6932,0.040732043,6.310895,1.7430175,0.02809748,0.40889639
5,11350000,11400000,28,5154,400,5627,192,17.42389,0.006855663,15.837987,1.7679015,0.001896332,0.06897946
12,11370000,11400000,7,5118,91,5627,192,10.09395,0.788155949,9.68398,0.7228433,0.750002261,0.84228279
15,11390000,11400000,48,5081,389,5627,192,54.70552,0.797471749,51.150272,0.9384114,0.636932224,0.80394023


In [59]:
m[,c(11:13,19,23, 31:35,37)]

Unnamed: 0_level_0,bin1_mid,bin2_mid,count,short_count1,short_count2,expected,p_val,expected2,ratio2,p_val_reg2,fdr
Unnamed: 0_level_1,<dbl>,<dbl>,<int>,<int>,<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
2,11345000,11400000,11,104,192,6.6932,0.040732043,6.310895,1.7430175,0.02809748,0.40889639
5,11350000,11400000,28,400,192,17.42389,0.006855663,15.837987,1.7679015,0.001896332,0.06897946
12,11370000,11400000,7,91,192,10.09395,0.788155949,9.68398,0.7228433,0.750002261,0.84228279
15,11390000,11400000,48,389,192,54.70552,0.797471749,51.150272,0.9384114,0.636932224,0.80394023
