**Set environment**

In [1]:
### load the project configuration script; messages and warnings raised while sourcing are hidden
suppressMessages(suppressWarnings(source("../config/config_sing.R")))
### print the environment summary (show_env is not defined in this notebook;
### presumably it comes from the sourced config script -- TODO confirm)
show_env()

You are in Singularity: singularity_proj_encode_fcc 
BASE DIRECTORY (FD_BASE): /data/reddylab/Kuei 
WORK DIRECTORY (FD_WORK): /data/reddylab/Kuei/out 
CODE DIRECTORY (FD_CODE): /data/reddylab/Kuei/code 
PATH OF PROJECT (FD_PRJ): /data/reddylab/Kuei/code/Proj_CombEffect_ENCODE_FCC 
PATH OF RESULTS (FD_RES): /data/reddylab/Kuei/out/proj_combeffect_encode_fcc 
PATH OF LOG     (FD_LOG): /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/log 


## Import annotation data

In [2]:
### directory that holds the per-annotation summary tables
fdiry  = file.path(FD_RES, "results", "region", "KS91_K562_ASTARRseq_peak_macs_input", "summary")
fnames = dir(fdiry)

### one label per file: split the file name on "." and
### drop the first two tokens and the last one
vec = unlist(lapply(str_split(fnames, "\\."), function(tokens){
    tokens = tokens[-c(1, 2, length(tokens))]
    paste(tokens, collapse=".")
}))

### read every table; the resulting list is named by the labels above
lst = lapply(file.path(fdiry, fnames), read_tsv, show_col_types = FALSE)
names(lst) = vec

### assign and show
lst_dat_annot = lst
print(names(lst_dat_annot))
head(lst_dat_annot[[1]])

 [1] "ccre_v3"                         "ccre_v4"                        
 [3] "chipseq_histone"                 "chipseq_tf_subset"              
 [5] "chipseq_tf"                      "chromHMM"                       
 [7] "crispri_e2g"                     "crispri_growth_dhs"             
 [9] "crispri_hcrff_casa"              "enhancer_zscore_junke.ASTARR_A" 
[11] "enhancer_zscore_junke.ASTARR_AB" "enhancer_zscore_junke.ASTARR_R" 
[13] "enhancer_zscore_junke.ASTARR"    "enhancer_zscore_junke.LMPRA_A"  
[15] "enhancer_zscore_junke.LMPRA_AB"  "enhancer_zscore_junke.LMPRA_R"  
[17] "enhancer_zscore_junke.LMPRA"     "enhancer_zscore_junke.STARRMPRA"
[19] "enhancer_zscore_junke.TMPRA_A"   "enhancer_zscore_junke.TMPRA_R"  
[21] "enhancer_zscore_junke.TMPRA"     "enhancer_zscore_junke.WSTARR_A" 
[23] "enhancer_zscore_junke.WSTARR_AB" "enhancer_zscore_junke.WSTARR_R" 
[25] "enhancer_zscore_junke.WSTARR"    "tss_pol2"                       


Peak,Annotation,Label,Count,Region,Score,Note
<chr>,<chr>,<chr>,<dbl>,<chr>,<dbl>,<chr>
chr1:180982-182087,ccre_v3,DNase-only,1,chr1:181251-181601,0,EH38E1310153:DNase-only
chr1:777949-779437,ccre_v3,PLS,1,chr1:779086-779355,0,EH38E1310159:PLS
chr1:777949-779437,ccre_v3,"PLS,CTCF-bound",1,chr1:778562-778912,0,"EH38E1310158:PLS,CTCF-bound"
chr1:816774-817547,ccre_v3,Low-DNase,1,chr1:817080-817403,0,EH38E1310166:Low-DNase
chr1:817905-818348,ccre_v3,Low-DNase,1,chr1:817903-818252,0,EH38E1310167:Low-DNase
chr1:818602-819380,ccre_v3,Low-DNase,1,chr1:818718-818872,0,EH38E1310168:Low-DNase


## Setup groups for annotation

In [3]:
### assay tags of the enhancer z-score tables
vec_assay = c(
    "ASTARR_AB", "ASTARR_A", "ASTARR_R",
    "WSTARR_AB", "WSTARR_A", "WSTARR_R",
    "TMPRA_A", "TMPRA_R",
    "LMPRA_AB", "LMPRA_A", "LMPRA_R")

### enhancer z-score tables first, then the CRISPRi tables,
### then the TSS / cCRE / ChromHMM tables
vec = c(
    paste0("enhancer_zscore_junke.", vec_assay),
    "crispri_hcrff_casa", "crispri_growth_dhs", "crispri_e2g",
    "tss_pol2", "ccre_v3", "ccre_v4", "chromHMM"
)

### assign and show
vec_annot_genomic = vec
print(vec_annot_genomic)

 [1] "enhancer_zscore_junke.ASTARR_AB" "enhancer_zscore_junke.ASTARR_A" 
 [3] "enhancer_zscore_junke.ASTARR_R"  "enhancer_zscore_junke.WSTARR_AB"
 [5] "enhancer_zscore_junke.WSTARR_A"  "enhancer_zscore_junke.WSTARR_R" 
 [7] "enhancer_zscore_junke.TMPRA_A"   "enhancer_zscore_junke.TMPRA_R"  
 [9] "enhancer_zscore_junke.LMPRA_AB"  "enhancer_zscore_junke.LMPRA_A"  
[11] "enhancer_zscore_junke.LMPRA_R"   "crispri_hcrff_casa"             
[13] "crispri_growth_dhs"              "crispri_e2g"                    
[15] "tss_pol2"                        "ccre_v3"                        
[17] "ccre_v4"                         "chromHMM"                       


In [4]:
### group name (used as the output folder name when saving) -> labels of the annotation tables in that group
lst_annot_group = list(
    annotation_genomic           = vec_annot_genomic,
    annotation_chipseq_histone   = "chipseq_histone",
    annotation_chipseq_tf_subset = "chipseq_tf_subset",
    annotation_chipseq_tf        = "chipseq_tf"
)

## Save results

In [5]:
### Write one RDS file per Annotation-Label pair, one output folder per annotation group.
### Each file holds a one-element named list: the name is "<Annotation>:<Label>"
### and the value is the vector of peaks carrying that label.
folders = names(lst_annot_group)
for (folder in folders){
    ### labels of the annotation tables that belong to this group
    vec_annot_name = lst_annot_group[[folder]]
    
    ### report the group that is being processed
    cat(folder, "\n")
    print(vec_annot_name)
    cat("\n")
    flush.console()
    
    ### the output directory is the same for every file of the group;
    ### create it up front because saveRDS() does not create missing directories
    fdiry = file.path(
        FD_RES, 
        "results", 
        "region", 
        "KS91_K562_ASTARRseq_peak_macs_input", 
        "enrichment_prepare_annotation",
        folder)
    dir.create(fdiry, recursive = TRUE, showWarnings = FALSE)
    
    ### tables of this group
    lst_dat = lst_dat_annot[vec_annot_name]
    
    for (dat in lst_dat){
        
        ### peaks grouped by label; the annotation name does not change within a table
        lst = split(dat$Peak, dat$Label)
        ant = unique(dat$Annotation)
        
        for (idx in names(lst)){
            
            ### tag used in the file name: "-", "," and "/" are replaced by "_"
            txt = paste(ant, idx, sep=".")
            txt = str_replace_all(txt, pattern = "-|,|/", replacement = "_")
            cat("Annotation-Label:", txt, "\n")
            
            ### output file
            fname = paste("annot", txt, "rds", sep=".")
            fpath = file.path(fdiry, fname)
            print(fname)
            
            ### store the peaks of this label under the name "<Annotation>:<Label>"
            txt = paste(ant, idx, sep=":")
            lst_annot = lst[idx]
            names(lst_annot) = txt
            saveRDS(lst_annot, file = fpath)
            print(txt)
            
            ### push the progress messages to the notebook right away
            flush.console()
        }
    }
    cat("\n")
    
}

annotation_genomic 
 [1] "enhancer_zscore_junke.ASTARR_AB" "enhancer_zscore_junke.ASTARR_A" 
 [3] "enhancer_zscore_junke.ASTARR_R"  "enhancer_zscore_junke.WSTARR_AB"
 [5] "enhancer_zscore_junke.WSTARR_A"  "enhancer_zscore_junke.WSTARR_R" 
 [7] "enhancer_zscore_junke.TMPRA_A"   "enhancer_zscore_junke.TMPRA_R"  
 [9] "enhancer_zscore_junke.LMPRA_AB"  "enhancer_zscore_junke.LMPRA_A"  
[11] "enhancer_zscore_junke.LMPRA_R"   "crispri_hcrff_casa"             
[13] "crispri_growth_dhs"              "crispri_e2g"                    
[15] "tss_pol2"                        "ccre_v3"                        
[17] "ccre_v4"                         "chromHMM"                       

Annotation-Label: Enhancer_ZScore.ASTARR_AB 
[1] "annot.Enhancer_ZScore.ASTARR_AB.rds"
[1] "Enhancer_ZScore:ASTARR_AB"
Annotation-Label: Enhancer_ZScore.ASTARR_A 
[1] "annot.Enhancer_ZScore.ASTARR_A.rds"
[1] "Enhancer_ZScore:ASTARR_A"
Annotation-Label: Enhancer_ZScore.ASTARR_R 
[1] "annot.Enhancer_ZScore.ASTARR_R.rds"
[1

In [12]:
### Dry run: print the file name and the list name that would be used for every
### Annotation-Label pair of the genomic annotation tables. Nothing is written.
folder  = "enrichment_prepare_annot_others"

### select the tables through the explicit vector instead of the scratch variable `vec`,
### which is reassigned in several cells and therefore depends on execution order
lst_dat = lst_dat_annot[vec_annot_genomic]

### the output directory is the same for every file
fdiry = file.path(FD_RES, "results", "region", "KS91_K562_ASTARRseq_peak_macs_input", folder)

for (dat in lst_dat){
    ### peaks grouped by label
    lst = split(dat$Peak, dat$Label)
    ant = unique(dat$Annotation)
    
    cat("Annotation:", ant, "\n")
    for (idx in names(lst)){
        
        ### file name: "-", "," and "/" are replaced by "_"
        fname = paste("annot", ant, idx, "rds", sep=".")
        fname = str_replace_all(fname, pattern = "-|,|/", replacement = "_")
        fpath = file.path(fdiry, fname)
        print(fname)

        ### name the saved list element would get
        txt = paste(ant, idx, sep=":")
        print(txt)
    }
}

Annotation: Enhancer_ZScore 
[1] "annot.Enhancer_ZScore.ASTARR_AB.rds"
[1] "Enhancer_ZScore:ASTARR_AB"
Annotation: Enhancer_ZScore 
[1] "annot.Enhancer_ZScore.ASTARR_A.rds"
[1] "Enhancer_ZScore:ASTARR_A"
Annotation: Enhancer_ZScore 
[1] "annot.Enhancer_ZScore.ASTARR_R.rds"
[1] "Enhancer_ZScore:ASTARR_R"
Annotation: Enhancer_ZScore 
[1] "annot.Enhancer_ZScore.WSTARR_AB.rds"
[1] "Enhancer_ZScore:WSTARR_AB"
Annotation: Enhancer_ZScore 
[1] "annot.Enhancer_ZScore.WSTARR_A.rds"
[1] "Enhancer_ZScore:WSTARR_A"
Annotation: Enhancer_ZScore 
[1] "annot.Enhancer_ZScore.WSTARR_R.rds"
[1] "Enhancer_ZScore:WSTARR_R"
Annotation: Enhancer_ZScore 
[1] "annot.Enhancer_ZScore.TMPRA_A.rds"
[1] "Enhancer_ZScore:TMPRA_A"
Annotation: Enhancer_ZScore 
[1] "annot.Enhancer_ZScore.TMPRA_R.rds"
[1] "Enhancer_ZScore:TMPRA_R"
Annotation: Enhancer_ZScore 
[1] "annot.Enhancer_ZScore.LMPRA_AB.rds"
[1] "Enhancer_ZScore:LMPRA_AB"
Annotation: Enhancer_ZScore 
[1] "annot.Enhancer_ZScore.LMPRA_A.rds"
[1] "Enhancer_ZScore:L

In [14]:
### Save one RDS file per Annotation-Label pair of the genomic annotation tables.
### Each file holds a one-element named list: the name is "<Annotation>:<Label>"
### and the value is the vector of peaks carrying that label.
folder  = "enrichment_prepare_annot_others"

### select the tables through the explicit vector instead of the scratch variable `vec`,
### which is reassigned in several cells and therefore depends on execution order
lst_dat = lst_dat_annot[vec_annot_genomic]

### the output directory is the same for every file; create it up front
### because saveRDS() does not create missing directories
fdiry = file.path(FD_RES, "results", "region", "KS91_K562_ASTARRseq_peak_macs_input", folder)
dir.create(fdiry, recursive = TRUE, showWarnings = FALSE)

for (dat in lst_dat){
    ### peaks grouped by label
    lst = split(dat$Peak, dat$Label)
    ant = unique(dat$Annotation)
    
    cat("Annotation:", ant, "\n")
    for (idx in names(lst)){
        
        ### file name: "-", "," and "/" are replaced by "_"
        fname = paste("annot", ant, idx, "rds", sep=".")
        fname = str_replace_all(fname, pattern = "-|,|/", replacement = "_")
        fpath = file.path(fdiry, fname)
        print(fname)

        ### name of the saved list element
        txt = paste(ant, idx, sep=":")
        print(txt)
        
        ### store the peaks of this label under that name
        tmp = lst[idx]
        names(tmp) = txt
        saveRDS(tmp, file = fpath)
    }
}

Annotation: Enhancer_ZScore 
[1] "annot.Enhancer_ZScore.ASTARR_AB.rds"
[1] "Enhancer_ZScore:ASTARR_AB"
Annotation: Enhancer_ZScore 
[1] "annot.Enhancer_ZScore.ASTARR_A.rds"
[1] "Enhancer_ZScore:ASTARR_A"
Annotation: Enhancer_ZScore 
[1] "annot.Enhancer_ZScore.ASTARR_R.rds"
[1] "Enhancer_ZScore:ASTARR_R"
Annotation: Enhancer_ZScore 
[1] "annot.Enhancer_ZScore.WSTARR_AB.rds"
[1] "Enhancer_ZScore:WSTARR_AB"
Annotation: Enhancer_ZScore 
[1] "annot.Enhancer_ZScore.WSTARR_A.rds"
[1] "Enhancer_ZScore:WSTARR_A"
Annotation: Enhancer_ZScore 
[1] "annot.Enhancer_ZScore.WSTARR_R.rds"
[1] "Enhancer_ZScore:WSTARR_R"
Annotation: Enhancer_ZScore 
[1] "annot.Enhancer_ZScore.TMPRA_A.rds"
[1] "Enhancer_ZScore:TMPRA_A"
Annotation: Enhancer_ZScore 
[1] "annot.Enhancer_ZScore.TMPRA_R.rds"
[1] "Enhancer_ZScore:TMPRA_R"
Annotation: Enhancer_ZScore 
[1] "annot.Enhancer_ZScore.LMPRA_AB.rds"
[1] "Enhancer_ZScore:LMPRA_AB"
Annotation: Enhancer_ZScore 
[1] "annot.Enhancer_ZScore.LMPRA_A.rds"
[1] "Enhancer_ZScore:L

In [3]:
### list the files currently present in the output folder
folder = "enrichment_prepare_annot_others"
fdiry  = file.path(
    FD_RES,
    "results",
    "region",
    "KS91_K562_ASTARRseq_peak_macs_input",
    folder)
list.files(fdiry)

In [5]:
### show the full path of the output folder
folder = "enrichment_prepare_annot_others"
fdiry  = file.path(
    FD_RES,
    "results",
    "region",
    "KS91_K562_ASTARRseq_peak_macs_input",
    folder)
print(fdiry)

[1] "/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/enrichment_prepare_annot_others"


In [None]:
### Dry run: print the file name and the list name for every Annotation-Label pair.
### NOTE: the dat_peak_annot_* tables are created by the "Import data" cells
### further down in this notebook, so those cells must be run first.
folder  = "enrichment_prepare_annot_others"
lst_dat = list(
    dat_peak_annot_casa,
    dat_peak_annot_enh,
    dat_peak_annot_enh_zscore,
    dat_peak_annot_ccre_v3,
    dat_peak_annot_ccre_v4,
    dat_peak_annot_tss_pol2,
    dat_peak_annot_chromhmm
)

for (dat in lst_dat){
    ### peaks grouped by label
    lst = split(dat$Peak, dat$Label)
    ant = unique(dat$Annotation)
    
    cat("Annotation:", ant, "\n")
    for (idx in names(lst)){
        
        ### file name "annot_<Annotation>_<Label>.rds"; "-", "," and "/" become "_"
        fname = str_replace_all(paste(ant, idx, sep="_"), pattern = "-|,|/", replacement = "_")
        fname = paste0("annot_", fname, ".rds")
        fdiry = file.path(
            FD_RES, "results", "region",
            "KS91_K562_ASTARRseq_peak_macs_input", folder)
        fpath = file.path(fdiry, fname)
        print(fname)

        ### name the saved list element would get
        txt = paste(ant, idx, sep=":")
        print(txt)
    }
}

## Import data

**Import annotation: CRISPR CASA peaks**

In [2]:
### location of the CRISPRi HCR-FlowFISH CASA summary table
fname = "peak.summary.crispri_hcrff_casa.tsv"
fdiry = file.path(
    FD_RES, "results", "region",
    "KS91_K562_ASTARRseq_peak_macs_input", "summary")
fpath = file.path(fdiry, fname)

### read the table and set the Label column to one constant value
dat = dplyr::mutate(read_tsv(fpath, show_col_types = FALSE), Label = "CRISPR_CASA")

### assign and show
dat_peak_annot_casa = dat
print(dim(dat_peak_annot_casa))
head(dat_peak_annot_casa)

[1] 57  5


Peak,Annotation,Label,Count,Note
<chr>,<chr>,<chr>,<dbl>,<chr>
chr11_33868682_33871379,CRISPRi_HCRFF,CRISPR_CASA,1,LMO2
chr11_33880631_33881416,CRISPRi_HCRFF,CRISPR_CASA,1,LMO2
chr11_33881831_33882405,CRISPRi_HCRFF,CRISPR_CASA,1,LMO2
chr11_33936760_33937819,CRISPRi_HCRFF,CRISPR_CASA,1,LMO2
chr11_33941500_33942221,CRISPRi_HCRFF,CRISPR_CASA,1,LMO2
chr11_33944510_33945297,CRISPRi_HCRFF,CRISPR_CASA,3,CAPRIN1|CAT|LMO2


**Import annotation: Junke's Enhancer calls**

In [3]:
### location of the enhancer-call summary table
fname = "peak.summary.enhancer_junke_peak_starr.tsv"
fdiry = file.path(
    FD_RES, "results", "region",
    "KS91_K562_ASTARRseq_peak_macs_input", "summary")
fpath = file.path(fdiry, fname)

### read the table
dat_peak_annot_enh = read_tsv(fpath, show_col_types = FALSE)
dat = dat_peak_annot_enh

### show
print(dim(dat_peak_annot_enh))
head(dat_peak_annot_enh)

[1] 29012     5


Peak,Annotation,Label,Count,Note
<chr>,<chr>,<chr>,<dbl>,<chr>
chr10_100009096_100010466,Enhancer,Enhancer_STARR,2,WSTARR|WSTARR
chr10_100185017_100187275,Enhancer,Enhancer_STARR,1,WSTARR
chr10_100228452_100230090,Enhancer,Enhancer_STARR,1,ASTARR
chr10_100267066_100268374,Enhancer,Enhancer_STARR,2,ASTARR|WSTARR
chr10_100285974_100287341,Enhancer,Enhancer_STARR,2,WSTARR|WSTARR
chr10_100346328_100347693,Enhancer,Enhancer_STARR,1,WSTARR


In [4]:
### location of the z-score based enhancer-call summary table
fname = "peak.summary.enhancer_zscore_junke_peak_starr.tsv"
fdiry = file.path(
    FD_RES, "results", "region",
    "KS91_K562_ASTARRseq_peak_macs_input", "summary")
fpath = file.path(fdiry, fname)

### read the table
dat_peak_annot_enh_zscore = read_tsv(fpath, show_col_types = FALSE)
dat = dat_peak_annot_enh_zscore

### show
print(dim(dat_peak_annot_enh_zscore))
head(dat_peak_annot_enh_zscore)

[1] 14856     5


Peak,Annotation,Label,Count,Note
<chr>,<chr>,<chr>,<dbl>,<chr>
chr10_100185017_100187275,Enhancer,Enhancer_ZScore_STARR,1,WSTARR
chr10_100346328_100347693,Enhancer,Enhancer_ZScore_STARR,2,WSTARR|WSTARR
chr10_100373060_100374238,Enhancer,Enhancer_ZScore_STARR,2,WSTARR|WSTARR
chr10_100528900_100530538,Enhancer,Enhancer_ZScore_STARR,1,WSTARR
chr10_100535377_100536237,Enhancer,Enhancer_ZScore_STARR,1,WSTARR
chr10_100912166_100913618,Enhancer,Enhancer_ZScore_STARR,1,WSTARR


**Import annotation: TSS**

In [5]:
### location of the TSS (Pol2) summary table
fname = "peak.summary.tss_pol2.tsv"
fdiry = file.path(
    FD_RES, "results", "region",
    "KS91_K562_ASTARRseq_peak_macs_input", "summary")
fpath = file.path(fdiry, fname)

### read the table
dat_peak_annot_tss_pol2 = read_tsv(fpath, show_col_types = FALSE)
dat = dat_peak_annot_tss_pol2

### show
print(dim(dat_peak_annot_tss_pol2))
head(dat_peak_annot_tss_pol2)

[1] 15191     4


Peak,Annotation,Label,Count
<chr>,<chr>,<chr>,<dbl>
chr10_100009096_100010466,TSS_POL2,TSS,1
chr10_100185017_100187275,TSS_POL2,TSS,1
chr10_100228452_100230090,TSS_POL2,TSS,1
chr10_100267066_100268374,TSS_POL2,TSS,1
chr10_100285974_100287341,TSS_POL2,TSS,1
chr10_100346328_100347693,TSS_POL2,TSS,1


**Import annotation: cCREs**

In [6]:
### location of the cCRE (v3) summary table
fname = "peak.summary.ccre_v3.tsv"
fdiry = file.path(
    FD_RES, "results", "region",
    "KS91_K562_ASTARRseq_peak_macs_input", "summary")
fpath = file.path(fdiry, fname)

### read the table
dat_peak_annot_ccre_v3 = read_tsv(fpath, show_col_types = FALSE)
dat = dat_peak_annot_ccre_v3

### show
print(dim(dat_peak_annot_ccre_v3))
head(dat_peak_annot_ccre_v3)

[1] 173935      4


Peak,Annotation,Label,Count
<chr>,<chr>,<chr>,<dbl>
chr10_100009096_100010466,ccre_v3,"DNase-H3K4me3,CTCF-bound",2
chr10_100009096_100010466,ccre_v3,Low-DNase,1
chr10_100009096_100010466,ccre_v3,"PLS,CTCF-bound",1
chr10_100020710_100021134,ccre_v3,Low-DNase,2
chr10_100045961_100046653,ccre_v3,"CTCF-only,CTCF-bound",1
chr10_100065094_100065486,ccre_v3,Low-DNase,1


In [7]:
### location of the cCRE (v4) summary table
fname = "peak.summary.ccre_v4.tsv"
fdiry = file.path(
    FD_RES, "results", "region",
    "KS91_K562_ASTARRseq_peak_macs_input", "summary")
fpath = file.path(fdiry, fname)

### read the table
dat_peak_annot_ccre_v4 = read_tsv(fpath, show_col_types = FALSE)
dat = dat_peak_annot_ccre_v4

### show
print(dim(dat_peak_annot_ccre_v4))
head(dat_peak_annot_ccre_v4)

[1] 277309      4


Peak,Annotation,Label,Count
<chr>,<chr>,<chr>,<dbl>
chr10_100009096_100010466,ccre_v4,CA-H3K4me3,2
chr10_100009096_100010466,ccre_v4,Low-DNase,1
chr10_100009096_100010466,ccre_v4,PLS,1
chr10_100009096_100010466,ccre_v4,pELS,1
chr10_100020710_100021134,ccre_v4,Low-DNase,2
chr10_100045961_100046653,ccre_v4,CA-CTCF,1


**Import annotation: ChromHMM**

In [8]:
### location of the ChromHMM summary table
fname = "peak.summary.chromHMM.tsv"
fdiry = file.path(
    FD_RES, "results", "region",
    "KS91_K562_ASTARRseq_peak_macs_input", "summary")
fpath = file.path(fdiry, fname)

### read the table
dat_peak_annot_chromhmm = read_tsv(fpath, show_col_types = FALSE)
dat = dat_peak_annot_chromhmm

### show
print(dim(dat_peak_annot_chromhmm))
head(dat_peak_annot_chromhmm)

[1] 343446      4


Peak,Annotation,Label,Count
<chr>,<chr>,<chr>,<dbl>
chr10_100009096_100010466,ChromHMM,TssFlnk,3
chr10_100009096_100010466,ChromHMM,TssFlnkD,2
chr10_100020710_100021134,ChromHMM,Biv,1
chr10_100020710_100021134,ChromHMM,ReprPC,1
chr10_1000372_1000820,ChromHMM,Tx,1
chr10_100045961_100046653,ChromHMM,Quies,1


**Import annotation: ChIP-seq Histone**

In [9]:
### location of the histone ChIP-seq summary table
fname = "peak.summary.chipseq_histone.tsv"
fdiry = file.path(
    FD_RES, "results", "region",
    "KS91_K562_ASTARRseq_peak_macs_input", "summary")
fpath = file.path(fdiry, fname)

### read the table
dat_peak_annot_chipseq_histone = read_tsv(fpath, show_col_types = FALSE)
dat = dat_peak_annot_chipseq_histone

### show
print(dim(dat_peak_annot_chipseq_histone))
head(dat_peak_annot_chipseq_histone)

[1] 701933      4


Peak,Annotation,Label,Count
<chr>,<chr>,<chr>,<dbl>
chr10_100009096_100010466,ChIPseq_Histone,H3K27ac,1
chr10_100185017_100187275,ChIPseq_Histone,H3K27ac,1
chr10_100228452_100230090,ChIPseq_Histone,H3K27ac,1
chr10_100267066_100268374,ChIPseq_Histone,H3K27ac,1
chr10_100285974_100287341,ChIPseq_Histone,H3K27ac,1
chr10_100289686_100290090,ChIPseq_Histone,H3K27ac,1


**Import annotation: ChIP-seq TF subset**

In [10]:
### location of the TF ChIP-seq (subset) summary table
fname = "peak.summary.chipseq_tf_subset.tsv"
fdiry = file.path(
    FD_RES, "results", "region",
    "KS91_K562_ASTARRseq_peak_macs_input", "summary")
fpath = file.path(fdiry, fname)

### read the table
dat_peak_annot_chipseq_tf_subset = read_tsv(fpath, show_col_types = FALSE)
dat = dat_peak_annot_chipseq_tf_subset

### show
print(dim(dat_peak_annot_chipseq_tf_subset))
head(dat_peak_annot_chipseq_tf_subset)

[1] 270491      4


Peak,Annotation,Label,Count
<chr>,<chr>,<chr>,<dbl>
chr10_100009096_100010466,ChIPseq_TF,YY1_ENCFF398UQZ,1
chr10_100185017_100187275,ChIPseq_TF,YY1_ENCFF398UQZ,1
chr10_100228452_100230090,ChIPseq_TF,YY1_ENCFF398UQZ,1
chr10_100267066_100268374,ChIPseq_TF,YY1_ENCFF398UQZ,1
chr10_100285974_100287341,ChIPseq_TF,YY1_ENCFF398UQZ,2
chr10_100346328_100347693,ChIPseq_TF,YY1_ENCFF398UQZ,1


**Import annotation: ChIP-seq TF**

In [11]:
### location of the TF ChIP-seq (full set) summary table
fname = "peak.summary.chipseq_tf.tsv"
fdiry = file.path(
    FD_RES, "results", "region",
    "KS91_K562_ASTARRseq_peak_macs_input", "summary")
fpath = file.path(fdiry, fname)

### read the table
dat_peak_annot_chipseq_tf = read_tsv(fpath, show_col_types = FALSE)
dat = dat_peak_annot_chipseq_tf

### show
print(dim(dat_peak_annot_chipseq_tf))
head(dat_peak_annot_chipseq_tf)

[1] 5716412       4


Peak,Annotation,Label,Count
<chr>,<chr>,<chr>,<dbl>
chr10_100009096_100010466,ChIPseq_TF,CTCF_ENCFF769AUF,1
chr10_100045961_100046653,ChIPseq_TF,CTCF_ENCFF769AUF,1
chr10_100228452_100230090,ChIPseq_TF,CTCF_ENCFF769AUF,1
chr10_100240591_100241246,ChIPseq_TF,CTCF_ENCFF769AUF,1
chr10_100338359_100339077,ChIPseq_TF,CTCF_ENCFF769AUF,1
chr10_100342247_100342824,ChIPseq_TF,CTCF_ENCFF769AUF,1


## All annotations except ChIP-seq

**Test loop**

In [12]:
### Dry run over all non-ChIP-seq annotation tables: print the file name and the
### list name for every Annotation-Label pair. Nothing is written.
folder  = "enrichment_prepare_annot_others"
lst_dat = list(
    dat_peak_annot_casa,
    dat_peak_annot_enh,
    dat_peak_annot_enh_zscore,
    dat_peak_annot_ccre_v3,
    dat_peak_annot_ccre_v4,
    dat_peak_annot_tss_pol2,
    dat_peak_annot_chromhmm
)

for (dat in lst_dat){
    ### peaks grouped by label
    lst = split(dat$Peak, dat$Label)
    ant = unique(dat$Annotation)
    
    cat("Annotation:", ant, "\n")
    for (idx in names(lst)){
        
        ### file name "annot_<Annotation>_<Label>.rds"; "-", "," and "/" become "_"
        fname = str_replace_all(paste(ant, idx, sep="_"), pattern = "-|,|/", replacement = "_")
        fname = paste0("annot_", fname, ".rds")
        fdiry = file.path(
            FD_RES, "results", "region",
            "KS91_K562_ASTARRseq_peak_macs_input", folder)
        fpath = file.path(fdiry, fname)
        print(fname)

        ### name the saved list element would get
        txt = paste(ant, idx, sep=":")
        print(txt)
    }
}

Annotation: CRISPRi_HCRFF 
[1] "annot_CRISPRi_HCRFF_CRISPR_CASA.rds"
[1] "CRISPRi_HCRFF:CRISPR_CASA"
Annotation: Enhancer 
[1] "annot_Enhancer_Enhancer_STARR.rds"
[1] "Enhancer:Enhancer_STARR"
Annotation: Enhancer 
[1] "annot_Enhancer_Enhancer_ZScore_STARR.rds"
[1] "Enhancer:Enhancer_ZScore_STARR"
Annotation: ccre_v3 
[1] "annot_ccre_v3_CTCF_only_CTCF_bound.rds"
[1] "ccre_v3:CTCF-only,CTCF-bound"
[1] "annot_ccre_v3_dELS.rds"
[1] "ccre_v3:dELS"
[1] "annot_ccre_v3_dELS_CTCF_bound.rds"
[1] "ccre_v3:dELS,CTCF-bound"
[1] "annot_ccre_v3_DNase_H3K4me3.rds"
[1] "ccre_v3:DNase-H3K4me3"
[1] "annot_ccre_v3_DNase_H3K4me3_CTCF_bound.rds"
[1] "ccre_v3:DNase-H3K4me3,CTCF-bound"
[1] "annot_ccre_v3_DNase_only.rds"
[1] "ccre_v3:DNase-only"
[1] "annot_ccre_v3_Low_DNase.rds"
[1] "ccre_v3:Low-DNase"
[1] "annot_ccre_v3_pELS.rds"
[1] "ccre_v3:pELS"
[1] "annot_ccre_v3_pELS_CTCF_bound.rds"
[1] "ccre_v3:pELS,CTCF-bound"
[1] "annot_ccre_v3_PLS.rds"
[1] "ccre_v3:PLS"
[1] "annot_ccre_v3_PLS_CTCF_bound.rds"
[1] "cc

**Save annotations**

In [13]:
### Save one RDS file per Annotation-Label pair for all non-ChIP-seq annotation tables.
### Each file holds a one-element named list: the name is "<Annotation>:<Label>"
### and the value is the vector of peaks carrying that label.
folder  = "enrichment_prepare_annot_others"
lst_dat = list(
    dat_peak_annot_casa,
    dat_peak_annot_enh,
    dat_peak_annot_enh_zscore,
    dat_peak_annot_ccre_v3,
    dat_peak_annot_ccre_v4,
    dat_peak_annot_tss_pol2,
    dat_peak_annot_chromhmm
)

### the output directory is the same for every file; create it up front
### because saveRDS() does not create missing directories
fdiry = file.path(FD_RES, "results", "region", "KS91_K562_ASTARRseq_peak_macs_input", folder)
dir.create(fdiry, recursive = TRUE, showWarnings = FALSE)

for (dat in lst_dat){
    ### peaks grouped by label
    lst = split(dat$Peak, dat$Label)
    ant = unique(dat$Annotation)
    
    cat("Annotation:", ant, "\n")
    for (idx in names(lst)){
        
        ### file name "annot_<Annotation>_<Label>.rds"; "-", "," and "/" become "_"
        txt = paste(ant, idx, sep="_")
        txt = str_replace_all(txt, pattern = "-|,|/", replacement = "_")
        fname = paste0("annot_", txt, ".rds")
        fpath = file.path(fdiry, fname)

        ### store the peaks of this label under the name "<Annotation>:<Label>"
        txt = paste(ant, idx, sep=":")
        lst_annot = lst[idx]
        names(lst_annot) = txt
        saveRDS(lst_annot, file = fpath)
    }
}

Annotation: CRISPRi_HCRFF 
Annotation: Enhancer 
Annotation: Enhancer 
Annotation: ccre_v3 
Annotation: ccre_v4 
Annotation: TSS_POL2 
Annotation: ChromHMM 


## Annotation: ChIP-seq Histone

**Test Loop**

In [14]:
### Dry run for the histone ChIP-seq table: print the file name and the list name
### for every label. Nothing is written.
folder  = "enrichment_prepare_annot_chipseq_histone"
dat = dat_peak_annot_chipseq_histone

### peaks grouped by label
lst = split(dat$Peak, dat$Label)
ant = unique(dat$Annotation)

cat("Annotation:", ant, "\n")
for (idx in names(lst)){

    ### file name "annot_<Annotation>_<Label>.rds"; "-", "," and "/" become "_"
    fname = str_replace_all(paste(ant, idx, sep="_"), pattern = "-|,|/", replacement = "_")
    fname = paste0("annot_", fname, ".rds")
    fdiry = file.path(
        FD_RES, "results", "region",
        "KS91_K562_ASTARRseq_peak_macs_input", folder)
    fpath = file.path(fdiry, fname)
    print(fname)

    ### name the saved list element would get
    txt = paste(ant, idx, sep=":")
    print(txt)
}

Annotation: ChIPseq_Histone 
[1] "annot_ChIPseq_Histone_H2AFZ.rds"
[1] "ChIPseq_Histone:H2AFZ"
[1] "annot_ChIPseq_Histone_H3K27ac.rds"
[1] "ChIPseq_Histone:H3K27ac"
[1] "annot_ChIPseq_Histone_H3K27me3.rds"
[1] "ChIPseq_Histone:H3K27me3"
[1] "annot_ChIPseq_Histone_H3K36me3.rds"
[1] "ChIPseq_Histone:H3K36me3"
[1] "annot_ChIPseq_Histone_H3K4me1.rds"
[1] "ChIPseq_Histone:H3K4me1"
[1] "annot_ChIPseq_Histone_H3K4me2.rds"
[1] "ChIPseq_Histone:H3K4me2"
[1] "annot_ChIPseq_Histone_H3K4me3.rds"
[1] "ChIPseq_Histone:H3K4me3"
[1] "annot_ChIPseq_Histone_H3K79me2.rds"
[1] "ChIPseq_Histone:H3K79me2"
[1] "annot_ChIPseq_Histone_H3K9ac.rds"
[1] "ChIPseq_Histone:H3K9ac"
[1] "annot_ChIPseq_Histone_H3K9me1.rds"
[1] "ChIPseq_Histone:H3K9me1"
[1] "annot_ChIPseq_Histone_H3K9me3.rds"
[1] "ChIPseq_Histone:H3K9me3"
[1] "annot_ChIPseq_Histone_H4K20me1.rds"
[1] "ChIPseq_Histone:H4K20me1"


**Save annotations**

In [15]:
### Save one RDS file per histone ChIP-seq label.
### Each file holds a one-element named list: the name is "<Annotation>:<Label>"
### and the value is the vector of peaks carrying that label.
folder  = "enrichment_prepare_annot_chipseq_histone"
dat = dat_peak_annot_chipseq_histone

### peaks grouped by label
lst = split(dat$Peak, dat$Label)
ant = unique(dat$Annotation)

### the output directory is the same for every file; create it up front
### because saveRDS() does not create missing directories
fdiry = file.path(FD_RES, "results", "region", "KS91_K562_ASTARRseq_peak_macs_input", folder)
dir.create(fdiry, recursive = TRUE, showWarnings = FALSE)

cat("Annotation:", ant, "\n")
for (idx in names(lst)){

    ### file name "annot_<Annotation>_<Label>.rds"; "-", "," and "/" become "_"
    txt = paste(ant, idx, sep="_")
    txt = str_replace_all(txt, pattern = "-|,|/", replacement = "_")
    fname = paste0("annot_", txt, ".rds")
    fpath = file.path(fdiry, fname)

    ### store the peaks of this label under the name "<Annotation>:<Label>"
    txt = paste(ant, idx, sep=":")
    lst_annot = lst[idx]
    names(lst_annot) = txt
    saveRDS(lst_annot, file = fpath)
}

Annotation: ChIPseq_Histone 


## Annotation: ChIP-seq TF subset

**Test loop**

In [16]:
### Dry run for the TF ChIP-seq subset table: print the file name and the list name
### for every label. Nothing is written.
folder  = "enrichment_prepare_annot_chipseq_tf_subset"
dat = dat_peak_annot_chipseq_tf_subset

### peaks grouped by label
lst = split(dat$Peak, dat$Label)
ant = unique(dat$Annotation)

cat("Annotation:", ant, "\n")
for (idx in names(lst)){

    ### file name "annot_<Annotation>_<Label>.rds"; "-", "," and "/" become "_"
    fname = str_replace_all(paste(ant, idx, sep="_"), pattern = "-|,|/", replacement = "_")
    fname = paste0("annot_", fname, ".rds")
    fdiry = file.path(
        FD_RES, "results", "region",
        "KS91_K562_ASTARRseq_peak_macs_input", folder)
    fpath = file.path(fdiry, fname)
    print(fname)

    ### name the saved list element would get
    txt = paste(ant, idx, sep=":")
    print(txt)
}

Annotation: ChIPseq_TF 
[1] "annot_ChIPseq_TF_ATF1_ENCFF627RSK.rds"
[1] "ChIPseq_TF:ATF1_ENCFF627RSK"
[1] "annot_ChIPseq_TF_CTCF_ENCFF660GHM.rds"
[1] "ChIPseq_TF:CTCF_ENCFF660GHM"
[1] "annot_ChIPseq_TF_ELK1_ENCFF715WGN.rds"
[1] "ChIPseq_TF:ELK1_ENCFF715WGN"
[1] "annot_ChIPseq_TF_EP300_ENCFF702XPO.rds"
[1] "ChIPseq_TF:EP300_ENCFF702XPO"
[1] "annot_ChIPseq_TF_GATA1_ENCFF657CTC.rds"
[1] "ChIPseq_TF:GATA1_ENCFF657CTC"
[1] "annot_ChIPseq_TF_JUN_ENCFF190CGV.rds"
[1] "ChIPseq_TF:JUN_ENCFF190CGV"
[1] "annot_ChIPseq_TF_KLF10_ENCFF142ZTD.rds"
[1] "ChIPseq_TF:KLF10_ENCFF142ZTD"
[1] "annot_ChIPseq_TF_KLF16_ENCFF488OTN.rds"
[1] "ChIPseq_TF:KLF16_ENCFF488OTN"
[1] "annot_ChIPseq_TF_NFE2_ENCFF023IFO.rds"
[1] "ChIPseq_TF:NFE2_ENCFF023IFO"
[1] "annot_ChIPseq_TF_NRF1_ENCFF777PKJ.rds"
[1] "ChIPseq_TF:NRF1_ENCFF777PKJ"
[1] "annot_ChIPseq_TF_POLR2A_ENCFF355MNE.rds"
[1] "ChIPseq_TF:POLR2A_ENCFF355MNE"
[1] "annot_ChIPseq_TF_REST_ENCFF707MDI.rds"
[1] "ChIPseq_TF:REST_ENCFF707MDI"
[1] "annot_ChIPseq_TF_SP1_ENCF

**Save annotations**

In [17]:
### Save one RDS file per label of the TF ChIP-seq subset table.
### Each file holds a one-element named list: the name is "<Annotation>:<Label>"
### and the value is the vector of peaks carrying that label.
folder  = "enrichment_prepare_annot_chipseq_tf_subset"
dat = dat_peak_annot_chipseq_tf_subset

### peaks grouped by label
lst = split(dat$Peak, dat$Label)
ant = unique(dat$Annotation)

### the output directory is the same for every file; create it up front
### because saveRDS() does not create missing directories
fdiry = file.path(FD_RES, "results", "region", "KS91_K562_ASTARRseq_peak_macs_input", folder)
dir.create(fdiry, recursive = TRUE, showWarnings = FALSE)

cat("Annotation:", ant, "\n")
for (idx in names(lst)){

    ### file name "annot_<Annotation>_<Label>.rds"; "-", "," and "/" become "_"
    txt = paste(ant, idx, sep="_")
    txt = str_replace_all(txt, pattern = "-|,|/", replacement = "_")
    fname = paste0("annot_", txt, ".rds")
    fpath = file.path(fdiry, fname)

    ### store the peaks of this label under the name "<Annotation>:<Label>"
    txt = paste(ant, idx, sep=":")
    lst_annot = lst[idx]
    names(lst_annot) = txt
    saveRDS(lst_annot, file = fpath)
}

Annotation: ChIPseq_TF 


## Annotation: ChIP-seq TF master set

**Test Loop**

In [18]:
### Dry run for the full TF ChIP-seq table: print the file name and the list name
### for the first labels only. Nothing is written.
folder  = "enrichment_prepare_annot_chipseq_tf"
dat = dat_peak_annot_chipseq_tf

### peaks grouped by label
lst = split(dat$Peak, dat$Label)
ant = unique(dat$Annotation)
print(length(lst))

### the output directory is the same for every file
fdiry = file.path(FD_RES, "results", "region", "KS91_K562_ASTARRseq_peak_macs_input", folder)

cat("Annotation:", ant, "\n")
### head() returns at most 10 labels; indexing with [1:10] would pad the
### vector with NA whenever fewer than 10 labels exist
for (idx in head(names(lst), 10)){

    ### file name "annot_<Annotation>_<Label>.rds"; "-", "," and "/" become "_"
    txt = paste(ant, idx, sep="_")
    txt = str_replace_all(txt, pattern = "-|,|/", replacement = "_")
    fname = paste0("annot_", txt, ".rds")
    fpath = file.path(fdiry, fname)
    print(fname)

    ### name the saved list element would get
    txt = paste(ant, idx, sep=":")
    print(txt)
}

[1] 733
Annotation: ChIPseq_TF 
[1] "annot_ChIPseq_TF_ADNP_ENCFF739AJO.rds"
[1] "ChIPseq_TF:ADNP_ENCFF739AJO"
[1] "annot_ChIPseq_TF_AFF1_ENCFF195YGC.rds"
[1] "ChIPseq_TF:AFF1_ENCFF195YGC"
[1] "annot_ChIPseq_TF_AFF1_ENCFF674XTY.rds"
[1] "ChIPseq_TF:AFF1_ENCFF674XTY"
[1] "annot_ChIPseq_TF_AGO1_ENCFF794IRP.rds"
[1] "ChIPseq_TF:AGO1_ENCFF794IRP"
[1] "annot_ChIPseq_TF_ARHGAP35_ENCFF952WKN.rds"
[1] "ChIPseq_TF:ARHGAP35_ENCFF952WKN"
[1] "annot_ChIPseq_TF_ARID1B_ENCFF879NTL.rds"
[1] "ChIPseq_TF:ARID1B_ENCFF879NTL"
[1] "annot_ChIPseq_TF_ARID2_ENCFF913WRW.rds"
[1] "ChIPseq_TF:ARID2_ENCFF913WRW"
[1] "annot_ChIPseq_TF_ARID3A_ENCFF891OQP.rds"
[1] "ChIPseq_TF:ARID3A_ENCFF891OQP"
[1] "annot_ChIPseq_TF_ARID3B_ENCFF270TSN.rds"
[1] "ChIPseq_TF:ARID3B_ENCFF270TSN"
[1] "annot_ChIPseq_TF_ARID4B_ENCFF086FAZ.rds"
[1] "ChIPseq_TF:ARID4B_ENCFF086FAZ"


**Save annotations**

In [19]:
### Save one RDS file per label of the full TF ChIP-seq table.
### Each file holds a one-element named list: the name is "<Annotation>:<Label>"
### and the value is the vector of peaks carrying that label.
folder  = "enrichment_prepare_annot_chipseq_tf"
dat = dat_peak_annot_chipseq_tf

### peaks grouped by label
lst = split(dat$Peak, dat$Label)
ant = unique(dat$Annotation)
print(length(lst))

### the output directory is the same for every file; create it up front
### because saveRDS() does not create missing directories
fdiry = file.path(FD_RES, "results", "region", "KS91_K562_ASTARRseq_peak_macs_input", folder)
dir.create(fdiry, recursive = TRUE, showWarnings = FALSE)

cat("Annotation:", ant, "\n")
for (idx in names(lst)){

    ### file name "annot_<Annotation>_<Label>.rds"; "-", "," and "/" become "_"
    txt = paste(ant, idx, sep="_")
    txt = str_replace_all(txt, pattern = "-|,|/", replacement = "_")
    fname = paste0("annot_", txt, ".rds")
    fpath = file.path(fdiry, fname)

    ### store the peaks of this label under the name "<Annotation>:<Label>"
    txt = paste(ant, idx, sep=":")
    lst_annot = lst[idx]
    names(lst_annot) = txt
    saveRDS(lst_annot, file = fpath)
}

[1] 733
Annotation: ChIPseq_TF 
