**Set environment**

In [1]:
### Source the project configuration; messages and warnings raised while sourcing are suppressed
suppressMessages(suppressWarnings(source("../config/config_sing.R")))
### Print the environment summary (show_env is presumably defined by the sourced config -- TODO confirm)
show_env()

You are in Singularity: singularity_proj_encode_fcc 
BASE DIRECTORY (FD_BASE): /data/reddylab/Kuei 
WORK DIRECTORY (FD_WORK): /data/reddylab/Kuei/out 
CODE DIRECTORY (FD_CODE): /data/reddylab/Kuei/code 
PATH OF PROJECT (FD_PRJ): /data/reddylab/Kuei/code/Proj_CombEffect_ENCODE_FCC 
PATH OF RESULTS (FD_RES): /data/reddylab/Kuei/out/proj_combeffect_encode_fcc 
PATH OF LOG     (FD_LOG): /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/log 


In [2]:
### Annotation folder names; the cells below look them up under file.path(FD_RES, "results", "region")
FOLDER_ANTS  = c(
    "annotation_enhancer_zscore_junke",
    "annotation_crispri_growth",
    "annotation_enhancer_merge"
)

### Loop folder names; loop-overlap files are read from <folder_loop>/loop/<folder_ant>
FOLDER_LOOPS = c(
    "hic_intact_ENCSR479XDG",
    "hic_intact_deep"
)

## Example 01

**Check data**

In [3]:
### Example 01 inputs: "annotation_enhancer_zscore_junke" against "hic_intact_ENCSR479XDG"
folder_ant  = FOLDER_ANTS[1]
folder_loop = FOLDER_LOOPS[1]

In [4]:
### List the annotation BED files (*.bed.gz) available for the selected annotation folder
fdiry  = file.path(FD_RES, "results", "region", folder_ant)
fname  = "*.bed.gz"
fglob  = file.path(fdiry, fname)
fpaths = Sys.glob(fglob)
### show file names only; the directory part is the same for every entry
fnames = basename(fpaths)
print(fnames)

 [1] "enhancer_zscore_junke.peak.ASTARR_AB.bed.gz"
 [2] "enhancer_zscore_junke.peak.ASTARR_A.bed.gz" 
 [3] "enhancer_zscore_junke.peak.ASTARR_R.bed.gz" 
 [4] "enhancer_zscore_junke.peak.LMPRA_AB.bed.gz" 
 [5] "enhancer_zscore_junke.peak.LMPRA_A.bed.gz"  
 [6] "enhancer_zscore_junke.peak.LMPRA_R.bed.gz"  
 [7] "enhancer_zscore_junke.peak.TMPRA_A.bed.gz"  
 [8] "enhancer_zscore_junke.peak.TMPRA_R.bed.gz"  
 [9] "enhancer_zscore_junke.peak.WSTARR_AB.bed.gz"
[10] "enhancer_zscore_junke.peak.WSTARR_A.bed.gz" 
[11] "enhancer_zscore_junke.peak.WSTARR_R.bed.gz" 


In [5]:
### List the loop-overlap BED files for the selected loop folder / annotation folder pair
fdiry = file.path(FD_RES, "results", "region", folder_loop, "loop", folder_ant)
fname  = "*.bed.gz"
fglob  = file.path(fdiry, fname)
fpaths = Sys.glob(fglob)
### NOTE(review): fpaths/fnames are reused here and now hold loop files, not annotation files
fnames = basename(fpaths)
print(fnames)

 [1] "loopA.annotation.enhancer_zscore_junke.peak.ASTARR_AB.bed.gz"
 [2] "loopA.annotation.enhancer_zscore_junke.peak.ASTARR_A.bed.gz" 
 [3] "loopA.annotation.enhancer_zscore_junke.peak.ASTARR_R.bed.gz" 
 [4] "loopA.annotation.enhancer_zscore_junke.peak.LMPRA_AB.bed.gz" 
 [5] "loopA.annotation.enhancer_zscore_junke.peak.LMPRA_A.bed.gz"  
 [6] "loopA.annotation.enhancer_zscore_junke.peak.LMPRA_R.bed.gz"  
 [7] "loopA.annotation.enhancer_zscore_junke.peak.TMPRA_A.bed.gz"  
 [8] "loopA.annotation.enhancer_zscore_junke.peak.TMPRA_R.bed.gz"  
 [9] "loopA.annotation.enhancer_zscore_junke.peak.WSTARR_AB.bed.gz"
[10] "loopA.annotation.enhancer_zscore_junke.peak.WSTARR_A.bed.gz" 
[11] "loopA.annotation.enhancer_zscore_junke.peak.WSTARR_R.bed.gz" 
[12] "loopB.annotation.enhancer_zscore_junke.peak.ASTARR_AB.bed.gz"
[13] "loopB.annotation.enhancer_zscore_junke.peak.ASTARR_A.bed.gz" 
[14] "loopB.annotation.enhancer_zscore_junke.peak.ASTARR_R.bed.gz" 
[15] "loopB.annotation.enhancer_zscore_junke.pea

**Execute: Single example**

In [6]:
### Dry run on the first annotation file only: every intermediate table is
### printed so the inputs can be inspected before running over all files.

### import annotation description file; its Name column supplies the BED column names
fdiry  = file.path(FD_RES, "results", "region", folder_ant)
fname = "description.tsv"
fpath = file.path(fdiry, fname)
dat = read_tsv(fpath, show_col_types = FALSE)
cnames = dat$Name

### set file path for annotation
fdiry = file.path(FD_RES, "results", "region", folder_ant)
fname = "*.bed.gz"
fglob = file.path(fdiry, fname)
fpaths_ant = Sys.glob(fglob)

### fail with a clear message: indexing an empty vector with [1] yields NA,
### which would otherwise be handed to read_tsv() inside the loop
if (length(fpaths_ant) == 0){
    stop("No annotation files matched: ", fglob)
}

for (fpath_ant in fpaths_ant[1]){
    ### init
    fname_ant = basename(fpath_ant)
    print(fname_ant)
    cat("\n")
    
    ### import annotation file and add a "Chrom:Start-End" key for each row
    dat_ant = read_tsv(fpath_ant, col_names = cnames, show_col_types = FALSE)
    dat_ant = dat_ant %>% dplyr::mutate(Region = paste0(Chrom, ":", Start, "-", End))
    print(head(dat_ant))
    cat("\n")
    
    ### set file path for loops: any file in the loop folder whose name ends
    ### with the annotation file name
    fdiry = file.path(FD_RES, "results", "region", folder_loop, "loop", folder_ant)
    fname = paste0("*", fname_ant)
    fglob = file.path(fdiry, fname)
    fpaths_loop = Sys.glob(fglob)
    fnames_loop = basename(fpaths_loop)
    print(fnames_loop)
    cat("\n")
    
    ### import loop file(s); columns are the three loop coordinates, then the
    ### annotation columns, then the overlap column
    cnames_loop = c("Chrom_Loop", "Start_Loop", "End_Loop", cnames, "Overlap")
    lst = lapply(fpaths_loop, function(fpath_loop){
        dat = read_tsv(fpath_loop, col_names = cnames_loop, show_col_types = FALSE)
        ### same key as above, built from the annotation coordinates (not the loop ones)
        dat = dat %>% dplyr::mutate(Region = paste0(Chrom, ":", Start, "-", End))
        return(dat)
    })
    dat_loop = bind_rows(lst)
    print(head(dat_loop))
    cat("\n")
    
    ### summarise: Total  = unique annotation regions,
    ###            Looped = unique regions seen in the loop files,
    ###            Check  = annotation regions that are also in the loop files,
    ###            Percentage = Check / Total (a fraction between 0 and 1)
    vec_ant_total = unique(dat_ant$Region)
    vec_ant_loop  = unique(dat_loop$Region)
    is_looped = vec_ant_total %in% vec_ant_loop
    res = c(
        length(vec_ant_total), 
        length(vec_ant_loop), 
        sum(is_looped), 
        mean(is_looped))
    names(res) = c("Total", "Looped", "Check", "Percentage")
    print(res)
    cat("\n")
}

[1] "enhancer_zscore_junke.peak.ASTARR_AB.bed.gz"

[90m# A tibble: 6 × 10[39m
  Chrom   Start     End Name  Score Strand Assay_Type Assay_Label Dataset Region
  [3m[90m<chr>[39m[23m   [3m[90m<dbl>[39m[23m   [3m[90m<dbl>[39m[23m [3m[90m<chr>[39m[23m [3m[90m<dbl>[39m[23m [3m[90m<chr>[39m[23m  [3m[90m<chr>[39m[23m      [3m[90m<chr>[39m[23m       [3m[90m<chr>[39m[23m   [3m[90m<chr>[39m[23m 
[90m1[39m chr1  1[4m0[24m[4m1[24m[4m3[24m020 1[4m0[24m[4m1[24m[4m3[24m470 peak…  2.18 .      ASTARR     ASTARR_AB   ASTARR… chr1:…
[90m2[39m chr1  1[4m7[24m[4m2[24m[4m4[24m540 1[4m7[24m[4m2[24m[4m4[24m700 peak…  2.15 .      ASTARR     ASTARR_AB   ASTARR… chr1:…
[90m3[39m chr1  2[4m2[24m[4m9[24m[4m0[24m700 2[4m2[24m[4m9[24m[4m1[24m030 peak…  2.39 .      ASTARR     ASTARR_AB   ASTARR… chr1:…
[90m4[39m chr1  3[4m3[24m[4m1[24m[4m3[24m260 3[4m3[24m[4m1[24m[4m3[24m400 peak…  2.18 .      ASTARR     ASTARR_AB   AST

**Execute: loop**

In [7]:
### Print the annotation file names one per line (fpaths_ant comes from the previous cell)
### NOTE(review): the loop variable overwrites the global `fname`
for(fname in basename(fpaths_ant)) print(fname)

[1] "enhancer_zscore_junke.peak.ASTARR_AB.bed.gz"
[1] "enhancer_zscore_junke.peak.ASTARR_A.bed.gz"
[1] "enhancer_zscore_junke.peak.ASTARR_R.bed.gz"
[1] "enhancer_zscore_junke.peak.LMPRA_AB.bed.gz"
[1] "enhancer_zscore_junke.peak.LMPRA_A.bed.gz"
[1] "enhancer_zscore_junke.peak.LMPRA_R.bed.gz"
[1] "enhancer_zscore_junke.peak.TMPRA_A.bed.gz"
[1] "enhancer_zscore_junke.peak.TMPRA_R.bed.gz"
[1] "enhancer_zscore_junke.peak.WSTARR_AB.bed.gz"
[1] "enhancer_zscore_junke.peak.WSTARR_A.bed.gz"
[1] "enhancer_zscore_junke.peak.WSTARR_R.bed.gz"


In [8]:
### Label each annotation file by the third dot-separated field of its FILE NAME,
### e.g. "enhancer_zscore_junke.peak.ASTARR_AB.bed.gz" -> "ASTARR_AB".
### basename() is applied first so that a dot in any directory component of the
### path cannot shift the field index.
labels_ant = fpaths_ant %>% 
    basename %>%
    str_split(., "\\.") %>%
    do.call(rbind, .) %>%
    as.data.frame %>%
    dplyr::pull(3)

labels_ant

In [9]:
### import annotation description file; its Name column supplies the BED column names
fdiry  = file.path(FD_RES, "results", "region", folder_ant)
fname = "description.tsv"
fpath = file.path(fdiry, fname)
dat = read_tsv(fpath, show_col_types = FALSE)
cnames = dat$Name

### set file path for annotation
fdiry = file.path(FD_RES, "results", "region", folder_ant)
fname = "*.bed.gz"
fglob = file.path(fdiry, fname)
fpaths_ant = Sys.glob(fglob)

### fail with a clear message instead of an obscure error from the label split below
if (length(fpaths_ant) == 0){
    stop("No annotation files matched: ", fglob)
}

### label each file by the third dot-separated field of its file name
### (e.g. "enhancer_zscore_junke.peak.ASTARR_AB.bed.gz" -> "ASTARR_AB");
### the split runs on the base name so dots in the directory path cannot
### shift the field index
fnames_ant = basename(fpaths_ant)
labels_ant = fnames_ant %>% 
    str_split(., "\\.") %>%
    do.call(rbind, .) %>%
    as.data.frame %>%
    dplyr::pull(3)
names(fpaths_ant) = labels_ant

### for each annotation file, count its unique regions and how many of them
### also appear in the matching loop-overlap files
lst = lapply(fpaths_ant, function(fpath_ant){
    ### init
    fname_ant = basename(fpath_ant)
    
    ### import annotation file and add a "Chrom:Start-End" key for each row
    dat_ant = read_tsv(fpath_ant, col_names = cnames, show_col_types = FALSE)
    dat_ant = dat_ant %>% dplyr::mutate(Region = paste0(Chrom, ":", Start, "-", End))
    
    ### set file path for loops: any file in the loop folder whose name ends
    ### with the annotation file name
    fdiry_loop  = file.path(FD_RES, "results", "region", folder_loop, "loop", folder_ant)
    fglob_loop  = file.path(fdiry_loop, paste0("*", fname_ant))
    fpaths_loop = Sys.glob(fglob_loop)
    
    ### import loop file(s); columns are the three loop coordinates, then the
    ### annotation columns, then the overlap column
    cnames_loop = c("Chrom_Loop", "Start_Loop", "End_Loop", cnames, "Overlap")
    if (length(fpaths_loop) == 0){
        ### no loop file for this annotation: report it explicitly; the counts
        ### below then come out as zero
        warning("No loop files matched: ", fglob_loop, call. = FALSE)
        vec_ant_loop = character(0)
    } else {
        lst_loop = lapply(fpaths_loop, function(fpath_loop){
            dat_loop = read_tsv(fpath_loop, col_names = cnames_loop, show_col_types = FALSE)
            ### same key as above, built from the annotation coordinates
            dat_loop = dat_loop %>% dplyr::mutate(Region = paste0(Chrom, ":", Start, "-", End))
            return(dat_loop)
        })
        vec_ant_loop = unique(bind_rows(lst_loop)$Region)
    }
    
    ### summarise: Total  = unique annotation regions,
    ###            Looped = unique regions seen in the loop files,
    ###            Check  = annotation regions that are also in the loop files,
    ###            Percentage = Check / Total (a fraction between 0 and 1)
    vec_ant_total = unique(dat_ant$Region)
    is_looped = vec_ant_total %in% vec_ant_loop
    res = c(
        length(vec_ant_total), 
        length(vec_ant_loop), 
        sum(is_looped), 
        mean(is_looped))
    names(res) = c("Total", "Looped", "Check", "Percentage")
    return(res)
})

In [10]:
### Stack the per-file summary vectors into one table; the list names become the Label column
### NOTE(review): this reuses `dat`, replacing the description table read in the previous cell
dat = bind_rows(lst, .id = "Label")
dat

Label,Total,Looped,Check,Percentage
<chr>,<dbl>,<dbl>,<dbl>,<dbl>
ASTARR_AB,3123,1455,1455,0.46589817
ASTARR_A,9368,4277,4277,0.45655423
ASTARR_R,17897,8808,8808,0.49214952
LMPRA_AB,26732,9854,9854,0.36862188
LMPRA_A,40096,14869,14869,0.370835
LMPRA_R,1525,426,426,0.27934426
TMPRA_A,8294,1031,1031,0.12430673
TMPRA_R,363,33,33,0.09090909
WSTARR_AB,25505,7528,7528,0.29515781
WSTARR_A,79738,16863,16863,0.2114801


## Example 02

In [114]:
### Example 02 inputs: "annotation_enhancer_merge" against "hic_intact_ENCSR479XDG"
folder_ant  = FOLDER_ANTS[3]
folder_loop = FOLDER_LOOPS[1]

In [115]:
### List the annotation BED files (*.bed.gz) available for the selected annotation folder
fdiry  = file.path(FD_RES, "results", "region", folder_ant)
fname  = "*.bed.gz"
fglob  = file.path(fdiry, fname)
fpaths = Sys.glob(fglob)
### show file names only; the directory part is the same for every entry
fnames = basename(fpaths)
print(fnames)

[1] "enhancer_merge.concat.crispri_hcrff.bed.gz"  
[2] "enhancer_merge.concat.starrmpracrispr.bed.gz"
[3] "enhancer_merge.region.crispri_hcrff.bed.gz"  
[4] "enhancer_merge.region.starrmpracrispr.bed.gz"


In [116]:
### List the loop-overlap BED files for the selected loop folder / annotation folder pair
fdiry = file.path(FD_RES, "results", "region", folder_loop, "loop", folder_ant)
fname  = "*.bed.gz"
fglob  = file.path(fdiry, fname)
fpaths = Sys.glob(fglob)
### NOTE(review): fpaths/fnames are reused here and now hold loop files, not annotation files
fnames = basename(fpaths)
print(fnames)

[1] "loopA.annotation.enhancer_merge.region.crispri_hcrff.bed.gz"  
[2] "loopA.annotation.enhancer_merge.region.starrmpracrispr.bed.gz"
[3] "loopB.annotation.enhancer_merge.region.crispri_hcrff.bed.gz"  
[4] "loopB.annotation.enhancer_merge.region.starrmpracrispr.bed.gz"


In [117]:
### For every annotation file, list the loop-overlap files whose names end with
### the annotation file name, and report which annotations would be imported.
fdiry  = file.path(FD_RES, "results", "region", folder_ant)
fname  = "*.bed.gz"
fglob  = file.path(fdiry, fname)
fpaths = Sys.glob(fglob)
fnames = basename(fpaths)

### the loop directory does not change between iterations
fdiry_loop = file.path(FD_RES, "results", "region", folder_loop, "loop", folder_ant)

for (fname_ant in fnames){
    cat(fname_ant, "\n")
    
    ### separate *_loop names keep the loop variable and the annotation listing
    ### (fname, fpaths, fnames) intact while the loop runs
    fname_loop  = paste0("*", fname_ant)
    print(fname_loop)
    fglob_loop  = file.path(fdiry_loop, fname_loop)
    fpaths_loop = Sys.glob(fglob_loop)
    fnames_loop = basename(fpaths_loop)
    print(fnames_loop)
    cat("\n")
    
    ### nothing to import when no loop file matches this annotation
    if (length(fnames_loop) == 0){
        next
    }
    print("Import")
    cat("\n")
}

enhancer_merge.concat.crispri_hcrff.bed.gz 
[1] "*enhancer_merge.concat.crispri_hcrff.bed.gz"
character(0)

enhancer_merge.concat.starrmpracrispr.bed.gz 
[1] "*enhancer_merge.concat.starrmpracrispr.bed.gz"
character(0)

enhancer_merge.region.crispri_hcrff.bed.gz 
[1] "*enhancer_merge.region.crispri_hcrff.bed.gz"
[1] "loopA.annotation.enhancer_merge.region.crispri_hcrff.bed.gz"
[2] "loopB.annotation.enhancer_merge.region.crispri_hcrff.bed.gz"

[1] "Import"

enhancer_merge.region.starrmpracrispr.bed.gz 
[1] "*enhancer_merge.region.starrmpracrispr.bed.gz"
[1] "loopA.annotation.enhancer_merge.region.starrmpracrispr.bed.gz"
[2] "loopB.annotation.enhancer_merge.region.starrmpracrispr.bed.gz"

[1] "Import"



In [118]:
### Restrict the annotation listing to files with "region" in the name; in the previous
### cell's output these are the only annotation files that have matching loop files
fdiry = file.path(FD_RES, "results", "region", folder_ant)
fname = "*region*.bed.gz"
fglob = file.path(fdiry, fname)
fpaths = Sys.glob(fglob)
print(fpaths)

[1] "/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/annotation_enhancer_merge/enhancer_merge.region.crispri_hcrff.bed.gz"  
[2] "/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/annotation_enhancer_merge/enhancer_merge.region.starrmpracrispr.bed.gz"


In [122]:
### import annotation description file; its Name column supplies the BED column names
fdiry  = file.path(FD_RES, "results", "region", folder_ant)
fname = "description.tsv"
fpath = file.path(fdiry, fname)
dat = read_tsv(fpath, show_col_types = FALSE)
cnames = dat$Name

### set file path for annotation; only the "region" files are used here
fdiry = file.path(FD_RES, "results", "region", folder_ant)
fname = "*region*.bed.gz"
fglob = file.path(fdiry, fname)
fpaths_ant = Sys.glob(fglob)

### fail with a clear message instead of an obscure error from the label split below
if (length(fpaths_ant) == 0){
    stop("No annotation files matched: ", fglob)
}

### label each file by the third dot-separated field of its file name
### (e.g. "enhancer_merge.region.crispri_hcrff.bed.gz" -> "crispri_hcrff")
fnames_ant = basename(fpaths_ant)
labels_ant = fnames_ant %>% 
    str_split(., "\\.") %>%
    do.call(rbind, .) %>%
    as.data.frame %>%
    dplyr::pull(3)
names(fpaths_ant) = labels_ant

### for each annotation file, count its unique regions and how many of them
### also appear in the matching loop-overlap files
lst = lapply(fpaths_ant, function(fpath_ant){
    ### init
    fname_ant = basename(fpath_ant)
    
    ### import annotation file and add a "Chrom:Start-End" key for each row
    dat_ant = read_tsv(fpath_ant, col_names = cnames, show_col_types = FALSE)
    dat_ant = dat_ant %>% dplyr::mutate(Region = paste0(Chrom, ":", Start, "-", End))
    
    ### set file path for loops: any file in the loop folder whose name ends
    ### with the annotation file name
    fdiry_loop  = file.path(FD_RES, "results", "region", folder_loop, "loop", folder_ant)
    fglob_loop  = file.path(fdiry_loop, paste0("*", fname_ant))
    fpaths_loop = Sys.glob(fglob_loop)
    
    ### import loop file(s); columns are the three loop coordinates, then the
    ### annotation columns, then the overlap column
    cnames_loop = c("Chrom_Loop", "Start_Loop", "End_Loop", cnames, "Overlap")
    if (length(fpaths_loop) == 0){
        ### no loop file for this annotation: report it explicitly; the counts
        ### below then come out as zero
        warning("No loop files matched: ", fglob_loop, call. = FALSE)
        vec_ant_loop = character(0)
    } else {
        lst_loop = lapply(fpaths_loop, function(fpath_loop){
            dat_loop = read_tsv(fpath_loop, col_names = cnames_loop, show_col_types = FALSE)
            ### same key as above, built from the annotation coordinates
            dat_loop = dat_loop %>% dplyr::mutate(Region = paste0(Chrom, ":", Start, "-", End))
            return(dat_loop)
        })
        vec_ant_loop = unique(bind_rows(lst_loop)$Region)
    }
    
    ### summarise: Total  = unique annotation regions,
    ###            Looped = unique regions seen in the loop files,
    ###            Check  = annotation regions that are also in the loop files,
    ###            Percentage = Check / Total (a fraction between 0 and 1)
    vec_ant_total = unique(dat_ant$Region)
    is_looped = vec_ant_total %in% vec_ant_loop
    res = c(
        length(vec_ant_total), 
        length(vec_ant_loop), 
        sum(is_looped), 
        mean(is_looped))
    names(res) = c("Total", "Looped", "Check", "Percentage")
    return(res)
})

In [124]:
### Stack the per-file summary vectors into one table; the list names become the Label column
### NOTE(review): this reuses `dat`, replacing the description table read in the previous cell
dat = bind_rows(lst, .id = "Label")
dat

Label,Total,Looped,Check,Percentage
<chr>,<dbl>,<dbl>,<dbl>,<dbl>
crispri_hcrff,80,46,46,0.575
starrmpracrispr,182231,37793,37793,0.2073906


## Example 03

In [125]:
### Example 03 inputs: "annotation_crispri_growth" against "hic_intact_ENCSR479XDG"
folder_ant  = FOLDER_ANTS[2]
folder_loop = FOLDER_LOOPS[1]

In [126]:
### List the annotation BED files (*.bed.gz) available for the selected annotation folder
fdiry  = file.path(FD_RES, "results", "region", folder_ant)
fname  = "*.bed.gz"
fglob  = file.path(fdiry, fname)
fpaths = Sys.glob(fglob)
### show file names only; the directory part is the same for every entry
fnames = basename(fpaths)
print(fnames)

[1] "crispri_growth_dhs.bed.gz"


In [127]:
### List the loop-overlap BED files for the selected loop folder / annotation folder pair
fdiry = file.path(FD_RES, "results", "region", folder_loop, "loop", folder_ant)
fname  = "*.bed.gz"
fglob  = file.path(fdiry, fname)
fpaths = Sys.glob(fglob)
### NOTE(review): fpaths/fnames are reused here and now hold loop files, not annotation files
fnames = basename(fpaths)
print(fnames)

[1] "loopA.annotation.crispri_growth_dhs.bed.gz"
[2] "loopB.annotation.crispri_growth_dhs.bed.gz"


In [130]:
### import annotation description file; its Name column supplies the BED column names
fdiry  = file.path(FD_RES, "results", "region", folder_ant)
fname = "description.tsv"
fpath = file.path(fdiry, fname)
dat = read_tsv(fpath, show_col_types = FALSE)
cnames = dat$Name

### set file path for annotation
fdiry = file.path(FD_RES, "results", "region", folder_ant)
fname = "*.bed.gz"
fglob = file.path(fdiry, fname)
fpaths_ant = Sys.glob(fglob)

### fail with a clear message instead of an obscure error from the label split below
if (length(fpaths_ant) == 0){
    stop("No annotation files matched: ", fglob)
}

### label each file by the first dot-separated field of its file name
### (e.g. "crispri_growth_dhs.bed.gz" -> "crispri_growth_dhs")
fnames_ant = basename(fpaths_ant)
labels_ant = fnames_ant %>% 
    str_split(., "\\.") %>%
    do.call(rbind, .) %>%
    as.data.frame %>%
    dplyr::pull(1)
names(fpaths_ant) = labels_ant

### for each annotation file, count its unique regions and how many of them
### also appear in the matching loop-overlap files
lst = lapply(fpaths_ant, function(fpath_ant){
    ### init
    fname_ant = basename(fpath_ant)
    
    ### import annotation file and add a "Chrom:Start-End" key for each row
    dat_ant = read_tsv(fpath_ant, col_names = cnames, show_col_types = FALSE)
    dat_ant = dat_ant %>% dplyr::mutate(Region = paste0(Chrom, ":", Start, "-", End))
    
    ### set file path for loops: any file in the loop folder whose name ends
    ### with the annotation file name
    fdiry_loop  = file.path(FD_RES, "results", "region", folder_loop, "loop", folder_ant)
    fglob_loop  = file.path(fdiry_loop, paste0("*", fname_ant))
    fpaths_loop = Sys.glob(fglob_loop)
    
    ### import loop file(s); columns are the three loop coordinates, then the
    ### annotation columns, then the overlap column
    cnames_loop = c("Chrom_Loop", "Start_Loop", "End_Loop", cnames, "Overlap")
    if (length(fpaths_loop) == 0){
        ### no loop file for this annotation: report it explicitly; the counts
        ### below then come out as zero
        warning("No loop files matched: ", fglob_loop, call. = FALSE)
        vec_ant_loop = character(0)
    } else {
        lst_loop = lapply(fpaths_loop, function(fpath_loop){
            dat_loop = read_tsv(fpath_loop, col_names = cnames_loop, show_col_types = FALSE)
            ### same key as above, built from the annotation coordinates
            dat_loop = dat_loop %>% dplyr::mutate(Region = paste0(Chrom, ":", Start, "-", End))
            return(dat_loop)
        })
        vec_ant_loop = unique(bind_rows(lst_loop)$Region)
    }
    
    ### summarise: Total  = unique annotation regions,
    ###            Looped = unique regions seen in the loop files,
    ###            Check  = annotation regions that are also in the loop files,
    ###            Percentage = Check / Total (a fraction between 0 and 1)
    vec_ant_total = unique(dat_ant$Region)
    is_looped = vec_ant_total %in% vec_ant_loop
    res = c(
        length(vec_ant_total), 
        length(vec_ant_loop), 
        sum(is_looped), 
        mean(is_looped))
    names(res) = c("Total", "Looped", "Check", "Percentage")
    return(res)
})

In [131]:
### Display the raw per-file result list before it is stacked into a table
lst

In [132]:
### Stack the per-file summary vectors into one table; the list names become the Label column
### NOTE(review): this reuses `dat`, replacing the description table read earlier
dat = bind_rows(lst, .id = "Label")
dat

Label,Total,Looped,Check,Percentage
<chr>,<dbl>,<dbl>,<dbl>,<dbl>
crispri_growth_dhs,6242,2257,2257,0.3615828
