**Set environment**

In [1]:
suppressWarnings(suppressMessages(source("../config/config_sing.R")))
show_env()

You are in Singularity: singularity_proj_encode_fcc 
BASE DIRECTORY (FD_BASE): /data/reddylab/Kuei 
WORK DIRECTORY (FD_WORK): /data/reddylab/Kuei/out 
CODE DIRECTORY (FD_CODE): /data/reddylab/Kuei/code 
PATH OF PROJECT (FD_PRJ): /data/reddylab/Kuei/code/Proj_CombEffect_ENCODE_FCC 
PATH OF RESULTS (FD_RES): /data/reddylab/Kuei/out/proj_combeffect_encode_fcc 
PATH OF LOG     (FD_LOG): /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/log 


## Import data

In [2]:
fdiry = file.path(FD_RES, "results", "region", "KS91_K562_ASTARRseq_peak_macs_input")
fname = "KS91_K562_hg38_ASTARRseq_Input.all_reps.masked.union_narrowPeak.q5.bed.gz"
fpath = file.path(fdiry, fname)

cnames = c("Chrom", "Start", "End")
dat = read_tsv(fpath, col_names = cnames, show_col_types = FALSE)
dat = dat %>% dplyr::mutate(Name = paste0(Chrom, ":", Start, "-", End))

dat_peak_ocr = dat
print(dim(dat))
head(dat)

[1] 247520      4


Chrom,Start,End,Name
<chr>,<dbl>,<dbl>,<chr>
chr1,10015,10442,chr1:10015-10442
chr1,14253,14645,chr1:14253-14645
chr1,16015,16477,chr1:16015-16477
chr1,17237,17772,chr1:17237-17772
chr1,28903,29613,chr1:28903-29613
chr1,30803,31072,chr1:30803-31072


## Create combinations of OCRs for each chromosome

In [4]:
dat = dat_peak_ocr

vec = paste0("chr", c(1:22, "X"))
dat = dat %>% dplyr::filter(Chrom %in% vec)

lst = split(dat$Name, dat$Chrom)
lst = lapply(lst, function(vec){
    ### Show progress
    cat(vec[1], "\n", length(vec), "\n\n")
    flush.console()
    
    ### create combinations without permutation
    dat_comb2_self  = data.frame(V1 = vec, V2 = vec)
    dat_comb2_pair  = combn(vec, 2) %>% t %>% as.data.frame    
    dat_comb2_total = dplyr::bind_rows(
        dat_comb2_self, 
        dat_comb2_pair) %>% 
        dplyr::arrange(V1, V2)
    return(dat_comb2_total)
})

lst_peak_comb2 = lst
print(names(lst))
head(lst[[1]])

chr1:10015-10442 
 30534 

chr10:11333-11817 
 11398 

chr11:113408-113701 
 12010 

chr12:10606-11448 
 10725 

chr13:16008265-16008631 
 3675 

chr14:18655043-18655786 
 4386 

chr15:19987541-19987935 
 7965 

chr16:10131-11496 
 8999 

chr17:134079-134681 
 10104 

chr18:45971-46669 
 5088 

chr19:70732-71295 
 8447 

chr2:11292-11934 
 19379 

chr20:129971-130487 
 6046 

chr21:5065292-5066109 
 4241 

chr22:10687272-10687556 
 4946 

chr3:10230-10640 
 14422 

chr4:11715-12387 
 10120 

chr5:10627-11783 
 13453 

chr6:126583-127039 
 18166 

chr7:30662-31595 
 20036 

chr8:191891-192124 
 10120 

chr9:10454-11824 
 8378 

chrX:19079-20855 
 4214 

 [1] "chr1"  "chr10" "chr11" "chr12" "chr13" "chr14" "chr15" "chr16" "chr17"
[10] "chr18" "chr19" "chr2"  "chr20" "chr21" "chr22" "chr3"  "chr4"  "chr5" 
[19] "chr6"  "chr7"  "chr8"  "chr9"  "chrX" 


Unnamed: 0_level_0,V1,V2
Unnamed: 0_level_1,<chr>,<chr>
1,chr1:100006256-100006880,chr1:100006256-100006880
2,chr1:100006256-100006880,chr1:100010437-100010915
3,chr1:100006256-100006880,chr1:100021298-100021629
4,chr1:100006256-100006880,chr1:100023727-100023976
5,chr1:100006256-100006880,chr1:100027983-100029702
6,chr1:100006256-100006880,chr1:100036871-100039191


In [5]:
lst = lst_peak_comb2
for (idn in names(lst)){
    dat = lst[[idn]]
    cat(idn, "|", nrow(dat)/10^6, "\n")
}

chr1 | 466.1778 
chr10 | 64.9629 
chr11 | 72.12605 
chr12 | 57.51817 
chr13 | 6.75465 
chr14 | 9.620691 
chr15 | 31.7246 
chr16 | 40.4955 
chr17 | 51.05046 
chr18 | 12.94642 
chr19 | 35.68013 
chr2 | 187.7825 
chr20 | 18.28008 
chr21 | 8.995161 
chr22 | 12.23393 
chr3 | 104.0043 
chr4 | 51.21226 
chr5 | 90.49833 
chr6 | 165.0109 
chr7 | 200.7307 
chr8 | 51.21226 
chr9 | 35.09963 
chrX | 8.881005 


## Save the results into tsv and bedpe format

In [6]:
lst = lst_peak_comb2
for (idn in names(lst)){
    ### Show progress
    cat("Chromosome:", idn, "\n")
    flush.console()
    
    ### extract table
    dat = lst[[idn]]
    
    ### save in table format
    fdiry = file.path(FD_RES, "results", "region", "KS91_K562_ASTARRseq_peak_macs_input", "region_pair")
    fname = paste("region_pair", idn, "tsv.gz", sep=".")
    fpath = file.path(fdiry, fname)
    write_tsv(dat, fpath, col_names=FALSE)

    ### Show progress
    cat("Save results:\n", fpath,    "\n")
    cat("Data shape:\n",   dim(dat), "\n")
    print(head(dat))
    cat("\n")
    flush.console()
}

Chromosome: chr1 
Save results:
 /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/region_pair/region_pair.chr1.tsv.gz 
Data shape:
 466177845 2 
                        V1                       V2
1 chr1:100006256-100006880 chr1:100006256-100006880
2 chr1:100006256-100006880 chr1:100010437-100010915
3 chr1:100006256-100006880 chr1:100021298-100021629
4 chr1:100006256-100006880 chr1:100023727-100023976
5 chr1:100006256-100006880 chr1:100027983-100029702
6 chr1:100006256-100006880 chr1:100036871-100039191

Chromosome: chr10 
Save results:
 /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/region_pair/region_pair.chr10.tsv.gz 
Data shape:
 64962901 2 
                         V1                        V2
1 chr10:100009096-100010466 chr10:100009096-100010466
2 chr10:100009096-100010466 chr10:100020710-100021134
3 chr10:100009096-100010466 chr10:100045961-100046653
4 chr10:100009096-1000

In [5]:
folders = c(
    "hic_intact_K562_ENCSR479XDG",
    "hic_intact_K562_deep"
)

lst = lst_peak_comb2
for (idn in names(lst)){
    ### Show progress
    cat("Chromosome:", idn, "\n")
    flush.console()
    
    ### change column name
    dat = lst[[idn]]
    colnames(dat) = c("Region1", "Region2")
    dat_peak_pair_tsv = dat
    
    ### Show progress
    print(head(dat))
    cat(dim(dat), "\n")
    cat("\n")
    flush.console()
    
    ### Save results
    for (folder in folders){
        ### set file direcotyr
        fdiry = file.path(FD_RES, "results", folder, "coverage_astarrseq_peak_macs_input")
        
        ### save in table format
        fname = paste("region_pair", idn, "tsv.gz", sep=".")
        fpath = file.path(fdiry, fname)
        write_tsv(dat_peak_pair_tsv, fpath)
        
        cat("Save results:\n", fpath, "\n")
        cat("\n")
    }
}

Chromosome: chr1 
                   Region1                  Region2
1 chr1:100006256-100006880 chr1:100006256-100006880
2 chr1:100006256-100006880 chr1:100010437-100010915
3 chr1:100006256-100006880 chr1:100021298-100021629
4 chr1:100006256-100006880 chr1:100023727-100023976
5 chr1:100006256-100006880 chr1:100027983-100029702
6 chr1:100006256-100006880 chr1:100036871-100039191
466177845 2 

Save results:
 /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/hic_intact_K562_ENCSR479XDG/coverage_astarrseq_peak_macs_input/region_pair.chr1.tsv.gz 

Save results:
 /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/hic_intact_K562_deep/coverage_astarrseq_peak_macs_input/region_pair.chr1.tsv.gz 

Chromosome: chr10 
                    Region1                   Region2
1 chr10:100009096-100010466 chr10:100009096-100010466
2 chr10:100009096-100010466 chr10:100020710-100021134
3 chr10:100009096-100010466 chr10:100045961-100046653
4 chr10:100009096-100010466 chr10:100065094-100065

```
folders = c(
    "hic_intact_K562_ENCSR479XDG",
    "hic_intact_K562_deep"
)

lst = lst_peak_comb2
for (idn in names(lst)){
    ### Show progress
    cat("Chromosome:", idn, "\n")
    flush.console()
    
    ### change column name
    dat = lst[[idn]]
    colnames(dat) = c("Region1", "Region2")
    dat_peak_pair_tsv = dat
    
    ### Show progress
    print(head(dat))
    cat(dim(dat), "\n")
    cat("\n")
    flush.console()
    
    ### split the regions into bedpe format
    dat = dat %>%
        dplyr::mutate(Name = paste(Region1, Region2, sep="|")) %>%
        tidyr::separate(Region1, c("Chrom1", "Start1", "End1"), sep = "[:-]") %>%
        tidyr::separate(Region2, c("Chrom2", "Start2", "End2"), sep = "[:-]") %>%
        dplyr::select(Chrom1, Start1, End1, Chrom2, Start2, End2, Name)
    dat_peak_pair_bedpe = dat
    
    ### Show progress
    print(head(dat))
    cat(dim(dat), "\n")
    cat("\n")
    flush.console()
    
    ### Save results
    for (folder in folders){
        ### set file direcotyr
        fdiry = file.path(FD_RES, "results", folder, "coverage_astarrseq_peak_macs_input")
        
        ### save in table format
        fname = paste("region_pair", idn, "tsv", sep=".")
        fpath = file.path(fdiry, fname)
        write_tsv(dat_peak_pair_tsv, fpath)
        
        cat("Save results:\n", fpath, "\n")
        cat("\n")
        
        ### save in bedpe format
        fname = paste("region_pair", idn, "bedpe.gz", sep=".")
        fpath = file.path(fdiry, fname)
        write_tsv(dat_peak_pair_bedpe, fpath, col_names=FALSE)
        
        cat("Save results:\n", fpath, "\n")
        cat("\n")
    }
}
```