**Set environment**

In [1]:
suppressWarnings(suppressMessages(source("../config/config_sing.R")))
show_env()

You are in Singularity: singularity_proj_encode_fcc 
BASE DIRECTORY (FD_BASE): /data/reddylab/Kuei 
WORK DIRECTORY (FD_WORK): /data/reddylab/Kuei/out 
CODE DIRECTORY (FD_CODE): /data/reddylab/Kuei/code 
PATH OF PROJECT (FD_PRJ): /data/reddylab/Kuei/code/Proj_CombEffect_ENCODE_FCC 
PATH OF RESULTS (FD_RES): /data/reddylab/Kuei/out/proj_combeffect_encode_fcc 
PATH OF LOG     (FD_LOG): /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/log 


## Import data

In [2]:
fdiry = file.path(FD_RES, "results", "region", "KS91_K562_ASTARRseq_peak_macs_input")
fname = "KS91_K562_hg38_ASTARRseq_Input.all_reps.masked.union_narrowPeak.q5.bed.gz"
fpath = file.path(fdiry, fname)

cnames = c("Chrom", "Start", "End")
dat = read_tsv(fpath, col_names = cnames, show_col_types = FALSE)
dat = dat %>% dplyr::mutate(Name = paste0(Chrom, ":", Start, "-", End))

dat_peak_ocr = dat
print(dim(dat))
head(dat)

[1] 247520      4


Chrom,Start,End,Name
<chr>,<dbl>,<dbl>,<chr>
chr1,10015,10442,chr1:10015-10442
chr1,14253,14645,chr1:14253-14645
chr1,16015,16477,chr1:16015-16477
chr1,17237,17772,chr1:17237-17772
chr1,28903,29613,chr1:28903-29613
chr1,30803,31072,chr1:30803-31072


In [3]:
fdiry = file.path(FD_RES, "results", "region")
fname = "region_screened_selected.bed.gz"
fpath = file.path(fdiry, fname)

cnames = c("Chrom", "Start", "End", "Name")
dat = read_tsv(fpath, col_names = cnames, show_col_types = FALSE)

dat_region_subset = dat
print(dim(dat))
dat

[1] 16  4


Chrom,Start,End,Name
<chr>,<dbl>,<dbl>,<chr>
chr11,4000000,6600000,chr11:4091884-6505900
chr11,32800000,35000000,chr11:32869701-34870100
chr11,61700000,62000000,chr11:61787329-61898348
chr12,54290000,54310000,chr12:54300766-54301042
chr12,110000000,113000000,chr12:110726151-112325737
chr16,0,1300000,chr16:10001-1173100
chr19,12880000,12900000,chr19:12887110-12887237
chr2,59000000,62000000,chr2:59553301-61553700
chr20,55500000,59000000,chr20:56391301-58391700
chr3,128475000,128512000,chr3:128487571-128487937


## Subset by the screened regions

In [4]:
dat = dat_region_subset
lst = split(dat, dat$Name)

lst_region_subset = lst
vec = unlist(lapply(lst, nrow))
print(vec)

  chr11:32869701-34870100     chr11:4091884-6505900   chr11:61787329-61898348 
                        1                         1                         1 
chr12:110726151-112325737   chr12:54300766-54301042       chr16:10001-1173100 
                        1                         1                         1 
  chr19:12887110-12887237    chr2:59553301-61553700   chr20:56391301-58391700 
                        1                         1                         1 
 chr3:128487571-128487937    chr4:55237590-56068531    chr5:87611207-89911163 
                        1                         1                         1 
 chr6:108840570-109828800  chr6:134253831-136927585  chr8:126735901-128736550 
                        1                         1                         1 
   chrX:47785501-49880650 
                        1 


In [5]:
lst = lst_region_subset
lst = lapply(lst, function(vec){
    ###
    txt_chrom = vec$Chrom
    val_start = vec$Start
    val_end   = vec$End
    
    ###
    dat = dat_peak_ocr
    dat = dat %>%
        dplyr::filter(
            Chrom == txt_chrom,
            Start <= val_end,
            End   >= val_start
        )
    return(dat)
})
dat = bind_rows(lst)

dat_peak_ocr_subset = dat
print(dim(dat))
head(dat)

[1] 5376    4


Chrom,Start,End,Name
<chr>,<dbl>,<dbl>,<chr>
chr11,32809191,32809816,chr11:32809191-32809816
chr11,32829398,32830575,chr11:32829398-32830575
chr11,32838250,32838808,chr11:32838250-32838808
chr11,32843903,32844319,chr11:32843903-32844319
chr11,32859295,32859826,chr11:32859295-32859826
chr11,32870601,32871324,chr11:32870601-32871324


## Pairs of regions

In [6]:
### init
dat = dat_peak_ocr_subset
vec = paste0("chr", c(1:22, "X"))
dat = dat %>% dplyr::filter(Chrom %in% vec)

### generate region pairs
lst = split(dat$Name, dat$Chrom)
lst = lapply(lst, function(vec){
    ### Show progress
    cat(vec[1], "\n", length(vec), "\n\n")
    flush.console()
    
    ### create combinations without permutation
    dat_comb2_self  = data.frame(V1 = vec, V2 = vec)
    dat_comb2_pair  = combn(vec, 2) %>% t %>% as.data.frame    
    dat_comb2_total = dplyr::bind_rows(
        dat_comb2_self, 
        dat_comb2_pair) %>% 
        dplyr::arrange(V1, V2)
    
    return(dat_comb2_total)
})

### assign and show
lst_peak_comb2_table = lst
print(names(lst))
head(lst[[1]])

chr11:32809191-32809816 
 727 

chr12:109998697-110000145 
 525 

chr16:10131-11496 
 321 

chr19:12881065-12881849 
 6 

chr2:59025102-59025557 
 274 

chr20:56003558-56004239 
 394 

chr3:128477179-128477796 
 9 

chr4:55008511-55009300 
 188 

chr5:86186696-86187100 
 356 

chr6:108002670-108003242 
 1013 

chr8:122533775-122534083 
 1265 

chrX:47047714-47048641 
 298 

 [1] "chr11" "chr12" "chr16" "chr19" "chr2"  "chr20" "chr3"  "chr4"  "chr5" 
[10] "chr6"  "chr8"  "chrX" 


Unnamed: 0_level_0,V1,V2
Unnamed: 0_level_1,<chr>,<chr>
1,chr11:32809191-32809816,chr11:32809191-32809816
2,chr11:32809191-32809816,chr11:32829398-32830575
3,chr11:32809191-32809816,chr11:32838250-32838808
4,chr11:32809191-32809816,chr11:32843903-32844319
5,chr11:32809191-32809816,chr11:32859295-32859826
6,chr11:32809191-32809816,chr11:32870601-32871324


In [7]:
lst = lst_peak_comb2_table
lst = lapply(lst, function(dat){
    ### convert to bedpe format
    dat = dat %>%
        tidyr::separate(V1, c("Chrom1", "Start1", "End1"), sep = "[:-]") %>%
        tidyr::separate(V2, c("Chrom2", "Start2", "End2"), sep = "[:-]") %>%
        dplyr::select(Chrom1, Start1, End1, Chrom2, Start2, End2)
    return(dat)
})

### assign and show
lst_peak_comb2_bedpe = lst
print(names(lst))
head(lst[[1]])

 [1] "chr11" "chr12" "chr16" "chr19" "chr2"  "chr20" "chr3"  "chr4"  "chr5" 
[10] "chr6"  "chr8"  "chrX" 


Unnamed: 0_level_0,Chrom1,Start1,End1,Chrom2,Start2,End2
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
1,chr11,32809191,32809816,chr11,32809191,32809816
2,chr11,32809191,32809816,chr11,32829398,32830575
3,chr11,32809191,32809816,chr11,32838250,32838808
4,chr11,32809191,32809816,chr11,32843903,32844319
5,chr11,32809191,32809816,chr11,32859295,32859826
6,chr11,32809191,32809816,chr11,32870601,32871324


In [8]:
lst = lst_peak_comb2_bedpe
dat = bind_rows(lst)
dat = dat %>% 
    dplyr::arrange(
        Chrom1, Start1, End1, 
        Chrom2, Start2, End2)

dat_peak_comb2_bedpe = dat
print(dim(dat))
head(dat)

[1] 2010139       6


Unnamed: 0_level_0,Chrom1,Start1,End1,Chrom2,Start2,End2
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
1,chr11,32809191,32809816,chr11,32809191,32809816
2,chr11,32809191,32809816,chr11,32829398,32830575
3,chr11,32809191,32809816,chr11,32838250,32838808
4,chr11,32809191,32809816,chr11,32843903,32844319
5,chr11,32809191,32809816,chr11,32859295,32859826
6,chr11,32809191,32809816,chr11,32870601,32871324


## Save results

In [9]:
### save in table format
fdiry = file.path(
    FD_RES, "results", "region", 
    "KS91_K562_ASTARRseq_peak_macs_input", 
    "region_pair",
    "region_pair_selected"
)

for (idn in names(lst_peak_comb2_table)){
    ### set file directory
    fname = paste("region_pair", idn, "tsv.gz", sep=".")
    fpath = file.path(fdiry, fname)
    
    
    ### get data
    dat = lst_peak_comb2_table[[idn]]
    
    ### show progress
    print(idn)
    print(fpath)
    print(head(dat, 3))
    print(dim(dat))
    cat("\n")
    
    ### save the results
    write_tsv(dat, fpath, col_names=FALSE)
}

[1] "chr11"
[1] "/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/region_pair/region_pair_selected/region_pair.chr11.tsv.gz"
                       V1                      V2
1 chr11:32809191-32809816 chr11:32809191-32809816
2 chr11:32809191-32809816 chr11:32829398-32830575
3 chr11:32809191-32809816 chr11:32838250-32838808
[1] 264628      2

[1] "chr12"
[1] "/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/region_pair/region_pair_selected/region_pair.chr12.tsv.gz"
                         V1                        V2
1 chr12:109998697-110000145 chr12:109998697-110000145
2 chr12:109998697-110000145 chr12:110024756-110025315
3 chr12:109998697-110000145 chr12:110035591-110035852
[1] 138075      2

[1] "chr16"
[1] "/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/region_pair/region_pair_selected/region_pair.chr16.tsv.gz"
            

In [10]:
### save in table format
fdiry = file.path(FD_RES, "results", "region", "KS91_K562_ASTARRseq_peak_macs_input", "region_pair")
fname = paste("region_pair", "SUBSET", "bedpe.gz", sep=".")
fpath = file.path(fdiry, fname)

dat = dat_peak_comb2_bedpe
write_tsv(dat, fpath, col_names=FALSE)

### Show progress
cat("Save results:\n", fpath,    "\n")
cat("\n")
cat("Data shape:\n",   dim(dat), "\n")

Save results:
 /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/region_pair/region_pair.SUBSET.bedpe.gz 

Data shape:
 2010139 6 
