**Set environment**

In [1]:
### load the project configuration; messages and warnings raised while sourcing are silenced
suppressMessages(suppressWarnings(source("../run_config_project_sing.R")))
### print the environment summary (show_env is presumably defined by the sourced script -- confirm)
show_env()

You are working on        Singularity 
BASE DIRECTORY (FD_BASE): /mount 
REPO DIRECTORY (FD_REPO): /mount/repo 
WORK DIRECTORY (FD_WORK): /mount/work 
DATA DIRECTORY (FD_DATA): /mount/data 

You are working with      ENCODE FCC 
PATH OF PROJECT (FD_PRJ): /mount/repo/Proj_ENCODE_FCC 
PROJECT RESULTS (FD_RES): /mount/repo/Proj_ENCODE_FCC/results 
PROJECT SCRIPTS (FD_EXE): /mount/repo/Proj_ENCODE_FCC/scripts 
PROJECT DATA    (FD_DAT): /mount/repo/Proj_ENCODE_FCC/data 
PROJECT NOTE    (FD_NBK): /mount/repo/Proj_ENCODE_FCC/notebooks 
PROJECT DOCS    (FD_DOC): /mount/repo/Proj_ENCODE_FCC/docs 
PROJECT LOG     (FD_LOG): /mount/repo/Proj_ENCODE_FCC/log 
PROJECT APP     (FD_APP): /mount/repo/Proj_ENCODE_FCC/app 
PROJECT REF     (FD_REF): /mount/repo/Proj_ENCODE_FCC/references 



**Set global variables**

In [2]:
### dataset folder name; used below as the subfolder under both the external data
### directory (input) and the region results directory (output)
TXT_FOLDER_REGION = "hic_intact_K562_ENCSR479XDG"
### assay label; written into the Group column of the anchor tables
TXT_ASSAY = "hic_intact"
### NOTE(review): looks like the experiment accession; not referenced by the cells visible here
TXT_INDEX = "ENCSR479XDG"

## Import data

In [3]:
### list the files available for this dataset, one name per line
txt_folder = TXT_FOLDER_REGION
txt_fdiry  = file.path(FD_DAT, "external", txt_folder)

vec = list.files(txt_fdiry)
cat(paste0(vec, " \n"), sep = "")

K562.hg38.ENCSR479XDG.ENCFF126GED.hic_intact.contact_domain.bedpe.gz 
K562.hg38.ENCSR479XDG.ENCFF256ZMD.hic_intact.loop.bedpe.gz 
K562.hg38.ENCSR479XDG.ENCFF621AIY.hic_intact.matrix.hic 
run_download.log.txt 
run_download.sh 


In [4]:
### locate the loop calls (BEDPE) inside the downloaded dataset folder
txt_fname  = "K562.hg38.ENCSR479XDG.ENCFF256ZMD.hic_intact.loop.bedpe.gz"
txt_folder = TXT_FOLDER_REGION
txt_fdiry  = file.path(FD_DAT, "external", txt_folder)
txt_fpath  = file.path(txt_fdiry, txt_fname)

### read the table and drop its first parsed row
### NOTE(review): the first row is presumably a non-data line, which would also
### explain the parsing warning -- confirm against the raw file
dat_region_import = read_tsv(txt_fpath, show_col_types = FALSE) %>%
    dplyr::filter(row_number() != 1)

### give the first column a plain "chr" name
colnames(dat_region_import)[1] = "chr"

### show the dimensions and a preview of the imported table
dat = dat_region_import
print(dim(dat))
fun_display_table(head(dat))

“[1m[22mOne or more parsing issues, call `problems()` on your data frame for details, e.g.:
  dat <- vroom(...)
  problems(dat)”


[1] 46004    33


chr,x1,x2,chr2,y1,y2,name,score,strand1,strand2,color,observed,expectedBL,expectedDonut,expectedH,expectedV,fdrBL,fdrDonut,fdrH,fdrV,numCollapsed,centroid1,centroid2,radius,highRes_start_1,highRes_end_1,highRes_start_2,highRes_end_2,localX,localY,localObserved,localPval,localPeakID
chr10,102835000,102836000,chr10,102901000,102902000,.,.,.,.,255255,16,2.545303,2.056691,2.896359,2.602787,0.0,0.0,1e-07,0.0,2,102835000,102901500,500,102834600.0,102835200.0,102901400.0,102901700.0,102834700.0,102901500.0,4.0,0.0002172,0.0
chr10,123583000,123584000,chr10,123967000,123968000,.,.,.,.,255255,17,1.22944,1.126373,1.532096,2.968846,0.0,0.0,0.0,0.0,2,123583000,123967500,500,,,,,,,,,
chr10,60780000,60782000,chr10,60828000,60830000,.,.,.,.,255255,16,3.935455,3.603662,4.087633,2.71987,4e-06,1.3e-06,6.4e-06,1e-07,1,60781000,60829000,0,,,,,,,,,
chr10,33050000,33051000,chr10,33067000,33068000,.,.,.,.,255255,11,1.858619,1.843505,1.302612,2.001136,4.2e-06,3.9e-06,1e-07,8.3e-06,1,33050500,33067500,0,,,,,,,,,
chr10,11412000,11414000,chr10,11472000,11474000,.,.,.,.,255255,27,5.056888,4.13146,3.918301,7.767662,0.0,0.0,0.0,1e-07,1,11413000,11473000,0,11412000.0,11412500.0,11471700.0,11472700.0,11412200.0,11472100.0,10.0,0.0088614,0.0
chr10,45005000,45010000,chr10,45465000,45470000,.,.,.,.,255255,16,3.658383,2.829474,6.274282,2.340569,1.6e-06,1e-07,0.0008098,0.0,2,45007500,45465000,2500,,,,,,,,,


## Arrange loops

In [5]:
### init
### cname1: columns taken from the imported table
### cname2: names given to those columns (same order as cname1)
### cname3: final column order, including the derived columns
dat = dat_region_import
vec_txt_cname1 = c(
    "chr", "x1", "x2", "chr2", "y1", "y2",
    "observed", "centroid1", "centroid2", "radius"
)
vec_txt_cname2 = c(
    "Chrom_A", "ChromStart_A", "ChromEnd_A", "Chrom_B", "ChromStart_B", "ChromEnd_B",
    "Observed", "Centroid_A", "Centroid_B", "Radius"
)
vec_txt_cname3 = c(
    "Chrom_A", "ChromStart_A", "ChromEnd_A", "Region_A", "Length_A",
    "Chrom_B", "ChromStart_B", "ChromEnd_B", "Region_B", "Length_B",
    "Name",
    "Observed", "Centroid_A", "Centroid_B",
    "Radius", "Distance", "Size"
)

### keep the loop columns and rename them by position
dat = dplyr::select(dat, dplyr::all_of(vec_txt_cname1))
names(dat) = vec_txt_cname2

### derive anchor lengths, region labels, the loop name, and centroid distance
dat = dplyr::mutate(
    dat,
    Region_A = paste0(Chrom_A, ":", ChromStart_A, "-", ChromEnd_A),
    Region_B = paste0(Chrom_B, ":", ChromStart_B, "-", ChromEnd_B),
    Name     = paste0(Region_A, "|", Region_B),
    Length_A = ChromEnd_A - ChromStart_A,
    Length_B = ChromEnd_B - ChromStart_B,
    Size     = (Length_A + Length_B) / 2,
    Distance = Centroid_B - Centroid_A
)

### fix the column order and remove duplicated rows
dat = dplyr::select(dat, dplyr::all_of(vec_txt_cname3))
dat = dplyr::distinct(dat)

### assign and show
dat_region_arrange = dat
print(dim(dat_region_arrange))
fun_display_table(head(dat_region_arrange, 3))

[1] 46004    17


Chrom_A,ChromStart_A,ChromEnd_A,Region_A,Length_A,Chrom_B,ChromStart_B,ChromEnd_B,Region_B,Length_B,Name,Observed,Centroid_A,Centroid_B,Radius,Distance,Size
chr10,102835000,102836000,chr10:102835000-102836000,1000,chr10,102901000,102902000,chr10:102901000-102902000,1000,chr10:102835000-102836000|chr10:102901000-102902000,16,102835000,102901500,500,66500,1000
chr10,123583000,123584000,chr10:123583000-123584000,1000,chr10,123967000,123968000,chr10:123967000-123968000,1000,chr10:123583000-123584000|chr10:123967000-123968000,17,123583000,123967500,500,384500,1000
chr10,60780000,60782000,chr10:60780000-60782000,2000,chr10,60828000,60830000,chr10:60828000-60830000,2000,chr10:60780000-60782000|chr10:60828000-60830000,16,60781000,60829000,0,48000,2000


**Check**

In [6]:
### TRUE when the two anchors of every loop have the same length
dat = dat_region_arrange
with(dat, all(Length_A == Length_B))

## Prepare region A and B

In [7]:
### init
vec_txt_cname_A = c("Chrom_A", "ChromStart_A", "ChromEnd_A", "Name")
vec_txt_cname_B = c("Chrom_B", "ChromStart_B", "ChromEnd_B", "Name")
vec_txt_cname   = c("Chrom",   "ChromStart",   "ChromEnd",   "Name", "Group", "Label")
txt_assay = TXT_ASSAY

### helper: extract one anchor of each loop as a six-column table
###   dat_loop         loop table holding the anchor columns and Name
###   vec_txt_cname_in the anchor's chrom/start/end columns plus "Name"
###   txt_group        value stored in the Group column
###   txt_label        value stored in the Label column
###   vec_txt_cname_out column names assigned (by position) to the result
fun_prepare_anchor = function(dat_loop, vec_txt_cname_in, txt_group, txt_label, vec_txt_cname_out){
    dat_anchor = dplyr::select(dat_loop, dplyr::all_of(vec_txt_cname_in))
    dat_anchor = dplyr::mutate(dat_anchor, Group = txt_group, Label = txt_label)
    dat_anchor = dplyr::distinct(dat_anchor)
    colnames(dat_anchor) = vec_txt_cname_out
    return(dat_anchor)
}

### prepare point A
dat_region_A = fun_prepare_anchor(
    dat_region_arrange, vec_txt_cname_A, txt_assay, "Loop_A", vec_txt_cname
)

### prepare point B
dat_region_B = fun_prepare_anchor(
    dat_region_arrange, vec_txt_cname_B, txt_assay, "Loop_B", vec_txt_cname
)

In [8]:
### preview the first rows of the A anchors
dat = dat_region_A
dat %>% head() %>% fun_display_table()

Chrom,ChromStart,ChromEnd,Name,Group,Label
chr10,102835000,102836000,chr10:102835000-102836000|chr10:102901000-102902000,hic_intact,Loop_A
chr10,123583000,123584000,chr10:123583000-123584000|chr10:123967000-123968000,hic_intact,Loop_A
chr10,60780000,60782000,chr10:60780000-60782000|chr10:60828000-60830000,hic_intact,Loop_A
chr10,33050000,33051000,chr10:33050000-33051000|chr10:33067000-33068000,hic_intact,Loop_A
chr10,11412000,11414000,chr10:11412000-11414000|chr10:11472000-11474000,hic_intact,Loop_A
chr10,45005000,45010000,chr10:45005000-45010000|chr10:45465000-45470000,hic_intact,Loop_A


In [9]:
### preview the first rows of the B anchors
dat = dat_region_B
dat %>% head() %>% fun_display_table()

Chrom,ChromStart,ChromEnd,Name,Group,Label
chr10,102901000,102902000,chr10:102835000-102836000|chr10:102901000-102902000,hic_intact,Loop_B
chr10,123967000,123968000,chr10:123583000-123584000|chr10:123967000-123968000,hic_intact,Loop_B
chr10,60828000,60830000,chr10:60780000-60782000|chr10:60828000-60830000,hic_intact,Loop_B
chr10,33067000,33068000,chr10:33050000-33051000|chr10:33067000-33068000,hic_intact,Loop_B
chr10,11472000,11474000,chr10:11412000-11414000|chr10:11472000-11474000,hic_intact,Loop_B
chr10,45465000,45470000,chr10:45005000-45010000|chr10:45465000-45470000,hic_intact,Loop_B


## Save results

In [10]:
### set file path
txt_folder = TXT_FOLDER_REGION
txt_fdiry  = file.path(FD_RES, "region", txt_folder)
txt_fname = "K562.hg38.hic_intact.Loop_A.bed.gz"
txt_fpath = file.path(txt_fdiry, txt_fname)

### get table (A anchors sorted by chromosome, start, end)
dat = dat_region_A
dat = dat %>% dplyr::arrange(Chrom, ChromStart, ChromEnd)

### write table (no header row)
### recursive = TRUE also creates missing parent folders; without it dir.create
### fails quietly (warnings are suppressed) and write_tsv errors on the missing path
dir.create(txt_fdiry, showWarnings = FALSE, recursive = TRUE)
write_tsv(dat, txt_fpath, col_names = FALSE)

In [11]:
### set file path
txt_folder = TXT_FOLDER_REGION
txt_fdiry  = file.path(FD_RES, "region", txt_folder)
txt_fname = "K562.hg38.hic_intact.Loop_B.bed.gz"
txt_fpath = file.path(txt_fdiry, txt_fname)

### get table (B anchors sorted by chromosome, start, end)
dat = dat_region_B
dat = dat %>% dplyr::arrange(Chrom, ChromStart, ChromEnd)

### write table (no header row)
### recursive = TRUE also creates missing parent folders; without it dir.create
### fails quietly (warnings are suppressed) and write_tsv errors on the missing path
dir.create(txt_fdiry, showWarnings = FALSE, recursive = TRUE)
write_tsv(dat, txt_fpath, col_names = FALSE)

In [12]:
### set file path (nested "summary" subfolder of the dataset's region folder)
txt_folder = TXT_FOLDER_REGION
txt_fdiry  = file.path(FD_RES, "region", txt_folder, "summary")
txt_fname = "K562.hg38.hic_intact.Loop.tsv"
txt_fpath = file.path(txt_fdiry, txt_fname)

### get table (full loop table sorted by anchor A, then anchor B)
dat = dat_region_arrange
dat = dat %>% dplyr::arrange(
    Chrom_A, ChromStart_A, ChromEnd_A,
    Chrom_B, ChromStart_B, ChromEnd_B
)

### write table (with header row)
### recursive = TRUE also creates missing parent folders, so this cell does not
### depend on an earlier cell having created the dataset folder; without it
### dir.create fails quietly (warnings are suppressed) and write_tsv errors
dir.create(txt_fdiry, showWarnings = FALSE, recursive = TRUE)
write_tsv(dat, txt_fpath)