**Set environment**

In [1]:
### source the two project config scripts with messages and warnings silenced,
### then print the environment summary
invisible(lapply(
    c("../config/config_sing.R", "../config/config_func.R"),
    function(fpath_config) suppressMessages(suppressWarnings(source(fpath_config)))
))
show_env()

You are in Singularity: singularity_proj_encode_fcc 
BASE DIRECTORY (FD_BASE): /data/reddylab/Kuei 
WORK DIRECTORY (FD_WORK): /data/reddylab/Kuei/out 
CODE DIRECTORY (FD_CODE): /data/reddylab/Kuei/code 
PATH OF PROJECT (FD_PRJ): /data/reddylab/Kuei/code/Proj_CombEffect_ENCODE_FCC 
PATH OF RESULTS (FD_RES): /data/reddylab/Kuei/out/proj_combeffect_encode_fcc 
PATH OF LOG     (FD_LOG): /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/log 


**Check data**

In [2]:
### list the entries under the source directory whose names start with "hic_intact"
fdiry = file.path(FD_RES, "source")
fglob = file.path(fdiry, "hic_intact*")
Sys.glob(paths = fglob)

In [3]:
### pick the experiment folder and print the name of every entry inside it
FOLDER = "hic_intact_K562_ENCSR479XDG"
fdiry  = file.path(FD_RES, "source", FOLDER)
for (fname in list.files(fdiry)) {
    print(fname)
}

[1] "ENCFF621AIY.hic"
[1] "K562.ENCSR479XDG.ENCFF126GED.contact_domains.bedpe.gz"
[1] "K562.ENCSR479XDG.ENCFF256ZMD.loops.bedpe.gz"


## Import

In [4]:
### path of the loop calls (bedpe) inside the selected source folder
fname = "K562.ENCSR479XDG.ENCFF256ZMD.loops.bedpe.gz"
fdiry = file.path(FD_RES, "source", FOLDER)
fpath = file.path(fdiry, fname)

### read the table without a header row; "#" marks comments
dat_hic_loop = read_tsv(fpath, col_names = FALSE, comment = "#")
dat = dat_hic_loop

### show the dimensions and the first rows
print(dim(dat))
head(dat)

[1mRows: [22m[34m46004[39m [1mColumns: [22m[34m33[39m
[36m──[39m [1mColumn specification[22m [36m──────────────────────────────────────────────────────[39m
[1mDelimiter:[22m "\t"
[31mchr[39m  (7): X1, X4, X7, X8, X9, X10, X11
[32mdbl[39m (26): X2, X3, X5, X6, X12, X13, X14, X15, X16, X17, X18, X19, X20, X21, ...

[36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
[36mℹ[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.


[1] 46004    33


X1,X2,X3,X4,X5,X6,X7,X8,X9,X10,⋯,X24,X25,X26,X27,X28,X29,X30,X31,X32,X33
<chr>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
chr10,102835000,102836000,chr10,102901000,102902000,.,.,.,.,⋯,500,102834600.0,102835200.0,102901400.0,102901700.0,102834700.0,102901500.0,4.0,0.0002171732,0.0
chr10,123583000,123584000,chr10,123967000,123968000,.,.,.,.,⋯,500,,,,,,,,,
chr10,60780000,60782000,chr10,60828000,60830000,.,.,.,.,⋯,0,,,,,,,,,
chr10,33050000,33051000,chr10,33067000,33068000,.,.,.,.,⋯,0,,,,,,,,,
chr10,11412000,11414000,chr10,11472000,11474000,.,.,.,.,⋯,0,11412000.0,11412500.0,11471700.0,11472700.0,11412200.0,11472100.0,10.0,0.0088614312,0.0
chr10,45005000,45010000,chr10,45465000,45470000,.,.,.,.,⋯,2500,,,,,,,,,


## Arrange

**Loop distance and size**

In [5]:
### keep the first six columns of the bedpe table: the coordinates of anchor A and anchor B
cnames = c("Chrom_A", "Start_A", "End_A", "Chrom_B", "Start_B", "End_B")

dat = dat_hic_loop
dat = dat[,1:6]
colnames(dat) = cnames

### derive per-loop annotations:
###   Loop_A / Loop_B : "chrom:start-end" label of each anchor
###   Length_A / _B   : anchor width (End - Start)
###   Distance        : absolute distance between the two anchor midpoints
###   Loop            : "Loop_A|Loop_B" identifier of the pair
### The coordinates are doubles, so the labels are built with sprintf("%.0f"):
### paste0() would render round values in scientific notation
### (e.g. 1000000 -> "1e+06") and corrupt the identifiers.
### NOTE(review): Distance is computed without checking Chrom_A == Chrom_B;
### presumably every loop is intra-chromosomal -- confirm.
dat = dat %>%
    dplyr::mutate(
        Loop_A    = sprintf("%s:%.0f-%.0f", Chrom_A, Start_A, End_A),
        Loop_B    = sprintf("%s:%.0f-%.0f", Chrom_B, Start_B, End_B),
        Length_A  = End_A - Start_A,
        Length_B  = End_B - Start_B,
        Distance = abs((Start_A + End_A) / 2 - (Start_B + End_B) / 2)
    ) %>%
    dplyr::mutate(
        Loop = paste0(Loop_A, "|", Loop_B)
    ) %>%
    dplyr::distinct()

### store the result and show the dimensions and the first rows
dat_hic_loop_info = dat
print(dim(dat))
head(dat)

[1] 46004    12


Chrom_A,Start_A,End_A,Chrom_B,Start_B,End_B,Loop_A,Loop_B,Length_A,Length_B,Distance,Loop
<chr>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<chr>
chr10,102835000,102836000,chr10,102901000,102902000,chr10:102835000-102836000,chr10:102901000-102902000,1000,1000,66000,chr10:102835000-102836000|chr10:102901000-102902000
chr10,123583000,123584000,chr10,123967000,123968000,chr10:123583000-123584000,chr10:123967000-123968000,1000,1000,384000,chr10:123583000-123584000|chr10:123967000-123968000
chr10,60780000,60782000,chr10,60828000,60830000,chr10:60780000-60782000,chr10:60828000-60830000,2000,2000,48000,chr10:60780000-60782000|chr10:60828000-60830000
chr10,33050000,33051000,chr10,33067000,33068000,chr10:33050000-33051000,chr10:33067000-33068000,1000,1000,17000,chr10:33050000-33051000|chr10:33067000-33068000
chr10,11412000,11414000,chr10,11472000,11474000,chr10:11412000-11414000,chr10:11472000-11474000,2000,2000,60000,chr10:11412000-11414000|chr10:11472000-11474000
chr10,45005000,45010000,chr10,45465000,45470000,chr10:45005000-45010000,chr10:45465000-45470000,5000,5000,460000,chr10:45005000-45010000|chr10:45465000-45470000


## Save results for loop annotation

In [11]:
### unique anchor regions on each side of the loops, sorted by chromosome, start and end
dat_loop_A = dat_hic_loop_info %>%
    dplyr::distinct(Chrom_A, Start_A, End_A) %>%
    dplyr::arrange(Chrom_A, Start_A, End_A)

dat_loop_B = dat_hic_loop_info %>%
    dplyr::distinct(Chrom_B, Start_B, End_B) %>%
    dplyr::arrange(Chrom_B, Start_B, End_B)

In [12]:
### preview the first rows of the A-side anchors
head(dat_loop_A, n = 6)

Chrom_A,Start_A,End_A
<chr>,<dbl>,<dbl>
chr1,770000,780000
chr1,904000,906000
chr1,920000,925000
chr1,980000,990000
chr1,984000,985000
chr1,1000000,1005000


In [13]:
### preview the first rows of the B-side anchors
head(dat_loop_B, n = 6)

Chrom_B,Start_B,End_B
<chr>,<dbl>,<dbl>
chr1,840000,850000
chr1,976000,978000
chr1,1060000,1065000
chr1,1062000,1063000
chr1,1063000,1064000
chr1,1116000,1117000


In [14]:
### list the entries under results/region whose names start with "hic_intact"
fdiry = file.path(FD_RES, "results", "region")
fglob = file.path(fdiry, "hic_intact*")
Sys.glob(paths = fglob)

In [15]:
### output folder for the loop regions; create it when missing so the writes
### below do not fail on a fresh results tree
FOLDER="hic_intact_ENCSR479XDG"
fdiry = file.path(FD_RES, "results", "region", FOLDER)
dir.create(fdiry, recursive = TRUE, showWarnings = FALSE)

### A-side anchors, written without a header row
### (the coordinates were parsed as doubles; they are stored as integers here so
###  they can never be serialized in scientific notation)
fname = "hic_intact.ENCSR479XDG.Loop_A.bed.gz"
fpath = file.path(fdiry, fname)

dat = dat_loop_A %>%
    dplyr::mutate(
        Start_A = as.integer(Start_A),
        End_A   = as.integer(End_A)
    )
write_tsv(dat, fpath, col_names = FALSE)

### B-side anchors, written without a header row
fname = "hic_intact.ENCSR479XDG.Loop_B.bed.gz"
fpath = file.path(fdiry, fname)

dat = dat_loop_B %>%
    dplyr::mutate(
        Start_B = as.integer(Start_B),
        End_B   = as.integer(End_B)
    )
write_tsv(dat, fpath, col_names = FALSE)

### full loop table, written with its header
### (coordinates and anchor lengths are whole numbers and are stored as integers;
###  Distance is a midpoint difference and is left as computed)
fname = "hic_intact.ENCSR479XDG.Loop_info.tsv"
fpath = file.path(fdiry, fname)

dat = dat_hic_loop_info %>%
    dplyr::mutate(
        Start_A  = as.integer(Start_A),
        End_A    = as.integer(End_A),
        Start_B  = as.integer(Start_B),
        End_B    = as.integer(End_B),
        Length_A = as.integer(Length_A),
        Length_B = as.integer(Length_B)
    )
write_tsv(dat, fpath)