**Set environment**

In [1]:
### load the project configuration with messages and warnings silenced,
### then print the environment summary
suppressWarnings(suppressMessages(
    source(file.path("..", "run_config_project_sing.R"))
))
show_env()

You are working on        Singularity: singularity_proj_encode_fcc 
BASE DIRECTORY (FD_BASE): /data/reddylab/Kuei 
REPO DIRECTORY (FD_REPO): /data/reddylab/Kuei/repo 
WORK DIRECTORY (FD_WORK): /data/reddylab/Kuei/work 
DATA DIRECTORY (FD_DATA): /data/reddylab/Kuei/data 

You are working with      ENCODE FCC 
PATH OF PROJECT (FD_PRJ): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC 
PROJECT RESULTS (FD_RES): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results 
PROJECT SCRIPTS (FD_EXE): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/scripts 
PROJECT DATA    (FD_DAT): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/data 
PROJECT NOTE    (FD_NBK): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/notebooks 
PROJECT DOCS    (FD_DOC): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/docs 
PROJECT LOG     (FD_LOG): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/log 
PROJECT REF     (FD_REF): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/references 



**Set global variables**

In [2]:
### folder name of the region set handled in this notebook
TXT_REGION_FOLDER <- "genome_cres"

**Check input files**

In [3]:
### list the input files available for the region set
### (folder name comes from the global constant instead of a repeated literal)
txt_folder <- TXT_REGION_FOLDER
txt_fdiry  <- file.path(FD_REF, txt_folder)

### dir() returns an empty vector for a missing folder, which would make
### this check pass without showing anything; fail with a clear message instead
if (!dir.exists(txt_fdiry)) {
    stop("Input directory not found: ", txt_fdiry)
}

### print one file name per line
vec <- dir(txt_fdiry)
for (txt in vec) {
    cat(txt, "\n")
}

K562.hg38.label_cres.tsv 


## Import data

In [4]:
### locate the CRE label table under the reference directory
txt_folder <- TXT_REGION_FOLDER
txt_fname  <- "K562.hg38.label_cres.tsv"
txt_fdiry  <- file.path(FD_REF, txt_folder)
txt_fpath  <- file.path(txt_fdiry, txt_fname)

### load the table without printing the column type summary
dat_region_cres_import <- read_tsv(txt_fpath, show_col_types = FALSE)
dat <- dat_region_cres_import

### report the dimensions and preview the first rows
print(dim(dat_region_cres_import))
fun_display_table(head(dat_region_cres_import))

[1] 27 11


Chrom,ChromStart,ChromEnd,Region,Name,Type,Target,Target_TSS,Description,Reference,Note
chrX,48785794,48786722,"chrX:48,785,794-48,786,722",GATA1 promoter,promoter,GATA1,GATA1|chrX:48786589-48786590,GATA1 promoter,"Fulco et al., 2016; Reilly et al., 2021",known CREs
chrX,48782614,48783539,"chrX:48,782,614-48,783,539",GATA1 enhancer,enhancer,GATA1,GATA1|chrX:48786589-48786590,GATA1 Hematopoietic Enhancer,"Fulco et al., 2016; Reilly et al., 2021",known CREs
chrX,48801698,48802997,"chrX:48,801,698-48,802,997",HDAC6 promoter/enhancer,"promoter,enhancer",HDAC6,HDAC6|chrX:48802066-48802067,HDAC6 promoter and enhancer,"Fulco et al., 2016; Reilly et al., 2021",known CREs
chr8,127735050,127736592,"chr8:127,735,050-127,736,592",MYC promoter,promoter,MYC,MYC|chr8:127736230-127736231,MYC promoter,"Fulco et al., 2016; Reilly et al., 2021",known CREs
chr8,127898376,127899697,"chr8:127,898,376-127,899,697",MYC E1,enhancer,MYC,MYC|chr8:127736230-127736231,intragenic enhancer within PVT1,"Fulco et al., 2016; Reilly et al., 2021",known CREs
chr8,127959692,127960934,"chr8:127,959,692-127,960,934",MYC E2,enhancer,MYC,MYC|chr8:127736230-127736231,intragenic enhancer within PVT1,"Fulco et al., 2016; Reilly et al., 2021",known CREs


## Arrange table

In [5]:
### build the arranged table from the imported one in a single chain
dat_region_cres_arrange <- dat_region_cres_import %>%
    ### make the types of the location columns explicit
    dplyr::mutate(
        Chrom      = as.character(Chrom),
        ChromStart = as.integer(ChromStart),
        ChromEnd   = as.integer(ChromEnd)
    ) %>%
    ### regenerate the region column from the location columns
    dplyr::mutate(Region = fun_gen_region(Chrom, ChromStart, ChromEnd)) %>%
    ### attach the group name and use the region name as its label
    dplyr::mutate(
        Group = "genome_cres",
        Label = Name
    )

### keep the working copy in sync, then report dimensions and preview
dat <- dat_region_cres_arrange
print(dim(dat_region_cres_arrange))
fun_display_table(head(dat_region_cres_arrange))

[1] 27 13


Chrom,ChromStart,ChromEnd,Region,Name,Type,Target,Target_TSS,Description,Reference,Note,Group,Label
chrX,48785794,48786722,chrX:48785794-48786722,GATA1 promoter,promoter,GATA1,GATA1|chrX:48786589-48786590,GATA1 promoter,"Fulco et al., 2016; Reilly et al., 2021",known CREs,genome_cres,GATA1 promoter
chrX,48782614,48783539,chrX:48782614-48783539,GATA1 enhancer,enhancer,GATA1,GATA1|chrX:48786589-48786590,GATA1 Hematopoietic Enhancer,"Fulco et al., 2016; Reilly et al., 2021",known CREs,genome_cres,GATA1 enhancer
chrX,48801698,48802997,chrX:48801698-48802997,HDAC6 promoter/enhancer,"promoter,enhancer",HDAC6,HDAC6|chrX:48802066-48802067,HDAC6 promoter and enhancer,"Fulco et al., 2016; Reilly et al., 2021",known CREs,genome_cres,HDAC6 promoter/enhancer
chr8,127735050,127736592,chr8:127735050-127736592,MYC promoter,promoter,MYC,MYC|chr8:127736230-127736231,MYC promoter,"Fulco et al., 2016; Reilly et al., 2021",known CREs,genome_cres,MYC promoter
chr8,127898376,127899697,chr8:127898376-127899697,MYC E1,enhancer,MYC,MYC|chr8:127736230-127736231,intragenic enhancer within PVT1,"Fulco et al., 2016; Reilly et al., 2021",known CREs,genome_cres,MYC E1
chr8,127959692,127960934,chr8:127959692-127960934,MYC E2,enhancer,MYC,MYC|chr8:127736230-127736231,intragenic enhancer within PVT1,"Fulco et al., 2016; Reilly et al., 2021",known CREs,genome_cres,MYC E2


## Define column description

In [6]:
### column descriptions, keyed by column name (order matches the arranged table)
vec_txt_cname_note <- c(
    Chrom       = "Name of the chromosome",
    ChromStart  = "The starting position of the feature in the chromosome",
    ChromEnd    = "The ending position of the feature in the chromosome",
    Region      = "Region location",
    Name        = "Name given to a region; Use '.' if no name is assigned.",
    Type        = "promoter, enhancer, or silencer",
    Target      = "Gene",
    Target_TSS  = "TSS of the Target used in this study",
    Description = "Summary of the CREs annotated",
    Reference   = "Previous studies cited",
    Note        = "Note",
    Group       = "Region group",
    Label       = "Region label"
)

### turn the named vector into a two-column table: Name, Note
dat_cname <- tibble::tibble(
    Name = names(vec_txt_cname_note),
    Note = unname(vec_txt_cname_note)
)

### keep the working copy in sync and show the table
dat <- dat_cname
fun_display_table(dat_cname)

Name,Note
Chrom,Name of the chromosome
ChromStart,The starting position of the feature in the chromosome
ChromEnd,The ending position of the feature in the chromosome
Region,Region location
Name,Name given to a region; Use '.' if no name is assigned.
Type,"promoter, enhancer, or silencer"
Target,Gene
Target_TSS,TSS of the Target used in this study
Description,Summary of the CREs annotated
Reference,Previous studies cited


## Export

In [7]:
### set directory
### (created from within R, including any missing parent folders, so the
###  path is never pasted into a shell command)
txt_folder <- TXT_REGION_FOLDER
txt_fdiry  <- file.path(FD_RES, "region", txt_folder)
dir.create(txt_fdiry, recursive = TRUE, showWarnings = FALSE)

### write table
### (no header row for this export; readr compresses the output because of
###  the ".gz" extension)
txt_fname <- "K562.hg38.label_cres.bed.gz"
txt_fpath <- file.path(txt_fdiry, txt_fname)

dat <- dat_region_cres_arrange
write_tsv(dat, txt_fpath, col_names = FALSE)

In [8]:
### set directory
### (recursive, so the folder is created even when its parent folders do not
###  exist yet; a non-recursive call would fail silently with warnings off)
txt_folder <- TXT_REGION_FOLDER
txt_fdiry  <- file.path(FD_RES, "region", txt_folder, "summary")
dir.create(txt_fdiry, recursive = TRUE, showWarnings = FALSE)

### write table (with header row)
txt_fname <- "K562.hg38.label_cres.tsv"
txt_fpath <- file.path(txt_fdiry, txt_fname)

dat <- dat_region_cres_arrange
write_tsv(dat, txt_fpath, col_names = TRUE)

In [9]:
### set directory
### (recursive, so the folder is created even when its parent folders do not
###  exist yet; a non-recursive call would fail silently with warnings off)
txt_folder <- TXT_REGION_FOLDER
txt_fdiry  <- file.path(FD_RES, "region", txt_folder, "summary")
dir.create(txt_fdiry, recursive = TRUE, showWarnings = FALSE)

### write the column description table
txt_fname <- "description.tsv"
txt_fpath <- file.path(txt_fdiry, txt_fname)

dat <- dat_cname
write_tsv(dat, txt_fpath)

## Check: Target TSS

In [10]:
### locate the TSS table in the summary folder of the genome_tss results
txt_folder <- "genome_tss"
txt_fname  <- "K562.hg38.TSS.selected_by_highest_Pol2_signal.tsv"
txt_fdiry  <- file.path(FD_RES, "region", txt_folder, "summary")
txt_fpath  <- file.path(txt_fdiry, txt_fname)

### load the table without printing the column type summary
dat_region_tss <- read_tsv(txt_fpath, show_col_types = FALSE)
dat <- dat_region_tss

### report the dimensions and preview the first rows
print(dim(dat_region_tss))
fun_display_table(head(dat_region_tss))

[1] 29330     8


Chrom,ChromStart,ChromEnd,Region,Gene,Score,Group,Label
chr1,11873,11874,chr1:11873-11874,DDX11L1,0.00023,TSS_Pol2,DDX11L1
chr1,17436,17437,chr1:17436-17437,MIR6859-1,9.43812,TSS_Pol2,MIR6859-1
chr1,17436,17437,chr1:17436-17437,MIR6859-2,9.43812,TSS_Pol2,MIR6859-2
chr1,17436,17437,chr1:17436-17437,MIR6859-3,9.43812,TSS_Pol2,MIR6859-3
chr1,17436,17437,chr1:17436-17437,MIR6859-4,9.43812,TSS_Pol2,MIR6859-4
chr1,29370,29371,chr1:29370-29371,WASH7P,0.00023,TSS_Pol2,WASH7P


In [11]:
### target genes to look up, in the order they should be displayed
vec_txt_target <- c(
    "GATA1", "HDAC6", 
    "MYC",   "HBE1",
    "FADS1", "FADS2", "FADS3", 
    "LMO2", 
    "MYB", 
    "SYN1", "DRD2"
)

### report targets that have no entry in the TSS table; the filter below
### would otherwise drop them without any indication
vec_txt_missing <- setdiff(vec_txt_target, dat_region_tss$Gene)
if (length(vec_txt_missing) > 0) {
    cat("Targets not found in TSS table:", paste(vec_txt_missing, collapse = ", "), "\n")
}

### keep the target genes only and order the rows as listed in vec_txt_target
dat <- dat_region_tss
dat <- dat %>% 
    dplyr::filter(Gene %in% vec_txt_target) %>%
    dplyr::mutate(Gene = factor(Gene, levels = vec_txt_target)) %>%
    dplyr::arrange(Gene)

fun_display_table(dat)

Chrom,ChromStart,ChromEnd,Region,Gene,Score,Group,Label
chrX,48786589,48786590,chrX:48786589-48786590,GATA1,70.4208,TSS_Pol2,GATA1
chrX,48802066,48802067,chrX:48802066-48802067,HDAC6,183.3,TSS_Pol2,HDAC6
chr8,127736230,127736231,chr8:127736230-127736231,MYC,272.781,TSS_Pol2,MYC
chr11,5269945,5269946,chr11:5269945-5269946,HBE1,319.925,TSS_Pol2,HBE1
chr11,61817003,61817004,chr11:61817003-61817004,FADS1,158.661,TSS_Pol2,FADS1
chr11,61816202,61816203,chr11:61816202-61816203,FADS2,418.267,TSS_Pol2,FADS2
chr11,61891545,61891546,chr11:61891545-61891546,FADS3,8.24971,TSS_Pol2,FADS3
chr11,33869878,33869879,chr11:33869878-33869879,LMO2,8.55278,TSS_Pol2,LMO2
chr6,135181307,135181308,chr6:135181307-135181308,MYB,15.1013,TSS_Pol2,MYB
chrX,47619857,47619858,chrX:47619857-47619858,SYN1,0.627251,TSS_Pol2,SYN1
