**Set environment**

In [1]:
### load the project configuration, silencing messages and warnings
suppressMessages(
    suppressWarnings(
        source("../config/config_sing.R")
    )
)

### print the environment summary
show_env()

You are in Singularity: singularity_proj_encode_fcc 
BASE DIRECTORY (FD_BASE): /data/reddylab/Kuei 
WORK DIRECTORY (FD_WORK): /data/reddylab/Kuei/out 
CODE DIRECTORY (FD_CODE): /data/reddylab/Kuei/code 
PATH OF PROJECT (FD_PRJ): /data/reddylab/Kuei/code/Proj_CombEffect_ENCODE_FCC 
PATH OF RESULTS (FD_RES): /data/reddylab/Kuei/out/proj_combeffect_encode_fcc 
PATH OF LOG     (FD_LOG): /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/log 


**Check data files**

In [2]:
### region folder A: FCC peak tables and their annotation subfolders
FOLDER_A = "annotation_fcc_table"
FD_REG_A = file.path(FD_RES, "results", "region", FOLDER_A)

### region folder B: cCRE silencer annotation
FOLDER_B = "annotation_ccres_silencer"
FD_REG_B = file.path(FD_RES, "results", "region", FOLDER_B)

### folder C: FOLDER_B nested inside region folder A
FD_REG_C = file.path(FD_REG_A, FOLDER_B)

### output folder: "summary" inside region folder A
FD_OUT = file.path(FD_REG_A, "summary")

In [3]:
### list the entries of region folder A, one per line
fdiry  = FD_REG_A
fnames = list.files(fdiry)
for (fname in fnames) {
    print(fname)
}

[1] "annotation_ccres"
[1] "annotation_ccres_silencer"
[1] "annotation_chromHMM"
[1] "annotation_tss_pol2"
[1] "description.tsv"
[1] "fcc_table.starrmpra.crispri.atac.concat.bed.gz"
[1] "fcc_table.starrmpra.crispri.atac.e2g_benchmark.concat.bed.gz"
[1] "fcc_table.starrmpra.crispri.atac.e2g_benchmark.e2g_prediction.concat.bed.gz"
[1] "fcc_table.starrmpra.crispri.atac.e2g_benchmark.e2g_prediction.merge.bed.gz"
[1] "fcc_table.starrmpra.crispri.atac.e2g_benchmark.e2g_prediction.merge.tsv"
[1] "fcc_table.starrmpra.crispri.atac.e2g_benchmark.merge.bed.gz"
[1] "fcc_table.starrmpra.crispri.atac.e2g_benchmark.merge.tsv"
[1] "fcc_table.starrmpra.crispri.atac.merge.bed.gz"
[1] "fcc_table.starrmpra.crispri.atac.merge.tsv"
[1] "summary"
[1] "z_summary"


In [4]:
### list the entries of region folder B, one per line
fdiry  = FD_REG_B
fnames = list.files(fdiry)
for (fname in fnames) {
    print(fname)
}

[1] "ccres_v4.silencer.rest.bed.gz"
[1] "ccres_v4.silencer.starr.bed.gz"
[1] "description.tsv"


In [5]:
### list the entries of folder C, one per line
fdiry  = FD_REG_C
fnames = list.files(fdiry)
for (fname in fnames) {
    print(fname)
}

[1] "peak.annotation.ccres_v4.silencer.rest.bed.gz"
[1] "peak.annotation.ccres_v4.silencer.starr.bed.gz"


## Import data

In [6]:
### locate the column description of region set A
fdiry = FD_REG_A
fname = "description.tsv"
fpath = file.path(fdiry, fname)

### read the description table
dat = read_tsv(fpath, show_col_types = FALSE)

### take the column names and tag the first three with "_A"
vec      = dat$Name
vec[1:3] = paste0(vec[1:3], "_A")

### assign and show
vec_txt_cnames_reg_A = vec
print(vec)
dat

[1] "Chrom_A"     "Start_A"     "End_A"       "Assay_Type"  "Assay_Label"


Name,Description
<chr>,<chr>
Chrom,Chromosome
Start,Start
End,End
Assay_Type,Assay type
Assay_Label,Assay label


In [7]:
### locate the column description of region set B
fdiry = FD_REG_B
fname = "description.tsv"
fpath = file.path(fdiry, fname)

### read the description table
dat = read_tsv(fpath, show_col_types = FALSE)

### take the column names and tag the first six with "_B"
vec      = dat$Name
vec[1:6] = paste0(vec[1:6], "_B")

### assign and show
vec_txt_cnames_reg_B = vec
print(vec)
dat

[1] "Chrom_B"           "Start_B"           "End_B"            
[4] "Name_B"            "Score_B"           "Strand_B"         
[7] "Category_cCREs"    "Category_Silencer"


Name,Description
<chr>,<chr>
Chrom,Chromosome
Start,Start position
End,End position
Name,Name
Score,Score
Strand,[+-.]; Use '.' if no strand is assigned.
Category_cCREs,Label of cCREs type
Category_Silencer,Label of silencer


In [14]:
### init: set column names
### (columns of region set A, then region set B, then the overlap column)
vec_txt_cnames = c(
    vec_txt_cnames_reg_A,
    vec_txt_cnames_reg_B,
    "Overlap"
)

### init: set file path
fdiry  = FD_REG_C
fname  = paste("peak.annotation", "bed.gz", sep="*")
fglob  = file.path(fdiry, fname)
fpaths = Sys.glob(fglob)
fnames = basename(fpaths)

### init: annotation and label
annotations        = c("ccres_v4.silencer.rest", "ccres_v4.silencer.starr")
names(annotations) = annotations

### label every file with the annotation embedded in its own name instead of
### relying on the glob order / number of matches lining up with `annotations`
names(fnames) = sub("^peak\\.annotation\\.(.+)\\.bed\\.gz$", "\\1", fnames)

### fail early if an expected annotation file is missing, then keep only
### the expected files, in the order given by `annotations`
stopifnot(all(annotations %in% names(fnames)))
fnames = fnames[annotations]
print(fnames)

                          ccres_v4.silencer.rest 
 "peak.annotation.ccres_v4.silencer.rest.bed.gz" 
                         ccres_v4.silencer.starr 
"peak.annotation.ccres_v4.silencer.starr.bed.gz" 


In [15]:
### Import data
### Read one table per annotation and add region / label columns.
### Files are read from FD_REG_C directly rather than through the scratch
### variable `fdiry`, which other cells reassign to different folders.
lst = lapply(annotations, function(annotation){
    ### set file path ([[ ]] raises an error when the label is missing)
    fname = fnames[[annotation]]
    fpath = file.path(FD_REG_C, fname)
    
    ### read data (no header passed to the reader; names come from vec_txt_cnames)
    dat = read_tsv(fpath, col_names = vec_txt_cnames, show_col_types = FALSE)

    ### add columns:
    ###   Annotation     - label of the annotation being read
    ###   Region_A / _B  - "chrom:start-end" strings of each interval
    ###   Index_cCREs, Label_cCREs, Label_Silencer - copies of the B columns
    dat = dat %>% dplyr::mutate(
        Annotation = annotation,
        Region_A   = paste0(Chrom_A, ":", Start_A, "-", End_A),
        Region_B   = paste0(Chrom_B, ":", Start_B, "-", End_B),
        Index_cCREs    = Name_B,
        Label_cCREs    = Category_cCREs,
        Label_Silencer = Category_Silencer
    )
    return(dat)
})

### assign and show
lst_peak_annot_import = lst
print(length(lst))
cat("=========================\n")
print(names(lst))
cat("=========================\n")
head(lst[[1]])

[1] 2
[1] "ccres_v4.silencer.rest"  "ccres_v4.silencer.starr"


Chrom_A,Start_A,End_A,Assay_Type,Assay_Label,Chrom_B,Start_B,End_B,Name_B,Score_B,Strand_B,Category_cCREs,Category_Silencer,Overlap,Annotation,Region_A,Region_B,Index_cCREs,Label_cCREs,Label_Silencer
<chr>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<chr>,<dbl>,<chr>,<chr>,<chr>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
chr1,1298873,1300610,"ASTARR,ATAC,CRISPRi-Growth","ASTARR_R,ATAC,CRISPRi-Growth",chr1,1298966,1299316,EH38E2777310,0,.,dELS,REST+ enhancer/silencer,350,ccres_v4.silencer.rest,chr1:1298873-1300610,chr1:1298966-1299316,EH38E2777310,dELS,REST+ enhancer/silencer
chr1,1334703,1335568,ATAC,ATAC,chr1,1334920,1335266,EH38E2777380,0,.,dELS,REST+ enhancer/silencer,346,ccres_v4.silencer.rest,chr1:1334703-1335568,chr1:1334920-1335266,EH38E2777380,dELS,REST+ enhancer/silencer
chr1,1374415,1376654,"ATAC,WSTARR","ATAC,WSTARR_A,WSTARR_AB",chr1,1375250,1375598,EH38E2777455,0,.,pELS,REST+ Non-silencer,348,ccres_v4.silencer.rest,chr1:1374415-1376654,chr1:1375250-1375598,EH38E2777455,pELS,REST+ Non-silencer
chr1,1406792,1408163,"ASTARR,ATAC,LMPRA,WSTARR","ASTARR_R,ATAC,LMPRA_A,LMPRA_AB,WSTARR_A,WSTARR_AB",chr1,1407146,1407496,EH38E2777492,0,.,PLS,REST+ Non-silencer,350,ccres_v4.silencer.rest,chr1:1406792-1408163,chr1:1407146-1407496,EH38E2777492,PLS,REST+ Non-silencer
chr1,1432984,1434718,"ASTARR,ATAC,WSTARR","ASTARR_R,ATAC,WSTARR_A",chr1,1433204,1433554,EH38E2777541,0,.,pELS,REST+ Non-silencer,350,ccres_v4.silencer.rest,chr1:1432984-1434718,chr1:1433204-1433554,EH38E2777541,pELS,REST+ Non-silencer
chr1,1470698,1473196,"ASTARR,ATAC,LMPRA,WSTARR","ASTARR_R,ATAC,LMPRA_A,LMPRA_AB,WSTARR_A,WSTARR_AB",chr1,1472896,1473193,EH38E3951566,0,.,pELS,REST+ Non-silencer,297,ccres_v4.silencer.rest,chr1:1470698-1473196,chr1:1472896-1473193,EH38E3951566,pELS,REST+ Non-silencer


In [16]:
### tabulate the Score_B values of the first imported table
dat = lst[[1]]
table(dat[["Score_B"]])


   0 
4325 

## Arrange the columns

In [17]:
### keep the columns of interest in a fixed order, then sort each table
### by the coordinates of region A
lst = lapply(lst_peak_annot_import, function(dat){
    dat %>%
        dplyr::select(
            Chrom_A, Start_A, End_A, Region_A,
            Assay_Type, Assay_Label,
            Annotation,
            Chrom_B, Start_B, End_B, Region_B,
            Index_cCREs,
            Label_cCREs,
            Label_Silencer
        ) %>%
        dplyr::arrange(Chrom_A, Start_A, End_A)
})

### assign and show
lst_peak_annot_arrange = lst
print(length(lst))
cat("=========================\n")
print(names(lst))
cat("=========================\n")
head(lst[[1]])

[1] 2
[1] "ccres_v4.silencer.rest"  "ccres_v4.silencer.starr"


Chrom_A,Start_A,End_A,Region_A,Assay_Type,Assay_Label,Annotation,Chrom_B,Start_B,End_B,Region_B,Index_cCREs,Label_cCREs,Label_Silencer
<chr>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>
chr1,1298873,1300610,chr1:1298873-1300610,"ASTARR,ATAC,CRISPRi-Growth","ASTARR_R,ATAC,CRISPRi-Growth",ccres_v4.silencer.rest,chr1,1298966,1299316,chr1:1298966-1299316,EH38E2777310,dELS,REST+ enhancer/silencer
chr1,1334703,1335568,chr1:1334703-1335568,ATAC,ATAC,ccres_v4.silencer.rest,chr1,1334920,1335266,chr1:1334920-1335266,EH38E2777380,dELS,REST+ enhancer/silencer
chr1,1374415,1376654,chr1:1374415-1376654,"ATAC,WSTARR","ATAC,WSTARR_A,WSTARR_AB",ccres_v4.silencer.rest,chr1,1375250,1375598,chr1:1375250-1375598,EH38E2777455,pELS,REST+ Non-silencer
chr1,1406792,1408163,chr1:1406792-1408163,"ASTARR,ATAC,LMPRA,WSTARR","ASTARR_R,ATAC,LMPRA_A,LMPRA_AB,WSTARR_A,WSTARR_AB",ccres_v4.silencer.rest,chr1,1407146,1407496,chr1:1407146-1407496,EH38E2777492,PLS,REST+ Non-silencer
chr1,1432984,1434718,chr1:1432984-1434718,"ASTARR,ATAC,WSTARR","ASTARR_R,ATAC,WSTARR_A",ccres_v4.silencer.rest,chr1,1433204,1433554,chr1:1433204-1433554,EH38E2777541,pELS,REST+ Non-silencer
chr1,1470698,1473196,chr1:1470698-1473196,"ASTARR,ATAC,LMPRA,WSTARR","ASTARR_R,ATAC,LMPRA_A,LMPRA_AB,WSTARR_A,WSTARR_AB",ccres_v4.silencer.rest,chr1,1472896,1473193,chr1:1472896-1473193,EH38E3951566,pELS,REST+ Non-silencer


## Save results

In [14]:
### display the output directory that the save loop writes into
FD_OUT

In [18]:
### init: tables to save and output folder
lst   = lst_peak_annot_arrange
fdiry = FD_OUT

### make sure the output folder exists before writing
### (no-op when it is already there)
dir.create(fdiry, showWarnings = FALSE, recursive = TRUE)

### write one TSV per annotation, named peak.annotation.<annotation>.tsv
for (idn in names(lst)){
    ### set file path
    fname = paste("peak.annotation", idn, "tsv", sep=".")
    fpath = file.path(fdiry, fname)
    print(fname)
    flush.console()

    ### write table
    dat = lst[[idn]]
    write_tsv(dat, fpath)
}

[1] "peak.annotation.ccres_v4.silencer.rest.tsv"
[1] "peak.annotation.ccres_v4.silencer.starr.tsv"
