**Set environment**

In [3]:
### load the project configuration script; its messages and warnings are silenced
suppressMessages(suppressWarnings(source("../run_config_project_sing.R")))
### print the environment summary (show_env is presumably provided by the sourced config -- TODO confirm)
show_env()

You are working on        Singularity: singularity_proj_encode_fcc 
BASE DIRECTORY (FD_BASE): /data/reddylab/Kuei 
REPO DIRECTORY (FD_REPO): /data/reddylab/Kuei/repo 
WORK DIRECTORY (FD_WORK): /data/reddylab/Kuei/work 
DATA DIRECTORY (FD_DATA): /data/reddylab/Kuei/data 

You are working with      ENCODE FCC 
PATH OF PROJECT (FD_PRJ): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC 
PROJECT RESULTS (FD_RES): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results 
PROJECT SCRIPTS (FD_EXE): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/scripts 
PROJECT DATA    (FD_DAT): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/data 
PROJECT NOTE    (FD_NBK): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/notebooks 
PROJECT DOCS    (FD_DOC): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/docs 
PROJECT LOG     (FD_LOG): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/log 
PROJECT REF     (FD_REF): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/references 



## Prepare

**Set global variable**

In [4]:
### region-set folders to process; every element is named after itself
vec = c("fcc_astarr_macs_input_overlap", "fcc_astarr_macs_input_union")
vec = stats::setNames(vec, vec)

### keep the folder names as a notebook-wide constant and list them
VEC_TXT_FOLDER = vec
for (txt in vec) {
    cat(txt, "\n")
}

fcc_astarr_macs_input_overlap 
fcc_astarr_macs_input_union 


In [5]:
### file name of the E2G benchmark region annotation table; reused by the file listing and import cells
TXT_FNAME_ANNOT = "region.annotation.encode_e2g_benchmark.tsv"

**View files**

In [6]:
### wildcard pattern: the annotation table inside the summary folder of every region set
txt_fname = TXT_FNAME_ANNOT
txt_fdiry = file.path(FD_RES, "region_annotation", "*", "summary")
txt_fglob = file.path(txt_fdiry, txt_fname)

### expand the pattern and print each matching path on its own line
vec = Sys.glob(txt_fglob)
for (txt in vec) {
    cat(txt, "\n")
}

/data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region_annotation/fcc_astarr_macs_input_overlap/summary/region.annotation.encode_e2g_benchmark.tsv 
/data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region_annotation/fcc_astarr_macs_input_union/summary/region.annotation.encode_e2g_benchmark.tsv 


## Import data

**Known or tested CREs**

In [16]:
### import the known/tested CRE annotation table of every region folder
lst = lapply(VEC_TXT_FOLDER, function(txt_folder){
    ### <FD_RES>/region_annotation/<folder>/summary/region.annotation.genome_cres.tsv
    txt_fpath = file.path(
        FD_RES, "region_annotation", txt_folder, "summary",
        "region.annotation.genome_cres.tsv"
    )

    ### load the tab-separated table without the column type message
    read_tsv(txt_fpath, show_col_types = FALSE)
})

### keep the imported tables under a dedicated name
lst_dat_region_annot_cres_import = lst

### report the dimensions of each table
res = lapply(lst, dim)
print(res)

### preview the first table
dat = lst[[1]]
head(dat, n = 3)

$fcc_astarr_macs_input_overlap
[1] 27 12

$fcc_astarr_macs_input_union
[1] 27 12



Chrom,ChromStart,ChromEnd,Region,Region_CRE,Group,Label,Type,Target,Description,Reference,Note
<chr>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
chr11,5269503,5270633,chr11:5269503-5270633,chr11:5269504-5270633,genome_cres,HBE1 promoter,promoter,HBE1,HBE1 promoter,,known CREs
chr11,5275455,5276164,chr11:5275455-5276164,chr11:5275456-5276164,genome_cres,LCR HS1,enhancer,HBE1,locus control region HS1,,known CREs
chr11,5280584,5281266,chr11:5280584-5281266,chr11:5280585-5281266,genome_cres,LCR HS2,enhancer,HBE1,locus control region HS2,,known CREs


## E2G

**Read region annotation**

In [7]:
### import the E2G benchmark annotation table of every region folder,
### keyed by folder name
lst = stats::setNames(
    lapply(VEC_TXT_FOLDER, function(txt_folder){
        ### <FD_RES>/region_annotation/<folder>/summary/<TXT_FNAME_ANNOT>
        txt_fpath = file.path(
            FD_RES, "region_annotation", txt_folder, "summary",
            TXT_FNAME_ANNOT
        )

        ### load the tab-separated table without the column type message
        read_tsv(txt_fpath, show_col_types = FALSE)
    }),
    VEC_TXT_FOLDER
)

### keep the imported tables under a dedicated name
lst_dat_region_annot_import = lst

### report the dimensions of each table
res = lapply(lst, dim)
print(res)

### preview the first table
dat = lst[[1]]
fun_display_table(head(dat, n = 3))

$fcc_astarr_macs_input_overlap
[1] 8770   12

$fcc_astarr_macs_input_union
[1] 9229   12



Chrom,ChromStart,ChromEnd,Region,Group,Label,Target,Score,Zcore,NLog10P,Regulated,Source
chr1,3774056,3776283,chr1:3774056-3776283,E2G-Benchmark,Regulated:TRUE,CEP104,-0.2934319,-4.705144,2.395344,True,Ulirsch2016
chr1,3774056,3776283,chr1:3774056-3776283,E2G-Benchmark,Regulated:TRUE,LRRC47,-0.3311781,-5.331209,2.109514,True,Ulirsch2016
chr1,3774056,3776283,chr1:3774056-3776283,E2G-Benchmark,Regulated:TRUE,SMIM1,-0.4720192,-7.667223,3.192703,True,Ulirsch2016


## Arrange table

In [8]:
### reduce every imported annotation table to region, target, score and sign
lst = lapply(lst_dat_region_annot_import, function(dat_annot){
    dat_annot %>%
        ### Score is replaced by the Zcore column; Sign is "pos" only for
        ### Score > 0, so a score of exactly zero is labelled "neg"
        dplyr::mutate(
            Score = Zcore,
            Sign  = ifelse(Score > 0, "pos", "neg")
        ) %>%
        ### keep the columns of interest and drop duplicated rows
        dplyr::select(Chrom, ChromStart, ChromEnd, Region, Score, Target, Sign) %>%
        dplyr::distinct()
})

### keep the arranged tables under a dedicated name
lst_dat_region_score_arrange = lst

### report the dimensions of each table
res = lapply(lst, dim)
print(res)

### preview the first table
dat = lst[[1]]
head(dat, n = 3)

$fcc_astarr_macs_input_overlap
[1] 8770    7

$fcc_astarr_macs_input_union
[1] 9229    7



Chrom,ChromStart,ChromEnd,Region,Score,Target,Sign
<chr>,<dbl>,<dbl>,<chr>,<dbl>,<chr>,<chr>
chr1,3774056,3776283,chr1:3774056-3776283,-4.705144,CEP104,neg
chr1,3774056,3776283,chr1:3774056-3776283,-5.331209,LRRC47,neg
chr1,3774056,3776283,chr1:3774056-3776283,-7.667223,SMIM1,neg


## Check regions: HS2

In [9]:
### region string of LCR HS2 (chr11); used as the filter key when checking individual regions
TXT_REGION_LCR_HS2 = "chr11:5280584-5281266"

In [11]:
### pick the arranged score table of the overlap region set
idx = "fcc_astarr_macs_input_overlap"
lst = lst_dat_region_score_arrange

### keep only the rows of the LCR HS2 region and display them
dat = dplyr::filter(lst[[idx]], Region == TXT_REGION_LCR_HS2)
fun_display_table(dat)

Chrom,ChromStart,ChromEnd,Region,Score,Target,Sign
chr11,5280584,5281266,chr11:5280584-5281266,-14.1819818,HBE1,neg
chr11,5280584,5281266,chr11:5280584-5281266,-12.0884764,HBG2,neg
chr11,5280584,5281266,chr11:5280584-5281266,-7.6439811,HBG1,neg
chr11,5280584,5281266,chr11:5280584-5281266,-0.1321715,HBB,neg
chr11,5280584,5281266,chr11:5280584-5281266,-0.9829265,HBD,neg


## CRISPRi-HCRFF

In [12]:
### assay whose filtered region scores are imported; defined once here and
### read by the import function below
txt_assay = "CRISPRi_FlowFISH_K562_Riley_JinWoo"

### import the filtered per-target score table of the assay for every region folder
lst = lapply(VEC_TXT_FOLDER, function(txt_folder){
    ### set file directory:
    ### <FD_RES>/region_coverage_fcc_score/<folder>/<assay>/summary
    txt_fdiry = file.path(FD_RES, "region_coverage_fcc_score", txt_folder, txt_assay, "summary")
    txt_fname = "region.coverage.score.filtered.target.tsv"
    txt_fpath = file.path(txt_fdiry, txt_fname)

    ### read table (column type message suppressed)
    dat = read_tsv(txt_fpath, show_col_types = FALSE)
    return(dat)
})

### assign and show
lst_dat_region_score_chcrff_import = lst

### dimensions of each imported table
res = lapply(lst, dim)
print(res)

### preview the first table
dat = lst[[1]]
head(dat, 3)

$fcc_astarr_macs_input_overlap
[1] 2304   12

$fcc_astarr_macs_input_union
[1] 3084   12



Chrom,ChromStart,ChromEnd,Region,Target,Mean,Median,Max,Min,Count,Mean_Neg,Mean_Pos
<chr>,<dbl>,<dbl>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
chr11,4092109,4092511,chr11:4092109-4092511,HBE1,-0.1137124,-0.1415565,1.21353,-2.0557,64,-0.5895217,0.581701
chr11,4092109,4092511,chr11:4092109-4092511,HBG1,-0.151743,-0.150858,1.47841,-1.53233,64,-0.5163575,0.498222
chr11,4092109,4092511,chr11:4092109-4092511,HBG2,-0.2425928,-0.248386,0.72989,-1.95678,64,-0.495156,0.3130464


In [15]:
### reduce every imported CRISPRi-HCRFF table to one score per region/target row
### NOTE(review): the result is stored in lst_dat_region_score_arrange, the same
### name the E2G "Arrange table" cell assigns, so the content of that variable
### depends on which of the two cells ran last
lst = lapply(lst_dat_region_score_chcrff_import, function(dat_score){
    dat_score %>%
        ### the Mean column is used as the score
        dplyr::mutate(Score = Mean) %>%
        ### keep the columns of interest and drop duplicated rows
        dplyr::select(Chrom, ChromStart, ChromEnd, Region, Score, Target) %>%
        dplyr::distinct()
})

### keep the arranged tables
lst_dat_region_score_arrange = lst

### report the dimensions of each table
res = lapply(lst, dim)
print(res)

### preview the first table
dat = lst[[1]]
head(dat, n = 3)

$fcc_astarr_macs_input_overlap
[1] 2304    6

$fcc_astarr_macs_input_union
[1] 3084    6



Chrom,ChromStart,ChromEnd,Region,Score,Target
<chr>,<dbl>,<dbl>,<chr>,<dbl>,<chr>
chr11,4092109,4092511,chr11:4092109-4092511,-0.1137124,HBE1
chr11,4092109,4092511,chr11:4092109-4092511,-0.151743,HBG1
chr11,4092109,4092511,chr11:4092109-4092511,-0.2425928,HBG2


In [17]:
### pick the arranged score table of the overlap region set
idx = "fcc_astarr_macs_input_overlap"
lst = lst_dat_region_score_arrange

### keep only the rows of the LCR HS2 region and display them
dat = dplyr::filter(lst[[idx]], Region == TXT_REGION_LCR_HS2)
fun_display_table(dat)

Chrom,ChromStart,ChromEnd,Region,Score,Target
chr11,5280584,5281266,chr11:5280584-5281266,2.2706102,HBE1
chr11,5280584,5281266,chr11:5280584-5281266,4.028531,HBG1
chr11,5280584,5281266,chr11:5280584-5281266,4.6146099,HBG2
chr11,5280584,5281266,chr11:5280584-5281266,-0.140513,HBS1L
chr11,5280584,5281266,chr11:5280584-5281266,0.0163336,MYB


In [7]:
### show all rows of one chrX region in the first imported score table
### NOTE(review): lst_dat_region_score_import is not assigned by any cell visible
### in this notebook -- confirm where it is defined before re-running
lst = lst_dat_region_score_import
dat = lst[[1]]
dplyr::filter(dat, Region == "chrX:48782613-48783539")

Chrom,ChromStart,ChromEnd,Region,Score,Zcore,Score_Label,Zcore_Label,Assay_Name,Assay_Type,Assay_Group,Assay_Label
<chr>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
chrX,48782613,48783539,chrX:48782613-48783539,1.7147966,1.032789,Log2FC,Scale(Log2FC),MPRA_Tiling_K562_Tewhey_Hannah,TMPRA,TMPRA,Tiling-MPRA
chrX,48782613,48783539,chrX:48782613-48783539,1.5149754,2.387557,Log2FC,Scale(Log2FC),STARR_ATAC_K562_Reddy_KS91,ASTARR,ASTARR_KS91,ATAC-STARR
chrX,48782613,48783539,chrX:48782613-48783539,0.7122563,1.782385,Log2FC,Scale(Log2FC),STARR_WHG_K562_Reddy_A001,WSTARR,WSTARR,WHG-STARR
chrX,48782613,48783539,chrX:48782613-48783539,1.5321586,1.532159,Mean(ZScore),Mean(ZScore),CRISPRi_FlowFISH_K562_Riley_JinWoo,CRISPRi-HCRFF,CRISPRi-HCRFF,CRISPRi-HCR FlowFISH
chrX,48782613,48783539,chrX:48782613-48783539,2.492208,2.492208,Mean(ZScore),Mean(ZScore),CRISPRi_Growth_K562_Gersbach_JinWoo,CRISPRi-Growth,CRISPRi-Growth,CRISPRi-Growth


In [None]:
### scratch: region strings under inspection
### both are quoted; an unquoted coordinate is parsed by R as the arithmetic
### expression chrX:48782524 - 48783758 and fails on the undefined object chrX
"chrX:48782613-48783539"
"chrX:48782524-48783758"