**Set environment**

In [1]:
suppressMessages(suppressWarnings(source("../run_config_project_sing.R")))
show_env()

You are working on        Singularity: singularity_proj_encode_fcc 
BASE DIRECTORY (FD_BASE): /data/reddylab/Kuei 
REPO DIRECTORY (FD_REPO): /data/reddylab/Kuei/repo 
WORK DIRECTORY (FD_WORK): /data/reddylab/Kuei/work 
DATA DIRECTORY (FD_DATA): /data/reddylab/Kuei/data 

You are working with      ENCODE FCC 
PATH OF PROJECT (FD_PRJ): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC 
PROJECT RESULTS (FD_RES): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results 
PROJECT SCRIPTS (FD_EXE): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/scripts 
PROJECT DATA    (FD_DAT): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/data 
PROJECT NOTE    (FD_NBK): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/notebooks 
PROJECT DOCS    (FD_DOC): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/docs 
PROJECT LOG     (FD_LOG): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/log 
PROJECT REF     (FD_REF): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/references 



## Prepare

**Set global variable**

In [2]:
vec = c(
    "fcc_astarr_macs_input_overlap",
    "fcc_astarr_macs_input_union"
)
names(vec) = vec

VEC_TXT_FOLDER = vec
for(txt in vec){cat(txt, "\n")}

fcc_astarr_macs_input_overlap 
fcc_astarr_macs_input_union 


In [3]:
TXT_FNAME_ANNOT = "region.intersect.summary.fcc_crispri_growth.tsv"

**View files**

In [4]:
txt_fdiry = file.path(FD_RES, "region_annotation", "*", "summary")
txt_fname = TXT_FNAME_ANNOT
txt_fglob = file.path(txt_fdiry, txt_fname)

vec = Sys.glob(txt_fglob)
for(txt in vec){cat(txt, "\n")}

/data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region_annotation/fcc_astarr_macs_input_overlap/summary/region.intersect.summary.fcc_crispri_growth.tsv 
/data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region_annotation/fcc_astarr_macs_input_union/summary/region.intersect.summary.fcc_crispri_growth.tsv 


## Import data

**Import region pairs**

In [5]:
### loop to import data
lst = lapply(VEC_TXT_FOLDER, function(txt_folder){
    ### set file directory
    txt_fdiry = file.path(FD_RES, "region_annotation", txt_folder, "summary")
    txt_fname = TXT_FNAME_ANNOT
    txt_fpath = file.path(txt_fdiry, txt_fname)

    ### read table
    dat = read_tsv(txt_fpath, show_col_types = FALSE)
    return(dat)
})
names(lst) = VEC_TXT_FOLDER

### assign and show
lst_dat_region_annot_import = lst

res = lapply(lst, dim)
print(res)

dat = lst[[1]]
fun_display_table(head(dat, 3))

$fcc_astarr_macs_input_overlap
[1] 77623    10

$fcc_astarr_macs_input_union
[1] 85677    10



Chrom,ChromStart,ChromEnd,Region,Annotation_A,Annotation_B,Group,Label,Region_Annot,Region_Count
chr1,605104,605675,chr1:605104-605675,fcc_astarr_macs_input_overlap,fcc_crispri_growth_signif,CRISPRi-Growth,Signif,chr1:605550-605627,1
chr1,605104,605675,chr1:605104-605675,fcc_astarr_macs_input_overlap,fcc_crispri_growth_total,CRISPRi-Growth,Total,chr1:605550-605627;chr1:605550-605627;chr1:605550-605627;chr1:605550-605627;chr1:605550-605627;chr1:605550-605627;chr1:605550-605627;chr1:605550-605627;chr1:605550-605627;chr1:605550-605627,10
chr1,778233,779389,chr1:778233-779389,fcc_astarr_macs_input_overlap,fcc_crispri_growth_total,CRISPRi-Growth,Total,chr1:778440-779153;chr1:778440-779153;chr1:778440-779153;chr1:778440-779153;chr1:778440-779153;chr1:778440-779153;chr1:778440-779153;chr1:778440-779153;chr1:778440-779153;chr1:778440-779153,10


**Explore: Group**

In [6]:
lst = lst_dat_region_annot_import
lst = lapply(lst, function(dat){
    res = table(dat$Group, dnn = "Group")
    dat = as.data.frame(res)
    return(dat)
})

dat = bind_rows(lst, .id = "Region")
dat = dat %>% tidyr::spread(Group, Freq)
fun_display_table(dat)

Region,CRISPRi-Growth
fcc_astarr_macs_input_overlap,77623
fcc_astarr_macs_input_union,85677


**Explore: Group x Label**

In [7]:
lst = lst_dat_region_annot_import
lst = lapply(lst, function(dat){
    res = table(dat$Group, dat$Label, dnn = c("Group", "Label"))
    dat = as.data.frame(res)
    return(dat)
})

dat = bind_rows(lst, .id = "Region")
dat = dat %>% tidyr::spread(Label, Freq)
fun_display_table(dat)

Region,Group,Signif,Total
fcc_astarr_macs_input_overlap,CRISPRi-Growth,4380,73243
fcc_astarr_macs_input_union,CRISPRi-Growth,4907,80770


**Explore: Group x Region Count**

In [8]:
lst = lst_dat_region_annot_import
lst = lapply(lst, function(dat){
    res = table(dat$Group, dat$Region_Count, dnn = c("Group", "Region_Count"))
    dat = as.data.frame(res)
    return(dat)
})

dat = bind_rows(lst, .id = "Region")
dat = dat %>% tidyr::spread(Region_Count, Freq)
fun_display_table(dat)

Region,Group,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,35,37,38,39,40,50,80
fcc_astarr_macs_input_overlap,CRISPRi-Growth,4445,190,185,214,268,286,325,347,428,64860,23,22,28,38,46,57,49,54,64,5190,4,2,2,4,14,4,5,8,6,416,1,1,1,3,2,28,3,
fcc_astarr_macs_input_union,CRISPRi-Growth,5006,249,247,262,374,399,424,462,550,69416,32,26,42,57,62,72,80,73,88,6853,9,3,3,5,20,8,9,9,14,729,1,1,2,4,3,77,5,1.0


## Arrange table

In [9]:
### arrange tables
lst = lst_dat_region_annot_import
lst = lapply(lst, function(dat){
    dat = dat %>% dplyr::filter(Label == "Signif")
    return(dat)
})

### assign and show
lst_dat_region_annot_arrange = lst

res = lapply(lst, dim)
print(res)

dat = lst[[1]]
fun_display_table(head(dat, 3))

$fcc_astarr_macs_input_overlap
[1] 4380   10

$fcc_astarr_macs_input_union
[1] 4907   10



Chrom,ChromStart,ChromEnd,Region,Annotation_A,Annotation_B,Group,Label,Region_Annot,Region_Count
chr1,605104,605675,chr1:605104-605675,fcc_astarr_macs_input_overlap,fcc_crispri_growth_signif,CRISPRi-Growth,Signif,chr1:605550-605627,1
chr1,826796,828040,chr1:826796-828040,fcc_astarr_macs_input_overlap,fcc_crispri_growth_signif,CRISPRi-Growth,Signif,chr1:826642-827902,1
chr1,1068587,1070616,chr1:1068587-1070616,fcc_astarr_macs_input_overlap,fcc_crispri_growth_signif,CRISPRi-Growth,Signif,chr1:1067929-1070953,1


**Explore: Group x Label**

In [10]:
lst = lst_dat_region_annot_arrange
lst = lapply(lst, function(dat){
    res = table(dat$Group, dat$Label, dnn = c("Group", "Label"))
    dat = as.data.frame(res)
    return(dat)
})

dat = bind_rows(lst, .id = "Region")
dat = dat %>% tidyr::spread(Label, Freq)
fun_display_table(dat)

Region,Group,Signif
fcc_astarr_macs_input_overlap,CRISPRi-Growth,4380
fcc_astarr_macs_input_union,CRISPRi-Growth,4907


## Matrix

In [11]:
### convert long to wide format
lst = lst_dat_region_annot_import
lst = lapply(lst, function(dat){
    ### get the significant regions
    tmp = dat %>% dplyr::filter(Label == "Signif")
    vec = unique(tmp$Region)
    vec_txt_region_signif = vec
    
    ### get all intersected regions
    dat = dat %>%
        dplyr::select(Chrom, ChromStart, ChromEnd, Region) %>%
        dplyr::distinct() 

    ### label significance
    dat = dat %>% dplyr::mutate(
        `CRISPRi-Growth` = ifelse(
            Region %in% vec_txt_region_signif,
            1,
            0
        )
    )
    return(dat)
})

### assign and show
lst_dat_region_annot_matrix = lst

res = lapply(lst, dim)
print(res)

dat = lst[[1]]
fun_display_table(head(dat, 3))

$fcc_astarr_macs_input_overlap
[1] 73243     5

$fcc_astarr_macs_input_union
[1] 80770     5



Chrom,ChromStart,ChromEnd,Region,CRISPRi-Growth
chr1,605104,605675,chr1:605104-605675,1
chr1,778233,779389,chr1:778233-779389,0
chr1,817031,817525,chr1:817031-817525,0


**Explore: Significant and unsignificant guides**

In [12]:
lst = lst_dat_region_annot_matrix
lst = lapply(lst, function(dat){
    dat = dat %>% dplyr::mutate(Note = ifelse(`CRISPRi-Growth`, "Signif", "UnSignif"))
    res = table(dat$Note, dnn = c("Label"))
    dat = as.data.frame(res)
    return(dat)
})

dat = bind_rows(lst, .id = "Region")
dat = dat %>% tidyr::spread(Label, Freq) %>% dplyr::mutate(Total = Signif + UnSignif)
fun_display_table(dat)

Region,Signif,UnSignif,Total
fcc_astarr_macs_input_overlap,4380,68863,73243
fcc_astarr_macs_input_union,4907,75863,80770


## Export results

In [13]:
for (txt_folder in VEC_TXT_FOLDER){

    ### get table
    dat_region_annot_result = lst_dat_region_annot_arrange[[txt_folder]]
    dat_region_annot_matrix = lst_dat_region_annot_matrix[[txt_folder]]
    
    ### set file directory
    txt_fdiry = file.path(
        FD_RES, 
        "region_annotation", 
        txt_folder,
        "summary"
    )
    
    ### write region annotation (long format)
    txt_fname = "region.annotation.fcc_crispri_growth.tsv"
    txt_fpath = file.path(txt_fdiry, txt_fname)
    
    dat = dat_region_annot_result
    dat = dat %>% dplyr::arrange(Chrom, ChromStart, ChromEnd)
    write_tsv(dat, txt_fpath)

    ### write region annotation (wide format)
    txt_fname = "matrix.annotation.fcc_crispri_growth.tsv"
    txt_fpath = file.path(txt_fdiry, txt_fname)
    
    dat = dat_region_annot_matrix
    dat = dat %>% dplyr::arrange(Chrom, ChromStart, ChromEnd)
    write_tsv(dat, txt_fpath)
}