**Set environment**

In [1]:
### Source the project configuration script (path is relative to this notebook),
### silencing the messages and warnings it emits while loading.
### NOTE(review): presumably this script defines the FD_* paths and helpers such
### as show_env() / fun_display_table() and attaches the tidyverse -- confirm.
suppressMessages(suppressWarnings(source("../run_config_project_sing.R")))
### Print the configured base / project directories for the record.
show_env()

You are working on        Singularity: singularity_proj_encode_fcc 
BASE DIRECTORY (FD_BASE): /data/reddylab/Kuei 
REPO DIRECTORY (FD_REPO): /data/reddylab/Kuei/repo 
WORK DIRECTORY (FD_WORK): /data/reddylab/Kuei/work 
DATA DIRECTORY (FD_DATA): /data/reddylab/Kuei/data 

You are working with      ENCODE FCC 
PATH OF PROJECT (FD_PRJ): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC 
PROJECT RESULTS (FD_RES): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results 
PROJECT SCRIPTS (FD_EXE): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/scripts 
PROJECT DATA    (FD_DAT): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/data 
PROJECT NOTE    (FD_NBK): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/notebooks 
PROJECT DOCS    (FD_DOC): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/docs 
PROJECT LOG     (FD_LOG): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/log 
PROJECT REF     (FD_REF): /data/reddylab/Kuei/repo/Proj_ENCODE_FCC/references 



## Prepare

**Set global variables**

In [2]:
### region-set folders to process; the vector is named by its own values
### so that lapply() over it returns a list keyed by folder name
vec = setNames(nm = c(
    "fcc_astarr_macs_input_overlap",
    "fcc_astarr_macs_input_union"
))

VEC_TXT_FOLDER = vec

### list the folders, one per line
for (txt in VEC_TXT_FOLDER) {
    cat(txt, "\n")
}

fcc_astarr_macs_input_overlap 
fcc_astarr_macs_input_union 


In [3]:
### file name of the concatenated region annotation table;
### the same file name is looked up inside every region-set folder
TXT_FNAME_ANNOT = "region.annotation.fcc_starrmpracrispr.concat.tsv"

**View files**

In [4]:
### glob pattern: the concatenated annotation table under any region-set folder
txt_fname = TXT_FNAME_ANNOT
txt_fdiry = file.path(FD_RES, "region_annotation", "*", "summary")
txt_fglob = file.path(txt_fdiry, txt_fname)

### expand the pattern and list the matching files, one per line
vec = Sys.glob(txt_fglob)
for (txt in vec) {
    cat(txt, "\n")
}

/data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region_annotation/fcc_astarr_macs_input_overlap/summary/region.annotation.fcc_starrmpracrispr.concat.tsv 
/data/reddylab/Kuei/repo/Proj_ENCODE_FCC/results/region_annotation/fcc_astarr_macs_input_union/summary/region.annotation.fcc_starrmpracrispr.concat.tsv 


## Import data

In [5]:
### read the concatenated annotation table of every region-set folder;
### the result is a list of tables keyed by folder name
lst = lapply(VEC_TXT_FOLDER, function(txt_folder){
    ### <FD_RES>/region_annotation/<folder>/summary/<annotation file>
    txt_fpath = file.path(
        FD_RES, 
        "region_annotation", 
        txt_folder, 
        "summary", 
        TXT_FNAME_ANNOT
    )

    ### read table (column type report is silenced)
    read_tsv(txt_fpath, show_col_types = FALSE)
})

### assign and show
lst_dat_region_annot_fcc_peak_concat = lst

### dimensions of each table
res = lapply(lst, dim)
print(res)

### preview the first table
dat = lst[[1]]
fun_display_table(head(dat, 3))

$fcc_astarr_macs_input_overlap
[1] 156686      6

$fcc_astarr_macs_input_union
[1] 201420      6



Chrom,ChromStart,ChromEnd,Region,Group,Label
chr1,10038,10405,chr1:10038-10405,ASTARR,ASTARR_R
chr1,16025,16338,chr1:16025-16338,ASTARR,ASTARR_R
chr1,17288,17689,chr1:17288-17689,ASTARR,ASTARR_R


## Summarize

**Count votes across all assays**

In [6]:
### For every region set, count how many assay groups annotate each region
### and record which ones. Output columns:
###   Chrom, ChromStart, ChromEnd, Region, Num_Assay, Assays
lst = lst_dat_region_annot_fcc_peak_concat
lst = lapply(lst, function(dat){
    ### ignore direction (the Label column is dropped);
    ### keep one row per region and assay group
    dat = dat %>% 
        dplyr::select(Chrom:Region, Group) %>% 
        dplyr::distinct()

    ### count assay significance for each region
    ### Assay names are sorted (C-locale order, NAs kept last) before being
    ### joined, so the Assays label is the same for the same set of assays
    ### regardless of the row order of the input table.
    dat = dat %>% 
        dplyr::group_by(Chrom, ChromStart, ChromEnd, Region) %>% 
        dplyr::summarise(
            Num_Assay = n(),
            Assays    = paste(
                sort(Group, method = "radix", na.last = TRUE), 
                collapse = ","
            ),
            .groups   = "drop"
        )
    return(dat)
})

### assign and show
lst_dat_region_annot_fcc_assayvote = lst

res = lapply(lst, dim)
print(res)

dat = lst[[1]]
fun_display_table(head(dat, 3))

$fcc_astarr_macs_input_overlap
[1] 100454      6

$fcc_astarr_macs_input_union
[1] 135780      6



Chrom,ChromStart,ChromEnd,Region,Num_Assay,Assays
chr1,10038,10405,chr1:10038-10405,1,ASTARR
chr1,16025,16338,chr1:16025-16338,1,ASTARR
chr1,17288,17689,chr1:17288-17689,1,ASTARR


**Explore: number of regions by assay count**

In [7]:
### for each region set, tabulate the number of regions per assay count;
### counts are labelled "N<k>" where k is Num_Assay
lst = lapply(lst_dat_region_annot_fcc_assayvote, function(dat_vote){
    dat_note = dplyr::mutate(dat_vote, Note = paste0("N", Num_Assay))
    tab_note = table(dat_note$Note, dnn = c("Note"))
    as.data.frame(tab_note)
})

### stack the per-set tables (set name goes into column "Region"),
### then spread to one row per region set and one column per label
dat = bind_rows(lst, .id = "Region")
dat = tidyr::spread(dat, Note, Freq)
fun_display_table(dat)

Region,N1,N2,N3,N4,N5,N6,N7
fcc_astarr_macs_input_overlap,66501,22994,9740,1177,31,6,5
fcc_astarr_macs_input_union,95992,28122,10384,1237,34,6,5


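Number of regions by assay count for `fcc_astarr_macs_input_overlap` (kept for reference; matches the first row of the table above):

```
    1     2     3     4     5     6     7 
66501 22994  9740  1177    31     6     5 
```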

## Save results

In [8]:
### write two tables per region set into its summary folder:
### the full assay vote and the vote restricted to multi-assay regions
for (txt_folder in VEC_TXT_FOLDER){

    ### output folder: <FD_RES>/region_annotation/<folder>/summary
    txt_fdiry = file.path(FD_RES, "region_annotation", txt_folder, "summary")

    ### get the table of this region set and sort it by coordinate
    dat_region_annot_result = lst_dat_region_annot_fcc_assayvote[[txt_folder]]
    dat = dplyr::arrange(dat_region_annot_result, Chrom, ChromStart, ChromEnd)

    ### write table (full assay vote)
    txt_fname = "region.annotation.fcc_starrmpracrispr.assayvote.tsv"
    txt_fpath = file.path(txt_fdiry, txt_fname)
    write_tsv(dat, txt_fpath)

    ### write table (filtered assay vote)
    ### keep regions with more than one assay; rows stay in sorted order
    txt_fname = "region.annotation.fcc_starrmpracrispr.assayvote.filtered.tsv"
    txt_fpath = file.path(txt_fdiry, txt_fname)
    dat = dplyr::filter(dat, Num_Assay > 1)
    write_tsv(dat, txt_fpath)
}

**Save a copy to reference folder**

In [9]:
### get table
txt = "fcc_astarr_macs_input_overlap"
dat = lst_dat_region_annot_fcc_assayvote[[txt]]
dat = dat %>% dplyr::arrange(Chrom, ChromStart, ChromEnd)

### assign and show
dat_reigon_annot_fcc_assayvote = dat
print(dim(dat))
fun_display_table(head(dat, 3))

[1] 100454      6


Chrom,ChromStart,ChromEnd,Region,Num_Assay,Assays
chr1,10038,10405,chr1:10038-10405,1,ASTARR
chr1,16025,16338,chr1:16025-16338,1,ASTARR
chr1,17288,17689,chr1:17288-17689,1,ASTARR


In [10]:
### filter assay vote
dat = dat_reigon_annot_fcc_assayvote
dat = dat %>% dplyr::filter(Num_Assay > 1)
dat = dat %>% dplyr::arrange(Chrom, ChromStart, ChromEnd)

### assign and show
dat_reigon_annot_fcc_assayvote_filter = dat
print(dim(dat))
fun_display_table(head(dat, 3))

[1] 33953     6


Chrom,ChromStart,ChromEnd,Region,Num_Assay,Assays
chr1,605104,605675,chr1:605104-605675,2,"ASTARR,CRISPRi-Growth"
chr1,778233,779389,chr1:778233-779389,3,"ASTARR,LMPRA,WSTARR"
chr1,818708,819335,chr1:818708-819335,2,"ASTARR,WSTARR"


In [11]:
### write the full assay-vote table to the reference folder
txt_fdiry = file.path(FD_REF, "fcc_region_results")
txt_fname = "region.annotation.fcc_starrmpracrispr.assayvote.tsv"
txt_fpath = file.path(txt_fdiry, txt_fname)

### make sure the reference folder exists before writing;
### this is a no-op (and stays silent) when the folder is already there
dir.create(txt_fdiry, showWarnings = FALSE, recursive = TRUE)

dat = dat_reigon_annot_fcc_assayvote
write_tsv(dat, txt_fpath)

In [12]:
### write the filtered assay-vote table to the reference folder
txt_fdiry = file.path(FD_REF, "fcc_region_results")
txt_fname = "region.annotation.fcc_starrmpracrispr.assayvote.filtered.tsv"
txt_fpath = file.path(txt_fdiry, txt_fname)

### make sure the reference folder exists before writing;
### this is a no-op (and stays silent) when the folder is already there
dir.create(txt_fdiry, showWarnings = FALSE, recursive = TRUE)

dat = dat_reigon_annot_fcc_assayvote_filter
write_tsv(dat, txt_fpath)