**Set environment**

In [1]:
### load the project configuration (messages and warnings silenced),
### then print the environment summary
suppressMessages(
    suppressWarnings(
        source("../config/config_sing.R")
    )
)
show_env()

You are in Singularity: singularity_proj_encode_fcc 
BASE DIRECTORY (FD_BASE): /data/reddylab/Kuei 
WORK DIRECTORY (FD_WORK): /data/reddylab/Kuei/out 
CODE DIRECTORY (FD_CODE): /data/reddylab/Kuei/code 
PATH OF PROJECT (FD_PRJ): /data/reddylab/Kuei/code/Proj_CombEffect_ENCODE_FCC 
PATH OF RESULTS (FD_RES): /data/reddylab/Kuei/out/proj_combeffect_encode_fcc 
PATH OF LOG     (FD_LOG): /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/log 


**Check data**

In [2]:
### name of the annotation sub-folder; reused below for both the
### region-level folder and the peak-annotation folder
FOLDER = "annotation_crispri_growth"

In [3]:
### list the files stored in the region-level annotation folder
fdiry = file.path(FD_RES, "results", "region", FOLDER)
for (fname in list.files(fdiry)) {
    print(fname)
}

[1] "crispri_growth.dhs.active.bed.gz"
[1] "crispri_growth.dhs.total.bed.gz"
[1] "crispri_growth.dhs.total.merge.bed.gz"
[1] "description.tsv"


In [4]:
### list the files stored in the peak-level annotation folder
fdiry = file.path(
    FD_RES, "results", "region",
    "KS91_K562_ASTARRseq_peak_macs_input",
    FOLDER)
for (fname in list.files(fdiry)) {
    print(fname)
}

[1] "peak.annotation.crispri_growth_dhs.bed.gz"
[1] "peak.annotation.crispri_growth.dhs.active.bed.gz"
[1] "peak.annotation.crispri_growth.dhs.inactive.bed.gz"


## Import data

In [5]:
### init: set file path of the column description table
fdiry = file.path(FD_RES, "results", "region", FOLDER)
fname = "description.tsv"
fpath = file.path(fdiry, fname)

### read data; dat_cnames is kept for naming columns in a later cell
dat_cnames = read_tsv(fpath, show_col_types = FALSE)
dat = dat_cnames

### show
print(dim(dat_cnames))
dat_cnames

[1] 9 2


Name,Description
<chr>,<chr>
Chrom,Chromosome of DHS
Start,Start position of DHS
End,End position of DHS
Name,ID of significant guide within the DHS
Log2FC,Log2FC of the guide
Padj,Adjusted p value of the guide
Region,Location of the DHS
Assay_Type,Assay type
Assay_Label,Assay label


In [6]:
### init: set column names
### (three *_ATAC coordinate columns, then the names listed in the
###  description table, then a trailing "Overlap" column)
cnames = dat_cnames$Name
cnames = c("Chrom_ATAC", "Start_ATAC", "End_ATAC", cnames, "Overlap")

### init: set file path (glob pattern; wildcards stand in for the
### annotation-specific part of the file name)
fdiry = file.path(
    FD_RES, 
    "results", 
    "region", 
    "KS91_K562_ASTARRseq_peak_macs_input", 
    FOLDER)
fname = "peak.annotation*.active.*bed.gz"
fglob = file.path(fdiry, fname)
fpath = Sys.glob(fglob)
print(fpath)

### check: the pattern must resolve to exactly one file.
### Sys.glob returns an empty vector when nothing matches, and read_tsv
### (readr >= 2.0) accepts several paths and would combine them silently.
if (length(fpath) != 1) {
    stop(
        "Expected exactly one file matching '", fglob,
        "' but found ", length(fpath), ".")
}

### init: annotation and label
annotation = "CRISPRi"
label      = "CRISPRi_Growth"

### read data
dat = read_tsv(fpath, col_names = cnames, show_col_types = FALSE)

### add the standardized columns used by the summary step
### note: Region reuses the name of an imported column, so the imported
### values are replaced by the "Chrom:Start-End" string built here
dat = dat %>% dplyr::mutate(
    Peak_ATAC  = paste0(Chrom_ATAC, ":", Start_ATAC, "-", End_ATAC),
    Annotation = annotation,
    Label      = label,
    Region     = paste0(Chrom, ":", Start, "-", End),
    Value      = Log2FC,
    Note       = Assay_Label
)

### assign and show
dat_peak_annot_import = dat
print(dim(dat))
head(dat)

[1] "/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/region/KS91_K562_ASTARRseq_peak_macs_input/annotation_crispri_growth/peak.annotation.crispri_growth.dhs.active.bed.gz"
[1] 4667   18


Chrom_ATAC,Start_ATAC,End_ATAC,Chrom,Start,End,Name,Log2FC,Padj,Region,Assay_Type,Assay_Label,Overlap,Peak_ATAC,Annotation,Label,Value,Note
<chr>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<dbl>,<chr>,<chr>,<chr>,<dbl>,<chr>
chr1,605090,605823,chr1,605550,605627,chr1.1.1,-0.9859338,3.245787e-11,chr1:605550-605627,CRISPRi-Growth,CRISPRi-Growth,77,chr1:605090-605823,CRISPRi,CRISPRi_Growth,-0.9859338,CRISPRi-Growth
chr1,826754,828040,chr1,826642,827902,chr1.4.8,0.1855074,0.03250512,chr1:826642-827902,CRISPRi-Growth,CRISPRi-Growth,1148,chr1:826754-828040,CRISPRi,CRISPRi_Growth,0.1855074,CRISPRi-Growth
chr1,1067955,1068196,chr1,1067929,1070953,chr1.74.8,-0.3383702,0.03371964,chr1:1067929-1070953,CRISPRi-Growth,CRISPRi-Growth,241,chr1:1067955-1068196,CRISPRi,CRISPRi_Growth,-0.3383702,CRISPRi-Growth
chr1,1068570,1070643,chr1,1067929,1070953,chr1.74.8,-0.3383702,0.03371964,chr1:1067929-1070953,CRISPRi-Growth,CRISPRi-Growth,2073,chr1:1068570-1070643,CRISPRi,CRISPRi_Growth,-0.3383702,CRISPRi-Growth
chr1,1298873,1300608,chr1,1300037,1300357,chr1.170.3,-1.0553137,2.462243e-12,chr1:1300037-1300357,CRISPRi-Growth,CRISPRi-Growth,320,chr1:1298873-1300608,CRISPRi,CRISPRi_Growth,-1.0553137,CRISPRi-Growth
chr1,1324260,1325767,chr1,1324117,1325709,chr1.177.1,-1.2056016,1.782666e-07,chr1:1324117-1325709,CRISPRi-Growth,CRISPRi-Growth,1449,chr1:1324260-1325767,CRISPRi,CRISPRi_Growth,-1.2056016,CRISPRi-Growth


## Arrange and summarize

In [7]:
### init
dat = dat_peak_annot_import

### summarize: one row per ATAC peak / annotation / label
###   Count  = number of annotation rows for the peak
###   Region = all region strings joined by "|"
###   Score  = mean of Value
###   Note   = sorted unique notes joined by "|"
dat_grouped = dplyr::group_by(
    dat,
    Chrom_ATAC, Start_ATAC, End_ATAC, Peak_ATAC, Annotation, Label)
dat = dplyr::summarise(
    dat_grouped,
    Count  = n(),
    Region = paste(Region, collapse="|"),
    Score  = mean(Value),
    Note   = paste(sort(unique(Note)), collapse="|"),
    .groups = "drop")

### arrange: sort by peak coordinates, then keep only the peak string
dat = dplyr::arrange(dat, Chrom_ATAC, Start_ATAC, End_ATAC)
dat = dplyr::select(dat, -Chrom_ATAC, -Start_ATAC, -End_ATAC)
dat = dplyr::rename(dat, "Peak" = Peak_ATAC)

### assign and show
dat_peak_annot_summary = dat
print(dim(dat))
head(dat)

[1] 4609    7


Peak,Annotation,Label,Count,Region,Score,Note
<chr>,<chr>,<chr>,<int>,<chr>,<dbl>,<chr>
chr1:605090-605823,CRISPRi,CRISPRi_Growth,1,chr1:605550-605627,-0.9859338,CRISPRi-Growth
chr1:826754-828040,CRISPRi,CRISPRi_Growth,1,chr1:826642-827902,0.1855074,CRISPRi-Growth
chr1:1067955-1068196,CRISPRi,CRISPRi_Growth,1,chr1:1067929-1070953,-0.3383702,CRISPRi-Growth
chr1:1068570-1070643,CRISPRi,CRISPRi_Growth,1,chr1:1067929-1070953,-0.3383702,CRISPRi-Growth
chr1:1298873-1300608,CRISPRi,CRISPRi_Growth,1,chr1:1300037-1300357,-1.0553137,CRISPRi-Growth
chr1:1324260-1325767,CRISPRi,CRISPRi_Growth,1,chr1:1324117-1325709,-1.2056016,CRISPRi-Growth


## Explore and check results

In [8]:
### count peaks per Note value, separately for each Label
dat = dat_peak_annot_summary
lst = lapply(
    split(dat, dat$Label),
    function(tbl){
        table(tbl$Note)
    })
lst

$CRISPRi_Growth

CRISPRi-Growth 
          4609 


In [9]:
### count peaks per Count value (annotation rows per peak),
### separately for each Label
dat = dat_peak_annot_summary
lst = lapply(
    split(dat, dat$Label),
    function(tbl){
        table(tbl$Count)
    })
lst

$CRISPRi_Growth

   1    2 
4551   58 


## Save results

In [10]:
### init: set output path
fdiry = file.path(
    FD_RES, 
    "results", 
    "region", 
    "KS91_K562_ASTARRseq_peak_macs_input", 
    "summary")
fname = "peak.summary.crispri_growth.dhs.active.tsv"
fpath = file.path(fdiry, fname)

### make sure the output folder exists; write_tsv does not create it
### (no effect when the folder is already present)
dir.create(fdiry, recursive = TRUE, showWarnings = FALSE)

### write the per-peak summary table
dat = dat_peak_annot_summary
write_tsv(dat, fpath)