**Set environment**

In [1]:
### load the project configuration script, silencing its messages and warnings
suppressWarnings(
    suppressMessages(
        source("../config/config_sing.R")
    )
)

### print the environment name and the project directories
show_env()

You are in Singularity: singularity_proj_encode_fcc 
BASE DIRECTORY (FD_BASE): /data/reddylab/Kuei 
WORK DIRECTORY (FD_WORK): /data/reddylab/Kuei/out 
CODE DIRECTORY (FD_CODE): /data/reddylab/Kuei/code 
PATH OF PROJECT (FD_PRJ): /data/reddylab/Kuei/code/Proj_CombEffect_ENCODE_FCC 
PATH OF RESULTS (FD_RES): /data/reddylab/Kuei/out/proj_combeffect_encode_fcc 
PATH OF LOG     (FD_LOG): /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/log 


**Check data files**

In [2]:
### list the entries of the CRISPRi_growth source directory, one per line
fdiry = file.path(FD_RES, "source", "CRISPRi_growth")
for (txt_entry in list.files(fdiry)) {
    print(txt_entry)
}

[1] "2023_resubmission"
[1] "k562-gw-v3-all.sorted.counts.results.hg38.txt.gz"
[1] "k562-gw-v3-all.sorted.counts.results.top_guide_fdr_0_05.hg38.bed.gz"


## Import data

**Import total guides & DHS**

In [3]:
### locate the table holding the results for all guides
fdiry = file.path(FD_RES, "source", "CRISPRi_growth")
fname = "k562-gw-v3-all.sorted.counts.results.hg38.txt.gz"
fpath = file.path(fdiry, fname)

### read the tab-separated table
### the first column has no header in the file (readr names it `...1`),
### so give it an explicit name
dat = read_tsv(file = fpath, show_col_types = FALSE)
names(dat)[1] = "RowID"

### keep a named copy, then report the dimensions and preview the first rows
dat_peak_crispri_growth_total = dat
print(dim(dat_peak_crispri_growth_total))
head(dat_peak_crispri_growth_total)

[1m[22mNew names:
[36m•[39m `` -> `...1`


[1] 1092166      22


RowID,baseMean,log2FoldChange,lfcSE,stat,pvalue,padj,weight,name,dhs_chrom,⋯,ID,chrPerturbationTarget,startPerturbationTarget,endPerturbationTarget,chrom,start,end,dhs_id,score,strand
<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,⋯,<chr>,<chr>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<chr>,<dbl>,<chr>
0,669.8027,-0.9859338,0.13527811,-7.5156478,5.662983e-14,3.245787e-11,1.0189024,chr1.1.1,chr1,⋯,chr1.1,chr1,540951,540970,chr1,605550,605627,chr1.1,521,.
1,1210.5948,0.01755679,0.11259581,0.9724725,0.3308155,0.8618381,1.3894218,chr1.1.2,chr1,⋯,chr1.1,chr1,540971,540990,chr1,605550,605627,chr1.1,521,.
2,2065.7482,0.0142221,0.07694619,0.7948325,0.426711,0.9074943,1.2206106,chr1.1.3,chr1,⋯,chr1.1,chr1,540963,540982,chr1,605550,605627,chr1.1,521,.
3,670.0537,0.01843545,0.12623486,1.0421596,0.2973377,0.886445,1.0189024,chr1.1.4,chr1,⋯,chr1.1,chr1,540953,540972,chr1,605550,605627,chr1.1,521,.
4,1023.917,0.0112653,0.10178712,0.6436207,0.5198214,0.9145863,1.3894218,chr1.1.5,chr1,⋯,chr1.1,chr1,540954,540973,chr1,605550,605627,chr1.1,521,.
5,605.4137,0.03242606,0.14597771,1.6813118,0.09270237,0.7390114,0.9584715,chr1.1.6,chr1,⋯,chr1.1,chr1,540979,540998,chr1,605550,605627,chr1.1,521,.


**Import significant guides**

In [4]:
### locate the table holding the top guides at FDR 0.05
fdiry = file.path(FD_RES, "source", "CRISPRi_growth")
fname = "k562-gw-v3-all.sorted.counts.results.top_guide_fdr_0_05.hg38.bed.gz"
fpath = file.path(fdiry, fname)

### read the table
### note: the file is parsed as comma-separated (read_csv) even though
### its extension is .bed.gz; the first column has no header in the file
### (readr names it `...1`), so give it an explicit name
dat = read_csv(file = fpath, show_col_types = FALSE)
names(dat)[1] = "RowID"

### keep a named copy, then report the dimensions and preview the first rows
dat_peak_crispri_growth_fdr = dat
print(dim(dat_peak_crispri_growth_fdr))
head(dat_peak_crispri_growth_fdr)

[1m[22mNew names:
[36m•[39m `` -> `...1`


[1] 6242    7


RowID,chrom,start,end,name,log2FoldChange,padj
<dbl>,<chr>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>
504606,chr19,11155578,11156290,chr19.1899.3,-2.549922,3.771775e-123
686888,chr22,30356286,30357525,chr22.1285.6,-3.059909,9.81976e-120
868222,chr6,34530428,34530534,chr6.2172.8,-3.100194,1.232839e-117
50002,chr1,20685850,20686283,chr1.2930.10,-2.862158,9.548565e-109
811835,chr5,70924811,70925615,chr5.1649.1,-2.92711,4.2986399999999996e-107
811815,chr5,70049360,70050252,chr5.1647.1,-2.92711,4.2986399999999996e-107


## Arrange and summarize

In [5]:
### init: list of data
### "total"  = every guide in the imported results table
### "active" = guides from the imported FDR 0.05 table
lst = list(
    "total"  = dat_peak_crispri_growth_total,
    "active" = dat_peak_crispri_growth_fdr
)

### columns to keep (in output order) and the assay name added to every row
vec = c('chrom', 'start', 'end', 'name', 'log2FoldChange', 'padj')
txt = "CRISPRi-Growth"

### extract columns and arrange table
### - `vec` is a character vector taken from the environment, not a function
###   argument, so it is selected with all_of() (strict: errors if any listed
###   column is missing) instead of the embrace operator {{ }}
### - Region is the "chrom:start-end" label built from the kept coordinates
### - rows are sorted by coordinate and exact duplicate rows are dropped
### - the pipeline result is returned directly as the function value
lst = lapply(lst, function(dat){
    dat %>%
        dplyr::select(dplyr::all_of(vec)) %>%
        dplyr::mutate(
            Region = paste0(chrom, ":", start, "-", end),
            Assay_Type  = txt,
            Assay_Label = txt
        ) %>%
        dplyr::arrange(chrom, start, end) %>%
        dplyr::distinct()
})

lst_dat_peak_crispri_growth_arrange = lst

In [6]:
### init
lst = lst_dat_peak_crispri_growth_arrange

### total table: number of rows and number of distinct Region labels
dat = lst[["total"]]
vec_txt_region = unique(dat[["Region"]])
num_row    = nrow(dat)
num_region = length(vec_txt_region)
cat("#Row    (Total):", num_row,    "\n")
cat("#Region (Total):", num_region, "\n")
cat("\n")

### active table: number of rows and number of distinct Region labels
dat = lst[["active"]]
vec_txt_region = unique(dat[["Region"]])
num_row    = nrow(dat)
num_region = length(vec_txt_region)
cat("#Row    (Active):", num_row,    "\n")
cat("#Region (Active):", num_region, "\n")

#Row    (Total): 1092166 
#Region (Total): 111702 

#Row    (Active): 6242 
#Region (Active): 6242 


## Data dictionary

In [7]:
### init
lst = lst_dat_peak_crispri_growth_arrange

### for each arranged table (total first, then active), print a separator
### line followed by its column names, one per line
for (txt_key in c("total", "active")) {
    dat = lst[[txt_key]]
    cat("====================\n")
    for (txt_cname in names(dat)) {
        cat(txt_cname, "\n")
    }
}

chrom 
start 
end 
name 
log2FoldChange 
padj 
Region 
Assay_Type 
Assay_Label 
chrom 
start 
end 
name 
log2FoldChange 
padj 
Region 
Assay_Type 
Assay_Label 


In [8]:
### data dictionary: one row per column of the arranged tables, in column order
### NOTE(review): the names listed here (e.g. "Chrom", "Log2FC") differ in
### case/spelling from the column names of the tables themselves
### (e.g. chrom, log2FoldChange) -- confirm this is the intended labelling
dat_cnames = tibble(
    Name = c(
        "Chrom",
        "Start",
        "End",
        "Name",
        "Log2FC",
        "Padj",
        "Region",
        "Assay_Type",
        "Assay_Label"
    ),
    Description = c(
        "Chromosome of DHS",
        "Start position of DHS",
        "End   position of DHS",
        "ID of significant guide within the DHS",
        "Log2FC of the guide",
        "Adjusted p value of the guide",
        "Location of the DHS",
        "Assay type",
        "Assay label"
    )
)
dat_cnames

Name,Description
<chr>,<chr>
Chrom,Chromosome of DHS
Start,Start position of DHS
End,End position of DHS
Name,ID of significant guide within the DHS
Log2FC,Log2FC of the guide
Padj,Adjusted p value of the guide
Region,Location of the DHS
Assay_Type,Assay type
Assay_Label,Assay label


## Save results

In [9]:
### output directory for the tables written by the cells below
FD_OUT = file.path(FD_RES, "results", "region", "annotation_crispri_growth")

### make sure the directory (and any missing parent) exists before writing;
### showWarnings = FALSE keeps this silent when it is already there
dir.create(FD_OUT, recursive = TRUE, showWarnings = FALSE)

In [10]:
### output path of the data dictionary
fdiry = FD_OUT
fname = "description.tsv"
fpath = file.path(fdiry, fname)

### write the column descriptions as a tab-separated file (with header)
dat = dat_cnames
write_tsv(x = dat, file = fpath)

In [11]:
### output path of the arranged table of all guides
fdiry = FD_OUT
fname = "crispri_growth.dhs.total.bed.gz"
fpath = file.path(fdiry, fname)

### write the table tab-separated and without a header line
lst = lst_dat_peak_crispri_growth_arrange
dat = lst[["total"]]
write_tsv(x = dat, file = fpath, col_names = FALSE)

### report the dimensions and preview the first three rows
print(dim(dat))
head(dat, n = 3)

[1] 1092166       9


chrom,start,end,name,log2FoldChange,padj,Region,Assay_Type,Assay_Label
<chr>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<chr>,<chr>,<chr>
chr1,605550,605627,chr1.1.1,-0.9859338,3.245787e-11,chr1:605550-605627,CRISPRi-Growth,CRISPRi-Growth
chr1,605550,605627,chr1.1.2,0.01755679,0.8618381,chr1:605550-605627,CRISPRi-Growth,CRISPRi-Growth
chr1,605550,605627,chr1.1.3,0.0142221,0.9074943,chr1:605550-605627,CRISPRi-Growth,CRISPRi-Growth


In [12]:
### output path of the arranged table of significant guides
fdiry = FD_OUT
fname = "crispri_growth.dhs.active.bed.gz"
fpath = file.path(fdiry, fname)

### write the table tab-separated and without a header line
lst = lst_dat_peak_crispri_growth_arrange
dat = lst[["active"]]
write_tsv(x = dat, file = fpath, col_names = FALSE)

### report the dimensions and preview the first three rows
print(dim(dat))
head(dat, n = 3)

[1] 6242    9


chrom,start,end,name,log2FoldChange,padj,Region,Assay_Type,Assay_Label
<chr>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<chr>,<chr>,<chr>
chr1,605550,605627,chr1.1.1,-0.9859338,3.245787e-11,chr1:605550-605627,CRISPRi-Growth,CRISPRi-Growth
chr1,826642,827902,chr1.4.8,0.1855074,0.03250512,chr1:826642-827902,CRISPRi-Growth,CRISPRi-Growth
chr1,964946,965136,chr1.41.7,-1.1466792,1.464654e-26,chr1:964946-965136,CRISPRi-Growth,CRISPRi-Growth
