**Set environment**

In [1]:
### load the project configuration quietly, then print the environment summary
suppressWarnings(suppressMessages(source(file = "../run_config_project_sing.R")))
show_env()

You are working on        Singularity 
BASE DIRECTORY (FD_BASE): /mount 
REPO DIRECTORY (FD_REPO): /mount/repo 
WORK DIRECTORY (FD_WORK): /mount/work 
DATA DIRECTORY (FD_DATA): /mount/data 

You are working with      ENCODE FCC 
PATH OF PROJECT (FD_PRJ): /mount/repo/Proj_ENCODE_FCC 
PROJECT RESULTS (FD_RES): /mount/repo/Proj_ENCODE_FCC/results 
PROJECT SCRIPTS (FD_EXE): /mount/repo/Proj_ENCODE_FCC/scripts 
PROJECT DATA    (FD_DAT): /mount/repo/Proj_ENCODE_FCC/data 
PROJECT NOTE    (FD_NBK): /mount/repo/Proj_ENCODE_FCC/notebooks 
PROJECT DOCS    (FD_DOC): /mount/repo/Proj_ENCODE_FCC/docs 
PROJECT LOG     (FD_LOG): /mount/repo/Proj_ENCODE_FCC/log 
PROJECT APP     (FD_APP): /mount/repo/Proj_ENCODE_FCC/app 
PROJECT REF     (FD_REF): /mount/repo/Proj_ENCODE_FCC/references 



**Set global variables**

In [2]:
### input folder name; resolved below as file.path(FD_DAT, "processed", <folder>)
TXT_FOLDER_INP = "CRISPRi_Growth_K562_Gersbach_Alex"
### output folder name; resolved below as file.path(FD_RES, "region", <folder>)
TXT_FOLDER_OUT = "fcc_crispri_growth"

In [3]:
### folder holding the processed input files
txt_folder = TXT_FOLDER_INP
txt_fdiry  = file.path(FD_DAT, "processed", txt_folder)

### print the folder content, one entry per line
vec = list.files(txt_fdiry)
for (txt in vec) {
    cat(txt, "\n")
}

2023_resubmission 
k562-gw-v3-all.sorted.counts.results.hg38.txt.gz 
k562-gw-v3-all.sorted.counts.results.top_guide_fdr_0_05.hg38.bed.gz 


## Import data

**Import total guides & tested DHS**

In [4]:
### locate the table of all guides / tested DHS
txt_folder = TXT_FOLDER_INP
txt_fname  = "k562-gw-v3-all.sorted.counts.results.hg38.txt.gz"
txt_fdiry  = file.path(FD_DAT, "processed", txt_folder)
txt_fpath  = file.path(txt_fdiry, txt_fname)

### read the table; the first column has no header in the file
### (readr reports it as `...1`), so give it an explicit name
dat = read_tsv(txt_fpath, show_col_types = FALSE)
names(dat)[1] = "RowID"

### keep a named copy, then show dimensions and the first rows
dat_region_crispri_growth_total = dat
print(dim(dat))
fun_display_table(head(dat, n = 6))

[1m[22mNew names:
[36m•[39m `` -> `...1`


[1] 1092166      22


RowID,baseMean,log2FoldChange,lfcSE,stat,pvalue,padj,weight,name,dhs_chrom,dhs_start,dhs_end,ID,chrPerturbationTarget,startPerturbationTarget,endPerturbationTarget,chrom,start,end,dhs_id,score,strand
0,669.8027,-0.9859338,0.1352781,-7.5156478,0.0,0.0,1.0189024,chr1.1.1,chr1,540930,541007,chr1.1,chr1,540951,540970,chr1,605550,605627,chr1.1,521,.
1,1210.5948,0.0175568,0.1125958,0.9724725,0.3308155,0.8618381,1.3894218,chr1.1.2,chr1,540930,541007,chr1.1,chr1,540971,540990,chr1,605550,605627,chr1.1,521,.
2,2065.7482,0.0142221,0.0769462,0.7948325,0.426711,0.9074943,1.2206106,chr1.1.3,chr1,540930,541007,chr1.1,chr1,540963,540982,chr1,605550,605627,chr1.1,521,.
3,670.0537,0.0184355,0.1262349,1.0421596,0.2973377,0.886445,1.0189024,chr1.1.4,chr1,540930,541007,chr1.1,chr1,540953,540972,chr1,605550,605627,chr1.1,521,.
4,1023.917,0.0112653,0.1017871,0.6436207,0.5198214,0.9145863,1.3894218,chr1.1.5,chr1,540930,541007,chr1.1,chr1,540954,540973,chr1,605550,605627,chr1.1,521,.
5,605.4137,0.0324261,0.1459777,1.6813118,0.0927024,0.7390114,0.9584715,chr1.1.6,chr1,540930,541007,chr1.1,chr1,540979,540998,chr1,605550,605627,chr1.1,521,.


**Import significant guides**

In [5]:
### locate the table of significant (top guide, FDR 0.05) results
txt_folder = TXT_FOLDER_INP
txt_fname  = "k562-gw-v3-all.sorted.counts.results.top_guide_fdr_0_05.hg38.bed.gz"
txt_fdiry  = file.path(FD_DAT, "processed", txt_folder)
txt_fpath  = file.path(txt_fdiry, txt_fname)

### read the table; the first column has no header in the file
### (readr reports it as `...1`), so give it an explicit name
dat = read_tsv(txt_fpath, show_col_types = FALSE)
names(dat)[1] = "RowID"

### keep a named copy, then show dimensions and the first rows
dat_region_crispri_growth_signif = dat
print(dim(dat))
fun_display_table(head(dat, n = 6))

[1m[22mNew names:
[36m•[39m `` -> `...1`


[1] 6242    7


RowID,chrom,start,end,name,log2FoldChange,padj
504606,chr19,11155578,11156290,chr19.1899.3,-2.549922,0
686888,chr22,30356286,30357525,chr22.1285.6,-3.059909,0
868222,chr6,34530428,34530534,chr6.2172.8,-3.100194,0
50002,chr1,20685850,20686283,chr1.2930.10,-2.862158,0
811835,chr5,70924811,70925615,chr5.1649.1,-2.92711,0
811815,chr5,70049360,70050252,chr5.1647.1,-2.92711,0


## Arrange tables

In [6]:
### init: list of data (the two imported tables, keyed by label)
lst = list(
    "Total"  = dat_region_crispri_growth_total,
    "Signif" = dat_region_crispri_growth_signif
)

### columns to keep (in output order) and assay type
vec_txt_cname = c(
    'Chrom', 'ChromStart', 'ChromEnd', 'Region', 
    'Guide_ID', 
    'Log2FC', 
    'Padj',
    'Group',
    'Label'
)
vec_txt_label = names(lst)
txt_group = "CRISPRi-Growth"

### extract columns and arrange table
### Each table is renamed to the shared column set, tagged with the assay
### group and its own label, sorted by coordinates, and de-duplicated.
lst = lapply(vec_txt_label, function(txt_label){
    dat = lst[[txt_label]]
    dat = dat %>%
        dplyr::mutate(
            Chrom      = chrom,
            ChromStart = start,
            ChromEnd   = end,
            Region     = fun_gen_region(chrom, start, end),
            Guide_ID   = name,
            Log2FC     = log2FoldChange,
            Padj       = padj,
            Group      = txt_group,
            Label      = txt_label
        ) %>%
        ### all_of(): select by a character vector that lives outside the
        ### table; fails loudly if any requested column is missing
        dplyr::select(dplyr::all_of(vec_txt_cname)) %>% 
        dplyr::arrange(Chrom, ChromStart, ChromEnd) %>%
        dplyr::distinct()
    return(dat)
})
### lapply() over a character vector returns an unnamed list; restore labels
names(lst) = vec_txt_label

### assign
lst_dat_region_arrange = lst

In [8]:
### init
lst = lst_dat_region_arrange

### preview: tag the first table in the list with analysis method and data source
dat = dplyr::mutate(lst[[1]], Method = "DESeq2", Source = "Gersbach")
head(dat)

Chrom,ChromStart,ChromEnd,Region,Guide_ID,Log2FC,Padj,Group,Label,Method,Source
<chr>,<dbl>,<dbl>,<chr>,<chr>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>
chr1,605550,605627,chr1:605550-605627,chr1.1.1,-0.9859338,3.245787e-11,CRISPRi-Growth,Total,DESeq2,Gersbach
chr1,605550,605627,chr1:605550-605627,chr1.1.2,0.01755679,0.8618381,CRISPRi-Growth,Total,DESeq2,Gersbach
chr1,605550,605627,chr1:605550-605627,chr1.1.3,0.0142221,0.9074943,CRISPRi-Growth,Total,DESeq2,Gersbach
chr1,605550,605627,chr1:605550-605627,chr1.1.4,0.01843545,0.886445,CRISPRi-Growth,Total,DESeq2,Gersbach
chr1,605550,605627,chr1:605550-605627,chr1.1.5,0.0112653,0.9145863,CRISPRi-Growth,Total,DESeq2,Gersbach
chr1,605550,605627,chr1:605550-605627,chr1.1.6,0.03242606,0.7390114,CRISPRi-Growth,Total,DESeq2,Gersbach


In [12]:
### metadata fields for this dataset, used below to try out JSON serialization
lst = list(Assay = "CRISPRi-Growth", Method = "DESeq2", Source = "Gersbach")
lst

In [13]:
### default serialization: each length-one value is written as a JSON array
jsonlite::toJSON(x = lst)

{"Assay":["CRISPRi-Growth"],"Method":["DESeq2"],"Source":["Gersbach"]} 

In [14]:
### auto_unbox = TRUE: length-one values are written as JSON scalars
jsonlite::toJSON(x = lst, auto_unbox = TRUE)

{"Assay":"CRISPRi-Growth","Method":"DESeq2","Source":"Gersbach"} 

In [15]:
### same as above, re-formatted with indentation for reading
jsonlite::prettify(jsonlite::toJSON(lst, auto_unbox = TRUE))

{
    "Assay": "CRISPRi-Growth",
    "Method": "DESeq2",
    "Source": "Gersbach"
}
 

**Explore: number of regions**

In [12]:
### init
lst = lst_dat_region_arrange

### show information: row count and number of unique regions per table
for (txt_label in c("Total", "Signif")) {
    ### blank line separating the two reports
    if (txt_label == "Signif") {
        cat("\n")
    }

    dat = lst[[txt_label]]
    vec_txt_region = unique(dat$Region)
    cat(paste0("#Row    (", txt_label, "):"), nrow(dat),              "\n")
    cat(paste0("#Region (", txt_label, "):"), length(vec_txt_region), "\n")
}

#Row    (Total): 1092166 
#Region (Total): 111702 

#Row    (Signif): 6242 
#Region (Signif): 6242 


**Check: column names**

In [13]:
### init: take the arranged tables explicitly, since `lst` is reused for
### other objects elsewhere in this notebook
lst = lst_dat_region_arrange

### column names of the arranged table
dat = lst[['Signif']]
colnames(dat)

**Check: all significant regions are within the set of total regions**

In [15]:
### init: take the arranged tables explicitly, since `lst` is reused for
### other objects elsewhere in this notebook
lst = lst_dat_region_arrange

### unique regions in the significant table
dat  = lst[['Signif']]
vec1 = unique(dat$Region)

### unique regions in the total table
dat  = lst[['Total']]
vec2 = unique(dat$Region)

### TRUE when every significant region also appears in the total table
print(all(vec1 %in% vec2))

[1] TRUE


## Save results

**Write tables**

In [17]:
### init: take the arranged tables explicitly so the files written below do
### not depend on whatever `lst` was last assigned to in an earlier cell
lst = lst_dat_region_arrange

### set directory
### dir.create() replaces the shell call `mkdir -p`: it needs no shell, copes
### with spaces in the path, and is silent when the folder already exists
txt_folder = TXT_FOLDER_OUT
txt_fdiry  = file.path(FD_RES, "region", txt_folder)
dir.create(txt_fdiry, recursive = TRUE, showWarnings = FALSE)

### write table: all guides / tested regions (no header row)
txt_fname = "K562.hg38.CRISPRi_Growth.total.bed.gz"
txt_fpath = file.path(txt_fdiry, txt_fname)

dat = lst[['Total']]
write_tsv(dat, txt_fpath, col_names = FALSE)

### write table: significant regions (no header row)
txt_fname = "K562.hg38.CRISPRi_Growth.signif.bed.gz"
txt_fpath = file.path(txt_fdiry, txt_fname)

dat = lst[['Signif']]
write_tsv(dat, txt_fpath, col_names = FALSE)