**Set environment**

In [1]:
### load the project configuration quietly, then print the resolved directories
suppressWarnings(suppressMessages(source("../run_config_project_sing.R")))
show_env()

You are working on        Singularity 
BASE DIRECTORY (FD_BASE): /mount 
REPO DIRECTORY (FD_REPO): /mount/repo 
WORK DIRECTORY (FD_WORK): /mount/work 
DATA DIRECTORY (FD_DATA): /mount/data 

You are working with      ENCODE FCC 
PATH OF PROJECT (FD_PRJ): /mount/repo/Proj_ENCODE_FCC 
PROJECT RESULTS (FD_RES): /mount/repo/Proj_ENCODE_FCC/results 
PROJECT SCRIPTS (FD_EXE): /mount/repo/Proj_ENCODE_FCC/scripts 
PROJECT DATA    (FD_DAT): /mount/repo/Proj_ENCODE_FCC/data 
PROJECT NOTE    (FD_NBK): /mount/repo/Proj_ENCODE_FCC/notebooks 
PROJECT DOCS    (FD_DOC): /mount/repo/Proj_ENCODE_FCC/docs 
PROJECT LOG     (FD_LOG): /mount/repo/Proj_ENCODE_FCC/log 
PROJECT APP     (FD_APP): /mount/repo/Proj_ENCODE_FCC/app 
PROJECT REF     (FD_REF): /mount/repo/Proj_ENCODE_FCC/references 



## Import data

**Read reference table (its `Target` column is used by the helper functions below)**

In [2]:
### locate the reference table under the project reference directory
txt_fname = "ENCODE_K562_hg38_CRISPRi_HCRFF.tsv"
txt_fdiry = file.path(FD_REF, "encode_crispri_hcrff")
txt_fpath = file.path(txt_fdiry, txt_fname)

### read the table, keep a named copy, and preview the first rows
dat_ref_chcrff = read_tsv(txt_fpath, show_col_types = FALSE)
dat = dat_ref_chcrff
fun_display_table(head(dat_ref_chcrff))

Assay,Biosample,Index_Experiment,Index_Process,Index_File,File_Type,Output_Type,Genome,Target,File_Summary,Lab
Flow-FISH CRISPR screen,K562,ENCSR793WTM,Lab custom GRCh38 (ENCAN194INC),ENCFF863AVQ,bed CRISPR element quantifications,element quantifications,hg38,CAPRIN1,CAPRIN1_peakwise_file,"Pardis Sabeti, Broad"
Flow-FISH CRISPR screen,K562,ENCSR793WTM,Lab custom GRCh38 (ENCAN194INC),ENCFF444UXP,bigWig,perturbation signal,hg38,CAPRIN1,CAPRIN1_perturb_signal_R1,"Pardis Sabeti, Broad"
Flow-FISH CRISPR screen,K562,ENCSR793WTM,Lab custom GRCh38 (ENCAN194INC),ENCFF994KKV,bigWig,perturbation signal,hg38,CAPRIN1,CAPRIN1_perturb_signal_R2,"Pardis Sabeti, Broad"
Flow-FISH CRISPR screen,K562,ENCSR794SPV,Lab custom GRCh38 (ENCAN095VUQ),ENCFF619FXH,bed CRISPR element quantifications,element quantifications,hg38,CAT,CAT_peakwise_file,"Pardis Sabeti, Broad"
Flow-FISH CRISPR screen,K562,ENCSR794SPV,Lab custom GRCh38 (ENCAN095VUQ),ENCFF918VCM,bigWig,perturbation signal,hg38,CAT,CAT_perturb_signal_R1,"Pardis Sabeti, Broad"
Flow-FISH CRISPR screen,K562,ENCSR794SPV,Lab custom GRCh38 (ENCAN095VUQ),ENCFF965PMF,bigWig,perturbation signal,hg38,CAT,CAT_perturb_signal_R2,"Pardis Sabeti, Broad"


In [3]:
### distinct values of the Target column in the reference table
vec_txt_gene = unique(dat_ref_chcrff[["Target"]])

### Return the elements of `patterns` that are detected in `string`.
###   string:   text to search
###   patterns: character vector; each element is passed to str_detect as a pattern
### Returns the subset of `patterns` for which str_detect reports a match
### (length zero when nothing matches).
get_info = function(string, patterns){
    is_hit = str_detect(string = string, pattern = patterns)
    patterns[is_hit]
}

### Apply get_info to every element of `strings`, using vec_txt_gene as the patterns.
###   strings: character vector of texts to search
### Returns the sapply result: one entry per input string, holding the
### elements of vec_txt_gene detected in that string.
get_gene = function(strings){
    sapply(strings, get_info, patterns = vec_txt_gene)
}

**Read table**

In [13]:
### set directory
txt_assay  = "CRISPRi_FlowFISH_K562_Riley_JinWoo"
txt_folder = "guide_scores"
txt_fdiry  = file.path(FD_RES, "assay_fcc", txt_assay, txt_folder)
txt_fname  = "*.bed"
txt_fglob  = file.path(txt_fdiry, txt_fname)

vec_txt_fpath = Sys.glob(txt_fglob)
vec_txt_cname = c("Chrom", "ChromStart", "ChromEnd", "Name", "Score")

### stop early when the glob matches nothing; otherwise an empty table
### would silently flow into the later cells
if (length(vec_txt_fpath) == 0){
    stop("No guide score file matched: ", txt_fglob)
}

### read table
lst = lapply(vec_txt_fpath, function(txt_fpath){
    ### look up the target in the file name only, so that directory names
    ### can never be mistaken for a target
    txt = get_info(basename(txt_fpath), vec_txt_gene)

    ### each file must map to exactly one target; report ambiguous or
    ### missing matches explicitly instead of failing inside mutate
    if (length(txt) != 1){
        stop(
            "Expected exactly one target for ", txt_fpath,
            " but matched ", length(txt), ": ", paste(txt, collapse = ", ")
        )
    }

    ### explicit column types (chr, int, int, chr, dbl): keeps the coordinates
    ### as integers rather than guessed doubles, which may be serialized in
    ### scientific notation when the table is written back to disk
    dat = read_tsv(
        txt_fpath,
        col_names      = vec_txt_cname,
        col_types      = "ciicd",
        show_col_types = FALSE
    )
    dat = dat %>% dplyr::mutate(Target = txt)
    return(dat)
})
dat = bind_rows(lst)

### assign and show
dat_score_chcrff = dat
fun_display_table(head(dat, 3))

Chrom,ChromStart,ChromEnd,Name,Score,Target
chr10,100694991,100694992,id-1,0.436247,CAPRIN1
chr10,100695172,100695173,id-2,-1.05777,CAPRIN1
chr10,100695184,100695185,id-3,1.24201,CAPRIN1


## Arrange table

In [17]:
### add a "." strand placeholder, fix the column order, and sort by position
dat = dplyr::mutate(dat_score_chcrff, Strand = ".")
dat = dplyr::select(dat, Chrom, ChromStart, ChromEnd, Name, Score, Strand, Target)
dat = dplyr::arrange(dat, Chrom, ChromStart, ChromEnd)

### assign and show
dat_score_chcrff_arrange = dat
fun_display_table(head(dat_score_chcrff_arrange, 3))

Chrom,ChromStart,ChromEnd,Name,Score,Strand,Target
chr1,74582275,74582276,id-1,0.414014,.,CD164
chr1,74582275,74582276,id-1,-0.493091,.,MYC
chr1,74582275,74582276,id-1,0.024514,.,NMU


## Save results

**Write table**

In [18]:
### output location: same folder as the per-target guide score files
txt_assay  = "CRISPRi_FlowFISH_K562_Riley_JinWoo"
txt_folder = "guide_scores"
txt_fname  = "CRISPRi_HCRFF_K562.hg38.ZScore.unstranded.bed.gz"
txt_fdiry  = file.path(FD_RES, "assay_fcc", txt_assay, txt_folder)
txt_fpath  = file.path(file.path(FD_RES, "assay_fcc", txt_assay, txt_folder), txt_fname)

### write the arranged table without a header row
dat = dat_score_chcrff_arrange
write_tsv(dat_score_chcrff_arrange, txt_fpath, col_names = FALSE)

**Check results**

In [19]:
### set directory
txt_assay  = "CRISPRi_FlowFISH_K562_Riley_JinWoo"
txt_folder = "guide_scores"
txt_fdiry  = file.path(FD_RES, "assay_fcc", txt_assay, txt_folder)
txt_fname  = "CRISPRi_HCRFF_K562.hg38.ZScore.unstranded.bed.gz"
txt_fpath  = file.path(txt_fdiry, txt_fname)

### show the first 10 lines of the written file
### read_lines decompresses the .gz file itself, so no shell command has to be
### assembled from the path and no external zcat is required
vec = read_lines(txt_fpath, n_max = 10)
for(txt in vec){cat(txt, "\n")}

Chrom	ChromStart	ChromEnd	Name	Score	Strand	Target 
chr1	74582275	74582276	id-1	0.414014	.	CD164 
chr1	74582275	74582276	id-1	-0.493091	.	MYC 
chr1	74582275	74582276	id-1	0.024514	.	NMU 
chr1	74582275	74582276	id-1	0.44089	.	PVT1 
chr1	74582309	74582310	id-2	-0.181799	.	CD164 
chr1	74582309	74582310	id-2	0.997649	.	MYC 
chr1	74582309	74582310	id-2	-0.439687	.	NMU 
chr1	74582309	74582310	id-2	1.25627	.	PVT1 
chr1	80526338	80526339	id-3	-0.186275	.	CD164 
