**Set environment**

In [1]:
suppressMessages(suppressWarnings(source("../run_config_project_sing.R")))
show_env()

You are working on        Singularity 
BASE DIRECTORY (FD_BASE): /mount 
REPO DIRECTORY (FD_REPO): /mount/repo 
WORK DIRECTORY (FD_WORK): /mount/work 
DATA DIRECTORY (FD_DATA): /mount/data 

You are working with      ENCODE FCC 
PATH OF PROJECT (FD_PRJ): /mount/repo/Proj_ENCODE_FCC 
PROJECT RESULTS (FD_RES): /mount/repo/Proj_ENCODE_FCC/results 
PROJECT SCRIPTS (FD_EXE): /mount/repo/Proj_ENCODE_FCC/scripts 
PROJECT DATA    (FD_DAT): /mount/repo/Proj_ENCODE_FCC/data 
PROJECT NOTE    (FD_NBK): /mount/repo/Proj_ENCODE_FCC/notebooks 
PROJECT DOCS    (FD_DOC): /mount/repo/Proj_ENCODE_FCC/docs 
PROJECT LOG     (FD_LOG): /mount/repo/Proj_ENCODE_FCC/log 
PROJECT APP     (FD_APP): /mount/repo/Proj_ENCODE_FCC/app 
PROJECT REF     (FD_REF): /mount/repo/Proj_ENCODE_FCC/references 



## Import data

**Import library design**

In [2]:
txt_fdiry = file.path(FD_DAT, "processed", "CRISPRi_FlowFISH_K562_Riley_JinWoo", "track_bedgraph")
txt_fname = "*HS_exp_r1.tsv.gz"
txt_fglob = file.path(txt_fdiry, txt_fname)

vec_txt_fpath  = Sys.glob(txt_fglob)
vec_txt_fname  = basename(vec_txt_fpath)
vec_txt_target = str_remove(vec_txt_fname, "_HS_exp_r1.tsv.gz")
vec_txt_target

In [3]:
lst = lapply(vec_txt_fpath, function(txt_fpath){
        
    ### read data
    vec_txt_cname = c(
        "Chrom", "ChromStart", "ChromEnd", "Name", "SeqCounts", "Strand", "Guide_ID", 
        "Chrom_TSS", "Start_TSS", "End_TSS", "Strand_Gene", 
        "Gene_Symbol", 
        "Gene_ENS", 
        "Guide_SpacerSeq",
        "Guide_Seq",
        "Guide_Type",
        "Notes"
    )
    vec_col_ctype = cols(
        "Chrom" = col_character(),
        "ChromStart" = col_integer(),
        "ChromEnd"   = col_integer()
    )
    dat = read_tsv(
        txt_fpath, 
        col_names = vec_txt_cname, 
        col_types = vec_col_ctype, 
        show_col_types = FALSE)
    return(dat)
})
names(lst) = vec_txt_target

lst_dat_crispri_info = lst
for (idx in names(lst)){
    dat = lst[[idx]]
    txt = format(idx, width = 7, justify = "left")
    cat(txt, dim(dat), "\n")
}

dat = lst[[1]]
head(dat)

CAPRIN1 60000 17 
CAT     60000 17 
CD164   60001 17 
ERP29   60000 17 
FADS1   10000 17 
FADS2   10000 17 
FADS3   10000 17 
FEN1    10000 17 
GATA1   15335 17 
HBE1    21170 17 
HBG1    21170 17 
HBG2    21170 17 
HBS1L   21170 17 
HDAC6   15335 17 
LMO2    60000 17 
MEF2C   60000 17 
MYB     21170 17 
MYC     51130 17 
NMU     45500 17 
PVT1    51130 17 


Chrom,ChromStart,ChromEnd,Name,SeqCounts,Strand,Guide_ID,Chrom_TSS,Start_TSS,End_TSS,Strand_Gene,Gene_Symbol,Gene_ENS,Guide_SpacerSeq,Guide_Seq,Guide_Type,Notes
<chr>,<int>,<int>,<chr>,<dbl>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
chr11,61748850,61748853,NA|chr11:61748831-61748850:+,121,+,chr11:61748831-61748850:+,chr11,34051731,34051732,+,CAPRIN1,ENSG00000135387,CTTTACCTCCACTGTAAGGC,GCTTTACCTCCACTGTAAGGC,negative_control,STT
chr11,61748851,61748854,NA|chr11:61748832-61748851:+,153,+,chr11:61748832-61748851:+,chr11,34051731,34051732,+,CAPRIN1,ENSG00000135387,TTTACCTCCACTGTAAGGCA,GTTTACCTCCACTGTAAGGCA,negative_control,STT
chr11,61748971,61748974,NA|chr11:61748975-61748994:-,172,-,chr11:61748975-61748994:-,chr11,34051731,34051732,+,CAPRIN1,ENSG00000135387,CTACCGAGCCACAGTGAGTG,GCTACCGAGCCACAGTGAGTG,negative_control,STT
chr11,61749002,61749005,NA|chr11:61748983-61749002:+,3,+,chr11:61748983-61749002:+,chr11,34051731,34051732,+,CAPRIN1,ENSG00000135387,GTGGCTCGGTAGAGAAGCCA,GTGGCTCGGTAGAGAAGCCA,negative_control,STT
chr11,61749033,61749036,NA|chr11:61749014-61749033:+,246,+,chr11:61749014-61749033:+,chr11,34051731,34051732,+,CAPRIN1,ENSG00000135387,GAGGACTCATCTCCATTGAT,GAGGACTCATCTCCATTGAT,negative_control,STT
chr11,61749038,61749041,NA|chr11:61749042-61749061:-,851,-,chr11:61749042-61749061:-,chr11,34051731,34051732,+,CAPRIN1,ENSG00000135387,ACTTGCATACAAGGACCTAG,GACTTGCATACAAGGACCTAG,negative_control,STT


**Import scores**

In [4]:
### set directory
txt_assay  = "CRISPRi_FlowFISH_K562_Riley_JinWoo"
txt_folder = "guide_scores"
txt_fdiry  = file.path(FD_RES, "assay_fcc", txt_assay, txt_folder)
txt_fname  = "CRISPRi_HCRFF_K562.hg38.ZScore.unstranded.bed.gz"
txt_fpath  = file.path(txt_fdiry, txt_fname)

vec_txt_cname = c("Chrom", "ChromStart", "ChromEnd", "Name", "Score", "Strand", "Target")
dat = read_tsv(txt_fpath, col_names = vec_txt_cname, show_col_types = FALSE)
lst = split(dat, dat$Target)

lst_dat_crispri_score = lst
dat_crispri_score = dat
head(dat)

Chrom,ChromStart,ChromEnd,Name,Score,Strand,Target
<chr>,<dbl>,<dbl>,<chr>,<dbl>,<chr>,<chr>
chr1,74582275,74582276,id-1,0.414014,.,CD164
chr1,74582275,74582276,id-1,-0.493091,.,MYC
chr1,74582275,74582276,id-1,0.024514,.,NMU
chr1,74582275,74582276,id-1,0.44089,.,PVT1
chr1,74582309,74582310,id-2,-0.181799,.,CD164
chr1,74582309,74582310,id-2,0.997649,.,MYC


## Filter to remove safe targeting guides

In [5]:
### loop through each target in crispri screens
lst = lapply(vec_txt_target, function(txt_target){
    
    ### table: guide attributes
    lst = lst_dat_crispri_info
    dat = lst[[txt_target]]
    dat = dat %>% 
        dplyr::mutate(ChromLoc  = ifelse(Strand == "+", ChromStart - 1, ChromStart + 1)) %>%
        dplyr::mutate(Guide_Loc = paste(Chrom, as.integer(ChromLoc), sep = ":"))
    dat_info = dat
    
    ### table: guide scores
    lst = lst_dat_crispri_score
    dat = lst[[txt_target]]
    dat = dat %>% dplyr::mutate(Guide_Loc = paste(Chrom, as.integer(ChromStart), sep = ":"))
    dat_score = dat

    ### get the list of targeting guides
    dat = dat_info
    dat = dat %>% dplyr::filter(Guide_Type == "targeting")
    vec = dat$Guide_Loc

    ### select the targeting guides in the guide log2fc
    dat = dat_score
    dat = dat %>% dplyr::filter(Guide_Loc %in% vec)
    dat_score_filter = dat
    
    ### show progress
    cat("\n==================================================\n")
    cat(txt_target, "\n")
    
    cat("\n----- Guide Attributes -----\n")
    dat = dat_info
    res = table(dat$Guide_Type, dat$Chrom)
    print(res)
    
    cat("\n----- Guide Scores -----\n")
    dat = dat_score
    res = table(dat$Chrom)
    print(res)
    
    cat("\n----- Guide Filtered -----\n")
    dat = dat_score_filter
    res = table(dat$Chrom)
    print(res)
    flush.console()
    
    return(dat_score_filter)
})

### concatenate and arrange
dat = bind_rows(lst)
dat = dat %>% dplyr::select(-Guide_Loc) %>% dplyr::arrange(Chrom, ChromStart, ChromEnd)

### assign and show
cat("\n==================================================\n")
dat_crispri_score_filter = dat
fun_display_table(head(dat))


CAPRIN1 

----- Guide Attributes -----
                  
                   chr10 chr11  chr5
  negative_control  1500  3000  1500
  targeting            0 52500     0

----- Guide Scores -----

chr10 chr11  chr5 
 1494 54529  1472 

----- Guide Filtered -----

chr11 
51543 

CAT 

----- Guide Attributes -----
                  
                   chr10 chr11  chr5
  negative_control  1500  3000  1500
  targeting            0 52500     0

----- Guide Scores -----

chr10 chr11  chr5 
 1494 54529  1472 

----- Guide Filtered -----

chr11 
51543 

CD164 

----- Guide Attributes -----
                  
                    chr1 chr10 chr11 chr12 chr13 chr14 chr15 chr16 chr17 chr18
  negative_control    60  1558  3089    30    67    27    34     8     7    29
  targeting            0     0     0     0     0     0     0     0     0     0
                  
                   chr19  chr2 chr21  chr3  chr4  chr5  chr6  chr7  chr8  chr9
  negative_control     2    63    21    48    99    68  

Chrom,ChromStart,ChromEnd,Name,Score,Strand,Target
chr11,4091884,4091885,id-99,0.114291,.,HBE1
chr11,4091884,4091885,id-99,0.650791,.,HBG1
chr11,4091884,4091885,id-99,0.694152,.,HBG2
chr11,4091884,4091885,id-99,-0.074553,.,HBS1L
chr11,4091884,4091885,id-99,-0.257127,.,MYB
chr11,4091885,4091886,id-100,-0.546764,.,HBE1


## Save results

In [6]:
### set directory
txt_assay  = "CRISPRi_FlowFISH_K562_Riley_JinWoo"
txt_folder = "guide_scores"
txt_fdiry  = file.path(FD_RES, "assay_fcc", txt_assay, txt_folder)
txt_fname  = "CRISPRi_HCRFF_K562.hg38.ZScore.filtered.unstranded.bed.gz"
txt_fpath  = file.path(txt_fdiry, txt_fname)

### write table
dat = dat_crispri_score_filter
write_tsv(dat, txt_fpath, col_names = FALSE)