## Set environment

In [1]:
### Load the project configuration script; messages and warnings raised while
### sourcing it are suppressed so that the cell output stays clean.
suppressWarnings(suppressMessages(source("../config/config_sing.R")))
### Print the environment and the project directories (FD_BASE, FD_WORK, FD_CODE, ...).
### NOTE(review): show_env() is presumably defined by the sourced config script -- confirm.
show_env()

You are in Singularity: singularity_proj_encode_fcc 
BASE DIRECTORY (FD_BASE): /data/reddylab/Kuei 
WORK DIRECTORY (FD_WORK): /data/reddylab/Kuei/out 
CODE DIRECTORY (FD_CODE): /data/reddylab/Kuei/code 
PATH OF PROJECT (FD_PRJ): /data/reddylab/Kuei/code/Proj_CombEffect_ENCODE_FCC 
PATH OF RESULTS (FD_RES): /data/reddylab/Kuei/out/proj_combeffect_encode_fcc 
PATH OF LOG     (FD_LOG): /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/log 


## Import data

In [2]:
### location of the gene coordinate table (columns: Gene, Chrom, Strand, Start, End)
fdiry = file.path(FD_RES, "source", "CRISPRi_FlowFISH")
fname = "K562_HCRFF_20genes_TSS_TES_coords.out"
fpath = file.path(fdiry, fname)

### read the table and keep it under a dedicated name for the later cells
dat_gene_info = read_tsv(fpath, show_col_types = FALSE)
dat = dat_gene_info

### dimensions and first rows
print(dim(dat_gene_info))
head(dat_gene_info)

[1] 20  5


Gene,Chrom,Strand,Start,End
<chr>,<chr>,<chr>,<dbl>,<dbl>
CAPRIN1,chr11,+,34051730,34102610
CAT,chr11,+,34438933,34472060
CD164,chr6,-,109366513,109382467
ERP29,chr12,+,112013347,112023220
FADS1,chr11,-,61799628,61817003
FADS2,chr11,+,61816202,61867354


In [4]:
### gene symbols listed in the gene coordinate table; they key every per-gene list below
genes = dat_gene_info[["Gene"]]
print(length(genes))
print(genes)

[1] 20
 [1] "CAPRIN1" "CAT"     "CD164"   "ERP29"   "FADS1"   "FADS2"   "FADS3"  
 [8] "FEN1"    "GATA1"   "HBE1"    "HBG1"    "HBG2"    "HBS1L"   "HDAC6"  
[15] "LMO2"    "MEF2C"   "MYB"     "MYC"     "NMU"     "PVT1"   


In [5]:
### directory holding one guide attribute table per gene
fdiry = file.path(FD_RES, "source", "CRISPRi_FlowFISH", "track_bedgraph")

### read the guide attribute table "<gene>_HS_exp_r1.tsv.gz" of every gene
lst = lapply(genes, function(gene){

    ### path of the table of this gene
    fpath = file.path(fdiry, paste0(gene, "_", "HS_exp_r1.tsv.gz"))

    ### column names are supplied explicitly (17 columns),
    ### so the first line of each file is read as data
    cnames = c(
        "Chrom", "Start", "End", "Name", "SeqCounts", "Strand", "Guide_ID",
        "Chrom_TSS", "Start_TSS", "End_TSS", "Strand_Gene",
        "Gene_Symbol", "Gene_ENS",
        "Guide_SpacerSeq", "Guide_Seq", "Guide_Type",
        "Notes"
    )

    ### only the coordinate columns get a fixed type; the others are guessed by readr
    ctypes = cols(
        Chrom = col_character(),
        Start = col_integer(),
        End   = col_integer()
    )

    read_tsv(fpath, col_names = cnames, col_types = ctypes, show_col_types = FALSE)
})
names(lst) = genes

lst_crispr_info = lst

### one line per gene: gene symbol followed by the number of rows and columns
for (gene in names(lst_crispr_info)){
    label = format(gene, width = 7, justify = "left")
    cat(label, dim(lst_crispr_info[[gene]]), "\n")
}

CAPRIN1 60000 17 
CAT     60000 17 
CD164   60001 17 
ERP29   60000 17 
FADS1   10000 17 
FADS2   10000 17 
FADS3   10000 17 
FEN1    10000 17 
GATA1   15335 17 
HBE1    21170 17 
HBG1    21170 17 
HBG2    21170 17 
HBS1L   21170 17 
HDAC6   15335 17 
LMO2    60000 17 
MEF2C   60000 17 
MYB     21170 17 
MYC     51130 17 
NMU     45500 17 
PVT1    51130 17 


In [6]:
### directory holding one guide log2FC table per gene
fdiry = file.path(FD_RES, "results", "CRISPRi_FlowFISH", "coverage")

### read the log2FC table "<gene>_HCRFF_rAVG.log2FC_2.bed" of every gene
lst = lapply(genes, function(gene){

    ### path of the table of this gene
    fpath = file.path(fdiry, paste0(gene, "_", "HCRFF_rAVG.log2FC_2.bed"))

    ### five columns, all with an explicit type;
    ### the names are supplied, so the first line of each file is read as data
    cnames = c("Chrom", "Start", "End", "Name", "Score")
    ctypes = cols(
        Chrom = col_character(),
        Start = col_integer(),
        End   = col_integer(),
        Name  = col_character(),
        Score = col_double()
    )

    read_tsv(fpath, col_names = cnames, col_types = ctypes, show_col_types = FALSE)
})
names(lst) = genes

lst_crispr_log2fc = lst

### one line per gene: gene symbol followed by the number of rows and columns
for (gene in names(lst_crispr_log2fc)){
    label = format(gene, width = 7, justify = "left")
    cat(label, dim(lst_crispr_log2fc[[gene]]), "\n")
}

CAPRIN1 57495 5 
CAT     57495 5 
CD164   57664 5 
ERP29   57381 5 
FADS1   9398 5 
FADS2   9398 5 
FADS3   9398 5 
FEN1    9398 5 
GATA1   13732 5 
HBE1    20485 5 
HBG1    20485 5 
HBG2    20485 5 
HBS1L   20485 5 
HDAC6   13732 5 
LMO2    57495 5 
MEF2C   57632 5 
MYB     20485 5 
MYC     49986 5 
NMU     43563 5 
PVT1    49986 5 


## Filtering

In [7]:
### For every gene, keep only the log2FC rows whose location key matches a targeting guide
lst = lapply(genes, function(gene){

    ### guide attributes: build the key "<Chrom>_<Loc>", where Loc is
    ### Start - 1 for guides on the "+" strand and Start + 1 otherwise
    dat_info = lst_crispr_info[[gene]] %>%
        dplyr::mutate(
            Loc       = ifelse(Strand == "+", Start - 1, Start + 1),
            Guide_Loc = paste(Chrom, as.integer(Loc), sep = "_")
        )

    ### guide log2fc: the key is built from Start as it is
    dat_score = lst_crispr_log2fc[[gene]] %>%
        dplyr::mutate(Guide_Loc = paste(Chrom, as.integer(Start), sep = "_"))

    ### keys of the targeting guides
    vec_loc_targeting = dat_info %>%
        dplyr::filter(Guide_Type == "targeting") %>%
        dplyr::pull(Guide_Loc)

    ### log2fc rows located at a targeting guide
    dat_score_filter = dat_score %>%
        dplyr::filter(Guide_Loc %in% vec_loc_targeting)

    ### report the guide counts before and after filtering
    cat("\n==================================================\n")
    cat(gene, "\n")

    cat("\nGuide Attributes:\n")
    print(table(dat_info$Guide_Type, dat_info$Chrom))

    cat("\nGuide Log2FC:\n")
    print(table(dat_score$Chrom))

    cat("\nGuide Filtered:\n")
    print(table(dat_score_filter$Chrom))

    ### push the report to the notebook before the next gene is processed
    flush.console()
    dat_score_filter
})
names(lst) = genes

cat("\n==================================================\n")
lst_crispr_log2fc_filter = lst

### one line per gene: gene symbol followed by the number of rows and columns
for (gene in names(lst_crispr_log2fc_filter)){
    label = format(gene, width = 7, justify = "left")
    cat(label, dim(lst_crispr_log2fc_filter[[gene]]), "\n")
}


CAPRIN1 

Guide Attributes:
                  
                   chr10 chr11  chr5
  negative_control  1500  3000  1500
  targeting            0 52500     0

Guide Log2FC:

chr10 chr11  chr5 
 1494 54529  1472 

Guide Filtered:

chr11 
51543 

CAT 

Guide Attributes:
                  
                   chr10 chr11  chr5
  negative_control  1500  3000  1500
  targeting            0 52500     0

Guide Log2FC:

chr10 chr11  chr5 
 1494 54529  1472 

Guide Filtered:

chr11 
51543 

CD164 

Guide Attributes:
                  
                    chr1 chr10 chr11 chr12 chr13 chr14 chr15 chr16 chr17 chr18
  negative_control    60  1558  3089    30    67    27    34     8     7    29
  targeting            0     0     0     0     0     0     0     0     0     0
                  
                   chr19  chr2 chr21  chr3  chr4  chr5  chr6  chr7  chr8  chr9
  negative_control     2    63    21    48    99    68    58    66    92    38
  targeting            0     0     0     0     0     0

## Summarize

In [8]:
### stack the per-gene tables; the list names (gene symbols) become the column Gene
lst = lst_crispr_log2fc_filter
dat_crispr_log2fc_filter = lst %>%
    bind_rows(.id = "Gene") %>%
    ### keep the five table columns followed by Gene; the helper key Guide_Loc is dropped
    dplyr::select(Chrom, Start, End, Name, Score, Gene)
dat = dat_crispr_log2fc_filter

### dimensions and first rows
print(dim(dat_crispr_log2fc_filter))
head(dat_crispr_log2fc_filter)

[1] 610960      6


Chrom,Start,End,Name,Score,Gene
<chr>,<int>,<int>,<chr>,<dbl>,<chr>
chr11,33064196,33064197,id-1495,0.807624,CAPRIN1
chr11,33064217,33064218,id-1496,-2.71246,CAPRIN1
chr11,33064229,33064230,id-1497,0.642045,CAPRIN1
chr11,33064230,33064231,id-1498,1.85354,CAPRIN1
chr11,33064258,33064259,id-1499,0.126467,CAPRIN1
chr11,33064264,33064265,id-1500,0.217096,CAPRIN1


## Save

In [9]:
### output path of the merged table (plain text)
fdiry = file.path(FD_RES, "results", "CRISPRi_FlowFISH", "coverage")
fname = "Merge_HCRFF_rAVG.log2FC_2.filtered.bed"
fpath = file.path(fdiry, fname)
print(fpath)

### write the merged table of all genes without a header line
dat = dat_crispr_log2fc_filter
write_tsv(dat_crispr_log2fc_filter, fpath, col_names = FALSE)

[1] "/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/CRISPRi_FlowFISH/coverage/Merge_HCRFF_rAVG.log2FC_2.filtered.bed"


In [10]:
### output path of the merged table; same content as the plain-text file,
### the ".gz" extension makes readr write it gzip-compressed
fdiry = file.path(FD_RES, "results", "CRISPRi_FlowFISH", "coverage")
fname = "Merge_HCRFF_rAVG.log2FC_2.filtered.bed.gz"
fpath = file.path(fdiry, fname)
print(fpath)

### write the merged table of all genes without a header line
dat = dat_crispr_log2fc_filter
write_tsv(dat_crispr_log2fc_filter, fpath, col_names = FALSE)

[1] "/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/CRISPRi_FlowFISH/coverage/Merge_HCRFF_rAVG.log2FC_2.filtered.bed.gz"


## Explore and save per-gene tables

In [11]:
lst = lst_crispr_log2fc_filter

### every per-gene file goes to the same directory
fdiry = file.path(FD_RES, "results", "CRISPRi_FlowFISH", "coverage")

### write the filtered table of each gene to its own file and preview it
for (idx in names(lst)){

    ### output path: "<gene>_HCRFF_rAVG.log2FC_2.filtered.bed"
    fname = paste0(idx, "_", "HCRFF_rAVG.log2FC_2.filtered.bed")
    fpath = file.path(fdiry, fname)

    ### keep the five table columns (drops the helper key) and write without a header line
    dat = lst[[idx]] %>% dplyr::select(Chrom, Start, End, Name, Score)
    write_tsv(dat, fpath, col_names = FALSE)

    ### report: gene, output path and the first rows
    cat("\n==================================================\n")
    cat(idx,   "\n")
    cat(fpath, "\n\n")
    print(head(dat))
    flush.console()
}


CAPRIN1 
/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/CRISPRi_FlowFISH/coverage/CAPRIN1_HCRFF_rAVG.log2FC_2.filtered.bed 

[90m# A tibble: 6 × 5[39m
  Chrom    Start      End Name     Score
  [3m[90m<chr>[39m[23m    [3m[90m<int>[39m[23m    [3m[90m<int>[39m[23m [3m[90m<chr>[39m[23m    [3m[90m<dbl>[39m[23m
[90m1[39m chr11 33[4m0[24m[4m6[24m[4m4[24m196 33[4m0[24m[4m6[24m[4m4[24m197 id-1495  0.808
[90m2[39m chr11 33[4m0[24m[4m6[24m[4m4[24m217 33[4m0[24m[4m6[24m[4m4[24m218 id-1496 -[31m2[39m[31m.[39m[31m71[39m 
[90m3[39m chr11 33[4m0[24m[4m6[24m[4m4[24m229 33[4m0[24m[4m6[24m[4m4[24m230 id-1497  0.642
[90m4[39m chr11 33[4m0[24m[4m6[24m[4m4[24m230 33[4m0[24m[4m6[24m[4m4[24m231 id-1498  1.85 
[90m5[39m chr11 33[4m0[24m[4m6[24m[4m4[24m258 33[4m0[24m[4m6[24m[4m4[24m259 id-1499  0.126
[90m6[39m chr11 33[4m0[24m[4m6[24m[4m4[24m264 33[4m0[24m[4m6[24m[4m4[24m265 id-1500  0.2