In [1]:
# Load the project configuration script; any messages and warnings emitted
# while sourcing it are suppressed.
# NOTE(review): FD_RES, show_env() and the tidyverse functions used further
# down are not defined in this notebook — presumably they come from this
# config file; confirm.
suppressMessages(suppressWarnings(source("../config/config_sing.R")))
# Print the environment / path summary shown in the cell output.
show_env()

You are in Singularity: singularity_proj_combeffect 
BASE DIRECTORY:     /mount/work 
PATH OF SOURCE:     /mount/work/source 
PATH OF EXECUTABLE: /mount/work/exe 
PATH OF ANNOTATION: /mount/work/annotation 
PATH OF PROJECT:    /mount/project 
PATH OF RESULTS:    /mount/work/out/proj_combeffect_encode_fcc 


## Helper functions

In [2]:
# Dataset identifier; used to build the result directory and output file name.
PREFIX <- "Tewhey_K562_TileMPRA"

# Region tokens searched for in each input file path.
REGIONS <- c("GATA1", "MYC", "FADS")

# Group tokens searched for in each input file path.
GROUPS <- c("Input", "Output")

# Per-replicate sample labels (currently disabled).
#SAMPLES = c(
#    paste0("Input_rep",  1:4),
#    paste0("Output_rep", 1:3))

# Column names assigned to the coverage tables when they are read.
CNAMES <- c("Chrom", "Start", "End", "Count")

In [6]:
# Return the candidate tokens that occur in a file path.
#
# fpath   : string to search (here: a file path).
# strings : character vector of candidates; each one is handed to
#           str_detect() as a pattern.
# Returns the elements of `strings` detected in `fpath`; this can be zero,
# one, or several elements.
get_info <- function(fpath, strings){
    is_hit <- str_detect(string = fpath, pattern = strings)
    strings[is_hit]
}

# Region token (from REGIONS) found in the file path.
get_region <- function(fpath){
    get_info(fpath, REGIONS)
}

# Group token (from GROUPS) found in the file path.
get_group <- function(fpath){
    get_info(fpath, GROUPS)
}

# Sample-level lookup is disabled (SAMPLES is commented out).
#get_sample = function(fpath){return(get_info(fpath, SAMPLES))}

## Fragment coverage

In [7]:
# Collect the coverage files whose names match the stranded_pos BED pattern.
# NOTE(review): FD_RES is not defined in this notebook — presumably set by the
# sourced config; confirm.
fdiry = file.path(FD_RES, PREFIX, "coverage_astarrseq_peak_macs_input")
fname = "*stranded_pos*bed*"
fglob = file.path(fdiry, fname)
fpaths = Sys.glob(fglob)

# Fail fast: with no matches the later cells would build an empty table and
# stop with an unrelated-looking error.
if (length(fpaths) == 0) {
    stop("No coverage files match: ", fglob)
}
print(fpaths)

[1] "/mount/work/out/proj_combeffect_encode_fcc/Tewhey_K562_TileMPRA/coverage_astarrseq_peak_macs_input/Tile_K562_hg38_20200905.FADS.stranded_pos.Input.bed.gz"  
[2] "/mount/work/out/proj_combeffect_encode_fcc/Tewhey_K562_TileMPRA/coverage_astarrseq_peak_macs_input/Tile_K562_hg38_20200905.FADS.stranded_pos.Output.bed.gz" 
[3] "/mount/work/out/proj_combeffect_encode_fcc/Tewhey_K562_TileMPRA/coverage_astarrseq_peak_macs_input/Tile_K562_hg38_20210130.GATA1.stranded_pos.Input.bed.gz" 
[4] "/mount/work/out/proj_combeffect_encode_fcc/Tewhey_K562_TileMPRA/coverage_astarrseq_peak_macs_input/Tile_K562_hg38_20210130.GATA1.stranded_pos.Output.bed.gz"
[5] "/mount/work/out/proj_combeffect_encode_fcc/Tewhey_K562_TileMPRA/coverage_astarrseq_peak_macs_input/Tile_K562_hg38_20210130.MYC.stranded_pos.Input.bed.gz"   
[6] "/mount/work/out/proj_combeffect_encode_fcc/Tewhey_K562_TileMPRA/coverage_astarrseq_peak_macs_input/Tile_K562_hg38_20210130.MYC.stranded_pos.Output.bed.gz"  


In [8]:
# Visual check: show the region / group parsed from every file path.
for (fpath in fpaths){
    region_label = get_region(fpath)
    group_label  = get_group(fpath)

    print(fpath)
    cat("Region:", region_label, ";", "Group:", group_label, "\n")
}

[1] "/mount/work/out/proj_combeffect_encode_fcc/Tewhey_K562_TileMPRA/coverage_astarrseq_peak_macs_input/Tile_K562_hg38_20200905.FADS.stranded_pos.Input.bed.gz"
Region: FADS ; Group: Input 
[1] "/mount/work/out/proj_combeffect_encode_fcc/Tewhey_K562_TileMPRA/coverage_astarrseq_peak_macs_input/Tile_K562_hg38_20200905.FADS.stranded_pos.Output.bed.gz"
Region: FADS ; Group: Output 
[1] "/mount/work/out/proj_combeffect_encode_fcc/Tewhey_K562_TileMPRA/coverage_astarrseq_peak_macs_input/Tile_K562_hg38_20210130.GATA1.stranded_pos.Input.bed.gz"
Region: GATA1 ; Group: Input 
[1] "/mount/work/out/proj_combeffect_encode_fcc/Tewhey_K562_TileMPRA/coverage_astarrseq_peak_macs_input/Tile_K562_hg38_20210130.GATA1.stranded_pos.Output.bed.gz"
Region: GATA1 ; Group: Output 
[1] "/mount/work/out/proj_combeffect_encode_fcc/Tewhey_K562_TileMPRA/coverage_astarrseq_peak_macs_input/Tile_K562_hg38_20210130.MYC.stranded_pos.Input.bed.gz"
Region: MYC ; Group: Input 
[1] "/mount/work/out/proj_combeffect_encode_fcc/T

In [17]:
# Read every coverage table, tag it with the region / group parsed from its
# file path, and keep only rows that have a usable integer count.
lst = lapply(fpaths, function(fpath){
    ### labels parsed from the file path
    reg = get_region(fpath)
    grp = get_group(fpath)
    # mutate() below needs exactly one label each; report an unmatched or
    # ambiguous path here instead of failing inside dplyr.
    if (length(reg) != 1 || length(grp) != 1) {
        stop("Cannot assign a unique region/group to: ", fpath)
    }

    ### coverage table; column names are supplied via CNAMES
    # Count is read as text so that "." placeholders can be turned into NA
    # before the integer conversion, whatever the rest of the column holds.
    dat = read_tsv(
        fpath,
        col_names = CNAMES,
        col_types = cols(
            Chrom = col_character(),
            Start = col_double(),
            End   = col_double(),
            Count = col_character()),
        show_col_types = FALSE)
    dat = dat %>%
        dplyr::mutate(
            # format() keeps coordinates as plain digits; paste() on a bare
            # number can emit scientific notation (e.g. 61000000 -> "6.1e+07").
            Peak = paste(
                Chrom,
                format(Start, scientific = FALSE, trim = TRUE),
                format(End,   scientific = FALSE, trim = TRUE),
                sep = "_"),
            Region = reg,
            Group  = grp,
            Count  = as.integer(dplyr::na_if(Count, "."))) %>%
        # drop rows with any missing value (including the "." counts)
        na.omit
    return(dat)
})

# One row per peak with Input and Output counts side by side.
# A peak present in only one group gets NA in the other column, and an Input
# count of 0 yields an infinite log2FoldChange.
dat_cnt = bind_rows(lst) %>%
    spread(Group, Count) %>%
    mutate(log2FoldChange = log2(Output / Input))
print(dim(dat_cnt))
head(dat_cnt)

[1] 407   8


Chrom,Start,End,Peak,Region,Input,Output,log2FoldChange
<chr>,<dbl>,<dbl>,<chr>,<chr>,<int>,<int>,<dbl>
chr11,61792068,61793464,chr11_61792068_61793464,FADS,245,3157,3.6877006
chr11,61800085,61801113,chr11_61800085_61801113,FADS,250,3715,3.8933622
chr11,61806630,61807154,chr11_61806630_61807154,FADS,373,587,0.6541849
chr11,61814735,61817343,chr11_61814735_61817343,FADS,215,5053,4.5547316
chr11,61822094,61822443,chr11_61822094_61822443,FADS,252,293,0.2174769
chr11,61825795,61826306,chr11_61825795_61826306,FADS,182,565,1.6343124


In [19]:
# Number of peaks with no Input count.
dat_cnt$Input %>% is.na %>% sum

In [20]:
# Number of peaks with no Output count.
dat_cnt$Output %>% is.na %>% sum

In [21]:
# Save the per-peak Input / Output table in a "summary" sub-directory of the
# coverage folder.
fdiry = file.path(FD_RES, PREFIX, "coverage_astarrseq_peak_macs_input", "summary")
fname = paste0(PREFIX, "_summary.stranded_pos.tsv")
fpath = file.path(fdiry, fname)
print(fpath)

# The target directory is not created anywhere else in this notebook, and
# write_tsv() does not create missing directories.
dir.create(fdiry, recursive = TRUE, showWarnings = FALSE)
write_tsv(dat_cnt, fpath)

[1] "/mount/work/out/proj_combeffect_encode_fcc/Tewhey_K562_TileMPRA/coverage_astarrseq_peak_macs_input/summary/Tewhey_K562_TileMPRA_summary.stranded_pos.tsv"
