**Set environment**

In [1]:
### Run the project configuration script, suppressing any messages it emits.
### Later cells rely on `FD_RES`, `read_tsv`, and `%>%`, none of which are
### defined in this notebook -- presumably they are set up by this script
### (TODO confirm against config_sing.R).
suppressMessages(source("../config_sing.R"))

You are in Singularity: singularity_proj_combeffect 
BASE DIRECTORY:     /mount/work 
PATH OF SOURCE:     /mount/work/source 
PATH OF EXECUTABLE: /mount/work/exe 
PATH OF ANNOTATION: /mount/work/annotation 
PATH OF PROJECT:    /mount/project 
PATH OF RESULTS:    /mount/work/out/proj_combeffect_encode_fcc 


In [2]:
### peek at the files present in the coverage directory
fdiry = file.path(FD_RES, "KS91_K562_ASTARRseq", "coverage")
list.files(fdiry)

In [9]:
### locate the library size table inside the fragment directory
fname = "library_size.tsv"
fdiry = file.path(FD_RES, "KS91_K562_ASTARRseq", "fragment")
fpath = file.path(fdiry, fname)

### import the table (columns: Sample, Group, Replicate, Size) and preview it
dat_lib = read_tsv(file = fpath, show_col_types = FALSE)
head(dat_lib, n = 6)

Sample,Group,Replicate,Size
<chr>,<chr>,<chr>,<dbl>
Input_rep1,Input,rep1,358823
Input_rep2,Input,rep2,461577
Input_rep3,Input,rep3,496229
Input_rep4,Input,rep4,464845
Input_rep5,Input,rep5,454013
Input_rep6,Input,rep6,409058


In [19]:
### init: file directory
fdiry = file.path(FD_RES, "KS91_K562_ASTARRseq", "coverage")

### init: column names and types
### NOTE: the collectors must be wrapped in cols(); combining them with c()
### flattens the collector objects (empty classed lists) into an empty list,
### in which case readr ignores the intended types and guesses them instead.
### Keeping Start/End/Depth as integers also avoids the coordinates being
### handled (and possibly written out) as doubles.
cnames = c("Chrom", "Start", "End", "Depth")
ctypes = cols(
    Chrom = col_character(),
    Start = col_integer(),
    End   = col_integer(),
    Depth = col_integer())

### init: bin sizes to process
SIZES = c(100, 200, 500, 1000)

### helper: import the binned coverage of every replicate of one sample group
###   group -- sample group prefix used in the file names (e.g. "Input")
###   repls -- character vector of replicate labels (e.g. "rep1", "rep2")
###   size  -- bin size used in the file names ("bin<size>.bed.gz")
###   fdiry -- directory that holds the coverage files
### returns a list of data frames (one per replicate), each with the columns
### in `cnames` plus a `Sample` column ("<group>_<repl>").
### stops if a sample does not match exactly one file, since zero matches
### cannot be read and several matches would be mixed into one sample.
import_coverage = function(group, repls, size, fdiry){
    lapply(repls, function(repl){
        ### get sample file path
        sam   = paste(group, repl, sep="_")
        fglob = paste0("*", sam, "*", "bin", size, ".bed.gz")
        fpath = Sys.glob(file.path(fdiry, fglob))
        
        ### make sure the pattern identifies one and only one file
        if (length(fpath) != 1){
            stop(
                "Expected exactly one coverage file for sample ", sam,
                " (bin size ", size, ") but found ", length(fpath),
                "; pattern: ", file.path(fdiry, fglob))
        }
        
        ### read data
        dat = read_tsv(
            fpath, 
            col_types = ctypes, 
            col_names = cnames)
        dat$Sample = sam
        return(dat)
    })
}

### loop: for each bin size, import the binned coverage of all samples
### and normalize the depth by the library size of each sample
for (size in SIZES){
    
    ### INPUT: set sample group and number of replicates, then import data
    GROUP = "Input"
    REPLS = paste0("rep", 1:6)
    lst_dat_inp = import_coverage(GROUP, REPLS, size, fdiry)
    
    ### OUTPUT: set sample group and number of replicates, then import data
    GROUP = "Output"
    REPLS = paste0("rep", 1:4)
    lst_dat_out = import_coverage(GROUP, REPLS, size, fdiry)
    
    ### Combine input and output; attach library size (from `dat_lib`,
    ### imported in an earlier cell) and normalize the depth
    dat = bind_rows(lst_dat_inp, lst_dat_out) %>% 
        left_join(dat_lib, by="Sample") %>%
        mutate(Depth_Norm = Depth / Size)
    
    ### samples absent from the library size table get Size = NA and hence
    ### Depth_Norm = NA; report this instead of letting it pass silently
    if (anyNA(dat$Size)){
        warning(
            "No library size found for sample(s): ",
            paste(unique(dat$Sample[is.na(dat$Size)]), collapse=", "),
            " (bin size ", size, "); Depth_Norm is NA for these rows")
    }
    
    ### rearrange and save results
    dat = dat %>% dplyr::select(
        "Chrom",  "Start", "End", 
        "Sample", "Group", "Replicate", 
        "Depth",  "Size",  "Depth_Norm")
    
    fname = paste0(
        "KS91_K562_hg38_ASTARRseq_Depth.GATA1.unstranded.",
        "bin", size, 
        ".tsv")
    fpath = file.path(fdiry, fname)
    write_tsv(dat, fpath)
}

-----