**Set environment**

In [1]:
suppressMessages(suppressWarnings(source("../config/config_sing.R")))
show_env()

You are in Singularity: singularity_proj_combeffect 
BASE DIRECTORY:     /mount/work 
PATH OF SOURCE:     /mount/work/source 
PATH OF EXECUTABLE: /mount/work/exe 
PATH OF ANNOTATION: /mount/work/annotation 
PATH OF PROJECT:    /mount/project 
PATH OF RESULTS:    /mount/work/out/proj_combeffect_encode_fcc 


## Helper function

In [2]:
PREFIX  = "KS91_K562_ASTARRseq"

GROUPS  = c("Input", "Output")

SAMPLES = c(
    paste0("Input_rep",  1:6),
    paste0("Output_rep", 1:4))

CNAMES = c("Chrom", "Start", "End", "Count")

get_info = function(fpath, strings){
    idx = str_detect(string = fpath, pattern = strings)
    return(strings[idx])
}

get_group  = function(fpath){return(get_info(fpath, GROUPS))}
get_sample = function(fpath){return(get_info(fpath, SAMPLES))}

## Get file paths and set metadata

In [3]:
PREFIX = "KS91_K562_ASTARRseq"
FOLDER = "coverage_astarrseq_peak_macs_input"
fglob  = file.path(FD_RES, PREFIX, FOLDER, "*counts.txt.gz")
fpaths = Sys.glob(fglob)
for (fpath in fpaths){
    print(fpath)
    print(get_group(fpath))
    print(get_sample(fpath))
}

[1] "/mount/work/out/proj_combeffect_encode_fcc/KS91_K562_ASTARRseq/coverage_astarrseq_peak_macs_input/KS91_K562_hg38_ASTARRseq_Input_rep1.masked.dedup.fragments.counts.txt.gz"
[1] "Input"
[1] "Input_rep1"
[1] "/mount/work/out/proj_combeffect_encode_fcc/KS91_K562_ASTARRseq/coverage_astarrseq_peak_macs_input/KS91_K562_hg38_ASTARRseq_Input_rep2.masked.dedup.fragments.counts.txt.gz"
[1] "Input"
[1] "Input_rep2"
[1] "/mount/work/out/proj_combeffect_encode_fcc/KS91_K562_ASTARRseq/coverage_astarrseq_peak_macs_input/KS91_K562_hg38_ASTARRseq_Input_rep3.masked.dedup.fragments.counts.txt.gz"
[1] "Input"
[1] "Input_rep3"
[1] "/mount/work/out/proj_combeffect_encode_fcc/KS91_K562_ASTARRseq/coverage_astarrseq_peak_macs_input/KS91_K562_hg38_ASTARRseq_Input_rep4.masked.dedup.fragments.counts.txt.gz"
[1] "Input"
[1] "Input_rep4"
[1] "/mount/work/out/proj_combeffect_encode_fcc/KS91_K562_ASTARRseq/coverage_astarrseq_peak_macs_input/KS91_K562_hg38_ASTARRseq_Input_rep5.masked.dedup.fragments.counts.txt.gz"

In [4]:
dat_meta = data.frame(
    Sample = sapply(fpaths, get_sample),
    Group  = sapply(fpaths, get_group),
    FPath  = fpaths
)
rownames(dat_meta) = sapply(fpaths, get_sample)
dat_meta

Unnamed: 0_level_0,Sample,Group,FPath
Unnamed: 0_level_1,<chr>,<chr>,<chr>
Input_rep1,Input_rep1,Input,/mount/work/out/proj_combeffect_encode_fcc/KS91_K562_ASTARRseq/coverage_astarrseq_peak_macs_input/KS91_K562_hg38_ASTARRseq_Input_rep1.masked.dedup.fragments.counts.txt.gz
Input_rep2,Input_rep2,Input,/mount/work/out/proj_combeffect_encode_fcc/KS91_K562_ASTARRseq/coverage_astarrseq_peak_macs_input/KS91_K562_hg38_ASTARRseq_Input_rep2.masked.dedup.fragments.counts.txt.gz
Input_rep3,Input_rep3,Input,/mount/work/out/proj_combeffect_encode_fcc/KS91_K562_ASTARRseq/coverage_astarrseq_peak_macs_input/KS91_K562_hg38_ASTARRseq_Input_rep3.masked.dedup.fragments.counts.txt.gz
Input_rep4,Input_rep4,Input,/mount/work/out/proj_combeffect_encode_fcc/KS91_K562_ASTARRseq/coverage_astarrseq_peak_macs_input/KS91_K562_hg38_ASTARRseq_Input_rep4.masked.dedup.fragments.counts.txt.gz
Input_rep5,Input_rep5,Input,/mount/work/out/proj_combeffect_encode_fcc/KS91_K562_ASTARRseq/coverage_astarrseq_peak_macs_input/KS91_K562_hg38_ASTARRseq_Input_rep5.masked.dedup.fragments.counts.txt.gz
Input_rep6,Input_rep6,Input,/mount/work/out/proj_combeffect_encode_fcc/KS91_K562_ASTARRseq/coverage_astarrseq_peak_macs_input/KS91_K562_hg38_ASTARRseq_Input_rep6.masked.dedup.fragments.counts.txt.gz
Output_rep1,Output_rep1,Output,/mount/work/out/proj_combeffect_encode_fcc/KS91_K562_ASTARRseq/coverage_astarrseq_peak_macs_input/KS91_K562_hg38_ASTARRseq_Output_rep1.f3q10.fragments.counts.txt.gz
Output_rep2,Output_rep2,Output,/mount/work/out/proj_combeffect_encode_fcc/KS91_K562_ASTARRseq/coverage_astarrseq_peak_macs_input/KS91_K562_hg38_ASTARRseq_Output_rep2.f3q10.fragments.counts.txt.gz
Output_rep3,Output_rep3,Output,/mount/work/out/proj_combeffect_encode_fcc/KS91_K562_ASTARRseq/coverage_astarrseq_peak_macs_input/KS91_K562_hg38_ASTARRseq_Output_rep3.f3q10.fragments.counts.txt.gz
Output_rep4,Output_rep4,Output,/mount/work/out/proj_combeffect_encode_fcc/KS91_K562_ASTARRseq/coverage_astarrseq_peak_macs_input/KS91_K562_hg38_ASTARRseq_Output_rep4.f3q10.fragments.counts.txt.gz


## Import data and arrange into count matrix

In [5]:
cnames = c("Chrom", "Start", "End", "Count")
ctypes = c(col_character(), col_integer(), col_integer(), col_integer())

lst = lapply(fpaths, function(fpath){
    sam = get_sample(fpath)
    dat = read_tsv(fpath, col_names = cnames, col_types = ctypes) %>% 
        dplyr::filter(Count != ".") %>% 
        dplyr::mutate(Count  = as.integer(Count)) %>%
        dplyr::mutate(
            Peak   = paste(Chrom, Start, End, sep="_"),
            Sample = sam)
    return(dat)
})

dat_count = bind_rows(lst) %>% spread(Sample, Count)
head(dat_count)

Chrom,Start,End,Peak,Input_rep1,Input_rep2,Input_rep3,Input_rep4,Input_rep5,Input_rep6,Output_rep1,Output_rep2,Output_rep3,Output_rep4
<chr>,<dbl>,<dbl>,<chr>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>
chr1,10015,10442,chr1_10015_10442,155,214,257,236,185,214,7,12,4,11
chr1,14253,14645,chr1_14253_14645,110,144,160,141,130,130,8,26,30,57
chr1,16015,16477,chr1_16015_16477,141,208,206,190,202,182,9,9,18,23
chr1,17237,17772,chr1_17237_17772,259,350,399,367,369,331,7,13,23,59
chr1,28903,29613,chr1_28903_29613,263,338,368,333,352,317,12,18,3,32
chr1,30803,31072,chr1_30803_31072,82,115,171,136,105,115,13,22,14,33


## Store the results

In [6]:
fdiry = file.path(FD_RES, PREFIX, FOLDER, "summary")

fname = "wgs_count_matrix.tsv"
fpath = file.path(fdiry, fname)
write_tsv(dat_count, fpath)

fname = "wgs_metadata.tsv"
fpath = file.path(fdiry, fname)
write_tsv(dat_meta, fpath)