In [1]:
suppressMessages(suppressWarnings(source("config_sing.R")))
print(FD_WORK)
print(FD_RES)

[1] "/home/mount/work"
[1] "/home/mount/work/out/proj_combeffect"


In [9]:
TARGET="chr1"

SAMPLES_INP20X = c(
    paste0("Input", 1:5, "_20x"),
    paste0("TFX",   2:5, "_DMSO"),
    paste0("TFX",   2:5, "_Dex"))
SAMPLES = SAMPLES_INP20X

### set column names and types
CTYPES = c(col_character(), col_integer(), col_integer(), col_integer(),
           col_character(), col_integer(), col_integer(),
           col_character(), col_double(),  col_integer())
CNAMES = c("Chrom_Frag", "Start_Frag", "End_Frag", "Count_Frag",
           "Chrom_MTF",  "Start_MTF",  "End_MTF",
           "Motif", "Score", "Overlap")

THRESHOLD_COVER = 10
THRESHOLD_MOTIF = 10.81

### print start message
cat("Target:           ", TARGET,          "\n")
cat("Threshold (Cover):", THRESHOLD_COVER, "\n")
cat("Threshold (Motif):", THRESHOLD_MOTIF, "\n")

fname = "AP1_2_merge.bed.gz"
fdiry  = file.path(FD_RES, "annotation_fragment")

Target:            chr1 
Threshold (Cover): 10 
Threshold (Motif): 10.81 


In [8]:
timer = Sys.time()
### start message and get the name of motif
mtf = str_remove_all(fname, pattern = "_merge.bed.gz")
msg = paste(mtf, "Start")
cat(msg, "\n"); flush.console()

lst_dat = lapply(SAMPLES[1:5], function(sam){
    ### set path
    fpath = file.path(fdiry, sam, TARGET, fname)    
    msg   = paste(mtf, "Import", fpath)
    cat(msg, "\n"); flush.console()
    
    ### import data
    dat = read_tsv(fpath, col_types=CTYPES, col_names=CNAMES)
    if (nrow(dat) == 0){
        return(NULL)
    } else {
        ###
        num1 = nrow(dat)    
        dat = dat %>% 
            mutate(Sample = sam) %>%
            mutate(Length_MTF = End_MTF - Start_MTF)  %>%
            mutate(Length_Dif = Length_MTF - Overlap) %>% 
            dplyr::filter(Length_Dif == 0) %>%
            dplyr::filter(Score >= THRESHOLD_MOTIF)
        num2 = nrow(dat)

        ###
        msg = paste(num1, num2, sep="-")
        msg = paste(mtf, "Filter", sam, msg)
        cat(msg, "\n"); flush.console()
        return(dat)
    }
})

print(Sys.time() - timer)

AP1_2_merge.bed Start 
AP1_2_merge.bed Import /home/mount/work/out/proj_combeffect/annotation_fragment/Input1_20x/chr1/AP1_2_merge.bed 
AP1_2_merge.bed Filter Input1_20x 23733960-780112 
AP1_2_merge.bed Import /home/mount/work/out/proj_combeffect/annotation_fragment/Input2_20x/chr1/AP1_2_merge.bed 
AP1_2_merge.bed Filter Input2_20x 22582536-742146 
AP1_2_merge.bed Import /home/mount/work/out/proj_combeffect/annotation_fragment/Input3_20x/chr1/AP1_2_merge.bed 
AP1_2_merge.bed Filter Input3_20x 22763411-748012 
AP1_2_merge.bed Import /home/mount/work/out/proj_combeffect/annotation_fragment/Input4_20x/chr1/AP1_2_merge.bed 
AP1_2_merge.bed Filter Input4_20x 26233440-865834 
AP1_2_merge.bed Import /home/mount/work/out/proj_combeffect/annotation_fragment/Input5_20x/chr1/AP1_2_merge.bed 
AP1_2_merge.bed Filter Input5_20x 22369860-736095 
Time difference of 2.635614 mins


In [10]:
timer = Sys.time()
### start message and get the name of motif
mtf = str_remove_all(fname, pattern = "_merge.bed.gz")
msg = paste(mtf, "Start")
cat(msg, "\n"); flush.console()

lst_dat = lapply(SAMPLES[1:5], function(sam){
    ### set path
    fpath = file.path(fdiry, sam, TARGET, fname)    
    msg   = paste(mtf, "Import", fpath)
    cat(msg, "\n"); flush.console()
    
    ### import data
    dat = read_tsv(fpath, col_types=CTYPES, col_names=CNAMES)
    if (nrow(dat) == 0){
        return(NULL)
    } else {
        ###
        num1 = nrow(dat)    
        dat = dat %>% 
            mutate(Sample = sam) %>%
            mutate(Length_MTF = End_MTF - Start_MTF)  %>%
            mutate(Length_Dif = Length_MTF - Overlap) %>% 
            dplyr::filter(Length_Dif == 0) %>%
            dplyr::filter(Score >= THRESHOLD_MOTIF)
        num2 = nrow(dat)

        ###
        msg = paste(num1, num2, sep="-")
        msg = paste(mtf, "Filter", sam, msg)
        cat(msg, "\n"); flush.console()
        return(dat)
    }
})

print(Sys.time() - timer)

AP1_2 Start 
AP1_2 Import /home/mount/work/out/proj_combeffect/annotation_fragment/Input1_20x/chr1/AP1_2_merge.bed.gz 
AP1_2 Filter Input1_20x 23733960-780112 
AP1_2 Import /home/mount/work/out/proj_combeffect/annotation_fragment/Input2_20x/chr1/AP1_2_merge.bed.gz 
AP1_2 Filter Input2_20x 22582536-742146 
AP1_2 Import /home/mount/work/out/proj_combeffect/annotation_fragment/Input3_20x/chr1/AP1_2_merge.bed.gz 
AP1_2 Filter Input3_20x 22763411-748012 
AP1_2 Import /home/mount/work/out/proj_combeffect/annotation_fragment/Input4_20x/chr1/AP1_2_merge.bed.gz 
AP1_2 Filter Input4_20x 26233440-865834 
AP1_2 Import /home/mount/work/out/proj_combeffect/annotation_fragment/Input5_20x/chr1/AP1_2_merge.bed.gz 
AP1_2 Filter Input5_20x 22369860-736095 
Time difference of 3.931366 mins


In [5]:
dim(lst_dat[[1]])

In [6]:
head(lst_dat[[1]])

Chrom_Frag,Start_Frag,End_Frag,Count_Frag,Chrom_MTF,Start_MTF,End_MTF,Motif,Score,Overlap,Sample,Length_MTF,Length_Dif
<chr>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>
chr1,14376,15250,1,chr1,15176,15187,AP1/2,12.1785,11,Input1_20x,11,0
chr1,14424,15516,1,chr1,15176,15187,AP1/2,12.1785,11,Input1_20x,11,0
chr1,14448,15495,1,chr1,15176,15187,AP1/2,12.1785,11,Input1_20x,11,0
chr1,14449,15491,1,chr1,15176,15187,AP1/2,12.1785,11,Input1_20x,11,0
chr1,14713,15705,2,chr1,15176,15187,AP1/2,12.1785,11,Input1_20x,11,0
chr1,14784,15828,2,chr1,15176,15187,AP1/2,12.1785,11,Input1_20x,11,0
