In [1]:
suppressMessages(suppressWarnings(source("config_sing.R")))
print(FD_WORK)
print(FD_RES)

[1] "/home/mount/work"
[1] "/home/mount/work/out/proj_combeffect"


In [2]:
###################################################
# Import annotated fragments
###################################################

### set column names and types
ctypes = c(col_character(), col_integer(), col_integer(), col_integer(),
           col_character(), col_integer(), col_integer(),
           col_character(), col_double(),  col_integer())
cnames = c("Chrom_Frag", "Start_Frag", "End_Frag", "Count_Frag",
           "Chrom_MTF",  "Start_MTF",  "End_MTF",
           "Motif", "Score", "Overlap")

### set samples
SAMPLES = c(
    paste0("Input", 1:5),
    paste0("Input", 1:5, "_20x"),
    paste0("TFX",   2:5, "_DMSO"),
    paste0("TFX",   2:5, "_Dex"))

### import bed files for each sample 
fdiry = file.path(FD_RES, "annotation_fragment")
fname = "target_PER1.bed.gz"

lst_dat = lapply(SAMPLES, function(sam){
    ### set path
    fpath = file.path(fdiry, sam, fname)
    print(fpath); flush.console()
    
    ### import data
    dat = read_tsv(fpath, col_types=ctypes, col_names=cnames) %>% mutate(Sample = sam)
    return(dat)
})

### arrange data
dat_ann_frag = bind_rows(lst_dat)

### check environment (before release variables)
print(mem_used())
lst_dat = NULL

### check environment (after release variables)
print(mem_used())

[1] "/home/mount/work/out/proj_combeffect/annotation_fragment/Input1/target_PER1.bed.gz"
[1] "/home/mount/work/out/proj_combeffect/annotation_fragment/Input2/target_PER1.bed.gz"
[1] "/home/mount/work/out/proj_combeffect/annotation_fragment/Input3/target_PER1.bed.gz"
[1] "/home/mount/work/out/proj_combeffect/annotation_fragment/Input4/target_PER1.bed.gz"
[1] "/home/mount/work/out/proj_combeffect/annotation_fragment/Input5/target_PER1.bed.gz"
[1] "/home/mount/work/out/proj_combeffect/annotation_fragment/Input1_20x/target_PER1.bed.gz"
[1] "/home/mount/work/out/proj_combeffect/annotation_fragment/Input2_20x/target_PER1.bed.gz"
[1] "/home/mount/work/out/proj_combeffect/annotation_fragment/Input3_20x/target_PER1.bed.gz"
[1] "/home/mount/work/out/proj_combeffect/annotation_fragment/Input4_20x/target_PER1.bed.gz"
[1] "/home/mount/work/out/proj_combeffect/annotation_fragment/Input5_20x/target_PER1.bed.gz"
[1] "/home/mount/work/out/proj_combeffect/annotation_fragment/TFX2_DMSO/target_PER1.bed.gz

In [3]:
head(dat_ann_frag)

Chrom_Frag,Start_Frag,End_Frag,Count_Frag,Chrom_MTF,Start_MTF,End_MTF,Motif,Score,Overlap,Sample
<chr>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<chr>
chr17,8148117,8149012,1,chr17,8148107,8148124,KLF/SP/2,9.0318,7,Input1
chr17,8148117,8149012,1,chr17,8148109,8148121,INSM1,6.1647,4,Input1
chr17,8148117,8149012,1,chr17,8148109,8148129,GC-tract,8.3277,12,Input1
chr17,8148117,8149012,1,chr17,8148123,8148133,GLI,7.4318,10,Input1
chr17,8148117,8149012,1,chr17,8148124,8148139,NR/17,7.8649,15,Input1
chr17,8148117,8149012,1,chr17,8148126,8148137,KLF/SP/1,11.3678,11,Input1


In [4]:
mtf = "AHR"
dat = dat_ann_frag %>% dplyr::filter(Motif == mtf)

In [6]:
dim(dat)

In [5]:
head(dat)

Chrom_Frag,Start_Frag,End_Frag,Count_Frag,Chrom_MTF,Start_MTF,End_MTF,Motif,Score,Overlap,Sample
<chr>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<chr>
chr17,8150329,8151343,1,chr17,8151284,8151290,AHR,7.9331,6,Input1
chr17,8150402,8151376,1,chr17,8151284,8151290,AHR,7.9331,6,Input1
chr17,8150410,8151356,1,chr17,8151284,8151290,AHR,7.9331,6,Input1
chr17,8150421,8151368,1,chr17,8151284,8151290,AHR,7.9331,6,Input1
chr17,8150585,8151628,1,chr17,8151284,8151290,AHR,7.9331,6,Input1
chr17,8150681,8151651,1,chr17,8151284,8151290,AHR,7.9331,6,Input1


In [9]:
fname = "AHR_merge.bed.gz"
TARGET= "target_PER1"

In [13]:
SAMPLES = c(
    paste0("Input", 1:5),
    paste0("Input", 1:5, "_20x"),
    paste0("TFX",   2:5, "_DMSO"),
    paste0("TFX",   2:5, "_Dex"))

In [17]:
mtf = str_remove_all(fname, pattern = "_merge.bed.gz")
msg = paste(mtf, "Start")
print(msg); flush.console()

###
fdiry  = file.path(FD_RES, "annotation_fragment")
lst_dat = lapply(SAMPLES, function(sam){
    ### set path
    fpath = file.path(fdiry, sam, TARGET, fname)    
    #print(fpath); flush.console()

    ### import data
    dat = read_tsv(fpath, col_types=ctypes, col_names=cnames)
    if (nrow(dat) == 0){
        return(NULL)
    } else {
        dat = dat %>% 
            mutate(Sample = sam) %>%
            mutate(Length_MTF = End_MTF - Start_MTF) %>%
            mutate(Length_Dif = Length_MTF - Overlap)
        return(dat)
    }
})

### arrange data
dat2 = bind_rows(lst_dat)

[1] "AHR Start"


In [18]:
dim(dat2)

In [16]:
dat2

Chrom_Frag,Start_Frag,End_Frag,Count_Frag,Chrom_MTF,Start_MTF,End_MTF,Motif,Score,Overlap,Sample,Length_MTF,Length_Dif
<chr>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>
chr17,8150329,8151343,1,chr17,8151284,8151290,AHR,7.9331,6,Input1,6,0
chr17,8150402,8151376,1,chr17,8151284,8151290,AHR,7.9331,6,Input1,6,0
chr17,8150410,8151356,1,chr17,8151284,8151290,AHR,7.9331,6,Input1,6,0
chr17,8150421,8151368,1,chr17,8151284,8151290,AHR,7.9331,6,Input1,6,0
chr17,8150585,8151628,1,chr17,8151284,8151290,AHR,7.9331,6,Input1,6,0
chr17,8150681,8151651,1,chr17,8151284,8151290,AHR,7.9331,6,Input1,6,0
chr17,8150320,8151356,1,chr17,8151284,8151290,AHR,7.9331,6,Input2,6,0
chr17,8150394,8151381,1,chr17,8151284,8151290,AHR,7.9331,6,Input2,6,0
chr17,8150421,8151368,1,chr17,8151284,8151290,AHR,7.9331,6,Input2,6,0
chr17,8150504,8151322,1,chr17,8151284,8151290,AHR,7.9331,6,Input2,6,0
