**Set environment**

In [1]:
### load the project configuration script; messages and warnings raised while sourcing are silenced
### NOTE(review): FD_RES and the tidyverse functions used below are not defined in this notebook - presumably they come from this script; confirm
suppressMessages(suppressWarnings(source("../config/config_sing.R")))
### print the environment / directory summary (show_env is not defined in this notebook; presumably provided by the sourced config)
show_env()

You are in Singularity: singularity_proj_combeffect 
BASE DIRECTORY:     /data/reddylab/Kuei 
WORK DIRECTORY:     /data/reddylab/Kuei/out 
CODE DIRECTORY:     /data/reddylab/Kuei/code 
PATH OF SOURCE:     /data/reddylab/Kuei/source 
PATH OF EXECUTABLE: /data/reddylab/Kuei/bin 
PATH OF ANNOTATION: /data/reddylab/Kuei/annotation 
PATH OF PROJECT:    /data/reddylab/Kuei/code/Proj_CombEffect_ENCODE_FCC 
PATH OF RESULTS:    /data/reddylab/Kuei/out/proj_combeffect_encode_fcc 


## Import data

In [2]:
### assays to compare; TYPES and LABELS are keyed by the assay name
ASSAYS  = c("KS91_K562_ASTARRseq", "A001_K562_WSTARRseq", "Tewhey_K562_TileMPRA")
TYPES   = setNames(c("raw", "raw", "norm"), ASSAYS)
LABELS  = setNames(c("ASTARR", "WSTARR", "TMPRA"), ASSAYS)

### regions to import and the result sub-folder that holds the track files
REGIONS = c("GATA1", "MYC", "FADS")
FOLDER  = "coverage"

In [3]:
### Read the track table of every assay for each region and stack the assays
### into one long table per region; the result is a list named by region.
lst_track = lapply(REGIONS, function(REGION){

    ### reader for a single assay within the current region
    read_assay_track = function(ASSAY){
        ### look up the score type and the short label of this assay
        txt_type  = TYPES[ASSAY]
        txt_label = LABELS[ASSAY]
        cat("Assay:", ASSAY, "|", "Type:", txt_type, "|", "Region:", REGION, "\n")
        flush.console()

        ### <FD_RES>/results/<assay>/<FOLDER>/summary/track.<type>.Log2FC.<region>.tsv
        fname = paste("track", txt_type, "Log2FC", REGION, "tsv", sep=".")
        fpath = file.path(FD_RES, "results", ASSAY, FOLDER, "summary", fname)

        ### read the table and tag every row with the assay label
        dat_assay = read_tsv(fpath, show_col_types = FALSE)
        dat_assay$Assay = txt_label
        dat_assay
    }

    ### stack all assays of this region into one long table
    bind_rows(lapply(ASSAYS, read_assay_track))
})

names(lst_track) = REGIONS

Assay: KS91_K562_ASTARRseq | Type: raw | Region: GATA1 
Assay: A001_K562_WSTARRseq | Type: raw | Region: GATA1 
Assay: Tewhey_K562_TileMPRA | Type: norm | Region: GATA1 
Assay: KS91_K562_ASTARRseq | Type: raw | Region: MYC 
Assay: A001_K562_WSTARRseq | Type: raw | Region: MYC 
Assay: Tewhey_K562_TileMPRA | Type: norm | Region: MYC 
Assay: KS91_K562_ASTARRseq | Type: raw | Region: FADS 
Assay: A001_K562_WSTARRseq | Type: raw | Region: FADS 
Assay: Tewhey_K562_TileMPRA | Type: norm | Region: FADS 


## Separate into FC and pLog2FC

In [4]:
### Reshape one long track table into a wide table of a single score column.
###   dat_track: long table with columns Chrom, Start, End, Assay and the score column
###   column:    name of the score column to keep (e.g. "FC" or "pLog2FC")
### Returns one row per (Chrom, Start, End) and one column per assay.
### Scores that are NA or infinite are dropped before reshaping, so a position is
### kept when at least one assay has a usable score; the missing ones become NA.
fun_track_to_wide = function(dat_track, column){
    dat_track %>% 
        ### all_of() selects strictly by the column name passed in
        dplyr::select(Chrom, Start, End, Assay, Score = dplyr::all_of(column)) %>% 
        dplyr::filter(!is.na(Score), !is.infinite(Score)) %>% 
        tidyr::spread(Assay, Score)
}

### the score column is passed explicitly instead of being read from a global inside a closure;
### COLUMN is still assigned in case other cells refer to it
COLUMN = "FC"
lst_track_Xfc = lapply(lst_track, fun_track_to_wide, column = COLUMN)

COLUMN = "pLog2FC"
lst_track_Lfc = lapply(lst_track, fun_track_to_wide, column = COLUMN)

In [7]:
### quick look at the FC tracks, one block of output per region
lst = lst_track_Xfc
print(names(lst))

for (dat in lst){
    ### table size and first rows
    cat("\n++++++++++++++++++++++++\n")
    cat(dim(dat), "\n\n")
    print(head(dat, n = 3))

    ### chromosome(s) present and the span of the start / end coordinates
    cat("\n", unique(dat[["Chrom"]]), "\n")
    print(range(dat[["Start"]]))
    print(range(dat[["End"]]))
}

[1] "GATA1" "MYC"   "FADS" 

++++++++++++++++++++++++
1915106 6 

[90m# A tibble: 3 × 6[39m
  Chrom    Start      End ASTARR TMPRA WSTARR
  [3m[90m<chr>[39m[23m    [3m[90m<dbl>[39m[23m    [3m[90m<dbl>[39m[23m  [3m[90m<dbl>[39m[23m [3m[90m<dbl>[39m[23m  [3m[90m<dbl>[39m[23m
[90m1[39m chrX  47[4m7[24m[4m8[24m[4m5[24m501 47[4m7[24m[4m8[24m[4m5[24m502   1.17  1.82  0.888
[90m2[39m chrX  47[4m7[24m[4m8[24m[4m5[24m502 47[4m7[24m[4m8[24m[4m5[24m503   1.17  1.82  0.888
[90m3[39m chrX  47[4m7[24m[4m8[24m[4m5[24m503 47[4m7[24m[4m8[24m[4m5[24m504   1.17  1.82  0.888

 chrX 
[1] 47785501 49880650
[1] 47785502 49880651

++++++++++++++++++++++++
2000650 6 

[90m# A tibble: 3 × 6[39m
  Chrom     Start       End ASTARR TMPRA WSTARR
  [3m[90m<chr>[39m[23m     [3m[90m<dbl>[39m[23m     [3m[90m<dbl>[39m[23m  [3m[90m<dbl>[39m[23m [3m[90m<dbl>[39m[23m  [3m[90m<dbl>[39m[23m
[90m1[39m chr8  126[4m7[24m[4m3[24m

In [8]:
### quick look at the pLog2FC tracks, one block of output per region
lst = lst_track_Lfc
print(names(lst))

for (dat in lst){
    ### table size and first rows
    cat("\n++++++++++++++++++++++++\n")
    cat(dim(dat), "\n\n")
    print(head(dat, n = 3))

    ### chromosome(s) present and the span of the start / end coordinates
    cat("\n", unique(dat[["Chrom"]]), "\n")
    print(range(dat[["Start"]]))
    print(range(dat[["End"]]))
}

[1] "GATA1" "MYC"   "FADS" 

++++++++++++++++++++++++
2095150 6 

[90m# A tibble: 3 × 6[39m
  Chrom    Start      End  ASTARR TMPRA   WSTARR
  [3m[90m<chr>[39m[23m    [3m[90m<dbl>[39m[23m    [3m[90m<dbl>[39m[23m   [3m[90m<dbl>[39m[23m [3m[90m<dbl>[39m[23m    [3m[90m<dbl>[39m[23m
[90m1[39m chrX  47[4m7[24m[4m8[24m[4m5[24m501 47[4m7[24m[4m8[24m[4m5[24m502 0.002[4m0[24m[4m8[24m 0.864 -[31m0[39m[31m.[39m[31m00[39m[31m5[4m1[24m[4m4[24m[39m
[90m2[39m chrX  47[4m7[24m[4m8[24m[4m5[24m502 47[4m7[24m[4m8[24m[4m5[24m503 0.002[4m0[24m[4m8[24m 0.864 -[31m0[39m[31m.[39m[31m00[39m[31m5[4m1[24m[4m4[24m[39m
[90m3[39m chrX  47[4m7[24m[4m8[24m[4m5[24m503 47[4m7[24m[4m8[24m[4m5[24m504 0.002[4m0[24m[4m8[24m 0.864 -[31m0[39m[31m.[39m[31m00[39m[31m5[4m1[24m[4m4[24m[39m

 chrX 
[1] 47785501 49880650
[1] 47785502 49880651

++++++++++++++++++++++++
2000650 6 

[90m# A tibble: 3 × 6[39m
  Chrom  

## Set windows

In [20]:
### window width and step between consecutive window starts
SIZE    = 200
STEP    =  50

### Build sliding windows for every region from the span covered by its track.
### Windows start at the smallest Start of the track and advance by STEP;
### no window extends past the largest End of the track.
### Returns a list (named by region) of data frames with columns Chrom, Start, End, Loc,
### where Loc is the integer midpoint of the window.
lst_window = lapply(REGIONS, function(REGION){
    ### extract
    dat_track = lst_track[[REGION]]
    
    ### set window ranges
    chrom   = unique(dat_track$Chrom)
    x_start = min(dat_track$Start)
    x_end   = max(dat_track$End) - SIZE

    ### fail early with a clear message instead of a cryptic error from mutate() / seq()
    if (length(chrom) != 1) {
        stop("Region ", REGION, ": expected exactly one chromosome, found ", length(chrom))
    }
    if (!isTRUE(x_end >= x_start)) {
        stop("Region ", REGION, ": track span is missing or shorter than the window size (", SIZE, ")")
    }

    ### define windows (window starts are computed once and reused for the ends)
    vec_start  = seq(x_start, x_end, by = STEP)
    dat_window = data.frame(
        Start = vec_start,
        End   = vec_start + SIZE) %>% 
        dplyr::mutate(
            Chrom = chrom,
            Start = as.integer(Start),
            End   = as.integer(End),
            Loc   = as.integer((Start + End) / 2)) %>%
        dplyr::select(Chrom, Start, End, Loc)

    ### show info
    cat("\n^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n")
    cat("Sliding windows of", REGION, "\n")
    cat(dim(dat_window), "\n\n")
    print(head(dat_window))
    print(tail(dat_window))
    
    ### return results
    return(dat_window)
})

names(lst_window) = REGIONS


^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Sliding windows of GATA1 
41900 4 

  Chrom    Start      End      Loc
1  chrX 47785501 47785701 47785601
2  chrX 47785551 47785751 47785651
3  chrX 47785601 47785801 47785701
4  chrX 47785651 47785851 47785751
5  chrX 47785701 47785901 47785801
6  chrX 47785751 47785951 47785851
      Chrom    Start      End      Loc
41895  chrX 49880201 49880401 49880301
41896  chrX 49880251 49880451 49880351
41897  chrX 49880301 49880501 49880401
41898  chrX 49880351 49880551 49880451
41899  chrX 49880401 49880601 49880501
41900  chrX 49880451 49880651 49880551

^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Sliding windows of MYC 
40010 4 

  Chrom     Start       End       Loc
1  chr8 126735901 126736101 126736001
2  chr8 126735951 126736151 126736051
3  chr8 126736001 126736201 126736101
4  chr8 126736051 126736251 126736151
5  chr8 126736101 126736301 126736201
6  chr8 126736151 126736351 126736251
      Chrom     Start       End       Loc
40005  chr8 128736101 128736301 128736201


## Save the sliding windows

In [21]:
### output folder; created on first use so the cell also runs on a fresh results tree
### (FD_RES is not defined in this notebook; presumably set by the sourced config)
fdiry = file.path(FD_RES, "results", "comparison", "comparison_local")
dir.create(fdiry, recursive = TRUE, showWarnings = FALSE)

### file name tags are the same for every region, so build them once
txt_size = paste0("size_", SIZE, "bp")
txt_step = paste0("step_", STEP, "bp")

### write one window table per region
lst = lst_window
for (REGION in REGIONS) {
    ### set file path: window.size_<SIZE>bp.step_<STEP>bp.<REGION>.tsv
    fname = paste("window", txt_size, txt_step, REGION, "tsv", sep=".")
    fpath = file.path(fdiry, fname)
    
    ### save data
    dat = lst[[REGION]]
    write_tsv(dat, fpath)
    
    ### show progress
    cat("+++++++++++++++++++\n")
    cat("Region:", REGION, "\n")
    cat("Save file:\n")
    cat(fpath, "\n\n")
}

+++++++++++++++++++
Region: GATA1 
Save file:
/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/comparison/comparison_local/window.size_200bp.step_50bp.GATA1.tsv 

+++++++++++++++++++
Region: MYC 
Save file:
/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/comparison/comparison_local/window.size_200bp.step_50bp.MYC.tsv 

+++++++++++++++++++
Region: FADS 
Save file:
/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/comparison/comparison_local/window.size_200bp.step_50bp.FADS.tsv 



## Save the track (FC)

In [26]:
### output folder; created on first use so the cell also runs on a fresh results tree
### (FD_RES is not defined in this notebook; presumably set by the sourced config)
fdiry = file.path(FD_RES, "results", "comparison", "comparison_local")
dir.create(fdiry, recursive = TRUE, showWarnings = FALSE)

### write one wide FC table per region
lst = lst_track_Xfc
for (REGION in REGIONS) {
    ### set file path: track.FC.<REGION>.tsv
    fname = paste("track", "FC", REGION, "tsv", sep=".")
    fpath = file.path(fdiry, fname)
    
    ### save data
    dat = lst[[REGION]]
    write_tsv(dat, fpath)
    
    ### show progress
    cat("+++++++++++++++++++\n")
    cat("Region:", REGION, "\n")
    cat("Save file:\n")
    cat(fpath, "\n\n")
}

+++++++++++++++++++
Region: GATA1 
Save file:
/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/comparison/comparison_local/track.FC.GATA1.tsv 

+++++++++++++++++++
Region: MYC 
Save file:
/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/comparison/comparison_local/track.FC.MYC.tsv 

+++++++++++++++++++
Region: FADS 
Save file:
/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/comparison/comparison_local/track.FC.FADS.tsv 



## Save the track (pLog2FC)

In [27]:
### output folder; created on first use so the cell also runs on a fresh results tree
### (FD_RES is not defined in this notebook; presumably set by the sourced config)
fdiry = file.path(FD_RES, "results", "comparison", "comparison_local")
dir.create(fdiry, recursive = TRUE, showWarnings = FALSE)

### write one wide pLog2FC table per region
lst = lst_track_Lfc
for (REGION in REGIONS) {
    ### set file path: track.pLog2FC.<REGION>.tsv
    fname = paste("track", "pLog2FC", REGION, "tsv", sep=".")
    fpath = file.path(fdiry, fname)
    
    ### save data
    dat = lst[[REGION]]
    write_tsv(dat, fpath)
    
    ### show progress
    cat("+++++++++++++++++++\n")
    cat("Region:", REGION, "\n")
    cat("Save file:\n")
    cat(fpath, "\n\n")
}

+++++++++++++++++++
Region: GATA1 
Save file:
/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/comparison/comparison_local/track.pLog2FC.GATA1.tsv 

+++++++++++++++++++
Region: MYC 
Save file:
/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/comparison/comparison_local/track.pLog2FC.MYC.tsv 

+++++++++++++++++++
Region: FADS 
Save file:
/data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/comparison/comparison_local/track.pLog2FC.FADS.tsv 

