**Set environment**

In [1]:
### load the project configuration; startup messages and warnings are silenced
### NOTE(review): FD_RES, read_tsv, bind_rows and %>% used later are not loaded
### explicitly in this notebook -- presumably provided by this config; confirm
suppressMessages(suppressWarnings(source("../config/config_sing.R")))
### genomic range containers (GRanges / IRanges)
suppressMessages(suppressWarnings(library("GenomicRanges")))
### track import / export (used below for the BigWig export)
suppressMessages(suppressWarnings(library("rtracklayer")))
### hg38 genome object (used below for the chromosome lengths)
suppressMessages(suppressWarnings(library("BSgenome.Hsapiens.UCSC.hg38")))
### print the environment summary
### NOTE(review): show_env() is not defined here -- presumably from the config
show_env()

You are in Singularity: singularity_proj_combeffect 
BASE DIRECTORY:     /data/reddylab/Kuei 
WORK DIRECTORY:     /data/reddylab/Kuei/out 
CODE DIRECTORY:     /data/reddylab/Kuei/code 
PATH OF SOURCE:     /data/reddylab/Kuei/source 
PATH OF EXECUTABLE: /data/reddylab/Kuei/bin 
PATH OF ANNOTATION: /data/reddylab/Kuei/annotation 
PATH OF PROJECT:    /data/reddylab/Kuei/code/Proj_CombEffect_ENCODE_FCC 
PATH OF RESULTS:    /data/reddylab/Kuei/out/proj_combeffect_encode_fcc 


## Import data and export as BigWig

In [2]:
### assay whose results are processed and the sub-folder holding its tables
ASSAY   = "Tewhey_K562_TileMPRA"
FOLDER  = "coverage"

### tiled regions; one track table is imported per region
REGIONS = c("GATA1", "MYC", "FADS")

### score columns; each one is exported as its own track
COLUMNS = c("Input", "Output", "FC", "Log2FC", "pLog2FC")

### chromosome order, used as factor levels when sorting the ranges
CHROMS  = paste0("chr", c(as.character(1:22), "X", "Y"))

### strand assigned to every range ("*" = unstranded)
STRAND = "*"

### genome object that supplies the chromosome lengths
GENOME = BSgenome.Hsapiens.UCSC.hg38

In [3]:
### Convert the per-region track tables into BigWig tracks.
### For every normalization TYPE the per-region TSV tables are imported and
### stacked; every score column listed in COLUMNS is then written out as a
### separate BigWig file named <ASSAY>.track.<TYPE>.<COLUMN>.bw
TYPES  = c("raw", "norm")

### tables are read from and tracks are written to the same directory;
### it does not depend on TYPE / REGION / COLUMN, so it is built only once
fdiry = file.path(FD_RES, "results", ASSAY, FOLDER, "summary")

for (TYPE in TYPES){
    
    ### import data (one table per region)
    lst = lapply(REGIONS, function(REGION) {

        ### set file path
        fname = paste("track", TYPE, "Log2FC", REGION, "tsv", sep=".") # e.g. "track.raw.Log2FC.GATA1.tsv"
        fpath = file.path(fdiry, fname)

        ### import data
        read_tsv(fpath, show_col_types = FALSE)
    })

    ### combine data
    dat_track = bind_rows(lst)
    
    ### show progress
    print(dim(dat_track))
    print(head(dat_track))
    flush.console()

    ### show some stats
    ### (na.rm = TRUE: otherwise a single NA turns the zero counts into NA;
    ###  the NA entries are counted separately right below)
    cat("#{Input   == 0}:  ", sum(dat_track$Input  == 0, na.rm = TRUE), "\n")
    cat("#{Output  == 0}:  ", sum(dat_track$Output == 0, na.rm = TRUE), "\n")
    cat("#{Both    == 0}:  ", sum((dat_track$Input == 0) & (dat_track$Output == 0), na.rm = TRUE), "\n")
    cat("#{Input   == NA}: ", sum(is.na(dat_track$Input)), "\n")
    cat("#{Output  == NA}: ", sum(is.na(dat_track$Output)), "\n")
    cat("#{FC      == NA}: ", sum(is.na(dat_track$FC)), "\n")
    cat("#{FC      == Inf}:", sum(is.infinite(dat_track$FC)), "\n")
    cat("#{pLog2FC == NA}: ", sum(is.na(dat_track$pLog2FC)), "\n")
    cat("#{pLog2FC == Inf}:", sum(is.infinite(dat_track$pLog2FC)), "\n")
    
    
    for (COLUMN in COLUMNS) {
        
        ### show progress
        cat("\n+++++++++++++++++++++++\n")
        cat("Type:", TYPE, "|", "Column:", COLUMN, "\n\n")
        flush.console()

        ### get the specified scores
        ### - the chosen column is selected by name and renamed to Score
        ### - is.finite() drops NA, NaN, Inf and -Inf in one test
        ### - Chrom becomes a factor so that rows sort in CHROMS order
        dat = dat_track %>% 
            dplyr::select(Chrom, Start, End, Score = dplyr::all_of(COLUMN)) %>% 
            dplyr::filter(is.finite(Score)) %>%
            dplyr::mutate(Chrom = factor(Chrom, levels=CHROMS)) %>%
            dplyr::arrange(Chrom, Start, End)

        ### show progress
        print(dim(dat))
        print(head(dat))
        flush.console()

        ### construct GRanges
        ### NOTE(review): Start + 1 presumably converts 0-based half-open
        ### (BED-like) intervals to the 1-based closed ranges of IRanges -- confirm
        grg = GRanges(
            seqnames   = dat$Chrom,               
            ranges     = IRanges(
                start  = dat$Start+1,
                end    = dat$End), 
            strand     = STRAND,
            seqlengths = seqlengths(GENOME))
        genome(grg) = "hg38"

        ### assign scores
        mcols(grg)$score = dat$Score

        ### export track as bigwig
        fname = paste(ASSAY, "track", TYPE, COLUMN, "bw", sep=".")
        fpath = file.path(fdiry, fname)
        export(grg, fpath, format = "BigWig")

        ### show progress
        cat("\n", fpath, "\n")
        flush.console()
    }
}

[1] 4206820       8
[90m# A tibble: 6 × 8[39m
  Chrom    Start      End Input Output    FC Log2FC pLog2FC
  [3m[90m<chr>[39m[23m    [3m[90m<dbl>[39m[23m    [3m[90m<dbl>[39m[23m [3m[90m<dbl>[39m[23m  [3m[90m<dbl>[39m[23m [3m[90m<dbl>[39m[23m  [3m[90m<dbl>[39m[23m   [3m[90m<dbl>[39m[23m
[90m1[39m chrX  47[4m7[24m[4m8[24m[4m5[24m501 47[4m7[24m[4m8[24m[4m5[24m502  40.4   22.2 0.550 -[31m0[39m[31m.[39m[31m862[39m  -[31m0[39m[31m.[39m[31m834[39m
[90m2[39m chrX  47[4m7[24m[4m8[24m[4m5[24m502 47[4m7[24m[4m8[24m[4m5[24m503  40.4   22.2 0.550 -[31m0[39m[31m.[39m[31m862[39m  -[31m0[39m[31m.[39m[31m834[39m
[90m3[39m chrX  47[4m7[24m[4m8[24m[4m5[24m503 47[4m7[24m[4m8[24m[4m5[24m504  40.4   22.2 0.550 -[31m0[39m[31m.[39m[31m862[39m  -[31m0[39m[31m.[39m[31m834[39m
[90m4[39m chrX  47[4m7[24m[4m8[24m[4m5[24m504 47[4m7[24m[4m8[24m[4m5[24m505  40.4   22.2 0.550 -[31m0[39m[31m.


+++++++++++++++++++++++
Column: Input 

[1] 4206820       4
[90m# A tibble: 6 × 4[39m
  Chrom     Start       End Score
  [3m[90m<fct>[39m[23m     [3m[90m<dbl>[39m[23m     [3m[90m<dbl>[39m[23m [3m[90m<dbl>[39m[23m
[90m1[39m chr8  126[4m7[24m[4m3[24m[4m5[24m901 126[4m7[24m[4m3[24m[4m5[24m902  39.9
[90m2[39m chr8  126[4m7[24m[4m3[24m[4m5[24m902 126[4m7[24m[4m3[24m[4m5[24m903  39.9
[90m3[39m chr8  126[4m7[24m[4m3[24m[4m5[24m903 126[4m7[24m[4m3[24m[4m5[24m904  39.9
[90m4[39m chr8  126[4m7[24m[4m3[24m[4m5[24m904 126[4m7[24m[4m3[24m[4m5[24m905  39.9
[90m5[39m chr8  126[4m7[24m[4m3[24m[4m5[24m905 126[4m7[24m[4m3[24m[4m5[24m906  39.9
[90m6[39m chr8  126[4m7[24m[4m3[24m[4m5[24m906 126[4m7[24m[4m3[24m[4m5[24m907  39.9

 /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/Tewhey_K562_TileMPRA/coverage/summary/Tewhey_K562_TileMPRA.track.raw.Input.bw 

+++++++++++++++++++++++
Column: Output