## Set environment

In [1]:
### load the project configuration
### NOTE(review): FD_RES, show_env(), read_tsv(), bind_rows() and %>% are used in
### this notebook but are not defined or attached here -- presumably they are
### provided by config_sing.R; confirm
suppressMessages(suppressWarnings(source("../config/config_sing.R")))
### Bioconductor packages: GRanges / IRanges containers (GenomicRanges),
### BigWig export (rtracklayer) and the hg38 genome object used for sequence lengths
suppressMessages(suppressWarnings(library("GenomicRanges")))
suppressMessages(suppressWarnings(library("rtracklayer")))
suppressMessages(suppressWarnings(library("BSgenome.Hsapiens.UCSC.hg38")))
### print the environment / directory summary
show_env()

You are in Singularity: singularity_proj_combeffect 
BASE DIRECTORY:     /data/reddylab/Kuei 
WORK DIRECTORY:     /data/reddylab/Kuei/out 
CODE DIRECTORY:     /data/reddylab/Kuei/code 
PATH OF SOURCE:     /data/reddylab/Kuei/source 
PATH OF EXECUTABLE: /data/reddylab/Kuei/bin 
PATH OF ANNOTATION: /data/reddylab/Kuei/annotation 
PATH OF PROJECT:    /data/reddylab/Kuei/code/Proj_CombEffect_ENCODE_FCC 
PATH OF RESULTS:    /data/reddylab/Kuei/out/proj_combeffect_encode_fcc 


## Import data

In [2]:
### data set: assay name and target regions (one track table is read per region)
ASSAY   = "KS91_K562_ASTARRseq"
REGIONS = c("GATA1", "MYC", "FADS")

### pieces used to build file paths and file names
FOLDER  = "coverage"
TYPE    = "raw"

### score columns of the track table
COLUMNS = c("Input", "Output", "FC", "Log2FC", "pLog2FC")

### chromosome order used when sorting: chr1 ... chr22, chrX, chrY
CHROMS  = paste0("chr", c(seq_len(22), "X", "Y"))

### settings for the genomic ranges: unstranded, hg38
STRAND  = "*"
GENOME  = BSgenome.Hsapiens.UCSC.hg38

In [3]:
### import data: read one track table per region
lst = lapply(REGIONS, function(REGION) {
    
    ### set file path; the file name is built from the shared TYPE setting so
    ### that it stays consistent with the names of the exported BigWig files
    fdiry = file.path(FD_RES, "results", ASSAY, FOLDER, "summary")
    fname = paste("track", TYPE, "Log2FC", REGION, "tsv", sep=".") # e.g. "track.raw.Log2FC.GATA1.tsv"
    fpath = file.path(fdiry, fname)
    
    ### import data
    dat = read_tsv(fpath, show_col_types = FALSE)
    return(dat)
})

### combine data: stack the per-region tables into a single table
dat_track = bind_rows(lst)
head(dat_track)

### show some stats
### counts of zero / missing / infinite values per column; in the recorded run
### FC is NA exactly where Input and Output are both 0, and Inf for the
### remaining positions where Input is 0
is_zero_input  = dat_track$Input  == 0
is_zero_output = dat_track$Output == 0
cat("#{Input   == 0}:  ", sum(is_zero_input), "\n")
cat("#{Output  == 0}:  ", sum(is_zero_output), "\n")
cat("#{Both    == 0}:  ", sum(is_zero_input & is_zero_output), "\n")
cat("#{Input   == NA}: ", sum(is.na(dat_track$Input)), "\n")
cat("#{Output  == NA}: ", sum(is.na(dat_track$Output)), "\n")
cat("#{FC      == NA}: ", sum(is.na(dat_track$FC)), "\n")
cat("#{FC      == Inf}:", sum(is.infinite(dat_track$FC)), "\n")
cat("#{pLog2FC == NA}: ", sum(is.na(dat_track$pLog2FC)), "\n")
cat("#{pLog2FC == Inf}:", sum(is.infinite(dat_track$pLog2FC)), "\n")

Chrom,Start,End,Input,Output,FC,Log2FC,pLog2FC
<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
chrX,47785501,47785502,0.008420745,0.009875188,1.172721,0.2298602,0.002079296
chrX,47785502,47785503,0.008420745,0.009875188,1.172721,0.2298602,0.002079296
chrX,47785503,47785504,0.008420745,0.009875188,1.172721,0.2298602,0.002079296
chrX,47785504,47785505,0.008420745,0.009875188,1.172721,0.2298602,0.002079296
chrX,47785505,47785506,0.008420745,0.009875188,1.172721,0.2298602,0.002079296
chrX,47785506,47785507,0.008420745,0.009875188,1.172721,0.2298602,0.002079296


#{Input   == 0}:   304212 
#{Output  == 0}:   388249 
#{Both    == 0}:   303318 
#{Input   == NA}:  0 
#{Output  == NA}:  0 
#{FC      == NA}:  303318 
#{FC      == Inf}: 894 
#{pLog2FC == NA}:  0 
#{pLog2FC == Inf}: 0 


## Export BigWig

In [4]:
### export one BigWig file per score column
### the output folder and the chromosome lengths do not depend on the column,
### so they are computed once; the folder uses the shared FOLDER setting so
### that it matches the folder the track tables were read from
fdiry  = file.path(FD_RES, "results", ASSAY, FOLDER, "summary")
seqlen = seqlengths(GENOME)

for (COLUMN in COLUMNS) {
    ### show progress
    cat("\n+++++++++++++++++++++++\n")
    cat("Column:", COLUMN, "\n\n")
    flush.console()
    
    ### get the specified scores
    ### - keep the coordinates and the current column (renamed to Score)
    ### - drop NA / NaN / Inf scores (is.finite is FALSE for all of them)
    ### - sort by chromosome (in the order of CHROMS) and position
    dat = dat_track %>% 
        dplyr::select(Chrom, Start, End, Score = !!COLUMN) %>% 
        dplyr::filter(is.finite(Score)) %>%
        dplyr::mutate(Chrom = factor(Chrom, levels=CHROMS)) %>%
        dplyr::arrange(Chrom, Start, End)
    
    ### show progress
    print(dim(dat))
    print(head(dat))
    flush.console()
    
    ### construct GRanges
    ### NOTE(review): the table looks 0-based, half-open (End - Start == 1 in the
    ### rows shown), so Start + 1 converts to the 1-based, closed coordinates
    ### of IRanges -- confirm against the code that writes the track tables
    grg = GRanges(
        seqnames   = dat$Chrom,               
        ranges     = IRanges(
            start  = dat$Start + 1,
            end    = dat$End), 
        strand     = STRAND,
        seqlengths = seqlen)
    genome(grg) = "hg38"

    ### assign scores (BigWig export reads the "score" metadata column)
    mcols(grg)$score = dat$Score

    ### export track as bigwig
    fname = paste(ASSAY, "track", TYPE, COLUMN, "bw", sep=".")
    fpath = file.path(fdiry, fname)
    export(grg, fpath, format = "BigWig")
    
    ### show progress
    cat("\n", fpath, "\n")
    flush.console()
}


+++++++++++++++++++++++
Column: Input 

[1] 4206820       4
[90m# A tibble: 6 × 4[39m
  Chrom     Start       End  Score
  [3m[90m<fct>[39m[23m     [3m[90m<dbl>[39m[23m     [3m[90m<dbl>[39m[23m  [3m[90m<dbl>[39m[23m
[90m1[39m chr8  126[4m7[24m[4m3[24m[4m5[24m901 126[4m7[24m[4m3[24m[4m5[24m902 0.037[4m5[24m
[90m2[39m chr8  126[4m7[24m[4m3[24m[4m5[24m902 126[4m7[24m[4m3[24m[4m5[24m903 0.037[4m5[24m
[90m3[39m chr8  126[4m7[24m[4m3[24m[4m5[24m903 126[4m7[24m[4m3[24m[4m5[24m904 0.038[4m6[24m
[90m4[39m chr8  126[4m7[24m[4m3[24m[4m5[24m904 126[4m7[24m[4m3[24m[4m5[24m905 0.038[4m2[24m
[90m5[39m chr8  126[4m7[24m[4m3[24m[4m5[24m905 126[4m7[24m[4m3[24m[4m5[24m906 0.038[4m2[24m
[90m6[39m chr8  126[4m7[24m[4m3[24m[4m5[24m906 126[4m7[24m[4m3[24m[4m5[24m907 0.038[4m2[24m

 /data/reddylab/Kuei/out/proj_combeffect_encode_fcc/results/KS91_K562_ASTARRseq/coverage/summary/KS91_K562_ASTARRseq