In [1]:
### Project configuration script; messages and warnings are silenced.
### NOTE(review): FD_RES, show_env() and the tidyverse functions used in later
### cells are presumably defined/loaded by this script -- confirm.
suppressMessages(suppressWarnings(source("../config_sing.R")))
### Bioconductor packages: genomic ranges, track import/export, hg38 genome
suppressMessages(suppressWarnings(library("GenomicRanges")))
suppressMessages(suppressWarnings(library("rtracklayer")))
suppressMessages(suppressWarnings(library("BSgenome.Hsapiens.UCSC.hg38")))
### Print the container name and the configured directory paths
show_env()

You are in Singularity: singularity_proj_combeffect 
BASE DIRECTORY:     /mount/work 
PATH OF SOURCE:     /mount/work/source 
PATH OF EXECUTABLE: /mount/work/exe 
PATH OF ANNOTATION: /mount/work/annotation 
PATH OF PROJECT:    /mount/project 
PATH OF RESULTS:    /mount/work/out/proj_combeffect_encode_fcc 


In [3]:
### List the entries of the results directory FD_RES
dir(FD_RES)

In [11]:
### Read the per-base, unstranded ASTARR-seq depth table (TSV) for the GATA1
### region from the KS91_K562_ASTARRseq coverage folder under FD_RES
fdiry = file.path(FD_RES, "KS91_K562_ASTARRseq", "coverage")
fname = "KS91_K562_hg38_ASTARRseq_Depth.GATA1.unstranded.perbase.tsv"
fpath = file.path(fdiry, fname)

### show_col_types = FALSE suppresses the column-specification message
dat_astarr_gata1 = read_tsv(fpath, show_col_types = FALSE)
head(dat_astarr_gata1)

Chrom,Loc,Sample,Group,Replicate,Region,Depth,Size,Depth_Norm
<chr>,<dbl>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>
chrX,47786400,Input_rep1,Input,rep1,GATA1,0,358823,0
chrX,47786401,Input_rep1,Input,rep1,GATA1,0,358823,0
chrX,47786402,Input_rep1,Input,rep1,GATA1,0,358823,0
chrX,47786403,Input_rep1,Input,rep1,GATA1,0,358823,0
chrX,47786404,Input_rep1,Input,rep1,GATA1,0,358823,0
chrX,47786405,Input_rep1,Input,rep1,GATA1,0,358823,0


In [26]:
### Window of positions to keep; both bounds are inclusive and compared to Loc
start = 48780000
end   = 48826000

### Single pipeline:
###   subset rows -> one column per Sample -> log2(Output/Input) per replicate
###   -> back to long format with columns Chrom, Loc, Sample, Value
### NOTE(review): the values shown by head(dat) are on the order of 1e-5, which
### suggests the pseudocount of 10 is much larger than Depth_Norm itself --
### confirm that the pseudocount matches the scale of Depth_Norm.
dat = dat_astarr_gata1 %>%
    # rows inside [start, end] that belong to replicates 1-4
    dplyr::filter(
        Loc >= start,
        Loc <= end,
        Replicate %in% c("rep1", "rep2", "rep3", "rep4")) %>%
    # wide layout: one Depth_Norm column per Sample
    dplyr::select(Chrom, Loc, Depth_Norm, Sample) %>%
    spread(Sample, Depth_Norm) %>%
    # per-replicate log2 ratio of Output over Input, pseudocount 10 on both
    mutate(
        Rep1 = log2((Output_rep1 + 10) / (Input_rep1 + 10)),
        Rep2 = log2((Output_rep2 + 10) / (Input_rep2 + 10)),
        Rep3 = log2((Output_rep3 + 10) / (Input_rep3 + 10)),
        Rep4 = log2((Output_rep4 + 10) / (Input_rep4 + 10))) %>%
    # keep only the ratios and stack them into Sample / Value
    dplyr::select(Chrom, Loc, Rep1, Rep2, Rep3, Rep4) %>%
    gather(Sample, Value, -Chrom, -Loc)

head(dat)

Chrom,Loc,Sample,Value
<chr>,<dbl>,<chr>,<dbl>
chrX,48780000,Rep1,2.898608e-06
chrX,48780001,Rep1,-1.420414e-05
chrX,48780002,Rep1,-1.420414e-05
chrX,48780003,Rep1,-1.380209e-05
chrX,48780004,Rep1,-1.380209e-05
chrX,48780005,Rep1,-1.380209e-05


In [27]:
### Interval table for replicate 1 only: Chrom, Start, End, Strand, Score.
### Each position becomes the interval (Start = Loc, End = Loc + 1) with an
### unspecified strand and the log2 ratio as its score.
### NOTE(review): this presumably treats Loc as a 0-based start of a half-open
### interval -- confirm the coordinate convention of the per-base table.
tmp = dat %>%
    dplyr::filter(Sample == "Rep1") %>%
    dplyr::transmute(
        Chrom,
        Start  = Loc,
        End    = Loc + 1,
        Strand = "*",
        Score  = Value)

head(tmp)

Chrom,Start,End,Strand,Score
<chr>,<dbl>,<dbl>,<chr>,<dbl>
chrX,48780000,48780001,*,2.898608e-06
chrX,48780001,48780002,*,-1.420414e-05
chrX,48780002,48780003,*,-1.420414e-05
chrX,48780003,48780004,*,-1.380209e-05
chrX,48780004,48780005,*,-1.380209e-05
chrX,48780005,48780006,*,-1.380209e-05


In [28]:
### Short alias for the hg38 BSgenome object (its seqlengths are used when
### building the GRanges below)
genome_hg38 = BSgenome.Hsapiens.UCSC.hg38

In [29]:
### Build a GRanges from the interval table.
###   - start is shifted by +1 while end is taken as-is, so an interval with
###     End == Start + 1 becomes a single-position range
###   - score is attached as a metadata column
###   - sequence lengths come from the hg38 BSgenome object
gr = with(tmp, GRanges(
    seqnames   = Chrom,
    ranges     = IRanges(start = Start + 1, end = End),
    strand     = Strand,
    score      = Score,
    seqlengths = seqlengths(genome_hg38)))

### Tag every sequence with the genome build name
genome(gr) = "hg38"

print(gr)

GRanges object with 46001 ranges and 1 metadata column:
          seqnames    ranges strand |        score
             <Rle> <IRanges>  <Rle> |    <numeric>
      [1]     chrX  48780001      * |  2.89861e-06
      [2]     chrX  48780002      * | -1.42041e-05
      [3]     chrX  48780003      * | -1.42041e-05
      [4]     chrX  48780004      * | -1.38021e-05
      [5]     chrX  48780005      * | -1.38021e-05
      ...      ...       ...    ... .          ...
  [45997]     chrX  48825997      * | -6.08451e-06
  [45998]     chrX  48825998      * | -5.68245e-06
  [45999]     chrX  48825999      * | -5.68245e-06
  [46000]     chrX  48826000      * | -5.68245e-06
  [46001]     chrX  48826001      * | -5.68245e-06
  -------
  seqinfo: 640 sequences from hg38 genome


In [30]:
### Output path: test.bw inside the KS91_K562_ASTARRseq coverage folder
fdiry = file.path(FD_RES, "KS91_K562_ASTARRseq", "coverage")
fname = "test.bw"
fpath = file.path(fdiry, fname)

### Write the GRanges (with its score column) to disk as a BigWig file
export(gr, fpath, format = "BigWig")