**Set environment**

In [1]:
### Load the project configuration script; messages and warnings raised while
### sourcing it are silenced.
### NOTE(review): FD_RES, read_tsv, %>% and show_env are used later in this
### notebook but not defined in it -- presumably they come from this config
### script; confirm.
suppressMessages(suppressWarnings(source("../config/config_sing.R")))
### Attach DESeq2 (startup messages and warnings silenced).
suppressMessages(suppressWarnings(library("DESeq2")))
### Print the environment summary (see the recorded output below).
show_env()

You are in Singularity: singularity_proj_combeffect 
BASE DIRECTORY:     /data/reddylab/Kuei 
WORK DIRECTORY:     /data/reddylab/Kuei/out 
CODE DIRECTORY:     /data/reddylab/Kuei/code 
PATH OF SOURCE:     /data/reddylab/Kuei/source 
PATH OF EXECUTABLE: /data/reddylab/Kuei/bin 
PATH OF ANNOTATION: /data/reddylab/Kuei/annotation 
PATH OF PROJECT:    /data/reddylab/Kuei/code/Proj_CombEffect_ENCODE_FCC 
PATH OF RESULTS:    /data/reddylab/Kuei/out/proj_combeffect_encode_fcc 


In [52]:
### Run settings consumed by the per-dataset DESeq2 loop below.
### PREFIX and FOLDER are path components under file.path(FD_RES, "results", ...).
PREFIX <- "Tewhey_K562_TileMPRA"
FOLDER <- "coverage_astarrseq_peak_macs_input"

### Dataset labels the loop iterates over; each label is part of the
### metadata / count-matrix / output file names.
DATASETS <- c(
    "OL13",
    "OL43",
    "OL45",
    "OL13_OL43_OL45"
)

### Tag embedded in the input (and result) file names.
TYPE <- "raw"

In [61]:
### Run a DESeq2 comparison (design ~condition, reference level "Input") for
### every dataset in DATASETS. Per dataset the loop:
###   1. reads the sample metadata and the count matrix from the summary folder,
###   2. builds colData / countData and verifies that the samples line up,
###   3. fits DESeq2 on peaks with at least 10 reads in total,
###   4. writes the normalized counts and the results table back to that folder.
for (DATASET in DATASETS){
    ###
    cat("\n======================================\n")
    cat("Dataset:", DATASET, "\n")
    
    ### all inputs and outputs of one dataset live in the same summary folder
    fdiry = file.path(FD_RES, "results", PREFIX, FOLDER, "summary")

    ### import sample metadata
    fname = paste("metadata", TYPE, DATASET, "tsv", sep=".")
    fpath = file.path(fdiry, fname)

    dat_meta = read_tsv(fpath, show_col_types = FALSE)
    print(dim(dat_meta))
    
    ### import count matrix
    fname = paste("matrix", TYPE, "count", DATASET, "tsv", sep=".")
    fpath = file.path(fdiry, fname)

    dat_count = read_tsv(fpath, show_col_types = FALSE)
    print(dim(dat_count))
    
    ### column data: one row per sample, row names = sample names
    dat_col = dat_meta  %>% 
        dplyr::select(Sample, Group) %>% 
        dplyr::rename(condition = Group) %>%
        column_to_rownames(var = "Sample")

    ### make the design variable a factor up front with "Input" as reference;
    ### otherwise DESeq2 converts the character column itself and warns each run
    dat_col$condition = relevel(factor(dat_col$condition), ref = "Input")

    ### count data: one row per peak, one column per Input/Output sample
    dat_cnt = dat_count %>% 
        dplyr::select(Peak, starts_with("Input"), starts_with("Output")) %>%
        column_to_rownames(var = "Peak")

    ### a missing count is treated as zero reads
    dat_cnt[is.na(dat_cnt)] = 0

    ### DESeqDataSetFromMatrix pairs countData columns with colData rows by
    ### position, so the check is enforced rather than only printed
    cat("\n++++++++++++++++++++++++++++++++++++++\n")
    cat("Check if row and column names matched:", "\n")
    sample_ids = rownames(dat_col)
    print(all(sample_ids %in% colnames(dat_cnt)))
    print(identical(sample_ids, colnames(dat_cnt)))

    if (!setequal(sample_ids, colnames(dat_cnt))) {
        stop(
            "Samples in metadata and count matrix differ for dataset ", DATASET,
            "; only in metadata: ",
            paste(setdiff(sample_ids, colnames(dat_cnt)), collapse = ", "),
            "; only in count matrix: ",
            paste(setdiff(colnames(dat_cnt), sample_ids), collapse = ", "),
            call. = FALSE)
    }

    ### same samples, possibly in a different order: follow the metadata order
    dat_cnt = dat_cnt[, sample_ids, drop = FALSE]
    
    ### create a DESeqDataSet object
    dds = DESeqDataSetFromMatrix(
        countData = dat_cnt, 
        colData   = dat_col, 
        design    = ~condition)

    ### remove the peaks which have < 10 reads
    dds = dds[rowSums(counts(dds)) >= 10,]
    
    ### apply DESeq2 data processing
    dds = DESeq(dds)
    
    ### get normalized counts
    cat("\n++++++++++++++++++++++++++++++++++++++\n")
    cat("Get normalized counts:", "\n")
    mat = counts(dds, normalized=TRUE)
    mat = as.data.frame(mat) %>% rownames_to_column(var = "Peak")
    print(head(mat))
    
    ### get results: log2FC (default results() call, i.e. the condition effect
    ### relative to the "Input" reference level)
    cat("Get results (Log2FC):", "\n")
    res = results(dds)
    res = as.data.frame(res) %>% rownames_to_column(var = "Peak")
    print(head(res))
    
    ### save the results
    cat("\n++++++++++++++++++++++++++++++++++++++\n")
    cat("Save the results:", "\n")
    
    ### NOTE(review): unlike the results file below, this file name does not
    ### carry TYPE; kept as-is so existing downstream paths keep working
    fname = paste("matrix", "deseq", "count", DATASET, "tsv", sep=".")
    fpath = file.path(fdiry, fname)
    write_tsv(mat, fpath)
    cat(fname, "\n")
    
    fname = paste("result", "Log2FC", TYPE, "deseq", DATASET, "tsv", sep=".")
    fpath = file.path(fdiry, fname)
    write_tsv(res, fpath)
    cat(fname, "\n")
}


Dataset: OL13 
[1] 8 4
[1] 22 13

++++++++++++++++++++++++++++++++++++++
Check if row and column names matched: 
[1] TRUE
[1] TRUE


converting counts to integer mode

“some variables in design formula are characters, converting to factors”
estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing




++++++++++++++++++++++++++++++++++++++
Get normalized counts: 
                     Peak Input.rep1 Input.rep2 Input.rep3 Input.rep4
1 chr11_61792068_61793464  124670.74  123167.59  127268.62  123604.24
2 chr11_61800085_61801113   93961.66   94620.71   93628.85   92690.22
3 chr11_61806630_61807154   73669.62   69879.13   70880.48   69350.47
4 chr11_61814735_61817343  202141.92  203733.30  202944.36  196887.87
5 chr11_61822094_61822443   32095.95   31325.07   31749.03   30863.91
6 chr11_61825795_61826306   32867.32   33514.81   33610.61   34128.29
  Output.rep1 Output.rep2 Output.rep3 Output.rep4
1  379932.388  400497.009   419950.20   382846.15
2  349840.912  346627.120   355854.18   304277.02
3   24533.110   25122.717    29545.41    31637.71
4 1147134.438 1222026.706  1180879.95  1106807.59
5    6818.186    8160.235    10473.71    11418.74
6   24191.171   25906.336    25389.86    26706.79
Get results (Log2FC): 
                     Peak  baseMean log2FoldChange      lfcSE       stat


converting counts to integer mode

“some variables in design formula are characters, converting to factors”
estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing




++++++++++++++++++++++++++++++++++++++
Get normalized counts: 
                      Peak Input.rep1 Input.rep2 Input.rep3 Input.rep4
1 chr8_126778902_126779728  10154.124   9822.553   9786.855  10027.350
2 chr8_126782925_126783318   3011.385   3021.321   2880.060   2915.941
3 chr8_126804776_126805825  15844.035  16328.343  16100.478  16122.583
4 chr8_126817018_126817490   9802.665   9905.971   9735.194   9898.398
5 chr8_126820465_126821393  16224.864  16525.593  15895.682  16125.308
6 chr8_126824093_126824987  27068.199  27853.159  27310.758  26933.632
  Input.rep5 Input.rep6 Output.rep1 Output.rep2 Output.rep3 Output.rep4
1   9935.618  10281.401   31277.428   24664.370   34513.088   36773.294
2   2920.152   3030.901    2073.673    2037.224    2140.843    2322.149
3  15934.728  16023.962   52916.009   45575.081   47325.145   51267.303
4   9634.821   9532.126    6477.050    6516.826    6755.113    6492.146
5  16259.812  16333.813   14816.056   14773.000   15189.643   14449.230
6  2761

converting counts to integer mode

“some variables in design formula are characters, converting to factors”
estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing




++++++++++++++++++++++++++++++++++++++
Get normalized counts: 
                   Peak Input.rep1 Input.rep2 Input.rep3 Input.rep4 Output.rep1
1 chr11_4539569_4540043   3102.859   2940.539   3048.390   3146.506    1725.192
2 chr11_4551336_4552012   4537.882   4458.541   4674.430   4427.583    5413.024
3 chr11_4553969_4555012   5899.648   5799.756   5803.977   6100.200   23317.516
4 chr11_4569016_4569992   5971.900   6162.757   6152.332   6120.309    2718.384
5 chr11_4577444_4578031   4752.633   4858.077   5092.223   4974.082    3438.101
6 chr11_4601808_4602487   4367.285   4447.933   4459.167   4587.275    2326.184
  Output.rep2 Output.rep3 Output.rep4
1    1645.827    1599.379    1616.614
2    4530.116    4584.608    5634.670
3   28368.117   32091.208   21789.627
4    2769.688    2593.758    2925.407
5    3445.739    3498.119    3245.153
6    2224.610    2085.054    2282.190
Get results (Log2FC): 
                   Peak  baseMean log2FoldChange      lfcSE       stat
1 chr11_4539569_

converting counts to integer mode

“some variables in design formula are characters, converting to factors”
estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

-- note: fitType='parametric', but the dispersion trend was not well captured by the
   function: y = a/x + b, and a local regression fit was automatically substituted.
   specify fitType='local' or 'mean' to avoid this message next time.

final dispersion estimates

fitting model and testing




++++++++++++++++++++++++++++++++++++++
Get normalized counts: 
                     Peak Input.rep1 Input.rep2 Input.rep3 Input.rep4
1 chr11_61792068_61793464  128400.17   78342.13  128995.05   81828.58
2 chr11_61800085_61801113   96772.44   60184.56   94898.94   61362.85
3 chr11_61806630_61807154   75873.39   44447.41   71841.99   45911.45
4 chr11_61814735_61817343  208188.83  129586.85  205697.35  130343.87
5 chr11_61822094_61822443   33056.07   19924.66   32179.71   20432.55
6 chr11_61825795_61826306   33850.52   21317.47   34066.55   22593.64
  Output.rep1 Output.rep2 Output.rep3 Output.rep4
1   387728.86  465098.228   635961.62   480064.23
2   357019.88  402538.985   538896.27   381543.64
3    25036.55   29175.077    44742.79    39671.64
4  1170674.41 1419142.823  1788293.74  1387864.90
5     6958.10    9476.503    15861.12    14318.36
6    24687.59   30085.096    38449.74    33488.58
Get results (Log2FC): 
                     Peak  baseMean log2FoldChange     lfcSE      stat
1 

In [56]:
### Preview the first rows of `mat`, i.e. whatever the last iteration of the
### DATASETS loop above left in the workspace.
### NOTE(review): the recorded output shows peak names as row names without a
### Peak column, so it appears to come from an earlier run in which `mat` was
### still the matrix returned by counts(); confirm before relying on it.
head(mat)

Unnamed: 0,Input.rep1,Input.rep2,Input.rep3,Input.rep4,Output.rep1,Output.rep2,Output.rep3,Output.rep4
chr11_61792068_61793464,128400.17,78342.13,128995.05,81828.58,387728.86,465098.228,635961.62,480064.23
chr11_61800085_61801113,96772.44,60184.56,94898.94,61362.85,357019.88,402538.985,538896.27,381543.64
chr11_61806630_61807154,75873.39,44447.41,71841.99,45911.45,25036.55,29175.077,44742.79,39671.64
chr11_61814735_61817343,208188.83,129586.85,205697.35,130343.87,1170674.41,1419142.823,1788293.74,1387864.9
chr11_61822094_61822443,33056.07,19924.66,32179.71,20432.55,6958.1,9476.503,15861.12,14318.36
chr11_61825795_61826306,33850.52,21317.47,34066.55,22593.64,24687.59,30085.096,38449.74,33488.58


In [59]:
### Scratch check: view `mat` as a data frame with the peak names in a
### "Peak" column.
tmp = as.data.frame(mat)

### After the DATASETS loop above, `mat` already carries a Peak column;
### rownames_to_column() refuses to create a column that exists, so only
### convert when the peak names are still stored as row names.
if (!("Peak" %in% colnames(tmp))) {
    tmp = tmp %>% rownames_to_column(var = "Peak")
}
head(tmp)

Unnamed: 0_level_0,Peak,Input.rep1,Input.rep2,Input.rep3,Input.rep4,Output.rep1,Output.rep2,Output.rep3,Output.rep4
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,chr11_61792068_61793464,128400.17,78342.13,128995.05,81828.58,387728.86,465098.228,635961.62,480064.23
2,chr11_61800085_61801113,96772.44,60184.56,94898.94,61362.85,357019.88,402538.985,538896.27,381543.64
3,chr11_61806630_61807154,75873.39,44447.41,71841.99,45911.45,25036.55,29175.077,44742.79,39671.64
4,chr11_61814735_61817343,208188.83,129586.85,205697.35,130343.87,1170674.41,1419142.823,1788293.74,1387864.9
5,chr11_61822094_61822443,33056.07,19924.66,32179.71,20432.55,6958.1,9476.503,15861.12,14318.36
6,chr11_61825795_61826306,33850.52,21317.47,34066.55,22593.64,24687.59,30085.096,38449.74,33488.58
