In [4]:
library(tidyverse)
library(readr)
library(GenomicFeatures)
library(DESeq2)
library(org.Mm.eg.db)
library(rjson)
library(tximport)
library(DBI)
library(rje)
library(plyr)

code.dir <- getwd()
base.dir <- gsub("/codes_local", "", code.dir)

In [5]:
######################################## Convert ENSEMBL ID to gene symbols ########################################
# Download convert table from:http://useast.ensembl.org/biomart/martview/8c1957c27101a044a318d51140a289e1

cv_file <- '/home/pipkin/references/mm_BioMart_GeneStableID_GeneName.txt'
cv_tb <- read_csv(cv_file)

matchGN <- function(input, outfilename, cvTb=cv_tb){
    colnames(input) <- c("ensembl_stable_ID", colnames(input)[2:length(colnames(input))])
    output <- cvTb %>% right_join(input, by="ensembl_stable_ID")
    output$ensembl_stable_ID <- NULL
    write_csv(output, outfilename)
}


###--- Make reference
#txdb <- makeTxDbFromGFF('/home/pipkin/references/GRCm38.99/Mus_musculus.GRCm38.99.gtf')
#saveDb(txdb, file='/home/pipkin/references/GRCm38.99/Mus_musculus.GRCm38.99')
mmRef <- '/home/pipkin/references/GRCm38.100/Mus_musculus.GRCm38.100'

###--- Convert transcript ID to gene ID
txdb <- loadDb(mmRef)
k <- keys(txdb, "GENEID")
res <- AnnotationDbi::select(txdb, k, "TXNAME", "GENEID")
tx2gene <- res[,2:1]


[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────[39m
cols(
  ensembl_stable_ID = [31mcol_character()[39m,
  gene_name = [31mcol_character()[39m
)


'select()' returned 1:many mapping between keys and columns



In [14]:
salmon_out_dir <- file.path(base.dir, "0_salmon")
deseq_out_dir <- file.path(base.dir, "1_DEseq2_merge_shChd7")
deseq_compiled_dir <- file.path(base.dir, "1_DEseq2_compiled")

In [7]:
##########---------- Read Quant Files
meta.file <- file.path(base.dir, "meta_merge_shChd7.csv")
meta.df <- read_csv(meta.file)

files <- file.path(file.path(base.dir, "0_salmon") ,meta.df$Samples, "quant.sf")
names(files) <- meta.df$Names

# Drop in freps TURE = ignore verison  # Ignore TX verison stringsplits on . 
txi <- tximport(files, type="salmon", tx2gene=tx2gene, ignoreTxVersion = TRUE, dropInfReps = TRUE) #dropInfReps = TRUE, 

# Construct sampleTable
sampleTable <- data.frame(condition = factor(meta.df$Cond))
rownames(sampleTable) <- colnames(txi$counts)

#import into DESEQ2 framework
dds <- DESeqDataSetFromTximport(txi, sampleTable, ~ condition)
summary(dds)

dds <- DESeq(dds) #RunDESEQ


[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────[39m
cols(
  Samples = [31mcol_character()[39m,
  Cond = [31mcol_character()[39m,
  Names = [31mcol_character()[39m
)


reading in files with read_tsv

1 
2 
3 
4 
5 
6 
7 
8 
9 


transcripts missing from tx2gene: 176

summarizing abundance

summarizing counts

summarizing length

using counts and average transcript lengths from tximport



estimating size factors

using 'avgTxLength' from assays(dds), correcting for library size

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing



In [16]:
norm.df <- data.frame(counts(dds, normalized=T))
norm.name <- file.path(deseq_compiled_dir, "DESeq2_normalized_counts_bysample.csv")
norm.name.gn <- file.path(deseq_compiled_dir, "DESeq2_normalized_counts_bysample_gn.csv")
write.csv(norm.df, norm.name)

matchGN(read_csv(norm.name), norm.name.gn)

“Missing column names filled in: 'X1' [1]”

[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────[39m
cols(
  X1 = [31mcol_character()[39m,
  Cd4_1_1 = [32mcol_double()[39m,
  Chd7_1_1 = [32mcol_double()[39m,
  Chd7_2_1 = [32mcol_double()[39m,
  Chd7_3_1 = [32mcol_double()[39m,
  Cd4_1_2 = [32mcol_double()[39m,
  Chd7_1_2 = [32mcol_double()[39m,
  Chd7_2_2 = [32mcol_double()[39m,
  Chd7_3_2 = [32mcol_double()[39m,
  Cd19_1_1 = [32mcol_double()[39m
)




In [50]:
for (i in meta.df$Cond) {
    for (j in meta.df$Cond) {
        if (i != j ){
            contrast <- c("condition", i, j)
            out_name <- paste(i, "_vs_", j, ".csv", sep="")
            out_name <- file.path(deseq_out_dir, out_name)
            out_name_gn <- gsub(".csv","_gn.csv",  out_name)
            results <- as_tibble(results(dds, contrast = contrast), rownames='ensembl_id')
            write_csv(results, out_name)
            matchGN(results, out_name_gn)
        }
    }
}