In [None]:
library(clusterProfiler)

library(dplyr)
library(ggplot2)
library(reshape)
library(tximport)
library(DESeq2)
library("pheatmap")
library("RColorBrewer")
library(AnnotationDbi)
library(vsn)

In [None]:
# Merged gene-level count table (tab-separated).
tsv_file <- "nf_outs/star_salmon/salmon.merged.gene_counts_length_scaled.tsv"

# Load the table; the first column becomes the row names.
data <- read.table(file = tsv_file, header = TRUE, sep = "\t", row.names = 1)

# Pull out the two genes of interest by row name, then reshape to long format
# so every (gene_name, sample column) pair is one row.
dat <- data[c("ENSMUSG00000074987", "ENSMUSG00000016458"), ]
dat <- melt(dat, id.vars = "gene_name")

# Grouped (dodged) bar chart, flipped so the genes run along the vertical axis.
ggplot(data = dat, mapping = aes(x = gene_name, y = value, fill = variable)) +
  geom_bar(stat = "identity", position = "dodge") +
  coord_flip()

# DEG

In [None]:
# Sample sheet: one row per Salmon run together with its condition label.
tx <- c("CONTROL_REP1", "CONTROL_REP2", "KO_REP1", "KO_REP2")
samps <- data.frame(
  sample_id = tx,
  condition = c("CONTROL", "CONTROL", "KO", "KO")
)

# One quant.sf path per sample, named by sample id.
files <- setNames(
  file.path("/data/lemsaraa/bulk_wt1os/nf_outs/star_salmon/", samps$sample_id, "quant.sf"),
  samps$sample_id
)
head(files)

In [None]:
library(tximport)

# Import the per-sample Salmon quantifications listed in `files`.
# txOut = TRUE keeps the output at transcript level; counts are generated
# from abundances with the "scaledTPM" method.
txi <- tximport(
  files,
  type = "salmon",
  txOut = TRUE,
  countsFromAbundance = "scaledTPM"
)

# Keep only rows whose counts sum to more than zero across all samples.
cts <- txi$counts[rowSums(txi$counts) > 0, ]

In [None]:
# Load the serialized gene-level experiment object saved next to the count TSV.
gse <- readRDS("nf_outs/star_salmon/salmon.merged.gene_counts_length_scaled.rds")

In [None]:
# Derive each sample's condition from the text before the first underscore of
# its name (e.g. "CONTROL_REP1" -> "CONTROL").
# vapply() replaces sapply() so the result is always a character vector: sapply()
# silently returns a list when there are no samples, which as.factor() cannot
# handle. Names on the result are kept exactly as sapply() would set them.
gse$condition <- as.factor(
  vapply(gse$names, function(x) strsplit(x, "_")[[1]][1], character(1))
)
# Show the condition levels that were found.
levels(gse$condition)

In [None]:
# Replace the assay with its values rounded to whole numbers; the
# length-scaled counts are not integers, and the matrix is used as count data
# for DESeq2 below.
assay(gse) <- round(as.matrix(assay(gse)))

In [None]:
# Build the DESeq2 dataset from the rounded count matrix and the sample
# metadata, modelling expression as a function of condition.
dds <- DESeqDataSetFromMatrix(
  countData = assay(gse),
  colData = colData(gse),
  design = ~condition
)


In [None]:
# Gene count before filtering.
nrow(dds)

# Keep genes that reach a count of at least 10 in at least one sample.
keep <- rowSums(counts(dds) >= 10) > 0
dds <- dds[keep, ]

# Gene count after filtering.
nrow(dds)

# Transformation: mean-vs-SD plot of the untransformed assay for reference ...
meanSdPlot(assay(dds), ranks = FALSE)

# ... and of the variance-stabilized values (blind to the design).
vsd <- vst(dds, blind = TRUE)
meanSdPlot(assay(vsd), ranks = FALSE)

# Peek at the first three rows of the transformed matrix.
head(assay(vsd), n = 3)


In [None]:
# rld <- rlog(dds, blind = FALSE)
# head(assay(rld), 3)

# df <- bind_rows(
#   as_data_frame(log2(counts(dds, normalized=TRUE)[, 1:2]+1)) %>%
#          mutate(transformation = "log2(x + 1)"),
#   as_data_frame(assay(vsd)[, 1:2]) %>% mutate(transformation = "vst"),
#   as_data_frame(assay(rld)[, 1:2]) %>% mutate(transformation = "rlog"))
  
# colnames(df)[1:2] <- c("x", "y")  

# lvls <- c("log2(x + 1)", "vst", "rlog")
# df$transformation <- factor(df$transformation, levels=lvls)

# ggplot(df, aes(x = x, y = y)) + geom_hex(bins = 80) +
#   coord_fixed() + facet_grid( . ~ transformation)  


In [None]:
# Sample-to-sample similarity heatmap on the variance-stabilized values.
# NOTE: despite its name, `sampleDistMatrix` holds pairwise correlations
# between samples (cor()), not distances.
sampleDistMatrix <- cor(assay(vsd))
rownames(sampleDistMatrix) <- vsd$names
colnames(sampleDistMatrix) <- vsd$names

# Reversed "Blues" ramp: the largest values get the lightest colour.
# NOTE(review): for correlations this makes the most similar samples the
# lightest cells -- confirm that is the intended reading.
colors <- colorRampPalette(rev(brewer.pal(9, "Blues")))(255)

# Use the full argument name `color`; `col` only worked through partial
# argument matching.
pheatmap(sampleDistMatrix, color = colors)

In [None]:
# PCA of the variance-stabilized data, with samples grouped by condition.
plotPCA(vsd, intgroup = "condition")

# Differential expression analysis

In [None]:
# Size factors are estimated up front, and CONTROL is made the reference level
# of the condition factor before the model is fitted.
dds <- estimateSizeFactors(dds)
dds$condition <- relevel(dds$condition, ref = "CONTROL")
dds <- DESeq(dds)

# Results table computed with a log2 fold-change threshold of 1.
res <- results(dds, lfcThreshold = 1)
summary(res)

# Rows with adjusted p-value below 0.1 (rows with NA padj are excluded).
resSig <- res[which(res$padj < 0.1), ]
resSig

In [None]:
# topGene <- rownames(res)[which.min(res$padj)]
# plotCounts(dds, gene = topGene, intgroup=c("condition"))

## MA-plot

In [None]:
library(ashr)

# List the available coefficient names for the fitted model.
resultsNames(dds)

# Shrink the KO-vs-CONTROL log fold changes with the "ashr" method; note that
# this overwrites the `res` table produced by results() earlier.
res <- lfcShrink(
  dds,
  coef = "condition_KO_vs_CONTROL",
  type = "ashr"
)

# MA plot of the shrunken results with a fixed y-axis range.
plotMA(res, ylim = c(-30, 20))

In [None]:
# hist(res$pvalue[res$baseMean > 1], breaks = 0:20/20,
#      col = "grey50", border = "white")

In [None]:
# Show the result rows for the two genes of interest (looked up by row name).
res[c("ENSMUSG00000074987", "ENSMUSG00000016458"), ]

In [None]:
# Lookup table built from the tx2gene file: the first column is consumed as row
# names and the two remaining columns are de-duplicated.
# NOTE(review): "symbol" is matched against the row names of `resSig` below, so
# it presumably holds gene IDs while "name" holds the readable gene names --
# confirm against the file.
gene_id <- read.table(
  "nf_outs/star_salmon/salmon_tx2gene.tsv",
  sep = "\t",
  row.names = 1
)
gene_id <- unique(gene_id)
colnames(gene_id) <- c("symbol", "name")

# Expose the row names of the significant results as a join key, then attach
# the lookup columns. After this, `resSig` is a plain data frame.
resSig$symbol <- rownames(resSig)
resSig <- merge(x = as.data.frame(resSig), y = gene_id, by = "symbol")

In [None]:
library("genefilter")

# Heatmap of the variance-stabilized values for the two genes of interest.
topVarGenes <- c("ENSMUSG00000074987", "ENSMUSG00000016458")
mat <- assay(vsd)[topVarGenes, , drop = FALSE]
# mat  <- mat - rowMeans(mat)

# Readable row labels, in the same order as `topVarGenes`.
rownames(mat) <- c("Wt1os", "Wt1")

# Per-sample annotation. drop = FALSE keeps this a one-column table named
# "condition" with the sample names as row names; previously the subset
# collapsed to a bare vector and the annotation was never handed to pheatmap().
anno <- as.data.frame(colData(vsd)[, "condition", drop = FALSE])
pheatmap(mat, annotation_col = anno)

In [None]:
# Heatmap of the variance-stabilized values for every significant gene.
topVarGenes <- resSig$symbol

# drop = FALSE keeps `mat` a matrix even when exactly one gene is significant;
# without it the subset collapses to a vector and the rownames<- call fails.
mat <- assay(vsd)[topVarGenes, , drop = FALSE]
# mat  <- mat - rowMeans(mat)
rownames(mat) <- as.character(resSig$name)

# Per-sample annotation kept as a one-column table named "condition" so that it
# can be passed to pheatmap() (it was previously built but never used).
anno <- as.data.frame(colData(vsd)[, "condition", drop = FALSE])

if (nrow(mat) > 0L) {
  # Rows can only be clustered when there are at least two of them.
  pheatmap(mat, cluster_rows = nrow(mat) > 1L, annotation_col = anno)
} else {
  message("No significant genes to plot; heatmap skipped.")
}

In [None]:
# theme_s=theme(plot.margin = margin(0.5,0.5,0.5,0.5, "cm"),
#               plot.caption = element_text(color = "red", face = "bold"),
#              plot.title = element_text(color = "black", size = 24, face = "bold"),
#   plot.subtitle = element_text(color = "blue"),
             
#              legend.text = element_text(size=20),text =element_text(size=20),axis.title = element_text(size = 16)
#          , title =element_text(size=20) 
#              )

# plots <- function(ek, num=10) {
# if (class(ek) %in% c('enrichResult','gseaResult')){

# tryCatch(expr={ek=simplify(ek)},
#          error = function(e) {
#     print('ok')  })
# tryCatch({
# p1 <- barplot(ek,showCategory=num)+theme_s
# p2 <- dotplot(ek,showCategory=num)+theme_s
# sim_mat <- pairwise_termsim(ek)
# p3=emapplot(sim_mat)+theme_s
# bg <- deg$logFC
# names(bg) <- as.character(deg$protein)

# p4=cnetplot(ek,foldChange=bg)+theme_s
# p5=heatplot(ek,foldChange=bg)+coord_flip()+theme_s
    
# pdf(file = paste0(name,ek@ontology,'.pdf'),
#     # The directory you want to save the file in
#     width = 14, # The width of the plot in inches
#     height = 18) # The height of the plot in inches

# print(p1)
# print(p2)
# print(p3)
# print(p4)    
# dev.off()

# pdf(file = paste0(name,ek@ontology,'heatmap','.pdf'),
#     # The directory you want to save the file in
#     width = 18, # The width of the plot in inches
#     height = 100) # The height of the plot in inches

# print(p5)    
# dev.off()
# },error = function(e) {
#     print('ok')  })
         
# }}

# dotplots <- function(ek, num=10) {
#     if (class(ek) %in% c('enrichResult','compareClusterResult')){
#     p=dotplot(ek,showCategory=num)+theme_s
#     }else{
#     p=NULL
#     }
#         return(p)

#     }
# pvalueCutoff=0.05
# qvalueCutoff=0.05

In [None]:
#  name='CON.VS.KO'
#  deg=as.data.frame(res)
#  deg1 <- deg[deg$padj<0.05 & abs(deg$log2FoldChange)>1,]
    
# # differentially expressed genes ids
# degs <- rownames(deg1)
# # all gene ids
# bgs <- rownames(deg)

# print('start')
# ego_cc <- enrichGO(gene=degs,keyType ="ENSRMBL",universe=bgs,OrgDb=org.Mm.eg.db,ont="CC",
#                     readable=TRUE)
# ego_bp <- enrichGO(gene=degs ,keyType ="ENSRMBL",universe=bgs,OrgDb=org.Mm.eg.db,ont="BP",
#                     readable=TRUE)
# ego_mf <- enrichGO(gene=degs,keyType ="ENSRMBL",universe=bgs,OrgDb=org.Mm.eg.db,ont="MF",
#                     readable=TRUE)
# ek <- enrichMKEGG(gene=degs,organism='mmu', keyType='ENSEMBL', universe =bgs )
# plots(ego_cc)
# plots(ego_bp)
# plots(ego_mf)
# plots(ek)
# print(ego_cc)
# print('comparison')
# mydf <- data.frame(Entrez=rownames(deg1), FC=deg1$logFC)
# mydf$group <- "upregulated"
# mydf$group[mydf$FC < 0] <- "downregulated"

# formula_res <- compareCluster(Entrez~group, data=mydf, fun="enrichGO",keyType ="ENSRMBL",universe=bgs,OrgDb=org.Mm.eg.db,ont="CC",
#                     readable=TRUE)
# p1= dotplots(formula_res)

# formula_res <- compareCluster(Entrez~group, data=mydf, fun="enrichGO",keyType ="ENSRMBL",universe=bgs,OrgDb=org.Mm.eg.db,ont="BP",
#                     readable=TRUE)
# p2= dotplots(formula_res)

# formula_res <- compareCluster(Entrez~group, data=mydf, fun="enrichGO",keyType ="ENSRMBL",universe=bgs,OrgDb=org.Mm.eg.db,ont="MF",
#                     readable=TRUE)
# p3 =dotplots(formula_res)

    
# formula_res <- compareCluster(Entrez~group, data=mydf, fun="enrichMKEGG",keyType ="ENSRMBL",organism='mmu',universe=bgs)
# p4=dotplots(formula_res)
# formula_res <- compareCluster(Entrez~group, data=mydf, fun="enrichKEGG",keyType ="ENSRMBL",organism='mmu',universe=bgs)
# p5=dotplots(formula_res)
# pdf(file = paste0(name,'_compareclusters','.pdf'),
#     # The directory you want to save the file in
#     width = 12, # The width of the plot in inches
#     height = 14) # The height of the plot in inches

# print(p1)
# print(p2)
# print(p3)
# print(p4)
# print(p5)
# dev.off()


In [None]:
library(org.Mm.eg.db)


In [None]:
# trans = c('ENSMUST00000135153','ENSMUST00000172701','ENSMUST00000099647','ENSMUST00000174870',
#  'ENSMUST00000143043','ENSMUST00000146842','ENSMUST00000133470','ENSMUST00000139585'
#  ,'ENSMUST00000145107','ENSMUST00000111098','ENSMUST00000111099','ENSMUST00000153944')
# counts = list()
# files= c('200774_S40','200775_S41','200776_S42','200777_S43')
# for (f in files)
# {
# tsv_file <- paste0("salmon_out/",f,"/quant.sf")

# # Read the TSV file using read.table() or read.delim()
# data <- read.table(tsv_file, sep = "\t", header = TRUE,row.names = 1)

# log= list()
# for (i in trans){
# log = c(log,(rownames(data)[grepl(paste0("^",i), rownames(data))]))
    
# }
# print(log)
# print(data[unlist(log),])
# counts = c(counts, data[unlist(log),])
#     }

In [None]:
# `counts` is only created as a list by the transcript-level loop in the
# previous cell, which is currently commented out. With that loop disabled the
# bare name would resolve to the counts() function instead, so the value is
# shown only when a list called `counts` actually exists.
if (exists("counts", mode = "list")) {
  counts
} else {
  message("`counts` list not available: the cell that builds it is commented out.")
}