Skip to content
This repository has been archived by the owner on Oct 23, 2023. It is now read-only.

Commit

Permalink
Merge branch 'activeDev' of https://github.com/CCBR/Pipeliner into ac…
Browse files Browse the repository at this point in the history
…tiveDev
  • Loading branch information
tovahmarkowitz committed Jan 30, 2019
2 parents 0e56f04 + cb2fedc commit cd72f4a
Show file tree
Hide file tree
Showing 5 changed files with 78 additions and 9 deletions.
69 changes: 62 additions & 7 deletions Results-template/Scripts/PcaReport.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ suppressMessages(library('gplots'))
suppressMessages(library('reshape'))
suppressMessages(library('ggplot2'))
suppressMessages(library('ggfortify'))
suppressMessages(library(ggdendro))
suppressMessages(library(amap))
suppressMessages(library(DT))
suppressMessages(library(plotly))
Expand Down Expand Up @@ -107,7 +108,12 @@ y <- estimateTagwiseDisp(y) #default trend: moveingave
ylog2=cpm(y,log=TRUE,normalized.lib.sizes=TRUE,prior.count=0.5) # prior count like avelogcpm
rawlog2= cpm(y,log=TRUE,normalized.lib.sizes=FALSE,prior.count=0.5)
ddslog2= cpm(dds.ndata,log=TRUE,normalized.lib.sizes=FALSE,prior.count=0.5)
#ddslog2= cpm(dds.ndata,log=TRUE,normalized.lib.sizes=FALSE,prior.count=0.5)
rld <- rlogTransformation(dds, blind=TRUE)
rldm=assay(rld)
colnames(rldm)=colnames(x)
## save it
```

Expand All @@ -130,7 +136,7 @@ tmmhist
### DESeq2

```{r, echo=FALSE, warning=FALSE,message=FALSE}
deshist <- ggplotly(ggplot(melt(as.data.frame(ddslog2))) + geom_density(aes(x = value,colour = variable)) + labs(x = NULL) + theme(legend.position='right') + scale_x_log10())
deshist <- ggplotly(ggplot(melt(as.data.frame(rldm))) + geom_density(aes(x = value,colour = variable)) + labs(x = NULL) + theme(legend.position='right') + scale_x_log10())
deshist
```

Expand Down Expand Up @@ -242,8 +248,8 @@ p
#rld <- rlogTransformation(dds, blind=TRUE)
#rldm=assay(rld)
#colnames(rldm)=colnames(x)
#deseq2.edf=as.matrix(rldm)
deseq2.edf=ddslog2
deseq2.edf=as.matrix(rldm)
#deseq2.edf=ddslog2
deseq2.tedf= t(deseq2.edf)
deseq2.tedf=deseq2.tedf[,apply(deseq2.tedf,2,var)!= 0]
deseq2.pca=prcomp(deseq2.tedf,scale.=T)
Expand Down Expand Up @@ -327,7 +333,7 @@ p
```{r, echo=FALSE,message=FALSE,warning=FALSE}
before.dfm <- melt(as.data.frame(rawlog2))
edgeR.dfm <- melt(as.data.frame(ylog2))
deseq2.dfm <- melt(as.data.frame(ddslog2))
deseq2.dfm <- melt(as.data.frame(rldm))
limma.dfm <- melt(as.data.frame(v1$E))
```

Expand Down Expand Up @@ -368,6 +374,54 @@ hmapheight = length(sampleinfo$label)
if(hmapheight<8){
hmapheight = 8
}
create_heatmap <- function(data){
dd_col <- as.dendrogram(hclust(dist(data)))
dd_row <- as.dendrogram(hclust(dist(t(data))))
dendro_1 <- dendro_data(dd_col)
dendro_2 <- dendro_data(dd_row)
hmcol <- colorRampPalette(c("black","red","yellow","white"),space="rgb")(100)
ggdend <- function(df) {
ggplot() +
geom_segment(data = df, aes(x=x, y=y, xend=xend, yend=yend)) +
labs(x = "", y = "") + theme_minimal() +
theme(axis.text = element_blank(), axis.ticks = element_blank(),
panel.grid = element_blank())
}
dendro_columns <- ggdend(dendro_1$segments)
dendro_rows <- ggdend(dendro_2$segments) + coord_flip()
melt_mat <- melt(data)
hmap <- ggplot(data = melt_mat, aes(x=X1, y=X2, fill=value)) +
geom_tile() +
scale_fill_gradientn(colours = rev(hmcol), limit = c(0,max(data)), space = "Lab", name="Correlation") +
theme_minimal()+
theme(axis.text.x = element_text(angle = 45, vjust = 1, size = 10, hjust = 1),
legend.justification = c(1, 0),
legend.position = c(0.6, 0.7),
legend.direction = "horizontal",
axis.title.x = element_blank(),
axis.title.y = element_blank()) +
coord_fixed()
eaxis <- list(showticklabels = FALSE, showgrid = FALSE, zeroline = FALSE)
p_empty <- plot_ly() %>% layout(margin = list(l = 200), xaxis = eaxis,yaxis = eaxis)
sply <- subplot(dendro_columns, p_empty, hmap, dendro_rows, nrows = 2)
sply <- layout(sply,
yaxis = list(domain=c(0.47, 1)),
xaxis = list(domain=c(0, 0.5)),
xaxis3 = list(domain=c(0, 0.5)),
xaxis4 = list(domain=c(0.5, 1)),
margin = list(l = 150, r = 0, b = 50,t = 0))
return(sply)
}
```

### Before Normalization
Expand All @@ -377,6 +431,7 @@ hmapheight = 8
hmcol <- colorRampPalette(c("black","red","yellow","white"),space="rgb")(100)
before.distrawlog2=amap::Dist(t(rawlog2),method="pearson")
before.mat = as.matrix(before.distrawlog2)
heatmap.2(before.mat, trace="none", col = rev(hmcol), labCol=FALSE, colRow=as.numeric(as.factor(sampleinfo$condition)), margin=c(16, 16), main="Before normalization")
```

Expand All @@ -393,7 +448,7 @@ heatmap.2(edgeR.mat, trace="none", col = rev(hmcol), labCol=FALSE, colRow=as.num

```{r, echo=FALSE,message=FALSE,fig.show='hold',fig.align='center', warning=FALSE, fig.height=hmapheight}
deseq2.dists <- amap::Dist(t(ddslog2),method="pearson")
deseq2.dists <- amap::Dist(t(rldm),method="pearson")
deseq2.mat <- as.matrix(deseq2.dists)
heatmap.2(deseq2.mat, trace="none", col = rev(hmcol), labCol=FALSE, colRow=as.numeric(as.factor(sampleinfo$condition)), margin=c(16, 16), main="DESeq2")
```
Expand Down Expand Up @@ -421,7 +476,7 @@ for(i in 1:length(sampleinfo$label)){
abline(h=0, col="red", lty=2, lwd=2)
}
for(i in 1:length(sampleinfo$label)){
plotMD(ddslog2,column=i, main=paste0("DESeq2 ",sampleinfo$label[i]), xlim=c(-5,15), ylim=c(-15,15))
plotMD(rldm,column=i, main=paste0("DESeq2 ",sampleinfo$label[i]), xlim=c(-5,15), ylim=c(-15,15))
abline(h=0, col="red", lty=2, lwd=2)
}
for(i in 1:length(sampleinfo$label)){
Expand Down
8 changes: 8 additions & 0 deletions Rules/samtools_sort.rl
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
rule samtools_sort:
input: "{x}.p2Aligned.out.bam",
output: temp("{x}.p2Aligned.sortedByCoord.out.bam")
params: novosort=config['bin'][pfamily]['NOVOSORT'],rname="pl:sort"
threads: 8
shell: "module load samtools/1.9; samtools sort -@ {threads} -o {output} {input};"


4 changes: 2 additions & 2 deletions Rules/star.align.2.rl
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
rule star_align_2:
input: file1="{x}.R1.trimmed.fastq.gz",file2="{x}.R2.trimmed.fastq.gz",length="QC/{x}_readlength.txt",tab=expand("{x}SJ.out.tab",x=samples)
output: out1=temp("{x}.p2Aligned.sortedByCoord.out.bam"),out4="{x}.p2SJ.out.tab",out5="{x}.p2Log.final.out"
output: out1=temp("{x}.p2Aligned.out.bam"),out4="{x}.p2SJ.out.tab",out5="{x}.p2Log.final.out"
params: rname='pl:star2p',prefix="{x}.p2",outsamunmapped=config['bin'][pfamily]['OUTSAMUNMAPPED'],wigtype=config['bin'][pfamily]['WIGTYPE'],wigstrand=config['bin'][pfamily]['WIGSTRAND'], gtffile=config['references'][pfamily]['FUSIONGTFFILE'], nbjuncs=config['bin'][pfamily]['NBJUNCS'],starref=config['references'][pfamily]['STARREF']
threads: 32
run:
import glob
rl=int(open(input.length).readlines()[0].strip())-1
dbrl=sorted(list(map(lambda x:int(re.findall("genes-(\d+)",x)[0]),glob.glob(params.starref+'*/',recursive=False))))
bestdbrl=next(x[1] for x in enumerate(dbrl) if x[1] >= rl)
cmd="module load STAR/2.5.2b; STAR --genomeDir {params.starref}"+str(bestdbrl)+" --readFilesIn {input.file1} {input.file2} --readFilesCommand zcat --runThreadN {threads} --outFileNamePrefix {params.prefix} --outSAMunmapped {params.outsamunmapped} --sjdbFileChrStartEnd {input.tab} --sjdbGTFfile {params.gtffile} --limitSjdbInsertNsj 10000000 --outSAMtype BAM SortedByCoordinate --limitBAMsortRAM 39627002904"
cmd="module load STAR/2.5.2b; STAR --genomeDir {params.starref}"+str(bestdbrl)+" --readFilesIn {input.file1} {input.file2} --readFilesCommand zcat --runThreadN {threads} --outFileNamePrefix {params.prefix} --outSAMunmapped {params.outsamunmapped} --sjdbFileChrStartEnd {input.tab} --sjdbGTFfile {params.gtffile} --limitSjdbInsertNsj 10000000 --outSAMtype BAM Unsorted"
shell(cmd)
5 changes: 5 additions & 0 deletions cluster.json
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,11 @@
"gres": "lscratch:256",
"threads": "2"
},
"samtools_sort": {
"mem": "48g",
"gres": "lscratch:256",
"threads": "8"
},
"picard_dedup": {
"mem": "96g",
"gres": "lscratch:256",
Expand Down
1 change: 1 addition & 0 deletions rules.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
"freec_wgs_tumoronly": ["wgs-somatic-tumoronly"],
"sequenza": ["wgs-somatic","exomeseq-somatic"],
"avia": ["wgslow","exomeseq-germline","exomeseq-germline-recal","exomeseq-germline-partial","rnaseqvargerm"],
"samtools_sort": ["rnaseqvargerm"],
"index_bams": ["wgslow","exomeseq-germline","wgs-somatic-tumoronly","exomeseq-somatic-tumoronly","wgs-somatic","exomeseq-somatic"],
"rnaseqforfusions": ["rnaseqfusion"],
"rnafusioncleanup": ["none"],
Expand Down

0 comments on commit cd72f4a

Please sign in to comment.