Permalink
Switch branches/tags
Nothing to show
Find file
Fetching contributors…
Cannot retrieve contributors at this time
executable file 150 lines (126 sloc) 5.67 KB
#!/usr/bin/Rscript --vanilla
# Copyright (c) 2011 Expression Analysis / Gunjan Hariani, Erik Aronesty
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
# $Id: fastx-graph 525 2012-12-25 19:41:22Z earonesty $
if (!require(getopt)) {
write(c("Installing package on:",system("hostname",intern=T)),file=stderr())
install.packages('getopt',repos='http://R-Forge.R-project.org')
# will die if it fails at this point
library(getopt)
}
if (!require("Hmisc")) {
write(c("Installing package on:",system("hostname",intern=T)),file=stderr())
install.packages('Hmisc',repos='http://R-Forge.R-project.org')
# will die if it fails at this point
library("Hmisc")
}
spec <- matrix(c(
'input' , 'i', 1, "character", "file from fastq-stats -x (required)",
'gc' , 'G', 1, "character", "input gc content file (optional)",
'out' , 'o', 1, "character", "output filename (optional)",
'help' , 'h', 0, "logical", "this help"
),ncol=5,byrow=T)
opt = getopt(spec);
if (!is.null(opt$help) || is.null(opt$input)) {
cat(paste(getopt(spec, usage=T, command="fastx-graph"),"\n"));
q();
}
in.file <- opt$input
gc.file <- opt$gc
out.file <- opt$out
fastx <- read.table(in.file,sep="\t",header=T,as.is=T)
# output is in, minus txt, plus png
if (is.null(out.file)) {
in.file <- gsub(".txt$","",in.file,perl=T)
out.file <- paste(in.file,".png",sep="")
}
# correct for bug in R if the file has a % sign in it
out.file <- gsub("%","%%",out.file)
png(out.file,width=1000, height=500)
par(mar=c(4,3,5,5),xaxs="i",yaxs="i",xpd=T)
plot(c(0,0),pch="",ylim=c(min(c(0,fastx$lW)),max(41,fastx$rW)),xlim=c(0,(nrow(fastx)+1)),
xlab="",ylab="",las=2,cex.axis=.85,cex.lab=.85,xaxt="n")
lims <- par("xaxp")[1:2]
major.ticks <- pretty(lims,n=par("xaxp")[3])
minor.tick(nx=(major.ticks[2]-major.ticks[1]),ny=10)
minor.ticks <- 1:nrow(fastx)
mtext("Cycle",side=3,line=3,cex=.85)
axis(1,at=major.ticks,cex.axis=.85,labels=F)
axis(3,at=major.ticks,cex.axis=.85,labels=F)
axis(3,at=minor.ticks,tcl=par("tcl")*0.5,labels=minor.ticks,cex.axis=.80,las=3)
colnames(fastx) <- c("column","count","min","max","sum","mean","Q1","med",
"Q3","IQR","lW","rW","A_count","C_count","G_count",
"T_count","N_count","Max_count")
for(i in 1:nrow(fastx)) {
par(new=T)
rect(fastx$column[i]-.35,fastx$Q1[i],fastx$column[i]+0.35,fastx$Q3[i],col="gray")
segments(fastx$column[i]-.35,fastx$med[i],fastx$column[i]+0.35,fastx$med[i],col="red",lwd=1.5)
segments(fastx$column[i],fastx$lW[i],fastx$column[i],fastx$Q1[i],lty="dashed")
segments(fastx$column[i],fastx$Q3[i],fastx$column[i],fastx$rW[i],lty="dashed")
segments(fastx$column[i]-.35,fastx$lW[i],fastx$column[i]+0.35,fastx$lW[i])
segments(fastx$column[i]-.35,fastx$rW[i],fastx$column[i]+0.35,fastx$rW[i])
}
# if theres a significant difference
tots<-fastx$N_count+fastx$T_count+fastx$G_count+fastx$C_count+fastx$A_count
if (min(tots) < (max(tots)*.98)) {
par(new=T)
plot(tots,col="gray",xaxt="n",yaxt="n",xlab="",ylab="",pch="+",ylim=c(min(tots),max(tots)))
lines(fastx$N_count+fastx$T_count+fastx$G_count+fastx$C_count+fastx$A_count,col="gray",xaxt="n",yaxt="n",xlab="",ylab="", lty=2)
}
par(new=T)
plot(fastx$column,fastx$A_count*100/fastx$count,col="red",type="l",xaxt="n",yaxt="n",xlab="",ylab="",
ylim=c(0,100),lwd=2)
par(new=T)
plot(fastx$column,fastx$C_count*100/fastx$count,col="blue",type="l",xaxt="n",yaxt="n",xlab="",ylab="",
ylim=c(0,100),lwd=2)
par(new=T)
plot(fastx$column,fastx$G_count*100/fastx$count,col="green",type="l",xaxt="n",yaxt="n",xlab="",ylab="",
ylim=c(0,100),lwd=2)
par(new=T)
plot(fastx$column,fastx$T_count*100/fastx$count,col="black",type="l",xaxt="n",yaxt="n",xlab="",ylab="",
ylim=c(0,100),lwd=2)
par(new=T)
barplot(fastx$N_count*100/fastx$count,col="orange",xaxt="n",yaxt="n",xlab="",ylab="",
ylim=c(0,100))
axis(4,at=seq(0,100,25),cex.axis=.85)
mtext("Pct Base Distribution",side=4,line=3,cex=.85)
mtext("Base Quality",side=2,line=2,cex=.85)
legend(0,-4,col=c("red","blue","green","black","orange"),lty=1,
legend=c("A","C","G","T","N"),cex=.70,horiz=T,lwd=2)
if(!is.null(gc.file)) {
tmp <- read.table(gc.file,sep="\t",header=F,as.is=T)
if (tmp[1,1] == 'pct-GC') {
# silly legacy format
GC <- read.table(gc.file,sep="\t",header=T,as.is=T,skip=3)
} else if (tmp[1,1] == 'pct_GC') {
GC <- read.table(gc.file,sep="\t",header=T,as.is=T,skip=1)
} else {
GC <- read.table(gc.file,sep="\t",header=T,as.is=T)
}
colnames(GC)=c("pct_gc", "count")
par(new=T)
par(fig=c(0.1,0.2,0.45,0.60))
par(mar=c(0,0,1,0))
plot(GC$pct_gc,GC$count,type="l",xaxt="n",yaxt="n",
main="%GC per read",cex.main=.90)
axis(1,seq(0,100,20),labels=seq(0,100,20),las=2,tck=-0.1,cex.axis=.75)
}
graphics.off()