Skip to content

Commit

Permalink
v0.45 fixed the find/location functions (not optimal).
Browse files Browse the repository at this point in the history
  • Loading branch information
Joseph committed Jul 19, 2011
1 parent 0a4e271 commit 4e8f9e3
Show file tree
Hide file tree
Showing 8 changed files with 39 additions and 20 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -1,4 +1,4 @@
#PARSES V0.44
#PARSES V0.45
For more information see [wiki](https://github.com/Lythimus/PARSES/wiki)

#Requirements
Expand Down
Empty file modified Xextractspans.pl 100755 → 100644
Empty file.
Empty file modified Xfilterspans.pl 100755 → 100644
Empty file.
Empty file modified Xnovotonm.pl 100755 → 100644
Empty file.
Empty file modified addTaxon.pl 100755 → 100644
Empty file.
Empty file modified fac.pl 100755 → 100644
Empty file.
Empty file modified parallelBlast.sh 100755 → 100644
Empty file.
57 changes: 38 additions & 19 deletions rakefile.rb
Expand Up @@ -6,11 +6,14 @@
require 'rake/clean'
require 'csv'

# Joseph Coco
# See README.md for usage.

#### DISCLAIMER
# This product may not be used for any sort of financial gain. Licenses for both MEGAN and Novoalign are strictly for non-profit research at non-profit institutions and academic usage.

PROG_NAME = 'PARSES'
VER = '0.44'
VER = '0.45'
PROG_DIR = File.dirname(__FILE__)
MEGAN_EXPANSION = 'expand direction=vertical; update;'

Expand Down Expand Up @@ -282,6 +285,7 @@ def findFile(filename, locate)

# Find a novo index of the specified criteria, make one if not found
def buildNovoIndex(name, pathGlob)
locate = (command? 'locate') && (!ENV['LOCATE_PATH'].to_s.empty? or File.exists? '/var/lib/mlocate/mlocate.db')
novoIndex=findFile("#{name}*.ndx", locate)
if novoIndex.to_s.empty?
novoIndex="#{File.dirname(pathGlob)}/#{name}.ndx"
Expand All @@ -292,6 +296,7 @@ def buildNovoIndex(name, pathGlob)

# Find a bowtie index of the specified criteria, make one if not found
def buildBowtieIndex(name, pathGlob)
locate = (command? 'locate') && (!ENV['LOCATE_PATH'].to_s.empty? or File.exists? '/var/lib/mlocate/mlocate.db')
bowtieIndex=findFile("#{name}*.ebwt", locate)
bowtieIndex= $1 if bowtieIndex =~ /(.*#{name}.*)\.\d+\.ebwt/i
resourceFiles=''
Expand Down Expand Up @@ -390,7 +395,7 @@ class OS
file sequence.removeNonMappedPath => sequence.novoalignPath do
puts 'RemoveHuman'
seqFileName = sequence.novoalignPath if forceFile != 'true'
exitStatus = safeExec("\"#{PROG_DIR}/Xnovotonm.pl\" \"#{seqFileName}\";", log, sequence,
exitStatus = safeExec("perl \"#{PROG_DIR}/Xnovotonm.pl\" \"#{seqFileName}\";", log, sequence,
'Removal of Human mapped reads from novoalign results not performed')
if exitStatus == 0
readsLeft=`egrep -c '^[ACTGN]' "#{sequence.removeNonMappedPath}"`
Expand All @@ -412,9 +417,9 @@ class OS
'TopHat sequence alignment not performed')
safeExec("samtools view -h -o \"#{ENV['seq']}_tophat_out/accepted_hits.sam\" \"#{ENV['seq']}_tophat_out/accepted_hits.bam\";", log, sequence,
'Samtools conversion not performed')
safeExec("\"#{PROG_DIR}/Xextractspans.pl\" \"#{ENV['seq']}_tophat_out/accepted_hits.sam\";", log, sequence,
safeExec("perl \"#{PROG_DIR}/Xextractspans.pl\" \"#{ENV['seq']}_tophat_out/accepted_hits.sam\";", log, sequence,
'Spanning region extraction not performed')
exitStatus = safeExec("\"#{PROG_DIR}/Xfilterspans.pl\" \"#{sequence.removeNonMappedPath}\" \"#{ENV['seq']}_tophat_out/accepted_hits.sam.spans\";", log, sequence,
exitStatus = safeExec("perl \"#{PROG_DIR}/Xfilterspans.pl\" \"#{sequence.removeNonMappedPath}\" \"#{ENV['seq']}_tophat_out/accepted_hits.sam.spans\";", log, sequence,
'Filter of spanning regions not performed')
if exitStatus == 0
readsLeft=`egrep -cv '>' "#{sequence.blast1Path}"`
Expand All @@ -437,7 +442,6 @@ class OS
blastPieces = FileList["#{seqFileName}.[a-zA-Z0-9][a-zA-Z0-9]"]
piecesLeft = blastPieces.length
fileCount = [pieces, blastPieces.length].min
puts fileCount
@blastCommands = []
blastPieces.each { | blastPiece |
@blastCommands << "blastn -db \'#{progSettings.ntDatabase}\' -soft_masking true #{dust} -num_threads #{ncpu} -evalue #{sequence.eValue1} -outfmt #{sequence.blastOutputFormat} -query \'#{blastPiece}\' -out \'#{blastPiece}.blast\' &"
Expand All @@ -451,7 +455,7 @@ class OS
end
}
`cat #{seqFileName}.[a-z][a-z].blast > #{seqFileName}.mergedBlast`
safeExec("\"#{PROG_DIR}/addTaxon.pl\" \"#{progSettings.giTaxIdNuclDatabase.to_s}\" \"#{seqFileName}.mergedBlast\" \"#{seqFileName}\";", log, sequence,
safeExec("perl \"#{PROG_DIR}/addTaxon.pl\" \"#{progSettings.giTaxIdNuclDatabase.to_s}\" \"#{seqFileName}.mergedBlast\" \"#{seqFileName}\";", log, sequence,
'Adding taxon to end of file not performed')
end

Expand All @@ -468,7 +472,7 @@ class OS
exitStatus, results = safeExec("MEGAN -f \"#{sequence.abyssPath}\" -x \"#{MEGAN_EXPANSION*sequence.expansionNumber} uncollapse all;\";", log, sequence,
'Opening MEGAN file not performed')
else
safeExec("MEGAN +g -E -x \"import blastfile='#{sequence.megan1Path}' fastafile='#{sequence.blast1Path}' meganfile='#{sequence.abyssPath}' minscore=#{sequence.minScoreByLength} toppercent=#{sequence.topPercent} winscore=#{sequence.winScore} minsupport=#{[(sequence.minSupport*`egrep -cv '>' "#{sequence.blast1Path}"`.chomp.to_i).to_i, 5].max} usecogs=#{sequence.useCogs} useseed=#{sequence.useSeed} usekegg=#{sequence.useKegg}; #{MEGAN_EXPANSION*sequence.expansionNumber} select nodes=all; uncollapse subtrees; update; exportimage format='#{sequence.imageFileType}' file='#{sequence.megan1Path + '.' + sequence.imageFileType.downcase}' REPLACE=true; quit;\";", log, sequence,
safeExec("MEGAN +g -E -x \"import blastfile='#{sequence.megan1Path}' fastafile='#{sequence.blast1Path}' meganfile='#{sequence.abyssPath}' minscore=#{sequence.minScoreByLength} toppercent=#{sequence.topPercent} winscore=#{sequence.winScore} minsupport=#{[(sequence.minSupport*`egrep -cv '>' "#{sequence.blast1Path}"`.chomp.to_i).to_i, 5].max} usecogs=#{sequence.useCogs} useseed=#{sequence.useSeed} usekegg=#{sequence.useKegg}; #{MEGAN_EXPANSION*sequence.expansionNumber} select nodes=all; uncollapse subtrees; update; exportimage file= '#{sequence.megan1Path + '.' + sequence.imageFileType.downcase}' format= '#{sequence.imageFileType}' REPLACE=true; quit;\";", log, sequence,
'MEGAN processing of BLASTed reads not performed')
exitStatus, results = safeExec("MEGAN -f \"#{sequence.abyssPath}\" -x \"#{MEGAN_EXPANSION*sequence.expansionNumber} uncollapse all;\";", log, sequence,
'Opening MEGAN file not performed')
Expand All @@ -489,10 +493,11 @@ class OS
seqFileName = sequence.abyssPathGlob if forceFile != 'true'
exitStatus=-1
FileList["#{seqFileName}"].each { | abyssFiles |
exitStatus = safeExec("\"#{PROG_DIR}/abyssKmerOptimizer.pl\" #{abyssFiles} #{sequence.minKmerLength} #{sequence.maxKmerLength} #{setDataTypes.abyss};", log, sequence,
exitStatus = safeExec("perl \"#{PROG_DIR}/abyssKmerOptimizer.pl\" #{abyssFiles} #{sequence.minKmerLength} #{sequence.maxKmerLength} #{setDataTypes.abyss};", log, sequence,
'ABySS not performed')
}
`cat #{sequence.blastPathGlob} > #{sequence.blast2Path}` if forceFile != 'true'
`cat #{sequence.blastPathGlob} > #{sequence.blast2Path}.commentSpaces` if forceFile != 'true'
`sed 's/ /,/g' #{sequence.blast2Path}.commentSpaces > #{sequence.blast2Path}`
if exitStatus == 0
coverageThreshold = $_ if (results =~ /(Using a coverage threshold of \d+)/)
medianKmerCoverage = $_ if (results =~ /(The median k-mer coverage is \d+)/)
Expand All @@ -510,7 +515,7 @@ class OS
seqFileName = sequence.blast2Path if forceFile != 'true'
safeExec("blastn -db \"#{progSettings.ntDatabase}\" -soft_masking true -num_threads #{ncpu} -evalue #{sequence.eValue2} -outfmt #{sequence.blastOutputFormat} -query \"#{seqFileName}\" -out \"#{sequence.megan2Path}.noTax\";", log, sequence,
'BLAST of contigs not performed')
safeExec("\"#{PROG_DIR}/addTaxon.pl\" \"#{progSettings.giTaxIdNuclDatabase.to_s}\" \"#{sequence.megan2Path}.noTax\" \"#{seqFileName}\";", log, sequence,
safeExec("perl \"#{PROG_DIR}/addTaxon.pl\" \"#{progSettings.giTaxIdNuclDatabase.to_s}\" \"#{sequence.megan2Path}.noTax\" \"#{seqFileName}\";", log, sequence,
'Adding taxon to end of BLAST contigs file not performed')
end

Expand All @@ -523,12 +528,12 @@ class OS
if ($1.to_f < 4.0)
safeExec("MEGAN +g -E -x \"import blastfile='#{sequence.megan2Path}' readfile='#{sequence.blast2Path}' meganfile='#{sequence.pipeEndPath}' minscore=#{sequence.minScoreByLength} toppercent=#{sequence.topPercent} winscore=#{sequence.winScore} minsupport=#{[(sequence.minSupport*`egrep -cv '>' "#{sequence.blast2Path}"`.chomp.to_i).to_i, 5].max} summaryonly=false usecompression=true usecogs=#{sequence.useCogs} usegos=#{sequence.useGos} useseed=false; #{MEGAN_EXPANSION*sequence.expansionNumber} uncollapse all; update; exportgraphics format='#{sequence.imageFileType}' file='#{sequence.megan2Path + '.' + sequence.imageFileType.downcase}' REPLACE=true; quit;\";", log, sequence,
'MEGAN processing of BLASTed contigs not performed')
exitStatus, results = safeExec("MEGAN -f \"#{sequence.pipeEndPath}.rma\";", log, sequence,
exitStatus, results = safeExec("MEGAN -f \"#{sequence.pipeEndPath}\";", log, sequence,
'Opening MEGAN file not performed')
else
safeExec("MEGAN +g -E -x \"import blastfile='#{sequence.megan2Path}' fastafile='#{sequence.blast2Path}' meganfile='#{sequence.pipeEndPath}' minscore=#{sequence.minScoreByLength} toppercent=#{sequence.topPercent} winscore=#{sequence.winScore} minsupport=#{[(sequence.minSupport*`egrep -cv '>' "#{sequence.blast2Path}"`.chomp.to_i).to_i, 5].max} usecogs=#{sequence.useCogs} useseed=#{sequence.useSeed} usekegg=#{sequence.useKegg}; update; set context=seedviewer; #{MEGAN_EXPANSION*sequence.expansionNumber} select nodes=all; uncollapse subtrees; update; exportimage format='#{sequence.imageFileType}' file='#{sequence.megan2Path + '.' + sequence.imageFileType.downcase}' REPLACE=true; quit;\";", log, sequence,
safeExec("MEGAN +g -E -x \"import blastfile='#{sequence.megan2Path}' fastafile='#{sequence.blast2Path}' meganfile='#{sequence.pipeEndPath}' minscore=#{sequence.minScoreByLength} toppercent=#{sequence.topPercent} winscore=#{sequence.winScore} minsupport=#{[(sequence.minSupport*`egrep -cv '>' "#{sequence.blast2Path}"`.chomp.to_i).to_i, 5].max} usecogs=#{sequence.useCogs} useseed=#{sequence.useSeed} usekegg=#{sequence.useKegg}; #{MEGAN_EXPANSION*sequence.expansionNumber} select nodes=all; uncollapse subtrees; update; exportimage file= '#{sequence.megan2Path + '.' + sequence.imageFileType.downcase}' format= '#{sequence.imageFileType}' REPLACE=true; quit;\";", log, sequence,
'MEGAN processing of BLASTed contigs not performed')
exitStatus, results = safeExec("MEGAN -f \"#{sequence.pipeEndPath}.rma\";", log, sequence,
exitStatus, results = safeExec("MEGAN -f \"#{sequence.pipeEndPath}\";", log, sequence,
'Opening MEGAN file not performed')
end
if exitStatus == 0
Expand All @@ -547,6 +552,7 @@ class OS
desc 'Install latest version of human genome database.'
task :hgInstall do
if progSettings.humanGenomeDatabase.to_s.chomp.empty?
puts 'installing hg database'
progSettings.humanGenomeDatabase=File.dirname(findFile('chr*.fa', locate))
if progSettings.humanGenomeDatabase.to_s.chomp == '.'
hg = ''
Expand Down Expand Up @@ -586,6 +592,7 @@ class OS
desc 'Install latest version of Novoalign.'
task :novoalignInstall do
if !command? 'novoalign'
puts 'installing novoalign'
novoalign = ''
Net::HTTP.start('www.novocraft.com', 80) { |http|
http.get('/main/releases.php', 'Referer' => 'http://www.novocraft.com/').body =~ /(V\d+\.\d+\.\d+)/
Expand Down Expand Up @@ -620,6 +627,7 @@ class OS
desc 'Install latest version of Bowtie.'
task :bowtieInstall do
if !command? 'bowtie'
puts 'installing bowtie'
bowtie = ''
Net::HTTP.start('bowtie-bio.sourceforge.net', 80) { |http|
http.get('/index.shtml').body =~ /https:\/\/sourceforge\.net\/projects\/bowtie-bio\/files\/bowtie\/(\d*\.\d*\.\d*)/
Expand Down Expand Up @@ -649,6 +657,7 @@ class OS
desc 'Install latest version of Samtools.'
task :samtoolsInstall do
if !command? 'samtools'
puts 'installing samtools'
samtools = ''
File.open('samtools.tar.bz2', 'w'){ |file|
if useRepo == true
Expand All @@ -675,6 +684,7 @@ class OS
desc 'Install latest version of Tophat.'
task :tophatInstall => [:samtoolsInstall, :bowtieInstall] do
if !command? 'tophat'
puts 'installing tophat'
tophat = ''
Net::HTTP.start('tophat.cbcb.umd.edu', 80) { |http|
http.get('/index.html').body =~ /\.\/downloads\/(tophat-\d+\.\d+\.\d+\.tar\.gz)/
Expand Down Expand Up @@ -702,6 +712,7 @@ class OS
desc 'Install latest version of ABySS with Google Sparsehash.'
task :abyssInstall do
if !command? 'ABYSS'
puts 'installing ABYSS'
Net::HTTP.start('code.google.com', 80) { |http|
http.get('/p/google-sparsehash/').body =~ /(http:\/\/google-sparsehash\.googlecode\.com)(\/files\/sparsehash-\d+\.\d+\.tar\.gz)/
}
Expand Down Expand Up @@ -747,6 +758,7 @@ class OS
task :blastInstall do
blast = ''
if !command? 'blastn'
puts 'installing BLAST+'
ftp = Net::FTP::new('ftp.ncbi.nlm.nih.gov')
ftp.login()
ftp.passive=true
Expand All @@ -762,7 +774,6 @@ class OS
cd ReleaseMT/build;
make all_r;
#sudo rm /usr/bin/blastdb_aliastool /usr/bin/dustmasker /usr/bin/rpstblastn /usr/bin/blastdbcheck /usr/bin/gene_info_reader /usr/bin/segmasker /usr/bin/blastdbcmd /usr/bin/gumbelparams /usr/bin/seqdb_demo /usr/bin/blast_formatter /usr/bin/srsearch /usr/bin/blastn /usr/bin/makeblastdb /usr/bin/tblastn /usr/bin/blastp /usr/bin/makembindex /usr/bin/tblastx /usr/bin/blastx /usr/bin/project_tree_builder /usr/bin/convert2blastmask /usr/bin/psiblast /usr/bin/windowmasker /usr/bin/datatool /usr/bin/rpsblast
ln -s /usr/bin/#{blast}/c++/ReleaseMT/bin/blastdb_aliastool /usr/bin;
ln -s /usr/bin/#{blast}/c++/ReleaseMT/bin/dustmasker /usr/bin;
ln -s /usr/bin/#{blast}/c++/ReleaseMT/bin/rpstblastn /usr/bin;
Expand Down Expand Up @@ -795,6 +806,7 @@ class OS
task :ntInstall => :blastInstall do
if ENV['BLASTDB'].to_s.empty?
if progSettings.ntDatabase.to_s.empty?
puts 'installing NT database'
progSettings.ntDatabase = findFile('nt.nal', locate).to_s.chomp('.nal')
if progSettings.ntDatabase.to_s.empty?
progSettings.ntDatabase='/usr/share/nt/nt'
Expand All @@ -814,6 +826,7 @@ class OS
desc 'Install latest version of MEGAN.'
task :meganInstall do
if !command? 'MEGAN'
puts 'installing MEGAN'
megan = ''
Net::HTTP.start('ab.inf.uni-tuebingen.de', 80) { |http|
http.get('/software/megan/welcome.html').body =~ /(\/data\/software\/[^"]*\/download\/welcome\.html)/
Expand All @@ -836,7 +849,8 @@ class OS
rm #{megan};
}
megan = `which MEGAN`.chomp
`ln -s "#{findFile(MEGAN)}" /usr/bin` if megan.empty?
locate = (command? 'locate') && (!ENV['LOCATE_PATH'].to_s.empty? or File.exists? '/var/lib/mlocate/mlocate.db')
`ln -s "#{findFile('MEGAN', locate)}" /usr/bin` if megan.empty?
megan = `which MEGAN`.chomp
if (arch == 64) # If CPU architecture is 64-bit, allow for more than 2GB of RAM and force 64-bit Java.
text = File.new(megan).read.gsub(/"\$prg_dir\/\$progname" "-server" "-Xms\d+." "-Xmx\d+."/, "\"$prg_dir/$progname\" \"-server\" \"-d64\" \"-Xms#{memInGigs}G\" \"-Xmx#{memInGigs}G\"")
Expand All @@ -849,6 +863,7 @@ class OS
task :giTaxIdNuclInstall do
progSettings.giTaxIdNuclDatabase=findFile('gi_taxid_nucl.dmp', locate)
if progSettings.giTaxIdNuclDatabase.to_s.chomp == '.'
puts 'installing NT gi to TaxID database'
ftp = Net::FTP::new('ftp://ftp.ncbi.nih.gov')
ftp.login()
ftp.passive=true
Expand All @@ -864,6 +879,7 @@ class OS
task :parallelIteratorInstall do
  # Probe: have perl load Parallel::Iterator and run a no-op script.
  `perl -MParallel::Iterator -e 1`
  # An exit status of 0 means the probe succeeded, so there is nothing to do.
  next if $?.exitstatus == 0

  puts 'installing Parallel::Iterator'
  # Ask the CPAN module to install it.
  `perl -MCPAN -e 'install Parallel::Iterator'`
end
Expand All @@ -872,6 +888,7 @@ class OS
task :otherIndex do
puts 'OtherIndex'
if !indexName.empty? and !indexGlob.empty?
puts 'installing other indices'
buildNovoIndex(indexName, indexGlob)
buildBowtieIndex(indexName, indexGlob)
end
Expand Down Expand Up @@ -901,12 +918,14 @@ class OS

desc 'Automatically saving any settings changes which may have been made'
task :reserialize do
  # Reserialize the settings objects in case any changes have been made.
  #
  # The per-sequence settings are written to ".<seq>" (relative to the current
  # directory) only when not running in install mode; in install mode the file
  # is not opened at all, so an existing one is never truncated.
  # Block-form File.open closes each handle even if YAML.dump raises, and each
  # object is dumped exactly once to a handle that is guaranteed to be open.
  unless installMode
    File.open(".#{ENV['seq']}", 'w') do |seqFile|
      YAML.dump(sequence, seqFile)
    end
  end

  # Program-wide settings are always saved to "~/.<PROG_NAME>".
  File.open(File.expand_path("~/.#{PROG_NAME}"), 'w') do |progSettingsFile|
    YAML.dump(progSettings, progSettingsFile)
  end
end

Expand Down

0 comments on commit 4e8f9e3

Please sign in to comment.