Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Pipeline now capable of iterating. Working on memory issue.

  • Loading branch information...
commit 0ef84d8f2e7902bc3cf134cfd044ecd3ffdc8652 1 parent 8e7d7c7
@JesseJ authored
View
11 pipeline0.01/src/combiner.rb
@@ -9,26 +9,27 @@ def initialize(files, run)
# Combines the files and writes it to a new psms file, returns the new file name.
def combine
- puts "\n----------------"
+ puts "\n--------------------------------"
puts "Combining search engine hits...\n"
all_hits = [] #Scans to use in the next search iteration.
@files.each do |file|
+ file_name = file.chomp(".psms").split("_")[-1]
File.open(file, "r").each do |line|
parts = line.split("\t")
next if parts[0] == "PSMId"
id = parts[0].split(".")[1..3].join(".")
- score = parts[1]
qvalue = parts[2]
prob = parts[3]
- rest = parts[4...-1].join("\t")
+ peptide = parts[4]
+ proteins = parts[5...-1].join("\t")
- all_hits << [id, score, qvalue, prob, rest]
+ all_hits << [id, file_name, qvalue, prob, peptide.chomp, proteins.chomp]
end
end
- combined_hits = all_hits.sort_by {|x| x[0]}
+ combined_hits = all_hits.sort_by {|x| [x[0], x[4]]}
combined_file = "#{$path}../data/combined_#{@run}.psms"
File.open(combined_file, "w") do |file|
View
9 pipeline0.01/src/mzml_to_other.rb
@@ -6,29 +6,30 @@ class MzmlToOther
#type == The extension type, e.g. mgf
#file == A string containing the file location
#hardklor == true or false, whether or not to run Hardklor. Doesn't work with mzML.
- def initialize(type, file, hardklor)
+ def initialize(type, file, run, hardklor)
@type = type
@file = file
@hardklor = hardklor
+ @run = run
end
#Converts file into type. Determines whether to run convert_mzXML or convert_mzML
#based on the extension of file.
def convert
- puts "\n----------------"
+ puts "\n--------------------------------"
puts "Transforming #{File.extname(@file)} file to .#{@type} format..."
runHardklor if @hardklor && @file.downcase.include?(".mzxml")
if @type == "mgf" || @type == "ms2"
Ms::Msrun.open(@file) do |ms|
- file = @file.chomp(File.extname(@file)) + ".#{@type}"
+ file = @file.chomp(File.extname(@file)) + "_#{@run}.#{@type}"
File.open(file, 'w') do |f|
f.puts eval "ms.to_#{@type}"
end
end
else
- # If ms-msrun can't do it, then this probably will.
+ # If ms-msrun can't do it, then this probably will. TODO: include the run number in the output file name here, as the ms-msrun branch above does.
system("/usr/local/src/tpp-4.3.1/build/linux/MzXML2Search -#{@type} #{@file}")
end
end
View
2  pipeline0.01/src/percolator.rb
@@ -11,7 +11,7 @@ def initialize(files, type)
#Runs percolator on the given files
def run
- puts "\n----------------"
+ puts "\n--------------------------------"
puts "Running Percolator...\n\n"
database = extractDatabase(@type).chomp("fasta") + "yml"
View
43 pipeline0.01/src/pipeline.rb
@@ -15,33 +15,46 @@
type = "human"
-#Main
+# This is the main class of the pipeline.
class Pipeline
- #This is the main class of the pipeline.
- #file == A string containing the location of the raw file
- #type == The type of input, e.g. human or bovin
- def initialize(file, type)
+ # file == A string containing the location of the raw file
+ # database == The type of input, e.g. human or bovin
+ def initialize(file, database)
@file = file
- @type = type
+ @database = database
end
def run
puts "\nHere we go!\n"
RawToMzml.new("#{@file}").to_mzML
- MzmlToOther.new("mgf", "#{@file}.mzML", false).convert
- MzmlToOther.new("ms2", "#{@file}.mzML", false).convert
- output = Search.new("#{@file}", @type, "trypsin", 1, :omssa => true, :xtandem => true, :tide => true, :mascot => true).run
- output = Percolator.new(output, @type).run
- #Refiner.new(output, 0, "#{@file}.mzML").refine
- a = "#{$path}../data/test_"
- file = Combiner.new(["#{a}tide_1.psms", "#{a}omssa_1.psms", "#{a}tandem_1.psms", "#{a}mascot_1.psms"], 1).combine
- Refiner.new(file, 0.5, "#{@file}.mzML").refine
+ [1,2,3,4].each do |i|
+ MzmlToOther.new("mgf", "#{@file}.mzML", i, false).convert
+ MzmlToOther.new("ms2", "#{@file}.mzML", i, false).convert
+ output = Search.new("#{@file}_#{i}", @database, "trypsin", :omssa => true, :xtandem => true, :tide => true, :mascot => true).run
+ output = Percolator.new(output, @database).run
+ GC.start
+ file = Combiner.new(output, i).combine
+ Refiner.new(file, 0.8, "#{@file}.mzML", i).refine
+ GC.start
+ end
+
+# RawToMzml.new("#{@file}").to_mzML
+# MzmlToOther.new("mgf", "#{@file}.mzML", 1, false).convert
+# MzmlToOther.new("ms2", "#{@file}.mzML", 1, false).convert
+# output = Search.new("#{@file}_1", @database, "trypsin", :omssa => true, :xtandem => true, :tide => true, :mascot => true).run
+# output = Percolator.new(output, @database).run
+# file = Combiner.new(output, 1).combine
+# Refiner.new(file, 0.9, "#{@file}.mzML", 1).refine
+
+# a = "#{$path}../data/test_1_"
+# file = Combiner.new(["#{a}tide.psms", "#{a}omssa.psms", "#{a}tandem.psms", "#{a}mascot.psms"], 1).combine
+# Refiner.new(file, 0.8, "#{@file}.mzML", 1).refine
notifyCompletion
end
- #Displays a randomly chosen exclamation of joy.
+ # Displays a randomly chosen exclamation of joy.
def notifyCompletion
done = rand(13)
puts "\nBoo-yah!" if done == 0
View
4 pipeline0.01/src/raw_to_mzml.rb
@@ -8,7 +8,7 @@ def initialize(file)
#Converts file to mzXML
def to_mzXML
- puts "\n----------------"
+ puts "\n--------------------------------"
puts "Transforming raw file to mzXML format...\n\n"
system("wine readw.exe --mzXML #{@file}.raw 2>/dev/null")
@@ -17,7 +17,7 @@ def to_mzXML
#Converts file to mzML. There must also be msconvert_server.rb running on
#a Windows machine with msconvert.exe for this to work.
def to_mzML
- puts "\n----------------"
+ puts "\n--------------------------------"
puts "Transforming raw file to mzML format...\n\n"
host = '192.168.101.180' #The address of the Windows machine I'm using.
View
11 pipeline0.01/src/refiner.rb
@@ -4,15 +4,16 @@ class Refiner
# file == combiner.rb output
# cutoff == The cutoff value. Only those above the cutoff value are kept
# mzFile == The mzML or mzXML file that was used
- def initialize(file, cutoff, mzFile)
+ def initialize(file, cutoff, mzFile, run)
@file = file
@cutoff = cutoff
@mzFile = mzFile
+ @run = run
end
# Determines which scans to include and creates a new (mgf and/or ms2) file.
def refine
- puts "\n----------------"
+ puts "\n--------------------------------"
puts "Refining search...\n"
write_to_msms(refine_scans)
@@ -38,9 +39,9 @@ def refine_scans
# Writes the given scans to mgf and ms2
def write_to_msms(selected_scans)
Ms::Msrun.open(@mzFile) do |ms|
- fName = @mzFile.chomp(File.extname(@mzFile))
- ms.to_mgf(:output => fName + ".ms2", :selected_scans => selected_scans)
- ms.to_ms2(:output => fName + ".ms2", :selected_scans => selected_scans)
+ fName = @mzFile.chomp(File.extname(@mzFile)) + "_#{@run + 1}"
+ ms.to_mgf(:output => fName + ".mgf", :included_scans => selected_scans)
+ ms.to_ms2(:output => fName + ".ms2", :included_scans => selected_scans)
end
end
end
View
27 pipeline0.01/src/search.rb
@@ -14,9 +14,8 @@ class Search
#enzyme == the enzyme to use in the search, e.g. trypsin
#run == which run, or iteration, this is
#opts: All option values are either true or false.
- def initialize(file, database, enzyme, run, opts={})
+ def initialize(file, database, enzyme, opts={})
@opts = opts
- @run = run
@enzyme = enzyme
@database = database
@file = file
@@ -26,7 +25,7 @@ def initialize(file, database, enzyme, run, opts={})
#Runs all the selected search engines and returns the names of the output files.
def run
- puts "\n----------------"
+ puts "\n--------------------------------"
puts "Running search engines...\n\n"
threads = []
@@ -78,10 +77,10 @@ def createTandemInput(decoy)
if decoy
notes['protein, taxon'] = "#{@database}-r"
- notes['output, path'] = "#{@file}-decoy_tandem_#{@run}.xml"
+ notes['output, path'] = "#{@file}-decoy_tandem.xml"
else
notes['protein, taxon'] = "#{@database}"
- notes['output, path'] = "#{@file}-target_tandem_#{@run}.xml"
+ notes['output, path'] = "#{@file}-target_tandem.xml"
end
xml.bioml do
@@ -94,8 +93,8 @@ def createTandemInput(decoy)
end
def runOMSSA
- target = "#{@file}-target_omssa_#{@run}.pep.xml"
- decoy = "#{@file}-decoy_omssa_#{@run}.pep.xml"
+ target = "#{@file}-target_omssa.pep.xml"
+ decoy = "#{@file}-decoy_omssa.pep.xml"
#Target search
exec("#{$path}../../omssa/omssacl -fm #{@file}.mgf -op #{target} -e #{getOMSSAEnzyme} -d #{extractDatabase(@database)}") if fork == nil
@@ -110,8 +109,8 @@ def runTide
database = extractDatabase(@database)
databaseR = extractDatabase(@database + "-r")
path = "#{$path}../../crux/tide/"
- tFile = "#{@file}-target_tide_#{@run}"
- dFile = "#{@file}-decoy_tide_#{@run}"
+ tFile = "#{@file}-target_tide"
+ dFile = "#{@file}-decoy_tide"
pidF = fork {exec("#{path}tide-index --fasta #{database} --enzyme #{@enzyme} --digestion full-digest")}
pidR = fork {exec("#{path}tide-index --fasta #{databaseR} --enzyme #{@enzyme} --digestion full-digest")}
@@ -159,7 +158,7 @@ def runMascot
end
threads.each {|thread| thread.join}
- @outputFiles << ["#{@file}-target_mascot_#{@run}.pep.xml", "#{@file}-decoy_mascot_#{@run}.pep.xml"]
+ @outputFiles << ["#{@file}-target_mascot.pep.xml", "#{@file}-decoy_mascot.pep.xml"]
end
#Not the best name. Just a helper method factored out of runMascot.
@@ -209,8 +208,8 @@ def automateMascot(a, page, yml, type)
form = export_page.form('Re-format')
form.field_with(:name => 'export_format').options[2].select
page = a.submit(form, form.buttons[1])
- File.open("#{@file}-target_mascot_#{@run}.pep.xml", 'w') {|f| f.write(page.body)} if type == :target
- File.open("#{@file}-decoy_mascot_#{@run}.pep.xml", 'w') {|f| f.write(page.body)} if type == :decoy
+ File.open("#{@file}-target_mascot.pep.xml", 'w') {|f| f.write(page.body)} if type == :target
+ File.open("#{@file}-decoy_mascot.pep.xml", 'w') {|f| f.write(page.body)} if type == :decoy
end
end
@@ -225,8 +224,8 @@ def runSpectraST
def convertTandemOutput
#Convert to pepXML format
- file1 = "#{@file}-target_tandem_#{@run}.xml"
- file2 = "#{@file}-decoy_tandem_#{@run}.xml"
+ file1 = "#{@file}-target_tandem.xml"
+ file2 = "#{@file}-decoy_tandem.xml"
pepFile1 = file1.chomp(".xml") + ".pep.xml"
pepFile2 = file2.chomp(".xml") + ".pep.xml"
@outputFiles << [pepFile1, pepFile2]
Please sign in to comment.
Something went wrong with that request. Please try again.