Skip to content

Commit

Permalink
Added more documentation and switched to yard's documentation format
Browse files Browse the repository at this point in the history
  • Loading branch information
JesseJ committed Aug 25, 2010
1 parent bd8c8d2 commit 8f0909b
Show file tree
Hide file tree
Showing 19 changed files with 230 additions and 128 deletions.
37 changes: 4 additions & 33 deletions KatamariDotei/bin/database_formatter.rb
Expand Up @@ -12,7 +12,10 @@
exit
end

#Performs all the necessary formatting of databases for the Pipeline.
# Performs all the necessary formatting of databases for the Pipeline.
#
# @author Jesse Jashinsky (Aug 2010)
# @todo Add function to take care of updating taxonomy.xml and creation of the reverse folder if none exists. Is it possible to automate the adding of the Mascot databases?
class DatabaseFormatter
#fileName == The name of the file in the databases folder
def initialize(fileName)
Expand All @@ -32,38 +35,6 @@ def formatDatabase

puts "Creating yaml files..."
exec("#{$path}../../ms-error_rate/bin/fasta_to_peptide_centric_db.rb #{targetDatabase}") if fork == nil
exec("#{$path}../../ms-error_rate/bin/fasta_to_peptide_centric_db.rb #{decoyDatabase}") if fork == nil

begin
Process.wait while true

rescue SystemCallError
end

# puts "Creating sqlite databases. You should go do something else, because this will take a while."
# create_database(targetDatabase.chomp("fasta"))
# create_database(targetDatabase.chomp("fasta.reverse"))
end

# Builds an sqlite database of peptides from a peptide-centric yaml file.
#
# Reads the file "<name>yml" line by line, splits each line at the first ": " into a
# peptide and its proteins, and inserts every pair into a new "peps" table in "<name>db".
#
# @param [String] name the common file prefix; "yml" and "db" are appended to it directly
def create_database(name)
  # File.foreach closes the file once every line has been read (a bare File.open leaves it open).
  # The limit of 2 keeps any further ": " inside the protein part instead of dropping it.
  rows = File.foreach(name + "yml").map do |line|
    peptide, proteins = line.split(": ", 2)
    [peptide, proteins]
  end

  db = Amalgalite::Database.new("#{name}db")
  db.execute("CREATE TABLE peps(peptide, proteins)")

  # Values are bound through a prepared statement rather than interpolated into the SQL text.
  db.prepare("INSERT INTO peps(peptide, proteins) VALUES(?, ?)") do |stmt|
    rows.each do |row|
      p row
      stmt.execute(row[0], row[1])
    end
  end
end
end

Expand Down
11 changes: 8 additions & 3 deletions KatamariDotei/lib/combiner.rb
@@ -1,16 +1,21 @@

# Combines multiple .psms files into one .psms file. Currently combines by averaging (see the @todo below).
#
# @author Jesse Jashinsky (Aug 2010)
# @todo Create specialized statistical function for combining files. Currently employs averaging.
class Combiner
# Stores the inputs needed to combine search engine results.
#
# @param [Array(String)] files Percolator.run output
# @param [String] raw_name the name of the raw file
# @param [String] run the run or iteration
def initialize(files, raw_name, run)
  @files, @raw_name, @run = files, raw_name, run
end

# Combines the files and writes it to a new psms file, returns the new file name.
#
# @return [String] the combined psms file
def combine
puts "\n--------------------------------"
puts "Combining search engine hits...\n"
Expand Down
31 changes: 17 additions & 14 deletions KatamariDotei/lib/format/format.rb
@@ -1,14 +1,15 @@
require 'yaml'

module PercolatorInput #This is to prevent confusion between this Format and mzIdentML Format.
#A base class for other file formats to be used in Search2Tab. Other formats are meant to inherit from this class, thus Format is basically useless by itself.
#Contains methods that can be used by all formats.
#Takes strings containing the target and decoy output file locations and the forward and reverse FASTA databases.
module PercolatorInput # This is to prevent confusion between this Format and mzIdentML Format.
# A base class for other file formats to be used in Search2Tab. Other formats are meant to inherit from this class, thus Format is basically useless by itself.
# Contains methods that can be used by all formats.
# Takes strings containing the target and decoy output file locations and the forward and reverse FASTA databases.
#
# @author Jesse Jashinsky (Aug 2010)
class Format
#target == A string containing the file location of the target pepXML
#decoy == A string containing the file location of the decoy pepXML
#database == A hash of target {peptide => proteins}
#revDatabase == A hash of decoy {peptide => proteins}
# @param [String] target the file location of the target file
# @param [String] decoy the file location of the decoy file
# @param [Hash] proteins a hash of peptides to proteins
def initialize(target, decoy, proteins)
@target = target
@decoy = decoy
Expand All @@ -17,28 +18,30 @@ def initialize(target, decoy, proteins)
@proteins = proteins
end

# Base-class placeholder for the file name without things like "target" or ".pep.xml" in it.
# Always returns an empty string here; the concrete formats define their own version.
#
# @return [String] file location without extension and target
def fileWithoutExtras
""
end

# Base-class placeholder; always returns an empty string here.
#
# @return [String] the target file location
def target
""
end

# Base-class placeholder; always returns an empty string here.
#
# @return [String] the decoy file location
def decoy
""
end

# Base-class placeholder; always returns an empty string here.
#
# @return [String] an empty string
def scores
""
end

# Base-class placeholder; always returns an empty array here.
#
# @return [Array(String)] an array of spectral matches
def matches
[]
end

#Obtains all the proteins that the given peptide maps to.
# Obtains all the proteins that the given peptide maps to.
#
# @param [String] peptide the peptide
# @return [String] a tab separated list of protein IDs
def proteins(peptide)
proteins = @proteins[peptide]
return proteins if proteins != nil
Expand Down
37 changes: 26 additions & 11 deletions KatamariDotei/lib/format/mzidentml.rb
Expand Up @@ -3,36 +3,41 @@

module PercolatorInput
# An mzIdentML Format object.
#
# @author Jesse Jashinsky (Aug 2010)
class MzIdentML < Format
# target == A string containing the file location of the target pepXML
# decoy == A string containing the file location of the decoy pepXML
# database == A hash of target {peptide => proteins}
# revDatabase == A hash of decoy {peptide => proteins}
def initialize(target, decoy, database)
# Sets up the format through the parent initializer and marks the output file name as not yet computed.
#
# @param [String] target the file location of the target mzIdentML
# @param [String] decoy the file location of the decoy mzIdentML
# @param [Hash] proteins a hash of peptides to proteins
def initialize(target, decoy, proteins)
  super(target, decoy, proteins)

  # An empty string is the "not built yet" marker that fileWithoutExtras checks for.
  @fileName = ""
end

# Builds (on first use) and returns the base path for this run's Percolator files.
#
# The last path component of @target is split on "-". The result is
# "#{$path}../data/percolator/" followed by the first part and by the second part
# with its first six characters and the file extension removed.
# The first call also resets the @peptides hash.
#
# @return [String] file location without extension and target
def fileWithoutExtras
  if @fileName == ""
    prefix, remainder = @target.split("/")[-1].split("-")

    # Skips the first six characters of the second part (presumably the "target" tag -- TODO confirm).
    @fileName = "#{$path}../data/percolator/" + prefix + remainder[6..-1].chomp(File.extname(@target))
    @peptides = {}
  end

  @fileName
end


# Reader for the target file location stored in @target.
#
# @return [String] the target file location
def target
@target
end

# Reader for the decoy file location stored in @decoy.
#
# @return [String] the decoy file location
def decoy
@decoy
end

# Creates and returns a header for the tab file.
#
# @return [String] the header
def header
temp = ""
result = "SpecId\tLabel\tCharge\t"
Expand All @@ -48,7 +53,8 @@ def header

result += "Peptide\t" + "Proteins"
end


# @return [Array(String)] an array of spectral matches
def matches
parse if @matches == []

Expand All @@ -58,7 +64,8 @@ def matches

private

# Reads the given XML file and parses it with Nokogiri.
#
# @param [String] file the location of the mzIdentML file
# @return [Nokogiri::XML::Document] the parsed document
def nokogiriDoc(file)
  # File.read instead of IO.read: IO.read treats a name starting with "|" as a command to run.
  Nokogiri::XML(File.read(file.to_s))
end
Expand Down Expand Up @@ -86,7 +93,13 @@ def parse
end
end

# Parses the pepXML file and returns an PSM object (A line for the .tab file)
# Parses the mzIdentML file and returns a PSM object (A line for the .tab file)
#
# @param [Nokogiri] doc the whole doc
# @param [Nokogiri] query a Nokogiri object from a search query
# @param [String] label a string of either a 1 or a -1
# @param [String] listNum the list number used in the SpectrumIdentificationItem id ("SII_<listNum>_<rank>")
# @param [Number] rank the rank of the search hit
def psm(doc, query, label, listNum, rank)
#Required Stuff
hit = query.xpath(".//xmlns:SpectrumIdentificationItem[@id=\"SII_#{listNum}_#{rank}\"]")
Expand All @@ -113,6 +126,8 @@ def psm(doc, query, label, listNum, rank)
end

# Loads the peptides from the mzIdentML file into the peptides hash.
#
# @param [Nokogiri] doc the whole doc
def load_peptides(doc)
@peptides = {}
doc.xpath("//xmlns:Peptide").each do |peptide|
Expand Down
37 changes: 24 additions & 13 deletions KatamariDotei/lib/format/pepxml.rb
Expand Up @@ -3,32 +3,37 @@

module PercolatorInput
# A pepXML Format object.
#
# @author Jesse Jashinsky (Aug 2010)
class PepXML < Format
# target == A string containing the file location of the target pepXML
# decoy == A string containing the file location of the decoy pepXML
# database == A hash of target {peptide => proteins}
# revDatabase == A hash of decoy {peptide => proteins}
def initialize(target, decoy, database)
# Sets up the format by handing every argument to the parent initializer.
#
# @param [String] target the file location of the target pepXML
# @param [String] decoy the file location of the decoy pepXML
# @param [Hash] proteins a hash of peptides to proteins
def initialize(target, decoy, proteins)
  super(target, decoy, proteins)
end

# Builds the base path for this run's Percolator files.
#
# The last path component of @target is split on "-". The result is
# "#{$path}../data/percolator/" followed by the first part and by the second part
# with its first six characters and a trailing ".pep.xml" removed.
#
# @return [String] file location without extension and target
def fileWithoutExtras
  prefix, remainder = @target.split("/")[-1].split("-")

  # Skips the first six characters of the second part (presumably the "target" tag -- TODO confirm).
  "#{$path}../data/percolator/" + prefix + remainder[6..-1].chomp(".pep.xml")
end


# Reader for the target file location stored in @target.
#
# @return [String] the target file location
def target
@target
end

# Reader for the decoy file location stored in @decoy.
#
# @return [String] the decoy file location
def decoy
@decoy
end

#Creates and returns a header for the tab file.
# Creates and returns a header for the tab file.
#
# @return [String] the header
def header
temp = ""
result = "SpecId\tLabel\tCharge\t"
Expand All @@ -44,7 +49,8 @@ def header

result += "Peptide\t" + "Proteins"
end


# @return [Array(String)] an array of spectral matches
def matches
parse if @matches == []

Expand All @@ -54,7 +60,8 @@ def matches

private

#Returns a Nokogiri object
# @param [String] file the location of the pepXML file
# @return [Nokogiri] a Nokogiri object
def nokogiriDoc(file)
doc = Nokogiri::XML(IO.read("#{file}"))

Expand All @@ -64,7 +71,7 @@ def nokogiriDoc(file)
doc
end

#Parses out everything in the pepXML file
# Parses out everything in the pepXML file
def parse
#Target
doc = nokogiriDoc(@target)
Expand All @@ -85,7 +92,11 @@ def parse
GC.start
end

#Parses the pepXML file and returns a PSM object (A line for the .tab file)
# Parses the pepXML file and returns a PSM object (A line for the .tab file)
#
# @param [Nokogiri] query a Nokogiri object from a search query
# @param [String] label a string of either a 1 or a -1
# @param [Number] rank the rank of the search hit
def psm(query, label, rank)
#Required Stuff
spect = query.xpath("./@spectrum").to_s.chomp(" ") #X! Tandem has a space at the end that messes things up
Expand Down
5 changes: 4 additions & 1 deletion KatamariDotei/lib/format/search2tab.rb
Expand Up @@ -2,8 +2,10 @@
require "#{$path}format/mzidentml.rb"

# Turns a search engine output (e.g. pepXML) into a tab-delimited file for Percolator.
#
# @author Jesse Jashinsky (Aug 2010)
class Search2Tab
# Stores the Format object whose header and matches are written out by convert.
#
# @param [Format] format a Format object
def initialize(format)
@format = format
end
Expand All @@ -14,6 +16,7 @@ def convert
tab = File.new("#{name}.tab", "w")
matches = @format.matches

# The tab files requires a header followed by spectral matches.
tab.puts @format.header
matches.each {|match| tab.puts match}

Expand Down
20 changes: 15 additions & 5 deletions KatamariDotei/lib/helper_methods.rb
@@ -1,6 +1,7 @@
#Methods that are needed by different classes.

#Prevents "no process" error
# Prevents "no process" error. Simply loops and waits for all processes to finish, then once it waits
# on a nonexistent process the rescue quietly catches the error, and things proceed as normal.
def waitForAllProcesses
begin
Process.wait while true
Expand All @@ -10,7 +11,9 @@ def waitForAllProcesses
end
end

#Prevents "no process" error
# Prevents "no process" error. Waits on a specific process, but if the process is already finished, then the error is quietly rescued.
#
# @param [Number] pid the pid, or Process ID
def waitForProcess(pid)
begin
waitpid(pid, 0)
Expand All @@ -19,20 +22,27 @@ def waitForProcess(pid)
end
end

# Obtains the file location of the FASTA database from the taxonomy file.
#
# Parses "#{$path}../../databases/taxonomy.xml", selects the taxon whose label equals
# the given type, and returns the URL attribute of its file element(s) as a string.
#
# @param [String] type the type of database, e.g. "human" or "mouse"
# @return [String] the URL value(s) found for that taxon
def extractDatabase(type)
  taxonomy_xml = IO.read("#{$path}../../databases/taxonomy.xml")
  taxonomy = Nokogiri::XML(taxonomy_xml)

  # NOTE(review): type is placed into the XPath unescaped, so a label containing a double quote would break the query.
  urls = taxonomy.xpath("//taxon[@label=\"#{type}\"]//file/@URL")
  urls.to_s
end

# Converts a string that represents true or false into an actual true or false value.
#
# Surrounding whitespace and letter case are ignored. Only "t" and "true" count as true.
# The argument is converted with to_s first, so nil (a missing value) yields false
# instead of raising.
#
# @param [String, nil] s the text to interpret
# @return [Boolean] true if the string s is true, false otherwise.
def s_true(s)
  %w[t true].include?(s.to_s.strip.downcase)
end

# Looks up a value in the xml config file held in the global $config.
#
# @param [String] path an xpath string
# @return [String] the config value
def config_value(path)
  matched = $config.xpath(path)
  matched.to_s
end

0 comments on commit 8f0909b

Please sign in to comment.