Skip to content

Commit

Permalink
Prerelease version 0.0.2a. Refactors the complexity metric from a sim…
Browse files Browse the repository at this point in the history
…milarity metric to a dissimilarity metric, which better captures the uniqueness of a group of NGS reads.
  • Loading branch information
MatthewRalston committed Dec 5, 2015
1 parent 85031a8 commit df41d35
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 13 deletions.
4 changes: 2 additions & 2 deletions lib/NGSCI/calculator.rb
Expand Up @@ -126,7 +126,7 @@ def sci(reads)
reads = reads.group_by(&:start).map{|k,v| v.max{|x,y| x.length <=> y.length}}
d = summed_dissimilarity(reads)
uniquereads = reads.size
return [numreads,uniquereads,(d.to_f/@read_length).round(4),(uniquereads*d/@denominator).round(4)]
return [numreads,uniquereads,(d.to_f/@read_length).round(4),(100*uniquereads*d/@denominator).round(4)]
end


Expand Down Expand Up @@ -187,7 +187,7 @@ def max_summed_dissimilarity(read_length)
# Denominator = read length * max_summed_dissimilarity / (read_length * read_length)
#
# @param [Integer] read_length The read length
# @return [Float] denominator The denominator including normalization factors for the complexity index
# @return [Float] denominator The denominator including normalization factors for the complexity index 349184
def denominator_calc(read_length)
  # Look up the maximum summed dissimilarity for this read length, then
  # scale it by the read length to obtain the normalization denominator.
  max_dissimilarity = max_summed_dissimilarity(read_length)
  read_length * max_dissimilarity
end
Expand Down
2 changes: 1 addition & 1 deletion lib/NGSCI/version.rb
@@ -1,3 +1,3 @@
module NGSCI
VERSION = "0.0.1.a"
VERSION = "0.0.2.a"
end
20 changes: 10 additions & 10 deletions spec/lib/calculator_spec.rb
Expand Up @@ -165,7 +165,7 @@
@read_length = 76
@calc = NGSCI::Calculator.new(testbam,testfasta)
end
it "returns a float denominator" do
it "returns a float" do
expect(@calc.max_summed_dissimilarity(@read_length)).to be_kind_of Integer
end
end
Expand Down Expand Up @@ -200,10 +200,9 @@ def tri(x,n=0)
end
context "when averaging per read" do
it "is equal to 1/3 times (read_length - 1)" do
(32..200).each do |x|
read_length = x
@calc = NGSCI::Calculator.new(testbam,testfasta)
calculated_max_summed_dissimilarity = @calc.max_summed_dissimilarity(read_length)/read_length/(read_length)
@calc = NGSCI::Calculator.new(testbam,testfasta)
(32..200).each do |read_length|
calculated_max_summed_dissimilarity = @calc.max_summed_dissimilarity(read_length)/(read_length*read_length)
expect(calculated_max_summed_dissimilarity).to eq((read_length-1)/3)
end
end
Expand All @@ -220,11 +219,12 @@ def tri(x,n=0)
read_length = 76
expect(@calc.denominator_calc(read_length)).to be_kind_of Integer
end
it "returns the correct denominator for each read length" do
(32..200).each do |x|
read_length = x
expect(@calc.denominator_calc(read_length)).to eq((read_length^3)*(read_length-1)/3 )
end
end
it "is the max_summed_dissimilarity * read length" do
@calc = NGSCI::Calculator.new(testbam,testfasta)
(32..200).each do |read_length|
max_sum_dissim = @calc.max_summed_dissimilarity(read_length)
expect(@calc.denominator_calc(read_length)).to eq(read_length*max_sum_dissim)
end
end
end
Expand Down

0 comments on commit df41d35

Please sign in to comment.