Skip to content

Commit

Permalink
Statsample::Test now working with daru
Browse files Browse the repository at this point in the history
  • Loading branch information
v0dro committed May 23, 2015
1 parent 3e4ce53 commit c88baf5
Show file tree
Hide file tree
Showing 5 changed files with 57 additions and 57 deletions.
16 changes: 8 additions & 8 deletions lib/statsample/test/levene.rb
Expand Up @@ -30,9 +30,9 @@ class Levene
# Input can be an array of vectors or a Daru::DataFrame
def initialize(input, opts=Hash.new())
if input.is_a? Daru::DataFrame
@vectors=input.vectors.values
@vectors = input.to_hash.values
else
@vectors=input
@vectors = input
end
@name=_("Levene Test")
opts.each{|k,v|
Expand All @@ -48,17 +48,18 @@ def report_building(builder) # :nodoc:
builder.text "%s : F(%d, %d) = %0.4f , p = %0.4f" % [@name, @d1, @d2, f, probability]
end
def compute
n=@vectors.inject(0) {|ac,v| ac+v.n_valid}
n=@vectors.inject(0) { |ac,v| ac + v.n_valid}

zi=@vectors.collect {|vector|
zi=@vectors.collect do |vector|
mean=vector.mean
Daru::Vector.new(vector.collect {|v| (v-mean).abs })
}
Daru::Vector.new(vector.collect { |v| (v - mean).abs })
end

total_mean = Daru::Vector.new(
zi.inject([]) do |ac,vector|
ac + vector.only_valid(:array)
end).mean
end
).mean

k = @vectors.size
sum_num = zi.inject(0) do |ac,vector|
Expand All @@ -82,7 +83,6 @@ def compute
# p-value of the test: the F cumulative distribution is evaluated at the
# statistic +f+ with parameters @d1 and @d2 (the same values reported as
# "F(d1, d2)" in the summary), and the result is handed to p_using_cdf.
# NOTE(review): p_using_cdf is defined outside this view; presumably :right
# selects the upper-tail probability (1 - cdf) -- confirm.
def probability
p_using_cdf(Distribution::F.cdf(f, @d1, @d2), :right)
end

end
end
end
54 changes: 27 additions & 27 deletions lib/statsample/test/umannwhitney.rb
Expand Up @@ -116,33 +116,33 @@ def self.distribution_permutations(n1,n2)
# Params: Two Daru::Vectors
#
def initialize(v1,v2, opts=Hash.new)
@v1=v1
@v2=v2
@n1=v1.valid_data.size
@n2=v2.valid_data.size
data=(v1.valid_data+v2.valid_data).to_numeric
groups=(([0]*@n1)+([1]*@n2)).to_vector
ds={'g'=>groups, 'data'=>data}.to_dataset
@t=nil
@ties=data.data.size!=data.data.uniq.size
if(@ties)
adjust_for_ties(ds['data'])
@v1 = v1
@v2 = v2
v1_valid = v1.only_valid.reset_index!
v2_valid = v2.only_valid.reset_index!
@n1 = v1_valid.size
@n2 = v2_valid.size
data = Daru::Vector.new(v1_valid.to_a + v2_valid.to_a)
groups = Daru::Vector.new(([0] * @n1) + ([1] * @n2))
ds = Daru::DataFrame.new({:g => groups, :data => data})
@t = nil
@ties = data.to_a.size != data.to_a.uniq.size
if @ties
adjust_for_ties(ds[:data])
end
ds['ranked']=ds['data'].ranked(:numeric)

@n=ds.cases
ds[:ranked] = ds[:data].ranked
@n = ds.nrows

@r1=ds.filter{|r| r['g']==0}['ranked'].sum
@r2=((ds.cases*(ds.cases+1)).quo(2))-r1
@u1=r1-((@n1*(@n1+1)).quo(2))
@u2=r2-((@n2*(@n2+1)).quo(2))
@u=(u1<u2) ? u1 : u2
opts_default={:name=>_("Mann-Whitney's U")}
@opts=opts_default.merge(opts)
@r1 = ds.filter_rows { |r| r[:g] == 0}[:ranked].sum
@r2 = ((ds.nrows * (ds.nrows + 1)).quo(2)) - r1
@u1 = r1 - ((@n1 * (@n1 + 1)).quo(2))
@u2 = r2 - ((@n2 * (@n2 + 1)).quo(2))
@u = (u1 < u2) ? u1 : u2
opts_default = { :name=>_("Mann-Whitney's U") }
@opts = opts_default.merge(opts)
opts_default.keys.each {|k|
send("#{k}=", @opts[k])
}

}
end
def report_building(generator) # :nodoc:
generator.section(:name=>@name) do |s|
Expand All @@ -160,8 +160,8 @@ def report_building(generator) # :nodoc:
# Exact probability of finding values of U lower than or equal to the sample value under the U distribution. Use with caution when m*n>100000.
# Uses u_sampling_distribution_as62
def probability_exact
dist=UMannWhitney.u_sampling_distribution_as62(@n1,@n2)
sum=0
dist = UMannWhitney.u_sampling_distribution_as62(@n1,@n2)
sum = 0
(0..@u.to_i).each {|i|
sum+=dist[i]
}
Expand All @@ -172,8 +172,8 @@ def probability_exact
# == Reference:
# * http://europe.isixsigma.com/library/content/c080806a.asp
def adjust_for_ties(data)
@t=data.frequencies.find_all{|k,v| v>1}.inject(0) {|a,v|
a+(v[1]**3-v[1]).quo(12)
@t = data.frequencies.find_all { |k,v| v > 1 }.inject(0) { |a,v|
a + (v[1]**3 - v[1]).quo(12)
}
end

Expand Down
2 changes: 1 addition & 1 deletion test/test_reliability_icc.rb
Expand Up @@ -114,7 +114,7 @@ class StatsampleReliabilityIccTestCase < Minitest::Test

begin
require 'rserve'
require 'statsample/rserve_extension'
require 'daru/extensions/rserve'
context 'McGraw and Wong' do
teardown do
@r = $reliability_icc[:r].close unless $reliability_icc[:r].nil?
Expand Down
18 changes: 9 additions & 9 deletions test/test_stest.rb
Expand Up @@ -24,26 +24,26 @@ def test_chi_square_matrix_only_observed
end

def test_u_mannwhitney
a = [1, 2, 3, 4, 5, 6].to_numeric
b = [0, 5, 7, 9, 10, 11].to_numeric
a = Daru::Vector.new([1, 2, 3, 4, 5, 6])
b = Daru::Vector.new([0, 5, 7, 9, 10, 11])
assert_equal(7.5, Statsample::Test.u_mannwhitney(a, b).u)
assert_equal(7.5, Statsample::Test.u_mannwhitney(b, a).u)
a = [1, 7, 8, 9, 10, 11].to_numeric
b = [2, 3, 4, 5, 6, 12].to_numeric
a = Daru::Vector.new([1, 7, 8, 9, 10, 11])
b = Daru::Vector.new([2, 3, 4, 5, 6, 12])
assert_equal(11, Statsample::Test.u_mannwhitney(a, b).u)
end

def test_levene
a = [1, 2, 3, 4, 5, 6, 7, 8, 100, 10].to_numeric
b = [30, 40, 50, 60, 70, 80, 90, 100, 110, 120].to_numeric
a = Daru::Vector.new([1, 2, 3, 4, 5, 6, 7, 8, 100, 10])
b = Daru::Vector.new([30, 40, 50, 60, 70, 80, 90, 100, 110, 120])
levene = Statsample::Test::Levene.new([a, b])
assert_levene(levene)
end

def test_levene_dataset
a = [1, 2, 3, 4, 5, 6, 7, 8, 100, 10].to_numeric
b = [30, 40, 50, 60, 70, 80, 90, 100, 110, 120].to_numeric
ds = { 'a' => a, 'b' => b }.to_dataset
a = Daru::Vector.new([1, 2, 3, 4, 5, 6, 7, 8, 100, 10])
b = Daru::Vector.new([30, 40, 50, 60, 70, 80, 90, 100, 110, 120])
ds = Daru::DataFrame.new({ :a => a, :b => b })
levene = Statsample::Test::Levene.new(ds)
assert_levene(levene)
end
Expand Down
24 changes: 12 additions & 12 deletions test/test_test_t.rb
Expand Up @@ -4,24 +4,24 @@ class StatsampleTestTTestCase < Minitest::Test
include Math
context T do
setup do
@a = [30.02, 29.99, 30.11, 29.97, 30.01, 29.99].to_numeric
@b = [29.89, 29.93, 29.72, 29.98, 30.02, 29.98].to_numeric
@a = Daru::Vector.new([30.02, 29.99, 30.11, 29.97, 30.01, 29.99])
@b = Daru::Vector.new([29.89, 29.93, 29.72, 29.98, 30.02, 29.98])
@x1 = @a.mean
@x2 = @b.mean
@s1 = @a.sd
@s2 = @b.sd
@n1 = @a.n
@n2 = @b.n
@n1 = @a.size
@n2 = @b.size
end
should 'calculate correctly standard t' do
t = Statsample::Test::T.new(@x1, @s1.quo(Math.sqrt(@a.n)), @a.n - 1)
assert_equal((@x1).quo(@s1.quo(Math.sqrt(@a.n))), t.t)
assert_equal(@a.n - 1, t.df)
t = Statsample::Test::T.new(@x1, @s1.quo(Math.sqrt(@a.size)), @a.size - 1)
assert_equal((@x1).quo(@s1.quo(Math.sqrt(@a.size))), t.t)
assert_equal(@a.size - 1, t.df)
assert(t.summary.size > 0)
end
should 'calculate correctly t for one sample' do
t1 = [6, 4, 6, 7, 4, 5, 5, 12, 6, 1].to_numeric
t2 = [9, 6, 5, 10, 10, 8, 7, 10, 6, 5].to_numeric
t1 = Daru::Vector.new([6, 4, 6, 7, 4, 5, 5, 12, 6, 1])
t2 = Daru::Vector.new([9, 6, 5, 10, 10, 8, 7, 10, 6, 5])
d = t1 - t2
t = Statsample::Test::T::OneSample.new(d)
assert_in_delta(-2.631, t.t, 0.001)
Expand All @@ -48,14 +48,14 @@ class StatsampleTestTTestCase < Minitest::Test
assert_in_delta(0.09095, t.probability_not_equal_variance, 0.001)
end
should 'be the same using shorthand' do
v = 100.times.map { rand(100) }.to_numeric
v = Daru::Vector.new(100.times.map { rand(100) })
assert_equal(Statsample::Test.t_one_sample(v).t, T::OneSample.new(v).t)
end
should 'calculate all values for one sample T test' do
u = @a.mean + (1 - rand * 2)
tos = T::OneSample.new(@a, u: u)
assert_equal((@a.mean - u).quo(@a.sd.quo(sqrt(@a.n))), tos.t)
assert_equal(@a.n - 1, tos.df)
assert_equal((@a.mean - u).quo(@a.sd.quo(sqrt(@a.size))), tos.t)
assert_equal(@a.size - 1, tos.df)
assert(tos.summary.size > 0)
end
end
Expand Down

0 comments on commit c88baf5

Please sign in to comment.