diff --git a/lib/statsample/bivariate.rb b/lib/statsample/bivariate.rb index b84f915..d0e1572 100644 --- a/lib/statsample/bivariate.rb +++ b/lib/statsample/bivariate.rb @@ -156,15 +156,14 @@ def covariance_matrix_optimized(ds) # Order of rows and columns depends on Dataset#fields order def covariance_matrix(ds) - vars,cases=ds.fields.size,ds.cases + vars,cases = ds.vectors.size, ds.nrows if !ds.has_missing_data? and Statsample.has_gsl? and prediction_optimized(vars,cases) < prediction_pairwise(vars,cases) cm=covariance_matrix_optimized(ds) else cm=covariance_matrix_pairwise(ds) - end cm.extend(Statsample::CovariateMatrix) - cm.fields=ds.fields + cm.fields=ds.vectors.to_a cm end @@ -243,14 +242,19 @@ def correlation_matrix_pairwise(ds) # Retrieves the n valid pairwise. def n_valid_matrix(ds) - ds.collect_matrix do |row,col| - if row==col - ds[row].valid_data.size - else - rowa,rowb=Statsample.only_valid_clone(ds[row],ds[col]) - rowa.size + vectors = ds.vectors.to_a + m = vectors.collect do |row| + vectors.collect do |col| + if row==col + ds[row].only_valid.size + else + rowa,rowb = Statsample.only_valid_clone(ds[row],ds[col]) + rowa.size + end end end + + Matrix.rows m end # Matrix of correlation probabilities. @@ -384,8 +388,8 @@ def sum_of_codeviated(v1,v2) # Report the minimum number of cases valid of a covariate matrix # based on a dataset def min_n_valid(ds) - min=ds.cases - m=n_valid_matrix(ds) + min = ds.nrows + m = n_valid_matrix(ds) for x in 0...m.row_size for y in 0...m.column_size min=m[x,y] if m[x,y] < min @@ -393,8 +397,6 @@ def min_n_valid(ds) end min end - - end end end diff --git a/lib/statsample/daru.rb b/lib/statsample/daru.rb index 029a0fc..8764b9b 100644 --- a/lib/statsample/daru.rb +++ b/lib/statsample/daru.rb @@ -26,13 +26,7 @@ def to_multiset_by_split_one_field(field) #puts "Ingreso a los dataset" ms.datasets.each do |k,ds| ds.update - # puts "idx #{self[field].index_of(k)}" ds.rename self[field].index_of(k) - # ds.vectors.each do |k1,v1| - # v1.type = self[k1].type - # v1.name = self[k1].name - # v1.labels = self[k1].to_hash - # end end ms @@ -69,12 +63,6 @@ def to_multiset_by_split_multiple_fields(*fields) self[f].index_of(sk) end.join("-") ) - - # ds.vectors.each do |k1,v1| - # v1.type = ds[k1].type - # v1.name = ds[k1].name - # v1.labels = ds[k1].to_hash - # end end ms end diff --git a/lib/statsample/regression/multiple/baseengine.rb b/lib/statsample/regression/multiple/baseengine.rb index 5f287a1..f2fdf82 100644 --- a/lib/statsample/regression/multiple/baseengine.rb +++ b/lib/statsample/regression/multiple/baseengine.rb @@ -19,13 +19,12 @@ def self.univariate? end def initialize(ds, y_var, opts = Hash.new) @ds=ds - @predictors_n=@ds.fields.size-1 - @total_cases=@ds.cases - @cases=@ds.cases + @predictors_n=@ds.vectors.size-1 + @total_cases=@ds.nrows + @cases=@ds.nrows @y_var=y_var @r2=nil - @name=_("Multiple Regression: %s over %s") % [ ds.fields.join(",") , @y_var] - + @name=_("Multiple Regression: %s over %s") % [ ds.vectors.to_a.join(",") , @y_var] opts_default={:digits=>3} @opts=opts_default.merge opts @@ -33,7 +32,6 @@ def initialize(ds, y_var, opts = Hash.new) @opts.each{|k,v| self.send("#{k}=",v) if self.respond_to? k } - end # Calculate F Test def anova @@ -45,15 +43,17 @@ def se_estimate end # Retrieves a vector with predicted values for y def predicted - @total_cases.times.collect { |i| - invalid=false - vect=@dep_columns.collect {|v| invalid=true if v[i].nil?; v[i]} - if invalid - nil - else - process(vect) + Daru::Vector.new( + @total_cases.times.collect do |i| + invalid = false + vect = @dep_columns.collect {|v| invalid = true if v[i].nil?; v[i]} + if invalid + nil + else + process(vect) + end end - }.to_vector(:numeric) + ) end # Retrieves a vector with standarized values for y def standarized_predicted @@ -61,15 +61,17 @@ def standarized_predicted end # Retrieves a vector with residuals values for y def residuals - (0...@total_cases).collect{|i| - invalid=false - vect=@dep_columns.collect{|v| invalid=true if v[i].nil?; v[i]} - if invalid or @ds[@y_var][i].nil? - nil - else - @ds[@y_var][i] - process(vect) + Daru::Vector.new( + (0...@total_cases).collect do |i| + invalid=false + vect=@dep_columns.collect{|v| invalid=true if v[i].nil?; v[i]} + if invalid or @ds[@y_var][i].nil? + nil + else + @ds[@y_var][i] - process(vect) + end end - }.to_vector(:numeric) + ) end # R Multiple def r @@ -131,12 +133,10 @@ def probability # Tolerance for a given variable # http://talkstats.com/showthread.php?t=5056 def tolerance(var) - ds=assign_names(@dep_columns) - ds.each{|k,v| - ds[k]=v.to_vector(:numeric) - } - lr=self.class.new(ds.to_dataset,var) - 1-lr.r2 + ds = assign_names(@dep_columns) + ds.each { |k,v| ds[k] = Daru::Vector.new(v) } + lr = self.class.new(Daru::DataFrame.new(ds),var) + 1 - lr.r2 end # Tolerances for each coefficient def coeffs_tolerances @@ -165,12 +165,12 @@ def se_r2 def estimated_variance_covariance_matrix #mse_p=mse columns=[] - @ds_valid.fields.each{|k| - v=@ds_valid[k] - columns.push(v.data) unless k==@y_var + @ds_valid.vectors.each{|k| + v = @ds_valid[k] + columns.push(v.to_a) unless k == @y_var } columns.unshift([1.0]*@valid_cases) - x=Matrix.columns(columns) + x=::Matrix.columns(columns) matrix=((x.t*x)).inverse * mse matrix.collect {|i| Math::sqrt(i) if i>=0 } end diff --git a/lib/statsample/regression/multiple/gslengine.rb b/lib/statsample/regression/multiple/gslengine.rb index ad6492c..5dc57ca 100644 --- a/lib/statsample/regression/multiple/gslengine.rb +++ b/lib/statsample/regression/multiple/gslengine.rb @@ -19,33 +19,34 @@ module Multiple class GslEngine < BaseEngine def initialize(ds,y_var, opts=Hash.new) super - @ds=ds.dup_only_valid - @ds_valid=@ds - @valid_cases=@ds_valid.cases - @dy=@ds[@y_var] - @ds_indep=ds.dup(ds.fields-[y_var]) + @ds = ds.dup_only_valid + @ds_valid = @ds + @valid_cases = @ds_valid.nrows + @dy = @ds[@y_var] + @ds_indep = ds.dup(ds.vectors.to_a - [y_var]) # Create a custom matrix columns=[] @fields=[] - max_deps = GSL::Matrix.alloc(@ds.cases, @ds.fields.size) - constant_col=@ds.fields.size-1 - for i in 0...@ds.cases + max_deps = GSL::Matrix.alloc(@ds.nrows, @ds.vectors.size) + constant_col=@ds.vectors.size-1 + for i in 0...@ds.nrows max_deps.set(i,constant_col,1) end - j=0 - @ds.fields.each{|f| - if f!=@y_var - @ds[f].each_index{|i1| + j = 0 + @ds.vectors.each do |f| + if f != @y_var + @ds[f].each_index do |i1| max_deps.set(i1,j,@ds[f][i1]) - } + end + columns.push(@ds[f].to_a) @fields.push(f) - j+=1 + j += 1 end - } - @dep_columns=columns.dup - @lr_s=nil - c, @cov, @chisq, @status = GSL::MultiFit.linear(max_deps, @dy.gsl) + end + @dep_columns = columns.dup + @lr_s = nil + c, @cov, @chisq, @status = GSL::MultiFit.linear(max_deps, @dy.to_gsl) @constant=c[constant_col] @coeffs_a=c.to_a.slice(0...constant_col) @coeffs=assign_names(@coeffs_a) @@ -97,7 +98,7 @@ def lr_s @lr_s end def build_standarized - @ds_s=@ds.standarize + @ds_s=@ds.standardize @lr_s=GslEngine.new(@ds_s,@y_var) end def process_s(v) @@ -114,17 +115,15 @@ def standarized_residuals # Standard error for coeffs def coeffs_se - out={} - evcm=estimated_variance_covariance_matrix - @ds_valid.fields.each_with_index do |f,i| - - mi=i+1 - next if f==@y_var - out[f]=evcm[mi,mi] + out = {} + evcm = estimated_variance_covariance_matrix + @ds_valid.vectors.to_a.each_with_index do |f,i| + mi = i+1 + next if f == @y_var + out[f] = evcm[mi,mi] end out end - end end end diff --git a/lib/statsample/regression/multiple/matrixengine.rb b/lib/statsample/regression/multiple/matrixengine.rb index 86ddc52..9c780f3 100644 --- a/lib/statsample/regression/multiple/matrixengine.rb +++ b/lib/statsample/regression/multiple/matrixengine.rb @@ -59,8 +59,6 @@ def initialize(matrix,y_var, opts=Hash.new) @matrix_y = @matrix_cor.submatrix(@fields, [y_var]) @matrix_y_cov = @matrix_cov.submatrix(@fields, [y_var]) - - @y_sd=Math::sqrt(@matrix_cov.submatrix([y_var])[0,0]) @x_sd=@n_predictors.times.inject({}) {|ac,i| @@ -77,14 +75,14 @@ def initialize(matrix,y_var, opts=Hash.new) @y_mean=0.0 @name=_("Multiple reggresion of %s on %s") % [@fields.join(","), @y_var] - opts_default={:digits=>3} - opts=opts_default.merge opts + opts_default = {:digits=>3} + opts = opts_default.merge opts opts.each{|k,v| self.send("#{k}=",v) if self.respond_to? k } result_matrix=@matrix_x_cov.inverse * @matrix_y_cov - if matrix._type==:covariance + if matrix._type == :covariance @coeffs=result_matrix.column(0).to_a @coeffs_stan=coeffs.collect {|k,v| coeffs[k]*@x_sd[k].quo(@y_sd) @@ -116,12 +114,12 @@ def r end # Value of constant def constant - c=coeffs - @y_mean - @fields.inject(0){|a,k| a + (c[k] * @x_mean[k])} + c = coeffs + @y_mean - @fields.inject(0) { |a,k| a + (c[k] * @x_mean[k])} end # Hash of b or raw coefficients def coeffs - assign_names(@coeffs) + assign_names(@coeffs) end # Hash of beta or standarized coefficients @@ -185,7 +183,7 @@ def constant_se sd[:constant]=0 fields=[:constant]+@matrix_cov.fields-[@y_var] # Recreate X'X using the variance-covariance matrix - xt_x=Matrix.rows(fields.collect {|i| + xt_x=::Matrix.rows(fields.collect {|i| fields.collect {|j| if i==:constant or j==:constant cov=0 diff --git a/lib/statsample/regression/multiple/rubyengine.rb b/lib/statsample/regression/multiple/rubyengine.rb index 13c1718..f856aad 100644 --- a/lib/statsample/regression/multiple/rubyengine.rb +++ b/lib/statsample/regression/multiple/rubyengine.rb @@ -17,67 +17,64 @@ module Multiple class RubyEngine < MatrixEngine def initialize(ds,y_var, opts=Hash.new) - matrix=ds.correlation_matrix - fields_indep=ds.fields-[y_var] - default={ - :y_mean=>ds[y_var].mean, - :x_mean=>fields_indep.inject({}) {|ac,f| ac[f]=ds[f].mean; ac}, - :y_sd=>ds[y_var].sd, - :x_sd=>fields_indep.inject({}) {|ac,f| ac[f]=ds[f].sd; ac}, - :cases=>Statsample::Bivariate.min_n_valid(ds) + matrix = Statsample::Bivariate.correlation_matrix ds + fields_indep=ds.vectors.to_a - [y_var] + default= { + :y_mean => ds[y_var].mean, + :x_mean => fields_indep.inject({}) {|ac,f| ac[f]=ds[f].mean; ac}, + :y_sd => ds[y_var].sd, + :x_sd => fields_indep.inject({}) {|ac,f| ac[f]=ds[f].sd; ac}, + :cases => Statsample::Bivariate.min_n_valid(ds) } - opts=opts.merge(default) + opts = opts.merge(default) super(matrix, y_var, opts) - @ds=ds - @dy=ds[@y_var] - @ds_valid=ds.dup_only_valid - @total_cases=@ds.cases - @valid_cases=@ds_valid.cases - @ds_indep = ds.dup(ds.fields-[y_var]) + @ds = ds + @dy = ds[@y_var] + @ds_valid = ds.dup_only_valid + @total_cases = @ds.nrows + @valid_cases = @ds_valid.nrows + @ds_indep = ds.dup(ds.vectors.to_a - [y_var]) set_dep_columns end def set_dep_columns - @dep_columns=[] - @ds_indep.each_vector{|k,v| - @dep_columns.push(v.data_with_nils) - } + @dep_columns = [] + @ds_indep.each_vector { |v| @dep_columns.push(v.to_a) } end def fix_with_mean i=0 - @ds_indep.each do |row| + @ds_indep.each(:row) do |row| empty=[] row.each do |k,v| empty.push(k) if v.nil? end + if empty.size==1 @ds_indep[empty[0]][i]=@ds[empty[0]].mean end - i+=1 + i += 1 end - @ds_indep.update_valid_data + @ds_indep.update set_dep_columns end def fix_with_regression - i=0 - @ds_indep.each{|row| - empty=[] - row.each{|k,v| - empty.push(k) if v.nil? - } + i = 0 + @ds_indep.each(:row) do |row| + empty = [] + row.each { |k,v| empty.push(k) if v.nil? } if empty.size==1 - field=empty[0] - lr=MultipleRegression.new(@ds_indep,field) - fields=[] + field = empty[0] + lr = MultipleRegression.new(@ds_indep,field) + fields = [] @ds_indep.fields.each{|f| - fields.push(row[f]) unless f==field + fields.push(row[f]) unless f == field } @ds_indep[field][i]=lr.process(fields) end i+=1 - } - @ds_indep.update_valid_data + end + @ds_indep.update set_dep_columns end # Standard error for constant diff --git a/test/test_multiset.rb b/test/test_multiset.rb index 7543ac0..0e47477 100644 --- a/test/test_multiset.rb +++ b/test/test_multiset.rb @@ -127,7 +127,6 @@ def test_each } xp, yp, zp = {}, {}, {} @ms.each {|k, ds| - # puts "k #{k} ds #{ds}" xp[k] = ds[:x] yp[k] = ds[:y] zp[k] = ds[:z] diff --git a/test/test_regression.rb b/test/test_regression.rb index d973e79..8c23bc0 100644 --- a/test/test_regression.rb +++ b/test/test_regression.rb @@ -3,21 +3,21 @@ class StatsampleRegressionTestCase < Minitest::Test context 'Example with missing data' do setup do - @x = [0.285714285714286, 0.114285714285714, 0.314285714285714, 0.2, 0.2, 0.228571428571429, 0.2, 0.4, 0.714285714285714, 0.285714285714286, 0.285714285714286, 0.228571428571429, 0.485714285714286, 0.457142857142857, 0.257142857142857, 0.228571428571429, 0.285714285714286, 0.285714285714286, 0.285714285714286, 0.142857142857143, 0.285714285714286, 0.514285714285714, 0.485714285714286, 0.228571428571429, 0.285714285714286, 0.342857142857143, 0.285714285714286, 0.0857142857142857].to_numeric + @x = Daru::Vector.new([0.285714285714286, 0.114285714285714, 0.314285714285714, 0.2, 0.2, 0.228571428571429, 0.2, 0.4, 0.714285714285714, 0.285714285714286, 0.285714285714286, 0.228571428571429, 0.485714285714286, 0.457142857142857, 0.257142857142857, 0.228571428571429, 0.285714285714286, 0.285714285714286, 0.285714285714286, 0.142857142857143, 0.285714285714286, 0.514285714285714, 0.485714285714286, 0.228571428571429, 0.285714285714286, 0.342857142857143, 0.285714285714286, 0.0857142857142857]) - @y = [nil, 0.233333333333333, nil, 0.266666666666667, 0.366666666666667, nil, 0.333333333333333, 0.3, 0.666666666666667, 0.0333333333333333, 0.333333333333333, nil, nil, 0.533333333333333, 0.433333333333333, 0.4, 0.4, 0.5, 0.4, 0.266666666666667, 0.166666666666667, 0.666666666666667, 0.433333333333333, 0.166666666666667, nil, 0.4, 0.366666666666667, nil].to_numeric - @ds = { 'x' => @x, 'y' => @y }.to_dataset - @lr = Statsample::Regression::Multiple::RubyEngine.new(@ds, 'y') + @y = Daru::Vector.new([nil, 0.233333333333333, nil, 0.266666666666667, 0.366666666666667, nil, 0.333333333333333, 0.3, 0.666666666666667, 0.0333333333333333, 0.333333333333333, nil, nil, 0.533333333333333, 0.433333333333333, 0.4, 0.4, 0.5, 0.4, 0.266666666666667, 0.166666666666667, 0.666666666666667, 0.433333333333333, 0.166666666666667, nil, 0.4, 0.366666666666667, nil]) + @ds = Daru::DataFrame.new({ :x => @x, :y => @y }) + @lr = Statsample::Regression::Multiple::RubyEngine.new(@ds, :y) end should 'have correct values' do assert_in_delta(0.455, @lr.r2, 0.001) assert_in_delta(0.427, @lr.r2_adjusted, 0.001) assert_in_delta(0.1165, @lr.se_estimate, 0.001) assert_in_delta(15.925, @lr.f, 0.0001) - assert_in_delta(0.675, @lr.standarized_coeffs['x'], 0.001) - assert_in_delta(0.778, @lr.coeffs['x'], 0.001, 'coeff x') + assert_in_delta(0.675, @lr.standarized_coeffs[:x], 0.001) + assert_in_delta(0.778, @lr.coeffs[:x], 0.001, 'coeff x') assert_in_delta(0.132, @lr.constant, 0.001, 'constant') - assert_in_delta(0.195, @lr.coeffs_se['x'], 0.001, 'coeff x se') + assert_in_delta(0.195, @lr.coeffs_se[:x], 0.001, 'coeff x se') assert_in_delta(0.064, @lr.constant_se, 0.001, 'constant se') end end @@ -26,24 +26,24 @@ class StatsampleRegressionTestCase < Minitest::Test a, b = rand, rand - x1 = samples.times.map { rand }.to_numeric - x2 = samples.times.map { rand }.to_numeric - x3 = samples.times.map { |i| x1[i] * (1 + a) + x2[i] * (1 + b) }.to_numeric - y = samples.times.map { |i| x1[i] + x2[i] + x3[i] + rand }.to_numeric + x1 = Daru::Vector.new(samples.times.map { rand }) + x2 = Daru::Vector.new(samples.times.map { rand }) + x3 = Daru::Vector.new(samples.times.map { |i| x1[i] * (1 + a) + x2[i] * (1 + b) }) + y = Daru::Vector.new(samples.times.map { |i| x1[i] + x2[i] + x3[i] + rand }) - ds = { 'x1' => x1, 'x2' => x2, 'x3' => x3, 'y' => y }.to_dataset + ds = Daru::DataFrame.new({ :x1 => x1, :x2 => x2, :x3 => x3, :y => y }) assert_raise(Statsample::Regression::LinearDependency) { - Statsample::Regression::Multiple::RubyEngine.new(ds, 'y') + Statsample::Regression::Multiple::RubyEngine.new(ds, :y) } end def test_parameters - @x = [13, 20, 10, 33, 15].to_vector(:numeric) - @y = [23, 18, 35, 10, 27].to_vector(:numeric) + @x =Daru::Vector.new([13, 20, 10, 33, 15]) + @y =Daru::Vector.new([23, 18, 35, 10, 27]) reg = Statsample::Regression::Simple.new_from_vectors(@x, @y) _test_simple_regression(reg) - ds = { 'x' => @x, 'y' => @y }.to_dataset - reg = Statsample::Regression::Simple.new_from_dataset(ds, 'x', 'y') + ds = Daru::DataFrame.new({ :x => @x, :y => @y }) + reg = Statsample::Regression::Simple.new_from_dataset(ds, :x, :y) _test_simple_regression(reg) reg = Statsample::Regression.simple(@x, @y) _test_simple_regression(reg) @@ -57,11 +57,11 @@ def _test_simple_regression(reg) end def test_summaries - a = 10.times.map { rand(100) }.to_numeric - b = 10.times.map { rand(100) }.to_numeric - y = 10.times.map { rand(100) }.to_numeric - ds = { 'a' => a, 'b' => b, 'y' => y }.to_dataset - lr = Statsample::Regression::Multiple::RubyEngine.new(ds, 'y') + a = Daru::Vector.new(10.times.map { rand(100) }) + b = Daru::Vector.new(10.times.map { rand(100) }) + y = Daru::Vector.new(10.times.map { rand(100) }) + ds = Daru::DataFrame.new({ :a => a, :b => b, :y => y }) + lr = Statsample::Regression::Multiple::RubyEngine.new(ds, :y) assert(lr.summary.size > 0) end @@ -87,12 +87,12 @@ def test_multiple_dependent end def test_multiple_regression_pairwise_2 - @a = [1, 3, 2, 4, 3, 5, 4, 6, 5, 7, 3, nil, 3, nil, 3].to_vector(:numeric) - @b = [3, 3, 4, 4, 5, 5, 6, 6, 4, 4, 2, 2, nil, 6, 2].to_vector(:numeric) - @c = [11, 22, 30, 40, 50, 65, 78, 79, 99, 100, nil, 3, 7, nil, 7].to_vector(:numeric) - @y = [3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 30, 40, nil, 50, nil].to_vector(:numeric) - ds = { 'a' => @a, 'b' => @b, 'c' => @c, 'y' => @y }.to_dataset - lr = Statsample::Regression::Multiple::RubyEngine.new(ds, 'y') + @a =Daru::Vector.new( [1, 3, 2, 4, 3, 5, 4, 6, 5, 7, 3, nil, 3, nil, 3]) + @b =Daru::Vector.new( [3, 3, 4, 4, 5, 5, 6, 6, 4, 4, 2, 2, nil, 6, 2]) + @c =Daru::Vector.new( [11, 22, 30, 40, 50, 65, 78, 79, 99, 100, nil, 3, 7, nil, 7]) + @y =Daru::Vector.new( [3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 30, 40, nil, 50, nil]) + ds = Daru::DataFrame.new({ :a => @a, :b => @b, :c => @c, :y => @y }) + lr = Statsample::Regression::Multiple::RubyEngine.new(ds, :y) assert_in_delta(2407.436, lr.sst, 0.001) assert_in_delta(0.752, lr.r, 0.001, 'pairwise r') assert_in_delta(0.565, lr.r2, 0.001) @@ -103,12 +103,12 @@ def test_multiple_regression_pairwise_2 def test_multiple_regression_gsl if Statsample.has_gsl? - @a = [1, 3, 2, 4, 3, 5, 4, 6, 5, 7].to_vector(:numeric) - @b = [3, 3, 4, 4, 5, 5, 6, 6, 4, 4].to_vector(:numeric) - @c = [11, 22, 30, 40, 50, 65, 78, 79, 99, 100].to_vector(:numeric) - @y = [3, 4, 5, 6, 7, 8, 9, 10, 20, 30].to_vector(:numeric) - ds = { 'a' => @a, 'b' => @b, 'c' => @c, 'y' => @y }.to_dataset - lr = Statsample::Regression::Multiple::GslEngine.new(ds, 'y') + @a =Daru::Vector.new( [1, 3, 2, 4, 3, 5, 4, 6, 5, 7]) + @b =Daru::Vector.new( [3, 3, 4, 4, 5, 5, 6, 6, 4, 4]) + @c =Daru::Vector.new( [11, 22, 30, 40, 50, 65, 78, 79, 99, 100]) + @y =Daru::Vector.new( [3, 4, 5, 6, 7, 8, 9, 10, 20, 30]) + ds = Daru::DataFrame.new({ :a => @a, :b => @b, :c => @c, :y => @y }) + lr = Statsample::Regression::Multiple::GslEngine.new(ds, :y) assert(lr.summary.size > 0) model_test(lr, 'gsl') predicted = [1.7857, 6.0989, 3.2433, 7.2908, 4.9667, 10.3428, 8.8158, 10.4717, 23.6639, 25.3198] @@ -127,8 +127,8 @@ def test_multiple_regression_gsl end def model_test_matrix(lr, name = 'undefined') - stan_coeffs = { 'a' => 0.151, 'b' => -0.547, 'c' => 0.997 } - unstan_coeffs = { 'a' => 0.695, 'b' => -4.286, 'c' => 0.266 } + stan_coeffs = { :a => 0.151, :b => -0.547, :c => 0.997 } + unstan_coeffs = { :a => 0.695, :b => -4.286, :c => 0.266 } unstan_coeffs.each_key{|k| assert_in_delta(unstan_coeffs[k], lr.coeffs[k], 0.001, "b coeffs - #{name}") @@ -145,15 +145,15 @@ def model_test_matrix(lr, name = 'undefined') assert_in_delta(20.908, lr.f, 0.001) assert_in_delta(0.001, lr.probability, 0.001) - assert_in_delta(0.226, lr.tolerance('a'), 0.001) + assert_in_delta(0.226, lr.tolerance(:a), 0.001) - coeffs_se = { 'a' => 1.171, 'b' => 1.129, 'c' => 0.072 } + coeffs_se = { :a => 1.171, :b => 1.129, :c => 0.072 } ccoeffs_se = lr.coeffs_se coeffs_se.each_key{|k| assert_in_delta(coeffs_se[k], ccoeffs_se[k], 0.001) } - coeffs_t = { 'a' => 0.594, 'b' => -3.796, 'c' => 3.703 } + coeffs_t = { :a => 0.594, :b => -3.796, :c => 3.703 } ccoeffs_t = lr.coeffs_t coeffs_t.each_key{|k| assert_in_delta(coeffs_t[k], ccoeffs_t[k], 0.001) @@ -174,32 +174,37 @@ def model_test(lr, name = 'undefined') end def test_regression_matrix - @a = [1, 3, 2, 4, 3, 5, 4, 6, 5, 7].to_vector(:numeric) - @b = [3, 3, 4, 4, 5, 5, 6, 6, 4, 4].to_vector(:numeric) - @c = [11, 22, 30, 40, 50, 65, 78, 79, 99, 100].to_vector(:numeric) - @y = [3, 4, 5, 6, 7, 8, 9, 10, 20, 30].to_vector(:numeric) - ds = { 'a' => @a, 'b' => @b, 'c' => @c, 'y' => @y }.to_dataset + @a = Daru::Vector.new([1, 3, 2, 4, 3, 5, 4, 6, 5, 7]) + @b = Daru::Vector.new([3, 3, 4, 4, 5, 5, 6, 6, 4, 4]) + @c = Daru::Vector.new([11, 22, 30, 40, 50, 65, 78, 79, 99, 100]) + @y = Daru::Vector.new([3, 4, 5, 6, 7, 8, 9, 10, 20, 30]) + ds = Daru::DataFrame.new({ :a => @a, :b => @b, :c => @c, :y => @y }) cor = Statsample::Bivariate.correlation_matrix(ds) - lr = Statsample::Regression::Multiple::MatrixEngine.new(cor, 'y', y_mean: @y.mean, x_mean: { 'a' => ds['a'].mean, 'b' => ds['b'].mean, 'c' => ds['c'].mean }, cases: @a.size, y_sd: @y.sd, x_sd: { 'a' => @a.sd, 'b' => @b.sd, 'c' => @c.sd }) + lr = Statsample::Regression::Multiple::MatrixEngine.new( + cor, :y, y_mean: @y.mean, + x_mean: { :a => ds[:a].mean, :b => ds[:b].mean, :c => ds[:c].mean }, + cases: @a.size, y_sd: @y.sd, x_sd: { :a => @a.sd, :b => @b.sd, :c => @c.sd }) assert_nil(lr.constant_se) assert_nil(lr.constant_t) model_test_matrix(lr, 'correlation matrix') covariance = Statsample::Bivariate.covariance_matrix(ds) - lr = Statsample::Regression::Multiple::MatrixEngine.new(covariance, 'y', y_mean: @y.mean, x_mean: { 'a' => ds['a'].mean, 'b' => ds['b'].mean, 'c' => ds['c'].mean }, cases: @a.size) + lr = Statsample::Regression::Multiple::MatrixEngine.new( + covariance, :y, y_mean: @y.mean, + x_mean: { :a => ds[:a].mean, :b => ds[:b].mean, :c => ds[:c].mean }, cases: @a.size) assert(lr.summary.size > 0) model_test(lr, 'covariance matrix') end def test_regression_rubyengine - @a = [nil, 1, 3, 2, 4, 3, 5, 4, 6, 5, 7].to_vector(:numeric) - @b = [nil, 3, 3, 4, 4, 5, 5, 6, 6, 4, 4].to_vector(:numeric) - @c = [nil, 11, 22, 30, 40, 50, 65, 78, 79, 99, 100].to_vector(:numeric) - @y = [nil, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30].to_vector(:numeric) - ds = { 'a' => @a, 'b' => @b, 'c' => @c, 'y' => @y }.to_dataset - lr = Statsample::Regression::Multiple::RubyEngine.new(ds, 'y') + @a = Daru::Vector.new([nil, 1, 3, 2, 4, 3, 5, 4, 6, 5, 7]) + @b = Daru::Vector.new([nil, 3, 3, 4, 4, 5, 5, 6, 6, 4, 4]) + @c = Daru::Vector.new([nil, 11, 22, 30, 40, 50, 65, 78, 79, 99, 100]) + @y = Daru::Vector.new([nil, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30]) + ds = Daru::DataFrame.new({ :a => @a, :b => @b, :c => @c, :y => @y }) + lr = Statsample::Regression::Multiple::RubyEngine.new(ds, :y) assert_equal(11, lr.total_cases) assert_equal(10, lr.valid_cases) model_test(lr, 'rubyengine with missing data')