Skip to content

Commit

Permalink
Statsample::Regression now works with daru
Browse files Browse the repository at this point in the history
  • Loading branch information
v0dro committed May 21, 2015
1 parent 915a113 commit f11d2f4
Show file tree
Hide file tree
Showing 8 changed files with 168 additions and 180 deletions.
28 changes: 15 additions & 13 deletions lib/statsample/bivariate.rb
Expand Up @@ -156,15 +156,14 @@ def covariance_matrix_optimized(ds)
# Order of rows and columns depends on Dataset#fields order

def covariance_matrix(ds)
vars,cases=ds.fields.size,ds.cases
vars,cases = ds.vectors.size, ds.nrows
if !ds.has_missing_data? and Statsample.has_gsl? and prediction_optimized(vars,cases) < prediction_pairwise(vars,cases)
cm=covariance_matrix_optimized(ds)
else
cm=covariance_matrix_pairwise(ds)

end
cm.extend(Statsample::CovariateMatrix)
cm.fields=ds.fields
cm.fields=ds.vectors.to_a
cm
end

Expand Down Expand Up @@ -243,14 +242,19 @@ def correlation_matrix_pairwise(ds)

# Retrieves the n valid pairwise.
def n_valid_matrix(ds)
ds.collect_matrix do |row,col|
if row==col
ds[row].valid_data.size
else
rowa,rowb=Statsample.only_valid_clone(ds[row],ds[col])
rowa.size
vectors = ds.vectors.to_a
m = vectors.collect do |row|
vectors.collect do |col|
if row==col
ds[row].only_valid.size
else
rowa,rowb = Statsample.only_valid_clone(ds[row],ds[col])
rowa.size
end
end
end

Matrix.rows m
end

# Matrix of correlation probabilities.
Expand Down Expand Up @@ -384,17 +388,15 @@ def sum_of_codeviated(v1,v2)
# Report the minimum number of cases valid of a covariate matrix
# based on a dataset
def min_n_valid(ds)
min=ds.cases
m=n_valid_matrix(ds)
min = ds.nrows
m = n_valid_matrix(ds)
for x in 0...m.row_size
for y in 0...m.column_size
min=m[x,y] if m[x,y] < min
end
end
min
end


end
end
end
Expand Down
12 changes: 0 additions & 12 deletions lib/statsample/daru.rb
Expand Up @@ -26,13 +26,7 @@ def to_multiset_by_split_one_field(field)
#puts "Ingreso a los dataset"
ms.datasets.each do |k,ds|
ds.update
# puts "idx #{self[field].index_of(k)}"
ds.rename self[field].index_of(k)
# ds.vectors.each do |k1,v1|
# v1.type = self[k1].type
# v1.name = self[k1].name
# v1.labels = self[k1].to_hash
# end
end

ms
Expand Down Expand Up @@ -69,12 +63,6 @@ def to_multiset_by_split_multiple_fields(*fields)
self[f].index_of(sk)
end.join("-")
)

# ds.vectors.each do |k1,v1|
# v1.type = ds[k1].type
# v1.name = ds[k1].name
# v1.labels = ds[k1].to_hash
# end
end
ms
end
Expand Down
64 changes: 32 additions & 32 deletions lib/statsample/regression/multiple/baseengine.rb
Expand Up @@ -19,21 +19,19 @@ def self.univariate?
end
def initialize(ds, y_var, opts = Hash.new)
@ds=ds
@predictors_n=@ds.fields.size-1
@total_cases=@ds.cases
@cases=@ds.cases
@predictors_n=@ds.vectors.size-1
@total_cases=@ds.nrows
@cases=@ds.nrows
@y_var=y_var
@r2=nil
@name=_("Multiple Regression: %s over %s") % [ ds.fields.join(",") , @y_var]

@name=_("Multiple Regression: %s over %s") % [ ds.vectors.to_a.join(",") , @y_var]

opts_default={:digits=>3}
@opts=opts_default.merge opts

@opts.each{|k,v|
self.send("#{k}=",v) if self.respond_to? k
}

end
# Calculate F Test
def anova
Expand All @@ -45,31 +43,35 @@ def se_estimate
end
# Retrieves a vector with predicted values for y
def predicted
@total_cases.times.collect { |i|
invalid=false
vect=@dep_columns.collect {|v| invalid=true if v[i].nil?; v[i]}
if invalid
nil
else
process(vect)
Daru::Vector.new(
@total_cases.times.collect do |i|
invalid = false
vect = @dep_columns.collect {|v| invalid = true if v[i].nil?; v[i]}
if invalid
nil
else
process(vect)
end
end
}.to_vector(:numeric)
)
end
# Retrieves a vector with standarized values for y
def standarized_predicted
predicted.standarized
end
# Retrieves a vector with residuals values for y
def residuals
(0...@total_cases).collect{|i|
invalid=false
vect=@dep_columns.collect{|v| invalid=true if v[i].nil?; v[i]}
if invalid or @ds[@y_var][i].nil?
nil
else
@ds[@y_var][i] - process(vect)
Daru::Vector.new(
(0...@total_cases).collect do |i|
invalid=false
vect=@dep_columns.collect{|v| invalid=true if v[i].nil?; v[i]}
if invalid or @ds[@y_var][i].nil?
nil
else
@ds[@y_var][i] - process(vect)
end
end
}.to_vector(:numeric)
)
end
# R Multiple
def r
Expand Down Expand Up @@ -131,12 +133,10 @@ def probability
# Tolerance for a given variable
# http://talkstats.com/showthread.php?t=5056
def tolerance(var)
ds=assign_names(@dep_columns)
ds.each{|k,v|
ds[k]=v.to_vector(:numeric)
}
lr=self.class.new(ds.to_dataset,var)
1-lr.r2
ds = assign_names(@dep_columns)
ds.each { |k,v| ds[k] = Daru::Vector.new(v) }
lr = self.class.new(Daru::DataFrame.new(ds),var)
1 - lr.r2
end
# Tolerances for each coefficient
def coeffs_tolerances
Expand Down Expand Up @@ -165,12 +165,12 @@ def se_r2
def estimated_variance_covariance_matrix
#mse_p=mse
columns=[]
@ds_valid.fields.each{|k|
v=@ds_valid[k]
columns.push(v.data) unless k==@y_var
@ds_valid.vectors.each{|k|
v = @ds_valid[k]
columns.push(v.to_a) unless k == @y_var
}
columns.unshift([1.0]*@valid_cases)
x=Matrix.columns(columns)
x=::Matrix.columns(columns)
matrix=((x.t*x)).inverse * mse
matrix.collect {|i| Math::sqrt(i) if i>=0 }
end
Expand Down
53 changes: 26 additions & 27 deletions lib/statsample/regression/multiple/gslengine.rb
Expand Up @@ -19,33 +19,34 @@ module Multiple
class GslEngine < BaseEngine
def initialize(ds,y_var, opts=Hash.new)
super
@ds=ds.dup_only_valid
@ds_valid=@ds
@valid_cases=@ds_valid.cases
@dy=@ds[@y_var]
@ds_indep=ds.dup(ds.fields-[y_var])
@ds = ds.dup_only_valid
@ds_valid = @ds
@valid_cases = @ds_valid.nrows
@dy = @ds[@y_var]
@ds_indep = ds.dup(ds.vectors.to_a - [y_var])
# Create a custom matrix
columns=[]
@fields=[]
max_deps = GSL::Matrix.alloc(@ds.cases, @ds.fields.size)
constant_col=@ds.fields.size-1
for i in 0...@ds.cases
max_deps = GSL::Matrix.alloc(@ds.nrows, @ds.vectors.size)
constant_col=@ds.vectors.size-1
for i in 0...@ds.nrows
max_deps.set(i,constant_col,1)
end
j=0
@ds.fields.each{|f|
if f!=@y_var
@ds[f].each_index{|i1|
j = 0
@ds.vectors.each do |f|
if f != @y_var
@ds[f].each_index do |i1|
max_deps.set(i1,j,@ds[f][i1])
}
end

columns.push(@ds[f].to_a)
@fields.push(f)
j+=1
j += 1
end
}
@dep_columns=columns.dup
@lr_s=nil
c, @cov, @chisq, @status = GSL::MultiFit.linear(max_deps, @dy.gsl)
end
@dep_columns = columns.dup
@lr_s = nil
c, @cov, @chisq, @status = GSL::MultiFit.linear(max_deps, @dy.to_gsl)
@constant=c[constant_col]
@coeffs_a=c.to_a.slice(0...constant_col)
@coeffs=assign_names(@coeffs_a)
Expand Down Expand Up @@ -97,7 +98,7 @@ def lr_s
@lr_s
end
def build_standarized
@ds_s=@ds.standarize
@ds_s=@ds.standardize
@lr_s=GslEngine.new(@ds_s,@y_var)
end
def process_s(v)
Expand All @@ -114,17 +115,15 @@ def standarized_residuals

# Standard error for coeffs
def coeffs_se
out={}
evcm=estimated_variance_covariance_matrix
@ds_valid.fields.each_with_index do |f,i|

mi=i+1
next if f==@y_var
out[f]=evcm[mi,mi]
out = {}
evcm = estimated_variance_covariance_matrix
@ds_valid.vectors.to_a.each_with_index do |f,i|
mi = i+1
next if f == @y_var
out[f] = evcm[mi,mi]
end
out
end

end
end
end
Expand Down
16 changes: 7 additions & 9 deletions lib/statsample/regression/multiple/matrixengine.rb
Expand Up @@ -59,8 +59,6 @@ def initialize(matrix,y_var, opts=Hash.new)
@matrix_y = @matrix_cor.submatrix(@fields, [y_var])
@matrix_y_cov = @matrix_cov.submatrix(@fields, [y_var])



@y_sd=Math::sqrt(@matrix_cov.submatrix([y_var])[0,0])

@x_sd=@n_predictors.times.inject({}) {|ac,i|
Expand All @@ -77,14 +75,14 @@ def initialize(matrix,y_var, opts=Hash.new)
@y_mean=0.0
@name=_("Multiple reggresion of %s on %s") % [@fields.join(","), @y_var]

opts_default={:digits=>3}
opts=opts_default.merge opts
opts_default = {:digits=>3}
opts = opts_default.merge opts
opts.each{|k,v|
self.send("#{k}=",v) if self.respond_to? k
}
result_matrix=@matrix_x_cov.inverse * @matrix_y_cov

if matrix._type==:covariance
if matrix._type == :covariance
@coeffs=result_matrix.column(0).to_a
@coeffs_stan=coeffs.collect {|k,v|
coeffs[k]*@x_sd[k].quo(@y_sd)
Expand Down Expand Up @@ -116,12 +114,12 @@ def r
end
# Value of constant
def constant
c=coeffs
@y_mean - @fields.inject(0){|a,k| a + (c[k] * @x_mean[k])}
c = coeffs
@y_mean - @fields.inject(0) { |a,k| a + (c[k] * @x_mean[k])}
end
# Hash of b or raw coefficients
def coeffs
assign_names(@coeffs)
assign_names(@coeffs)
end
# Hash of beta or standarized coefficients

Expand Down Expand Up @@ -185,7 +183,7 @@ def constant_se
sd[:constant]=0
fields=[:constant]+@matrix_cov.fields-[@y_var]
# Recreate X'X using the variance-covariance matrix
xt_x=Matrix.rows(fields.collect {|i|
xt_x=::Matrix.rows(fields.collect {|i|
fields.collect {|j|
if i==:constant or j==:constant
cov=0
Expand Down

0 comments on commit f11d2f4

Please sign in to comment.