Skip to content

Commit

Permalink
Added 'BucketizedDimension' allowing buckets to be created from bucke…
Browse files Browse the repository at this point in the history
…tizing or categorizing the value of a selected measure.
  • Loading branch information
Nathan committed Apr 8, 2010
1 parent 49a46a4 commit f5ce37c
Show file tree
Hide file tree
Showing 13 changed files with 150 additions and 21 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.rdoc
@@ -1,3 +1,10 @@
==0.1.8
* Added 'bucketize' (aliased as 'categorize') to the cubicle DSL, allowing dimensions to be created by bucketizing
(assigning to a named category) the value of a measure.

==0.1.7
* Minor bug fixes

==0.1.6
* Added generic 'condition' argument to member definitions, which allows the specification of a JavaScript expression
which will determine if the measure (or dimension) expression will be evaluated. This capability was available
Expand Down
2 changes: 1 addition & 1 deletion cubicle.gemspec
Expand Up @@ -5,7 +5,7 @@

Gem::Specification.new do |s|
s.name = %q{cubicle}
s.version = "0.1.7"
s.version = "0.1.8"

s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
s.authors = ["Nathan Stults"]
Expand Down
1 change: 1 addition & 0 deletions lib/cubicle.rb
Expand Up @@ -11,6 +11,7 @@
"measure",
"calculated_measure",
"dimension",
"bucketized_dimension",
"ratio",
"duration",
"query/dsl/time_intelligence",
Expand Down
2 changes: 1 addition & 1 deletion lib/cubicle/aggregation/aggregation_manager.rb
Expand Up @@ -39,7 +39,7 @@ def execute_query(query,options={})
aggregation = aggregation_for(query)
#if the query exactly matches the aggregation in terms of requested members, we can issue a simple find
#otherwise, a second map reduce is required to reduce the data set one last time
if ((aggregation.name.split("_")[-1].split(".")) - query.member_names - [:all_measures]).blank?
if query.all_dimensions? || ((aggregation.name.split("_")[-1].split(".")) - query.member_names - [:all_measures]).blank?
filter = prepare_filter(query,options[:where] || {})
else
aggregation = aggregate(query,:source_collection=>aggregation.name)
Expand Down
7 changes: 7 additions & 0 deletions lib/cubicle/aggregation/dsl.rb
Expand Up @@ -100,6 +100,13 @@ def ratio(member_name, numerator, denominator)
measures << Ratio.new(member_name, numerator, denominator)
end
# Declares a dimension whose value is the bucket (category) that the value of
# an already-declared measure falls into.
#
# dimension_name      - name of the new dimension
# source_measure_name - name of the measure to bucketize; looked up in `measures`
# bucket_range        - range passed through to BucketizedDimension
# options             - options hash passed through to BucketizedDimension
# block               - optional block passed through to BucketizedDimension
#
# Raises a RuntimeError when no measure is found under source_measure_name.
def bucketize(dimension_name, source_measure_name, bucket_range, options={}, &block)
  measure = measures[source_measure_name]
  if !measure
    raise "#{source_measure_name} does not appear to be a valid measure name. bucketize/categorize declarations must be placed AFTER any measures it uses have been defined."
  end

  dimensions << BucketizedDimension.new(
    dimension_name,
    measure.to_js_value,
    bucket_range,
    options,
    &block
  )
end
# `categorize` is a synonym for `bucketize`.
alias categorize bucketize
def aggregation(*member_list)
member_list = member_list[0] if member_list[0].is_a?(Array)
aggregations << member_list
Expand Down
70 changes: 70 additions & 0 deletions lib/cubicle/bucketized_dimension.rb
@@ -0,0 +1,70 @@
module Cubicle

  # A dimension whose value is produced by sorting the numeric result of a
  # JavaScript expression into labelled ranges ("buckets").
  #
  # #to_js_value returns a self-invoking JavaScript function. For the range
  # 0..10 with :step=>5 and :bump=>1 applied to "this.value" it is:
  #
  #   (function(val){
  #     if(val==null || isNaN(val))return null;
  #     if (val < 0) return '< 0';
  #     if (val >= 0 && val <= 5) return '0 - 5';
  #     if (val >= 6 && val <= 10) return '6 - 10';
  #     if (val > 10) return '> 10'
  #   })(this.value)
  #
  # (shown wrapped here; the generated string is a single line).
  #
  # NOTE: every bucket after the first starts at "previous upper bound + bump",
  # so a value that lies strictly between an upper bound and the next lower
  # bound (5.5 in the example above) matches no clause and the JavaScript
  # function returns undefined. Choose a bump that suits the precision of the
  # source values.
  class BucketizedDimension < Dimension

    # Characters that cannot appear verbatim inside a single-quoted JavaScript
    # string literal, mapped to their escaped form.
    JS_LABEL_ESCAPES = {
      "\\" => "\\\\",
      "'"  => "\\'",
      "\n" => "\\n",
      "\r" => "\\r"
    }.freeze

    attr_accessor :step, :bump, :value_expression, :bucket_range, :formatter

    # dimension_name   - name of the dimension (handed to the superclass)
    # value_expression - JavaScript expression whose value is bucketized
    # bucket_range     - Range whose begin/end delimit the explicit buckets
    # options          - :bucket_size (or :step) is the width of each bucket,
    #                    :range_start_bump (or :bump) is the amount added to a
    #                    bucket's lower bound so that it does not overlap the
    #                    previous bucket's upper bound. Both default to 1.
    # block            - optional formatter called with (min, max) to build a
    #                    bucket label; min is :begin for the open-ended first
    #                    bucket and max is :end for the open-ended last bucket.
    def initialize(dimension_name, value_expression, bucket_range, options={}, &block)
      super(dimension_name, options)
      @value_expression = value_expression
      @bucket_range = bucket_range
      @formatter = block
      # The parentheses make these calls to the `options` method rather than
      # reads of the local `options` hash (presumably the inherited
      # Member#options, which removes and returns the named options).
      @step = options(:bucket_size, :step) || 1
      @bump = options(:range_start_bump, :bump) || 1
    end

    # Returns the JavaScript expression that maps value_expression to a bucket
    # label, or to null when the value is null or not a number.
    def to_js_value
      generate_buckets
      "(function(val){if(val==null || isNaN(val))return null; #{@buckets.join(';')}})(#{value_expression})"
    end

    private

    # Rebuilds @buckets: one clause for everything below the range, one clause
    # per step through the range, and one clause for everything above the last
    # value the stepping reached.
    def generate_buckets
      @buckets = []
      prev = :begin
      @bucket_range.step(step) do |next_val|
        @buckets << bucket_javascript(prev,next_val)
        prev = next_val
      end
      @buckets << bucket_javascript(prev, :end)
    end

    # Returns the JavaScript `if (...) return '<label>'` clause for one bucket.
    # min is :begin for the first bucket and max is :end for the last one.
    def bucket_javascript(min,max)
      # Only interior buckets are bumped: the open-ended buckets and the bucket
      # starting exactly at the beginning of the range keep their lower bound.
      min += bump unless min.is_a?(Symbol) || max.is_a?(Symbol) || min==@bucket_range.begin
      # The label ends up inside a single-quoted JavaScript literal, so quotes,
      # backslashes and line breaks must be escaped to keep the script valid.
      label = escape_js_label(label_for(min,max))
      if min == :begin
        "if (val < #{max}) return '#{label}'"
      elsif max == :end
        "if (val > #{min}) return '#{label}'"
      else
        "if (val >= #{min} && val <= #{max}) return '#{label}'"
      end
    end

    # Returns the (unescaped) label for a bucket, using the formatter block
    # when one was supplied. min has already been bumped by the caller.
    def label_for(min,max)
      return formatter.call(min,max) if formatter
      if min == :begin
        "< #{max}"
      elsif max == :end
        "> #{min}"
      else
        "#{min} - #{max}"
      end
    end

    # Escapes label so it can be embedded in a single-quoted JavaScript string.
    def escape_js_label(label)
      label.to_s.gsub(/[\\'\r\n]/) { |char| JS_LABEL_ESCAPES[char] }
    end

  end

end

2 changes: 1 addition & 1 deletion lib/cubicle/member.rb
Expand Up @@ -39,7 +39,7 @@ def initialize(*args)

def options(*args)
return @options if args.empty?
args.collect {|opt|found=@options.delete(opt)}.pop
args.collect {|opt|found=@options.delete(opt)}.compact.pop
end

def matches(member_name)
Expand Down
2 changes: 1 addition & 1 deletion lib/cubicle/version.rb
@@ -1,3 +1,3 @@
module Cubicle
VERSION = '0.1.7'
VERSION = '0.1.8'
end
24 changes: 24 additions & 0 deletions test/cubicle/bucketized_dimension_test.rb
@@ -0,0 +1,24 @@
require "test_helper"

# Verifies the JavaScript generated by Cubicle::BucketizedDimension#to_js_value
# for default labels, block-generated labels, and fractional steps/bumps.
class BucketizedDimensionTest < ActiveSupport::TestCase
  context "BucketizedDimension.to_js_value" do
    should "generate an appropriately structured bucketizing function" do
      dim = Cubicle::BucketizedDimension.new(:test, "this.value", 0..10, :step=>5, :bump=>1)
      assert_equal "(function(val){if(val==null || isNaN(val))return null; if (val < 0) return '< 0';if (val >= 0 && val <= 5) return '0 - 5';if (val >= 6 && val <= 10) return '6 - 10';if (val > 10) return '> 10'})(this.value)",
                   dim.to_js_value
    end

    should "use the block to generate the bucket labels if provided" do
      dim = Cubicle::BucketizedDimension.new(:test, "this.value", 0..10, :step=>5, :bump=>1) do |min,max|
        "#{min}:#{max}"
      end
      assert_equal "(function(val){if(val==null || isNaN(val))return null; if (val < 0) return 'begin:0';if (val >= 0 && val <= 5) return '0:5';if (val >= 6 && val <= 10) return '6:10';if (val > 10) return '10:end'})(this.value)",
                   dim.to_js_value
    end

    should "handle decimal steps and bumps with an integer range" do
      dim = Cubicle::BucketizedDimension.new(:test, "this.value", 1..2, :step=>0.5, :bump=>0.01)
      # No debugging output here: the assertion failure message already shows
      # the generated JavaScript when the expectation is not met.
      assert_equal "(function(val){if(val==null || isNaN(val))return null; if (val < 1.0) return '< 1.0';if (val >= 1.0 && val <= 1.5) return '1.0 - 1.5';if (val >= 1.51 && val <= 2.0) return '1.51 - 2.0';if (val > 2.0) return '> 2.0'})(this.value)",
                   dim.to_js_value
    end
  end
end
9 changes: 7 additions & 2 deletions test/cubicle/cubicle_aggregation_test.rb
Expand Up @@ -12,6 +12,7 @@ class CubicleAggregationTest < ActiveSupport::TestCase
@results = DefectCubicle.query
end
should "return a collection of appropriate aggregated values based on the cubicle parameters" do
puts @results.inspect
assert_equal 4, @results.length

assert_equal "2009-12-09", @results[0]["manufacture_date"]
Expand All @@ -27,6 +28,7 @@ class CubicleAggregationTest < ActiveSupport::TestCase
assert_equal 0.43, @results[0]["total_cost"]
assert_equal 0.43, @results[0]["avg_cost"]
assert_equal 0, @results[0]["preventable_pct"]
assert_equal "< $1", @results[0]["avg_cost_category"]

assert_equal "2010-01-01", @results[1]["manufacture_date"]
assert_equal "2010-01", @results[1]["month"]
Expand All @@ -38,9 +40,10 @@ class CubicleAggregationTest < ActiveSupport::TestCase
assert_equal 2, @results[1]["total_defects"]
assert_equal 1, @results[1]["preventable_defects"]
assert_equal 1, @results[1]["conditioned_preventable"]
assert_in_delta 12.97, @results[1]["total_cost"], 0.0001
assert_in_delta 6.485, @results[1]["avg_cost"],0.0001
assert_in_delta 12.19 + 6.50, @results[1]["total_cost"], 0.0001
assert_in_delta (12.19 + 6.50)/2.0, @results[1]["avg_cost"],0.0001
assert_equal 0.5, @results[1]["preventable_pct"]
assert_equal "> $5", @results[1]["avg_cost_category"]

assert_equal "2010-01-05", @results[2]["manufacture_date"]
assert_equal "2010-01", @results[2]["month"]
Expand All @@ -55,6 +58,7 @@ class CubicleAggregationTest < ActiveSupport::TestCase
assert_equal 0.02, @results[2]["total_cost"]
assert_equal 0.02, @results[2]["avg_cost"]
assert_equal 1, @results[2]["preventable_pct"]
assert_equal "< $1", @results[2]["avg_cost_category"]

assert_equal "2010-02-01", @results[3]["manufacture_date"]
assert_equal "2010-02", @results[3]["month"]
Expand All @@ -69,6 +73,7 @@ class CubicleAggregationTest < ActiveSupport::TestCase
assert_equal 2.94, @results[3]["total_cost"]
assert_equal 2.94, @results[3]["avg_cost"]
assert_equal 1, @results[3]["preventable_pct"]
assert_equal "$2.51 - $3.0", @results[3]["avg_cost_category"]
end
end

Expand Down
26 changes: 13 additions & 13 deletions test/cubicle/cubicle_query_test.rb
Expand Up @@ -107,8 +107,8 @@ class CubicleQueryTest < ActiveSupport::TestCase
assert_equal 3, @results[0]["total_defects"]
assert_equal 2, @results[0]["preventable_defects"]
assert_equal 2, @results[0]["conditioned_preventable"]
assert_equal 15.91, @results[0]["total_cost"]
assert_equal 15.91/3, @results[0]["avg_cost"]
assert_equal 21.63, @results[0]["total_cost"]
assert_equal 21.63/3, @results[0]["avg_cost"]
assert_equal 2/3.0, @results[0]["preventable_pct"]
end

Expand Down Expand Up @@ -147,8 +147,8 @@ class CubicleQueryTest < ActiveSupport::TestCase
assert_equal 3, @results[0]["total_defects"]
assert_equal 2, @results[0]["preventable_defects"]
assert_equal 2, @results[0]["conditioned_preventable"]
assert_in_delta 15.91, @results[0]["total_cost"],0.0001
assert_in_delta 15.91/3, @results[0]["avg_cost"],0.0001
assert_in_delta 21.63, @results[0]["total_cost"],0.0001
assert_in_delta 21.63/3, @results[0]["avg_cost"],0.0001
assert_in_delta 2/3.0, @results[0]["preventable_pct"],0.0001
end

Expand Down Expand Up @@ -184,8 +184,8 @@ class CubicleQueryTest < ActiveSupport::TestCase
assert_equal 3, @results[0]["total_defects"]
assert_equal 2, @results[0]["preventable_defects"]
assert_equal 2, @results[0]["conditioned_preventable"]
assert_in_delta 15.91, @results[0]["total_cost"],0.0001
assert_in_delta 15.91/3, @results[0]["avg_cost"],0.0001
assert_in_delta 21.63, @results[0]["total_cost"],0.0001
assert_in_delta 21.63/3, @results[0]["avg_cost"],0.0001
assert_equal 2/3.0, @results[0]["preventable_pct"]
end

Expand All @@ -204,8 +204,8 @@ class CubicleQueryTest < ActiveSupport::TestCase
assert_equal 3, @results[0]["total_defects"]
assert_equal 2, @results[0]["preventable_defects"]
assert_equal 2, @results[0]["conditioned_preventable"]
assert_equal 15.91, @results[0]["total_cost"]
assert_equal 15.91/3, @results[0]["avg_cost"]
assert_equal 21.63, @results[0]["total_cost"]
assert_equal 21.63/3, @results[0]["avg_cost"]
assert_equal 2/3.0, @results[0]["preventable_pct"]
end

Expand Down Expand Up @@ -266,7 +266,7 @@ class CubicleQueryTest < ActiveSupport::TestCase
end
should "present YTD data based on Time.now" do
assert_equal 1, @results.length
assert_in_delta 12.97, @results[0]["total_cost"],0.0001
assert_in_delta 18.69, @results[0]["total_cost"],0.0001
end
end
context "when requesting MTD in a non-transient query do" do
Expand All @@ -279,7 +279,7 @@ class CubicleQueryTest < ActiveSupport::TestCase
end
should "present MTD data based on Time.now" do
assert_equal 1, @results.length
assert_in_delta 12.99, @results[0]["total_cost"],0.0001
assert_in_delta 18.71, @results[0]["total_cost"],0.0001
end
end
context "when requesting MTD in a transient query do" do
Expand All @@ -293,7 +293,7 @@ class CubicleQueryTest < ActiveSupport::TestCase
end
should "present MTD data based on Time.now" do
assert_equal 1, @results.length
assert_in_delta 12.99, @results[0]["total_cost"],0.0001
assert_in_delta 18.71, @results[0]["total_cost"],0.0001
end
end
context "when requesting for_the_last_complete 1.months" do
Expand Down Expand Up @@ -377,8 +377,8 @@ class CubicleQueryTest < ActiveSupport::TestCase
assert_equal 3, results[0]["total_defects"]
assert_equal 2, results[0]["preventable_defects"]
assert_equal 2, results[0]["conditioned_preventable"]
assert_in_delta 15.91, results[0]["total_cost"],0.0001
assert_in_delta 15.91/3, results[0]["avg_cost"],0.0001
assert_in_delta 21.63, results[0]["total_cost"],0.0001
assert_in_delta 21.63/3, results[0]["avg_cost"],0.0001
assert_in_delta 2/3.0, results[0]["preventable_pct"],0.0001
end
should "respect the alias in the order by clause" do
Expand Down
17 changes: 16 additions & 1 deletion test/cubicles/defect_cubicle.rb
Expand Up @@ -27,9 +27,24 @@ class DefectCubicle
duration :total_duration, :ms1 => :ms3, :in=>:days
duration :conditional_duration, :ms1 => :ms3, :in=>:days, :condition=>"this.defect_id != 2"
elapsed :ms3, :in=>:days
age_since :avg_time_since_ms3, :ms3, :in=>:days
age_since :avg_time_since_ms3,:ms3, :in=>:days

#bucketized fields
categorize :avg_cost_category,
:avg_cost, 1..5, :bucket_size=>0.5, :range_start_bump=>0.01 do |bucket_start,bucket_end|
if bucket_start == :begin
'< $1'
elsif bucket_end == :end
'> $5'
else
"$#{bucket_start} - $#{bucket_end}"
end
end



#pre-cached aggregations
aggregation :month, :year, :product
aggregation :month, :region

end
2 changes: 1 addition & 1 deletion test/models/defect.rb
Expand Up @@ -58,7 +58,7 @@ def self.create_test_data
:plant=>{:name=>"Plant1",:address=>{:region=>"West",:location=>"San Francisco, Ca"}},
:operator=>"Franny",
:outcome=>"Repaired",
:cost=>0.78,
:cost=>6.50,
:root_cause=>:act_of_god

Defect.create :defect_id=>"2",
Expand Down

0 comments on commit f5ce37c

Please sign in to comment.