Permalink
Browse files

==0.1.25

  *Modified data extraction to ensure that member-level and query-level aliases are respected in the final result. Also
  stopped automatically dropping temporary map-reduce tables during query execution, for performance reasons. Instead,
  added a static method to the Cubicle module, clear_temp_tables, which can be called at application startup or teardown:
  Cubicle.clear_temp_tables()
  • Loading branch information...
Nathan
Nathan committed May 20, 2010
1 parent 532a255 commit 85a66be428a1be0eab37f55912ade5359c2d342e
View
@@ -1,3 +1,9 @@
==0.1.25
*Modified data extraction to ensure that member-level and query-level aliases are respected in the final result. Also
stopped automatically dropping temporary map-reduce tables during query execution, for performance reasons. Instead,
added a static method to the Cubicle module, clear_temp_tables, which can be called at application startup or teardown:
Cubicle.clear_temp_tables()
==0.1.24 ==0.1.24
*Added more detail to the profile for the 'find' action *Added more detail to the profile for the 'find' action
View
@@ -5,11 +5,11 @@
Gem::Specification.new do |s| Gem::Specification.new do |s|
s.name = %q{cubicle} s.name = %q{cubicle}
s.version = "0.1.24" s.version = "0.1.25"
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version= s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
s.authors = ["Nathan Stults"] s.authors = ["Nathan Stults"]
s.date = %q{2010-05-13} s.date = %q{2010-05-19}
s.description = %q{Cubicle provides a dsl and aggregation caching framework for automating the generation, execution and caching of map reduce queries when using MongoDB in Ruby. Cubicle also includes a MongoMapper plugin for quickly performing ad-hoc, multi-level group-by queries against a MongoMapper model.} s.description = %q{Cubicle provides a dsl and aggregation caching framework for automating the generation, execution and caching of map reduce queries when using MongoDB in Ruby. Cubicle also includes a MongoMapper plugin for quickly performing ad-hoc, multi-level group-by queries against a MongoMapper model.}
s.email = %q{hereiam@sonic.net} s.email = %q{hereiam@sonic.net}
s.extra_rdoc_files = [ s.extra_rdoc_files = [
View
@@ -52,7 +52,11 @@ def self.mongo
def self.logger def self.logger
@logger ||= (Cubicle.mongo.logger || Logger.new("cubicle.log")) @logger ||= (Cubicle.mongo.logger || Logger.new("cubicle.log"))
end end
# Drops every collection in the Cubicle database whose name contains
# "tmp.mr.mapreduce" (case-insensitive).
#
# Temporary map reduce collections are no longer dropped during query
# execution, so call this at application startup or teardown to clean up:
#
#   Cubicle.clear_temp_tables
#
# The dots in the pattern are escaped so that only the literal
# "tmp.mr.mapreduce" marker matches; an unescaped "." matches any character
# and could drop an unrelated collection such as "tmp_mr_mapreduce_archive".
#
# Returns whatever +collection_names.each+ returns (the list of collection
# names when +collection_names+ is an Array).
def self.clear_temp_tables
  database = self.mongo.database
  database.collection_names.each do |collection_name|
    database[collection_name].drop if collection_name =~ /tmp\.mr\.mapreduce/i
  end
end
end end
#Turn off HTML escaping in Mustache #Turn off HTML escaping in Mustache
@@ -46,7 +46,7 @@ def execute_query(query,options={})
if query.all_dimensions? || (agg_data.member_names - query.member_names - [:all_measures]).blank? if query.all_dimensions? || (agg_data.member_names - query.member_names - [:all_measures]).blank?
filter = prepare_filter(query,options[:where] || {}) filter = prepare_filter(query,options[:where] || {})
else else
reduction = aggregate(query,:source_collection=>agg_data.target_collection_name, :reason=>"Last mile reduction - source aggregation has too many members") reduction = aggregate(query,:source_collection=>agg_data.target_collection_name, :reason=>"Last mile reduction - source aggregation has too many members (#{agg_data.member_names.join(",").inspect})")
end end
end end
@@ -57,7 +57,7 @@ def execute_query(query,options={})
@profiler.measure(:find, :source=>reduction.name, :reason=>"Fetch final query results", :query=>find_options) do @profiler.measure(:find, :source=>reduction.name, :reason=>"Fetch final query results", :query=>find_options) do
count = reduction.count count = reduction.count
results = reduction.find(filter,find_options).to_a results = reduction.find(filter,find_options).to_a
reduction.drop if reduction.name =~ /^tmp.mr.*/ #reduction.drop if reduction.name =~ /^tmp.mr.*/
Cubicle::Data::Table.new(query, results, count) Cubicle::Data::Table.new(query, results, count)
end end
View
@@ -9,7 +9,7 @@ def self.aggregate(data,measures)
measures.each do |measure| measures.each do |measure|
if (row.include?(measure.name)) if (row.include?(measure.name))
val = row[measure.name] val = row[measure.name]
aggregated[measure.name] << val if val.kind_of?(Numeric) (aggregated[measure.name] ||= []) << val if val.kind_of?(Numeric)
end end
end end
end end
@@ -26,7 +26,7 @@ def self.extract_dimensions(dimension_names, data, table,parent_level=nil)
level = parent_level ? Cubicle::Data::Level.new(dim,parent_level) : Cubicle::Data::Hierarchy.new(dim,data.measures) level = parent_level ? Cubicle::Data::Level.new(dim,parent_level) : Cubicle::Data::Hierarchy.new(dim,data.measures)
data.each do |tuple| data.each do |tuple|
member_name = tuple.delete(dim_name.to_s) || "Unknown" member_name = tuple.delete(dim_name.to_s) || "Unknown"
level[member_name] << tuple (level[member_name] ||= []) << tuple
end end
level.each do |key,value| level.each do |key,value|
View
@@ -23,16 +23,15 @@ def flatten(member_name = nil, opts={}, &block)
default_val = opts[:default] || @missing_member_default || 0 default_val = opts[:default] || @missing_member_default || 0
self.values.inject([]) do |output, data| self.values.inject([]) do |output, data|
data.inject(output) do |flattened, value| value = data.measure_values
value.missing_member_default = default_val if value.respond_to?(:missing_member_default) value.missing_member_default = default_val if value.respond_to?(:missing_member_default)
if block_given? if block_given?
flat_val = block.arity == 1 ? (yield value) : (value.instance_eval(&block)) flat_val = block.arity == 1 ? (yield value) : (value.instance_eval(&block))
end
flat_val ||= value[member_name] if member_name && value.include?(member_name)
flat_val ||= default_val
flattened << flat_val
end end
flat_val ||= value[member_name] if member_name && value.include?(member_name)
flat_val ||= default_val
output << flat_val
end end
end end
@@ -49,7 +48,7 @@ def []=(key,val)
def hierarchy def hierarchy
parent_level || self parent_level || self
end end
private private
def prepare_level_member(member,member_name,parent_level) def prepare_level_member(member,member_name,parent_level)
member.class_eval("include Cubicle::Data::Member") member.class_eval("include Cubicle::Data::Member")
View
@@ -9,7 +9,7 @@ def initialize(query,query_results,total_count = nil)
@time_dimension_name = query.time_dimension.name if query.respond_to?(:time_dimension) && query.time_dimension @time_dimension_name = query.time_dimension.name if query.respond_to?(:time_dimension) && query.time_dimension
@time_period = query.time_period if query.respond_to?(:time_period) @time_period = query.time_period if query.respond_to?(:time_period)
@time_range = query.time_range if query.respond_to?(:time_range) @time_range = query.time_range if query.respond_to?(:time_range)
extract_data(query_results) extract_data(query,query_results)
@total_count = total_count if total_count @total_count = total_count if total_count
end end
@@ -45,16 +45,37 @@ def total_pages
private private
def extract_data(data) def extract_data(query,data)
data.each do |result| data.each do |result|
new = result.dup new = result.dup
self << OrderedHashWithIndifferentAccess.new(new.delete("_id").merge(new.delete("value"))) self << OrderedHashWithIndifferentAccess.new(new.delete("_id").merge(new.delete("value")))
#these should be processed first, because they are often used as parts of the other calc measures
measures.select{|m|m.distinct_count?}.each do |m| finalize_aggregations(self[-1])
m.finalize_aggregation(self[-1])
apply_aliases(query,self[-1])
end
end
# Calls finalize_aggregation(row) on every measure, in two passes.
#
# row - the result row handed to each measure's finalize_aggregation.
def finalize_aggregations(row)
  # Distinct-count measures go first, because they are often used as parts
  # of the other calculated measures.
  distinct_counts = measures.select(&:distinct_count?)
  distinct_counts.each { |measure| measure.finalize_aggregation(row) }

  # Everything else is finalized afterwards.
  remaining = measures.reject(&:distinct_count?)
  remaining.each { |measure| measure.finalize_aggregation(row) }
end
def apply_aliases(query,row)
members = query.dimensions + query.measures
members.select{|m|m.alias_list}.each do |m|
m.alias_list.each do |m_alias|
row[m_alias.to_s] = row[m.name.to_s]
end end
measures.select{|m|!m.distinct_count?}.each do |m| end
m.finalize_aggregation(self[-1]) if (query.respond_to?(:query_aliases) && query.query_aliases)
query.query_aliases.each do |key,value|
row[key.to_s] = row[value.to_s]
end end
end end
end end
@@ -1,7 +1,6 @@
class OrderedHashWithIndifferentAccess < OrderedHash class OrderedHashWithIndifferentAccess < OrderedHash
def initialize(initial_data={},&block) def initialize(initial_data={})
merge!(initial_data.stringify_keys) merge!(initial_data.stringify_keys)
super(&block) if block
end end
View
@@ -2,7 +2,7 @@ module Cubicle
class Query class Query
include Dsl include Dsl
attr_reader :time_period, :transient, :aggregation, :named_expressions attr_reader :time_period, :transient, :aggregation, :named_expressions, :query_aliases
attr_accessor :source_collection_name attr_accessor :source_collection_name
def initialize(aggregation) def initialize(aggregation)
@@ -104,7 +104,9 @@ def detect_time_period(dimension_name = (time_dimension ? time_dimension.name :
def convert_dimension(dimension) def convert_dimension(dimension)
return dimension if transient? return dimension if transient?
Cubicle::Dimension.new(dimension.name, :expression=>"this._id.#{dimension.name}") d = Cubicle::Dimension.new(dimension.name, :expression=>"this._id.#{dimension.name}")
d.alias_list = dimension.alias_list
d
end end
def convert_measure(measure) def convert_measure(measure)
@@ -133,7 +135,9 @@ def convert_measure(measure)
count_field = expression + "_count" count_field = expression + "_count"
expression = "#{expression}*#{count_field}" expression = "#{expression}*#{count_field}"
end end
Cubicle::Measure.new(measure.name, :expression=>expression,:aggregation_method=>aggregation, :distinct=>measure.distinct_count?) m = Cubicle::Measure.new(measure.name, :expression=>expression,:aggregation_method=>aggregation, :distinct=>measure.distinct_count?)
m.alias_list = measure.alias_list
m
end end
def unalias(*name_or_names) def unalias(*name_or_names)
@@ -65,7 +65,7 @@ class CubicleQueryTest < ActiveSupport::TestCase
end end
should "return the specified subset of data, including all measures" do should "return the specified subset of data, including all measures" do
assert_equal 4, @results.length assert_equal 4, @results.length
assert_equal "2009-12-09", @results[0]["manufacture_date"] assert_equal "2009-12-09", @results[0]["date"]
assert_equal 1, @results[0]["total_defects"] assert_equal 1, @results[0]["total_defects"]
assert_equal 0, @results[0]["preventable_defects"] assert_equal 0, @results[0]["preventable_defects"]
assert_equal 0.43, @results[0]["total_cost"] assert_equal 0.43, @results[0]["total_cost"]
@@ -163,6 +163,7 @@ class CubicleQueryTest < ActiveSupport::TestCase
should "return a filtered subset of data" do should "return a filtered subset of data" do
assert_equal 1, @results.length assert_equal 1, @results.length
assert_equal "2009-12-09", @results[0]["manufacture_date"] assert_equal "2009-12-09", @results[0]["manufacture_date"]
assert_equal "2009-12-09", @results[0]["date"]
assert_equal 1, @results[0]["total_defects"] assert_equal 1, @results[0]["total_defects"]
assert_equal 0, @results[0]["preventable_defects"] assert_equal 0, @results[0]["preventable_defects"]
assert_equal 0.43, @results[0]["total_cost"] assert_equal 0.43, @results[0]["total_cost"]
@@ -413,6 +414,7 @@ class CubicleQueryTest < ActiveSupport::TestCase
end end
assert_equal 1, results.length assert_equal 1, results.length
assert_equal "Sad Day Moonshine", results[0]["product"] assert_equal "Sad Day Moonshine", results[0]["product"]
assert_equal "Sad Day Moonshine", results[0]["my_crazy_product"]
assert_equal 3, results[0]["total_defects"] assert_equal 3, results[0]["total_defects"]
assert_equal 2, results[0]["preventable_defects"] assert_equal 2, results[0]["preventable_defects"]
assert_equal 2, results[0]["conditioned_preventable"] assert_equal 2, results[0]["conditioned_preventable"]
@@ -20,14 +20,7 @@ class LevelTest < ActiveSupport::TestCase
assert_not_equal true, l.leaf_level? assert_not_equal true, l.leaf_level?
end end
end end
# Checks that Level#flatten, given a member name, returns that member's value
# for each entry stored on the level.
context "CubeDataLevel.flatten" do
  should "Flatten using a provided member name" do
    happy_level = Cubicle::Data::Level.new(Cubicle::Dimension.new(:happy))
    happy_level[:a] = [{ :a => 3, :b => 2 }]
    happy_level[:b] = [{ :a => 4, :b => 1 }]

    expected = [2, 1]
    assert_equal expected, happy_level.flatten(:b)
  end
end
context "CubeDataLevel[]=" do context "CubeDataLevel[]=" do
should "Make any passed in value into a configured Cubicle::DataLevel::Member" do should "Make any passed in value into a configured Cubicle::DataLevel::Member" do
level = Cubicle::Data::Level.new(Cubicle::Dimension.new(:baby)) level = Cubicle::Data::Level.new(Cubicle::Dimension.new(:baby))

0 comments on commit 85a66be

Please sign in to comment.