Permalink
Browse files

==0.1.25

  *Modified data extraction to ensure that member-level and query-level aliases are respected in the final result.
  Also stopped automatically dropping temporary map-reduce tables during query execution, for performance reasons.
  Instead, added a static method to the Cubicle module, clear_temp_tables, which can be called at application
  startup or teardown: Cubicle.clear_temp_tables()
  • Loading branch information...
1 parent 532a255 commit 85a66be428a1be0eab37f55912ade5359c2d342e @PlasticLizard committed May 20, 2010
View
@@ -1,3 +1,9 @@
+==0.1.25
+ *Modified data extraction to ensure that member and query level aliases are respected in the final result. Also
+ stopped automatically dropping temporary map reduce tables during query execution for performance reasons. Instead,
+ added a static method to the Cubicle module, clear_temp_tables, which can be called at application startup or teardown:
+ Cubicle.clear_temp_tables()
+
==0.1.24
*Added more detail to the profile for the 'find' action
View
@@ -5,11 +5,11 @@
Gem::Specification.new do |s|
s.name = %q{cubicle}
- s.version = "0.1.24"
+ s.version = "0.1.25"
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
s.authors = ["Nathan Stults"]
- s.date = %q{2010-05-13}
+ s.date = %q{2010-05-19}
s.description = %q{Cubicle provides a dsl and aggregation caching framework for automating the generation, execution and caching of map reduce queries when using MongoDB in Ruby. Cubicle also includes a MongoMapper plugin for quickly performing ad-hoc, multi-level group-by queries against a MongoMapper model.}
s.email = %q{hereiam@sonic.net}
s.extra_rdoc_files = [
View
@@ -52,7 +52,11 @@ def self.mongo
def self.logger
@logger ||= (Cubicle.mongo.logger || Logger.new("cubicle.log"))
- end
+ end
+
+ def self.clear_temp_tables
+ self.mongo.database.collection_names.each{|cn|self.mongo.database[cn].drop if cn =~ /tmp.mr.mapreduce/i}
+ end
end
#Turn off HTML escaping in Mustache
@@ -46,7 +46,7 @@ def execute_query(query,options={})
if query.all_dimensions? || (agg_data.member_names - query.member_names - [:all_measures]).blank?
filter = prepare_filter(query,options[:where] || {})
else
- reduction = aggregate(query,:source_collection=>agg_data.target_collection_name, :reason=>"Last mile reduction - source aggregation has too many members")
+ reduction = aggregate(query,:source_collection=>agg_data.target_collection_name, :reason=>"Last mile reduction - source aggregation has too many members (#{agg_data.member_names.join(",").inspect})")
end
end
@@ -57,7 +57,7 @@ def execute_query(query,options={})
@profiler.measure(:find, :source=>reduction.name, :reason=>"Fetch final query results", :query=>find_options) do
count = reduction.count
results = reduction.find(filter,find_options).to_a
- reduction.drop if reduction.name =~ /^tmp.mr.*/
+ #reduction.drop if reduction.name =~ /^tmp.mr.*/
Cubicle::Data::Table.new(query, results, count)
end
View
@@ -9,7 +9,7 @@ def self.aggregate(data,measures)
measures.each do |measure|
if (row.include?(measure.name))
val = row[measure.name]
- aggregated[measure.name] << val if val.kind_of?(Numeric)
+ (aggregated[measure.name] ||= []) << val if val.kind_of?(Numeric)
end
end
end
@@ -26,7 +26,7 @@ def self.extract_dimensions(dimension_names, data, table,parent_level=nil)
level = parent_level ? Cubicle::Data::Level.new(dim,parent_level) : Cubicle::Data::Hierarchy.new(dim,data.measures)
data.each do |tuple|
member_name = tuple.delete(dim_name.to_s) || "Unknown"
- level[member_name] << tuple
+ (level[member_name] ||= []) << tuple
end
level.each do |key,value|
View
@@ -23,16 +23,15 @@ def flatten(member_name = nil, opts={}, &block)
default_val = opts[:default] || @missing_member_default || 0
self.values.inject([]) do |output, data|
- data.inject(output) do |flattened, value|
- value.missing_member_default = default_val if value.respond_to?(:missing_member_default)
-
- if block_given?
- flat_val = block.arity == 1 ? (yield value) : (value.instance_eval(&block))
- end
- flat_val ||= value[member_name] if member_name && value.include?(member_name)
- flat_val ||= default_val
- flattened << flat_val
+ value = data.measure_values
+ value.missing_member_default = default_val if value.respond_to?(:missing_member_default)
+
+ if block_given?
+ flat_val = block.arity == 1 ? (yield value) : (value.instance_eval(&block))
end
+ flat_val ||= value[member_name] if member_name && value.include?(member_name)
+ flat_val ||= default_val
+ output << flat_val
end
end
@@ -49,7 +48,7 @@ def []=(key,val)
def hierarchy
parent_level || self
end
-
+
private
def prepare_level_member(member,member_name,parent_level)
member.class_eval("include Cubicle::Data::Member")
View
@@ -9,7 +9,7 @@ def initialize(query,query_results,total_count = nil)
@time_dimension_name = query.time_dimension.name if query.respond_to?(:time_dimension) && query.time_dimension
@time_period = query.time_period if query.respond_to?(:time_period)
@time_range = query.time_range if query.respond_to?(:time_range)
- extract_data(query_results)
+ extract_data(query,query_results)
@total_count = total_count if total_count
end
@@ -45,16 +45,37 @@ def total_pages
private
- def extract_data(data)
+ def extract_data(query,data)
data.each do |result|
new = result.dup
self << OrderedHashWithIndifferentAccess.new(new.delete("_id").merge(new.delete("value")))
- #these should be processed first, because they are often used as parts of the other calc measures
- measures.select{|m|m.distinct_count?}.each do |m|
- m.finalize_aggregation(self[-1])
+
+ finalize_aggregations(self[-1])
+
+ apply_aliases(query,self[-1])
+ end
+ end
+
+ def finalize_aggregations(row)
+ #these should be processed first, because they are often used as parts of the other calc measures
+ measures.select{|m|m.distinct_count?}.each do |m|
+ m.finalize_aggregation(row)
+ end
+ measures.select{|m|!m.distinct_count?}.each do |m|
+ m.finalize_aggregation(row)
+ end
+ end
+
+ def apply_aliases(query,row)
+ members = query.dimensions + query.measures
+ members.select{|m|m.alias_list}.each do |m|
+ m.alias_list.each do |m_alias|
+ row[m_alias.to_s] = row[m.name.to_s]
end
- measures.select{|m|!m.distinct_count?}.each do |m|
- m.finalize_aggregation(self[-1])
+ end
+ if (query.respond_to?(:query_aliases) && query.query_aliases)
+ query.query_aliases.each do |key,value|
+ row[key.to_s] = row[value.to_s]
end
end
end
@@ -1,7 +1,6 @@
class OrderedHashWithIndifferentAccess < OrderedHash
- def initialize(initial_data={},&block)
+ def initialize(initial_data={})
merge!(initial_data.stringify_keys)
- super(&block) if block
end
View
@@ -2,7 +2,7 @@ module Cubicle
class Query
include Dsl
- attr_reader :time_period, :transient, :aggregation, :named_expressions
+ attr_reader :time_period, :transient, :aggregation, :named_expressions, :query_aliases
attr_accessor :source_collection_name
def initialize(aggregation)
@@ -104,7 +104,9 @@ def detect_time_period(dimension_name = (time_dimension ? time_dimension.name :
def convert_dimension(dimension)
return dimension if transient?
- Cubicle::Dimension.new(dimension.name, :expression=>"this._id.#{dimension.name}")
+ d = Cubicle::Dimension.new(dimension.name, :expression=>"this._id.#{dimension.name}")
+ d.alias_list = dimension.alias_list
+ d
end
def convert_measure(measure)
@@ -133,7 +135,9 @@ def convert_measure(measure)
count_field = expression + "_count"
expression = "#{expression}*#{count_field}"
end
- Cubicle::Measure.new(measure.name, :expression=>expression,:aggregation_method=>aggregation, :distinct=>measure.distinct_count?)
+ m = Cubicle::Measure.new(measure.name, :expression=>expression,:aggregation_method=>aggregation, :distinct=>measure.distinct_count?)
+ m.alias_list = measure.alias_list
+ m
end
def unalias(*name_or_names)
@@ -65,7 +65,7 @@ class CubicleQueryTest < ActiveSupport::TestCase
end
should "return the specified subset of data, including all measures" do
assert_equal 4, @results.length
- assert_equal "2009-12-09", @results[0]["manufacture_date"]
+ assert_equal "2009-12-09", @results[0]["date"]
assert_equal 1, @results[0]["total_defects"]
assert_equal 0, @results[0]["preventable_defects"]
assert_equal 0.43, @results[0]["total_cost"]
@@ -163,6 +163,7 @@ class CubicleQueryTest < ActiveSupport::TestCase
should "return a filtered subset of data" do
assert_equal 1, @results.length
assert_equal "2009-12-09", @results[0]["manufacture_date"]
+ assert_equal "2009-12-09", @results[0]["date"]
assert_equal 1, @results[0]["total_defects"]
assert_equal 0, @results[0]["preventable_defects"]
assert_equal 0.43, @results[0]["total_cost"]
@@ -413,6 +414,7 @@ class CubicleQueryTest < ActiveSupport::TestCase
end
assert_equal 1, results.length
assert_equal "Sad Day Moonshine", results[0]["product"]
+ assert_equal "Sad Day Moonshine", results[0]["my_crazy_product"]
assert_equal 3, results[0]["total_defects"]
assert_equal 2, results[0]["preventable_defects"]
assert_equal 2, results[0]["conditioned_preventable"]
@@ -20,14 +20,7 @@ class LevelTest < ActiveSupport::TestCase
assert_not_equal true, l.leaf_level?
end
end
- context "CubeDataLevel.flatten" do
- should "Flatten using a provided member name" do
- l = Cubicle::Data::Level.new(Cubicle::Dimension.new(:happy))
- l[:a] = [{:a=>3,:b=>2}]
- l[:b] = [{:a=>4,:b=>1}]
- assert_equal [2,1], l.flatten(:b)
- end
- end
+
context "CubeDataLevel[]=" do
should "Make any passed in value into a configured Cubicle::DataLevel::Member" do
level = Cubicle::Data::Level.new(Cubicle::Dimension.new(:baby))

0 comments on commit 85a66be

Please sign in to comment.