Permalink
Browse files

==0.1.25

  *Modified data extraction to ensure that member and query level aliases are respected in the final result. Also
  stopped automatically dropping temporary map reduce tables during query execution for performance reasons. Instead,
  added a static method to the Cubicle module, clear_temp_tables, which can be called at application startup or teardown:
  Cubicle.clear_temp_tables()
  • Loading branch information...
Nathan
Nathan committed May 20, 2010
1 parent 532a255 commit 85a66be428a1be0eab37f55912ade5359c2d342e
View
@@ -1,3 +1,9 @@
==0.1.25
*Modified data extraction to ensure that member-level and query-level aliases are respected in the final result. Also,
for performance reasons, temporary map-reduce tables are no longer dropped automatically during query execution. Instead,
a static method, clear_temp_tables, was added to the Cubicle module; it can be called at application startup or teardown:
Cubicle.clear_temp_tables()
==0.1.24
*Added more detail to the profile for the 'find' action
View
@@ -5,11 +5,11 @@
Gem::Specification.new do |s|
s.name = %q{cubicle}
s.version = "0.1.24"
s.version = "0.1.25"
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
s.authors = ["Nathan Stults"]
s.date = %q{2010-05-13}
s.date = %q{2010-05-19}
s.description = %q{Cubicle provides a dsl and aggregation caching framework for automating the generation, execution and caching of map reduce queries when using MongoDB in Ruby. Cubicle also includes a MongoMapper plugin for quickly performing ad-hoc, multi-level group-by queries against a MongoMapper model.}
s.email = %q{hereiam@sonic.net}
s.extra_rdoc_files = [
View
@@ -52,7 +52,11 @@ def self.mongo
# Returns the memoized logger. On first use it takes the logger exposed by
# Cubicle.mongo and, when that is nil/false, falls back to a new Logger
# that writes to "cubicle.log".
def self.logger
  return @logger if @logger

  @logger = Cubicle.mongo.logger || Logger.new("cubicle.log")
end
end
# Drops the temporary map/reduce collections found in the current database.
#
# Per the 0.1.25 release notes, temporary tables are no longer dropped
# during query execution; call this at application startup or teardown
# instead.
#
# Returns the list of collection names that was scanned (the result of
# +each+).
def self.clear_temp_tables
  database = self.mongo.database
  database.collection_names.each do |collection_name|
    # The dots are escaped so they only match a literal "." — an unescaped
    # dot matches any character and could drop unrelated collections such
    # as "tmpXmrXmapreduce".
    database[collection_name].drop if collection_name =~ /tmp\.mr\.mapreduce/i
  end
end
end
#Turn off HTML escaping in Mustache
@@ -46,7 +46,7 @@ def execute_query(query,options={})
if query.all_dimensions? || (agg_data.member_names - query.member_names - [:all_measures]).blank?
filter = prepare_filter(query,options[:where] || {})
else
reduction = aggregate(query,:source_collection=>agg_data.target_collection_name, :reason=>"Last mile reduction - source aggregation has too many members")
reduction = aggregate(query,:source_collection=>agg_data.target_collection_name, :reason=>"Last mile reduction - source aggregation has too many members (#{agg_data.member_names.join(",").inspect})")
end
end
@@ -57,7 +57,7 @@ def execute_query(query,options={})
@profiler.measure(:find, :source=>reduction.name, :reason=>"Fetch final query results", :query=>find_options) do
count = reduction.count
results = reduction.find(filter,find_options).to_a
reduction.drop if reduction.name =~ /^tmp.mr.*/
#reduction.drop if reduction.name =~ /^tmp.mr.*/
Cubicle::Data::Table.new(query, results, count)
end
View
@@ -9,7 +9,7 @@ def self.aggregate(data,measures)
measures.each do |measure|
if (row.include?(measure.name))
val = row[measure.name]
aggregated[measure.name] << val if val.kind_of?(Numeric)
(aggregated[measure.name] ||= []) << val if val.kind_of?(Numeric)
end
end
end
@@ -26,7 +26,7 @@ def self.extract_dimensions(dimension_names, data, table,parent_level=nil)
level = parent_level ? Cubicle::Data::Level.new(dim,parent_level) : Cubicle::Data::Hierarchy.new(dim,data.measures)
data.each do |tuple|
member_name = tuple.delete(dim_name.to_s) || "Unknown"
level[member_name] << tuple
(level[member_name] ||= []) << tuple
end
level.each do |key,value|
View
@@ -23,16 +23,15 @@ def flatten(member_name = nil, opts={}, &block)
default_val = opts[:default] || @missing_member_default || 0
self.values.inject([]) do |output, data|
data.inject(output) do |flattened, value|
value.missing_member_default = default_val if value.respond_to?(:missing_member_default)
if block_given?
flat_val = block.arity == 1 ? (yield value) : (value.instance_eval(&block))
end
flat_val ||= value[member_name] if member_name && value.include?(member_name)
flat_val ||= default_val
flattened << flat_val
value = data.measure_values
value.missing_member_default = default_val if value.respond_to?(:missing_member_default)
if block_given?
flat_val = block.arity == 1 ? (yield value) : (value.instance_eval(&block))
end
flat_val ||= value[member_name] if member_name && value.include?(member_name)
flat_val ||= default_val
output << flat_val
end
end
@@ -49,7 +48,7 @@ def []=(key,val)
# Returns parent_level when one is set; otherwise returns the receiver itself.
def hierarchy
  ancestor = parent_level
  ancestor || self
end
private
def prepare_level_member(member,member_name,parent_level)
member.class_eval("include Cubicle::Data::Member")
View
@@ -9,7 +9,7 @@ def initialize(query,query_results,total_count = nil)
@time_dimension_name = query.time_dimension.name if query.respond_to?(:time_dimension) && query.time_dimension
@time_period = query.time_period if query.respond_to?(:time_period)
@time_range = query.time_range if query.respond_to?(:time_range)
extract_data(query_results)
extract_data(query,query_results)
@total_count = total_count if total_count
end
@@ -45,16 +45,37 @@ def total_pages
private
def extract_data(data)
def extract_data(query,data)
data.each do |result|
new = result.dup
self << OrderedHashWithIndifferentAccess.new(new.delete("_id").merge(new.delete("value")))
#these should be processed first, because they are often used as parts of the other calc measures
measures.select{|m|m.distinct_count?}.each do |m|
m.finalize_aggregation(self[-1])
finalize_aggregations(self[-1])
apply_aliases(query,self[-1])
end
end
# Calls finalize_aggregation(row) on every measure, in two passes.
#
# Distinct-count measures are finalized first because they are often used
# as inputs to the other calculated measures; all remaining measures are
# finalized afterwards.
def finalize_aggregations(row)
  distinct_counts, remaining = measures.partition { |measure| measure.distinct_count? }

  distinct_counts.each { |measure| measure.finalize_aggregation(row) }
  remaining.each { |measure| measure.finalize_aggregation(row) }
end
def apply_aliases(query,row)
members = query.dimensions + query.measures
members.select{|m|m.alias_list}.each do |m|
m.alias_list.each do |m_alias|
row[m_alias.to_s] = row[m.name.to_s]
end
measures.select{|m|!m.distinct_count?}.each do |m|
m.finalize_aggregation(self[-1])
end
if (query.respond_to?(:query_aliases) && query.query_aliases)
query.query_aliases.each do |key,value|
row[key.to_s] = row[value.to_s]
end
end
end
@@ -1,7 +1,6 @@
class OrderedHashWithIndifferentAccess < OrderedHash
def initialize(initial_data={},&block)
def initialize(initial_data={})
merge!(initial_data.stringify_keys)
super(&block) if block
end
View
@@ -2,7 +2,7 @@ module Cubicle
class Query
include Dsl
attr_reader :time_period, :transient, :aggregation, :named_expressions
attr_reader :time_period, :transient, :aggregation, :named_expressions, :query_aliases
attr_accessor :source_collection_name
def initialize(aggregation)
@@ -104,7 +104,9 @@ def detect_time_period(dimension_name = (time_dimension ? time_dimension.name :
def convert_dimension(dimension)
return dimension if transient?
Cubicle::Dimension.new(dimension.name, :expression=>"this._id.#{dimension.name}")
d = Cubicle::Dimension.new(dimension.name, :expression=>"this._id.#{dimension.name}")
d.alias_list = dimension.alias_list
d
end
def convert_measure(measure)
@@ -133,7 +135,9 @@ def convert_measure(measure)
count_field = expression + "_count"
expression = "#{expression}*#{count_field}"
end
Cubicle::Measure.new(measure.name, :expression=>expression,:aggregation_method=>aggregation, :distinct=>measure.distinct_count?)
m = Cubicle::Measure.new(measure.name, :expression=>expression,:aggregation_method=>aggregation, :distinct=>measure.distinct_count?)
m.alias_list = measure.alias_list
m
end
def unalias(*name_or_names)
@@ -65,7 +65,7 @@ class CubicleQueryTest < ActiveSupport::TestCase
end
should "return the specified subset of data, including all measures" do
assert_equal 4, @results.length
assert_equal "2009-12-09", @results[0]["manufacture_date"]
assert_equal "2009-12-09", @results[0]["date"]
assert_equal 1, @results[0]["total_defects"]
assert_equal 0, @results[0]["preventable_defects"]
assert_equal 0.43, @results[0]["total_cost"]
@@ -163,6 +163,7 @@ class CubicleQueryTest < ActiveSupport::TestCase
should "return a filtered subset of data" do
assert_equal 1, @results.length
assert_equal "2009-12-09", @results[0]["manufacture_date"]
assert_equal "2009-12-09", @results[0]["date"]
assert_equal 1, @results[0]["total_defects"]
assert_equal 0, @results[0]["preventable_defects"]
assert_equal 0.43, @results[0]["total_cost"]
@@ -413,6 +414,7 @@ class CubicleQueryTest < ActiveSupport::TestCase
end
assert_equal 1, results.length
assert_equal "Sad Day Moonshine", results[0]["product"]
assert_equal "Sad Day Moonshine", results[0]["my_crazy_product"]
assert_equal 3, results[0]["total_defects"]
assert_equal 2, results[0]["preventable_defects"]
assert_equal 2, results[0]["conditioned_preventable"]
@@ -20,14 +20,7 @@ class LevelTest < ActiveSupport::TestCase
assert_not_equal true, l.leaf_level?
end
end
context "CubeDataLevel.flatten" do
should "Flatten using a provided member name" do
l = Cubicle::Data::Level.new(Cubicle::Dimension.new(:happy))
l[:a] = [{:a=>3,:b=>2}]
l[:b] = [{:a=>4,:b=>1}]
assert_equal [2,1], l.flatten(:b)
end
end
context "CubeDataLevel[]=" do
should "Make any passed in value into a configured Cubicle::DataLevel::Member" do
level = Cubicle::Data::Level.new(Cubicle::Dimension.new(:baby))

0 comments on commit 85a66be

Please sign in to comment.