From af5916538315d8d37c69f94a37b9f3fbf020e649 Mon Sep 17 00:00:00 2001 From: Stephen Eley Date: Mon, 21 Mar 2011 16:56:56 -0400 Subject: [PATCH] Finished all BSON types, no kidding --- lib/crunch/bson.rb | 12 +++++ lib/crunch/bson/binary.rb | 64 ++++++++++++++++++++++++++ lib/crunch/bson/hash.rb | 33 +++++++++++--- lib/crunch/bson/javascript.rb | 55 ++++++++++++++++++++++ lib/crunch/bson/string.rb | 20 ++++---- lib/crunch/bson/timestamp.rb | 36 +++++++++++++++ spec/crunch/bson/binary_spec.rb | 71 +++++++++++++++++++++++++++++ spec/crunch/bson/hash_spec.rb | 53 ++++++++++++++++++--- spec/crunch/bson/javascript_spec.rb | 44 ++++++++++++++++++ spec/crunch/bson/string_spec.rb | 25 ++++------ spec/crunch/bson/timestamp_spec.rb | 28 ++++++++++++ 11 files changed, 401 insertions(+), 40 deletions(-) create mode 100644 lib/crunch/bson/binary.rb create mode 100644 lib/crunch/bson/javascript.rb create mode 100644 lib/crunch/bson/timestamp.rb create mode 100644 spec/crunch/bson/binary_spec.rb create mode 100644 spec/crunch/bson/javascript_spec.rb create mode 100644 spec/crunch/bson/timestamp_spec.rb diff --git a/lib/crunch/bson.rb b/lib/crunch/bson.rb index 31605f9..95d0996 100644 --- a/lib/crunch/bson.rb +++ b/lib/crunch/bson.rb @@ -1,4 +1,16 @@ +module Crunch + module BSON + # Special constants + MIN = :bson_min + MAX = :bson_max + end +end + require 'crunch/bson/numeric' require 'crunch/bson/string' +require 'crunch/bson/binary' require 'crunch/bson/object_id' +require 'crunch/bson/javascript' +require 'crunch/bson/timestamp' require 'crunch/bson/hash' + diff --git a/lib/crunch/bson/binary.rb b/lib/crunch/bson/binary.rb new file mode 100644 index 0000000..941698d --- /dev/null +++ b/lib/crunch/bson/binary.rb @@ -0,0 +1,64 @@ +# encoding: BINARY + +module Crunch + module BSON + # Represents a string of binary data in BSON. We need a separate type for this + # to differentiate it from ordinary strings, which must be in UTF-8. 
# For performance reasons, this is a fairly 'dumb' class and doesn't do much
# calculation or validation. Users should use the `BSON.binary` method
# to generate binary BSON classes.
class Binary
  GENERIC  = "\x00".freeze # Default subtype
  FUNCTION = "\x01".freeze # 'Function' subtype (not well explained in BSON spec)
  OLD      = "\x02".freeze # Deprecated former binary subtype; encodes length at start of data
  UUID     = "\x03".freeze # UUID subtype (no special treatment)
  MD5      = "\x05".freeze # MD5 subtype (no special treatment)
  USER     = "\x80".freeze # User-defined data subtype (no special treatment)

  attr_reader :data, :subtype, :length

  # Creates a new BSON binary object from its data and an options hash. The
  # `:length` option is only likely to be given if we are being given input
  # _from_ BSON source. Likewise, users are unlikely to provide the `:subtype`
  # option under ordinary circumstances. (Though they can if they want to.)
  # @param [String] data A binary encoded string. (Convert before passing!)
  # @option opts [String] :subtype A single byte indicating the BSON binary subtype. Use the built-in constants for easy reference. Defaults to Binary::GENERIC.
  # @option opts [String] :length Four little-endian bytes indicating the bytesize of the _data_ (not the full type string). Defaults to `BSON.from_int(data.bytesize)`.
  def initialize(data, opts={})
    @data = data
    @subtype = opts[:subtype] || GENERIC
    @length = opts[:length] || BSON.from_int(@data.bytesize)
  end

  # Returns itself as a BSON-valid binary string: the length prefix, then the
  # subtype byte, then the data. The result is memoized.
  # @return [String] length + subtype + data
  # @see http://bsonspec.org/#/specification
  def to_s
    # Append onto a copy: String#<< works in place, so appending directly to
    # @length would corrupt the value returned by #length and would modify
    # (or fail on, if frozen) a string handed in through the :length option.
    @string ||= @length.dup << @subtype << @data
  end

  # Returns a three-element array with:
  # 1. The binary BSON type identifier, 5
  # 2. The length of the full binary string, including the length and subtype
  # 3. The binary string itself
  # @return [Array] [5, Integer, String]
  def element
    @element ||= [5, to_s.bytesize, to_s]
  end
end

# Produces a BSON binary type, which is defined as:
# 1.
A 32-bit number giving the length in bytes of the binary string (not including the subtype from #2); + # 2. A binary data subtype, which for right now we're simply forcing to the default of 0; + # 3. The binary data string itself, with no null terminators or such. + # @param [String] data The binary data; will be cast to Encoding::BINARY if it isn't already. + # @option opts [String] :subtype A single-byte binary subtype. (See the constants in the BSON::Binary class.) If you specify Binary::OLD special treatment will be given to the + # data string, adding its length again. + # @return [String] A binary BSON data string + def self.binary(str, opts={}) + str = BSON.from_int(str.bytesize) << str if opts[:subtype] == Binary::OLD + Binary.new(str.force_encoding(Encoding::BINARY), opts) + rescue NoMethodError + raise BSONError, "A binary string input is required; instead you gave: #{str}" + end + + end +end \ No newline at end of file diff --git a/lib/crunch/bson/hash.rb b/lib/crunch/bson/hash.rb index bfdf7d6..826e103 100644 --- a/lib/crunch/bson/hash.rb +++ b/lib/crunch/bson/hash.rb @@ -1,4 +1,6 @@ # encoding: BINARY +require 'date' + module Crunch module BSON # Returns the BSON document corresponding to the given Ruby hash. 
@@ -29,13 +31,8 @@ def self.from_element(value) case value when Float then [1, 8, from_float(value)] when String # Could be an actual string _OR_ binary data - if value.encoding == Encoding::BINARY - out = from_binary(value) - [5, out.bytesize, out] - else - out = from_string(value) - [2, out.bytesize, out] - end + out = from_string(value) + [2, out.bytesize, out] when Hash out = from_hash(value) [3, out.bytesize, out] @@ -53,14 +50,36 @@ def self.from_element(value) value.each_with_index {|val, i| h[i] = val} out = from_hash(h) [4, out.bytesize, out] + when BSON::Binary + value.element when ObjectID [7, 12, value.bin] when false [8, 1, "\x00"] when true [8, 1, "\x01"] + when Time + msec = (value.to_f * 1000).floor + [9, 8, from_int(msec, length: 8)] + when Date + msec = (value.to_datetime.to_time.to_i * 1000) # Stupid method chain, but casting to DateTime first ensures UTC zone + [9, 8, from_int(msec, length: 8)] when nil [10, 0, ""] + when Regexp + out = from_regex(value) + [11, out.bytesize, out] + when BSON::Javascript + value.element + when Symbol + out = from_string(value.to_s) + [14, out.bytesize, out] + when BSON::Timestamp + [17, 8, value.to_s] + when BSON::MIN + [255, 0, ''] + when BSON::MAX + [127, 0, ''] else raise BSONError, "Could not convert unknown data type to BSON: #{value}" end diff --git a/lib/crunch/bson/javascript.rb b/lib/crunch/bson/javascript.rb new file mode 100644 index 0000000..b8e7ff3 --- /dev/null +++ b/lib/crunch/bson/javascript.rb @@ -0,0 +1,55 @@ +# encoding: BINARY + +module Crunch + module BSON + + # Represents Javascript code in BSON. We need a separate type for this + # because the BSON spec says so, and because scope blocks can optionally + # be passed to pre-assign variables. Note that no validation on the Javascript + # nor the scope are provided. Users can use the `BSON.javascript` method + # as a convenience. 
+ class Javascript + attr_reader :code, :scope + + # Returns a new Javascript object representing the code, and maybe the scope + # passed in. The BSON code returned by the Javascript#to_s method will vary + # in type based on whether a scope is given. + # @param [String] code A block of Javascript code + # @param [optional, Hash] scope A mapping of variables to values + def initialize(code, scope=nil) + @code, @scope = code, scope + end + + # Returns itself as a BSON-valid binary string. If the object has a scope, + # the structure of the string will conform to the `code_w_scope` specification; + # otherwise it will simply be a string. + # @see http://bsonspec.org/#/specification + def to_s + @string ||= if @scope + code = BSON.from_string(@code) + scope = BSON.from_hash(@scope) + length = code.bytesize + scope.bytesize + 4 + BSON.from_int(length) << code << scope + else + BSON.from_string(@code) + end + end + + # Returns a three-element array with: + # 1. The binary BSON type identifier: 13 with no scope, or 15 with scope + # 2. The length of the full BSON binary string + # 3. The BSON binary string itself + def element + @element ||= [@scope ? 15 : 13, to_s.bytesize, to_s] + end + + end + + # Converts the given code and scope to a BSON::Javascript object. + # Really just a shortcut to Javascript.new for consistency with other + # one-way BSON converters (`.cstring`, `.binary`, etc.) + def self.javascript(*args) + Javascript.new(*args) + end + end +end \ No newline at end of file diff --git a/lib/crunch/bson/string.rb b/lib/crunch/bson/string.rb index c85afc9..1fb61db 100644 --- a/lib/crunch/bson/string.rb +++ b/lib/crunch/bson/string.rb @@ -26,16 +26,16 @@ def self.from_string(str, opts={}) (from_int(out.bytesize, length: 4) + out).force_encoding(Encoding::BINARY) end - # Produces a BSON binary type, which is defined as: - # 1. A 32-bit number giving the length in bytes of the binary string (not including the subtype from #2); - # 2. 
A binary data subtype, which for right now we're simply forcing to the default of 0; - # 3. The binary data string itself, with no null terminators or such. - # @param [String] data The binary data; must be in Encoding::Binary. - # @return [String] A binary BSON data string - def self.from_binary(str) - from_int(str.bytesize) << 0 << str.force_encoding(Encoding::BINARY) - rescue NoMethodError - raise BSONError, "A binary string input is required; instead you gave: #{str}" + # Produces a BSON regex type, which is a concatenation of cstrings for the + # pattern and the options. The 'u' option is set as well if the encoding + # contains higher-order bytes. + def self.from_regex(regex, opts={}) + optstr, opts = "", regex.options + optstr << 'i' if (opts & 1) == 1 + optstr << 'm' if (opts & 4) == 4 + optstr << 'u' if regex.fixed_encoding? + optstr << 'x' if (opts & 2) == 2 + cstring(regex.source) + cstring(optstr, normalized: true) end end end diff --git a/lib/crunch/bson/timestamp.rb b/lib/crunch/bson/timestamp.rb new file mode 100644 index 0000000..ddec200 --- /dev/null +++ b/lib/crunch/bson/timestamp.rb @@ -0,0 +1,36 @@ +# encoding: BINARY +module Crunch + module BSON + + # Represents a BSON "timestamp" type, which MongoDB uses internally and users will + # most likely never need to worry about. We include it in our implementation for + # the sake of irrational completionism. + class Timestamp + + # If given no parameters, returns a "null" Timestamp (which will be filled in by + # the server.) If given a binary string, parses it into seconds and counter. 
+ def initialize(str=nil) + if str + @string = str + else + @time, @counter, @string = Time.at(0), 0, "\x00\x00\x00\x00\x00\x00\x00\x00" + end + end + + def time + @time ||= Time.at(BSON.to_int(@string[0..3])) + end + + def counter + @counter ||= BSON.to_int(@string[4..7]) + end + + def to_s + @string + end + alias_method :bin, :to_s + + + end + end +end diff --git a/spec/crunch/bson/binary_spec.rb b/spec/crunch/bson/binary_spec.rb new file mode 100644 index 0000000..c183786 --- /dev/null +++ b/spec/crunch/bson/binary_spec.rb @@ -0,0 +1,71 @@ +#encoding: BINARY +require File.dirname(__FILE__) + '/../../spec_helper' +require 'digest/md5' + +module Crunch + module BSON + describe "- binary method" do + it "handles an empty binary string" do + BSON.binary('').to_s.should == "\x00\x00\x00\x00\x00" + end + + it "does NOT handle nil" do + ->{BSON.binary(nil)}.should raise_error(BSONError, /binary string/) + end + + it "does NOT convert other types" do + ->{BSON.binary(3.14)}.should raise_error(BSONError, /binary string/) + end + + it "returns a BSON::Binary object" do + BSON.binary("}\x99$\x00").should be_a(Binary) + end + + it "adds the subtype and length" do + BSON.binary("}\x99$\x00").to_s.should == "\x04\x00\x00\x00\x00}\x99$\x00" + end + end + + describe Binary do + before(:each) do + @pi = "\x1F\x85\xEBQ\xB8\x1E\t@" # 3.14 as an encoded float + + end + + it "can take a data string" do + Binary.new(@pi).to_s.should == "\x08\x00\x00\x00\x00#{@pi}" + end + + it "can take a Function type" do + Binary.new(@pi, subtype: Binary::FUNCTION).to_s.should == "\x08\x00\x00\x00\x01#{@pi}" + end + + it "can take an 'old' binary type" do + Binary.new("\x08\x00\x00\x00#{@pi}", subtype: Binary::OLD).to_s.should == "\x0C\x00\x00\x00\x02\x08\x00\x00\x00#{@pi}" + end + + it "can take a UUID type" do + Binary.new(@pi, subtype: Binary::UUID).to_s.should == "\x08\x00\x00\x00\x03#{@pi}" + end + + it "can take an MD5 type" do + md5 = Digest::MD5.new('foo') + Binary.new(md5.digest, subtype: 
Binary::MD5).to_s.should == "\x10\x00\x00\x00\x05\xD4\x1D\x8C\xD9\x8F\x00\xB2\x04\xE9\x80\t\x98\xEC\xF8B~" + end + + it "can take a user defined type" do + Binary.new(@pi, subtype: Binary::USER).to_s.should == "\x08\x00\x00\x00\x80#{@pi}" + end + + it "can take the length" do + Binary.new(@pi, subtype: Binary::GENERIC, length: "\x08\x00\x00\x00").to_s.should == "\x08\x00\x00\x00\x00#{@pi}" + end + + it "can return a full element" do + Binary.new("\x1F\x85\xEBQ\xB8\x1E\t@").element.should == [5, 13, "\x08\x00\x00\x00\x00\x1F\x85\xEBQ\xB8\x1E\t@"] + end + + end + + end +end \ No newline at end of file diff --git a/spec/crunch/bson/hash_spec.rb b/spec/crunch/bson/hash_spec.rb index 0291863..891856d 100644 --- a/spec/crunch/bson/hash_spec.rb +++ b/spec/crunch/bson/hash_spec.rb @@ -1,5 +1,6 @@ #encoding: BINARY require File.dirname(__FILE__) + '/../../spec_helper' +require 'date' module Crunch describe BSON do @@ -51,11 +52,11 @@ module Crunch end it "handles arrays" do - BSON.from_element([1, :foo, 'eleven', 3.5, nil, false]).should == [4, 55, "7\x00\x00\x00\x100\x00\x01\x00\x00\x00\x0E1\x00\x04\x00\x00\x00foo\x00\x022\x00\a\x00\x00\x00eleven\x00\x013\x00\x00\x00\x00\x00\x00\x00\f@\n4\x00\b5\x00\x00\x00"] + BSON.from_element([1, :foo, 'eleven'.force_encoding('ASCII'), 3.5, nil, false]).should == [4, 55, "7\x00\x00\x00\x100\x00\x01\x00\x00\x00\x0E1\x00\x04\x00\x00\x00foo\x00\x022\x00\a\x00\x00\x00eleven\x00\x013\x00\x00\x00\x00\x00\x00\x00\f@\n4\x00\b5\x00\x00\x00"] end it "can handle binary data" do - b = "\x1F\x85\xEBQ\xB8\x1E\t@".force_encoding('BINARY') # This is the BSON string for 3.14. Chosen arbitrarily. + b = BSON.binary("\x1F\x85\xEBQ\xB8\x1E\t@".force_encoding('BINARY')) BSON.from_element(b).should == [5, 13, "\x08\x00\x00\x00\x00\x1F\x85\xEBQ\xB8\x1E\t@"] end @@ -72,23 +73,61 @@ module Crunch BSON.from_element(true).should == [8, 1, 1.chr] end + it "can handle times" do + t = Time.gm(2011, 3, 21, 14, 19, 30, 115147) # Down to the microsecond level! 
+ BSON.from_element(t).should == [9, 8, "\xC3\xED\xC8\xD8.\x01\x00\x00"] + end + + it "can handle dates" do + d = Date.parse("2011-03-21") + BSON.from_element(d).should == [9, 8, "\x00\b\xB6\xD5.\x01\x00\x00"] + end + + it "can handle datetimes" do + dt = DateTime.parse("2011-03-21 14:19:30") + BSON.from_element(dt).should == [9, 8, "P\xED\xC8\xD8.\x01\x00\x00"] + end + it "can handle nil" do BSON.from_element(nil).should == [10, 0, ""] end - it "can handle datetimes" do - pending + it "can handle regexes" do + r = /^this$/ + BSON.from_element(r).should == [11, 8, "^this$\x00\x00"] end - it "can handle regexes" do - pending + it "can handle regexes with options" do + r = /^this$/ix + BSON.from_element(r).should == [11, 10, "^this$\x00ix\x00"] end it "can handle Javascript" do - pending + j = BSON.javascript "function() { return this; }" + BSON.from_element(j).should == [13, 32, "\x1C\x00\x00\x00function() { return this; }\x00"] end + it "can handle Javascript with scope" do + j = BSON.javascript "function() { return this; }", this: 5 + BSON.from_element(j).should == [15, 51, "3\x00\x00\x00\x1C\x00\x00\x00function() { return this; }\x00\x0F\x00\x00\x00\x10this\x00\x05\x00\x00\x00\x00"] + end + it "can handle symbols" do + BSON.from_element(:bar).should == [14, 8, "\x04\x00\x00\x00bar\x00"] + end + + it "can handle a BSON Timestamp" do + ts = BSON::Timestamp.new + BSON.from_element(ts).should == [17, 8, "\x00\x00\x00\x00\x00\x00\x00\x00"] + end + + it "recognizes the MIN value" do + BSON.from_element(BSON::MIN).should == [255, 0, ''] + end + + it "recognizes the MAX value" do + BSON.from_element(BSON::MAX).should == [127, 0, ''] + end end diff --git a/spec/crunch/bson/javascript_spec.rb b/spec/crunch/bson/javascript_spec.rb new file mode 100644 index 0000000..6cccacd --- /dev/null +++ b/spec/crunch/bson/javascript_spec.rb @@ -0,0 +1,44 @@ +#encoding: BINARY + +require File.dirname(__FILE__) + '/../../spec_helper' + +module Crunch + module BSON + describe "- 
javascript method" do + before(:each) do + @code = "function() { return this; }" + @scope = {'this' => 5} + end + it "takes a string" do + BSON.javascript(@code).to_s.should == "\x1C\x00\x00\x00function() { return this; }\x00" + end + + it "can take a scope" do + BSON.javascript(@code, @scope).to_s.should == "3\x00\x00\x00\x1C\x00\x00\x00function() { return this; }\x00\x0F\x00\x00\x00\x10this\x00\x05\x00\x00\x00\x00" + end + end + + describe Javascript do + before(:each) do + @code = "function() { return this; }" + @scope = {'this' => 5} + end + + it "takes a code string" do + Javascript.new(@code).to_s.should == "\x1C\x00\x00\x00function() { return this; }\x00" + end + + it "can take a scope" do + Javascript.new(@code, @scope).to_s.should == "3\x00\x00\x00\x1C\x00\x00\x00function() { return this; }\x00\x0F\x00\x00\x00\x10this\x00\x05\x00\x00\x00\x00" + end + + it "can return its element without scope" do + Javascript.new(@code).element.should == [13, 32, "\x1C\x00\x00\x00function() { return this; }\x00"] + end + + it "can return its element with scope" do + Javascript.new(@code, @scope).element.should == [15, 51, "3\x00\x00\x00\x1C\x00\x00\x00function() { return this; }\x00\x0F\x00\x00\x00\x10this\x00\x05\x00\x00\x00\x00"] + end + end + end +end \ No newline at end of file diff --git a/spec/crunch/bson/string_spec.rb b/spec/crunch/bson/string_spec.rb index 656d634..055c6ee 100644 --- a/spec/crunch/bson/string_spec.rb +++ b/spec/crunch/bson/string_spec.rb @@ -71,28 +71,21 @@ module Crunch end - describe "- from_binary method" do - it "handles an empty binary string" do - BSON.from_binary('').should == "\x00\x00\x00\x00\x00".force_encoding('BINARY') + describe "- from_regex method" do + it "returns the pattern" do + BSON.from_regex(/^foo/).should == "^foo\x00\x00" end - it "does NOT handle nil" do - ->{BSON.from_binary(nil)}.should raise_error(BSONError, /binary string/) + it "returns the options" do + BSON.from_regex(/foo$/xim).should == "foo$\x00imx\x00" end - it 
"does NOT convert other types" do - ->{BSON.from_binary(3.14)}.should raise_error(BSONError, /binary string/) + it "sets the Unicode option if the pattern has non-ASCII characters in it" do + BSON.from_regex(/föo/ix).should == "föo\x00iux\x00" end - - it "is binary encoded" do - BSON.from_binary("}\x99$\x00".force_encoding('BINARY')).encoding.should == Encoding::BINARY - end - - it "adds the subtype and length" do - BSON.from_binary("}\x99$\x00".force_encoding('BINARY')).should == "\x04\x00\x00\x00\x00}\x99$\x00".force_encoding('BINARY') - end - + end + end end \ No newline at end of file diff --git a/spec/crunch/bson/timestamp_spec.rb b/spec/crunch/bson/timestamp_spec.rb new file mode 100644 index 0000000..d3a1ead --- /dev/null +++ b/spec/crunch/bson/timestamp_spec.rb @@ -0,0 +1,28 @@ +#encoding: BINARY +require File.dirname(__FILE__) + '/../../spec_helper' + +module Crunch + module BSON + describe Timestamp do + before(:each) do + @existing = "\xE5\xB5\x87M\x11\x00\x00\x00" + @date = Time.gm(2011, 3, 21, 20, 32, 37) + end + + it "is null if given no parameters" do + t = Timestamp.new + t.bin.should == "\x00\x00\x00\x00\x00\x00\x00\x00" + t.time.should == Time.at(0) + t.counter.should == 0 + end + + it "returns itself if given a string parameter" do + t = Timestamp.new(@existing) + t.to_s.should == @existing + t.time.utc.should == @date + t.counter.should == 17 + end + + end + end +end \ No newline at end of file