Skip to content

Commit

Permalink
Finished all BSON types, no kidding
Browse files Browse the repository at this point in the history
  • Loading branch information
SFEley committed Mar 21, 2011
1 parent 4d2c486 commit af59165
Show file tree
Hide file tree
Showing 11 changed files with 401 additions and 40 deletions.
12 changes: 12 additions & 0 deletions lib/crunch/bson.rb
@@ -1,4 +1,16 @@
module Crunch
# Namespace for the BSON encoding helpers and the wrapper types loaded by the
# `require` lines that follow this module.
module BSON
# Special constants
# Sentinel values a caller places in a document to request the BSON "min key"
# and "max key" elements. Both are plain Ruby Symbols.
# NOTE(review): because these are Symbols, a `case` that tests `Symbol` before
# `BSON::MIN`/`BSON::MAX` (as `from_element` in bson/hash.rb appears to) will
# serialize them as ordinary symbols and never reach the min/max branches --
# confirm the branch order there.
MIN = :bson_min
MAX = :bson_max
end
end

require 'crunch/bson/numeric'
require 'crunch/bson/string'
require 'crunch/bson/binary'
require 'crunch/bson/object_id'
require 'crunch/bson/javascript'
require 'crunch/bson/timestamp'
require 'crunch/bson/hash'

64 changes: 64 additions & 0 deletions lib/crunch/bson/binary.rb
@@ -0,0 +1,64 @@
# encoding: BINARY

module Crunch
  module BSON
    # Represents a string of binary data in BSON. We need a separate type for this
    # to differentiate it from ordinary strings, which must be in UTF-8. For
    # performance reasons, this is a fairly 'dumb' class and doesn't do much
    # calculation or validation. Users should use the `BSON.binary` method
    # to generate binary BSON objects.
    class Binary
      GENERIC  = "\x00".freeze # Default subtype
      FUNCTION = "\x01".freeze # 'Function' subtype (not well explained in BSON spec)
      OLD      = "\x02".freeze # Deprecated former binary subtype; encodes length at start of data
      UUID     = "\x03".freeze # UUID subtype (no special treatment)
      MD5      = "\x05".freeze # MD5 subtype (no special treatment)
      USER     = "\x80".freeze # User-defined data subtype (no special treatment)

      attr_reader :data, :subtype, :length

      # Creates a new BSON binary object. The `:length` option is only likely to
      # be given when the object is being built _from_ BSON source; likewise,
      # users are unlikely to provide `:subtype` under ordinary circumstances.
      # Neither option is validated against the data.
      # @param [String] data A binary encoded string. (Convert before passing!)
      # @option opts [String] :subtype A single byte indicating the BSON binary subtype. Use the built-in constants for easy reference. Defaults to Binary::GENERIC.
      # @option opts [String] :length Four little-endian bytes indicating the bytesize of the _data_ (not the full type string). Computed with BSON.from_int when omitted.
      def initialize(data, opts={})
        @data    = data
        @subtype = opts[:subtype] || GENERIC
        @length  = opts[:length]  || BSON.from_int(@data.bytesize)
      end

      # Returns itself as a BSON-valid binary string: length, subtype, data.
      # The result is memoized.
      # @see http://bsonspec.org/#/specification
      # @return [String]
      def to_s
        # Append onto a copy: `<<` mutates its receiver, and appending straight
        # onto @length would corrupt what #length returns (and the caller's own
        # :length string, or raise if that string is frozen).
        @string ||= @length.dup << @subtype << @data
      end

      # Returns a three-element array with:
      # 1. The binary BSON type identifier, 5
      # 2. The length of the full binary string, including the length and subtype
      # 3. The binary string itself
      # @return [Array]
      def element
        @element ||= [5, to_s.bytesize, to_s]
      end
    end

    # Wraps the given data in a BSON::Binary object, whose encoded form is:
    # 1. A 32-bit number giving the length in bytes of the binary string (not including the subtype from #2);
    # 2. A binary data subtype (Binary::GENERIC unless `:subtype` is given);
    # 3. The binary data string itself, with no null terminators or such.
    # The caller's string is left untouched; a binary-encoded copy is stored.
    # @param [String] str The binary data; a copy is tagged Encoding::BINARY if it isn't already.
    # @option opts [String] :subtype A single-byte binary subtype. (See the constants in the BSON::Binary class.) If you specify Binary::OLD special treatment will be given to the
    #   data string, adding its length again.
    # @return [Binary] The wrapped data; call #to_s or #element for the encoded bytes.
    # @raise [BSONError] if the input is not string-like (nil, numbers, ...)
    def self.binary(str, opts={})
      # Check the duck type up front instead of rescuing NoMethodError, which
      # would also swallow unrelated bugs raised further down the call chain.
      unless str.respond_to?(:bytesize) && str.respond_to?(:force_encoding)
        raise BSONError, "A binary string input is required; instead you gave: #{str}"
      end

      # force_encoding re-tags its receiver in place, so work on a copy rather
      # than silently changing the encoding of the caller's string.
      data = str.dup.force_encoding(Encoding::BINARY)
      data = BSON.from_int(data.bytesize) << data if opts[:subtype] == Binary::OLD
      Binary.new(data, opts)
    end

  end
end
33 changes: 26 additions & 7 deletions lib/crunch/bson/hash.rb
@@ -1,4 +1,6 @@
# encoding: BINARY
require 'date'

module Crunch
module BSON
# Returns the BSON document corresponding to the given Ruby hash.
Expand Down Expand Up @@ -29,13 +31,8 @@ def self.from_element(value)
case value
when Float then [1, 8, from_float(value)]
when String # Could be an actual string _OR_ binary data
if value.encoding == Encoding::BINARY
out = from_binary(value)
[5, out.bytesize, out]
else
out = from_string(value)
[2, out.bytesize, out]
end
out = from_string(value)
[2, out.bytesize, out]
when Hash
out = from_hash(value)
[3, out.bytesize, out]
Expand All @@ -53,14 +50,36 @@ def self.from_element(value)
value.each_with_index {|val, i| h[i] = val}
out = from_hash(h)
[4, out.bytesize, out]
when BSON::Binary
value.element
when ObjectID
[7, 12, value.bin]
when false
[8, 1, "\x00"]
when true
[8, 1, "\x01"]
when Time
msec = (value.to_f * 1000).floor
[9, 8, from_int(msec, length: 8)]
when Date
msec = (value.to_datetime.to_time.to_i * 1000) # Stupid method chain, but casting to DateTime first ensures UTC zone
[9, 8, from_int(msec, length: 8)]
when nil
[10, 0, ""]
when Regexp
out = from_regex(value)
[11, out.bytesize, out]
when BSON::Javascript
value.element
when Symbol
out = from_string(value.to_s)
[14, out.bytesize, out]
when BSON::Timestamp
[17, 8, value.to_s]
when BSON::MIN
[255, 0, '']
when BSON::MAX
[127, 0, '']
else
raise BSONError, "Could not convert unknown data type to BSON: #{value}"
end
Expand Down
55 changes: 55 additions & 0 deletions lib/crunch/bson/javascript.rb
@@ -0,0 +1,55 @@
# encoding: BINARY

module Crunch
  module BSON

    # BSON wrapper for a chunk of Javascript source, optionally paired with a
    # scope hash that pre-assigns variables for the code. Neither the code nor
    # the scope is validated. `BSON.javascript` is a convenience constructor.
    class Javascript
      attr_reader :code, :scope

      # Builds a Javascript value. Whether a scope is present decides which
      # BSON layout #to_s produces and which type id #element reports.
      # @param [String] code A block of Javascript code
      # @param [optional, Hash] scope A mapping of variables to values
      def initialize(code, scope=nil)
        @code  = code
        @scope = scope
      end

      # Encodes the value as a BSON binary string (memoized). Without a scope
      # this is just the BSON string form of the code; with one it follows the
      # `code_w_scope` layout: total length, code string, scope document.
      # @see http://bsonspec.org/#/specification
      def to_s
        return @string if @string

        source = BSON.from_string(@code)
        return @string = source unless @scope

        variables = BSON.from_hash(@scope)
        # The leading int32 counts itself (4 bytes) as well as both payloads.
        total = source.bytesize + variables.bytesize + 4
        @string = BSON.from_int(total) << source << variables
      end

      # Returns the memoized element triple:
      # 1. The BSON type identifier: 13 with no scope, or 15 with scope
      # 2. The byte length of the encoded string
      # 3. The encoded string itself
      def element
        @element ||= begin
          type_id = @scope ? 15 : 13
          [type_id, to_s.bytesize, to_s]
        end
      end

    end

    # Shortcut for Javascript.new, kept for symmetry with the other one-way
    # BSON converters (`.cstring`, `.binary`, etc.). Arguments are forwarded
    # unchanged.
    def self.javascript(*args)
      Javascript.new(*args)
    end
  end
end
20 changes: 10 additions & 10 deletions lib/crunch/bson/string.rb
Expand Up @@ -26,16 +26,16 @@ def self.from_string(str, opts={})
(from_int(out.bytesize, length: 4) + out).force_encoding(Encoding::BINARY)
end

# Produces a BSON binary type, which is defined as:
# 1. A 32-bit number giving the length in bytes of the binary string (not including the subtype from #2);
# 2. A binary data subtype, which for right now we're simply forcing to the default of 0;
# 3. The binary data string itself, with no null terminators or such.
# @param [String] data The binary data; must be in Encoding::Binary.
# @return [String] A binary BSON data string
def self.from_binary(str)
from_int(str.bytesize) << 0 << str.force_encoding(Encoding::BINARY)
rescue NoMethodError
raise BSONError, "A binary string input is required; instead you gave: #{str}"
# Encodes a Regexp as a BSON regex: the cstring of the pattern source followed
# by the cstring of its option letters. Letters are appended in the fixed
# order i, m, u, x; 'u' is included whenever the regex reports a fixed encoding.
# The `opts` argument is accepted but not consulted.
def self.from_regex(regex, opts={})
  flags   = regex.options
  letters = ""
  letters << 'i' unless (flags & Regexp::IGNORECASE).zero?
  letters << 'm' unless (flags & Regexp::MULTILINE).zero?
  letters << 'u' if regex.fixed_encoding?
  letters << 'x' unless (flags & Regexp::EXTENDED).zero?
  cstring(regex.source) + cstring(letters, normalized: true)
end
end
end
36 changes: 36 additions & 0 deletions lib/crunch/bson/timestamp.rb
@@ -0,0 +1,36 @@
# encoding: BINARY
module Crunch
  module BSON

    # Represents a BSON "timestamp" type, which MongoDB uses internally and users will
    # most likely never need to worry about. We include it in our implementation for
    # the sake of irrational completionism.
    #
    # Per the BSON specification the 8-byte value holds the increment (counter)
    # in its first four bytes and the seconds-since-epoch in its last four.
    class Timestamp

      # If given no parameters, returns a "null" Timestamp (which will be filled in by
      # the server.) If given a binary string, stores it as-is; it is decoded lazily
      # by #time and #counter. The string's length is not validated.
      # @param [optional, String] str The raw 8-byte BSON timestamp
      def initialize(str=nil)
        if str
          @string = str
        else
          @time, @counter, @string = Time.at(0), 0, "\x00\x00\x00\x00\x00\x00\x00\x00"
        end
      end

      # The seconds portion, decoded from the LAST four bytes (memoized).
      # @return [Time]
      def time
        # byteslice keeps the offsets byte-based whatever the string's encoding tag.
        @time ||= Time.at(BSON.to_int(@string.byteslice(4, 4)))
      end

      # The increment portion, decoded from the FIRST four bytes (memoized).
      # @return [Integer]
      def counter
        @counter ||= BSON.to_int(@string.byteslice(0, 4))
      end

      # The raw BSON bytes this timestamp was built from (or the all-zero null value).
      # @return [String]
      def to_s
        @string
      end
      alias_method :bin, :to_s


    end
  end
end
71 changes: 71 additions & 0 deletions spec/crunch/bson/binary_spec.rb
@@ -0,0 +1,71 @@
#encoding: BINARY
require File.dirname(__FILE__) + '/../../spec_helper'
require 'digest/md5'

module Crunch
  module BSON
    # Specs for the BSON.binary convenience constructor.
    describe "- binary method" do
      it "handles an empty binary string" do
        BSON.binary('').to_s.should == "\x00\x00\x00\x00\x00"
      end

      it "does NOT handle nil" do
        ->{BSON.binary(nil)}.should raise_error(BSONError, /binary string/)
      end

      it "does NOT convert other types" do
        ->{BSON.binary(3.14)}.should raise_error(BSONError, /binary string/)
      end

      it "returns a BSON::Binary object" do
        BSON.binary("}\x99$\x00").should be_a(Binary)
      end

      it "adds the subtype and length" do
        BSON.binary("}\x99$\x00").to_s.should == "\x04\x00\x00\x00\x00}\x99$\x00"
      end
    end

    # Specs for the Binary class itself, one per supported subtype and option.
    describe Binary do
      before(:each) do
        @pi = "\x1F\x85\xEBQ\xB8\x1E\t@" # 3.14 as an encoded float

      end

      it "can take a data string" do
        Binary.new(@pi).to_s.should == "\x08\x00\x00\x00\x00#{@pi}"
      end

      it "can take a Function type" do
        Binary.new(@pi, subtype: Binary::FUNCTION).to_s.should == "\x08\x00\x00\x00\x01#{@pi}"
      end

      it "can take an 'old' binary type" do
        Binary.new("\x08\x00\x00\x00#{@pi}", subtype: Binary::OLD).to_s.should == "\x0C\x00\x00\x00\x02\x08\x00\x00\x00#{@pi}"
      end

      it "can take a UUID type" do
        Binary.new(@pi, subtype: Binary::UUID).to_s.should == "\x08\x00\x00\x00\x03#{@pi}"
      end

      it "can take an MD5 type" do
        # Digest::MD5.new takes no data argument; the digest of a fresh instance
        # is the MD5 of empty input, which is what the expected bytes below are
        # (d41d8cd9...427e). Passing 'foo' here never hashed 'foo'.
        md5 = Digest::MD5.new
        Binary.new(md5.digest, subtype: Binary::MD5).to_s.should == "\x10\x00\x00\x00\x05\xD4\x1D\x8C\xD9\x8F\x00\xB2\x04\xE9\x80\t\x98\xEC\xF8B~"
      end

      it "can take a user defined type" do
        Binary.new(@pi, subtype: Binary::USER).to_s.should == "\x08\x00\x00\x00\x80#{@pi}"
      end

      it "can take the length" do
        Binary.new(@pi, subtype: Binary::GENERIC, length: "\x08\x00\x00\x00").to_s.should == "\x08\x00\x00\x00\x00#{@pi}"
      end

      it "can return a full element" do
        Binary.new("\x1F\x85\xEBQ\xB8\x1E\t@").element.should == [5, 13, "\x08\x00\x00\x00\x00\x1F\x85\xEBQ\xB8\x1E\t@"]
      end

    end

  end
end

0 comments on commit af59165

Please sign in to comment.