diff --git a/README.md b/README.md index 01f1c15..7331933 100644 --- a/README.md +++ b/README.md @@ -65,6 +65,27 @@ File.open("test.gif", "wb") { |f| f.write(header) } => "GIF89a\x10\x00\x10\x00\x80\x00\x00" ``` +## Note about bit and nibble formats + +BinaryStruct supports bit formats and nibble formats; however, note that the +underlying Ruby methods, [pack](http://ruby-doc.org/core-2.2.0/Array.html#method-i-pack) +and [unpack](http://ruby-doc.org/core-2.2.0/String.html#method-i-unpack), support +fewer than 8 bits by reading an entire byte, even if not all of the bits are used. + +For example, + +```ruby +s = "\xFF\x00" # binary: 1111111100000000 +s.unpack("b8b8") # => ["11111111", "00000000"] +s.unpack("b4b4b4b4") # => ["1111", "0000", "", ""] +``` + +One might expect that the latter would read 4 bits, then the next 4 bits, etc., +yielding `["1111", "1111", "0000", "0000"]`, but that is not the case. Instead, +the first b4 reads a full byte's worth, then discards the unused 4 bits, and the +same happens for the next b4. The third and fourth b4 have nothing left to read, +and so just return empty strings. 
+ ## Installation Add this line to your application's Gemfile: diff --git a/lib/binary_struct.rb b/lib/binary_struct.rb index da3add3..b479fff 100644 --- a/lib/binary_struct.rb +++ b/lib/binary_struct.rb @@ -5,16 +5,16 @@ class BinaryStruct SIZES = { 'A' => 1, # String with trailing NULs and spaces removed 'a' => 1, # String - 'B' => nil, # Extract bits from each character (MSB first) - 'b' => nil, # Extract bits from each character (LSB first) + 'B' => 1, # Extract bits from each character (MSB first) + 'b' => 1, # Extract bits from each character (LSB first) 'C' => 1, # Extract a character as an unsigned integer 'c' => 1, # Extract a character as a signed integer 'E' => nil, # Treat sizeof(double) characters as a double in little-endian byte order 'e' => nil, # Treat sizeof(float) characters as a float in little-endian byte order 'G' => nil, # Treat sizeof(double) characters as a double in network byte order 'g' => nil, # Treat sizeof(float) characters as a float in network byte order - 'H' => nil, # Extract hex nibbles from each character (most significant first) - 'h' => nil, # Extract hex nibbles from each character (least significant first) + 'H' => 1, # Extract hex nibbles from each character (most significant first) + 'h' => 1, # Extract hex nibbles from each character (least significant first) 'I' => 4, # Treat sizeof(int) successive characters as an unsigned native integer 'i' => 4, # Treat sizeof(int) successive characters as a signed native integer 'L' => 4, # Treat 4 successive characters as an unsigned native long integer @@ -39,7 +39,9 @@ class BinaryStruct 'Z' => 1, # String with trailing NULs removed } - STRING_FORMATS = %w(A a M m u) + STRING_FORMATS = %w(A a B b H h M m u) + BIT_FORMATS = %w(B b) + NIBBLE_FORMATS = %w(H h) ENDIAN_FORMATS = %w(I i L l Q q S s) ENDIAN_MODIFIERS = %w(> <) MODIFIERS = ENDIAN_MODIFIERS @@ -202,7 +204,14 @@ def self.get_size(definition) modifier, modcount = count[0, 1], count[1..-1] count = modcount if 
valid_definition_entry_modifier?(modifier) count = count.empty? ? 1 : count.to_i - size += (count * SIZES[type]) + size += + if BIT_FORMATS.include?(type) + (count / 8.0).ceil + elsif NIBBLE_FORMATS.include?(type) + (count / 2.0).ceil + else + count * SIZES[type] + end end size end diff --git a/spec/binary_struct_spec.rb b/spec/binary_struct_spec.rb index 8b9c572..30ef286 100644 --- a/spec/binary_struct_spec.rb +++ b/spec/binary_struct_spec.rb @@ -1,25 +1,33 @@ describe BinaryStruct do STRUCT_DEF = [ - 'Q', :quad, - 'L', 'long', - 'S', :short, - 'C', nil, + 'Q', :quad, + 'L', 'long', + 'S', :short, + 'C', nil, + 'b5', :binary, 'a0', 'none', - 'a', nil, + 'a', nil, 'a2', 'bc', ] - STRUCT_DEF_SIZE = 18 + STRUCT_DEF_SIZE = 19 STRUCT_DEF_ASTERISK = ['a*', :word] STRUCT_DEF_ASTERISK_SIZE = 0 # '*' is ignored STRUCT_DEF_UNRECOGNIZED_FORMAT = ['D', nil] - STRUCT_DEF_UNSUPPORTED_FORMAT = ['B', nil] + STRUCT_DEF_UNSUPPORTED_FORMAT = ['U', nil] STRUCT_DEF_UNSUPPORTED_COUNT_NEG = ['a-1', nil] STRUCT_DEF_UNSUPPORTED_COUNT_INV = ['aX', nil] - STRUCT_ENCODED_STR = "\000\111\222\333\444\555\666\777\000\111\222\333\000\111\0000BC".force_encoding("ASCII-8BIT") - STRUCT_DECODED_HASH = {:quad=>18426034930503010560, "long"=>3683797248, :short=>18688, "bc"=>"BC", "none"=>""} + STRUCT_ENCODED_STR = "\000\111\222\333\444\555\666\777\000\111\222\333\000\111\000\0320BC".force_encoding("ASCII-8BIT") + STRUCT_DECODED_HASH = { + :quad => 18_426_034_930_503_010_560, + "long" => 3_683_797_248, + :short => 18_688, + :binary => "01011", + "bc" => "BC", + "none" => "" + } it('.new') { expect { BinaryStruct.new }.not_to raise_error } it('.new with definition') { expect { BinaryStruct.new(STRUCT_DEF) }.not_to raise_error } diff --git a/spec/endian_spec.rb b/spec/endian_spec.rb index 3e10b09..7028364 100644 --- a/spec/endian_spec.rb +++ b/spec/endian_spec.rb @@ -1,14 +1,15 @@ describe BinaryStruct do BIG_STRUCT_DEF = [ - 'Q>', :quad, - 'L>', 'long', - 'S>', :short, - 'C', nil, - 'a0', 'none', - 'a', 
nil, - 'a2', 'bc', + 'Q>', :quad, + 'L>', 'long', + 'S>', :short, + 'C', nil, + 'b5', :binary, + 'a0', 'none', + 'a', nil, + 'a2', 'bc', ] - BIG_STRUCT_DEF_SIZE = 18 + BIG_STRUCT_DEF_SIZE = 19 BIG_E_QUAD_STRUCT_DEF = ['Q>2', :quad] BIG_E_QUAD_DEF_SIZE = 16 @@ -21,15 +22,16 @@ BIG_STRUCT_DEF_INVALID_ENDIAN_MODIFIER = ['Q_', nil] LIL_STRUCT_DEF = [ - 'Q<', :quad, - 'L<', 'long', - 'S<', :short, - 'C', nil, - 'a0', 'none', - 'a', nil, - 'a2', 'bc', + 'Q<', :quad, + 'L<', 'long', + 'S<', :short, + 'C', nil, + 'b5', :binary, + 'a0', 'none', + 'a', nil, + 'a2', 'bc', ] - LIL_STRUCT_DEF_SIZE = 18 + LIL_STRUCT_DEF_SIZE = 19 LIL_E_QUAD_STRUCT_DEF = ['Q<2', :quad] LIL_E_QUAD_DEF_SIZE = 16 @@ -40,18 +42,27 @@ LIL_STRUCT_DEF_UNRECOG_ENDIAN_FMT = ['Y<', nil] LIL_STRUCT_DEF_UNSUPPORTED_ENDIAN_ATTRIBUTE = ['A<', nil] - END_STRUCT_ENCODED_STR = "\000\111\222\333\444\555\666\777\000\111\222\333\000\111\0000BC" - - LIL_ENDIAN_STRUCT_DECODED_HASH = {:quad => 18_426_034_930_503_010_560, - "long" => 3_683_797_248, - :short => 18_688, - "bc" => "BC", - "none" => ""} - BIG_ENDIAN_STRUCT_DECODED_HASH = {:quad => 20_709_143_206_541_055, - "long" => 4_821_723, - :short => 73, - "none" => "", - "bc" => "BC"} + END_STRUCT_ENCODED_STR = "\000\111\222\333\444\555\666\777\000\111\222\333\000\111\000\0320BC" + + LIL_ENDIAN_STRUCT_DECODED_HASH = + { + :quad => 18_426_034_930_503_010_560, + "long" => 3_683_797_248, + :short => 18_688, + :binary => "01011", + "none" => "", + "bc" => "BC", + } + + BIG_ENDIAN_STRUCT_DECODED_HASH = + { + :quad => 20_709_143_206_541_055, + "long" => 4_821_723, + :short => 73, + :binary => "01011", + "none" => "", + "bc" => "BC" + } it('.new') { expect { BinaryStruct.new }.not_to raise_error } it('.new with big definition') { expect { BinaryStruct.new(BIG_STRUCT_DEF) }.not_to raise_error }