Skip to content

Commit

Permalink
metadata: New tag encoding scheme.
Browse files Browse the repository at this point in the history
EBML tags are encoded in a variable-length unsigned int (vuint),
which is clever but causes some tags to be encoded in two bytes
even though there are really only about 180 tags or so. Assuming that
there won't be, say, over 1,000 tags in the future, we can use a much
more efficient encoding scheme. The new scheme can support at most
4,096 tags anyway.

This also flattens a scattered tag namespace (did you know that
0xa9 is followed by 0xb0?) and makes room for autoserialized tags
in 0x00 through 0x1f.
  • Loading branch information
lifthrasiir committed Mar 3, 2015
1 parent ac20ded commit 38a965a
Show file tree
Hide file tree
Showing 2 changed files with 175 additions and 149 deletions.
43 changes: 35 additions & 8 deletions src/librbml/lib.rs
@@ -1,4 +1,4 @@
// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
Expand Down Expand Up @@ -115,6 +115,7 @@ pub enum EbmlEncoderTag {
#[derive(Debug)]
pub enum Error {
IntTooBig(uint),
InvalidTag(uint),
Expected(String),
IoError(std::old_io::IoError),
ApplicationError(String)
Expand Down Expand Up @@ -142,7 +143,7 @@ pub mod reader {
EsMapLen, EsMapKey, EsEnumVid, EsU64, EsU32, EsU16, EsU8, EsInt, EsI64,
EsI32, EsI16, EsI8, EsBool, EsF64, EsF32, EsChar, EsStr, EsMapVal,
EsEnumBody, EsUint, EsOpaque, EsLabel, EbmlEncoderTag, Doc, TaggedDoc,
Error, IntTooBig, Expected };
Error, IntTooBig, InvalidTag, Expected };

pub type DecodeResult<T> = Result<T, Error>;
// rbml reading
Expand All @@ -165,6 +166,18 @@ pub mod reader {
pub next: uint
}

/// Decodes the tag stored in `data` at offset `start`.
///
/// The tag occupies one or two bytes, selected by the first byte:
///
/// * below `0xf0`: the byte itself is the tag, and one byte is consumed;
/// * above `0xf0`: the low four bits of the first byte form the high part
///   of the tag and the following byte forms the low eight bits, so two
///   bytes are consumed;
/// * exactly `0xf0`: rejected with `InvalidTag`.
///
/// On success the returned `Res` carries the tag in `val` and the offset
/// just past the tag in `next`.
///
/// `data` is indexed directly, so `start` (and `start + 1` for the two-byte
/// form) must be within bounds.
pub fn tag_at(data: &[u8], start: uint) -> DecodeResult<Res> {
    let first = data[start] as uint;

    if first == 0xf0 {
        // every tag starting with byte 0xf0 is an overlong form, which is prohibited.
        return Err(InvalidTag(first));
    }

    if first < 0xf0 {
        // Single-byte form: the byte is the tag value.
        return Ok(Res { val: first, next: start + 1 });
    }

    // Two-byte form: low nibble of the lead byte, then one full byte.
    let high = (first & 0xf) << 8;
    let low = data[start + 1] as uint;
    Ok(Res { val: high | low, next: start + 2 })
}

#[inline(never)]
fn vuint_at_slow(data: &[u8], start: uint) -> DecodeResult<Res> {
let a = data[start];
Expand Down Expand Up @@ -238,7 +251,7 @@ pub mod reader {
}

pub fn doc_at<'a>(data: &'a [u8], start: uint) -> DecodeResult<TaggedDoc<'a>> {
let elt_tag = try!(vuint_at(data, start));
let elt_tag = try!(tag_at(data, start));
let elt_size = try!(vuint_at(data, elt_tag.next));
let end = elt_size.next + elt_size.val;
Ok(TaggedDoc {
Expand All @@ -250,7 +263,7 @@ pub mod reader {
pub fn maybe_get_doc<'a>(d: Doc<'a>, tg: uint) -> Option<Doc<'a>> {
let mut pos = d.start;
while pos < d.end {
let elt_tag = try_or!(vuint_at(d.data, pos), None);
let elt_tag = try_or!(tag_at(d.data, pos), None);
let elt_size = try_or!(vuint_at(d.data, elt_tag.next), None);
pos = elt_size.next + elt_size.val;
if elt_tag.val == tg {
Expand All @@ -276,7 +289,7 @@ pub mod reader {
{
let mut pos = d.start;
while pos < d.end {
let elt_tag = try_or!(vuint_at(d.data, pos), false);
let elt_tag = try_or!(tag_at(d.data, pos), false);
let elt_size = try_or!(vuint_at(d.data, elt_tag.next), false);
pos = elt_size.next + elt_size.val;
let doc = Doc { data: d.data, start: elt_size.next, end: pos };
Expand All @@ -292,7 +305,7 @@ pub mod reader {
{
let mut pos = d.start;
while pos < d.end {
let elt_tag = try_or!(vuint_at(d.data, pos), false);
let elt_tag = try_or!(tag_at(d.data, pos), false);
let elt_size = try_or!(vuint_at(d.data, elt_tag.next), false);
pos = elt_size.next + elt_size.val;
if elt_tag.val == tg {
Expand Down Expand Up @@ -718,6 +731,20 @@ pub mod writer {
size_positions: Vec<uint>,
}

/// Writes the tag `n` to `w` in its one- or two-byte encoding.
///
/// * `n < 0xf0` is written as the single byte `n`.
/// * `0x100 <= n < 0x1000` is written as two bytes: `0xf0` combined with
///   the bits of `n` above the low eight, followed by the low eight bits.
///
/// Any other value (`0xf0` through `0xff`, or `0x1000` and above) is not
/// written; an `IoError` of kind `OtherIoError` with description
/// "invalid tag" is returned instead, with the offending value in `detail`.
fn write_tag<W: Writer>(w: &mut W, n: uint) -> EncodeResult {
    let one_byte = n < 0xf0;
    let two_bytes = 0x100 <= n && n < 0x1000;

    // Reject anything that fits neither form before touching the writer.
    if !(one_byte || two_bytes) {
        return Err(old_io::IoError {
            kind: old_io::OtherIoError,
            desc: "invalid tag",
            detail: Some(format!("{}", n))
        });
    }

    if one_byte {
        w.write_all(&[n as u8])
    } else {
        // Lead byte carries the 0xf0 marker plus the high bits of the tag.
        let lead = 0xf0 | (n >> 8) as u8;
        let trail = n as u8;
        w.write_all(&[lead, trail])
    }
}

fn write_sized_vuint<W: Writer>(w: &mut W, n: uint, size: uint) -> EncodeResult {
match size {
1 => w.write_all(&[0x80u8 | (n as u8)]),
Expand Down Expand Up @@ -766,7 +793,7 @@ pub mod writer {
debug!("Start tag {:?}", tag_id);

// Write the enum ID:
try!(write_vuint(self.writer, tag_id));
try!(write_tag(self.writer, tag_id));

// Write a placeholder four-byte size.
self.size_positions.push(try!(self.writer.tell()) as uint);
Expand Down Expand Up @@ -795,7 +822,7 @@ pub mod writer {
}

pub fn wr_tagged_bytes(&mut self, tag_id: uint, b: &[u8]) -> EncodeResult {
try!(write_vuint(self.writer, tag_id));
try!(write_tag(self.writer, tag_id));
try!(write_vuint(self.writer, b.len()));
self.writer.write_all(b)
}
Expand Down

0 comments on commit 38a965a

Please sign in to comment.