Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
Philip Zeyliger committed Feb 3, 2010
1 parent a01ba18 commit 3832584
Showing 1 changed file with 8 additions and 7 deletions.
15 changes: 8 additions & 7 deletions lang/py/src/avro/datafile.py
Expand Up @@ -79,11 +79,11 @@ def __init__(self, writer, datum_writer, writers_schema=None, codec=None):
self._block_count = 0
self._meta = {}


if writers_schema is not None:
if codec is None:
codec = 'null'
assert codec in VALID_CODECS, "Unknown codec: " + codec
if codec not in VALID_CODECS:
raise DataFileException("Unknown codec: " + codec)
self._sync_marker = DataFileWriter.generate_sync_marker()
self.set_meta('avro.codec', codec)
self.set_meta('avro.schema', str(writers_schema))
Expand Down Expand Up @@ -133,7 +133,6 @@ def _write_header(self):
self.datum_writer.write_data(META_SCHEMA, header, self.encoder)

# TODO(hammer): make a schema for blocks and use datum_writer
# TODO(hammer): use codec when writing the block contents
def _write_block(self):
if self.block_count > 0:
# write number of items in block
Expand All @@ -146,7 +145,7 @@ def _write_block(self):
elif self.get_meta(CODEC_KEY) == 'deflate':
# The first two characters and last character are zlib
# wrappers around deflate data.
compressed_data = zlib.compress(self.buffer_writer.getvalue())[2:-1]
compressed_data = zlib.compress(uncompressed_data)[2:-1]
else:
fail_msg = '"%s" codec is not supported.' % self.get_meta(CODEC_KEY)
raise DataFileException(fail_msg)
Expand Down Expand Up @@ -210,7 +209,6 @@ def __init__(self, reader, datum_reader):
self.codec = "null"
if self.codec not in VALID_CODECS:
raise DataFileException('Unknown codec: %s.' % self.codec)
self.codec = self.codec

# get file length
self._file_length = self.determine_file_length()
Expand Down Expand Up @@ -260,7 +258,8 @@ def _read_header(self):
self.reader.seek(0, 0)

# read header into a dict
header = self.datum_reader.read_data(META_SCHEMA, META_SCHEMA, self.raw_decoder)
header = self.datum_reader.read_data(
META_SCHEMA, META_SCHEMA, self.raw_decoder)

# check magic number
if header.get('magic') != MAGIC:
Expand All @@ -278,9 +277,11 @@ def _read_block_header(self):
self.block_count = self.raw_decoder.read_long()
if self.codec == "null":
# Skip a long; we don't need to use the length.
self.raw_decoder.read_long()
self.raw_decoder.skip_long()
self._datum_decoder = self._raw_decoder
else:
# Compressed data is stored as (length, data), which
# corresponds to how bytes are stored.
data = self.raw_decoder.read_bytes()
uncompressed = zlib.decompress(data, -15)
self._datum_decoder = io.BinaryDecoder(cStringIO.StringIO(uncompressed))
Expand Down

0 comments on commit 3832584

Please sign in to comment.