Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 45 additions & 10 deletions pyOneNote/FileNode.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,17 @@ def __init__(self, file):

class FileNodeList:
def __init__(self, file, document, file_chunk_reference):
file.seek(file_chunk_reference.stp)
# `stp` can be out-of-range (negative when interpreted signed, or > 2**63)
# on partially-corrupt or padded files. `file.seek` then raises:
# ValueError: cannot fit 'int' into an offset-sized integer
# which would otherwise abort parsing of the entire document. Treat such a
# reference as an empty list so the surrounding tree can still be parsed.
try:
file.seek(file_chunk_reference.stp)
except (OverflowError, ValueError, OSError):
self.end = file_chunk_reference.stp
self.fragments = []
return
self.end = file_chunk_reference.stp + file_chunk_reference.cb
self.fragments = []

Expand Down Expand Up @@ -469,15 +479,21 @@ def __init__(self, file, document):
self.document = document
self.current_revision = self.document.cur_revision

def _resolve_guid(self):
    """Look up this object's GUID in the document's global identification table.

    The table is indexed first by ``self.current_revision`` and then by
    ``self.guidIndex``.  When either lookup raises ``KeyError`` a readable
    placeholder string is returned instead of propagating the error.
    """
    try:
        revision_table = self.document._global_identification_table[self.current_revision]
        resolved = revision_table[self.guidIndex]
    except KeyError:
        # Per the original note: 0xFFFFFF (16777215) is the documented "invalid"
        # sentinel, and misses may also happen on cross-revision references when
        # the table for the current revision is not (yet) fully populated.  A
        # placeholder keeps the rest of the document parseable.
        return '<unresolved guidIndex=0x{:06x}>'.format(self.guidIndex)
    return resolved

def __str__(self):
    """Return ``'<ExtendedGUID> (guid, n)'`` for this identifier.

    The GUID part comes from ``self._resolve_guid()`` rather than a direct
    table lookup, so formatting does not raise ``KeyError`` when the index
    is missing from the global identification table.
    """
    return '<ExtendedGUID> ({}, {})'.format(self._resolve_guid(), self.n)

def __repr__(self):
    """Return the same ``'<ExtendedGUID> (guid, n)'`` text as ``__str__``.

    The GUID part comes from ``self._resolve_guid()`` rather than a direct
    table lookup, so building the repr does not raise ``KeyError`` when the
    index is missing from the global identification table.
    """
    return '<ExtendedGUID> ({}, {})'.format(self._resolve_guid(), self.n)


class JCID:
Expand Down Expand Up @@ -569,11 +585,26 @@ def __init__(self, file, document):

class ObjectSpaceObjectStreamOfIDs:
def __init__(self, file, document):
    """Read a stream header followed by up to ``header.Count`` CompactIDs.

    The header and every CompactID are read exactly once, in order, from
    *file*.  A ``struct.error`` during either read is treated as a truncated
    stream and handled as follows:

    * at the header: ``self.header`` becomes an empty stand-in with
      ``Count = 0``, ``ExtendedStreamsPresent = False`` and
      ``OsidStreamNotPresent = True``, and ``self.body`` stays empty;
    * part-way through the body: reading stops and ``self.body`` keeps the
      entries parsed so far.

    :param file: readable binary stream positioned at the stream header.
    :param document: owning document object, passed through to ``CompactID``.
    """
    self.body = []
    self.head = 0
    try:
        self.header = ObjectSpaceObjectStreamHeader(file)
    except struct.error:
        # Synthesize an empty header so callers that check
        # .header.OsidStreamNotPresent / .ExtendedStreamsPresent / .Count
        # do not also need to special-case a missing attribute.
        class _EmptyHeader:
            Count = 0
            ExtendedStreamsPresent = False
            OsidStreamNotPresent = True

        self.header = _EmptyHeader()
        return
    for _ in range(self.header.Count):
        try:
            self.body.append(CompactID(file, document))
        except struct.error:
            # Truncated mid-stream: stop reading and let the caller use
            # what has been parsed so far.
            break

def read(self):
res = None
Expand All @@ -596,7 +627,11 @@ def __init__(self, file):
class PropertySet:
def __init__(self, file, OIDs, OSIDs, ContextIDs, document):
self.current = file.tell()
self.cProperties, = struct.unpack('<H', file.read(2))
try:
self.cProperties, = struct.unpack('<H', file.read(2))
except struct.error:
# Truncated stream — treat as an empty property set.
self.cProperties = 0
self.rgPrids = []
self.indent = ''
self.document = document
Expand Down