From cc4cdad4f7dfa8b46d9a0f81f0b1445c62fdd510 Mon Sep 17 00:00:00 2001 From: orangeruan128 Date: Mon, 20 Apr 2026 12:28:19 +1000 Subject: [PATCH] Make property/file content stringification type-safe PrtFourBytesOfLengthFollowedByData.Data, FileDataStoreObject.FileData, and the OneDocument.get_json() file contents are documented to be byte buffers, but on real-world OneNote files they can already be Python strs by the time stringification happens (typically because earlier code paths decoded a UTF-16 buffer in-place, or a parser fallback returned a hex string). When that happens, calling .hex() raises: AttributeError: 'str' object has no attribute 'hex' This change keeps .hex() as the path for bytes/bytearray (no behavior change), and adds explicit fallbacks for str (pass through, or encode to bytes for the JSON file-contents case) and other types (repr, or an empty hex string for the JSON file-contents case). No new functionality, just defensive type coercion so a single corrupt property does not abort parsing of the whole document. --- pyOneNote/FileNode.py | 22 +++++++++++++++++++--- pyOneNote/OneDocument.py | 13 ++++++++++++- 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/pyOneNote/FileNode.py b/pyOneNote/FileNode.py index a5f04d0..c890922 100644 --- a/pyOneNote/FileNode.py +++ b/pyOneNote/FileNode.py @@ -552,7 +552,12 @@ def __init__(self, file, fileNodeChunkReference): self.guidFooter = uuid.UUID(bytes_le=self.guidFooter) def __str__(self): - return self.FileData[:128].hex() + fd = self.FileData + if isinstance(fd, (bytes, bytearray)): + return fd[:128].hex() + if isinstance(fd, str): + return fd[:128] + return repr(fd)[:128] class ObjectSpaceObjectPropSet: @@ -668,7 +673,14 @@ def get_properties(self): try: propertyVal = self.rgData[i].Data.decode('utf-16') except: - propertyVal = self.rgData[i].Data.hex() + data = self.rgData[i].Data + if isinstance(data, (bytes, bytearray)): + propertyVal = data.hex() + elif isinstance(data, str): + # Already decoded somewhere upstream; pass through. 
+ propertyVal = data + else: + propertyVal = repr(data) else: property_name_lower = propertyName.lower() if 'time' in property_name_lower: @@ -759,7 +771,11 @@ def __init__(self, file, propertySet): self.Data, = struct.unpack('{}s'.format(self.cb), file.read(self.cb)) def __str__(self): - return self.Data.hex() + if isinstance(self.Data, (bytes, bytearray)): + return self.Data.hex() + if isinstance(self.Data, str): + return self.Data + return repr(self.Data) class PropertyID: diff --git a/pyOneNote/OneDocument.py b/pyOneNote/OneDocument.py index 906a606..cad58ec 100644 --- a/pyOneNote/OneDocument.py +++ b/pyOneNote/OneDocument.py @@ -69,8 +69,19 @@ def get_global_identification_table(self): def get_json(self): files_in_hex = {} for key, file in self.get_files().items(): + content = file['content'] + if isinstance(content, (bytes, bytearray)): + content_hex = content.hex() + elif isinstance(content, str): + # Already a string; encode to bytes first to keep JSON output hex-only. + try: + content_hex = content.encode('latin-1').hex() + except UnicodeEncodeError: + content_hex = content.encode('utf-8', errors='replace').hex() + else: + content_hex = b''.hex() files_in_hex[key] = {'extension': file['extension'], - 'content': file['content'].hex(), + 'content': content_hex, 'identity': file['identity']} res = {