From cc4cdad4f7dfa8b46d9a0f81f0b1445c62fdd510 Mon Sep 17 00:00:00 2001
From: orangeruan128 <orangeruan128@users.noreply.github.com>
Date: Mon, 20 Apr 2026 12:28:19 +1000
Subject: [PATCH] Make property/file content stringification type-safe

PrtFourBytesOfLengthFollowedByData.Data, FileDataStoreObject.FileData,
and the OneDocument.get_json() file contents are documented to be byte
buffers, but on real-world OneNote files they can already be Python
strs by the time stringification happens (typically because earlier
code paths decoded a UTF-16 buffer in-place, or a parser fallback
returned a hex string). When that happens, calling .hex() raises:

    AttributeError: 'str' object has no attribute 'hex'

This change keeps .hex() as the path for bytes/bytearray (no behavior
change) and adds explicit fallbacks for str (pass through, or encode
to bytes for the JSON file-contents case) and for other types (repr,
or empty content in the JSON file-contents case).

No new functionality, just defensive type coercion so a single corrupt
property does not abort parsing of the whole document.
---
 pyOneNote/FileNode.py    | 22 +++++++++++++++++++---
 pyOneNote/OneDocument.py | 13 ++++++++++++-
 2 files changed, 31 insertions(+), 4 deletions(-)

diff --git a/pyOneNote/FileNode.py b/pyOneNote/FileNode.py
index a5f04d0..c890922 100644
--- a/pyOneNote/FileNode.py
+++ b/pyOneNote/FileNode.py
@@ -552,7 +552,12 @@ def __init__(self, file, fileNodeChunkReference):
         self.guidFooter = uuid.UUID(bytes_le=self.guidFooter)
 
     def __str__(self):
-        return self.FileData[:128].hex()
+        fd = self.FileData
+        if isinstance(fd, (bytes, bytearray)):
+            return fd[:128].hex()
+        if isinstance(fd, str):
+            return fd[:128]
+        return repr(fd)[:128]
 
 
 class ObjectSpaceObjectPropSet:
@@ -668,7 +673,14 @@ def get_properties(self):
                         try:
                             propertyVal = self.rgData[i].Data.decode('utf-16')
                         except:
-                            propertyVal = self.rgData[i].Data.hex()
+                            data = self.rgData[i].Data
+                            if isinstance(data, (bytes, bytearray)):
+                                propertyVal = data.hex()
+                            elif isinstance(data, str):
+                                # Already decoded somewhere upstream; pass through.
+                                propertyVal = data
+                            else:
+                                propertyVal = repr(data)
                 else:
                     property_name_lower =  propertyName.lower()
                     if 'time' in property_name_lower:
@@ -759,7 +771,11 @@ def __init__(self, file, propertySet):
         self.Data, = struct.unpack('{}s'.format(self.cb), file.read(self.cb))
 
     def __str__(self):
-        return self.Data.hex()
+        if isinstance(self.Data, (bytes, bytearray)):
+            return self.Data.hex()
+        if isinstance(self.Data, str):
+            return self.Data
+        return repr(self.Data)
 
 
 class PropertyID:
diff --git a/pyOneNote/OneDocument.py b/pyOneNote/OneDocument.py
index 906a606..cad58ec 100644
--- a/pyOneNote/OneDocument.py
+++ b/pyOneNote/OneDocument.py
@@ -69,8 +69,19 @@ def get_global_identification_table(self):
     def get_json(self):
         files_in_hex = {}
         for key, file in self.get_files().items():
+            content = file['content']
+            if isinstance(content, (bytes, bytearray)):
+                content_hex = content.hex()
+            elif isinstance(content, str):
+                # Already a string; encode to bytes first to keep JSON output hex-only.
+                try:
+                    content_hex = content.encode('latin-1').hex()
+                except UnicodeEncodeError:
+                    content_hex = content.encode('utf-8', errors='replace').hex()
+            else:
+                content_hex = b''.hex()
             files_in_hex[key] = {'extension': file['extension'],
-                                 'content': file['content'].hex(),
+                                 'content': content_hex,
                                  'identity': file['identity']}
 
         res = {