Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: ["3.8", "3.9", "3.10", "3.11"]
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"]

steps:
- uses: actions/checkout@v4
Expand Down
14 changes: 14 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
**v0.53.1**
* Expanded allowable range for `red-black-tree-mod`.
* Fix issue with `MessageBase.asEmailMessage()` that prevented embedded MSG files from being attached.
* Expand allowable versions of `BeautifulSoup4`.

**v0.53.0**
* Added tests for many functions in `extract_msg.utils`.
* Fix an issue in `extract_msg.utils.msgPathToString()` that prevented backslashes from being replaced with forward slashes.
* Change the behavior of `extract_msg.utils.minutesToDurationStr()` to properly use plurals.
* Fixed issue in `extract_msg.utils.unwrapMsg()` that would prevent it from working on signed messages due to an API change.
* Added new exception `MimetypeFailureError`.
* Modified the logic of `MessageBase.asEmailMessage()` to use `AttachmentBase/SignedAttachment.name` instead of `getFilename()`, which only exists on `AttachmentBase`.
* Modified the logic of `MessageBase.htmlBodyPrepared()` to properly put the mimetype in image tags to ensure rendering. Logic was also modified to use `encode` instead of `prettify` to reduce computation and output size.

**v0.52.0**
* [[TeamMsgExtractor #444](https://github.com/TeamMsgExtractor/msg-extractor/issues/444)] Fix typo in string that prevented HTML body from generating from the plain text body properly.
* Adjusted the behavior of `MSGFile.areStringsUnicode` to prioritize the property specified by the parent MSG files for MSG files that are embedded. Additionally, added a fallback to rely on whether or not there is a stream using the `001F` type to determine the property value if it is entirely missing.
Expand Down
4 changes: 2 additions & 2 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -260,8 +260,8 @@ your access to the newest major version of extract-msg.
.. |License: GPL v3| image:: https://img.shields.io/badge/License-GPLv3-blue.svg
:target: LICENSE.txt

.. |PyPI3| image:: https://img.shields.io/badge/pypi-0.52.0-blue.svg
:target: https://pypi.org/project/extract-msg/0.52.0/
.. |PyPI3| image:: https://img.shields.io/badge/pypi-0.53.1-blue.svg
:target: https://pypi.org/project/extract-msg/0.53.1/

.. |PyPI2| image:: https://img.shields.io/badge/python-3.8+-brightgreen.svg
:target: https://www.python.org/downloads/release/python-3810/
Expand Down
4 changes: 2 additions & 2 deletions extract_msg/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.

__author__ = 'Destiny Peterson & Matthew Walker'
__date__ = '2024-10-22'
__version__ = '0.52.0'
__date__ = '2025-02-05'
__version__ = '0.53.1'

__all__ = [
# Modules:
Expand Down
5 changes: 5 additions & 0 deletions extract_msg/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,11 @@ class InvalidPropertyIdError(ExMsgBaseException):
The provided property ID was invalid.
"""

class MimetypeFailureError(ExMsgBaseException):
    """
    A mimetype was required in order to continue, but it could not be
    determined.
    """

class NotWritableError(ExMsgBaseException):
"""
Modification was attempted on an instance that is not writable.
Expand Down
41 changes: 31 additions & 10 deletions extract_msg/msg_classes/message_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@
)
from ..exceptions import (
ConversionError, DataNotFoundError, DeencapMalformedData,
DeencapNotEncapsulated, IncompatibleOptionsError, WKError
DeencapNotEncapsulated, IncompatibleOptionsError, MimetypeFailureError,
WKError
)
from .msg import MSGFile
from ..structures.report_tag import ReportTag
Expand Down Expand Up @@ -178,13 +179,10 @@ def asEmailMessage(self) -> EmailMessage:
if att.dataType:
if hasattr(att.dataType, 'asEmailMessage'):
# Replace the extension with '.eml'.
filename = att.getFilename()
filename = att.name or ''
if filename.lower().endswith('.msg'):
filename = filename[:-4] + '.eml'
msgMain.add_attachment(
att.data.asEmailMessage(),
filename = filename,
cid = att.contentId)
msgMain.attach(att.data.asEmailMessage())
else:
if issubclass(att.dataType, bytes):
data = att.data
Expand Down Expand Up @@ -1198,12 +1196,35 @@ def htmlBodyPrepared(self) -> Optional[bytes]:
for tag in tags:
# Iterate through the attachments until we get the right one.
cid = tag['src'][4:]
data = next((attachment.data for attachment in self.attachments if attachment.cid == cid), None)
att = next((attachment for attachment in self.attachments if hasattr(attachment, 'cid') and attachment.cid == cid), None)
# If we found anything, inject it.
if data:
tag['src'] = (b'data:image;base64,' + base64.b64encode(data)).decode('utf-8')
if att and isinstance(att.data, bytes):
# Try to get the mimetype. If we can't, see if the item has an
# extension and guess the mimetype for a few known ones.
mime = att.mimetype
if not mime:
ext = (att.name or '').split('.')[-1].lower()
if ext == 'png':
mime = 'image/png'
elif ext == 'jpg' or ext == 'jpeg':
mime = 'image/jpeg'
elif ext == 'gif':
mime = 'image/gif'
elif ext == 'tiff' or ext == 'tif':
mime = 'image/tif'
elif ext == 'bmp':
mime = 'image/bmp'
elif ext == 'svg':
mime = 'image/svg+xml'
# Final check.
if mime:
tag['src'] = (b'data:' + mime.encode() + b';base64,' + base64.b64encode(att.data)).decode('utf-8')
else:
# We don't know what to actually put for this item, and we
# really should never end up here, so throw an error.
raise MimetypeFailureError('Could not get the mimetype to use for htmlBodyPrepared.')

return soup.prettify('utf-8')
return soup.encode('utf-8')

@functools.cached_property
def htmlInjectableHeader(self) -> str:
Expand Down
14 changes: 11 additions & 3 deletions extract_msg/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -696,8 +696,17 @@ def minutesToDurationStr(minutes: int) -> str:
return '1 minute'
elif minutes < 60:
return f'{minutes} minutes'
elif minutes == 60:
return '1 hour'
elif minutes % 60 == 0:
return f'{minutes // 60} hours'
elif minutes < 120:
if minutes == 61:
return f'1 hour 1 minute'
else:
return f'1 hour {minutes - 60} minutes'
elif minutes % 60 == 1:
return f'{minutes // 60} hours 1 minute'
else:
return f'{minutes // 60} hours {minutes % 60} minutes'

Expand All @@ -709,8 +718,7 @@ def msgPathToString(inp: Union[str, Iterable[str]]) -> str:
"""
if not isinstance(inp, str):
inp = '/'.join(inp)
inp.replace('\\', '/')
return inp
return inp.replace('\\', '/')


def parseType(_type: int, stream: Union[int, bytes], encoding: str, extras: Sequence[bytes]):
Expand Down Expand Up @@ -1094,7 +1102,7 @@ def unwrapMsg(msg: MSGFile) -> Dict[str, List]:
msgFiles.append(att.data)
toProcess.append(att.data)
if isinstance(currentItem, MessageSignedBase):
raw += currentItem._rawAttachments
raw += currentItem.rawAttachments

return {
'attachments': attachments,
Expand Down
2 changes: 2 additions & 0 deletions extract_msg_tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,13 @@
'OleWriterEditingTests',
'OleWriterExportTests',
'PropTests',
'UtilTests',
'ValidationTests',
]

from .attachment_tests import AttachmentTests
from .cmd_line_tests import CommandLineTests
from .ole_writer_tests import OleWriterEditingTests, OleWriterExportTests
from .prop_tests import PropTests
from .util_tests import UtilTests
from .validation_tests import ValidationTests
61 changes: 61 additions & 0 deletions extract_msg_tests/util_tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
__all__ = [
'UtilTests',
]


import unittest

from extract_msg import utils


class UtilTests(unittest.TestCase):
    """
    Unit tests covering helper functions from ``extract_msg.utils``.
    """

    def test_dictGetCasedKey(self):
        """
        A differently-cased query is expected to resolve to the key exactly as
        it is stored in the dictionary; a query with no match must raise.
        """
        sample = {'hello': 1, 'HeUtQjWkW': 2}

        lookups = (
            ('Hello', 'hello'),
            ('heutqjwkw', 'HeUtQjWkW'),
        )
        for query, storedKey in lookups:
            self.assertEqual(utils.dictGetCasedKey(sample, query), storedKey)

        # No key matches this query under any casing.
        self.assertRaises(KeyError, utils.dictGetCasedKey, sample, 'jjjjj')

    def test_divide(self):
        """
        Splitting a 20 character string into pieces of each size from 1 to 11
        is expected to leave any shorter remainder as the final piece.
        """
        source = '12345678901234567890'
        # Each entry is (piece size, expected pieces). The expected pieces are
        # written space-separated to keep the table compact.
        cases = (
            (1, list(source)),
            (2, '12 34 56 78 90 12 34 56 78 90'.split()),
            (3, '123 456 789 012 345 678 90'.split()),
            (4, '1234 5678 9012 3456 7890'.split()),
            (5, '12345 67890 12345 67890'.split()),
            (6, '123456 789012 345678 90'.split()),
            (7, '1234567 8901234 567890'.split()),
            (8, '12345678 90123456 7890'.split()),
            (9, '123456789 012345678 90'.split()),
            (10, '1234567890 1234567890'.split()),
            (11, '12345678901 234567890'.split()),
        )

        for size, pieces in cases:
            self.assertListEqual(utils.divide(source, size), pieces)

    def test_makeWeakRef(self):
        """
        ``None`` is expected to pass through as ``None``, while an actual
        object is expected to produce a non-``None`` result.
        """
        class Dummy:
            pass

        self.assertIsNone(utils.makeWeakRef(None))
        self.assertIsNotNone(utils.makeWeakRef(Dummy()))

    def test_minutesToDurationStr(self):
        """
        Checks singular and plural wording for minutes, whole hours, and
        combined hour/minute durations.
        """
        expectations = (
            (0, '0 hours'),
            (1, '1 minute'),
            (2, '2 minutes'),
            (59, '59 minutes'),
            (60, '1 hour'),
            (61, '1 hour 1 minute'),
            (62, '1 hour 2 minutes'),
            (120, '2 hours'),
            (121, '2 hours 1 minute'),
            (122, '2 hours 2 minutes'),
        )

        for minutes, text in expectations:
            self.assertEqual(utils.minutesToDurationStr(minutes), text)

    def test_msgPathToStr(self):
        """
        Every input form, whether a string or a list of parts and with or
        without backslashes, is expected to give the same forward-slash path.
        """
        target = 'hello/world/one'
        inputs = (
            'hello/world/one',
            'hello/world\\one',
            ['hello', 'world', 'one'],
            ['hello\\world', 'one'],
        )

        for path in inputs:
            self.assertEqual(utils.msgPathToString(path), target)
4 changes: 2 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,6 @@ olefile==0.47
tzlocal>=4.2,<6
compressed-rtf>=1.0.6,<2
ebcdic>=1.1.1,<2
beautifulsoup4>=4.11.1,<4.13
beautifulsoup4>=4.11.1,<4.14
RTFDE>=0.1.1,<0.2
red-black-tree-mod==1.20
red-black-tree-mod>=1.20, <=1.23