Skip to content

Commit

Permalink
v0.28.0
Browse files Browse the repository at this point in the history
  • Loading branch information
TheElementalOfDestruction committed Jan 7, 2021
1 parent 5a16c7e commit 9b919f5
Show file tree
Hide file tree
Showing 14 changed files with 290 additions and 208 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
@@ -1,3 +1,9 @@
**v0.28.0**
* [[TeamMsgExtractor #87](https://github.com/TeamMsgExtractor/msg-extractor/issues/87)] Added a new system to handle `NotImplementedError` and other exceptions. All msg classes now have an option called `attachmentErrorBehavior` that tells the class what to do if it has an error. The value should be one of three constants: `ATTACHMENT_ERROR_THROW`, `ATTACHMENT_ERROR_NOT_IMPLEMENTED`, or `ATTACHMENT_ERROR_BROKEN`. `ATTACHMENT_ERROR_THROW` tells the class not to catch any exceptions and to just let the user handle them. `ATTACHMENT_ERROR_NOT_IMPLEMENTED` tells the class to catch `NotImplementedError` exceptions and put an instance of `UnsupportedAttachment` in place of a regular attachment. `ATTACHMENT_ERROR_BROKEN` tells the class to catch *all* exceptions and replace the attachment with `UnsupportedAttachment` if the exception is a `NotImplementedError`, or with `BrokenAttachment` for all other exceptions. With either of the latter two options, caught exceptions will be logged.
* In making the previous point work, much code from `Attachment` has been moved to a new class called `AttachmentBase`. Both `BrokenAttachment` and `UnsupportedAttachment` are subclasses of `AttachmentBase`, meaning data can be extracted from their streams in the same way as from a functioning attachment.
* [[TeamMsgExtractor #162](https://github.com/TeamMsgExtractor/msg-extractor/issues/162)] Pretty sure I actually got it this time. The execution flag should be applied by pip now.
* Fixed typos in some exceptions

**v0.27.16**
* [[TeamMsgExtractor #177](https://github.com/TeamMsgExtractor/msg-extractor/issues/177)] Fixed incorrect struct being used. It should be the correct one now, but further testing will be required to confirm this.
* Fixed log error message in `extract_msg.prop` to actually format a value into the message.
Expand Down
4 changes: 2 additions & 2 deletions README.rst
Expand Up @@ -180,8 +180,8 @@ Credits
.. |License: GPL v3| image:: https://img.shields.io/badge/License-GPLv3-blue.svg
:target: LICENSE.txt

.. |PyPI3| image:: https://img.shields.io/badge/pypi-0.27.16-blue.svg
:target: https://pypi.org/project/extract-msg/0.27.16/
.. |PyPI3| image:: https://img.shields.io/badge/pypi-0.28.0-blue.svg
:target: https://pypi.org/project/extract-msg/0.28.0/

.. |PyPI1| image:: https://img.shields.io/badge/python-2.7+-brightgreen.svg
:target: https://www.python.org/downloads/release/python-2715/
Expand Down
4 changes: 2 additions & 2 deletions extract_msg/__init__.py
Expand Up @@ -27,8 +27,8 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.

__author__ = 'The Elemental of Destruction & Matthew Walker'
__date__ = '2021-01-06'
__version__ = '0.27.16'
__date__ = '2021-01-07'
__version__ = '0.28.0'

import logging

Expand Down
5 changes: 3 additions & 2 deletions extract_msg/appointment.py
@@ -1,3 +1,4 @@
from extract_msg import constants
from extract_msg.attachment import Attachment
from extract_msg.message_base import MessageBase

Expand All @@ -6,8 +7,8 @@ class Appointment(MessageBase):
Parser for Microsoft Outlook Appointment files.
"""

def __init__(self, path, prefix = '', attachmentClass = Attachment, filename = None, delayAttachments = False, overrideEncoding = None):
MessageBase.__init__(self, path, prefix, attachmentClass, filename, delayAttachments, overrideEncoding)
def __init__(self, path, prefix = '', attachmentClass = Attachment, filename = None, delayAttachments = False, overrideEncoding = None, attachmentErrorBehavior = constants.ATTACHMENT_ERROR_THROW):
MessageBase.__init__(self, path, prefix, attachmentClass, filename, delayAttachments, overrideEncoding, attachmentErrorBehavior)

@property
def appointmentClassType(self):
Expand Down
200 changes: 20 additions & 180 deletions extract_msg/attachment.py
Expand Up @@ -3,6 +3,7 @@
import string

from extract_msg import constants
from extract_msg.attachment_base import AttachmentBase
from extract_msg.named import NamedAttachmentProperties
from extract_msg.prop import FixedLengthProp, VariableLengthProp
from extract_msg.properties import Properties
Expand All @@ -12,7 +13,7 @@
logger.addHandler(logging.NullHandler())


class Attachment(object):
class Attachment(AttachmentBase):
"""
Stores the attachment data of a Message instance.
Should the attachment be an embedded message, the
Expand All @@ -25,176 +26,28 @@ def __init__(self, msg, dir_):
:param msg: the Message instance that the attachment belongs to.
:param dir_: the directory inside the msg file where the attachment is located.
"""
object.__init__(self)
self.__msg = msg
self.__dir = dir_
self.__props = Properties(self._getStream('__properties_version1.0'),
constants.TYPE_ATTACHMENT)
self.__namedProperties = NamedAttachmentProperties(self)
AttachmentBase.__init__(self, msg, dir_)

# Get attachment data
if self.Exists('__substg1.0_37010102'):
self.__type = 'data'
self.__data = self._getStream('__substg1.0_37010102')
elif self.Exists('__substg1.0_3701000D'):
if (self.__props['37050003'].value & 0x7) != 0x5:
if (self.props['37050003'].value & 0x7) != 0x5:
raise NotImplementedError(
'Current version of extract_msg does not support extraction of containers that are not embedded msg files.')
# TODO add implementation
else:
self.__prefix = msg.prefixList + [dir_, '__substg1.0_3701000D']
self.__type = 'msg'
self.__data = openMsg(self.msg.path, self.__prefix, self.__class__, overrideEncoding = msg.overrideEncoding)
self.__data = openMsg(self.msg.path, self.__prefix, self.__class__, overrideEncoding = msg.overrideEncoding, attachmentErrorBehavior = msg.attachmentErrorBehavior)
elif (self.__props['37050003'].value & 0x7) == 0x7:
# TODO Handling for special attachment type 0x7
self.__type = 'web'
raise NotImplementedError('Attachments of type afByWebReference are not currently supported.')
else:
raise TypeError('Unknown attachment type.')

def _ensureSet(self, variable, streamID, stringStream = True):
"""
Ensures that the variable exists, otherwise will set it using the specified stream.
After that, return said variable.
If the specified stream is not a string stream, make sure to set :param stringStream: to False.
"""
try:
return getattr(self, variable)
except AttributeError:
if stringStream:
value = self._getStringStream(streamID)
else:
value = self._getStream(streamID)
setattr(self, variable, value)
return value

def _ensureSetNamed(self, variable, propertyName):
"""
Ensures that the variable exists, otherwise will set it using the named property.
After that, return said variable.
"""
try:
return getattr(self, variable)
except AttributeError:
value = self.named.getNamedValue(propertyName)
setattr(self, variable, value)
return value

def _ensureSetProperty(self, variable, propertyName):
"""
Ensures that the variable exists, otherwise will set it using the property.
After that, return said variable.
"""
try:
return getattr(self, variable)
except AttributeError:
try:
value = self.props[propertyName].value
except (KeyError, AttributeError):
value = None
setattr(self, variable, value)
return value

def _getStream(self, filename):
return self.__msg._getStream([self.__dir, filename])

def _getStringStream(self, filename):
"""
Gets a string representation of the requested filename.
Checks for both ASCII and Unicode representations and returns
a value if possible. If there are both ASCII and Unicode
versions, then :param prefer: specifies which will be
returned.
"""
return self.__msg._getStringStream([self.__dir, filename])

def _getTypedData(self, id, _type = None):
"""
Gets the data for the specified id as the type that it is
supposed to be. :param id: MUST be a 4 digit hexadecimal
string.
If you know for sure what type the data is before hand,
you can specify it as being one of the strings in the
constant FIXED_LENGTH_PROPS_STRING or
VARIABLE_LENGTH_PROPS_STRING.
"""
verifyPropertyId(id)
id = id.upper()
found, result = self._getTypedStream('__substg1.0_' + id, _type)
if found:
return result
else:
found, result = self._getTypedProperty(id, _type)
return result if found else None

def _getTypedProperty(self, propertyID, _type = None):
"""
Gets the property with the specified id as the type that it
is supposed to be. :param id: MUST be a 4 digit hexadecimal
string.
If you know for sure what type the property is before hand,
you can specify it as being one of the strings in the
constant FIXED_LENGTH_PROPS_STRING or
VARIABLE_LENGTH_PROPS_STRING.
"""
verifyPropertyId(propertyID)
verifyType(_type)
propertyID = propertyID.upper()
for x in (propertyID + _type,) if _type is not None else self.props:
if x.startswith(propertyID):
prop = self.props[x]
return True, (prop.value if isinstance(prop, FixedLengthProp) else prop)
return False, None

def _getTypedStream(self, filename, _type = None):
"""
Gets the contents of the specified stream as the type that
it is supposed to be.
Rather than the full filename, you should only feed this
function the filename sans the type. So if the full name
is "__substg1.0_001A001F", the filename this function
should receive should be "__substg1.0_001A".
If you know for sure what type the stream is before hand,
you can specify it as being one of the strings in the
constant FIXED_LENGTH_PROPS_STRING or
VARIABLE_LENGTH_PROPS_STRING.
If you have not specified the type, the type this function
returns in many cases cannot be predicted. As such, when
using this function it is best for you to check the type
that it returns. If the function returns None, that means
it could not find the stream specified.
"""
return self.__msg._getTypedStream([self.__dir, filename], True, _type)

def _registerNamedProperty(self, entry, _type, name = None):
self.__namedProperties.defineProperty(entry, _type, name)

def Exists(self, filename):
"""
Checks if stream exists inside the attachment folder.
"""
return self.__msg.Exists([self.__dir, filename])

def sExists(self, filename):
"""
Checks if the string stream exists inside the attachment folder.
"""
return self.__msg.sExists([self.__dir, filename])

def ExistsTypedProperty(self, id, _type = None):
"""
Determines if the stream with the provided id exists. The return of this
function is 2 values, the first being a boolean for if anything was found,
and the second being how many were found.
"""
return self.__msg.ExistsTypedProperty(id, self.__dir, _type, True, self.__props)

def save(self, contentId = False, json = False, useFileName = False, raw = False, customPath = None, customFilename = None,
html = False, rtf = False):
# Check if the user has specified a custom filename
Expand Down Expand Up @@ -254,41 +107,13 @@ def data(self):
"""
return self.__data

@property
def dir(self):
"""
Returns the directory inside the msg file where the attachment is located.
"""
return self.__dir

@property
def longFilename(self):
"""
Returns the long file name of the attachment, if it exists.
"""
return self._ensureSet('_longFilename', '__substg1.0_3707')

@property
def msg(self):
"""
Returns the Message instance the attachment belongs to.
"""
return self.__msg

@property
def namedProperties(self):
"""
The NamedAttachmentProperties instance for this attachment.
"""
return self.__namedProperties

@property
def props(self):
"""
Returns the Properties instance of the attachment.
"""
return self.__props

@property
def shortFilename(self):
"""
Expand All @@ -302,3 +127,18 @@ def type(self):
Returns the (internally used) type of the data.
"""
return self.__type



class BrokenAttachment(AttachmentBase):
    """
    Stand-in for an attachment that suffered a fatal error.

    This class is never generated from a NotImplementedError exception.
    It adds nothing of its own on top of AttachmentBase.
    """

class UnsupportedAttachment(AttachmentBase):
    """
    Stand-in for an attachment whose type is not currently supported.

    It adds nothing of its own on top of AttachmentBase.
    """

0 comments on commit 9b919f5

Please sign in to comment.