From af289af8534d0c82be8904f7c61dc20f0bfd1904 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Gross?= Date: Wed, 28 Oct 2015 10:56:16 +0100 Subject: [PATCH 1/3] Use mail 'date' as file prefix if using utime_from_header. The goal here is to use a timestamp based on mail Date (or Delivery-date if not found) as filename prefix instead of current system time. This allows to sort box by most recent mail based on file name. This is useful in case of tools sorting mails using file name instead of mail headers. --- offlineimap/folder/Maildir.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/offlineimap/folder/Maildir.py b/offlineimap/folder/Maildir.py index 07d1b731f..49d7f8f0b 100644 --- a/offlineimap/folder/Maildir.py +++ b/offlineimap/folder/Maildir.py @@ -352,7 +352,17 @@ def savemessage(self, uid, content, flags, rtime): if self.utime_from_header: try: date = emailutil.get_message_date(content, 'Date') + if date is None: + # Give a try with Delivery-date + date = emailutil.get_message_date(content, 'Delivery-date') if date is not None: + # If a date is found, make the timestamp is used a file + # prefix. This ensure correct order when sorting by most + # recent mail based on filename. The timeseq is 0 here + # since chances having 2 message arriving at the exact same + # time is very thin. + messagename = '%s_0.%s' % (date, '.'.join(messagename.split('.')[1:])) + tmpname = self.save_to_tmp_file(messagename, content) os.utime(os.path.join(self.getfullname(), tmpname), (date, date)) # In case date is wrongly so far into the future as to be > max int32 From 191f031b1caa4089e7c172237aaedf4535bdd1c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Gross?= Date: Tue, 3 Nov 2015 23:24:48 +0100 Subject: [PATCH 2/3] Add new config option filename_use_mail_timestamp. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If this value is true, use (if possible) a timestamp based on the message Date or Delivery-date headers. The current system time is used otherwise. filename_use_mail_timestamp and utime_from_header are now completely separate options that do not interfere with each other. To handle this feature in a multithreaded context we use a hash to count the number of messages with the same timestamp. This method is more accurate than using the old lasttime and timeseq variables. Signed-off-by: Sébastien Gross --- offlineimap/folder/Base.py | 7 +++++ offlineimap/folder/Maildir.py | 56 ++++++++++++++++++++--------------- 2 files changed, 39 insertions(+), 24 deletions(-) diff --git a/offlineimap/folder/Base.py b/offlineimap/folder/Base.py index d81f3efe6..d2a0706af 100644 --- a/offlineimap/folder/Base.py +++ b/offlineimap/folder/Base.py @@ -60,6 +60,13 @@ def __init__(self, name, repository): self._utime_from_header = self.config.getdefaultboolean(repo, "utime_from_header", utime_from_header_global) + # Do we need to use mail timestamp for filename prefix? 
+ filename_use_mail_timestamp_global = self.config.getdefaultboolean( + "general", "filename_use_mail_timestamp", False) + repo = "Repository " + repository.name + self._filename_use_mail_timestamp = self.config.getdefaultboolean(repo, + "filename_use_mail_timestamp", filename_use_mail_timestamp_global) + # Determine if we're running static or dynamic folder filtering # and check filtering status self._dynamic_folderfilter = self.config.getdefaultboolean( diff --git a/offlineimap/folder/Maildir.py b/offlineimap/folder/Maildir.py index 49d7f8f0b..a7dbf268e 100644 --- a/offlineimap/folder/Maildir.py +++ b/offlineimap/folder/Maildir.py @@ -38,22 +38,20 @@ # Find a numeric timestamp in a string (filename prefix) re_timestampmatch = re.compile('(\d+)'); -timeseq = 0 -lasttime = 0 +timehash = {} timelock = Lock() -def _gettimeseq(): - global lasttime, timeseq, timelock +def _gettimeseq(date=None): + global timehash, timelock timelock.acquire() try: - thistime = long(time.time()) - if thistime == lasttime: - timeseq += 1 - return (thistime, timeseq) + if date is None: + date = long(time.time()) + if timehash.has_key(date): + timehash[date] += 1 else: - lasttime = thistime - timeseq = 0 - return (thistime, timeseq) + timehash[date] = 0 + return (date, timehash[date]) finally: timelock.release() @@ -269,14 +267,14 @@ def getmessagetime(self, uid): filepath = os.path.join(self.getfullname(), filename) return os.path.getmtime(filepath) - def new_message_filename(self, uid, flags=set()): + def new_message_filename(self, uid, flags=set(), date=None): """Creates a new unique Maildir filename :param uid: The UID`None`, or a set of maildir flags :param flags: A set of maildir flags :returns: String containing unique message filename""" - timeval, timeseq = _gettimeseq() + timeval, timeseq = _gettimeseq(date) return '%d_%d.%d.%s,U=%d,FMD5=%s%s2,%s'% \ (timeval, timeseq, os.getpid(), socket.gethostname(), uid, self._foldermd5, self.infosep, ''.join(sorted(flags))) @@ -346,23 +344,33 
@@ def savemessage(self, uid, content, flags, rtime): # Otherwise, save the message in tmp/ and then call savemessageflags() # to give it a permanent home. tmpdir = os.path.join(self.getfullname(), 'tmp') - messagename = self.new_message_filename(uid, flags) + + # use the mail timestamp given by either Date or Delivery-date mail + # headers. + message_timestamp = None + if self._filename_use_mail_timestamp: + try: + message_timestamp = emailutil.get_message_date(content, 'Date') + if message_timestamp is None: + # Give a try with Delivery-date + date = emailutil.get_message_date(content, 'Delivery-date') + except: + # This should never happen + from email.Parser import Parser + from offlineimap.ui import getglobalui + datestr = Parser().parsestr(content, True).get("Date") + ui = getglobalui() + ui.warn("UID %d has invalid date %s: %s\n" + "Not using message timestamp as file prefix" % (uid, datestr, e)) + # No need to check if date is None here since it would + # be overridden by _gettimeseq. + messagename = self.new_message_filename(uid, flags, date=message_timestamp) tmpname = self.save_to_tmp_file(messagename, content) if self.utime_from_header: try: date = emailutil.get_message_date(content, 'Date') - if date is None: - # Give a try with Delivery-date - date = emailutil.get_message_date(content, 'Delivery-date') if date is not None: - # If a date is found, make the timestamp is used a file - # prefix. This ensure correct order when sorting by most - # recent mail based on filename. The timeseq is 0 here - # since chances having 2 message arriving at the exact same - # time is very thin. 
- messagename = '%s_0.%s' % (date, '.'.join(messagename.split('.')[1:])) - tmpname = self.save_to_tmp_file(messagename, content) os.utime(os.path.join(self.getfullname(), tmpname), (date, date)) # In case date is wrongly so far into the future as to be > max int32 From 90c9c4c6d219cc28f7a4e8e35d3e58bb24d944a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Gross?= Date: Wed, 4 Nov 2015 10:32:22 +0100 Subject: [PATCH 3/3] Add documentation for filename_use_mail_timestamp. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Sébastien Gross --- offlineimap.conf | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/offlineimap.conf b/offlineimap.conf index 07f99016d..6c5ac24e0 100644 --- a/offlineimap.conf +++ b/offlineimap.conf @@ -514,6 +514,29 @@ localfolders = ~/Test #utime_from_header = no +# This option stands in the [Repository LocalExample] section. +# +# This option is similar to "utime_from_header" and can be used as a +# complementary feature to keep track of a message date. This option only +# makes sense for the Maildir type. +# +# By default each message is stored in a file whose prefix is the fetch +# timestamp and an order rank such as "1446590057_0". In a multithreaded +# environment messages are fetched in a random order, so you can't trust +# the file name to sort your boxes. +# +# If set to "yes" the file name prefix is built from the message "Date" header +# (which should be present) or the "Delivery-date" if "Date" is not +# found. If neither "Delivery-date" nor "Date" is found, the current system +# date is used. Now you can quickly sort your messages using their file +# names. +# +# Used in combination with "utime_from_header" all your messages will be in +# order with the correct mtime attribute. +# +#filename_use_mail_timestamp = no + + [Repository GmailLocalExample] # This type of repository enables syncing of Gmail. All Maildir