From 919d7c2c96adebd337fb52da8452ecae85f32676 Mon Sep 17 00:00:00 2001 From: "K.Kanakhin" Date: Wed, 9 Nov 2016 16:20:14 +0600 Subject: [PATCH] retention-period: Add archiving attachments feature. - Add model with migration for attachments archiving. - Add archiving attachments to archiving tool. - Add tests. Fixes #106 --- zerver/lib/retention.py | 57 ++++++++++++++- zerver/migrations/0055_archiveattachment.py | 35 ++++++++++ zerver/models.py | 29 +++++--- zerver/tests/test_retention.py | 77 ++++++++++++++++++++- 4 files changed, 186 insertions(+), 12 deletions(-) create mode 100644 zerver/migrations/0055_archiveattachment.py diff --git a/zerver/lib/retention.py b/zerver/lib/retention.py index f1d260c7e37f04..3c22c662e81c14 100644 --- a/zerver/lib/retention.py +++ b/zerver/lib/retention.py @@ -4,9 +4,10 @@ from django.db import connection, transaction from django.utils import timezone -from zerver.models import Message, UserMessage, ArchiveMessage, ArchiveUserMessage +from zerver.models import (Message, UserMessage, ArchiveMessage, ArchiveUserMessage, + Attachment, ArchiveAttachment) -from typing import Any, Optional +from typing import Any @transaction.atomic @@ -57,6 +58,47 @@ def move_expired_user_messages_to_archive(): """ move_expired_rows(UserMessage, query) + +def move_expired_attachments_to_archive(): + # type: () -> None + query = """ + INSERT INTO zerver_archiveattachment ({dst_fields}, archived_date) + SELECT {src_fields}, '{archived_date}' + FROM zerver_attachment + INNER JOIN zerver_attachment_messages + ON zerver_attachment_messages.attachment_id = zerver_attachment.id + INNER JOIN zerver_message ON zerver_message.id = zerver_attachment_messages.message_id + INNER JOIN zerver_usermessage ON zerver_message.id = zerver_usermessage.message_id + INNER JOIN zerver_userprofile ON zerver_usermessage.user_profile_id = zerver_userprofile.id + INNER JOIN zerver_realm ON zerver_userprofile.realm_id = zerver_realm.id + WHERE zerver_realm.message_retention_days IS NOT NULL + AND EXTRACT(DAY FROM (CURRENT_DATE - zerver_message.pub_date)) >= zerver_realm.message_retention_days + AND zerver_attachment.id NOT IN (SELECT id FROM zerver_archiveattachment) + GROUP BY zerver_attachment.id + """ + move_expired_rows(Attachment, query) + + +def move_expired_attachments_message_rows_to_archive(): + # type: () -> None + query = """ + INSERT INTO zerver_archiveattachment_messages (id, archiveattachment_id, archivemessage_id) + SELECT zerver_attachment_messages.id, zerver_attachment_messages.attachment_id, + zerver_attachment_messages.message_id + FROM zerver_attachment_messages + INNER JOIN zerver_message ON zerver_message.id = zerver_attachment_messages.message_id + INNER JOIN zerver_usermessage ON zerver_message.id = zerver_usermessage.message_id + INNER JOIN zerver_userprofile ON zerver_usermessage.user_profile_id = zerver_userprofile.id + INNER JOIN zerver_realm ON zerver_userprofile.realm_id = zerver_realm.id + WHERE zerver_realm.message_retention_days IS NOT NULL + AND EXTRACT(DAY FROM (CURRENT_DATE - zerver_message.pub_date)) >= zerver_realm.message_retention_days + AND zerver_attachment_messages.id NOT IN (SELECT id FROM zerver_archiveattachment_messages) + GROUP BY zerver_attachment_messages.id + """ + with connection.cursor() as cursor: + cursor.execute(query) + + def delete_expired_messages(): # type: () -> None removing_messages = Message.objects.filter( @@ -71,9 +113,20 @@ def delete_expired_user_messages(): ) removing_user_messages.delete() + +def delete_expired_attachments(): + # type: () -> None + removing_attachments = Attachment.objects.filter( + messages__isnull=True, id__in=ArchiveAttachment.objects.all()) + removing_attachments.delete() + + def archive_messages(): # type: () -> None move_expired_messages_to_archive() move_expired_user_messages_to_archive() + move_expired_attachments_to_archive() + move_expired_attachments_message_rows_to_archive() delete_expired_user_messages() delete_expired_messages() + delete_expired_attachments() diff --git a/zerver/migrations/0055_archiveattachment.py b/zerver/migrations/0055_archiveattachment.py new file mode 100644 index 00000000000000..95921fc116e730 --- /dev/null +++ b/zerver/migrations/0055_archiveattachment.py @@ -0,0 +1,35 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +from django.db import migrations, models +import django.utils.timezone +from django.conf import settings +import zerver.lib.str_utils + + +class Migration(migrations.Migration): + + dependencies = [ + ('zerver', '0054_archive_user_message'), + ] + + operations = [ + migrations.CreateModel( + name='ArchiveAttachment', + fields=[ + ('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)), + ('file_name', models.TextField(db_index=True)), + ('path_id', models.TextField(db_index=True)), + ('is_realm_public', models.BooleanField(default=False)), + ('create_time', models.DateTimeField(default=django.utils.timezone.now, db_index=True)), + ('archived_date', models.DateTimeField(default=django.utils.timezone.now, verbose_name='data archived', db_index=True)), + ('messages', models.ManyToManyField(to='zerver.ArchiveMessage')), + ('owner', models.ForeignKey(to=settings.AUTH_USER_MODEL)), + ('realm', models.ForeignKey(blank=True, to='zerver.Realm', null=True)), + ], + options={ + 'abstract': False, + }, + bases=(zerver.lib.str_utils.ModelReprMixin, models.Model), + ), + ] diff --git a/zerver/models.py b/zerver/models.py index 5b7792e9d256c8..f9c8833229669c 100644 --- a/zerver/models.py +++ b/zerver/models.py @@ -1143,17 +1143,24 @@ def parse_usermessage_flags(val): return flags -class Attachment(ModelReprMixin, models.Model): - file_name = models.TextField(db_index=True) # type: Text +class AbstractAttachment(ModelReprMixin, models.Model): + file_name = models.TextField(db_index=True) # type: Text # path_id is a storage location agnostic representation of the path of the file. # If the path of a file is http://localhost:9991/user_uploads/a/b/abc/temp_file.py # then its path_id will be a/b/abc/temp_file.py. - path_id = models.TextField(db_index=True) # type: Text - owner = models.ForeignKey(UserProfile) # type: UserProfile - realm = models.ForeignKey(Realm, blank=True, null=True) # type: Realm - is_realm_public = models.BooleanField(default=False) # type: bool - messages = models.ManyToManyField(Message) # type: Manager - create_time = models.DateTimeField(default=timezone.now, db_index=True) # type: datetime.datetime + path_id = models.TextField(db_index=True) # type: Text + owner = models.ForeignKey(UserProfile) # type: UserProfile + realm = models.ForeignKey(Realm, blank=True, null=True) # type: Realm + is_realm_public = models.BooleanField(default=False) # type: bool + create_time = models.DateTimeField(default=timezone.now, + db_index=True) # type: datetime.datetime + + class Meta(object): + abstract = True + + +class Attachment(AbstractAttachment): + messages = models.ManyToManyField(Message) # type: Manager def __unicode__(self): # type: () -> Text @@ -1163,6 +1170,12 @@ def is_claimed(self): # type: () -> bool return self.messages.count() > 0 + +class ArchiveAttachment(AbstractAttachment): + archived_date = models.DateTimeField('data archived', default=timezone.now, + db_index=True) # type: datetime.datetime + messages = models.ManyToManyField(ArchiveMessage) # type: Manager + def get_old_unclaimed_attachments(weeks_ago): # type: (int) -> Sequence[Attachment] # TODO: Change return type to QuerySet[Attachment] diff --git a/zerver/tests/test_retention.py b/zerver/tests/test_retention.py index fe3402e205a4d7..d6e1233dee769d 100644 --- a/zerver/tests/test_retention.py +++ b/zerver/tests/test_retention.py @@ -7,11 +7,15 @@ from django.utils import timezone from zerver.lib.test_classes import ZulipTestCase +from zerver.lib.upload import create_attachment from zerver.models import (Message, Realm, Recipient, UserProfile, ArchiveUserMessage, - ArchiveMessage, UserMessage, get_user_profile_by_email) + ArchiveMessage, UserMessage, get_user_profile_by_email, + ArchiveAttachment, Attachment) from zerver.lib.retention import (move_expired_messages_to_archive, move_expired_user_messages_to_archive, delete_expired_messages, - delete_expired_user_messages, archive_messages) + delete_expired_user_messages, archive_messages, + move_expired_attachments_to_archive, + move_expired_attachments_message_rows_to_archive) from six.moves import range @@ -88,6 +92,39 @@ def _check_archive_messages_ids_by_realm(self, exp_message_ids, realm): [arc_msg.id for arc_msg in arc_messages] ) + def _send_msgs_with_attachments(self): + # type: () -> Dict[str, int] + sender_email = "hamlet@zulip.com" + user_profile = get_user_profile_by_email(sender_email) + dummy_files = [ + ('zulip.txt', '1/31/4CBjtTLYZhk66pZrF8hnYGwc/zulip.txt'), + ('temp_file.py', '1/31/4CBjtTLYZhk66pZrF8hnYGwc/temp_file.py'), + ('abc.py', '1/31/4CBjtTLYZhk66pZrF8hnYGwc/abc.py') + ] + + for file_name, path_id in dummy_files: + create_attachment(file_name, path_id, user_profile) + + self.subscribe_to_stream(sender_email, "Denmark") + body = "Some files here ...[zulip.txt](http://localhost:9991/user_uploads/1/31/4CBjtTLYZhk66pZrF8hnYGwc/zulip.txt)" + \ + "http://localhost:9991/user_uploads/1/31/4CBjtTLYZhk66pZrF8hnYGwc/temp_file.py.... Some more...." + \ + "http://localhost:9991/user_uploads/1/31/4CBjtTLYZhk66pZrF8hnYGwc/abc.py" + + expired_message_id = self.send_message(sender_email, "Denmark", Recipient.STREAM, body, + "test") + actual_message_id = self.send_message(sender_email, "Denmark", Recipient.STREAM, body, + "test") + self._change_msgs_pub_date([expired_message_id], timezone.now() - timedelta(days=101)) + return {'expired_message_id': expired_message_id, 'actual_message_id': actual_message_id} + + def _add_expired_date_to_archive_data(self): + # type: () -> None + arc_expired_date = timezone.now() - timedelta( + days=settings.ARCHIVED_DATA_RETENTION_DAYS + 2) + ArchiveMessage.objects.update(archived_date=arc_expired_date) + ArchiveUserMessage.objects.update(archived_date=arc_expired_date) + ArchiveAttachment.objects.update(archived_date=arc_expired_date) + def _check_cross_realm_messages_archiving(self, arc_user_msg_qty, period, realm=None): # type: (int, int, Realm) -> int sended_message_id = self._send_cross_realm_message() @@ -247,3 +284,39 @@ def test_archive_message_tool(self): self._check_archive_messages_ids_by_realm( exp_msgs_ids_dict['mit_msgs_ids'], self.mit_realm) + def test_moving_attachments_to_archive_without_removing(self): + # type: () -> None + msgs_ids = self._send_msgs_with_attachments() + move_expired_messages_to_archive() + move_expired_user_messages_to_archive() + move_expired_attachments_to_archive() + move_expired_attachments_message_rows_to_archive() + archived_attachment = ArchiveAttachment.objects.all() + self.assertEqual(archived_attachment.count(), 3) + self.assertEqual( + list(archived_attachment.distinct('messages__id').values_list('messages__id', + flat=True)), + [msgs_ids['expired_message_id']]) + + def test_archiving_attachments_with_removing(self): + # type: () -> None + msgs_ids = self._send_msgs_with_attachments() + + archive_messages() + archived_attachment = ArchiveAttachment.objects.all() + attachment = Attachment.objects.all() + self.assertEqual(archived_attachment.count(), 3) + self.assertEqual( + list(archived_attachment.distinct('messages__id').values_list('messages__id', + flat=True)), + [msgs_ids['expired_message_id']]) + self.assertEqual(attachment.count(), 3) + self._change_msgs_pub_date([msgs_ids['actual_message_id']], + timezone.now() - timedelta(days=101)) + archive_messages() + self.assertEqual(attachment.count(), 0) + self.assertEqual( + list(archived_attachment.distinct('messages__id').order_by('messages__id').values_list( + 'messages__id', flat=True)), + sorted(msgs_ids.values())) +