Skip to content

Commit

Permalink
retention-period: Add archiving attachments feature.
Browse files Browse the repository at this point in the history
- Add model with migration for attachments archiving.
- Add archiving attachments to archiving tool.
- Add tests.

Fixes zulip#106
  • Loading branch information
kkanahin committed Feb 15, 2017
1 parent b974daf commit 919d7c2
Show file tree
Hide file tree
Showing 4 changed files with 186 additions and 12 deletions.
57 changes: 55 additions & 2 deletions zerver/lib/retention.py
Expand Up @@ -4,9 +4,10 @@

from django.db import connection, transaction
from django.utils import timezone
from zerver.models import Message, UserMessage, ArchiveMessage, ArchiveUserMessage
from zerver.models import (Message, UserMessage, ArchiveMessage, ArchiveUserMessage,
Attachment, ArchiveAttachment)

from typing import Any, Optional
from typing import Any


@transaction.atomic
Expand Down Expand Up @@ -57,6 +58,47 @@ def move_expired_user_messages_to_archive():
"""
move_expired_rows(UserMessage, query)


def move_expired_attachments_to_archive():
# type: () -> None
query = """
INSERT INTO zerver_archiveattachment ({dst_fields}, archived_date)
SELECT {src_fields}, '{archived_date}'
FROM zerver_attachment
INNER JOIN zerver_attachment_messages
ON zerver_attachment_messages.attachment_id = zerver_attachment.id
INNER JOIN zerver_message ON zerver_message.id = zerver_attachment_messages.message_id
INNER JOIN zerver_usermessage ON zerver_message.id = zerver_usermessage.message_id
INNER JOIN zerver_userprofile ON zerver_usermessage.user_profile_id = zerver_userprofile.id
INNER JOIN zerver_realm ON zerver_userprofile.realm_id = zerver_realm.id
WHERE zerver_realm.message_retention_days IS NOT NULL
AND EXTRACT(DAY FROM (CURRENT_DATE - zerver_message.pub_date)) >= zerver_realm.message_retention_days
AND zerver_attachment.id NOT IN (SELECT id FROM zerver_archiveattachment)
GROUP BY zerver_attachment.id
"""
move_expired_rows(Attachment, query)


def move_expired_attachments_message_rows_to_archive():
# type: () -> None
query = """
INSERT INTO zerver_archiveattachment_messages (id, archiveattachment_id, archivemessage_id)
SELECT zerver_attachment_messages.id, zerver_attachment_messages.attachment_id,
zerver_attachment_messages.message_id
FROM zerver_attachment_messages
INNER JOIN zerver_message ON zerver_message.id = zerver_attachment_messages.message_id
INNER JOIN zerver_usermessage ON zerver_message.id = zerver_usermessage.message_id
INNER JOIN zerver_userprofile ON zerver_usermessage.user_profile_id = zerver_userprofile.id
INNER JOIN zerver_realm ON zerver_userprofile.realm_id = zerver_realm.id
WHERE zerver_realm.message_retention_days IS NOT NULL
AND EXTRACT(DAY FROM (CURRENT_DATE - zerver_message.pub_date)) >= zerver_realm.message_retention_days
AND zerver_attachment_messages.id NOT IN (SELECT id FROM zerver_archiveattachment_messages)
GROUP BY zerver_attachment_messages.id
"""
with connection.cursor() as cursor:
cursor.execute(query)


def delete_expired_messages():
# type: () -> None
removing_messages = Message.objects.filter(
Expand All @@ -71,9 +113,20 @@ def delete_expired_user_messages():
)
removing_user_messages.delete()


def delete_expired_attachments():
# type: () -> None
removing_attachments = Attachment.objects.filter(
messages__isnull=True, id__in=ArchiveAttachment.objects.all())
removing_attachments.delete()


def archive_messages():
# type: () -> None
move_expired_messages_to_archive()
move_expired_user_messages_to_archive()
move_expired_attachments_to_archive()
move_expired_attachments_message_rows_to_archive()
delete_expired_user_messages()
delete_expired_messages()
delete_expired_attachments()
35 changes: 35 additions & 0 deletions zerver/migrations/0055_archiveattachment.py
@@ -0,0 +1,35 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

from django.db import migrations, models
import django.utils.timezone
from django.conf import settings
import zerver.lib.str_utils


class Migration(migrations.Migration):

dependencies = [
('zerver', '0054_archive_user_message'),
]

operations = [
migrations.CreateModel(
name='ArchiveAttachment',
fields=[
('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)),
('file_name', models.TextField(db_index=True)),
('path_id', models.TextField(db_index=True)),
('is_realm_public', models.BooleanField(default=False)),
('create_time', models.DateTimeField(default=django.utils.timezone.now, db_index=True)),
('archived_date', models.DateTimeField(default=django.utils.timezone.now, verbose_name='data archived', db_index=True)),
('messages', models.ManyToManyField(to='zerver.ArchiveMessage')),
('owner', models.ForeignKey(to=settings.AUTH_USER_MODEL)),
('realm', models.ForeignKey(blank=True, to='zerver.Realm', null=True)),
],
options={
'abstract': False,
},
bases=(zerver.lib.str_utils.ModelReprMixin, models.Model),
),
]
29 changes: 21 additions & 8 deletions zerver/models.py
Expand Up @@ -1143,17 +1143,24 @@ def parse_usermessage_flags(val):
return flags


class Attachment(ModelReprMixin, models.Model):
file_name = models.TextField(db_index=True) # type: Text
class AbstractAttachment(ModelReprMixin, models.Model):
file_name = models.TextField(db_index=True) # type: Text
# path_id is a storage location agnostic representation of the path of the file.
# If the path of a file is http://localhost:9991/user_uploads/a/b/abc/temp_file.py
# then its path_id will be a/b/abc/temp_file.py.
path_id = models.TextField(db_index=True) # type: Text
owner = models.ForeignKey(UserProfile) # type: UserProfile
realm = models.ForeignKey(Realm, blank=True, null=True) # type: Realm
is_realm_public = models.BooleanField(default=False) # type: bool
messages = models.ManyToManyField(Message) # type: Manager
create_time = models.DateTimeField(default=timezone.now, db_index=True) # type: datetime.datetime
path_id = models.TextField(db_index=True) # type: Text
owner = models.ForeignKey(UserProfile) # type: UserProfile
realm = models.ForeignKey(Realm, blank=True, null=True) # type: Realm
is_realm_public = models.BooleanField(default=False) # type: bool
create_time = models.DateTimeField(default=timezone.now,
db_index=True) # type: datetime.datetime

class Meta(object):
abstract = True


class Attachment(AbstractAttachment):
messages = models.ManyToManyField(Message) # type: Manager

def __unicode__(self):
# type: () -> Text
Expand All @@ -1163,6 +1170,12 @@ def is_claimed(self):
# type: () -> bool
return self.messages.count() > 0


class ArchiveAttachment(AbstractAttachment):
archived_date = models.DateTimeField('data archived', default=timezone.now,
db_index=True) # type: datetime.datetime
messages = models.ManyToManyField(ArchiveMessage) # type: Manager

def get_old_unclaimed_attachments(weeks_ago):
# type: (int) -> Sequence[Attachment]
# TODO: Change return type to QuerySet[Attachment]
Expand Down
77 changes: 75 additions & 2 deletions zerver/tests/test_retention.py
Expand Up @@ -7,11 +7,15 @@
from django.utils import timezone

from zerver.lib.test_classes import ZulipTestCase
from zerver.lib.upload import create_attachment
from zerver.models import (Message, Realm, Recipient, UserProfile, ArchiveUserMessage,
ArchiveMessage, UserMessage, get_user_profile_by_email)
ArchiveMessage, UserMessage, get_user_profile_by_email,
ArchiveAttachment, Attachment)
from zerver.lib.retention import (move_expired_messages_to_archive,
move_expired_user_messages_to_archive, delete_expired_messages,
delete_expired_user_messages, archive_messages)
delete_expired_user_messages, archive_messages,
move_expired_attachments_to_archive,
move_expired_attachments_message_rows_to_archive)

from six.moves import range

Expand Down Expand Up @@ -88,6 +92,39 @@ def _check_archive_messages_ids_by_realm(self, exp_message_ids, realm):
[arc_msg.id for arc_msg in arc_messages]
)

def _send_msgs_with_attachments(self):
# type: () -> Dict[str, int]
sender_email = "hamlet@zulip.com"
user_profile = get_user_profile_by_email(sender_email)
dummy_files = [
('zulip.txt', '1/31/4CBjtTLYZhk66pZrF8hnYGwc/zulip.txt'),
('temp_file.py', '1/31/4CBjtTLYZhk66pZrF8hnYGwc/temp_file.py'),
('abc.py', '1/31/4CBjtTLYZhk66pZrF8hnYGwc/abc.py')
]

for file_name, path_id in dummy_files:
create_attachment(file_name, path_id, user_profile)

self.subscribe_to_stream(sender_email, "Denmark")
body = "Some files here ...[zulip.txt](http://localhost:9991/user_uploads/1/31/4CBjtTLYZhk66pZrF8hnYGwc/zulip.txt)" + \
"http://localhost:9991/user_uploads/1/31/4CBjtTLYZhk66pZrF8hnYGwc/temp_file.py.... Some more...." + \
"http://localhost:9991/user_uploads/1/31/4CBjtTLYZhk66pZrF8hnYGwc/abc.py"

expired_message_id = self.send_message(sender_email, "Denmark", Recipient.STREAM, body,
"test")
actual_message_id = self.send_message(sender_email, "Denmark", Recipient.STREAM, body,
"test")
self._change_msgs_pub_date([expired_message_id], timezone.now() - timedelta(days=101))
return {'expired_message_id': expired_message_id, 'actual_message_id': actual_message_id}

def _add_expired_date_to_archive_data(self):
# type: () -> None
arc_expired_date = timezone.now() - timedelta(
days=settings.ARCHIVED_DATA_RETENTION_DAYS + 2)
ArchiveMessage.objects.update(archived_date=arc_expired_date)
ArchiveUserMessage.objects.update(archived_date=arc_expired_date)
ArchiveAttachment.objects.update(archived_date=arc_expired_date)

def _check_cross_realm_messages_archiving(self, arc_user_msg_qty, period, realm=None):
# type: (int, int, Realm) -> int
sended_message_id = self._send_cross_realm_message()
Expand Down Expand Up @@ -247,3 +284,39 @@ def test_archive_message_tool(self):
self._check_archive_messages_ids_by_realm(
exp_msgs_ids_dict['mit_msgs_ids'], self.mit_realm)

def test_moving_attachments_to_archive_without_removing(self):
# type: () -> None
msgs_ids = self._send_msgs_with_attachments()
move_expired_messages_to_archive()
move_expired_user_messages_to_archive()
move_expired_attachments_to_archive()
move_expired_attachments_message_rows_to_archive()
archived_attachment = ArchiveAttachment.objects.all()
self.assertEqual(archived_attachment.count(), 3)
self.assertEqual(
list(archived_attachment.distinct('messages__id').values_list('messages__id',
flat=True)),
[msgs_ids['expired_message_id']])

def test_archiving_attachments_with_removing(self):
# type: () -> None
msgs_ids = self._send_msgs_with_attachments()

archive_messages()
archived_attachment = ArchiveAttachment.objects.all()
attachment = Attachment.objects.all()
self.assertEqual(archived_attachment.count(), 3)
self.assertEqual(
list(archived_attachment.distinct('messages__id').values_list('messages__id',
flat=True)),
[msgs_ids['expired_message_id']])
self.assertEqual(attachment.count(), 3)
self._change_msgs_pub_date([msgs_ids['actual_message_id']],
timezone.now() - timedelta(days=101))
archive_messages()
self.assertEqual(attachment.count(), 0)
self.assertEqual(
list(archived_attachment.distinct('messages__id').order_by('messages__id').values_list(
'messages__id', flat=True)),
sorted(msgs_ids.values()))

0 comments on commit 919d7c2

Please sign in to comment.