
Added the ``shred`` method for deleting objects and containers permanently.
1 parent fdf4096 commit c17b7fac03ae784d9a2cc9f8e89aac869b17c380 @hathawsh committed Jan 30, 2012
Showing with 338 additions and 20 deletions.
  +5 −0 CHANGES.txt
  +3 −3 docs/conf.py
  +108 −2 repozitory/archive.py
  +13 −1 repozitory/interfaces.py
  +1 −1 repozitory/schema.py
  +207 −12 repozitory/tests/test_archive.py
  +1 −1 setup.py
5 CHANGES.txt
@@ -1,3 +1,8 @@
+1.2 (2012-01-30)
+----------------
+
+- Added the ``shred`` method for deleting objects and containers permanently.
+
1.1 (2012-01-24)
----------------
6 docs/conf.py
@@ -45,16 +45,16 @@
# General information about the project.
project = u'Repozitory'
-copyright = u'2011, Shane Hathaway'
+copyright = u'2011-2012, Shane Hathaway'
# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
-version = '1.0'
+version = '1.2'
# The full version, including alpha/beta/rc tags.
-release = '1.0'
+release = '1.2'
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
110 repozitory/archive.py
@@ -23,10 +23,13 @@
from zope.sqlalchemy import ZopeTransactionExtension
import datetime
import hashlib
+import logging
import tempfile
_global_sessions = {} # {db_string: SQLAlchemy session}
+log = logging.getLogger(__name__)
+
def forget_sessions():
_global_sessions.clear()
@@ -278,7 +281,7 @@ def history(self, docid, only_current=False):
for row in rows]
def get_version(self, docid, version_num):
- """Return a specific IObjectHistoryRecord for a document.
+ """Return a specific IObjectHistoryRecord for an object.
"""
created = (self.session.query(ArchivedObject.created)
.filter_by(docid=docid)
@@ -531,7 +534,7 @@ def which_contain_deleted(self, container_ids, max_depth=None):
session.query(ArchivedItem.docid)
.filter(ArchivedItem.docid.in_(docids))
.all())
- # For each deleted item, add to the list of results
+ # For each deleted (not moved) item, add to the list of results
# and remove from the set of containers to examine further.
for (container_id, docid) in deleted_rows:
if docid not in moved:
@@ -568,6 +571,109 @@ def which_contain_deleted(self, container_ids, max_depth=None):
return res
+ def shred(self, docids=(), container_ids=()):
+ """Delete the specified objects and containers permanently.
+
+ The objects to shred must not exist in any container
+ (other than the containers being shredded), and the
+ containers must not contain any objects (other than the
+ objects being shredded). If these rules are not met, this
+ method raises a ValueError.
+ """
+ session = self.session
+ conflicting_item = None
+
+ if docids:
+ # Verify none of the objects exist in any container
+ # (except the containers to be shredded).
+ q = session.query(ArchivedItem).filter(
+ ArchivedItem.docid.in_(docids))
+ if container_ids:
+ q = q.filter(~ArchivedItem.container_id.in_(container_ids))
+ conflicting_item = q.order_by(ArchivedItem.docid).first()
+
+ if container_ids and conflicting_item is None:
+ # Verify none of the containers contain any objects
+ # (except the objects to be shredded).
+ q = session.query(ArchivedItem).filter(
+ ArchivedItem.container_id.in_(container_ids))
+ if docids:
+ q = q.filter(~ArchivedItem.docid.in_(docids))
+ conflicting_item = q.order_by(ArchivedItem.container_id).first()
+
+ if conflicting_item is not None:
+ raise ValueError("Document %d is still in container %d" % (
+ conflicting_item.docid, conflicting_item.container_id))
+
+ # List the blob_ids referenced by the objects to shred.
+ # (Later, orphaned blobs will also be shredded.)
+ if docids:
+ blob_id_rows = (session.query(ArchivedBlobLink.blob_id)
+ .filter(ArchivedBlobLink.docid.in_(docids))
+ .all())
+ blob_ids = set(blob_id for (blob_id,) in blob_id_rows)
+ else:
+ blob_ids = None
+
+ if container_ids:
+ # Shred the specified containers.
+ # (Although we could rely on cascading, it seems useful to
+ # delete the rows explicitly to prevent accidents.)
+ log.warning("Shredding containers: %s", container_ids)
+ (session.query(ArchivedItem)
+ .filter(ArchivedItem.container_id.in_(container_ids))
+ .delete(False))
+ (session.query(ArchivedItemDeleted)
+ .filter(ArchivedItemDeleted.container_id.in_(container_ids))
+ .delete(False))
+ (session.query(ArchivedContainer)
+ .filter(ArchivedContainer.container_id.in_(container_ids))
+ .delete(False))
+
+ if docids:
+ # Shred the specified objects.
+ log.warning("Shredding objects: %s", container_ids)
+ (session.query(ArchivedItem)
+ .filter(ArchivedItem.docid.in_(docids))
+ .delete(False))
+ (session.query(ArchivedItemDeleted)
+ .filter(ArchivedItemDeleted.docid.in_(docids))
+ .delete(False))
+ (session.query(ArchivedCurrent)
+ .filter(ArchivedCurrent.docid.in_(docids))
+ .delete(False))
+ (session.query(ArchivedState)
+ .filter(ArchivedState.docid.in_(docids))
+ .delete(False))
+ (session.query(ArchivedBlobLink)
+ .filter(ArchivedBlobLink.docid.in_(docids))
+ .delete(False))
+ (session.query(ArchivedObject)
+ .filter(ArchivedObject.docid.in_(docids))
+ .delete(False))
+
+ if blob_ids:
+ keep_blob_rows = (session.query(ArchivedBlobLink.blob_id)
+ .filter(ArchivedBlobLink.blob_id.in_(blob_ids))
+ .all())
+ keep_blob_ids = set(blob_id for (blob_id,) in keep_blob_rows)
+ orphaned_blob_ids = blob_ids.difference(keep_blob_ids)
+
+ if orphaned_blob_ids:
+ # Shred the orphaned blobs.
+ log.warning("Shredding orphaned blobs: %s", orphaned_blob_ids)
+ (session.query(ArchivedChunk)
+ .filter(ArchivedChunk.blob_id.in_(orphaned_blob_ids))
+ .delete(False))
+ (session.query(ArchivedBlobInfo)
+ .filter(ArchivedBlobInfo.blob_id.in_(orphaned_blob_ids))
+ .delete(False))
+
+ # Above, we use delete(False) for speed. According to the
+ # SQLAlchemy docs, we should call expire_all() after
+ # using delete(False).
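+ # (With synchronize_session=False, the session's in-memory
+ # objects are not updated to reflect the bulk deletes, so stale
+ # instances could otherwise linger in the identity map.)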
+ session.expire_all()
+
class ObjectHistoryRecord(object):
implements(IObjectHistoryRecord)
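
For illustration, here is a minimal usage sketch of the new method. It assumes ``archive`` is an existing IArchive instance and that document 4 is held only by container 5; the ids are hypothetical.

    # Hedged sketch; the docids and container_ids are hypothetical.
    # Shredding the object alone fails while container 5 still holds it:
    try:
        archive.shred(docids=[4])
    except ValueError as e:
        print(e)  # "Document 4 is still in container 5"

    # Shredding the object together with its container satisfies the
    # precondition; both are removed permanently, along with any blobs
    # no longer referenced by another object.
    archive.shred(docids=[4], container_ids=[5])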
14 repozitory/interfaces.py
@@ -136,7 +136,7 @@ def filter_container_ids(container_ids):
Returns a sequence containing a subset of the provided container_ids.
"""
- def which_contain_deleted(self, container_ids, max_depth=None):
+ def which_contain_deleted(container_ids, max_depth=None):
"""Returns the subset of container_ids that have had something deleted.
This is useful for building a hierarchical trash UI that allows
@@ -150,6 +150,18 @@ def which_contain_deleted(self, container_ids, max_depth=None):
(Most other methods make no such assumption.)
"""
+ def shred(docids=(), container_ids=()):
+ """Delete the specified objects and containers permanently.
+
+ The objects to shred must not exist in any container
+ (other than the containers being shredded), and the
+ containers must not contain any objects (other than the
+ objects being shredded). If these rules are not met, this
+ method raises a ValueError.
+
+ Returns None.
+ """
+
class IObjectVersion(IDCDescriptiveProperties, IDCTimes):
"""The content of an object for version control.
2 repozitory/schema.py
@@ -116,7 +116,7 @@ class ArchivedBlobLink(Base):
version_num = Column(Integer, primary_key=True, nullable=False)
name = Column(Unicode, primary_key=True, nullable=False)
blob_id = Column(Integer, ForeignKey('archived_blob_info.blob_id'),
- nullable=True)
+ nullable=True, index=True)
__table_args__ = (
ForeignKeyConstraint(
219 repozitory/tests/test_archive.py
@@ -2,7 +2,12 @@
from StringIO import StringIO
import datetime
-import unittest2 as unittest
+
+try:
+ import unittest2 as unittest
+except ImportError:
+ # Python 2.7+
+ import unittest
class ArchiveTest(unittest.TestCase):
@@ -483,7 +488,7 @@ def test_archive_container_empty(self):
class DummyContainerVersion:
container_id = 5
path = '/my/container'
- map = {}
+ map = {} # @ReservedAssignment
ns_map = {}
archive.archive_container(DummyContainerVersion(), 'testuser')
@@ -514,7 +519,7 @@ def test_archive_container_non_empty(self):
class DummyContainerVersion:
container_id = 5
path = '/my/container'
- map = {'a': 4}
+ map = {'a': 4} # @ReservedAssignment
ns_map = {'headers': {'b': 6}}
archive.archive_container(DummyContainerVersion(), 'testuser')
@@ -553,7 +558,7 @@ def test_archive_container_with_deletion(self):
class DummyContainerVersion:
container_id = 5
path = '/my/container'
- map = {'a': 4}
+ map = {'a': 4} # @ReservedAssignment
ns_map = {'headers': {'b': 6}}
c = DummyContainerVersion()
@@ -596,7 +601,7 @@ def test_archive_container_with_undeletion(self):
class DummyContainerVersion:
container_id = 5
path = '/my/container'
- map = {'a': 4}
+ map = {'a': 4} # @ReservedAssignment
ns_map = {'headers': {'b': 6}}
c = DummyContainerVersion()
@@ -640,7 +645,7 @@ def test_archive_container_with_no_change(self):
class DummyContainerVersion:
container_id = 5
path = '/my/container'
- map = {'a': 4}
+ map = {'a': 4} # @ReservedAssignment
ns_map = {'headers': {'b': 6}}
c = DummyContainerVersion()
@@ -681,7 +686,7 @@ def test_archive_container_with_item_rename(self):
class DummyContainerVersion:
container_id = 5
path = '/my/container'
- map = {'a': 4}
+ map = {'a': 4} # @ReservedAssignment
ns_map = {'headers': {'b': 6}}
c = DummyContainerVersion()
@@ -720,7 +725,7 @@ def test_archive_container_with_path_change(self):
class DummyContainerVersion:
container_id = 5
path = '/my/container'
- map = {'a': 4}
+ map = {'a': 4} # @ReservedAssignment
ns_map = None
c = DummyContainerVersion()
@@ -758,7 +763,7 @@ def test_archive_container_with_changing_docid(self):
class DummyContainerVersion:
container_id = 5
path = '/my/container'
- map = {'a': 4}
+ map = {'a': 4} # @ReservedAssignment
ns_map = {'headers': {'b': 6}}
c = DummyContainerVersion()
@@ -801,7 +806,7 @@ def test_container_contents_empty(self):
class DummyContainerVersion:
container_id = 5
path = '/my/container'
- map = {}
+ map = {} # @ReservedAssignment
ns_map = {}
archive.archive_container(DummyContainerVersion(), 'testuser')
@@ -824,7 +829,7 @@ def test_container_contents_non_empty(self):
class DummyContainerVersion:
container_id = 5
path = '/my/container'
- map = {'a': 4}
+ map = {'a': 4} # @ReservedAssignment
ns_map = {'headers': {'b': 6}}
archive.archive_container(DummyContainerVersion(), 'testuser')
@@ -852,7 +857,7 @@ def test_container_contents_with_deletion(self):
class DummyContainerVersion:
container_id = 5
path = '/my/container'
- map = {'a': 4}
+ map = {'a': 4} # @ReservedAssignment
ns_map = {'headers': {'b': 6}}
c = DummyContainerVersion()
@@ -1146,6 +1151,196 @@ def test_which_contain_deleted_ignore_moved(self):
expect = []
self.assertEqual(set(expect), set(actual))
+ def test_shred_with_object_success(self):
+ archive = self._make_default()
+ obj4 = self._make_dummy_object_version()
+ archive.archive(obj4)
+
+ class DummyContainerVersion:
+ def __init__(self, container_id, path):
+ self.container_id = container_id
+ self.path = path
+ self.map = {}
+ self.ns_map = {}
+
+ # Archive c5, which contains obj4.
+ c5 = DummyContainerVersion(5, '/c5')
+ c5.map = {'a': 4}
+ archive.archive_container(c5, 'user1')
+ # Now delete obj4 from c5.
+ c5.map = {}
+ archive.archive_container(c5, 'user1')
+
+ # Verify obj4 still exists.
+ contents = archive.container_contents(5)
+ self.assertEqual(len(contents.deleted), 1)
+ from repozitory.schema import ArchivedObject
+ rowcount = (archive.session.query(ArchivedObject).count())
+ self.assertEqual(rowcount, 1)
+
+ # Shred obj4.
+ archive.shred([4])
+
+ # Verify the object is no longer readable in any way.
+ contents = archive.container_contents(5)
+ self.assertFalse(contents.deleted)
+ rowcount = (archive.session.query(ArchivedObject).count())
+ self.assertEqual(rowcount, 0)
+
+ def test_shred_with_object_and_orphaned_blobs(self):
+ archive = self._make_default()
+ obj4 = self._make_dummy_object_version()
+ obj4.blobs = {'spam': StringIO('eggs'), 'ham': StringIO('bacon')}
+ archive.archive(obj4)
+
+ # Verify two blobs were created.
+ from repozitory.schema import ArchivedBlobInfo
+ rowcount = (archive.session.query(ArchivedBlobInfo).count())
+ self.assertEqual(rowcount, 2)
+
+ # Shred obj4.
+ archive.shred([4])
+
+ # Verify the blobs are no longer readable in any way.
+ rowcount = (archive.session.query(ArchivedBlobInfo).count())
+ self.assertEqual(rowcount, 0)
+
+ def test_shred_with_object_but_keep_a_shared_blob(self):
+ archive = self._make_default()
+ obj4 = self._make_dummy_object_version()
+ obj4.blobs = {'spam': StringIO('eggs'), 'ham': StringIO('bacon')}
+ archive.archive(obj4)
+ obj6 = self._make_dummy_object_version(6)
+ obj6.blobs = {'sausage': StringIO('eggs')}
+ archive.archive(obj6)
+
+ # Verify two blobs were created.
+ from repozitory.schema import ArchivedChunk
+ rowcount = (archive.session.query(ArchivedChunk).count())
+ self.assertEqual(rowcount, 2)
+
+ # Shred obj4.
+ archive.shred([4])
+
+ # Verify the 'eggs' blob still exists since it was shared with
+ # obj6.
+ row = (archive.session.query(ArchivedChunk).one())
+ self.assertEqual(row.data, 'eggs')
+
+ def test_shred_with_object_and_container_success(self):
+ archive = self._make_default()
+ obj4 = self._make_dummy_object_version()
+ archive.archive(obj4)
+
+ class DummyContainerVersion:
+ def __init__(self, container_id, path):
+ self.container_id = container_id
+ self.path = path
+ self.map = {}
+ self.ns_map = {}
+
+ # Archive c5, which contains obj4.
+ c5 = DummyContainerVersion(5, '/c5')
+ c5.map = {'a': 4}
+ archive.archive_container(c5, 'user1')
+
+ # Shred the document and the container at the same time.
+ archive.shred([4], [5])
+
+ # Verify the object and container are no longer readable in any way.
+ container_ids = archive.filter_container_ids([5])
+ self.assertFalse(container_ids)
+ from repozitory.schema import ArchivedObject
+ rowcount = (archive.session.query(ArchivedObject).count())
+ self.assertEqual(rowcount, 0)
+ from repozitory.schema import ArchivedContainer
+ rowcount = (archive.session.query(ArchivedContainer).count())
+ self.assertEqual(rowcount, 0)
+
+ def test_shred_must_not_remove_other_object_and_container(self):
+ archive = self._make_default()
+ obj4 = self._make_dummy_object_version()
+ archive.archive(obj4)
+ obj6 = self._make_dummy_object_version(6)
+ archive.archive(obj6)
+
+ class DummyContainerVersion:
+ def __init__(self, container_id, path):
+ self.container_id = container_id
+ self.path = path
+ self.map = {}
+ self.ns_map = {}
+
+ # Archive c5, which contains obj4.
+ c5 = DummyContainerVersion(5, '/c5')
+ c5.map = {'a': 4}
+ archive.archive_container(c5, 'user1')
+
+ # Archive c7, which contains obj6.
+ c7 = DummyContainerVersion(7, '/c7')
+ c7.map = {'a': 6}
+ archive.archive_container(c7, 'user1')
+
+ # Shred obj4 and c5.
+ archive.shred([4], [5])
+
+ # Verify obj6 and c7 still exist, but not obj4 or c5.
+ container_ids = archive.filter_container_ids([4, 5, 6, 7])
+ self.assertEqual(set(container_ids), set([7]))
+ from repozitory.schema import ArchivedObject
+ rows = (archive.session.query(ArchivedObject).all())
+ self.assertEqual(len(rows), 1)
+ self.assertEqual(rows[0].docid, 6)
+ from repozitory.schema import ArchivedContainer
+ rows = (archive.session.query(ArchivedContainer).all())
+ self.assertEqual(len(rows), 1)
+ self.assertEqual(rows[0].container_id, 7)
+ from repozitory.schema import ArchivedItem
+ rows = (archive.session.query(ArchivedItem).all())
+ self.assertEqual(len(rows), 1)
+ self.assertEqual(rows[0].container_id, 7)
+ self.assertEqual(rows[0].docid, 6)
+
+ def test_shred_must_not_delete_an_object_still_in_a_container(self):
+ archive = self._make_default()
+ obj4 = self._make_dummy_object_version()
+ archive.archive(obj4)
+
+ class DummyContainerVersion:
+ def __init__(self, container_id, path):
+ self.container_id = container_id
+ self.path = path
+ self.map = {}
+ self.ns_map = {}
+
+ # Archive c5, which contains obj4.
+ c5 = DummyContainerVersion(5, '/c5')
+ c5.map = {'a': 4}
+ archive.archive_container(c5, 'user1')
+
+ with self.assertRaises(ValueError):
+ archive.shred([4])
+
+ def test_shred_must_not_delete_a_non_empty_container(self):
+ archive = self._make_default()
+ obj4 = self._make_dummy_object_version()
+ archive.archive(obj4)
+
+ class DummyContainerVersion:
+ def __init__(self, container_id, path):
+ self.container_id = container_id
+ self.path = path
+ self.map = {}
+ self.ns_map = {}
+
+ # Archive c5, which contains obj4.
+ c5 = DummyContainerVersion(5, '/c5')
+ c5.map = {'a': 4}
+ archive.archive_container(c5, 'user1')
+
+ with self.assertRaises(ValueError):
+ archive.shred((), [5])
+
class DummyObjectVersion:
path = '/my/object'
2 setup.py
@@ -2,7 +2,7 @@
from setuptools import setup, find_packages
import os
-version = '1.1'
+version = '1.2'
here = os.path.abspath(os.path.dirname(__file__))
README = open(os.path.join(here, 'README.txt')).read()
