Added the shred method for deleting objects and containers permanently.

hathawsh committed Jan 30, 2012
1 parent fdf4096 commit c17b7fa
Showing 7 changed files with 338 additions and 20 deletions.
5 changes: 5 additions & 0 deletions CHANGES.txt
@@ -1,3 +1,8 @@
1.2 (2012-01-30)
----------------

- Added the ``shred`` method for deleting objects and containers permanently.

1.1 (2012-01-24)
----------------

6 changes: 3 additions & 3 deletions docs/conf.py
@@ -45,16 +45,16 @@

# General information about the project.
project = u'Repozitory'
copyright = u'2011, Shane Hathaway'
copyright = u'2011-2012, Shane Hathaway'

# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
version = '1.0'
version = '1.2'
# The full version, including alpha/beta/rc tags.
release = '1.0'
release = '1.2'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
110 changes: 108 additions & 2 deletions repozitory/archive.py
@@ -23,10 +23,13 @@
from zope.sqlalchemy import ZopeTransactionExtension
import datetime
import hashlib
import logging
import tempfile

_global_sessions = {} # {db_string: SQLAlchemy session}

log = logging.getLogger(__name__)


def forget_sessions():
_global_sessions.clear()
@@ -278,7 +281,7 @@ def history(self, docid, only_current=False):
for row in rows]

def get_version(self, docid, version_num):
"""Return a specific IObjectHistoryRecord for a document.
"""Return a specific IObjectHistoryRecord for an object.
"""
created = (self.session.query(ArchivedObject.created)
.filter_by(docid=docid)
@@ -531,7 +534,7 @@ def which_contain_deleted(self, container_ids, max_depth=None):
session.query(ArchivedItem.docid)
.filter(ArchivedItem.docid.in_(docids))
.all())
# For each deleted item, add to the list of results
# For each deleted (not moved) item, add to the list of results
# and remove from the set of containers to examine further.
for (container_id, docid) in deleted_rows:
if docid not in moved:
@@ -568,6 +571,109 @@ def which_contain_deleted(self, container_ids, max_depth=None):

return res

def shred(self, docids=(), container_ids=()):
"""Delete the specified objects and containers permanently.
The objects to shred must not exist in any container
(exempting the containers to be shredded) and the
containers must not contain any objects (exempting the
objects to be shredded). If these rules are not met, this
method will raise a ValueError.
"""
session = self.session
conflicting_item = None

if docids:
# Verify none of the objects exist in any container
# (except the containers to be shredded.)
q = session.query(ArchivedItem).filter(
ArchivedItem.docid.in_(docids))
if container_ids:
q = q.filter(~ArchivedItem.container_id.in_(container_ids))
conflicting_item = q.order_by(ArchivedItem.docid).first()

if container_ids and conflicting_item is None:
# Verify none of the containers contain any objects
# (except the objects to be shredded.)
q = session.query(ArchivedItem).filter(
ArchivedItem.container_id.in_(container_ids))
if docids:
q = q.filter(~ArchivedItem.docid.in_(docids))
conflicting_item = q.order_by(ArchivedItem.container_id).first()

if conflicting_item is not None:
raise ValueError("Document %d is still in container %d" % (
conflicting_item.docid, conflicting_item.container_id))

# List the blob_ids referenced by the objects to shred.
# (Later, orphaned blobs will also be shredded.)
if docids:
blob_id_rows = (session.query(ArchivedBlobLink.blob_id)
.filter(ArchivedBlobLink.docid.in_(docids))
.all())
blob_ids = set(blob_id for (blob_id,) in blob_id_rows)
else:
blob_ids = None

if container_ids:
# Shred the specified containers.
# (Although we could rely on cascading, it seems useful to
# delete the rows explicitly to prevent accidents.)
log.warning("Shredding containers: %s", container_ids)
(session.query(ArchivedItem)
.filter(ArchivedItem.container_id.in_(container_ids))
.delete(False))
(session.query(ArchivedItemDeleted)
.filter(ArchivedItemDeleted.container_id.in_(container_ids))
.delete(False))
(session.query(ArchivedContainer)
.filter(ArchivedContainer.container_id.in_(container_ids))
.delete(False))

if docids:
# Shred the specified objects.
log.warning("Shredding objects: %s", container_ids)
(session.query(ArchivedItem)
.filter(ArchivedItem.docid.in_(docids))
.delete(False))
(session.query(ArchivedItemDeleted)
.filter(ArchivedItemDeleted.docid.in_(docids))
.delete(False))
(session.query(ArchivedCurrent)
.filter(ArchivedCurrent.docid.in_(docids))
.delete(False))
(session.query(ArchivedState)
.filter(ArchivedState.docid.in_(docids))
.delete(False))
(session.query(ArchivedBlobLink)
.filter(ArchivedBlobLink.docid.in_(docids))
.delete(False))
(session.query(ArchivedObject)
.filter(ArchivedObject.docid.in_(docids))
.delete(False))

if blob_ids:
keep_blob_rows = (session.query(ArchivedBlobLink.blob_id)
.filter(ArchivedBlobLink.blob_id.in_(blob_ids))
.all())
keep_blob_ids = set(blob_id for (blob_id,) in keep_blob_rows)
orphaned_blob_ids = blob_ids.difference(keep_blob_ids)

if orphaned_blob_ids:
# Shred the orphaned blobs.
log.warning("Shredding orphaned blobs: %s", orphaned_blob_ids)
(session.query(ArchivedChunk)
.filter(ArchivedChunk.blob_id.in_(orphaned_blob_ids))
.delete(False))
(session.query(ArchivedBlobInfo)
.filter(ArchivedBlobInfo.blob_id.in_(orphaned_blob_ids))
.delete(False))

# Above, we use delete(False), i.e. delete(synchronize_session=False),
# for speed. Per the SQLAlchemy docs, the session should be expired
# after such a bulk delete, because objects already loaded in the
# session are not synchronized with the rows just deleted.
session.expire_all()


class ObjectHistoryRecord(object):
implements(IObjectHistoryRecord)
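For callers, the new method looks like this. A minimal usage sketch, not part of the commit: it assumes an already-configured archive object providing IArchive, and docid 42 is a hypothetical identifier; transaction.commit() applies because the session is set up with ZopeTransactionExtension.

import transaction

try:
    # Permanently delete the archived history of object 42. This only
    # succeeds if object 42 is no longer an item of any container.
    archive.shred(docids=[42])
except ValueError as e:
    # shred() refuses when the object still appears in a container that
    # is not being shredded in the same call.
    print("refusing to shred: %s" % e)
else:
    transaction.commit()
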
14 changes: 13 additions & 1 deletion repozitory/interfaces.py
@@ -136,7 +136,7 @@ def filter_container_ids(container_ids):
Returns a sequence containing a subset of the provided container_ids.
"""

def which_contain_deleted(self, container_ids, max_depth=None):
def which_contain_deleted(container_ids, max_depth=None):
"""Returns the subset of container_ids that have had something deleted.
This is useful for building a hierarchical trash UI that allows
@@ -150,6 +150,18 @@ def which_contain_deleted(self, container_ids, max_depth=None):
(Most other methods make no such assumption.)
"""

def shred(docids=(), container_ids=()):
"""Delete the specified objects and containers permanently.
The objects to shred must not exist in any container
(exempting the containers to be shredded) and the
containers must not contain any objects (exempting the
objects to be shredded). If these rules are not met, this
method will raise a ValueError.
Returns None.
"""


class IObjectVersion(IDCDescriptiveProperties, IDCTimes):
"""The content of an object for version control.
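The docstring describes a mutual exemption: objects being shredded may still be items of containers shredded in the same call, and those containers may still hold the objects being shredded. A short sketch of that rule, with hypothetical ids and an archive object assumed to provide IArchive:

# Container 7 is assumed to hold exactly objects 42 and 43, and nothing
# else references them.

# Shredding the container alone is rejected, because it still contains
# objects:
#   archive.shred(container_ids=[7])   # raises ValueError

# Shredding the container together with its contents succeeds, because
# each check exempts the ids named in the other argument:
archive.shred(docids=[42, 43], container_ids=[7])
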
2 changes: 1 addition & 1 deletion repozitory/schema.py
@@ -116,7 +116,7 @@ class ArchivedBlobLink(Base):
version_num = Column(Integer, primary_key=True, nullable=False)
name = Column(Unicode, primary_key=True, nullable=False)
blob_id = Column(Integer, ForeignKey('archived_blob_info.blob_id'),
nullable=True)
nullable=True, index=True)

__table_args__ = (
ForeignKeyConstraint(
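The new index on the blob_id column supports the orphan check in shred, which filters ArchivedBlobLink rows by blob_id before deleting unreferenced blobs. Databases created before this change may not pick up the index automatically; the following one-off migration is a hypothetical sketch, not part of the commit (the index name assumes SQLAlchemy's ix_<table>_<column> convention for index=True columns, and the DSN is a placeholder).

from sqlalchemy import Index, create_engine
from repozitory.schema import ArchivedBlobLink

engine = create_engine('postgresql:///repozitory')  # placeholder DSN
# Create just the new index on an existing database.
Index('ix_archived_blob_link_blob_id',
      ArchivedBlobLink.__table__.c.blob_id).create(engine)
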
