Skip to content
This repository has been archived by the owner on Mar 12, 2020. It is now read-only.

Commit

Permalink
Add DSpace submission support
Browse files Browse the repository at this point in the history
This adds a task for creating and submitting a SWORD package
to DSpace.
  • Loading branch information
Mike Graves committed Jun 1, 2015
1 parent e79a487 commit 7e666ad
Show file tree
Hide file tree
Showing 13 changed files with 247 additions and 3 deletions.
8 changes: 8 additions & 0 deletions docs/application.rst
@@ -0,0 +1,8 @@
Application Documentation
=========================

SWORD
-----

.. automodule:: kepler.sword
:members:
2 changes: 1 addition & 1 deletion docs/conf.py
Expand Up @@ -20,7 +20,7 @@
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#sys.path.insert(0, os.path.abspath('.'))
sys.path.append(os.path.abspath('../'))

# -- General configuration ------------------------------------------------

Expand Down
1 change: 1 addition & 0 deletions docs/index.rst
Expand Up @@ -48,3 +48,4 @@ Contents:
:maxdepth: 2

metadata
application
1 change: 1 addition & 0 deletions kepler/settings.py
Expand Up @@ -19,3 +19,4 @@ class TestConfig(DefaultConfig):
GEOSERVER_WORKSPACE = 'mit'
GEOSERVER_DATASTORE = 'data'
SOLR_URL = 'http://localhost:8983/solr/geoblacklight/'
SWORD_SERVICE_URL = 'http://example.com/sword'
99 changes: 99 additions & 0 deletions kepler/sword.py
@@ -0,0 +1,99 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from zipfile import ZipFile
import xml.etree.ElementTree as ET
import os.path
import io

import arrow
import requests
from flask import current_app


class SWORDPackage(object):
    """A SWORD Package.

    This provides a container for the metadata and data files comprising a
    SWORD package. Use the :func:`~kepler.sword.SWORDPackage.write` method
    to serialize the package::

        from kepler.sword import SWORDPackage
        pkg = SWORDPackage(uuid='1234', metadata='<xml...>')
        pkg.datafiles.append('some/data/file.tif')
        pkg.write('sword.zip')

    :param uuid: file uuid
    :param metadata: metadata string to be added to mods record
    :param datafiles: list of filenames to be added to SWORD package; when
        omitted, each instance gets its own new empty list
    """

    def __init__(self, uuid, metadata=None, datafiles=None):
        self.uuid = uuid
        self.metadata = metadata
        # A literal ``[]`` default is created once and shared by every
        # instance, so appending to one package's ``datafiles`` would leak
        # into all later packages. Build a fresh list per instance instead.
        self.datafiles = [] if datafiles is None else datafiles

    def write(self, file):
        """Serialize the SWORD package.

        The archive contains a ``mets.xml`` rendered by :func:`create_mets`
        plus the first entry of ``datafiles``, stored under its base name.
        Any further entries in ``datafiles`` are not written.

        :param file: file name or file pointer to write package to
        :raises IndexError: if ``datafiles`` is empty
        """

        datafile = self.datafiles[0]
        filename = os.path.basename(datafile)
        mets = create_mets(uuid=self.uuid, file_path=filename,
                           metadata=self.metadata,
                           create_date=arrow.utcnow().isoformat())
        # The context manager closes the archive even if a write fails.
        with ZipFile(file, 'w') as pkg:
            pkg.writestr('mets.xml', mets.encode('utf-8'))
            pkg.write(datafile, filename)


def submit(service, package):
    """Submit a SWORD package.

    Posts the package file to the given SWORD service and returns the text
    of the Atom ``id`` element found in the response, i.e. the handle of the
    created resource.

    :param service: URL for SWORD service
    :param package: path to SWORD package on file system
    :raises requests.HTTPError: propagated from ``raise_for_status`` when the
        service answers with an error status
    """

    package_name = os.path.basename(package)
    request_headers = sword_headers(package_name)
    # Hand requests the open file object so the body is read from disk.
    with io.open(package, 'rb') as stream:
        response = requests.post(service, data=stream,
                                 headers=request_headers)
    response.raise_for_status()
    entry = ET.fromstring(response.text)
    return entry.find('.//{http://www.w3.org/2005/Atom}id').text


def create_mets(**kwargs):
    """Render the METS document that goes into a SWORD package.

    The ``mets.xml`` template is loaded through the current app's Jinja
    environment and rendered with ``kwargs`` as its context.

    :param kwargs: context for METS template
    :returns: the rendered template text
    """

    template = current_app.jinja_env.get_template('mets.xml')
    return template.render(**kwargs)


def sword_headers(filename):
    """Build the HTTP headers sent with a SWORD package.

    :param filename: name of SWORD package, placed in ``Content-Disposition``
    :returns: dict mapping header names to values
    """

    disposition = 'filename=%s' % filename
    headers = {
        'Content-Type': 'application/zip',
        'Content-Disposition': disposition,
        'X-No-Op': 'false',
        'X-Packaging': 'http://purl.org/net/sword-types/METSDSpaceSIP',
    }
    return headers
10 changes: 10 additions & 0 deletions kepler/tasks.py
Expand Up @@ -3,6 +3,7 @@
from functools import partial
import io
import json
import tempfile

from flask import current_app
from ogre.xml import FGDCParser
Expand All @@ -13,6 +14,7 @@
from kepler.records import create_record
from kepler.services.solr import SolrServiceManager
from kepler.git import repository
from kepler import sword


def tasks(task_list):
Expand Down Expand Up @@ -53,5 +55,13 @@ def load_repo_records(repo):
yield json.load(fp)


def submit_to_dspace(record, filename):
    """Package a data file and submit it to DSpace over SWORD.

    Builds a :class:`~kepler.sword.SWORDPackage` for ``record`` containing
    ``filename``, writes it to a temporary zip file and posts that file to
    the service configured under ``SWORD_SERVICE_URL``.

    :param record: object whose ``uuid`` attribute identifies the item
    :param filename: path of the data file to include in the package
    :returns: the value returned by :func:`kepler.sword.submit`
    """
    # Pass an explicit list rather than appending to the package's default:
    # this keeps each submission's file list independent of earlier calls.
    pkg = sword.SWORDPackage(uuid=record.uuid, datafiles=[filename])
    with tempfile.NamedTemporaryFile(suffix='.zip') as fp:
        pkg.write(fp)
        # submit() re-opens the package by name, so push anything still
        # buffered on this handle out to the file first.
        fp.flush()
        handle = sword.submit(current_app.config['SWORD_SERVICE_URL'], fp.name)
    return handle


# Variants of upload_to_geoserver with the ``mimetype`` keyword pre-bound.
shapefile_upload_task = partial(upload_to_geoserver, mimetype='application/zip')
tiff_upload_task = partial(upload_to_geoserver, mimetype='image/tiff')
37 changes: 37 additions & 0 deletions kepler/templates/mets.xml
@@ -0,0 +1,37 @@
<?xml version="1.0" encoding="UTF-8"?>
<mets
xmlns="http://www.loc.gov/METS/"
xmlns:xlink="http://www.w3.org/1999/xlink"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.loc.gov/METS/ http://www.loc.gov/standards/mets/mets.xsd"
ID="sort-mets_mets"
OBJID="sword-mets"
LABEL="DSpace SWORD Item"
PROFILE="DSpace METS SIP Profile 1.0">
<metsHdr CREATEDATE="{{ create_date }}">
<agent ROLE="CUSTODIAN" TYPE="ORGANIZATION">
<name>GIS Services</name>
</agent>
</metsHdr>
<dmdSec ID="dmdSec_1">
<mdWrap MIMETYPE="text/xml" MDTYPE="MODS">
<xmlData>
{{ metadata }}
</xmlData>
</mdWrap>
</dmdSec>
<fileSec>
<fileGrp USE="ORIGINAL">
<file ID="{{ uuid }}">
<FLocat LOCTYPE="URL" xlink:href="{{ file_path }}" />
</file>
</fileGrp>
</fileSec>
<structMap TYPE="LOGICAL" LABEL="DSpace">
<div DMDID="dmdSec_1">
<div TYPE="File">
<fptr FILEID="{{ uuid }}" />
</div>
</div>
</structMap>
</mets>
18 changes: 17 additions & 1 deletion tests/conftest.py
Expand Up @@ -4,7 +4,7 @@

import pytest
from webtest import TestApp
from mock import patch
from mock import patch, Mock

from kepler.app import create_app
from kepler.extensions import db as _db
Expand Down Expand Up @@ -53,3 +53,19 @@ def pysolr_add():
patcher = patch('pysolr.Solr.add')
yield patcher.start()
patcher.stop()


@pytest.yield_fixture
def sword_service(sword):
    """Patch ``kepler.sword.requests`` for the duration of a test.

    The patched ``post`` returns a mock whose ``text`` is the canned SWORD
    response supplied by the ``sword`` fixture. Yields the patched module
    mock so tests can inspect the calls made on it.
    """
    with patch('kepler.sword.requests') as fake_requests:
        fake_requests.post.return_value = Mock(text=sword)
        yield fake_requests


@pytest.fixture
def sword():
    """Return the contents of ``tests/data/sword.xml`` as text."""
    with io.open('tests/data/sword.xml', 'r') as source:
        return source.read()
4 changes: 4 additions & 0 deletions tests/data/sword.xml
@@ -0,0 +1,4 @@
<?xml version="1.0" encoding="utf-8"?>
<entry xmlns="http://www.w3.org/2005/Atom">
<id>mit.edu:dusenbury-device:1</id>
</entry>
Binary file added tests/fixtures/sword.zip
Binary file not shown.
62 changes: 62 additions & 0 deletions tests/test_sword.py
@@ -0,0 +1,62 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
import xml.etree.ElementTree as ET
import tempfile
import zipfile

import pytest
from mock import Mock
from requests import HTTPError

from kepler.sword import *


pytestmark = pytest.mark.usefixtures('app')


class TestSWORDPackage(object):
    """Tests for serializing a :class:`SWORDPackage`."""

    def testWriteCreatesZipfile(self):
        """The written archive holds the METS file and the data file."""
        package = SWORDPackage(uuid='foobar', metadata='foobaz')
        package.datafiles.append('tests/data/grayscale.tif')
        with tempfile.TemporaryFile() as archive:
            package.write(archive)
            members = zipfile.ZipFile(archive).namelist()
            assert 'mets.xml' in members
            assert 'grayscale.tif' in members


def testCreateMetsPopulatesXMLTemplate():
    """The ``uuid`` context value ends up as the METS file element's ID."""
    rendered = create_mets(uuid='frob')
    root = ET.fromstring(rendered)
    file_elements = root.findall(".//{http://www.loc.gov/METS/}file")
    first_file = file_elements[0]
    assert first_file.attrib.get('ID') == 'frob'


def testSwordHeadersReturnsHeaders():
    """``sword_headers`` returns the full header set for a package name."""
    expected = {
        'Content-Type': 'application/zip',
        'Content-Disposition': 'filename=dusenbury_device',
        'X-No-Op': 'false',
        'X-Packaging': 'http://purl.org/net/sword-types/METSDSpaceSIP',
    }
    actual = sword_headers('dusenbury_device')
    assert actual == expected


def testSubmitPostsToDSpace(sword_service):
    """``submit`` posts the package file and headers to the service URL."""
    submit('http://example.com/sword', 'tests/fixtures/sword.zip')
    # call_args is an (args, kwargs) pair describing the recorded call.
    recorded = sword_service.post.call_args
    positional = recorded[0]
    keyword = recorded[1]
    assert positional[0] == 'http://example.com/sword'
    assert keyword.get('headers') == sword_headers('sword.zip')
    assert keyword.get('data').name == 'tests/fixtures/sword.zip'


def testSubmitReturnsHandle(sword_service):
    """``submit`` returns the Atom ``id`` text from the canned response."""
    result = submit('foo', 'tests/fixtures/sword.zip')
    assert result == 'mit.edu:dusenbury-device:1'


def testSubmitRaisesErrorOnFailedSubmission(sword_service):
    """An ``HTTPError`` from ``raise_for_status`` propagates out of submit."""
    failing_response = Mock(**{'raise_for_status.side_effect': HTTPError})
    sword_service.post.return_value = failing_response
    with pytest.raises(HTTPError):
        submit('foo', 'tests/fixtures/sword.zip')
4 changes: 4 additions & 0 deletions tests/test_tasks.py
Expand Up @@ -54,3 +54,7 @@ def testIndexRecordsAddsRecordsToSolr(self, pysolr_add):
records = [{'uuid': 'foobar'}, {'uuid': 'foobaz'}]
index_records(records)
pysolr_add.assert_called_once_with(records)

def testSubmitToDspaceUploadsSwordPackage(self, sword_service):
    """Submitting a record triggers a POST through the patched requests."""
    record = Mock(uuid='abcd123')
    submit_to_dspace(record, 'tests/data/grayscale.tif')
    assert sword_service.post.called
4 changes: 3 additions & 1 deletion tox.ini
Expand Up @@ -39,5 +39,7 @@ commands =

[testenv:docs]
changedir = docs
deps = sphinx
deps =
sphinx
{[testenv]deps}
commands = sphinx-build -W -b html -d {envtmpdir}/doctrees . {envtmpdir}/html

0 comments on commit 7e666ad

Please sign in to comment.