Skip to content
This repository has been archived by the owner on Mar 12, 2020. It is now read-only.

Commit

Permalink
Retrieve GeoTIFF URL
Browse files Browse the repository at this point in the history
Initial work on #71

Integrating into the Job task flow remains.

At this time, the server from which a record is retrieved for a given
handle must be specified explicitly. It seems theoretically possible to
guess the paths from the result of a handle redirection, but for our use
case requiring a specific server seemed fine.
  • Loading branch information
JPrevost committed Oct 28, 2015
1 parent bfffb3d commit 4084c25
Show file tree
Hide file tree
Showing 6 changed files with 103 additions and 2 deletions.
1 change: 1 addition & 0 deletions kepler/settings.py
Expand Up @@ -20,6 +20,7 @@ class TestConfig(DefaultConfig):
GEOSERVER_RESTRICTED_URL = 'http://example.com/secure-geoserver/'
GEOSERVER_WORKSPACE = 'mit'
GEOSERVER_DATASTORE = 'data'
OAI_ORE_URL = 'http://example.com/metadata/handle/'
SOLR_URL = 'http://localhost:8983/solr/geoblacklight/'
SWORD_SERVICE_URL = 'http://example.com/sword'
SWORD_SERVICE_USERNAME = 'swordymcswordmuffin'
Expand Down
29 changes: 27 additions & 2 deletions kepler/tasks.py
Expand Up @@ -13,13 +13,15 @@
"""

from __future__ import absolute_import
import xml.etree.ElementTree as ET
import tempfile
import uuid

from flask import current_app
from ogre.xml import FGDCParser
from lxml import etree
import pysolr
import requests

from kepler.geoserver import put, wfs_url, wms_url
from kepler.bag import get_fgdc, get_shapefile, get_geotiff
Expand Down Expand Up @@ -76,12 +78,35 @@ def submit_to_dspace(job, data):
pkg.metadata = _fgdc_to_mods(get_fgdc(data))
with tempfile.NamedTemporaryFile(suffix='.zip') as fp:
pkg.write(fp)
handle = sword.submit(current_app.config['SWORD_SERVICE_URL'], fp.name,
auth=(username, password))
handle = sword.submit(current_app.config['SWORD_SERVICE_URL'],
fp.name, auth=(username, password))
job.item.handle = handle
db.session.commit()


def get_geotiff_url_from_dspace(job):
    """Retrieve the GeoTIFF URL from a DSpace Handle.

    Requests the OAI-ORE document for the item's handle from the server
    configured as ``OAI_ORE_URL`` and stores the first TIFF bitstream URL
    found in it on ``job.item.geotiff_url``.

    .. note:: assumes the OAI-ORE only contains a single TIFF.

    :param job: :class:`~kepler.models.Job`
    :raises requests.exceptions.HTTPError: if the ORE request returns an
        error status
    :raises IndexError: if the ORE document lists no TIFF bitstream
    """
    rdf_ns = '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}'
    handle = job.item.handle.replace('http://hdl.handle.net/', '')
    ore_url = current_app.config['OAI_ORE_URL'] + handle + '/ore.xml'
    r = requests.get(ore_url)
    r.raise_for_status()
    # Parse the raw bytes so the XML parser honours the document's own
    # encoding declaration rather than receiving an already-decoded string.
    doc = ET.fromstring(r.content)
    url_list = []
    for description in doc.findall('.//' + rdf_ns + 'Description'):
        url = description.get(rdf_ns + 'about')
        # rdf:about may be absent, in which case get() returns None.
        if url and '.tif?' in url:
            url_list.append(url)
    if not url_list:
        # Same exception type as the bare ``url_list[0]`` lookup would
        # raise, but with a message that says what went wrong.
        raise IndexError('No GeoTIFF URL found in {0}'.format(ore_url))
    job.item.geotiff_url = url_list[0]


def upload_shapefile(job, data):
"""Upload Shapefile to GeoServer.
Expand Down
7 changes: 7 additions & 0 deletions tests/conftest.py
Expand Up @@ -107,6 +107,13 @@ def fgdc(bag):
return os.path.join(bag, 'data/fgdc.xml')


@pytest.fixture
def oai_ore():
    """Return the text of the ``oai_ore.xml`` fixture file."""
    ore_path = _fixture_path('oai_ore.xml')
    with io.open(ore_path, 'r') as ore_file:
        return ore_file.read()


@pytest.yield_fixture
def pysolr_add():
patcher = patch('pysolr.Solr.add')
Expand Down
57 changes: 57 additions & 0 deletions tests/fixtures/oai_ore.xml
@@ -0,0 +1,57 @@
<?xml version="1.0" encoding="UTF-8"?>
<atom:entry xmlns:atom="http://www.w3.org/2005/Atom" xmlns:ore="http://www.openarchives.org/ore/terms/" xmlns:oreatom="http://www.openarchives.org/ore/atom/" xmlns:dcterms="http://purl.org/dc/terms/">
<atom:id>http://example.com/oai/metadata/handle/1234.5/67890/ore.xml</atom:id>
<atom:link rel="alternate" href="http://hdl.handle.net/1234.5/67890"/>
<atom:link rel="http://www.openarchives.org/ore/terms/describes" href="http://example.com/oai/metadata/handle/1234.5/67890/ore.xml"/>
<atom:link rel="self" href="http://example.com/oai/metadata/handle/1234.5/67890/ore.xml#atom" type="application/atom+xml"/>
<atom:published>2015-10-28T09:54:24.339-04:00</atom:published>
<atom:updated>2015-10-28T09:54:24.343-04:00</atom:updated>
<atom:source>
<atom:generator uri="http://example.com/oai">Some Server Somewhere</atom:generator>
</atom:source>
<atom:title>This is a title.</atom:title>
<atom:category scheme="http://www.openarchives.org/ore/terms/" term="http://www.openarchives.org/ore/terms/Aggregation" label="Aggregation"/>
<atom:category scheme="http://www.openarchives.org/ore/atom/modified" term="2014-05-29T02:26:08.501-04:00"/>
<atom:category scheme="http://www.dspace.org/objectModel/" term="DSpaceItem" label="DSpace Item"/>
<atom:link rel="http://www.openarchives.org/ore/terms/aggregates" href="http://example.com/bitstream/handle/1234.5/67890/248077_cp.jpg.jpg?sequence=5" title="248077_cp.jpg.jpg" type="image/jpeg" length="2393"/>
<atom:link rel="http://www.openarchives.org/ore/terms/aggregates" href="http://example.com/bitstream/handle/1234.5/67890/248077_sv.jpg.jpg?sequence=6" title="248077_sv.jpg.jpg" type="image/jpeg" length="2539"/>
<atom:link rel="http://www.openarchives.org/ore/terms/aggregates" href="http://example.com/bitstream/handle/1234.5/67890/248077_tm.jpg.jpg?sequence=7" title="248077_tm.jpg.jpg" type="image/jpeg" length="1591"/>
<atom:link rel="http://www.openarchives.org/ore/terms/aggregates" href="http://example.com/bitstream/handle/1234.5/67890/248077.tif?sequence=4" title="248077.tif" type="image/tiff" length="20859226"/>
<atom:link rel="http://www.openarchives.org/ore/terms/aggregates" href="http://example.com/bitstream/handle/1234.5/67890/248077_cp.jpg?sequence=1" title="248077_cp.jpg" type="image/jpeg" length="1614753"/>
<atom:link rel="http://www.openarchives.org/ore/terms/aggregates" href="http://example.com/bitstream/handle/1234.5/67890/248077_sv.jpg?sequence=2" title="248077_sv.jpg" type="image/jpeg" length="5771544"/>
<atom:link rel="http://www.openarchives.org/ore/terms/aggregates" href="http://example.com/bitstream/handle/1234.5/67890/248077_tm.jpg?sequence=3" title="248077_tm.jpg" type="image/jpeg" length="10361"/>
<oreatom:triples>
<rdf:Description xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" rdf:about="http://example.com/oai/metadata/handle/1234.5/67890/ore.xml">
<rdf:type rdf:resource="http://www.dspace.org/objectModel/DSpaceItem"/>
<dcterms:modified>2014-05-29T02:26:08.501-04:00</dcterms:modified>
</rdf:Description>
<rdf:Description xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" rdf:about="http://example.com/bitstream/handle/1234.5/67890/248077_cp.jpg.jpg?sequence=5">
<rdf:type rdf:resource="http://www.dspace.org/objectModel/DSpaceBitstream"/>
<dcterms:description>THUMBNAIL</dcterms:description>
</rdf:Description>
<rdf:Description xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" rdf:about="http://example.com/bitstream/handle/1234.5/67890/248077_sv.jpg.jpg?sequence=6">
<rdf:type rdf:resource="http://www.dspace.org/objectModel/DSpaceBitstream"/>
<dcterms:description>THUMBNAIL</dcterms:description>
</rdf:Description>
<rdf:Description xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" rdf:about="http://example.com/bitstream/handle/1234.5/67890/248077_tm.jpg.jpg?sequence=7">
<rdf:type rdf:resource="http://www.dspace.org/objectModel/DSpaceBitstream"/>
<dcterms:description>THUMBNAIL</dcterms:description>
</rdf:Description>
<rdf:Description xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" rdf:about="http://example.com/bitstream/handle/1234.5/67890/248077.tif?sequence=4">
<rdf:type rdf:resource="http://www.dspace.org/objectModel/DSpaceBitstream"/>
<dcterms:description>HIDDEN</dcterms:description>
</rdf:Description>
<rdf:Description xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" rdf:about="http://example.com/bitstream/handle/1234.5/67890/248077_cp.jpg?sequence=1">
<rdf:type rdf:resource="http://www.dspace.org/objectModel/DSpaceBitstream"/>
<dcterms:description>ORIGINAL</dcterms:description>
</rdf:Description>
<rdf:Description xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" rdf:about="http://example.com/bitstream/handle/1234.5/67890/248077_sv.jpg?sequence=2">
<rdf:type rdf:resource="http://www.dspace.org/objectModel/DSpaceBitstream"/>
<dcterms:description>ORIGINAL</dcterms:description>
</rdf:Description>
<rdf:Description xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" rdf:about="http://example.com/bitstream/handle/1234.5/67890/248077_tm.jpg?sequence=3">
<rdf:type rdf:resource="http://www.dspace.org/objectModel/DSpaceBitstream"/>
<dcterms:description>ORIGINAL</dcterms:description>
</rdf:Description>
</oreatom:triples>
</atom:entry>
10 changes: 10 additions & 0 deletions tests/test_tasks.py
Expand Up @@ -3,6 +3,7 @@
import os.path

import pytest
import requests_mock
from mock import Mock, patch, DEFAULT

from kepler.tasks import *
Expand Down Expand Up @@ -56,6 +57,15 @@ def testSubmitToDspaceAddsHandleToItem(job, bag_tif):
assert job.item.handle == 'foobar'


def testGetGeotiffUrlFromDspaceAddsGeotiffUrlToItem(job, oai_ore):
    # The mocked ORE endpoint is TestConfig.OAI_ORE_URL + handle + /ore.xml.
    ore_url = 'http://example.com/metadata/handle/1234.5/67890/ore.xml'
    expected_url = ('http://example.com/bitstream/handle/1234.5/67890/'
                    '248077.tif?sequence=4')
    job.item.handle = 'http://hdl.handle.net/1234.5/67890'
    with requests_mock.mock() as mocked:
        mocked.get(ore_url, text=oai_ore)
        get_geotiff_url_from_dspace(job)
    assert job.item.geotiff_url == expected_url


def testSubmitToDspaceWithExistingHandleDoesNotSubmit(job, bag_tif):
job.item.handle = "popcorn"
with patch('kepler.tasks.sword.submit') as mock:
Expand Down
1 change: 1 addition & 0 deletions tox.ini
Expand Up @@ -11,6 +11,7 @@ deps =
webtest
pytest
mock
requests_mock
-r{toxinidir}/requirements.txt

[testenv:clean]
Expand Down

0 comments on commit 4084c25

Please sign in to comment.