Skip to content
This repository has been archived by the owner on Jun 18, 2024. It is now read-only.

Commit

Permalink
Remove ogre dependency
Browse files Browse the repository at this point in the history
This is the only place this code is being used now. Just bringing it
here to simplify things.
  • Loading branch information
Mike Graves committed Apr 12, 2017
1 parent a9918a7 commit c2eb55c
Show file tree
Hide file tree
Showing 6 changed files with 287 additions and 39 deletions.
19 changes: 8 additions & 11 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,15 @@
verify_ssl = true
url = "https://pypi.python.org/simple"

[dev-packages]
coveralls = "*"
pytest = "*"
pytest-cov = "*"
requests-mock = "*"
mock = "*"

[packages]
arrow = "*"
bagit = "*"
click = "*"
geomet = "*"
Expand All @@ -11,14 +19,3 @@ pyshp = "*"
requests = "*"
GeoAlchemy2 = "*"
PlyPlus = "*"

[dev-packages]
coveralls = "*"
pytest = "*"
pytest-cov = "*"
requests-mock = "*"
mock = "*"

[packages.ogre-toolkit]
git = "https://github.com/MITLibraries/ogre-toolkit.git"
ref = "master"
14 changes: 9 additions & 5 deletions Pipfile.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 6 additions & 3 deletions slingshot/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,13 @@

import bagit
from geomet import wkt
from ogre.xml import FGDCParser
import requests
from shapefile import Reader

from slingshot.db import engine, multiply, prep_field, table
from slingshot.parsers import FGDCParser, parse
from slingshot.proj import parser
from slingshot.record import create_record as _create_record
from slingshot.record import MitRecord


GEOM_TYPES = {
Expand Down Expand Up @@ -48,7 +48,10 @@ def make_bag_dir(destination, overwrite=False):


def create_record(bag, public, secure, **kwargs):
record = _create_record(bag.fgdc, FGDCParser, **kwargs)
r = parse(bag.fgdc, FGDCParser)
r.update(**kwargs)
record = MitRecord(solr_geom=(r['_bbox_w'], r['_bbox_e'], r['_bbox_n'],
r['_bbox_s']), **r)
gs = public if record.dc_rights_s == 'Public' else secure
gs = gs.rstrip('/')
refs = {
Expand Down
77 changes: 77 additions & 0 deletions slingshot/parsers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
try:
from lxml.etree import iterparse
except ImportError:
from xml.etree.ElementTree import iterparse


def parse(fp, parser):
    """
    Stream XML from ``fp`` through a parser class and return its record.

    ``parser`` is called with no arguments to create a handler object.
    The document is then read with ``iterparse``: each ``start`` event is
    passed to the handler's ``start_handler`` and every other event to its
    ``end_handler``. Whatever the handler has stored on its ``record``
    attribute is returned once the document is exhausted.

    :param fp: file name or file pointer containing XML data
    :param parser: parser class to use for parsing
    """

    handler = parser()
    for event, elem in iterparse(fp, events=('start', 'end')):
        if event != 'start':
            handler.end_handler(elem)
            # The element has been handled; drop its contents.
            elem.clear()
            continue
        handler.start_handler(elem)
    return handler.record


class FGDCParser(object):
    """An FGDC XML parser."""

    # FGDC tag -> record key, for tags whose text is stored as a single
    # value (a later element with the same tag overwrites an earlier one).
    _SINGLE_VALUED = {
        'title': 'dc_title_s',
        'abstract': 'dc_description_s',
        'publish': 'dc_publisher_s',
        'westbc': '_bbox_w',
        'eastbc': '_bbox_e',
        'northbc': '_bbox_n',
        'southbc': '_bbox_s',
        'accconst': 'dc_rights_s',
        'sdtstype': 'layer_geom_type_s',
    }

    # FGDC tag -> record key, for tags whose texts are collected in a set.
    _MULTI_VALUED = {
        'origin': 'dc_creator_sm',
        'themekey': 'dc_subject_sm',
        'placekey': 'dct_spatial_sm',
    }

    def __init__(self):
        #: Parsed GeoBlacklight record
        self.record = {}

    def start_handler(self, elem):
        """
        Start handler called when encountering a new element.

        Does nothing; all fields are collected in ``end_handler``.
        """

        return None

    def end_handler(self, elem):
        """
        End handler called when encountering the end of an element.

        Copies the element's text into ``self.record`` under the key that
        corresponds to its tag. Elements with empty or missing text, and
        elements with unrecognised tags, are ignored.
        """

        tag, text = elem.tag, elem.text
        if not text:
            return

        if tag in self._SINGLE_VALUED:
            self.record[self._SINGLE_VALUED[tag]] = text
        elif tag in self._MULTI_VALUED:
            values = self.record.setdefault(self._MULTI_VALUED[tag], set())
            values.add(text)
        elif tag == 'direct' and text.lower() == 'raster':
            # ``direct`` only contributes a geometry type for raster data.
            self.record['layer_geom_type_s'] = text
194 changes: 186 additions & 8 deletions slingshot/record.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,32 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import

import json
import arrow
from ogre.record import Record
from ogre.xml import parse

from slingshot.parsers import parse

def create_record(metadata, parser, **kwargs):
    """
    Parse ``metadata`` with ``parser`` and build a ``MitRecord`` from it.

    The parsed fields are updated with ``kwargs`` (keyword arguments win
    over parsed values). The ``_bbox_w``, ``_bbox_e``, ``_bbox_n`` and
    ``_bbox_s`` entries are passed, in that order, as ``solr_geom``; a
    missing entry raises ``KeyError``.

    :param metadata: XML source handed to ``parse``
    :param parser: parser class handed to ``parse``
    :param kwargs: extra or overriding record fields
    """
    fields = parse(metadata, parser)
    fields.update(kwargs)
    bbox = tuple(fields[key]
                 for key in ('_bbox_w', '_bbox_e', '_bbox_n', '_bbox_s'))
    return MitRecord(solr_geom=bbox, **fields)

class Enum(object):
    """
    Decorator that restricts a method's arguments to a fixed set of values.

    Instantiate with the allowed values, then apply to a method::

        @Enum('Public', 'Restricted')
        def dc_rights_s(self, value):
            ...

    Calling the decorated method with any argument (after the first) that
    is not one of the allowed values raises ``InvalidDataError``.
    """

    def __init__(self, *args):
        #: Tuple of allowed values
        self.enums = args

    def __call__(self, f):
        # Imported here so the decorator does not rely on a module-level
        # import being present.
        from functools import wraps

        @wraps(f)  # keep the wrapped method's __name__ and __doc__
        def wrapped(*args):
            # The first positional argument is skipped; in this file the
            # decorator wraps property setters, where it is ``self``.
            for arg in args[1:]:
                if arg not in self.enums:
                    raise InvalidDataError(f.__name__, arg)
            # Propagate the result instead of silently dropping it.
            return f(*args)
        return wrapped


def optional(f):
    """
    Wrap a one-argument method so that ``AttributeError`` becomes ``None``.

    The returned function calls ``f(self)`` and hands back its result; if
    the call raises ``AttributeError`` the result is ``None`` instead.
    """
    def wrapped(self):
        try:
            value = f(self)
        except AttributeError:
            value = None
        return value
    return wrapped


def rights_mapper(term):
Expand All @@ -36,6 +51,163 @@ def geometry_mapper(term):
return term


class Record(object):
    """
    A metadata record that can be serialised to a dict or JSON document.

    Plain fields are ordinary attributes defaulting to ``None``. The
    remaining fields are properties that normalise their value on
    assignment (to a ``set``, a ``dict`` or a formatted string) or validate
    it against a fixed list of allowed values.

    The getters for ``dc_rights_s``, ``dct_references_s``,
    ``layer_geom_type_s`` and ``solr_geom`` are not wrapped in ``optional``,
    so reading them (including through ``as_dict``) before they have been
    assigned raises ``AttributeError``.
    """

    dc_description_s = None
    dc_identifier_s = None
    dc_language_s = None
    dc_publisher_s = None
    dc_title_s = None
    dct_issued_dt = None
    dct_provenance_s = None
    geoblacklight_version = None
    # ``as_dict`` reads ``layer_id_s``; without this default an unset value
    # raised AttributeError there.
    layer_id_s = None
    # Misspelled name from the original class, kept so existing readers of
    # ``layer_id_is`` keep working.
    layer_id_is = None
    layer_modified_dt = None
    layer_slug_s = None

    def __init__(self, **kwargs):
        """Assign every keyword argument as an attribute of the record."""
        for k, v in kwargs.items():
            setattr(self, k, v)

    @property
    @optional
    def dc_creator_sm(self):
        return self._dc_creator_sm

    @dc_creator_sm.setter
    def dc_creator_sm(self, value):
        self._dc_creator_sm = set(value)

    @property
    @optional
    def dc_format_s(self):
        return self._dc_format_s

    @dc_format_s.setter
    def dc_format_s(self, value):
        self._dc_format_s = value

    @property
    def dc_rights_s(self):
        return self._dc_rights_s

    @dc_rights_s.setter
    @Enum('Public', 'Restricted')
    def dc_rights_s(self, value):
        self._dc_rights_s = value

    @property
    @optional
    def dc_source_sm(self):
        return self._dc_source_sm

    @dc_source_sm.setter
    def dc_source_sm(self, value):
        # Must be the attribute the getter reads; it was previously stored
        # under the misspelled ``_dc_source_dm`` and so never came back.
        self._dc_source_sm = set(value)

    @property
    @optional
    def dc_subject_sm(self):
        return self._dc_subject_sm

    @dc_subject_sm.setter
    def dc_subject_sm(self, value):
        self._dc_subject_sm = set(value)

    @property
    @optional
    def dc_type_s(self):
        return self._dc_type_s

    @dc_type_s.setter
    @Enum('Dataset', 'Image', 'PhysicalObject')
    def dc_type_s(self, value):
        self._dc_type_s = value

    @property
    @optional
    def dct_isPartOf_sm(self):
        return self._dct_isPartOf_sm

    @dct_isPartOf_sm.setter
    def dct_isPartOf_sm(self, value):
        self._dct_isPartOf_sm = set(value)

    @property
    def dct_references_s(self):
        return self._dct_references_s

    @dct_references_s.setter
    def dct_references_s(self, value):
        self._dct_references_s = dict(value)

    @property
    @optional
    def dct_spatial_sm(self):
        return self._dct_spatial_sm

    @dct_spatial_sm.setter
    def dct_spatial_sm(self, value):
        self._dct_spatial_sm = set(value)

    @property
    @optional
    def dct_temporal_sm(self):
        return self._dct_temporal_sm

    @dct_temporal_sm.setter
    def dct_temporal_sm(self, value):
        self._dct_temporal_sm = set(value)

    @property
    def layer_geom_type_s(self):
        return self._layer_geom_type_s

    @layer_geom_type_s.setter
    @Enum('Point', 'Line', 'Polygon', 'Raster', 'Scanned Map', 'Mixed')
    def layer_geom_type_s(self, value):
        self._layer_geom_type_s = value

    @property
    def solr_geom(self):
        return self._solr_geom

    @solr_geom.setter
    def solr_geom(self, values):
        """W,E,N,S"""
        self._solr_geom = "ENVELOPE({}, {}, {}, {})".format(*values)

    def as_dict(self):
        """
        Return the record as a plain dict.

        Set-valued fields are converted to lists and ``dct_references_s``
        is serialised to a JSON string. Entries whose value is falsy
        (``None``, empty string, empty list) are left out of the result.

        :raises AttributeError: if ``dc_rights_s``, ``dct_references_s``,
            ``layer_geom_type_s`` or ``solr_geom`` was never assigned
        """
        record = {
            'dc_creator_sm': list(self.dc_creator_sm or []),
            'dc_description_s': self.dc_description_s,
            'dc_format_s': self.dc_format_s,
            'dc_identifier_s': self.dc_identifier_s,
            'dc_language_s': self.dc_language_s,
            'dc_publisher_s': self.dc_publisher_s,
            'dc_rights_s': self.dc_rights_s,
            'dc_source_sm': list(self.dc_source_sm or []),
            'dc_subject_sm': list(self.dc_subject_sm or []),
            'dc_title_s': self.dc_title_s,
            'dc_type_s': self.dc_type_s,
            'dct_isPartOf_sm': list(self.dct_isPartOf_sm or []),
            'dct_issued_dt': self.dct_issued_dt,
            'dct_provenance_s': self.dct_provenance_s,
            'dct_references_s': json.dumps(self.dct_references_s),
            'dct_spatial_sm': list(self.dct_spatial_sm or []),
            'dct_temporal_sm': list(self.dct_temporal_sm or []),
            'geoblacklight_version': self.geoblacklight_version,
            'layer_geom_type_s': self.layer_geom_type_s,
            'layer_id_s': self.layer_id_s,
            'layer_modified_dt': self.layer_modified_dt,
            'layer_slug_s': self.layer_slug_s,
            'solr_geom': self.solr_geom,
        }
        return {k: v for k, v in record.items() if v}

    def to_json(self):
        """Return ``as_dict()`` serialised as a JSON string."""
        return json.dumps(self.as_dict())


class MitRecord(Record):
dct_provenance_s = 'MIT'
geoblacklight_version = '1.0'
Expand All @@ -60,3 +232,9 @@ def layer_modified_dt(self):
def layer_modified_dt(self, value):
self._layer_modified_dt = \
arrow.get(value).format('YYYY-MM-DDTHH:mm:ss') + 'Z'


class InvalidDataError(Exception):
    """
    Error carrying the name of a field and the value rejected for it.

    :param field: name of the field being assigned
    :param value: the rejected value
    """

    def __init__(self, field, value):
        self.field, self.value = field, value
Loading

0 comments on commit c2eb55c

Please sign in to comment.