Skip to content

Commit

Permalink
Correctly handle layers with multiple XML files
Browse files Browse the repository at this point in the history
Some data layers can legitimately contain more than one XML file. We
need to be able to differentiate between them in order to locate the
FGDC metadata. This commit adds basic support for that by checking the
start of each XML file for a <metadata> root element. A more robust
implementation should incrementally parse the beginning of an XML file
to find the root element.
  • Loading branch information
Mike Graves committed Mar 16, 2018
1 parent 4e08916 commit b51147a
Showing 1 changed file with 46 additions and 8 deletions.
54 changes: 46 additions & 8 deletions slingshot/app.py
@@ -1,5 +1,6 @@
import base64
import os
import re
import shutil
import uuid
from zipfile import ZipFile
Expand Down Expand Up @@ -179,6 +180,7 @@ def commit(self):
class GeoBag(object):
def __init__(self, bag):
self.bag = bag
self._fgdc = None
try:
self.record = MitRecord.from_file(self.gbl_record)
except Exception:
Expand All @@ -197,7 +199,21 @@ def gbl_record(self):

@property
def fgdc(self):
    """Full path to the FGDC metadata file.

    A package can contain several XML files. When it does, the first
    1024 characters of each one are inspected and the first file whose
    root element is ``<metadata>`` is selected. When there is exactly
    one XML file it is used without being opened. A successful result is
    cached on ``self._fgdc``.

    Returns ``None`` when several XML files are present and none of them
    starts with a ``<metadata>`` root element. Exceptions raised by
    ``_files_by_ext`` propagate unchanged.
    """
    if self._fgdc:
        return self._fgdc
    files = self._files_by_ext('.xml')
    if len(files) > 1:
        # The root element may be preceded by an XML declaration,
        # comments or a DOCTYPE, so skip over those before matching.
        root = re.compile(
            r'\s*(?:<\?xml.*?\?>\s*)?'
            r'(?:(?:<!--.*?-->|<!DOCTYPE[^>]*>)\s*)*'
            r'<metadata[\s/>]',
            re.I | re.S)
        for f in files:
            # utf-8-sig drops a leading BOM; errors="replace" keeps a
            # sibling file in another encoding from aborting the search.
            with open(f, encoding="utf-8-sig", errors="replace") as fp:
                head = fp.read(1024)
            if root.match(head):
                self._fgdc = f
                break
    else:
        self._fgdc = files[0]
    return self._fgdc

def save(self):
self.record.to_file(self.gbl_record)
Expand All @@ -206,15 +222,29 @@ def save(self):
def is_valid(self):
return self.bag.is_valid()

def _files_by_ext(self, ext):
    """Return list of full paths to payload files with given extension.

    The extension comparison is case-insensitive. Each matching name
    from ``self.bag.payload_files()`` is joined onto ``str(self.bag)``.

    Raises :class:`slingshot.app.MissingFile` exception when no
    file with the given extension is found.
    """
    suffix = ext.lower()
    fnames = [f for f in self.bag.payload_files()
              if f.lower().endswith(suffix)]
    if not fnames:
        raise MissingFile('Could not find file with extension {}'
                          .format(ext))
    root = str(self.bag)
    return [os.path.join(root, name) for name in fnames]

def _file_by_ext(self, ext):
    """Return full path to a single file with the given extension.

    Raises :class:`slingshot.app.TooManyFiles` exception when more
    than one file with the specified extension is found. Exceptions
    raised by ``_files_by_ext`` propagate unchanged.
    """
    fnames = self._files_by_ext(ext)
    if len(fnames) > 1:
        raise TooManyFiles('Multiple files with extension {}'.format(ext))
    # _files_by_ext already returns full paths, so no further joining is
    # needed, and indexing (not pop) leaves the list untouched.
    return fnames[0]


class GeoTiffBag(GeoBag):
Expand All @@ -228,7 +258,7 @@ def name(self):
def tif(self):
    """Return the full path to the package's TIFF file.

    Looks for a ``.tif`` file first and falls back to ``.tiff`` only
    when no ``.tif`` file exists. Any other exception from
    ``_file_by_ext`` propagates, so it is not hidden by the fallback.
    """
    try:
        return self._file_by_ext('.tif')
    except MissingFile:
        return self._file_by_ext('.tiff')


Expand Down Expand Up @@ -337,3 +367,11 @@ def close(self):
self.shx.close()
if self.dbf:
self.dbf.close()


class MissingFile(Exception):
    """Raised when a spatial data package lacks a required file."""


class TooManyFiles(Exception):
    """Raised when a spatial data package has too many files of one extension."""

0 comments on commit b51147a

Please sign in to comment.