Skip to content

Commit

Permalink
[#219] support templates when exporting
Browse files Browse the repository at this point in the history
  • Loading branch information
quicklizard99 committed Mar 15, 2016
1 parent 97ed70c commit ee42d75
Show file tree
Hide file tree
Showing 12 changed files with 139 additions and 83 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ This is an overview of major changes. Refer to the git repository for a full log

Version 0.1.22
-------------
- Added #219 - Command-line tools to use metadata and cookie-cutter templates (@quicklizard99)

Version 0.1.21
-------------
Expand Down
37 changes: 2 additions & 35 deletions inselect/gui/format_validation_problems.py
Original file line number Diff line number Diff line change
@@ -1,38 +1,5 @@
"""Functions for formatting metadata validation problems
"""

# These functions in gui rather than lib to allow for translations using Qt's
# tr()

from itertools import chain


def format_missing_mandatory(missing_mandatory):
    """Yield a message for every missing mandatory field.

    missing_mandatory is an iterable of (index, label, field) triples. The
    box number shown in each message is 1 + index.
    """
    for index, label, field in missing_mandatory:
        box_number = 1 + index
        yield u'Box [{0}] [{1}] lacks mandatory field [{2}]'.format(
            box_number, label, field
        )


def format_failed_parse(failed_parse):
    """Yield a message for every field whose value could not be parsed.

    failed_parse is an iterable of (index, label, field) triples. The box
    number shown in each message is 1 + index.
    """
    msg = u'Could not parse value of [{0}] for box [{1}] [{2}]'
    for index, label, field in failed_parse:
        # The template names the field first and then the box number and
        # label, so the arguments must be passed in that order. Passing
        # (1 + index, label, field) put the box number where the field name
        # belongs and the field where the label belongs.
        yield msg.format(field, 1 + index, label)


def format_missing_label(missing_label):
    """Yield a message for every box that has no object label.

    missing_label is an iterable of box indexes. The box number shown in
    each message is 1 + index.
    """
    for index in missing_label:
        box_number = 1 + index
        yield u'Missing object label for box [{0}]'.format(box_number)


def format_duplicated_labels(duplicated_labels):
    """Yield a message for every object label that is used more than once.

    duplicated_labels is an iterable of label values; each is inserted into
    its message unchanged.
    """
    for label in duplicated_labels:
        yield u'Duplicated object label [{0}]'.format(label)


def format_validation_problems(v):
    """Return an iterator over all validation failure messages for v.

    v must provide missing_mandatory, failed_parse, missing_label and
    duplicated_labels attributes. Messages are produced in that order.
    """
    missing_mandatory = format_missing_mandatory(v.missing_mandatory)
    failed_parse = format_failed_parse(v.failed_parse)
    missing_label = format_missing_label(v.missing_label)
    duplicated_labels = format_duplicated_labels(v.duplicated_labels)
    return chain(
        missing_mandatory, failed_parse, missing_label, duplicated_labels
    )
# TODO Hook validation functions into Qt's tr()
from inselect.lib.validate_document import format_validation_problems # noqa
10 changes: 6 additions & 4 deletions inselect/lib/document_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,13 +70,15 @@ def save_crops(self, document, progress=None):
shutil.rmtree(str(crops_dir), ignore_errors=True)

# Rename temp dir
msg = 'Moving temp crops dir [{0}] to [{1}]'
debug_print(msg.format(tempdir, crops_dir))
debug_print(u'Moving temp crops dir [{0}] to [{1}]'.format(
tempdir, crops_dir
))
tempdir.rename(crops_dir)
tempdir = None

msg = 'Saved [{0}] crops to [{1}]'
debug_print(msg.format(document.n_items, crops_dir))
debug_print(u'Saved [{0}] crops to [{1}]'.format(
document.n_items, crops_dir
))

return crops_dir
finally:
Expand Down
8 changes: 4 additions & 4 deletions inselect/lib/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,9 @@ def ingest_image(source, dest_dir,
"""
dest = dest_dir / source.name
if source != dest and dest.is_file():
raise InselectError('Destination image [{0}] exists'.format(dest))
raise InselectError(u'Destination image [{0}] exists'.format(dest))
else:
debug_print('Ingesting [{0}] to [{1}]'.format(source, dest))
debug_print(u'Ingesting [{0}] to [{1}]'.format(source, dest))

if source != dest:
source.rename(dest)
Expand All @@ -48,7 +48,7 @@ def ingest_image(source, dest_dir,
doc.ensure_thumbnail(thumbnail_width_pixels)

if default_metadata_items:
debug_print('Adding [{0}] default metadata items'.format(
debug_print(u'Adding [{0}] default metadata items'.format(
len(default_metadata_items)
))
doc.set_items(default_metadata_items)
Expand All @@ -64,6 +64,6 @@ def ingest_image(source, dest_dir,
make_readonly(doc.thumbnail.path)

# TODO LH Copy EXIF tags?
debug_print('Ingested [{0}] to [{1}]'.format(source, dest))
debug_print(u'Ingested [{0}] to [{1}]'.format(source, dest))

return doc
12 changes: 10 additions & 2 deletions inselect/lib/user_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from collections import namedtuple, OrderedDict
from functools import partial
from pathlib import Path

import persist_user_template
from inselect.lib.parse import parse_matches_regex
Expand Down Expand Up @@ -82,6 +83,13 @@ def __str__(self):
msg = 'UserTemplate [{0}] with {1} fields'
return msg.format(self.name, len(self.fields))

@classmethod
def load(cls, path):
    """Returns a new instance of UserTemplate read from the file at path.

    The file is opened as UTF-8 text and the open stream is handed to
    from_file().
    """
    template_path = Path(path)
    with template_path.open(encoding='utf8') as stream:
        return cls.from_file(stream)

@classmethod
def from_file(cls, stream):
"""Returns a new instance of UserTemplate using the YAML document in
Expand Down Expand Up @@ -153,11 +161,11 @@ def validate_field(self, field, value):
parse = self.parse_mapping[field]
try:
parse(value)
debug_print('Parsed [{0}] [{1}]'.format(field, value))
debug_print(u'Parsed [{0}] [{1}]'.format(field, value))
return True
except ValueError:
# Could not be parsed
debug_print('Failed to parse [{0}] [{1}]'.format(field, value))
debug_print(u'Failed to parse [{0}] [{1}]'.format(field, value))
return False
else:
return True
42 changes: 38 additions & 4 deletions inselect/lib/validate_document.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def _visit_box(template, visitor, index, box):
try:
parse(md[field])
except ValueError:
visitor.failed_parse(index, box_label, field)
visitor.failed_parse(index, box_label, field, md[field])


def _visit_labels(document, template, visitor):
Expand All @@ -48,7 +48,7 @@ def _visit_labels(document, template, visitor):
visitor.duplicated_labels(label)

MissingMandatory = namedtuple('MissingMandatory', ['index', 'label', 'field'])
FailedParse = namedtuple('FailedParse', ['index', 'label', 'field'])
FailedParse = namedtuple('FailedParse', ['index', 'label', 'field', 'value'])


class CollectProblemsVisitor(object):
Expand All @@ -63,8 +63,8 @@ def __init__(self):
def missing_mandatory(self, index, label, field):
self._missing_mandatory.append(MissingMandatory(index, label, field))

def failed_parse(self, index, label, field):
self._failed_parse.append(FailedParse(index, label, field))
def failed_parse(self, index, label, field, value):
self._failed_parse.append(FailedParse(index, label, field, value))

def missing_label(self, index):
self._missing_label.append(index)
Expand Down Expand Up @@ -93,3 +93,37 @@ def any_problems(self):
self.failed_parse or
self.missing_label or
self.duplicated_labels)

from itertools import chain


def format_missing_mandatory(missing_mandatory):
    """Yield a message for every missing mandatory field.

    missing_mandatory is an iterable of (index, label, field) triples, such
    as MissingMandatory tuples. The box number shown in each message is
    1 + index.
    """
    for problem in missing_mandatory:
        index, label, field = problem
        yield u'Box [{0}] [{1}] lacks mandatory field [{2}]'.format(
            1 + index, label, field
        )


def format_failed_parse(failed_parse):
    """Yield a message for every field whose value could not be parsed.

    failed_parse is an iterable of (index, label, field, value) 4-tuples,
    such as FailedParse tuples. Each message names the field and the
    offending value, then the box number (1 + index) and the box label.
    """
    for problem in failed_parse:
        index, label, field, value = problem
        box_number = 1 + index
        yield u'Could not parse value of [{0}] [{1}] for box [{2}] [{3}]'.format(
            field, value, box_number, label
        )


def format_missing_label(missing_label):
    """Yield a message for every box that has no object label.

    missing_label is an iterable of box indexes. The box number shown in
    each message is 1 + index.
    """
    for box_index in missing_label:
        yield u'Missing object label for box [{0}]'.format(1 + box_index)


def format_duplicated_labels(duplicated_labels):
    """Yield a message for every object label that is used more than once.

    duplicated_labels is an iterable of label values; each is inserted into
    its message unchanged.
    """
    for repeated_label in duplicated_labels:
        yield u'Duplicated object label [{0}]'.format(repeated_label)


def format_validation_problems(v):
    """Return an iterator of validation failure messages for
    ValidationProblems v.

    Messages come in this order: missing mandatory fields, values that
    failed to parse, boxes without a label, duplicated labels.
    """
    message_groups = (
        format_missing_mandatory(v.missing_mandatory),
        format_failed_parse(v.failed_parse),
        format_missing_label(v.missing_label),
        format_duplicated_labels(v.duplicated_labels),
    )
    return chain(*message_groups)
31 changes: 22 additions & 9 deletions inselect/scripts/export_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,28 +19,36 @@
from inselect.lib.document import InselectDocument
from inselect.lib.document_export import DocumentExport
from inselect.lib.templates.dwc import DWC
from inselect.lib.user_template import UserTemplate
from inselect.lib.utils import debug_print
from inselect.lib.validate_document import format_validation_problems


# TODO Export with template name
# TODO Recursive option

def export_csv(dir, overwrite_existing):
def export_csv(dir, overwrite_existing, template):
dir = Path(dir)
# TODO Template name as argument
export = DocumentExport(DWC)
export = DocumentExport(UserTemplate.load(template) if template else DWC)
for p in dir.glob('*' + InselectDocument.EXTENSION):
try:
debug_print('Loading [{0}]'.format(p))
doc = InselectDocument.load(p)
validation = export.validation_problems(doc)
csv_path = export.csv_path(doc)
if not overwrite_existing and csv_path.is_file():
print('CSV file [{0}] exists - skipping'.format(csv_path))
if validation.any_problems:
print(
u'Not exporting metadata for [{0}] because there are '
u'validation problems'.format(p)
)
for msg in format_validation_problems(validation):
print(msg)
elif not overwrite_existing and csv_path.is_file():
print(u'CSV file [{0}] exists - skipping'.format(csv_path))
else:
print('Writing CSV for [{0}]'.format(p))
print(u'Writing CSV for [{0}]'.format(p))
export.export_csv(doc)
except Exception:
print('Error saving CSV from [{0}]'.format(p))
print(u'Error saving CSV from [{0}]'.format(p))
traceback.print_exc()


Expand All @@ -49,14 +57,19 @@ def main(args):
parser.add_argument("dir", help='Directory containing Inselect documents')
parser.add_argument('-o', '--overwrite', action='store_true',
help='Overwrite existing metadata files')
parser.add_argument(
'-t', '--template', help="Path to a '{0}' file that will be used to "
'export the data'.format(UserTemplate.EXTENSION)
)
parser.add_argument('-d', '--debug', action='store_true')
parser.add_argument('-v', '--version', action='version',
version='%(prog)s ' + inselect.__version__)
args = parser.parse_args(args)

inselect.lib.utils.DEBUG_PRINT = args.debug

export_csv(args.dir, args.overwrite)
export_csv(args.dir, args.overwrite, args.template)


if __name__ == '__main__':
main(sys.argv[1:])
10 changes: 5 additions & 5 deletions inselect/scripts/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,24 +31,24 @@ def ingest_from_directory(inbox, docs, cookie_cutter=None):
inbox, docs = Path(inbox), Path(docs)
cookie_cutter = Path(cookie_cutter) if cookie_cutter else None
if not inbox.is_dir():
raise InselectError('Inbox directory [{0}] does not exist'.format(inbox))
raise InselectError(u'Inbox directory [{0}] does not exist'.format(inbox))

if not docs.is_dir():
print('Create document directory [{0}]'.format(docs))
print(u'Create document directory [{0}]'.format(docs))
docs.mkdir(parents=True)

if cookie_cutter:
cookie_cutter = CookieCutter.load(cookie_cutter)

for source in (p for p in inbox.iterdir() if IMAGE_SUFFIXES_RE.match(p.name)):
print('Ingesting [{0}]'.format(source))
print(u'Ingesting [{0}]'.format(source))
try:
ingest_image(source, docs, cookie_cutter=cookie_cutter)
except Exception:
print('Error ingesting [{0}]'.format(source))
print(u'Error ingesting [{0}]'.format(source))
traceback.print_exc()
else:
print('Ingested [{0}]'.format(source))
print(u'Ingested [{0}]'.format(source))


def main(args):
Expand Down
6 changes: 3 additions & 3 deletions inselect/scripts/read_barcodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def process_dir(self, dir):
try:
self.read_barcodes_in_document(InselectDocument.load(p))
except Exception:
print('Error reading barcodes in [{0}]'.format(p))
print(u'Error reading barcodes in [{0}]'.format(p))
traceback.print_exc()

def read_barcodes_in_document(self, doc):
Expand All @@ -52,14 +52,14 @@ def read_barcodes_in_document(self, doc):
if result:
strategy, barcodes = result
barcodes = u' '.join(b.data for b in barcodes)
debug_print('Crop [{0}] - found [{1}]'.format(index, barcodes))
debug_print(u'Crop [{0}] - found [{1}]'.format(index, barcodes))

# TODO LH This mapping to come from metadata config?
# TODO LH Could be more than one object, and hence barcode,
# on a crop
item['fields']['catalogNumber'] = barcodes
else:
debug_print('Crop [{0}] - no barcodes'.format(index))
debug_print(u'Crop [{0}] - no barcodes'.format(index))

doc.set_items(items)
doc.save()
Expand Down
36 changes: 24 additions & 12 deletions inselect/scripts/save_crops.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,33 +17,41 @@

from inselect.lib.document import InselectDocument
from inselect.lib.document_export import DocumentExport
from inselect.lib.utils import debug_print
from inselect.lib.templates.dwc import DWC
from inselect.lib.utils import debug_print
from inselect.lib.user_template import UserTemplate
from inselect.lib.validate_document import format_validation_problems


# TODO Command-line argument for template
# TODO Recursive option
# TODO Ignore documents that fail validation; option to ignore failures

def save_crops(dir, overwrite_existing):
def save_crops(dir, overwrite_existing, template):
dir = Path(dir)
export = DocumentExport(DWC)
export = DocumentExport(UserTemplate.load(template) if template else DWC)
for p in dir.glob('*' + InselectDocument.EXTENSION):
try:
debug_print('Loading [{0}]'.format(p))
doc = InselectDocument.load(p)
if not overwrite_existing and doc.crops_dir.is_dir():
print('Crops dir [{0}] exists - skipping'.format(doc.crops_dir))
validation = export.validation_problems(doc)
if validation.any_problems:
print(
u'Not saving crops for [{0}] because there are validation '
u'problems'.format(p)
)
for msg in format_validation_problems(validation):
print(msg)
elif not overwrite_existing and doc.crops_dir.is_dir():
print(u'Crops dir [{0}] exists - skipping'.format(doc.crops_dir))
else:
print('Will save crops for [{0}] to [{1}]'.format(p, doc.crops_dir))
print(u'Will save crops for [{0}] to [{1}]'.format(p, doc.crops_dir))

debug_print('Loading full-resolution scanned image')
debug_print(u'Loading full-resolution scanned image')
doc.scanned.array

debug_print('Saving crops')
debug_print(u'Saving crops')
export.save_crops(doc)
except Exception:
print('Error saving crops from [{0}]'.format(p))
print(u'Error saving crops from [{0}]'.format(p))
traceback.print_exc()


Expand All @@ -54,14 +62,18 @@ def main():
parser.add_argument("dir", help='Directory containing Inselect documents')
parser.add_argument('-o', '--overwrite', action='store_true',
help='Overwrite existing crops directories')
parser.add_argument(
'-t', '--template', help="Path to a '{0}' file "
'that will be used to export the data'.format(UserTemplate.EXTENSION)
)
parser.add_argument('-d', '--debug', action='store_true')
parser.add_argument('-v', '--version', action='version',
version='%(prog)s ' + inselect.__version__)
args = parser.parse_args()

inselect.lib.utils.DEBUG_PRINT = args.debug

save_crops(args.dir, args.overwrite)
save_crops(args.dir, args.overwrite, args.template)


if __name__ == '__main__':
Expand Down

0 comments on commit ee42d75

Please sign in to comment.