Skip to content


Script to generate watermarks image table
Browse files Browse the repository at this point in the history
  • Loading branch information
thvitt committed Sep 15, 2021
1 parent f3f03f0 commit 8e7daa3
Showing 1 changed file with 193 additions and 0 deletions.
193 changes: 193 additions & 0 deletions utils/
@@ -0,0 +1,193 @@
#!/usr/bin/env python3

from collections import defaultdict
from pathlib import Path
from typing import Union
from importlib_metadata import metadata
from lxml import etree
import re

from sympy import pretty_print
from find_sigil_refs import encode_sigil
import sys
import os
import logging

logger = logging.getLogger(__name__)

# Locate the project relative to this file. The path is resolved to an
# absolute one because ``.parent.parent`` of a bare relative name such as
# ``script.py`` collapses to ``.``, and ``__file__`` is used instead of
# ``sys.argv[0]`` so the result is also right when this module is imported.
this_script = Path(__file__).resolve()
# The project root is the parent of the directory that contains this script.
project_root = this_script.parent.parent

# Prefix -> namespace URI mapping handed to every XPath call in this file.
# NOTE(review): the URI bound to prefix ``f`` is empty here -- confirm the
# namespace the metadata and watermark label documents actually use.
NS = {'f': ''}
# Example file name: GSA_25-W_1362_wm_hand_drawn_150.jpg
# Groups: (1) numeric part of the signature, (2) image category,
# (3) the remainder of the stem, shown as the resolution in captions.
IMG_FN_PATTERN = re.compile(r'GSA_25-W_(\d+)_wm_([a-z_]+)_(.*)')
# Display labels for the image categories encoded in the file names.
CAT_LABELS = {'all': 'Blatt', 'detail': 'Detail', 'hand_drawn': 'Zeichnung'}

def etree_by_id(root: Path, idno_type: str):
    """
    Parse all XML files below *root* and index the parsed trees by an idno.

    For each file the text of the ``f:idno`` elements whose ``type``
    attribute equals *idno_type* is looked up; the first hit becomes the
    dictionary key. Files without such an element are left out. If several
    files share a key, the one visited last wins.

    Returns a dict mapping idno text to the parsed lxml tree.
    """
    query = f'//f:idno[@type="{idno_type}"]/text()'
    trees = {}
    for xml_path in root.glob('**/*.xml'):
        parsed = etree.parse(str(xml_path))
        hits = parsed.xpath(query, namespaces=NS)
        if not hits:
            continue
        trees[hits[0]] = parsed
    return trees

def collect_wm_imgs(imgfolder: Path):
    """
    Group the watermark images in *imgfolder* by the number in their name.

    Every directory entry whose stem matches ``IMG_FN_PATTERN`` is turned
    into a dict with the keys ``sigpart``, ``path``, ``category`` and
    ``resolution``. Entries are visited in sorted order; entries that do not
    match are skipped with a warning.

    Returns a ``defaultdict(list)`` mapping the numeric signature part (as a
    string) to the list of image dicts for that signature.
    """
    by_sigpart = defaultdict(list)
    for image in sorted(imgfolder.glob('*')):
        match = IMG_FN_PATTERN.match(image.stem)
        if not match:
            logger.warning('Ignoring unmatched file %s', image)
            continue
        # NOTE(review): the group-to-field assignment below was reconstructed
        # from the three groups of IMG_FN_PATTERN and from the keys that
        # image_cell() reads -- confirm against the intended behaviour.
        sigpart, raw_category, resolution = match.groups()
        by_sigpart[sigpart].append(dict(
            sigpart=sigpart,
            path=image,
            # Unknown categories keep their raw name instead of a label.
            category=CAT_LABELS.get(raw_category, raw_category),
            resolution=resolution,
        ))
    return by_sigpart

class WMLabels(dict):
    """
    Watermark id -> label text, read from the watermark label table.

    The dict content is filled from the ``f:watermark`` elements of the XML
    file: the ``id`` attribute is the key and the element text the value.
    Elements with the id ``"none"`` are skipped. The parsed tree is kept in
    ``self.tree`` so that image references can be written back into it and
    saved again; ``self.path`` remembers where the table was loaded from.
    """

    def __init__(self, watermark_map: Union[Path, None] = None):
        """
        Load the label table.

        watermark_map: path of the XML label table; defaults to
            ``src/main/xproc/xslt/watermark-labels.xml`` below the project
            root.
        """
        super().__init__()
        if watermark_map is None:
            watermark_map = project_root / "src/main/xproc/xslt/watermark-labels.xml"
        self.path = watermark_map
        wm_tree = etree.parse(os.fspath(watermark_map))
        self.tree = wm_tree
        for el in wm_tree.xpath('//f:watermark', namespaces=NS):
            wm_id = el.get('id')
            if wm_id != "none":
                self[wm_id] = el.text

    def register_image(self, wm_id, row_ref):
        """
        Set ``imgref=row_ref`` on every ``f:watermark`` element with id *wm_id*.

        Only the in-memory tree is changed. A warning is logged when no
        element carries that id.
        """
        # The id is passed as an XPath variable rather than pasted into the
        # expression, so ids containing quote characters cannot break it.
        els = self.tree.xpath('//f:watermark[@id=$wm_id]', namespaces=NS, wm_id=wm_id)
        if els:
            for el in els:
                el.set('imgref', row_ref)
        else:
            logger.warning('WMLabels: id %s not found in the label table, cannot link to %s', wm_id, row_ref)

    def save(self, path: Union[Path, None] = None):
        """
        Write the (possibly modified) label tree as pretty-printed UTF-8 XML.

        path: target file; defaults to the file the table was loaded from.
        """
        if path is None:
            path = self.path
        self.tree.write(os.fspath(path), pretty_print=True, encoding='utf-8')

def image_cell(image: dict, signature, td=True):
    """
    Render the HTML thumbnail link for one watermark image.

    image: dict with the keys ``path`` (an object with a ``name``
        attribute), ``category`` and ``resolution``.
    signature: text put in front of the caption.
    td: when true the link is wrapped in ``<td>…</td>``, otherwise the bare
        ``<a>`` element is returned.
    """
    full_size = '/img/watermarks/' + image['path'].name
    preview = '/img/watermarks/thumb/' + image['path'].name
    title = f"{signature} ({image['category']}, {image['resolution']} dpi)"
    anchor = f"""<a href="{full_size}" class="chocolat-image" title="{title}"><img src="{preview}" /></a>"""
    return '<td>' + anchor + '</td>' if td else anchor

def reorder_images(images: list[dict]) -> dict[str, Union[list, dict]]:
    """
    Arrange the images of one signature by category.

    The first image of each category is stored under the category name.
    Every further image of a category that is already taken is appended, in
    input order, to the list stored under ``'rest'``, which is always
    present.
    """
    result = {'rest': []}
    for image in images:
        category = image['category']
        if category in result:
            # Slot already taken (or the category is literally 'rest').
            result['rest'].append(image)
        else:
            result[category] = image
    return result

def normalize_whitespace(text: str) -> str:
    """Collapse each run of whitespace to one space and trim both ends."""
    words = text.split()
    return ' '.join(words)

def generate_table(by_sigpart, idmap, wmmap: WMLabels):
    """
    Build the ``<tr>`` rows of the watermark image table.

    by_sigpart: mapping of the numeric signature part to the list of image
        dicts for that signature.
    idmap: mapping of the full signature (``GSA 25/W <sigpart>``) to the
        parsed metadata tree of the document.
    wmmap: watermark label table; looked up with the whitespace-normalized
        raw watermark id, and told (via ``register_image``) which table row
        shows images of a watermark.

    Returns all rows concatenated into one HTML string. Signatures without
    metadata get a short row and a warning.
    """
    rows = []
    for sigpart, images in by_sigpart.items():
        signature = f'GSA 25/W {sigpart}'
        doc_tree = idmap.get(signature)
        cols = []

        # Image columns: one cell per known category, then a cell with all
        # remaining images.
        img_by_cat = reorder_images(images)
        for cat in ['Blatt', 'Detail', 'Zeichnung']:
            img = img_by_cat.get(cat)
            if img:
                cols.append(image_cell(img, signature))
            else:
                # NOTE(review): placeholder so that later cells stay in
                # their column -- confirm the intended markup.
                cols.append('<td></td>')
        cols.append('<td>' + ''.join(image_cell(extra, signature, td=False) for extra in img_by_cat['rest']) + '</td>')

        if doc_tree is not None:
            sigil = doc_tree.xpath('//f:idno[@type="faustedition"]/text()', namespaces=NS)[0]
            sigil_t = encode_sigil(sigil)
            wm_link = ''
            wmids = doc_tree.xpath('//f:watermarkID/text() | //f:countermarkID/text()', namespaces=NS)
            if wmids:
                # Only the first watermark / countermark id is used.
                wm_raw = normalize_whitespace(wmids[0])
                wm_normalized = wmmap.get(wm_raw)
                if wm_normalized:
                    wm_id = re.sub(r'\W', '_', wm_normalized)
                    wm_link = f'<a href="/watermark-table#{wm_id}">{wm_normalized}</a>'
                    wmmap.register_image(wm_raw, 'wm_' + sigil_t)
            # NOTE(review): the last two lines of this row (watermark cell,
            # image cells, closing tag) are reconstructed -- confirm.
            row = f"""<tr id="wm_{sigil_t}">
            <td><a href="/document?sigil={sigil_t}&view=structure">{sigil}</a></td>
            <td><a href="/document?sigil={sigil_t}&view=structure">{signature}</a></td>
            <td>{wm_link}</td>
            {''.join(cols)}</tr>"""
        else:
            logger.warning('Signature %s not found', sigpart)
            row = f"""<tr><td>{signature}</td>{''.join(cols)}</tr>"""
        rows.append(row)
    return ''.join(rows)

# Page prologue written in front of the generated rows: PHP header include,
# introductory text and the opening of the sortable table.
# NOTE(review): only three header cells are present although each full row
# also carries image cells -- confirm whether header cells for the image
# columns are missing.
HEAD = """
<?php include "includes/header.php"?>
<!-- WARNING: This file can be re-generated using faust-gen/utils/ Do not edit directly. -->
<section class="main">
<article class="pure-u-1">
<p>Auf dieser Seite werden die 2013 im Goethe- und Schiller-Archiv angefertigten
Wasserzeichenaufnahmen verfügbar gemacht.</p>
<p>Vgl. den <a href="watermarks">ausführlichen Bericht über die Anfertigung der
Aufnahmen</a> sowie die <a href="watermark-table">Liste der in der Faustedition
vorkommenden Wasserzeichen</a>.</p>
<table class="pure-table" data-sortable="true">
<thead>
<tr>
<th data-sortable-type="sigil">Sigle</th>
<th data-sortable-type="sigil">Signatur</th>
<th data-sortable-type="alpha">Beschreibung (Link: Vorkommen)</th>
</tr>
</thead>
<tbody>
"""

# Page epilogue written after the generated rows: closes the elements opened
# in HEAD, then the page script and the PHP footer include.
# NOTE(review): the body of the requirejs callback contains nothing but the
# commented-out breadcrumb call -- confirm whether initialisation code for
# the sortable table / image viewer belongs here.
FOOT = """
</tbody>
</table>
</article>
</section>
<script type="text/javascript">
requirejs(['faust_common', 'svg-pan-zoom', 'sortable', 'jquery', 'jquery.table'],
function(Faust, svgPanZoom, Sortable, $, $table) {
// document.getElementById('breadcrumbs').appendChild(Faust.createBreadcrumbs([{"caption": "Über die Ausgabe", "link": "/intro"}, {"caption": "Wasserzeichen"}]));
});
</script>
<?php include "includes/footer.php"?>
"""

def main():
    """Collect images and metadata, render the table and write the PHP page."""
    images_by_sigpart = collect_wm_imgs(project_root / 'src/main/web/img/watermarks')
    trees_by_signature = etree_by_id(project_root / 'data/xml/document', 'gsa_2')
    labels = WMLabels()
    # NOTE(review): generate_table() records image references in the label
    # tree via register_image(), but the tree is never saved here -- confirm
    # whether a call to WMLabels.save() is intended.
    table_rows = generate_table(images_by_sigpart, trees_by_signature, labels)
    target = project_root / 'src/main/web/archive_watermarks.php'
    target.write_text(HEAD + table_rows + FOOT, encoding='utf-8')


if __name__ == '__main__':
    main()

0 comments on commit 8e7daa3

Please sign in to comment.