Script to generate the table of testimonies from the wiki

Cf. faustedition/faust-web#129
faustedition · Jul 13, 2016 · 06b6c59 · 06b6c59
1 parent 59d202d
commit 06b6c59
Show file tree

Hide file tree

Showing 2 changed files with 129 additions and 1 deletion.
diff --git a/get_testimonies.py b/get_testimonies.py
@@ -0,0 +1,128 @@
+#!/usr/bin/env python3
+# coding: utf-8
+
+"""
+This script reads the table of testimonies from an Excel file in the wiki
+and writes it to a simple HTML table.
+
+
+Attributes:
+
+    selected_columns (list): Labels of the columns in the original table that should be selected.
+    column_labels (list): New labels for the selected_columns
+
+"""
+
+# Labels of the spreadsheet columns to keep, in the order they should
+# appear in the generated table.
+selected_columns = [
+    'Gräf-Nr.',
+    'Pniower-Nr.',
+    'Datum.(von)',
+    'Dokumenttyp',
+    'Verfasser',
+    'Adressat',
+    'Druckort',
+]
+# Labels used in the generated table; the entry at position i replaces the
+# entry at position i of selected_columns.
+column_labels = [
+    'Gräf',
+    'Pniower',
+    'Datum',
+    'Quellengattung',
+    'Verfasser',
+    'Adressat',
+    'Druckort',
+]
+
+
+import pandas as pd
+from lxml import html
+import requests
+from getpass import getpass
+import io
+
+def fetch_table():
+    """
+    Fetches the Excel file from the wiki, interactively asking for credentials.
+
+    Logs in through the wiki API (api.php, action=login) in two steps: the
+    first request yields a login token, the second request repeats the
+    credentials together with that token. The spreadsheet is then downloaded
+    through the same session.
+
+    Returns:
+        io.BytesIO: buffer holding the raw bytes of the downloaded file
+
+    Raises:
+        requests.HTTPError: if a login request or the download answers with
+            an HTTP error status
+        RuntimeError: if the wiki does not report a successful login
+    """
+    api = "https://faustedition.uni-wuerzburg.de/wiki/api.php"
+    xlsurl = "https://faustedition.uni-wuerzburg.de/wiki/images/b/b5/Dokumente_zur_Entstehungsgeschichte.xls"
+    lguser = input("Wiki User: ")
+    lgpass = getpass("Wiki Password: ")
+
+    # The context manager releases the session's connections when we are done.
+    with requests.Session() as s:
+        # NOTE(review): certificate verification is switched off although
+        # credentials travel over this session. Kept unchanged because the
+        # server's certificate setup is not visible from here -- confirm and
+        # re-enable verification if possible.
+        s.verify = False
+
+        loginparams = dict(
+            lgname=lguser,
+            lgpassword=lgpass,
+            action='login',
+            format='json')
+        # Send the credentials in the POST body (data=) rather than in the
+        # query string (params=), so the password is not part of the URL.
+        login1 = s.post(api, data=loginparams)
+        login1.raise_for_status()
+        token = login1.json()['login']['token']
+        loginparams["lgtoken"] = token
+        login2 = s.post(api, data=loginparams)
+        login2.raise_for_status()
+        # Fail early with a clear message instead of handing an error page
+        # to the spreadsheet parser later on.
+        result = login2.json().get('login', {}).get('result')
+        if result != 'Success':
+            raise RuntimeError("Wiki login failed: {}".format(result))
+
+        response = s.get(xlsurl, params=dict(token=token))
+        response.raise_for_status()
+        return io.BytesIO(response.content)
+
+def read_testimonies(buf, **kwargs):
+    """
+    Reads the table from the given object and filters the interesting columns.
+
+    The columns listed in ``selected_columns`` are extracted and relabelled
+    with the corresponding entries of ``column_labels``. Every occurrence of
+    ``00.`` is removed from the values of the ``Datum`` column.
+
+    Args:
+        buf: cf. :func:`pd.read_excel`
+        kwargs: passed on to pandas
+
+    Returns:
+        pd.DataFrame
+
+    Raises:
+        KeyError: if one of the ``selected_columns`` is missing in the table
+    """
+    raw_testimonies = pd.read_excel(buf, **kwargs)
+    # Take an explicit copy: the frame is modified below, and assigning into
+    # a plain column selection triggers pandas' SettingWithCopy warning.
+    testimonies = raw_testimonies[selected_columns].copy()
+    testimonies.columns = pd.Index(column_labels)
+    # Remove '00.' components (presumably placeholders for an unknown day or
+    # month -- confirm against the spreadsheet). A literal replacement is
+    # requested explicitly, so the result does not depend on the pandas
+    # version's default for ``regex``.
+    testimonies['Datum'] = testimonies['Datum'].str.replace('00.', '', regex=False)
+    return testimonies
+
+
+def html_table(testimony_df):
+    """
+    Converts the dataframe to an HTML table, and adds appropriate attributes.
+
+    The table gets ``data-sortable="true"`` and the class ``pure-table``.
+    Each header cell gets ``data-sorted="false"`` and a
+    ``data-sortable-type`` chosen by its label (``alpha`` unless listed
+    below).
+
+    Args:
+        testimony_df: dataframe as returned by :func:`read_testimonies`
+
+    Returns:
+        the parsed ``<table>`` element (lxml)
+    """
+    # Sort types for the columns that are not sorted alphabetically.
+    sort_types = {
+        'Gräf': 'numericplus',
+        'Pniower': 'numericplus',
+        'Datum': 'date-de',
+        'Druckort': 'bibliography',
+    }
+    table = html.fromstring(testimony_df.to_html(na_rep='', index=False))
+
+    table.attrib['data-sortable'] = 'true'
+    table.attrib['class'] = 'pure-table'
+    headerrow = table.find('thead').find('tr')
+    # Drop the inline style of the header row; tolerate its absence instead
+    # of failing with a KeyError.
+    headerrow.attrib.pop('style', None)
+    for th in headerrow.findall('th'):
+        th.attrib['data-sorted'] = 'false'
+        th.attrib['data-sortable-type'] = sort_types.get(th.text, 'alpha')
+    return table
+
+def test():
+    """
+    Manual check: converts the spreadsheet found under its original file
+    name in the current working directory and prints the resulting table.
+    """
+    testimonies = read_testimonies('Dokumente_zur_Entstehungsgeschichte.xls')
+    markup = html.tostring(html_table(testimonies), encoding="unicode")
+    print(markup)
+
+
+def write_html(output, table):
+    """
+    Writes the table, wrapped into the PHP page skeleton, to a file.
+
+    Args:
+        output: path of the file to write; it is overwritten and encoded
+            as UTF-8
+        table: the table element to serialize between the page's fixed
+            head and tail
+    """
+    # Fixed part of the page in front of the table.
+    page_head = """
+<?php $showFooter = false; ?>
+<?php /* ATTENTION: This file is generated by get_testimonies.py. DO NOT EDIT HERE */ ?>
+<?php include "includes/header.php"; ?>
+<section>
+
+  <article>
+      <div id="testimony-table-container">
+"""
+    # Fixed part of the page after the table.
+    page_tail = """
+      </div>
+  </article>
+
+</section>
+<script type="text/javascript">
+  // set breadcrumbs
+  document.getElementById("breadcrumbs").appendChild(Faust.createBreadcrumbs([{caption: "Archiv", link: "archive"}, {caption: "Dokumente zur Entstehungsgeschichte"}]));
+</script>
+
+<?php include "includes/footer.php"; ?>
+"""
+    with open(output, "w", encoding="utf-8") as php_file:
+        php_file.write(page_head)
+        table_markup = html.tostring(table, encoding="unicode")
+        php_file.write(table_markup)
+        php_file.write(page_tail)
+
+def main():
+    """
+    Fetches the spreadsheet from the wiki and regenerates the PHP page
+    containing the table of testimonies.
+    """
+    spreadsheet = fetch_table()
+    testimonies = read_testimonies(spreadsheet)
+    table = html_table(testimonies)
+    write_html("src/main/web/archive_testimonies.php", table)
+
+
+# Only run the conversion when executed as a script, not on import.
+if __name__ == '__main__':
+    main()
diff --git a/src/main/web b/src/main/web