Skip to content

Commit

Permalink
Make get-testimonies write XML.
Browse files Browse the repository at this point in the history
Instead of directly generating the HTML file, we now write an XML file
which can later be merged with the other information extracted from
the actual testimony files etc.

faustedition/faust-web#237
  • Loading branch information
thvitt committed Mar 6, 2017
1 parent 80e424c commit e0f1198
Showing 1 changed file with 45 additions and 5 deletions.
50 changes: 45 additions & 5 deletions get_testimonies.py
Expand Up @@ -13,15 +13,26 @@
"""

selected_columns = ['Gräf-Nr.', 'Pniower-Nr.', 'Datum.(von)', 'Dokumenttyp', 'Verfasser', 'Adressat', 'Druckort']
column_labels = ['Gräf', 'Pniower', 'Datum', 'Quellengattung', 'Verfasser', 'Adressat', 'Druckort']
selected_columns = ['Gräf-Nr.', 'Pniower-Nr.', 'QuZ', ' Biedermann-HerwigNr.',
'Datum.(von)', 'Dokumenttyp', 'Verfasser', 'Adressat', 'Druckort']
column_labels = ['Gräf', 'Pniower', 'QuZ', 'Bie3',
'Datum', 'Quellengattung', 'Verfasser', 'Adressat', 'Druckort']


import pandas as pd
from lxml import html
from lxml import html, etree
import requests
from getpass import getpass
import io
import sys


def to_id(row):
for column, prefix in [('Gräf', 'graef'), ('Pniower', 'pniower'),
('QuZ', 'quz'), ('Bie3', 'bie3')]:
if not pd.isnull(row[column]):
return prefix + '_' + str(row[column])


def fetch_table():
"""
Expand Down Expand Up @@ -62,8 +73,29 @@ def read_testimonies(buf, **kwargs):
testimonies = raw_testimonies[selected_columns]
testimonies.columns = pd.Index(column_labels)
testimonies.loc[:,'Datum'] = testimonies.loc[:,'Datum'].str.replace('00\.', '')
testimonies.loc[:,'ID'] = testimonies.apply(to_id, axis=1)
return testimonies

def to_xml(testimony_df):
ns = "http://www.faustedition.net/ns"
NS = "{"+ns+"}"
nsmap = {None : ns}
root = etree.Element(NS + "testimonies", nsmap=nsmap)
root.addprevious(etree.Comment("This document is sporadically re-generated from an Excel sheet in the wiki. Please don't edit it directly"))
for __, row in testimony_df.iterrows():
row_el = root.makeelement(NS + "testimony")
if not pd.isnull(row.ID):
row_el.attrib['id'] = row.ID
for label, value in row.iteritems():
if label is 'ID':
continue
el = row_el.makeelement(NS + 'field', attrib=dict(label=label))
if not pd.isnull(value):
el.text = str(value)
row_el.append(el)
root.append(row_el)
return etree.ElementTree(root)


def html_table(testimony_df):
"""
Expand Down Expand Up @@ -122,8 +154,16 @@ def write_html(output, table):

def main():
pd.set_option('max_colwidth', 10000)
df = read_testimonies(fetch_table())
write_html("src/main/web/archive_testimonies.php", html_table(df))
if len(sys.argv) > 1:
df = read_testimonies(sys.argv[1])
else:
df = read_testimonies(fetch_table())
# write_html("src/main/web/archive_testimonies.php", html_table(df))
xml = to_xml(df)
xml.write('src/main/xproc/xslt/testimony-table.xml',
encoding='utf-8',
xml_declaration=True,
pretty_print=True)

if __name__ == '__main__':
main()

0 comments on commit e0f1198

Please sign in to comment.