Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Script to generate the table of testimonies from the wiki
- Loading branch information
Showing
2 changed files
with
129 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,128 @@ | ||
#!/usr/bin/env python3 | ||
# coding: utf-8 | ||
|
||
""" | ||
This script reads the table of testimonies from an excel file in the wiki | ||
and writes it to a simple HTML table. | ||
Attributes: | ||
selected_columns (list): Labels of the columns in the original table that should be selected. | ||
column_labels (list): New labels for the selected_columns | ||
""" | ||
|
||
selected_columns = ['Gräf-Nr.', 'Pniower-Nr.', 'Datum.(von)', 'Dokumenttyp', 'Verfasser', 'Adressat', 'Druckort'] | ||
column_labels = ['Gräf', 'Pniower', 'Datum', 'Quellengattung', 'Verfasser', 'Adressat', 'Druckort'] | ||
|
||
|
||
import pandas as pd | ||
from lxml import html | ||
import requests | ||
from getpass import getpass | ||
import io | ||
|
||
def fetch_table(): | ||
""" | ||
Fetches the excel file from the wiki, interactively asking for a password | ||
""" | ||
api = "https://faustedition.uni-wuerzburg.de/wiki/api.php" | ||
xlsurl = "https://faustedition.uni-wuerzburg.de/wiki/images/b/b5/Dokumente_zur_Entstehungsgeschichte.xls" | ||
lguser = input("Wiki User: ") | ||
lgpass = getpass("Wiki Password: ") | ||
|
||
s = requests.Session() | ||
s.verify = False | ||
|
||
loginparams = dict( | ||
lgname=lguser, | ||
lgpassword=lgpass, | ||
action='login', | ||
format='json') | ||
login1 = s.post(api, params=loginparams) | ||
token = login1.json()['login']['token'] | ||
loginparams["lgtoken"] = token | ||
s.post(api, params=loginparams) | ||
response = s.get(xlsurl, params=dict(token=token)) | ||
return io.BytesIO(response.content) | ||
|
||
def read_testimonies(buf, **kwargs): | ||
""" | ||
Reads the table from the given object and filters the interesting columns. | ||
Args: | ||
buf: cf. :func:`pd.read_excel` | ||
kwargs: passed on to pandas | ||
Returns: | ||
pd.DataFrame | ||
""" | ||
raw_testimonies = pd.read_excel(buf, **kwargs) | ||
testimonies = raw_testimonies[selected_columns] | ||
testimonies.columns = pd.Index(column_labels) | ||
testimonies.loc[:,'Datum'] = testimonies.loc[:,'Datum'].str.replace('00\.', '') | ||
return testimonies | ||
|
||
|
||
def html_table(testimony_df): | ||
""" | ||
Converts the dataframe to an HTML table, and adds appropriate attributes. | ||
""" | ||
table = html.fromstring(testimony_df.to_html(na_rep='', index=False)) | ||
|
||
table.attrib['data-sortable'] = 'true' | ||
table.attrib['class'] = 'pure-table' | ||
headerrow = table.find('thead').find('tr') | ||
del headerrow.attrib['style'] | ||
ths = headerrow.findall('th') | ||
for th in ths: | ||
th.attrib['data-sorted'] = 'false' | ||
if th.text in ['Gräf', 'Pniower']: | ||
th.attrib['data-sortable-type'] = 'numericplus' | ||
elif th.text == 'Datum': | ||
th.attrib['data-sortable-type'] = 'date-de' | ||
elif th.text == 'Druckort': | ||
th.attrib['data-sortable-type'] = 'bibliography' | ||
else: | ||
th.attrib['data-sortable-type'] = 'alpha' | ||
return table | ||
|
||
def test(): | ||
table = html_table(read_testimonies('Dokumente_zur_Entstehungsgeschichte.xls')) | ||
print(html.tostring(table, encoding="unicode")) | ||
|
||
|
||
def write_html(output, table): | ||
prefix = """ | ||
<?php $showFooter = false; ?> | ||
<?php /* ATTENTION: This file is generated by get_testimonies.py. DO NOT EDIT HERE */ ?> | ||
<?php include "includes/header.php"; ?> | ||
<section> | ||
<article> | ||
<div id="testimony-table-container"> | ||
""" | ||
suffix = """ | ||
</div> | ||
</article> | ||
</section> | ||
<script type="text/javascript"> | ||
// set breadcrumbs | ||
document.getElementById("breadcrumbs").appendChild(Faust.createBreadcrumbs([{caption: "Archiv", link: "archive"}, {caption: "Dokumente zur Entstehungsgeschichte"}])); | ||
</script> | ||
<?php include "includes/footer.php"; ?> | ||
""" | ||
with open(output, mode="wt", encoding="utf-8") as out: | ||
out.write(prefix) | ||
out.write(html.tostring(table, encoding="unicode")) | ||
out.write(suffix) | ||
|
||
def main(): | ||
df = read_testimonies(fetch_table()) | ||
write_html("src/main/web/archive_testimonies.php", html_table(df)) | ||
|
||
if __name__ == '__main__': | ||
main() |
Submodule web
updated
2 files
+1 −0 | archive.php | |
+13,911 −0 | archive_testimonies.php |