Skip to content

Commit

Permalink
Added Saxon Stream. Not much faster
Browse files Browse the repository at this point in the history
  • Loading branch information
PonteIneptique committed Aug 26, 2016
1 parent e57fa60 commit b2cc138
Show file tree
Hide file tree
Showing 4 changed files with 202 additions and 25 deletions.
3 changes: 3 additions & 0 deletions docs/nemo_xslttwo_plugin.api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ Transformers
.. autoclass:: nemo_xslttwo_plugin.SaxonShellTransform
:members:

.. autoclass:: nemo_xslttwo_plugin.SaxonStreamTransform
:members:

Commons
#######

Expand Down
122 changes: 97 additions & 25 deletions nemo_xslttwo_plugin/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,47 +2,51 @@
import tempfile
from lxml import etree
import logging
import os


class XSLError(Exception):
    """ Error raised when the XSLT processor reports a failure """


def shell(cmd, stdin=None):
    """ Execute the external command and get its stdout and stderr.

    The exit code is not returned; callers only receive what the command
    wrote on its two output streams.

    :param cmd: List of command member to send to shell
    :type cmd: [str]
    :param stdin: Optional data written to the command's standard input
    :type stdin: bytes
    :return: Output and Error as a tuple
    :rtype: (bytes, bytes)
    """
    proc = Popen(
        cmd,
        # Only open a stdin pipe when there is something to send, so that a
        # call without input behaves exactly like the plain ``shell(cmd)`` form
        stdin=PIPE if stdin is not None else None,
        stdout=PIPE,
        stderr=PIPE
    )
    # communicate() drains both pipes to avoid deadlocks and waits for exit
    out, err = proc.communicate(stdin)
    return out, err


class SaxonShellTransform(object):
""" XSLT2 Transformer for Nemo using a shell command line and temporary file
:param saxon: Path to SaxonHE.jar file
:type saxon: str
:param xslt: Path to the XSLT to use
:type xslt: str
:param cache: Cache Handler
:type cache: capitains_nautilus.cache.BaseCache
:param logger: Logging Handler
:type logger: logging.Logger
:ivar saxon: Path to SaxonHE.jar file
:type saxon: str
:ivar xslt: Path to the XSLT to use
:type xslt: str
:ivar cache: Cache Handler
:type cache: capitains_nautilus.cache.BaseCache or werkzeug.contrib.cache.BaseCache
:ivar logger: Logging Handler
:type logger: logging.Logger
def __init__(self, saxon, xslt, cache=None, logger=None):
""" XSLT2 Transformer for Nemo using a shell command line and temporary file
:param saxon: Path to SaxonHE.jar file
:type saxon: str
:param xslt: Path to the XSLT to use
:type xslt: str
:param cache: Cache Handler
:type cache: capitains_nautilus.cache.BaseCache
:param logger: Logging Handler
:type logger: logging.Logger
:ivar saxon: Path to SaxonHE.jar file
:type saxon: str
:ivar xslt: Path to the XSLT to use
:type xslt: str
:ivar cache: Cache Handler
:type cache: capitains_nautilus.cache.BaseCache or werkzeug.contrib.cache.BaseCache
:ivar logger: Logging Handler
:type logger: logging.Logger
"""

"""
self.__saxon__ = saxon
self.__xslt__ = xslt
def __init__(self, saxon, xslt, cache=None, logger=None):
self.__saxon__ = os.path.abspath(saxon)
self.__xslt__ = os.path.abspath(xslt)
self.__cache__ = cache
if logger:
self.__logger__ = logger
Expand Down Expand Up @@ -95,3 +99,71 @@ def transform(self, _, xmlcontent, urn):
if self.cache:
self.cache.set(urn, output)
return output.decode("utf-8")


class SaxonStreamTransform(SaxonShellTransform):
    """ XSLT2 Transformer for Nemo using a shell command line and streamed content

    The serialized XML is written to the standard input of a
    ``java net.sf.saxon.Transform -s:-`` process and the transformed document
    is read back from its standard output, so no temporary file is needed.

    :param saxon: Path to SaxonHE.jar file
    :type saxon: str
    :param xslt: Path to the XSLT to use
    :type xslt: str
    :param cache: Cache Handler
    :type cache: capitains_nautilus.cache.BaseCache
    :param logger: Logging Handler
    :type logger: logging.Logger

    :cvar DELIMINATOR: End-of-document marker; currently not passed to Saxon
    :type DELIMINATOR: bytes
    :ivar saxon: Path to SaxonHE.jar file
    :type saxon: str
    :ivar saxon_dirname: Absolute directory holding the jar, used as working directory of the Java process
    :type saxon_dirname: str
    :ivar xslt: Path to the XSLT to use
    :type xslt: str
    :ivar cache: Cache Handler
    :type cache: capitains_nautilus.cache.BaseCache or werkzeug.contrib.cache.BaseCache
    :ivar logger: Logging Handler
    :type logger: logging.Logger
    """
    DELIMINATOR = b"\n--/END_OF_XML/--\n"

    def __init__(self, saxon, xslt, cache=None, logger=None):
        super(SaxonStreamTransform, self).__init__(saxon, xslt, cache, logger)

        # Code adapted from github.com/Connexions/cnx-mathml2svg
        # Resolve the jar once, here: a bare "saxon.jar" would otherwise give
        # an empty dirname (an invalid ``cwd`` for Popen) and a relative path
        # would silently depend on the working directory at transform time.
        saxon_path = os.path.abspath(saxon)
        saxon_filename = os.path.basename(saxon_path)
        self.saxon_dirname = os.path.dirname(saxon_path)
        self.__transform__ = [
            "java", "-cp",
            # os.pathsep is ":" on POSIX and ";" on Windows
            os.pathsep.join([saxon_filename, ".", self.saxon_dirname]),
            "net.sf.saxon.Transform", "-s:-",
            "-xsl:{xsl}".format(xsl=self.__xslt__)
            # NOTE: DELIMINATOR is not forwarded to Saxon (no "-deliminator" option is set)
        ]

    def transform(self, _, xmlcontent, urn):
        """ Transform some XML Content using a shell.

        :param _: Unused variable fed by Nemo (Work metadata from Nemo)
        :param xmlcontent: XML Node representing the text
        :param urn: Urn of the passage, used as key for caching
        :return: Transformed Source
        :rtype: str
        :raises XSLError: When Saxon writes on stderr or exits with a non-zero status
        """
        if self.cache:
            cached = self.cache.get(urn)
            if cached:
                return cached.decode("utf-8")

        process = Popen(
            self.__transform__,
            stdin=PIPE,
            stdout=PIPE,
            stderr=PIPE,
            close_fds=True,
            cwd=self.saxon_dirname
        )
        xml = etree.tostring(xmlcontent, encoding="utf-8")
        output, error = process.communicate(xml)

        # Any stderr output is treated as a failure (as before); a non-zero
        # exit status with a silent stderr is a failure too, and its output
        # must not reach the cache.
        if error or process.returncode:
            message = error.decode("utf-8", "replace")
            if not message:
                message = "Saxon exited with status {code}".format(code=process.returncode)
            raise XSLError(message)

        if self.cache:
            self.cache.set(urn, output)
        return output.decode("utf-8")
File renamed without changes.
102 changes: 102 additions & 0 deletions tests/test_saxonstream.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
from nemo_xslttwo_plugin import SaxonStreamTransform
from unittest import TestCase, mock
import logging
from capitains_nautilus.flask_ext import NautilusRetriever
from flask_nemo import Nemo
from flask import Flask
from werkzeug.contrib.cache import FileSystemCache


class TestSaxonStream(TestCase):
    """ Integration tests running SaxonStreamTransform behind a Nemo application """

    def setUp(self):
        self.cache = FileSystemCache("./cache")
        self.saxon = SaxonStreamTransform(
            "./jars/saxon.jar",
            "./tests/data/xsl/ciham.xsl",
            cache=self.cache
        )
        self.nautilus = NautilusRetriever(
            folders=[
                "./tests/data/repo"
            ]
        )
        self.nautilus.logger.setLevel(logging.ERROR)

        app = Flask("Nemo")
        app.debug = True
        # The Nemo instance itself is not needed afterwards; it is built for
        # what it does to ``app`` (presumably route registration)
        Nemo(
            app=app,
            base_url="",
            retriever=self.nautilus,
            transform={
                "default": self.saxon.transform
            }
        )

        self.client = app.test_client()

    def tearDown(self):
        # We clean the cache folder to ensure that no cache is passed from one test to the other
        self.cache.clear()

    def _read(self, passage):
        """ Request a passage of the test text and return the decoded page

        :param passage: Passage identifier appended to the text route
        :return: Decoded response body
        """
        response = self.client.get("/read/froLit/jns915/jns1856/ciham-fro1/" + passage)
        return response.data.decode()

    def _assert_transformed(self, data):
        """ Check that the page contains the markers produced by the XSLT """
        self.assertIn(
            '<span class="expan">et </span>', data,
            "Text content should be transformed"
        )
        self.assertIn(
            'Facsimilaire', data,
            "Other content should be added"
        )

    def _assert_first_passage_cached(self):
        """ Check that the transformation of passage 1 was stored in the cache """
        cached = self.cache.get("urn:cts:froLit:jns915.jns1856.ciham-fro1:1").decode()
        self.assertIn('<aside class="text-left">', cached, "Assert cache is made")

    def test_simple_transformation(self):
        """ Test transformation works fine"""
        self._assert_transformed(self._read("1"))
        self._assert_first_passage_cached()

    def test_cache_retrieved(self):
        """ Test that cache is nicely used and built """
        data = self._read("1")
        self._assert_transformed(data)
        self._assert_first_passage_cached()

        # SaxonStreamTransform spawns Saxon through Popen, not through shell(),
        # so Popen is the name that must stay untouched on a cache hit
        with mock.patch("nemo_xslttwo_plugin.Popen") as popen:
            cached_response = self._read("1")
            self.assertEqual(
                cached_response, data,
                "Text content should the same in cache"
            )
            self.assertEqual(
                popen.call_count, 0,
                "Saxon process should not be spawned because we use cache"
            )

    def test_two_transformations(self):
        """ Test that two successive transformations of different passages work """
        self._read("1")
        self._assert_transformed(self._read("2"))
        self._assert_first_passage_cached()

0 comments on commit b2cc138

Please sign in to comment.