Permalink
Browse files

Code has been adapted to Python 3. Beware, however, that it could not be thoroughly tested under Python 3 because the HTML5 parser does not have a Python 3 version yet.
  • Loading branch information...
1 parent 9d29d12 commit 0e8b057e764d0e12162aa6ff18a2f0dab0a87963 @iherman iherman committed Aug 31, 2012
Showing with 217 additions and 295 deletions.
  1. +1 −1 PKG-INFO
  2. +1 −1 README.txt
  3. +70 −38 pyRdfa/__init__.py
  4. +6 −1 pyRdfa/embeddedRDF.py
  5. +26 −11 pyRdfa/extras/httpheader.py
  6. +0 −158 pyRdfa/graph.py
  7. +6 −0 pyRdfa/host/html5.py
  8. +7 −1 pyRdfa/options.py
  9. +1 −1 pyRdfa/parse.py
  10. +1 −8 pyRdfa/rdfs/__init__.py
  11. +26 −24 pyRdfa/rdfs/cache.py
  12. +8 −11 pyRdfa/rdfs/process.py
  13. +18 −15 pyRdfa/state.py
  14. +10 −5 pyRdfa/termorcurie.py
  15. +26 −18 pyRdfa/utils.py
  16. +10 −2 setup.py
View
2 PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 1.0
Name: pyRdfa
-Version: 3.4.2
+Version: 3.4.3
Summary: RDFa 1.1 distiller
Author: Ivan Herman
Author-email: ivan@w3.org
View
2 README.txt
@@ -25,7 +25,7 @@ The package primarily depends on:
- simplejson<http://undefined.org/python/#simplejson> (in the additional packages folder), needed if the JSON serialization is used and if the underlying python version is 2.5 or lower
- isodate<http://hg.proclos.com/isodate> (in the additional packages folder) which, in some cases, is missing and RDFLib complains (?)
-The package has been tested on Python version 2.5 and higher. Python 2.6 or higher is strongly recommended. The package does not run with Python 3.
+The package has been tested on Python version 2.5 and higher. Python 2.6 or higher is strongly recommended. The package has been adapted to Python 3, though it has not yet been thoroughly tested there, because html5lib does not yet have a Python 3 version.
For the details on RDFa 1.1, see:
View
108 pyRdfa/__init__.py
@@ -158,12 +158,19 @@
$Id: __init__.py,v 1.82 2012/08/21 10:28:50 ivan Exp $
"""
-__version__ = "3.4.2"
+__version__ = "3.4.3"
__author__ = 'Ivan Herman'
__contact__ = 'Ivan Herman, ivan@w3.org'
-__license__ = u'W3C® SOFTWARE NOTICE AND LICENSE, http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231'
+__license__ = 'W3C® SOFTWARE NOTICE AND LICENSE, http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231'
+
+import sys
+PY3 = (sys.version_info[0] >= 3)
+
+if PY3 :
+ from io import StringIO
+else :
+ from StringIO import StringIO
-import sys, StringIO
import os
import rdflib
@@ -183,7 +190,11 @@
from pyRdfa.extras.httpheader import acceptable_content_type, content_type
import xml.dom.minidom
-import urlparse
+
+if PY3 :
+ from urllib.parse import urlparse
+else :
+ from urlparse import urlparse
# Namespace, in the RDFLib sense, for the rdfa vocabulary
ns_rdfa = Namespace("http://www.w3.org/ns/rdfa#")
@@ -192,7 +203,7 @@
RDFA_VOCAB = ns_rdfa["usesVocabulary"]
# Namespace, in the RDFLib sense, for the XSD Datatypes
-ns_xsd = Namespace(u'http://www.w3.org/2001/XMLSchema#')
+ns_xsd = Namespace('http://www.w3.org/2001/XMLSchema#')
# Namespace, in the RDFLib sense, for the distiller vocabulary, used as part of the processor graph
ns_distill = Namespace("http://www.w3.org/2007/08/pyRdfa/vocab#")
@@ -396,10 +407,17 @@ def _get_input(self, name) :
@return: a file like object if opening "name" is possible and successful, "name" otherwise
"""
try :
- if isinstance(name, basestring) :
+ # Python 2 branch
+ isstring = isinstance(name, basestring)
+ except :
+ # Python 3 branch
+ isstring = isinstance(name, str)
+
+ try :
+ if isstring :
# check if this is a URI, ie, if there is a valid 'scheme' part
# otherwise it is considered to be a simple file
- if urlparse.urlparse(name)[0] != "" :
+ if urlparse(name)[0] != "" :
url_request = URIOpener(name)
self.base = url_request.location
if self.media_type == "" :
@@ -429,8 +447,8 @@ def _get_input(self, name) :
return file(name)
else :
return name
- except HTTPError, h :
- raise h
+ except HTTPError :
+ raise sys.exc_info()[1]
except :
(type, value, traceback) = sys.exc_info()
raise FailedSource(value)
@@ -528,25 +546,36 @@ def copyErrors(tog, options) :
tog.bind(k,ns)
options.reset_processor_graph()
return tog
+
+ # Separating this for a forward Python 3 compatibility
+ try :
+ # Python 2 branch
+ isstring = isinstance(name, basestring)
+ except :
+ # Python 3 branch
+ isstring = isinstance(name, str)
try :
# First, open the source... Possible HTTP errors are returned as error triples
input = None
try :
input = self._get_input(name)
- except FailedSource, f :
+ except FailedSource :
+ f = sys.exc_info()[1]
self.http_status = 400
if not rdfOutput : raise f
err = self.options.add_error(f.msg, FileReferenceError, name)
self.options.processor_graph.add_http_context(err, 400)
return copyErrors(graph, self.options)
- except HTTPError, h:
+ except HTTPError :
+ h = sys.exc_info()[1]
self.http_status = h.http_code
if not rdfOutput : raise h
err = self.options.add_error("HTTP Error: %s (%s)" % (h.http_code,h.msg), HTError, name)
self.options.processor_graph.add_http_context(err, h.http_code)
return copyErrors(graph, self.options)
- except Exception, e :
+ except Exception :
+ e = sys.exc_info()[1]
self.http_status = 500
# Something nasty happened:-(
if not rdfOutput : raise e
@@ -574,7 +603,7 @@ def copyErrors(tog, options) :
dom = parser.parse(input)
try :
- if isinstance(name, basestring) :
+ if isstring :
input.close()
input = self._get_input(name)
else :
@@ -596,7 +625,8 @@ def copyErrors(tog, options) :
except ImportError :
msg = "HTML5 parser not available. Try installing html5lib <http://code.google.com/p/html5lib>"
raise ImportError(msg)
- except Exception, e :
+ except Exception :
+ e = sys.exc_info()[1]
# These are various parsing exceptions. Per spec, this is a case when
# error triples MUST be returned, ie, the usage of rdfOutput (which switches between an HTML formatted
# return page or a graph with error triples) does not apply
@@ -607,13 +637,16 @@ def copyErrors(tog, options) :
# If we got here, we have a DOM tree to operate on...
return self.graph_from_DOM(dom, graph, pgraph)
- except Exception, e :
+ except Exception :
# Something nasty happened during the generation of the graph...
(a,b,c) = sys.exc_info()
sys.excepthook(a,b,c)
- self.http_status = 500
- if not rdfOutput : raise e
- err = self.options.add_error(str(e), context = name)
+ if isinstance(b, ImportError) :
+ self.http_status = None
+ else :
+ self.http_status = 500
+ if not rdfOutput : raise b
+ err = self.options.add_error(str(b), context = name)
self.options.processor_graph.add_http_context(err, 500)
return copyErrors(graph, self.options)
@@ -637,7 +670,7 @@ def rdf_from_sources(self, names, outputFormat = "turtle", rdfOutput = False) :
except :
graph = Graph()
- graph.bind("xsd", Namespace(u'http://www.w3.org/2001/XMLSchema#'))
+ graph.bind("xsd", Namespace('http://www.w3.org/2001/XMLSchema#'))
# the value of rdfOutput determines the reaction on exceptions...
for name in names :
self.graph_from_source(name, graph, rdfOutput)
@@ -683,10 +716,10 @@ def processURI(uri, outputFormat, form={}) :
"""
def _get_option(param, compare_value, default) :
param_old = param.replace('_','-')
- if param in form.keys() :
+ if param in list(form.keys()) :
val = form.getfirst(param).lower()
return val == compare_value
- elif param_old in form.keys() :
+ elif param_old in list(form.keys()) :
# this is to ensure the old style parameters are still valid...
# in the old days I used '-' in the parameters, the standard favours '_'
val = form.getfirst(param_old).lower()
@@ -698,14 +731,13 @@ def _get_option(param, compare_value, default) :
input = form["uploaded"].file
base = ""
elif uri == "text:" :
- import StringIO
- input = StringIO.StringIO(form.getfirst("text"))
+ input = StringIO(form.getfirst("text"))
base = ""
else :
input = uri
base = uri
- if "rdfa_version" in form.keys() :
+ if "rdfa_version" in list(form.keys()) :
rdfa_version = form.getfirst("rdfa_version")
else :
rdfa_version = None
@@ -714,7 +746,7 @@ def _get_option(param, compare_value, default) :
# Host language: HTML, XHTML, or XML
# Note that these options should be used for the upload and inline version only in case of a form
# for real uris the returned content type should be used
- if "host_language" in form.keys() :
+ if "host_language" in list(form.keys()) :
if form.getfirst("host_language").lower() == "xhtml" :
media_type = MediaTypes.xhtml
elif form.getfirst("host_language").lower() == "html" :
@@ -730,26 +762,26 @@ def _get_option(param, compare_value, default) :
transformers = []
- if "rdfa_lite" in form.keys() and form.getfirst("rdfa_lite").lower() == "true" :
+ if "rdfa_lite" in list(form.keys()) and form.getfirst("rdfa_lite").lower() == "true" :
from pyRdfa.transform.lite import lite_prune
transformers.append(lite_prune)
# The code below is left for backward compatibility only. In fact, these options are not exposed any more,
# they are not really in use
- if "extras" in form.keys() and form.getfirst("extras").lower() == "true" :
+ if "extras" in list(form.keys()) and form.getfirst("extras").lower() == "true" :
from pyRdfa.transform.metaname import meta_transform
from pyRdfa.transform.OpenID import OpenID_transform
from pyRdfa.transform.DublinCore import DC_transform
for t in [OpenID_transform, DC_transform, meta_transform] :
transformers.append(t)
else :
- if "extra-meta" in form.keys() and form.getfirst("extra-meta").lower() == "true" :
+ if "extra-meta" in list(form.keys()) and form.getfirst("extra-meta").lower() == "true" :
from pyRdfa.transform.metaname import meta_transform
transformers.append(meta_transform)
- if "extra-openid" in form.keys() and form.getfirst("extra-openid").lower() == "true" :
+ if "extra-openid" in list(form.keys()) and form.getfirst("extra-openid").lower() == "true" :
from pyRdfa.transform.OpenID import OpenID_transform
transformers.append(OpenID_transform)
- if "extra-dc" in form.keys() and form.getfirst("extra-dc").lower() == "true" :
+ if "extra-dc" in list(form.keys()) and form.getfirst("extra-dc").lower() == "true" :
from pyRdfa.transform.DublinCore import DC_transform
transformers.append(DC_transform)
@@ -761,9 +793,9 @@ def _get_option(param, compare_value, default) :
# On the other hand, the RDFa 1.1 doc clearly refers to 'rdfagraph' as the standard
# key.
a = None
- if "graph" in form.keys() :
+ if "graph" in list(form.keys()) :
a = form.getfirst("graph").lower()
- elif "rdfagraph" in form.keys() :
+ elif "rdfagraph" in list(form.keys()) :
a = form.getfirst("rdfagraph").lower()
if a != None :
if a == "processor" :
@@ -814,7 +846,7 @@ def _get_option(param, compare_value, default) :
# This is really for testing purposes only, it is an unpublished flag to force RDF output no
# matter what
try :
- graph = processor.rdf_from_source(input, outputFormat, rdfOutput = ("forceRDFOutput" in form.keys()) or not htmlOutput)
+ graph = processor.rdf_from_source(input, outputFormat, rdfOutput = ("forceRDFOutput" in list(form.keys())) or not htmlOutput)
if outputFormat == "n3" :
retval = 'Content-Type: text/rdf+n3; charset=utf-8\n'
elif outputFormat == "nt" or outputFormat == "turtle" :
@@ -826,7 +858,8 @@ def _get_option(param, compare_value, default) :
retval += '\n'
retval += graph
return retval
- except HTTPError, h :
+ except HTTPError :
+ (type,h,traceback) = sys.exc_info()
import cgi
retval = 'Content-type: text/html; charset=utf-8\nStatus: %s \n\n' % h.http_code
@@ -846,7 +879,6 @@ def _get_option(param, compare_value, default) :
(type,value,traceback) = sys.exc_info()
import traceback, cgi
- import StringIO
retval = 'Content-type: text/html; charset=utf-8\nStatus: %s\n\n' % processor.http_status
retval += "<html>\n"
@@ -855,7 +887,7 @@ def _get_option(param, compare_value, default) :
retval += "</head><body>\n"
retval += "<h1>Exception in distilling RDFa</h1>\n"
retval += "<pre>\n"
- strio = StringIO.StringIO()
+ strio = StringIO()
traceback.print_exc(file=strio)
retval += strio.getvalue()
retval +="</pre>\n"
@@ -868,9 +900,9 @@ def _get_option(param, compare_value, default) :
retval +="<dt>Uploaded file</dt>\n"
else :
retval +="<dt>URI received:</dt><dd><code>'%s'</code></dd>\n" % cgi.escape(uri)
- if "host_language" in form.keys() :
+ if "host_language" in list(form.keys()) :
retval +="<dt>Media Type:</dt><dd>%s</dd>\n" % media_type
- if "graph" in form.keys() :
+ if "graph" in list(form.keys()) :
retval +="<dt>Requested graphs:</dt><dd>%s</dd>\n" % form.getfirst("graph").lower()
else :
retval +="<dt>Requested graphs:</dt><dd>default</dd>\n"
View
7 pyRdfa/embeddedRDF.py
@@ -10,7 +10,12 @@
@version: $Id: embeddedRDF.py,v 1.14 2012/05/18 15:31:13 ivan Exp $
"""
-from StringIO import StringIO
+# Python 3 foolproof way...
+try :
+ from io import StringIO
+except :
+ from StringIO import StringIO
+
from pyRdfa.host import HostLanguage, accept_embedded_rdf_xml, accept_embedded_turtle
from pyRdfa.utils import return_XML
import re, sys
View
37 pyRdfa/extras/httpheader.py
@@ -75,11 +75,14 @@
Note: I have made a small modification on the regexp for internet date,
to make it more liberal (ie, accept a time zone string of the form +0000)
Ivan Herman <http://www.ivan-herman.net>, March 2011.
+
+ Have added statements to make it (hopefully) Python 3 compatible.
+ Ivan Herman <http://www.ivan-herman.net>, August 2012.
"""
__author__ = "Deron Meranda <http://deron.meranda.us/>"
-__date__ = "2011-03-08"
-__version__ = "1.01"
+__date__ = "2012-08-31"
+__version__ = "1.02"
__credits__ = """Copyright (c) 2005 Deron E. Meranda <http://deron.meranda.us/>
Licensed under GNU LGPL 2.1 or later. See <http://www.fsf.org/>.
@@ -105,6 +108,9 @@
DIGIT = '0123456789'
HEX = '0123456789ABCDEFabcdef'
+import sys
+PY3 = (sys.version_info[0] >= 3)
+
# Try to get a set/frozenset implementation if possible
try:
type(frozenset)
@@ -131,7 +137,10 @@
def _is_string( obj ):
"""Returns True if the object is a string or unicode type."""
- return isinstance(obj,str) or isinstance(obj,unicode)
+ if PY3 :
+ return isinstance(obj,str)
+ else :
+ return isinstance(obj,str) or isinstance(obj,unicode)
def http_datetime( dt=None ):
@@ -501,10 +510,9 @@ def _test_comments():
def _testrm( a, b, collapse ):
b2 = remove_comments( a, collapse )
if b != b2:
- print 'Comment test failed:'
- print ' remove_comments( %s, collapse_spaces=%s ) -> %s' \
- % (repr(a), repr(collapse), repr(b2))
- print ' expected %s' % repr(b)
+ print( 'Comment test failed:' )
+ print( ' remove_comments( %s, collapse_spaces=%s ) -> %s' % (repr(a), repr(collapse), repr(b2)) )
+ print( ' expected %s' % repr(b) )
return 1
return 0
failures = 0
@@ -1341,14 +1349,17 @@ def __str__(self):
"""String value."""
s = '%s/%s' % (self.major, self.minor)
if self.parmdict:
- extra = '; '.join([ '%s=%s' % (a[0],quote_string(a[1],False)) \
- for a in self.parmdict.items()])
+ extra = '; '.join([ '%s=%s' % (a[0],quote_string(a[1],False)) for a in self.parmdict.items()])
s += '; ' + extra
return s
def __unicode__(self):
"""Unicode string value."""
- return unicode(self.__str__())
+ # In Python 3 this is probably unnecessary in general, this is just to avoid possible syntax issues. I.H.
+ if PY3 :
+ return str(self.__str__())
+ else :
+ return unicode(self.__str__())
def __repr__(self):
"""Python representation of this object."""
@@ -1766,7 +1777,11 @@ def __str__(self):
def __unicode__(self):
"""The unicode string form of this language tag."""
- return unicode(self.__str__())
+ # Probably unnecessary in Python 3
+ if PY3 :
+ return str(self.__str__())
+ else :
+ return unicode(self.__str__())
def __repr__(self):
"""The python representation of this language tag."""
View
158 pyRdfa/graph.py
@@ -1,158 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Wrapper around RDFLib's Graph object. The issue is that, in RDFLib 2.X, the turtle and the RDF/XML serialization both have some issues (bugs and ugly output). As a result, the package’s own serializers should be registered and used. On the other hand, in RDFLib 3.X this becomes unnecessary, it is better to keep to the library’s own version. This wrapper provides a subclass of RDFLib’s Graph overriding the serialize method to register, if necessary, a different serializer and use that one.
-
-Also, some bindings (in the RDFLib sense) are done automatically, to ensure a nicer output for widely used schemas…
-
-@summary: Shell around RDLib's Graph
-@organization: U{World Wide Web Consortium<http://www.w3.org>}
-@author: U{Ivan Herman<a href="http://www.w3.org/People/Ivan/">}
-@license: This software is available for use under the
-U{W3C® SOFTWARE NOTICE AND LICENSE<href="http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231">}
-
-@var _bindings: Default bindings. This is just for the beauty of things: bindings are added to the graph to make the output nicer. If this is not done, RDFlib defines prefixes like "_1:", "_2:" which is, though correct, ugly…
-"""
-
-"""
-$Id: graph.py,v 1.6 2012/03/23 14:06:25 ivan Exp $ $Date: 2012/03/23 14:06:25 $
-
-"""
-
-import rdflib
-if rdflib.__version__ >= "3.0.0" :
- from rdflib import Graph
-else :
- from rdflib.Graph import Graph
-from rdflib import Namespace
-
-_xml_serializer_name = "my-rdfxml"
-_turtle_serializer_name = "my-turtle"
-_json_serializer_name = "my-json-ld"
-
-try:
- from cStringIO import StringIO
-except ImportError:
- from StringIO import StringIO
-
-# Default bindings. This is just for the beauty of things: bindings are added to the graph to make the output nicer. If this is not done, RDFlib defines prefixes like "_1:", "_2:" which is, though correct, ugly...
-_bindings = [
-]
-
-
-#########################################################################################################
-class MyGraph(Graph) :
- """
- Wrapper around RDFLib's Graph object. The issue is that the serializers in RDFLib are buggy:-(
-
- In RDFLib 2.X both the Turtle and the RDF/XML serializations have issues (bugs and ugly output). In RDFLib 3.X
- the Turtle serialization seems to be fine, but the RDF/XML has problems:-(
-
- This wrapper provides a subclass of RDFLib’s Graph overriding the serialize method to register,
- if necessary, a different serializer and use that one.
-
- @cvar xml_serializer_registered_2: flag to avoid duplicate registration for RDF/XML for rdflib 2.*
- @type xml_serializer_registered_2: boolean
- @cvar xml_serializer_registered_3: flag to avoid duplicate registration for RDF/XML for rdflib 3.*
- @type xml_serializer_registered_3: boolean
- @cvar json_serializer_registered: flag to avoid duplicate registration for JSON-LD for rdflib 3.*
- @type json_serializer_registered: boolean
- @cvar turtle_serializer_registered_2: flag to avoid duplicate registration for Turtle for rdflib 2.*
- @type turtle_serializer_registered_2: boolean
- """
- xml_serializer_registered_2 = False
- xml_serializer_registered_3 = False
- turtle_serializer_registered_2 = False
- json_serializer_registered = False
-
- def __init__(self) :
- Graph.__init__(self)
- for (prefix,uri) in _bindings :
- self.bind(prefix,Namespace(uri))
-
- def _register_XML_serializer_3(self) :
- """The default XML Serializer of RDFLib 3.X is buggy, mainly when handling lists. An L{own version<serializers.prettyXMLserializer_3>} is
- registered in RDFlib and used in the rest of the package.
- """
- if not MyGraph.xml_serializer_registered_3 :
- from rdflib.plugin import register
- from rdflib.serializer import Serializer
- if rdflib.__version__ > "3.1.0" :
- register(_xml_serializer_name, Serializer,
- "pyRdfa.serializers.prettyXMLserializer_3_2", "PrettyXMLSerializer")
- else :
- register(_xml_serializer_name, Serializer,
- "pyRdfa.serializers.prettyXMLserializer_3", "PrettyXMLSerializer")
- MyGraph.xml_serializer_registered_3 = True
-
- def _register_JSON_serializer_3(self) :
- """JSON LD serializer
- """
- if not MyGraph.json_serializer_registered :
- from rdflib.plugin import register
- from rdflib.serializer import Serializer
- register(_json_serializer_name, Serializer,
- "pyRdfa.serializers.jsonserializer", "JsonSerializer")
- MyGraph.json_serializer_registered = True
-
- def _register_XML_serializer_2(self) :
- """The default XML Serializer of RDFLib 2.X is buggy, mainly when handling lists.
- An L{own version<serializers.prettyXMLserializer>} is
- registered in RDFlib and used in the rest of the package. This is not used for RDFLib 3.X.
- """
- if not MyGraph.xml_serializer_registered_2 :
- from rdflib.plugin import register
- from rdflib.syntax import serializer, serializers
- register(_xml_serializer_name, serializers.Serializer,
- "pyRdfa.serializers.prettyXMLserializer", "PrettyXMLSerializer")
- MyGraph.xml_serializer_registered_2 = True
-
- def _register_Turtle_serializer_2(self) :
- """The default Turtle Serializers of RDFLib 2.X is buggy and not very nice as far as the output is concerned.
- An L{own version<serializers.TurtleSerializer>} is registered in RDFLib and used in the rest of the package.
- This is not used for RDFLib 3.X.
- """
- if not MyGraph.turtle_serializer_registered_2 :
- from rdflib.plugin import register
- from rdflib.syntax import serializer, serializers
- register(_turtle_serializer_name, serializers.Serializer,
- "pyRdfa.serializers.turtleserializer", "TurtleSerializer")
- MyGraph.turtle_serialzier_registered_2 = True
-
- def add(self, (s,p,o)) :
- """Overriding the Graph's add method to filter out triples with possible None values. It may happen
- in case, for example, a host language is not properly set up for the distiller"""
- if s == None or p == None or o == None :
- return
- else :
- Graph.add(self, (s,p,o))
-
- def serialize(self, format = "xml") :
- """Overriding the Graph's serialize method to adjust the output format"""
- if rdflib.__version__ >= "3.0.0" :
- # this is the easy case
- if format == "xml" or format == "pretty-xml" :
- self._register_XML_serializer_3()
- return Graph.serialize(self, format=_xml_serializer_name)
- elif format == "json-ld" or format == "json" :
- # The new version of the serialziers in RDFLib 3.2.X require this extra round...
- # I do not have the patience of working out why that is so.
- self._register_JSON_serializer_3()
- stream = StringIO()
- Graph.serialize(self, format=_json_serializer_name, destination = stream)
- return stream.getvalue()
- elif format == "nt" :
- return Graph.serialize(self, format="nt")
- elif format == "n3" or format == "turtle" :
- retval =""
- return Graph.serialize(self, format="turtle")
- else :
- if format == "xml" or format == "pretty-xml" :
- self._register_XML_serializer_2()
- return Graph.serialize(self, format=_xml_serializer_name)
- elif format == "nt" :
- return Graph.serialize(self, format="nt")
- elif format == "n3" or format == "turtle" :
- self._register_Turtle_serializer_2()
- return Graph.serialize(self, format=_turtle_serializer_name)
-
-
View
6 pyRdfa/host/html5.py
@@ -15,6 +15,12 @@
$Id: html5.py,v 1.10 2012/06/28 11:58:14 ivan Exp $
$Date: 2012/06/28 11:58:14 $
"""
+try :
+ from functools import reduce
+except :
+ # Not important. This import is only necessary in Python 3; the newer versions of Python 2.X also provide it
+ # for forward compatibility with Python 3
+ pass
# The handling of datatime is a little bit more complex... better put this in a separate function for a better management
from datetime import datetime
View
8 pyRdfa/options.py
@@ -67,6 +67,12 @@ def add_triples(self, msg, top_class, info_class, context, node) :
@return: the bnode that serves as a subject for the errors. The caller may add additional information
@rtype: BNode
"""
+ # Python 3 foolproof way
+ try :
+ is_context_string = isinstance(context, basestring)
+ except :
+ is_context_string = isinstance(context, str)
+
bnode = BNode()
if node != None:
@@ -82,7 +88,7 @@ def add_triples(self, msg, top_class, info_class, context, node) :
self.graph.add((bnode, ns_rdf["type"], info_class))
self.graph.add((bnode, ns_dc["description"], Literal(full_msg)))
self.graph.add((bnode, ns_dc["date"], Literal(datetime.datetime.utcnow().isoformat(),datatype=ns_xsd["dateTime"])))
- if context and (isinstance(context,URIRef) or isinstance(context, basestring)):
+ if context and (isinstance(context,URIRef) or is_context_string):
htbnode = BNode()
self.graph.add( (bnode, ns_rdfa["context"],htbnode) )
self.graph.add( (htbnode, ns_rdf["type"], ns_ht["Request"]) )
View
2 pyRdfa/parse.py
@@ -300,7 +300,7 @@ def header_check(p_obj) :
graph.add( (state.get_list_origin(), prop, ns_rdf["nil"]) )
else :
heads = [ BNode() for r in vals ] + [ ns_rdf["nil"] ]
- for i in xrange(0, len(vals)) :
+ for i in range(0, len(vals)) :
graph.add( (heads[i], ns_rdf["first"], vals[i]) )
graph.add( (heads[i], ns_rdf["rest"], heads[i+1]) )
# Anchor the list
View
9 pyRdfa/rdfs/__init__.py
@@ -15,12 +15,7 @@
"""
-__version__ = "3.0.2"
-__author__ = 'Ivan Herman'
-__contact__ = 'Ivan Herman, ivan@w3.org'
-__license__ = u'W3C® SOFTWARE NOTICE AND LICENSE, http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231'
-
-import sys, StringIO
+import sys
import os
import rdflib
@@ -37,8 +32,6 @@
from rdflib.RDF import RDFNS as ns_rdf
from rdflib.Graph import Graph
-import xml.dom.minidom
-import urlparse
from pyRdfa import RDFaError, pyRdfaError
from pyRdfa import ns_rdfa, ns_xsd, ns_distill
View
50 pyRdfa/rdfs/cache.py
@@ -11,6 +11,8 @@
"""
import os, sys, datetime, re
+PY3 = (sys.version_info[0] >= 3)
+
import rdflib
from rdflib import URIRef
from rdflib import Literal
@@ -25,8 +27,6 @@
from rdflib.RDF import RDFNS as ns_rdf
from rdflib.Graph import Graph
-import urllib, urlparse, urllib2
-
from pyRdfa import HTTPError, RDFaError
from pyRdfa.host import MediaTypes, HostLanguage
from pyRdfa.utils import create_file_name, URIOpener, quote_URI
@@ -49,7 +49,11 @@
from pyRdfa.utils import URIOpener
#===========================================================================================
-import cPickle as pickle
+if PY3 :
+ import pickle
+else :
+ import cPickle as pickle
+
# Protocol to be used for pickle files. 0 is good for debug, it stores the data in ASCII; 1 is better for deployment,
# it stores data in binary format. Care should be taken for consistency; when changing from 0 to 1 or back, all
# cached data should be removed/regenerated, otherwise mess may occur...
@@ -137,7 +141,7 @@ def __init__(self, options = None) :
if not os.path.isdir(self.app_data_dir) :
try :
os.mkdir(self.app_data_dir)
- except Exception, e:
+ except Exception :
(type,value,traceback) = sys.exc_info()
if self.report: options.add_info("Could not create the vocab cache area %s" % value, VocabCachingInfo)
return
@@ -162,8 +166,9 @@ def __init__(self, options = None) :
# This is then put into a pickle file to put the stake in the ground...
try :
_dump(self.indeces, self.index_fname)
- except Exception, e:
- if self.report: options.add_info("Could not create the vocabulary index %s" % e.msg, VocabCachingInfo)
+ except Exception :
+ (type,value,traceback) = sys.exc_info()
+ if self.report: options.add_info("Could not create the vocabulary index %s" % value, VocabCachingInfo)
else :
if self.report: options.add_info("Vocabulary cache directory is not writeable", VocabCachingInfo)
self.cache_writeable = False
@@ -179,7 +184,7 @@ def add_ref(self, uri, vocab_reference) :
self.indeces[uri] = vocab_reference
try :
_dump(self.indeces, self.index_fname)
- except Exception, e:
+ except Exception :
(type,value,traceback) = sys.exc_info()
if self.report: self.options.add_info("Could not store the cache index %s" % value, VocabCachingInfo)
@@ -249,7 +254,7 @@ def __init__(self, URI, options = None) :
CachedVocabIndex.__init__(self, options)
vocab_reference = self.get_ref(URI)
self.caching = True
- except Exception, e :
+ except Exception :
# what this means is that the caching becomes impossible through some system error...
(type,value,traceback) = sys.exc_info()
if self.report: options.add_info("Could not access the vocabulary cache area %s" % value, VocabCachingInfo, URI)
@@ -276,7 +281,7 @@ def __init__(self, URI, options = None) :
fname = os.path.join(self.app_data_dir, self.filename)
try :
self.graph = _load(fname)
- except Exception, e :
+ except Exception :
# what this means is that the caching becomes impossible VocabCachingInfo
(type,value,traceback) = sys.exc_info()
sys.excepthook(type,value,traceback)
@@ -297,7 +302,7 @@ def __init__(self, URI, options = None) :
try :
self.graph = _load(fname)
self.expiration_date = datetime.datetime.utcnow() + datetime.timedelta(hours=1)
- except Exception, e :
+ except Exception :
# what this means is that the caching becomes impossible VocabCachingInfo
(type,value,traceback) = sys.exc_info()
sys.excepthook(type,value,traceback)
@@ -322,7 +327,7 @@ def _store_caches(self) :
fname = os.path.join(self.app_data_dir, self.filename)
try :
_dump(self.graph, fname)
- except Exception, e :
+ except Exception :
(type,value,traceback) = sys.exc_info()
if self.report : self.options.add_info("Could not write cache file %s (%s)", (fname,value), VocabCachingInfo, self.uri)
# Update the index
@@ -340,11 +345,11 @@ def __init__(self) :
self.vocab_cache_report = True
def pr(self, wae, txt, warning_type, context) :
- print "===="
- if warning_type != None : print warning_type
- print wae + ": " + txt
- if context != None: print context
- print "===="
+ print( "====" )
+ if warning_type != None : print( warning_type )
+ print( wae + ": " + txt )
+ if context != None: print( context )
+ print( "====" )
def add_warning(self, txt, warning_type=None, context=None) :
"""Add a warning to the processor graph.
@@ -378,16 +383,13 @@ def add_error(self, txt, err_type=None, context=None) :
for uri in args :
# This should write the cache
- print ">>>>> Writing Cache <<<<<"
+ print( ">>>>> Writing Cache <<<<<" )
writ = CachedVocab(uri,options = LocalOption(),report = True)
# Now read it back and print the content for tracing
- print ">>>>> Reading Cache <<<<<"
+ print( ">>>>> Reading Cache <<<<<" )
rd = CachedVocab(uri,options = LocalOption(),report = True)
- print "URI: " + uri
- print "default vocab: " + rd.vocabulary
- print "terms: ",
- print rd.terms
- print "prefixes: ",
- print rd.ns
+ print( "URI: " + uri )
+ print( "default vocab: " + rd.vocabulary )
+ print( "terms: %s prefixes: %s" % (rd.terms,rd.ns) )
View
19 pyRdfa/rdfs/process.py
@@ -12,12 +12,7 @@
"""
-__version__ = "3.0.2"
-__author__ = 'Ivan Herman'
-__contact__ = 'Ivan Herman, ivan@w3.org'
-__license__ = u'W3C® SOFTWARE NOTICE AND LICENSE, http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231'
-
-import sys, StringIO
+import sys
import os
import rdflib
@@ -82,13 +77,15 @@ def return_to_cache(msg) :
try :
content = URIOpener(uri,
{'Accept' : 'text/html;q=0.8, application/xhtml+xml;q=0.8, text/turtle;q=1.0, application/rdf+xml;q=0.9'})
- except HTTPError, e :
- return_to_cache(e.msg)
+ except HTTPError :
+ (type,value,traceback) = sys.exc_info()
+ return_to_cache(value)
return (None,None)
- except RDFaError, e :
- return_to_cache(e.msg)
+ except RDFaError :
+ (type,value,traceback) = sys.exc_info()
+ return_to_cache(value)
return (None,None)
- except Exception, e :
+ except Exception :
(type,value,traceback) = sys.exc_info()
return_to_cache(value)
return (None,None)
View
33 pyRdfa/state.py
@@ -52,8 +52,11 @@
import re
import random
-import urlparse
-import urllib
+if py_v_major >= 3 :
+ from urllib.parse import urlparse, urlunparse, urlsplit, urljoin
+else :
+ from urlparse import urlparse, urlunparse, urlsplit, urljoin
+
from types import *
class ListStructure :
@@ -115,8 +118,8 @@ def remove_frag_id(uri) :
"""
try :
# To be on the safe side:-)
- t = urlparse.urlparse(uri)
- return urlparse.urlunparse((t[0],t[1],t[2],t[3],t[4],""))
+ t = urlparse(uri)
+ return urlunparse((t[0],t[1],t[2],t[3],t[4],""))
except :
return uri
@@ -170,9 +173,9 @@ def remove_frag_id(uri) :
# This value can be overwritten by a @version attribute
if node.hasAttribute("version") :
top_version = node.getAttribute("version")
- if top_version.find("RDFa 1.0") != -1 :
+ if top_version.find("RDFa 1.0") != -1 or top_version.find("RDFa1.0") != -1 :
self.rdfa_version = "1.0"
- elif top_version.find("RDFa 1.1") != -1 :
+ elif top_version.find("RDFa 1.1") != -1 or top_version.find("RDFa1.1") != -1 :
self.rdfa_version = "1.1"
# this is just to play safe. I believe this should actually not happen...
@@ -208,7 +211,7 @@ def remove_frag_id(uri) :
#-----------------------------------------------------------------
# this will be used repeatedly, better store it once and for all...
- self.parsedBase = urlparse.urlsplit(self.base)
+ self.parsedBase = urlsplit(self.base)
#-----------------------------------------------------------------
# generate and store the local CURIE handling class instance
@@ -284,7 +287,7 @@ def create_URIRef(uri, check = True) :
"""
from pyRdfa import uri_schemes
val = uri.strip()
- if check and urlparse.urlsplit(val)[0] not in uri_schemes :
+ if check and urlsplit(val)[0] not in uri_schemes :
self.options.add_warning(err_URI_scheme % val.strip(), node=self.node.nodeName)
return URIRef(val)
@@ -300,11 +303,11 @@ def join(base, v, check = True) :
@return: an RDFLib URIRef instance
"""
# UGLY!!! There is a bug for a corner case in python version <= 2.5.X
- if len(v) > 0 and v[0] == '?' and py_v_minor <= 5 :
+ if len(v) > 0 and v[0] == '?' and (py_v_major < 3 and py_v_minor <= 5) :
return create_URIRef(base+v, check)
####
- joined = urlparse.urljoin(base, v)
+ joined = urljoin(base, v)
try :
if v[-1] != joined[-1] and (v[-1] == "#" or v[-1] == "?") :
return create_URIRef(joined + v[-1], check)
@@ -325,7 +328,7 @@ def join(base, v, check = True) :
# the ':' _does_ appear in the URI but not in a scheme position is taken
# care of properly...
- key = urlparse.urlsplit(val)[0]
+ key = urlsplit(val)[0]
if key == "" :
# relative URI, to be combined with local file name:
return join(self.base, val, check = False)
@@ -374,7 +377,7 @@ def _CURIEorURI(self, val) :
return self._URI(val)
else :
# there is an unlikely case where the retval is actually a URIRef with a relative URI. Better filter that one out
- if isinstance(retval, BNode) == False and urlparse.urlsplit(str(retval))[0] == "" :
+ if isinstance(retval, BNode) == False and urlsplit(str(retval))[0] == "" :
# yep, there is something wrong, a new URIRef has to be created:
return URIRef(self.base+str(retval))
else :
@@ -399,7 +402,7 @@ def _TERMorCURIEorAbsURI(self, val) :
if val == "" :
return None
- from termorcurie import ncname, termname
+ from pyRdfa.termorcurie import ncname, termname
if termname.match(val) :
# This is a term, must be handled as such...
retval = self.term_or_curie.term_to_URI(val)
@@ -415,7 +418,7 @@ def _TERMorCURIEorAbsURI(self, val) :
return retval
elif self.rdfa_version >= "1.1" :
# See if it is an absolute URI
- scheme = urlparse.urlsplit(val)[0]
+ scheme = urlsplit(val)[0]
if scheme == "" :
# bug; there should be no relative URIs here
self.options.add_warning(err_non_legal_CURIE_ref % val, UnresolvablePrefix, node=self.node.nodeName)
@@ -503,7 +506,7 @@ def get_list_props(self) :
Return the list of property values in the list structure
@return: list of URIRef
"""
- return self.list_mapping.mapping.keys()
+ return list(self.list_mapping.mapping.keys())
def get_list_value(self,prop) :
"""
View
15 pyRdfa/termorcurie.py
@@ -26,7 +26,12 @@
import re, sys
import xml.dom.minidom
import random
-import urlparse, urllib2
+
+if sys.version_info[0] >= 3 :
+ from urllib.parse import urlsplit
+else :
+ from urlparse import urlsplit
+
import rdflib
from rdflib import URIRef
@@ -112,9 +117,9 @@ def __init__(self, state, top_level) :
if state.rdfa_version < "1.1" or top_level == False :
return
- from initialcontext import initial_context as context_data
- from host import initial_contexts as context_ids
- from host import default_vocabulary
+ from pyRdfa.initialcontext import initial_context as context_data
+ from pyRdfa.host import initial_contexts as context_ids
+ from pyRdfa.host import default_vocabulary
for id in context_ids[state.options.host_language] :
# This gives the id of a initial context, valid for this media type:
@@ -352,7 +357,7 @@ def char_check(s, not_allowed = ['#','[',']']) :
if s.find(c) != -1 : return False
return True
# Creating an artificial http URI to fool the urlparse module...
- scheme, netloc, url, query, fragment = urlparse.urlsplit('http:' + val)
+ scheme, netloc, url, query, fragment = urlsplit('http:' + val)
if netloc != "" and self.state.rdfa_version >= "1.1" :
self.state.options.add_warning(err_absolute_reference % (netloc, val), UnresolvableReference, node=self.state.node.nodeName)
return False
View
44 pyRdfa/utils.py
@@ -17,7 +17,20 @@
$Date: 2012/05/17 15:02:48 $
"""
import os, os.path, sys, imp, datetime
-import urllib, urlparse, urllib2
+
+# Python 3 vs. 2 switch
+if sys.version_info[0] >= 3 :
+ from urllib.request import Request, urlopen
+ from urllib.parse import urljoin, quote
+ from http.server import BaseHTTPRequestHandler
+ from urllib.error import HTTPError as urllib_HTTPError
+else :
+ from urllib2 import Request, urlopen
+ from urllib2 import HTTPError as urllib_HTTPError
+ from urlparse import urljoin
+ from urllib import quote
+ from BaseHTTPServer import BaseHTTPRequestHandler
+
from pyRdfa.extras.httpheader import content_type, parse_http_datetime
import rdflib
@@ -61,14 +74,14 @@ def __init__(self, name, additional_headers = {}) :
"""
try :
# Note the removal of the fragment ID. This is necessary, per the HTTP spec
- req = urllib2.Request(url=name.split('#')[0])
+ req = Request(url=name.split('#')[0])
for key in additional_headers :
req.add_header(key, additional_headers[key])
if 'Accept' not in additional_headers :
req.add_header('Accept', 'text/html, application/xhtml+xml')
- self.data = urllib2.urlopen(req)
+ self.data = urlopen(req)
self.headers = self.data.info()
if URIOpener.CONTENT_TYPE in self.headers :
@@ -92,7 +105,7 @@ def __init__(self, name, additional_headers = {}) :
break
if URIOpener.CONTENT_LOCATION in self.headers :
- self.location = urlparse.urljoin(self.data.geturl(),self.headers[URIOpener.CONTENT_LOCATION])
+ self.location = urljoin(self.data.geturl(),self.headers[URIOpener.CONTENT_LOCATION])
else :
self.location = name
@@ -114,12 +127,13 @@ def __init__(self, name, additional_headers = {}) :
# The last modified date format was wrong, sorry, forget it...
pass
- except urllib2.HTTPError, e :
+ except urllib_HTTPError :
+ e = sys.exc_info()[1]
from pyRdfa import HTTPError
- import BaseHTTPServer
- msg = BaseHTTPServer.BaseHTTPRequestHandler.responses[e.code]
+ msg = BaseHTTPRequestHandler.responses[e.code]
raise HTTPError('%s' % msg[1], e.code)
- except Exception, e :
+ except Exception :
+ e = sys.exc_info()[1]
from pyRdfa import RDFaError
raise RDFaError('%s' % e)
@@ -146,7 +160,7 @@ def quote_URI(uri, options = None) :
if options != None :
options.add_warning(err_unusual_char_in_URI % suri)
break
- return urllib.quote(suri, _unquotedChars)
+ return quote(suri, _unquotedChars)
#########################################################################################################
@@ -155,7 +169,7 @@ def create_file_name(uri) :
Create a suitable file name from an (absolute) URI. Used, eg, for the generation of a file name for a cached vocabulary file.
"""
suri = uri.strip()
- final_uri = urllib.quote(suri,_unquotedChars)
+ final_uri = quote(suri,_unquotedChars)
# Remove some potentially dangerous characters
return final_uri.replace(' ','_').replace('%','_').replace('-','_').replace('+','_').replace('/','_').replace('?','_').replace(':','_').replace('=','_').replace('#','_')
@@ -234,13 +248,7 @@ def dump(node) :
@param node: DOM node
"""
- print node.toprettyxml(indent="", newl="")
+ print( node.toprettyxml(indent="", newl="") )
-
-
-##################
-# Testing
-if __name__ == '__main__':
- u = URIOpener("http://www.ivan-herman.net/foaf.html")
- print u.charset
+
View
12 setup.py
@@ -1,10 +1,18 @@
from distutils.core import setup
setup(name="pyRdfa",
description="pyRdfa Libray",
- version="3.4.2",
+ version="3.4.3",
author="Ivan Herman",
author_email="ivan@w3.org",
maintainer="Ivan Herman",
maintainer_email="ivan@w3.org",
- packages=['pyRdfa','pyRdfa.transform','pyRdfa.extras','pyRdfa.rdfs','pyRdfa.host','pyRdfaExtras',pyRdfaExtras.extras,pyRdfaExtras.serializers])
+ packages=['pyRdfa',
+ 'pyRdfa.transform',
+ 'pyRdfa.extras',
+ 'pyRdfa.rdfs',
+ 'pyRdfa.host',
+ 'pyRdfaExtras',
+ 'pyRdfaExtras.extras',
+ 'pyRdfaExtras.serializers'
+ ])

0 comments on commit 0e8b057

Please sign in to comment.