/
property.py
289 lines (255 loc) · 11.3 KB
/
property.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
# -*- coding: utf-8 -*-
"""
Implementation of the C{@property} value handling.
RDFa 1.0 and RDFa 1.1 are fairly different. RDFa 1.0 generates only literals, see
U{RDFa Task Force's wiki page<http://www.w3.org/2006/07/SWD/wiki/RDFa/LiteralObject>} for the details.
On the other hand, RDFa 1.1, beyond literals, can also generate URI references. Hence the duplicate method in the L{ProcessProperty} class, one for RDFa 1.0 and the other for RDFa 1.1.
@summary: RDFa Literal generation
@requires: U{RDFLib package<http://rdflib.net>}
@organization: U{World Wide Web Consortium<http://www.w3.org>}
@author: U{Ivan Herman<http://www.w3.org/People/Ivan/>}
@license: This software is available for use under the
U{W3C® SOFTWARE NOTICE AND LICENSE<http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231>}
"""
"""
$Id: property.py,v 1.9 2012-03-23 14:06:25 ivan Exp $
$Date: 2012-03-23 14:06:25 $
"""
import re
import rdflib
from rdflib import BNode
from rdflib import Literal, URIRef, Namespace
if rdflib.__version__ >= "3.0.0" :
from rdflib import RDF as ns_rdf
from rdflib.term import XSDToPython
else :
from rdflib.RDF import RDFNS as ns_rdf
from rdflib.Literal import XSDToPython
from pyRdfa import IncorrectBlankNodeUsage, IncorrectLiteral, err_no_blank_node, ns_xsd
from pyRdfa.utils import has_one_of_attributes
from pyRdfa.host.html5 import handled_time_types
# The "XMLLiteral" term of the RDF namespace. It is compared against the resolved
# @datatype value and used as the datatype of the XML literals generated below.
XMLLiteral = ns_rdf["XMLLiteral"]
class ProcessProperty:
    """Generate the value for C{@property} taking into account datatype, etc.

    Note: this class is created only if the C{@property} is indeed present, no need to check.

    @ivar node: DOM element node
    @ivar graph: the (RDF) graph to add the properties to
    @ivar subject: the RDFLib URIRef serving as a subject for the generated triples
    @ivar state: the current state to be used for the CURIE-s
    @type state: L{state.ExecutionContext}
    @ivar typed_resource: Typically the bnode generated by a @typeof
    """

    def __init__(self, node, graph, subject, state, typed_resource=None):
        """
        @param node: DOM element node
        @param graph: the (RDF) graph to add the properties to
        @param subject: the RDFLib URIRef serving as a subject for the generated triples
        @param state: the current state to be used for the CURIE-s
        @type state: L{state.ExecutionContext}
        @param typed_resource: Typically the bnode generated by a @typeof; in RDFa 1.1, that becomes the object for C{@property}
        """
        self.node = node
        self.graph = graph
        self.subject = subject
        self.state = state
        self.typed_resource = typed_resource

    def generate(self):
        """
        Common entry point for the RDFa 1.0 and RDFa 1.1 versions; bifurcates based on the
        RDFa version, as retrieved from the state object.
        """
        if self.state.rdfa_version >= "1.1":
            self.generate_1_1()
        else:
            self.generate_1_0()

    def generate_1_1(self):
        """Generate the property object, 1.1 version.

        The object is, in order of precedence: the resource referred to by
        C{@resource}/C{@href}/C{@src}, the typed resource created by a C{@typeof},
        or a literal. One triple is added to the graph per C{@property} value, unless
        C{@inlist} is present, in which case the object is added to the list mapping
        of the state instead.
        """
        irirefs = ("resource", "href", "src")
        noiri = ("content", "datatype", "rel", "rev")
        notypediri = ("content", "datatype", "rel", "rev", "about")

        if has_one_of_attributes(self.node, irirefs) and not has_one_of_attributes(self.node, noiri):
            # @href/@resource/@src takes the lead here...
            value = self.state.getResource(irirefs)
        elif (has_one_of_attributes(self.node, "typeof")
              and not has_one_of_attributes(self.node, notypediri)
              and self.typed_resource is not None):
            # a @typeof creates a special branch in case the typed resource was set during parsing
            value = self.typed_resource
        else:
            # We have to generate a literal
            dtset, datatype = self._resolve_datatype()
            # The language can be suppressed in case some elements explicitly want to suppress
            # its effect. There were discussions, for example, that the <time> element should
            # do so. Although, after all, this was reversed, the functionality is kept in the
            # code in case another element might need it...
            if self.state.lang is not None and self.state.supress_lang == False:
                lang = self.state.lang
            else:
                lang = ''
            value = self._build_literal(dtset, datatype, lang, markup_as_xml=False)

        if value is None:
            return

        in_list = self.node.hasAttribute("inlist")
        for prop in self.state.getURI("property"):
            if isinstance(prop, BNode):
                self._warn_blank_node_property()
            elif in_list:
                self.state.add_to_list_mapping(prop, value)
            else:
                self.graph.add((self.subject, prop, value))

    def generate_1_0(self):
        """Generate the property object, 1.0 version.

        RDFa 1.0 generates only literals. If there is no C{@content} and no C{@datatype},
        and the element contains markup, an XML Literal is generated.
        """
        dtset, datatype = self._resolve_datatype()
        lang = self.state.lang if self.state.lang is not None else ''
        value = self._build_literal(dtset, datatype, lang, markup_as_xml=True)

        for prop in self.state.getURI("property"):
            if isinstance(prop, BNode):
                self._warn_blank_node_property()
            else:
                self.graph.add((self.subject, prop, value))

    ######################################################################################################################################

    def _resolve_datatype(self):
        """Look at the C{@datatype} attribute of the node.

        @return: a C{(dtset, datatype)} pair; C{dtset} tells whether the attribute is present
        (even if it is empty!), C{datatype} is the URI resolved through the state, or C{''}
        when the attribute is absent or empty
        """
        if not self.node.hasAttribute("datatype"):
            return False, ''
        if self.node.getAttribute("datatype") != "":
            return True, self.state.getURI("datatype")
        return True, ''

    def _build_literal(self, dtset, datatype, lang, markup_as_xml=False):
        """Create the literal object for the node; shared by the 1.0 and 1.1 versions.

        @param dtset: whether a C{@datatype} attribute is present on the node
        @param datatype: the resolved datatype URI (or C{''})
        @param lang: the language tag to use for plain literals (or C{''})
        @param markup_as_xml: if True (RDFa 1.0 behaviour), an element without C{@content} and
        C{@datatype} but with element children yields an XML Literal
        @return: Literal
        """
        # The simple case: separate @content attribute. With no (or an empty) datatype
        # _create_Literal falls back to a plain literal carrying the language.
        if self.node.hasAttribute("content"):
            return self._create_Literal(self.node.getAttribute("content"), datatype=datatype, lang=lang)

        if dtset:
            # The datatype is set (even if it may be empty): check whether it is, in fact,
            # an explicit XML Literal; otherwise the content is the pure text of the element
            if datatype == XMLLiteral:
                return Literal(self._get_XML_literal(self.node), datatype=XMLLiteral)
            return self._create_Literal(self._get_literal(self.node), datatype=datatype, lang=lang)

        # No controlling @datatype: in RDFa 1.0 markup in the content means an XML Literal
        if markup_as_xml and any(child.nodeType == self.node.ELEMENT_NODE for child in self.node.childNodes):
            return self._create_Literal(self._get_XML_literal(self.node), datatype=XMLLiteral)

        return self._create_Literal(self._get_literal(self.node), lang=lang)

    def _warn_blank_node_property(self):
        """Register the warning on a blank node being used as a C{@property} value."""
        self.state.options.add_warning(err_no_blank_node % "property", warning_type=IncorrectBlankNodeUsage, node=self.node.nodeName)

    def _putBackEntities(self, str):
        """Put 'back' entities for the '&','<', and '>' characters, to produce a proper XML string.
        Used by the XML Literal extraction.

        @param str: string to be converted (the name shadows the builtin; it is kept for
        backward compatibility of the signature)
        @return: string with entities
        @rtype: string
        """
        # Local import: the escaping helper is needed only here
        from xml.sax.saxutils import escape
        # escape() replaces exactly '&', '<' and '>' by their entities, handling '&' first
        return escape(str)

    def _get_literal(self, Pnode):
        """
        Get (recursively) the full text from a DOM Node.

        @param Pnode: DOM Node
        @return: string
        """
        pieces = []
        for node in Pnode.childNodes:
            if node.nodeType == node.TEXT_NODE:
                pieces.append(node.data)
            elif node.nodeType == node.ELEMENT_NODE:
                pieces.append(self._get_literal(node))
        text = "".join(pieces)

        # The decision of the group in February 2008 is not to normalize the result by default.
        # This is reflected in the default value of the option
        if self.state.options.space_preserve:
            return text
        # NOTE(review): the normalization (including the strip) is applied at every recursion
        # level, ie, leading/trailing white space of nested elements is dropped before the
        # concatenation; to be confirmed that this is intended.
        return re.sub(r'(\r| |\n|\t)+', " ", text).strip()

    def _get_XML_literal(self, Pnode):
        """
        Get the XML Literal content of a DOM Node. (Most of the processing is done
        via a C{node.toxml} call of the xml minidom implementation.)

        Note that the element children of C{Pnode} are modified: the missing namespace
        declarations and the language are set as attributes before the serialization.

        @param Pnode: DOM Node
        @return: string
        """
        pieces = []
        for node in Pnode.childNodes:
            if node.nodeType == node.TEXT_NODE:
                pieces.append(self._putBackEntities(node.data))
            elif node.nodeType == node.ELEMENT_NODE:
                # Decorate the element with namespaces and lang values
                xmlns = self.state.term_or_curie.xmlns
                for prefix in xmlns:
                    if not node.hasAttribute("xmlns:%s" % prefix):
                        node.setAttribute("xmlns:%s" % prefix, "%s" % xmlns[prefix])
                # Set the default namespace, if not done (and is available)
                if not node.getAttribute("xmlns") and self.state.defaultNS is not None:
                    node.setAttribute("xmlns", self.state.defaultNS)
                # Get the lang, if necessary
                if not node.getAttribute("xml:lang") and self.state.lang is not None:
                    node.setAttribute("xml:lang", self.state.lang)
                pieces.append(node.toxml())
        return "".join(pieces)

    @staticmethod
    def _rdflib_older_than(major, minor):
        """Tell whether the installed rdflib is older than C{major.minor}.

        The comparison is numerical; a plain string comparison would put, eg, "10.0.0"
        before "3.2.0".

        @param major: major version number
        @param minor: minor version number
        @return: True or False (False, too, if the version string cannot be interpreted)
        """
        found = re.match(r'(\d+)\.(\d+)', "%s" % rdflib.__version__)
        if found is None:
            return False
        return (int(found.group(1)), int(found.group(2))) < (major, minor)

    def _create_Literal(self, val, datatype='', lang=''):
        """
        Create a literal, taking into account the datatype and language. If a datatype is
        set, the language is ignored; a warning is added to the options if the value cannot
        be converted with the conversion function rdflib registers for that datatype.

        @param val: the lexical value of the literal
        @param datatype: datatype URI; C{None} or C{''} means a plain literal
        @param lang: language tag used for plain literals
        @return: Literal
        """
        if datatype is None or datatype == '':
            return Literal(val, lang=lang)

        # This is a bit convoluted... the default setup of rdflib does not gracefully react if the
        # datatype cannot properly be converted to Python. Some of the rdflib code has to be
        # reused to get this working...
        # To make things worse: rdflib 3.1.0 does not handle the various xsd date types properly, ie,
        # the conversion function below will generate errors. Ie, the check should be skipped for those
        if ("%s" % datatype) in handled_time_types and self._rdflib_older_than(3, 2):
            convFunc = None
        else:
            convFunc = XSDToPython.get(datatype, None)

        if convFunc:
            try:
                # Only the success of the conversion matters, not its result
                convFunc(val)
            except Exception:
                self.state.options.add_warning("Incompatible value (%s) and datatype (%s) in Literal definition." % (val, datatype), warning_type=IncorrectLiteral, node=self.node.nodeName)
        return Literal(val, datatype=datatype)