Skip to content

Commit

Permalink
Merge pull request #128 from Capitains/structured-metadata
Browse files Browse the repository at this point in the history
2.0.0 rc1
  • Loading branch information
PonteIneptique committed Apr 26, 2017
2 parents 0760d58 + f99b810 commit 14b4b9c
Show file tree
Hide file tree
Showing 15 changed files with 528 additions and 104 deletions.
2 changes: 1 addition & 1 deletion MyCapytain/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,4 @@
"""

__version__ = "2.0.0b20"
__version__ = "2.0.0rc1"
15 changes: 14 additions & 1 deletion MyCapytain/common/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@
XPATH_NAMESPACES = {
"tei": "http://www.tei-c.org/ns/1.0",
"ti": "http://chs.harvard.edu/xmlns/cts",
"xml": "http://www.w3.org/XML/1998/namespace"
"cpt": "http://purl.org/capitains/ns/1.0#",
"xsd": "http://www.w3.org/2001/XMLSchema#",
"xml": "http://www.w3.org/XML/1998/namespace",
"rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
}


Expand All @@ -19,10 +22,13 @@ class RDF_NAMESPACES:
:type TEI: Namespace
:cvar DC: DC Elements
:type DC: Namespace
:cvar CAPITAINS: CapiTainS Ontology
:type CAPITAINS: Namespace
"""
CTS = Namespace("http://chs.harvard.edu/xmlns/cts/")
DTS = Namespace("http://w3id.org/dts-ontology/")
TEI = Namespace("http://www.tei-c.org/ns/1.0/")
CAPITAINS = Namespace("http://purl.org/capitains/ns/1.0#")


class Mimetypes:
Expand Down Expand Up @@ -68,6 +74,12 @@ class XML:
CTS = "text/xml:CTS"
TEI = "text/xml:tei"

class CapiTainS:
    """ CapiTainS Guideline XML Structured metadata

    :cvar CTS: Mimetype identifier of the CTS flavour of the CapiTainS structured metadata XML
    :type CTS: str
    """
    CTS = "text/xml:CTS_CapiTainS"

class PYTHON:
""" Python Native Objects
Expand All @@ -93,6 +105,7 @@ class MyCapytain:
__MYCAPYTAIN_TRIPLE_GRAPH__.bind("dts", RDF_NAMESPACES.DTS)
__MYCAPYTAIN_TRIPLE_GRAPH__.bind("tei", RDF_NAMESPACES.TEI)
__MYCAPYTAIN_TRIPLE_GRAPH__.bind("skos", SKOS)
__MYCAPYTAIN_TRIPLE_GRAPH__.bind("cpt", RDF_NAMESPACES.CAPITAINS)


def set_graph(graph):
Expand Down
112 changes: 94 additions & 18 deletions MyCapytain/common/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,10 @@
"""
from __future__ import unicode_literals
from MyCapytain.common.utils import make_xml_node
from MyCapytain.common.constants import Mimetypes, get_graph
from MyCapytain.common.base import Exportable
from rdflib import BNode, Literal, Graph
from rdflib import BNode, Literal, Graph, URIRef, term


class Metadata(Exportable):
Expand All @@ -23,7 +24,7 @@ class Metadata(Exportable):
:cvar DEFAULT_EXPORT: Default export (CTS XML Inventory)
:cvar STORE: RDF Store
"""
EXPORT_TO = [Mimetypes.JSON.Std, Mimetypes.XML.RDF, Mimetypes.XML.RDFa, Mimetypes.JSON.LD]
EXPORT_TO = [Mimetypes.JSON.Std, Mimetypes.XML.RDF, Mimetypes.XML.RDFa, Mimetypes.JSON.LD, Mimetypes.XML.CapiTainS.CTS]
DEFAULT_EXPORT = Mimetypes.JSON.Std

def __init__(self, node=None, *args, **kwargs):
Expand Down Expand Up @@ -52,12 +53,33 @@ def add(self, key, value, lang=None):
:param value: Object of the triple
:param lang: Language of the triple if applicable
"""
if not isinstance(value, Literal):
if not isinstance(value, Literal) and lang is not None:
value = Literal(value, lang=lang)
elif not isinstance(value, (BNode, URIRef)):
value, _type = term._castPythonToLiteral(value)
if _type is None:
value = Literal(value)
else:
value = Literal(value, datatype=_type)
self.graph.add((self.asNode(), key, value))

def get(self, key, lang=None):
    """ Yield the objects of the triples (this node, key, object). Can filter on lang

    :param key: Predicate of the triple
    :param lang: Language of the triple if applicable. When given, only objects whose \
    ``language`` equals it are yielded
    :return: Generator over the matching objects
    :rtype: generator of (Literal or BNode or URIRef)
    """
    for o in self.graph.objects(self.asNode(), key):
        # Only Literal exposes ``language``; BNode and URIRef objects do not, so a
        # plain attribute access would raise AttributeError on them while filtering.
        if lang is None or getattr(o, "language", None) == lang:
            yield o

def get_single(self, key, lang=None):
""" Returns a single triple related to this node.
:param key: Predicate of the triple
:param lang: Language of the triple if applicable
Expand All @@ -74,15 +96,6 @@ def get(self, key, lang=None):
for o in self.graph.objects(self.asNode(), key):
return o

def get_all(self, key):
    """ Yield every object attached to this node through the given predicate

    :param key: Predicate of the triple
    :rtype: generator of (Literal or BNode or URIRef)
    """
    matches = self.graph.objects(self.asNode(), key)
    for match in matches:
        yield match

def __getitem__(self, item):
""" Quick access method. If
Expand All @@ -94,19 +107,70 @@ def __getitem__(self, item):
Metadata[Title] == [Metadata.get(Title, lang=lang1), Metadata.get(Title, lang=lang2)]
"""
if isinstance(item, tuple):
return self.get(item[0], item[1])
return list(self.graph[self.asNode():item])
return self.get_single(item[0], item[1])
return list(self.get(item))

def __export__(self, output=Mimetypes.JSON.Std, **kwargs):
def remove(self, predicate=None, obj=None):
    """ Remove from the graph the triples whose subject is this node

    :param predicate: Predicate to match, None to match all
    :param obj: Object to match, None to match all
    """
    pattern = (self.asNode(), predicate, obj)
    self.graph.remove(pattern)

def unlink(self, subj=None, predicate=None):
    """ Remove from the graph the triples whose object is this node

    :param subj: Subject to match, None to match all
    :param predicate: Predicate to match, None to match all
    """
    pattern = (subj, predicate, self.asNode())
    self.graph.remove(pattern)

def predicate_object(self, predicate=None, obj=None):
    """ Retrieve predicate and object around this object

    :param predicate: Predicate to match, None to match all
    :param obj: Object to match, None to match all
    :return: List of resources

    .. note:: NOTE(review): no statement follows this docstring in the visible source, \
    so a call currently returns None instead of the documented list - confirm whether \
    the implementation is still pending
    """
def __export__(self, output=Mimetypes.JSON.Std, only=None, exclude=None, **kwargs):
""" Export a set of Metadata
:param output: Mimetype to export to
:param only: Includes only terms from the given namespaces
:param exclude: Excludes terms from the given namespaces
:return: Formatted Export
.. warning:: exclude and only cannot be used together; if both are given, only takes precedence
"""
graph = Graph()
graph.namespace_manager = get_graph().namespace_manager
for predicate, object in self.graph[self.asNode()]:
graph.add((self.asNode(), predicate, object))

if only is not None:
_only = only
only = [str(s) for s in only]
for predicate in set(self.graph.predicates(subject=self.asNode())):
if str(predicate) not in only:
prefix, namespace, name = self.graph.compute_qname(predicate)
if str(namespace) in only:
_only.append(predicate)
for predicate, obj in self.graph[self.asNode()]:
if predicate in _only:
graph.add((self.asNode(), predicate, obj))
elif exclude is not None:
_only = []
exclude = [str(s) for s in exclude]
for predicate in set(self.graph.predicates(subject=self.asNode())):
prefix, namespace, name = self.graph.compute_qname(predicate)
if str(predicate) not in exclude and not str(namespace) in exclude:
_only.append(predicate)
for predicate, obj in self.graph[self.asNode()]:
if predicate in _only:
graph.add((self.asNode(), predicate, obj))
else:
for predicate, object in self.graph[self.asNode()]:
graph.add((self.asNode(), predicate, object))

if output == Mimetypes.JSON.Std:
out = {}
Expand All @@ -129,6 +193,18 @@ def __export__(self, output=Mimetypes.JSON.Std, **kwargs):
del graph
return out

elif output == Mimetypes.XML.CapiTainS.CTS:
strings = []
for pred, obj in graph.predicate_objects(self.asNode()):
kwargs = {}
if hasattr(obj, "language") and obj.language is not None:
kwargs["xml:lang"] = obj.language
if hasattr(obj, "datatype") and obj.datatype is not None:
kwargs["rdf:type"] = obj.datatype
strings.append(make_xml_node(graph, pred, text=obj, attributes=kwargs, complete=True))
del graph
return "\n".join(strings)

@staticmethod
def getOr(subject, predicate, *args, **kwargs):
""" Retrieve a metadata node or generate a new one
Expand Down
13 changes: 12 additions & 1 deletion MyCapytain/common/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ def LiteralToDict(value):
return None
return str(value)


class Subgraph(object):
""" Utility class to generate subgraph around one or more items
Expand Down Expand Up @@ -149,7 +150,6 @@ def add(self, *args, **kwargs):
self.graph.add(*args, **kwargs)



def xmliter(node):
""" Provides a simple XML Iter method which complies with either _Element or _ObjectifiedElement
Expand Down Expand Up @@ -451,3 +451,14 @@ def nested_set(dictionary, keys, value):
nested_get(dictionary, keys[:-1])[keys[-1]] = value


def expand_namespace(nsmap, string):
    """ If the string starts with a known prefix in nsmap, replace it by full URI

    Only the leading ``prefix:`` is expanded; any later occurrence of the same
    sequence inside the string is left untouched.

    :param nsmap: Dictionary of prefix -> uri of namespace. A ``None`` key (the way lxml \
    exposes the default namespace) is ignored
    :param string: String in which to replace the namespace
    :return: Expanded string with no namespace prefix, or the string unchanged if no prefix matches
    """
    for prefix, uri in nsmap.items():
        if prefix is None:
            # The default namespace has no prefix, so it can never match "prefix:"
            continue
        marker = prefix + ":"
        if string.startswith(marker):
            # Slice instead of str.replace so that only the leading prefix is expanded
            return uri + string[len(marker):]
    return string
56 changes: 51 additions & 5 deletions MyCapytain/resources/collections/cts.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,13 @@
"""
from __future__ import unicode_literals

from MyCapytain.resources.prototypes import text
from rdflib import URIRef, Literal
from rdflib.namespace import XSD
from lxml.objectify import IntElement, FloatElement

from MyCapytain.resources.prototypes.cts import inventory as cts
from MyCapytain.common.reference import Citation as CitationPrototype
from MyCapytain.common.utils import xmlparser
from MyCapytain.common.utils import xmlparser, expand_namespace
from MyCapytain.common.constants import XPATH_NAMESPACES, Mimetypes, RDF_NAMESPACES


Expand Down Expand Up @@ -80,6 +83,43 @@ def xpathDict(xml, xpath, cls, parent, **kwargs):
)


def __parse_structured_metadata__(obj, xml):
    """ Read the children of ``cpt:structured-metadata`` and register them on obj.metadata

    Each child element becomes one metadata entry: the predicate is built from the
    element's qualified tag, the value from its text content. The value is stored as
    a URIRef when the text starts with ``urn:``, ``http:``, ``https:`` or ``hdl:``, as
    a language-tagged value when ``xml:lang`` is present, as a typed Literal when
    ``rdf:datatype`` is present or the element is an objectified integer/float, and
    as the plain text otherwise.

    :param obj: Object whose metadata are parsed
    :param xml: XML that needs to be parsed
    """
    lang_attr = '{http://www.w3.org/XML/1998/namespace}lang'
    datatype_attr = "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}datatype"
    uri_schemes = ("urn:", "http:", "https:", "hdl:")

    for node in xml.xpath("cpt:structured-metadata/*", namespaces=XPATH_NAMESPACES):
        predicate = node.tag
        if "{" in predicate:
            # Clark notation "{namespace}local" -> full URI of the term
            namespace, local_name = tuple(predicate.split("}"))
            predicate = URIRef(namespace[1:] + local_name)

        text = str(node)
        attributes = node.attrib

        # URI-looking content wins over any attribute
        if text.startswith(uri_schemes):
            obj.metadata.add(predicate, URIRef(node))
            continue

        # Language-tagged content comes next
        if lang_attr in attributes:
            obj.metadata.add(predicate, text, lang=attributes[lang_attr])
            continue

        if datatype_attr in attributes:
            datatype = attributes[datatype_attr]
            # Prefixed datatypes (e.g. "xsd:integer") are expanded with the element's nsmap
            if ":" in datatype and not datatype.startswith("http"):
                datatype = expand_namespace(node.nsmap, datatype)
            value = Literal(text, datatype=URIRef(datatype))
        elif isinstance(node, IntElement):
            value = Literal(int(node), datatype=XSD.integer)
        elif isinstance(node, FloatElement):
            value = Literal(float(node), datatype=XSD.float)
        else:
            value = text
        obj.metadata.add(predicate, value)


class XmlCtsTextMetadata(cts.CtsTextMetadata):
""" Represents a CTS CtsTextMetadata
Expand Down Expand Up @@ -118,10 +158,10 @@ def parse_metadata(obj, xml):

# Added for commentary
for child in xml.xpath("ti:about", namespaces=XPATH_NAMESPACES):
#lg = child.get("{http://www.w3.org/XML/1998/namespace}lang")
#if lg is not None:
obj.set_link(RDF_NAMESPACES.CTS.term("about"), child.get('urn'))

__parse_structured_metadata__(obj, xml)

"""
online = xml.xpath("ti:online", namespaces=NS)
if len(online) > 0:
Expand Down Expand Up @@ -160,6 +200,7 @@ def parse(resource, parent=None):
XmlCtsTranslationMetadata.parse_metadata(o, xml)
return o


class XmlCtsCommentaryMetadata(cts.CtsCommentaryMetadata, XmlCtsTextMetadata):
""" Create a commentary subtyped PrototypeText object
"""
Expand All @@ -174,6 +215,7 @@ def parse(resource, parent=None):
XmlCtsCommentaryMetadata.parse_metadata(o, xml)
return o


class XmlCtsWorkMetadata(cts.CtsWorkMetadata):
""" Represents a CTS Textgroup in XML
"""
Expand Down Expand Up @@ -202,9 +244,11 @@ def parse(resource, parent=None):
# Parse children
xpathDict(xml=xml, xpath='ti:edition', cls=XmlCtsEditionMetadata, parent=o)
xpathDict(xml=xml, xpath='ti:translation', cls=XmlCtsTranslationMetadata, parent=o)
# Added for commentary
# Added for commentary
xpathDict(xml=xml, xpath='ti:commentary', cls=XmlCtsCommentaryMetadata, parent=o)

__parse_structured_metadata__(o, xml)

return o


Expand All @@ -229,6 +273,8 @@ def parse(resource, parent=None):

# Parse Works
xpathDict(xml=xml, xpath='ti:work', cls=XmlCtsWorkMetadata, parent=o)

__parse_structured_metadata__(o, xml)
return o


Expand Down

0 comments on commit 14b4b9c

Please sign in to comment.