Merge pull request #128 from Capitains/structured-metadata

2.0.0 rc1
Capitains · Apr 26, 2017 · 14b4b9c · 14b4b9c
2 parents 0760d58 + f99b810
commit 14b4b9c
Show file tree

Hide file tree

Showing 15 changed files with 528 additions and 104 deletions.
diff --git a/MyCapytain/__init__.py b/MyCapytain/__init__.py
@@ -9,4 +9,4 @@
 
 """
 
-__version__ = "2.0.0b20"
+__version__ = "2.0.0rc1"
diff --git a/MyCapytain/common/constants.py b/MyCapytain/common/constants.py
@@ -6,7 +6,10 @@
 XPATH_NAMESPACES = {
     "tei": "http://www.tei-c.org/ns/1.0",
     "ti": "http://chs.harvard.edu/xmlns/cts",
-    "xml": "http://www.w3.org/XML/1998/namespace"
+    "cpt": "http://purl.org/capitains/ns/1.0#",
+    "xsd": "http://www.w3.org/2001/XMLSchema#",
+    "xml": "http://www.w3.org/XML/1998/namespace",
+    "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
 }
 
 
@@ -19,10 +22,13 @@ class RDF_NAMESPACES:
     :type TEI: Namespace
     :cvar DC: DC Elements
     :type DC: Namespace
+    :cvar CAPITAINS: CapiTainS Ontology
+    :type CAPITAINS: Namespace
     """
     CTS = Namespace("http://chs.harvard.edu/xmlns/cts/")
     DTS = Namespace("http://w3id.org/dts-ontology/")
     TEI = Namespace("http://www.tei-c.org/ns/1.0/")
+    CAPITAINS = Namespace("http://purl.org/capitains/ns/1.0#")
 
 
 class Mimetypes:
@@ -68,6 +74,12 @@ class XML:
         CTS = "text/xml:CTS"
         TEI = "text/xml:tei"
 
+        class CapiTainS:
+            """ CapiTainS Guideline XML Structured metadata
+
+            """
+            CTS = "text/xml:CTS_CapiTainS"
+
     class PYTHON:
         """ Python Native Objects
 
@@ -93,6 +105,7 @@ class MyCapytain:
 __MYCAPYTAIN_TRIPLE_GRAPH__.bind("dts", RDF_NAMESPACES.DTS)
 __MYCAPYTAIN_TRIPLE_GRAPH__.bind("tei", RDF_NAMESPACES.TEI)
 __MYCAPYTAIN_TRIPLE_GRAPH__.bind("skos", SKOS)
+__MYCAPYTAIN_TRIPLE_GRAPH__.bind("cpt", RDF_NAMESPACES.CAPITAINS)
 
 
 def set_graph(graph):

diff --git a/MyCapytain/common/metadata.py b/MyCapytain/common/metadata.py
@@ -8,9 +8,10 @@
 
 """
 from __future__ import unicode_literals
+from MyCapytain.common.utils import make_xml_node
 from MyCapytain.common.constants import Mimetypes, get_graph
 from MyCapytain.common.base import Exportable
-from rdflib import BNode, Literal, Graph
+from rdflib import BNode, Literal, Graph, URIRef, term
 
 
 class Metadata(Exportable):
@@ -23,7 +24,7 @@ class Metadata(Exportable):
     :cvar DEFAULT_EXPORT: Default export (CTS XML Inventory)
     :cvar STORE: RDF Store
     """
-    EXPORT_TO = [Mimetypes.JSON.Std, Mimetypes.XML.RDF, Mimetypes.XML.RDFa, Mimetypes.JSON.LD]
+    EXPORT_TO = [Mimetypes.JSON.Std, Mimetypes.XML.RDF, Mimetypes.XML.RDFa, Mimetypes.JSON.LD, Mimetypes.XML.CapiTainS.CTS]
     DEFAULT_EXPORT = Mimetypes.JSON.Std
 
     def __init__(self, node=None, *args, **kwargs):
@@ -52,12 +53,33 @@ def add(self, key, value, lang=None):
         :param value: Object of the triple
         :param lang: Language of the triple if applicable
         """
-        if not isinstance(value, Literal):
+        if not isinstance(value, Literal) and lang is not None:
             value = Literal(value, lang=lang)
+        elif not isinstance(value, (BNode, URIRef)):
+            value, _type = term._castPythonToLiteral(value)
+            if _type is None:
+                value = Literal(value)
+            else:
+                value = Literal(value, datatype=_type)
         self.graph.add((self.asNode(), key, value))
 
     def get(self, key, lang=None):
-        """ Returns a triple related to this node
+        """ Returns triple related to this node. Can filter on lang
+
+        :param key: Predicate of the triple
+        :param lang: Language of the triple if applicable
+        :rtype: generator of Literal or BNode or URIRef
+        """
+        if lang is not None:
+            for o in self.graph.objects(self.asNode(), key):
+                if o.language == lang:
+                    yield o
+        else:
+            for o in self.graph.objects(self.asNode(), key):
+                yield o
+
+    def get_single(self, key, lang=None):
+        """ Returns a single triple related to this node.
 
         :param key: Predicate of the triple
         :param lang: Language of the triple if applicable
@@ -74,15 +96,6 @@ def get(self, key, lang=None):
             for o in self.graph.objects(self.asNode(), key):
                 return o
 
-    def get_all(self, key):
-        """ Returns a triple related to this node
-
-        :param key: Predicate of the triple
-        :rtype: List of [Literal or BNode or URIRef]
-        """
-        for o in self.graph.objects(self.asNode(), key):
-            yield o
-
     def __getitem__(self, item):
         """ Quick access method. If
 
@@ -94,19 +107,70 @@ def __getitem__(self, item):
             Metadata[Title] == [Metadata.get(Title, lang=lang1), Metadata.get(Title, lang=lang2)]
         """
         if isinstance(item, tuple):
-            return self.get(item[0], item[1])
-        return list(self.graph[self.asNode():item])
+            return self.get_single(item[0], item[1])
+        return list(self.get(item))
 
-    def __export__(self, output=Mimetypes.JSON.Std, **kwargs):
+    def remove(self, predicate=None, obj=None):
+        """ Remove triple matching the predicate or the object
+
+        :param predicate: Predicate to match, None to match all
+        :param obj: Object to match, None to match all
+        """
+        self.graph.remove((self.asNode(), predicate, obj))
+
+    def unlink(self, subj=None, predicate=None):
+        """ Remove triple where Metadata is the object
+
+        :param subj: Subject to match, None to match all
+        :param predicate: Predicate to match, None to match all
+        """
+        self.graph.remove((subj, predicate, self.asNode()))
+
+    def predicate_object(self, predicate=None, obj=None):
+        """ Retrieve predicate and object around this object
+
+        :param predicate: Predicate to match, None to match all
+        :param obj: Object to match, None to match all
+
+        :return: List of resources
+        """
+    def __export__(self, output=Mimetypes.JSON.Std, only=None, exclude=None, **kwargs):
         """ Export a set of Metadata
 
         :param output: Mimetype to export to
+        :param only: Includes only predicates matching the given URIs or belonging to the given namespaces
+        :param exclude: Excludes predicates matching the given URIs or belonging to the given namespaces
         :return: Formatted Export
+
+        .. warning:: only and exclude cannot be used together: when only is given, exclude is ignored
         """
         graph = Graph()
         graph.namespace_manager = get_graph().namespace_manager
-        for predicate, object in self.graph[self.asNode()]:
-            graph.add((self.asNode(), predicate, object))
+
+        if only is not None:
+            _only = only
+            only = [str(s) for s in only]
+            for predicate in set(self.graph.predicates(subject=self.asNode())):
+                if str(predicate) not in only:
+                    prefix, namespace, name = self.graph.compute_qname(predicate)
+                    if str(namespace) in only:
+                        _only.append(predicate)
+            for predicate, obj in self.graph[self.asNode()]:
+                if predicate in _only:
+                    graph.add((self.asNode(), predicate, obj))
+        elif exclude is not None:
+            _only = []
+            exclude = [str(s) for s in exclude]
+            for predicate in set(self.graph.predicates(subject=self.asNode())):
+                prefix, namespace, name = self.graph.compute_qname(predicate)
+                if str(predicate) not in exclude and not str(namespace) in exclude:
+                    _only.append(predicate)
+            for predicate, obj in self.graph[self.asNode()]:
+                if predicate in _only:
+                    graph.add((self.asNode(), predicate, obj))
+        else:
+            for predicate, object in self.graph[self.asNode()]:
+                graph.add((self.asNode(), predicate, object))
 
         if output == Mimetypes.JSON.Std:
             out = {}
@@ -129,6 +193,18 @@ def __export__(self, output=Mimetypes.JSON.Std, **kwargs):
             del graph
             return out
 
+        elif output == Mimetypes.XML.CapiTainS.CTS:
+            strings = []
+            for pred, obj in graph.predicate_objects(self.asNode()):
+                kwargs = {}
+                if hasattr(obj, "language") and obj.language is not None:
+                    kwargs["xml:lang"] = obj.language
+                if hasattr(obj, "datatype") and obj.datatype is not None:
+                    kwargs["rdf:type"] = obj.datatype
+                strings.append(make_xml_node(graph, pred, text=obj, attributes=kwargs, complete=True))
+            del graph
+            return "\n".join(strings)
+
     @staticmethod
     def getOr(subject, predicate, *args, **kwargs):
         """ Retrieve a metadata node or generate a new one

diff --git a/MyCapytain/common/utils.py b/MyCapytain/common/utils.py
@@ -87,6 +87,7 @@ def LiteralToDict(value):
         return None
     return str(value)
 
+
 class Subgraph(object):
     """ Utility class to generate subgraph around one or more items
 
@@ -149,7 +150,6 @@ def add(self, *args, **kwargs):
         self.graph.add(*args, **kwargs)
 
 
-
 def xmliter(node):
     """ Provides a simple XML Iter method which complies with either _Element or _ObjectifiedElement
 
@@ -451,3 +451,14 @@ def nested_set(dictionary,  keys, value):
     nested_get(dictionary, keys[:-1])[keys[-1]] = value
 
 
+def expand_namespace(nsmap, string):
+    """ If the string starts with a known prefix in nsmap, replace it by full URI
+
+    :param nsmap: Dictionary of prefix -> uri of namespace
+    :param string: String in which to replace the namespace
+    :return: String with its namespace prefix expanded to the full URI, or the unchanged string if no prefix matches
+    """
+    for ns in nsmap:
+        if string.startswith(ns+":"):
+            return string.replace(ns+":", nsmap[ns])
+    return string
diff --git a/MyCapytain/resources/collections/cts.py b/MyCapytain/resources/collections/cts.py
@@ -9,10 +9,13 @@
 """
 from __future__ import unicode_literals
 
-from MyCapytain.resources.prototypes import text
+from rdflib import URIRef, Literal
+from rdflib.namespace import XSD
+from lxml.objectify import IntElement, FloatElement
+
 from MyCapytain.resources.prototypes.cts import inventory as cts
 from MyCapytain.common.reference import Citation as CitationPrototype
-from MyCapytain.common.utils import xmlparser
+from MyCapytain.common.utils import xmlparser, expand_namespace
 from MyCapytain.common.constants import XPATH_NAMESPACES, Mimetypes, RDF_NAMESPACES
 
 
@@ -80,6 +83,43 @@ def xpathDict(xml, xpath, cls, parent, **kwargs):
         )
 
 
+def __parse_structured_metadata__(obj, xml):
+    """ Parse an XML object for structured metadata
+
+    :param obj: Object whose metadata are parsed
+    :param xml: XML that needs to be parsed
+    """
+    for metadata in xml.xpath("cpt:structured-metadata/*", namespaces=XPATH_NAMESPACES):
+        tag = metadata.tag
+        if "{" in tag:
+            ns, tag = tuple(tag.split("}"))
+            tag = URIRef(ns[1:]+tag)
+            s_m = str(metadata)
+            if s_m.startswith("urn:") or s_m.startswith("http:") or s_m.startswith("https:") or s_m.startswith("hdl:"):
+                obj.metadata.add(
+                    tag,
+                    URIRef(metadata)
+                )
+            elif '{http://www.w3.org/XML/1998/namespace}lang' in metadata.attrib:
+                obj.metadata.add(
+                    tag,
+                    s_m,
+                    lang=metadata.attrib['{http://www.w3.org/XML/1998/namespace}lang']
+                )
+            else:
+                if "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}datatype" in metadata.attrib:
+                    datatype = metadata.attrib["{http://www.w3.org/1999/02/22-rdf-syntax-ns#}datatype"]
+                    if not datatype.startswith("http") and ":" in datatype:
+                        datatype = expand_namespace(metadata.nsmap, datatype)
+                    obj.metadata.add(tag, Literal(s_m, datatype=URIRef(datatype)))
+                elif isinstance(metadata, IntElement):
+                    obj.metadata.add(tag, Literal(int(metadata), datatype=XSD.integer))
+                elif isinstance(metadata, FloatElement):
+                    obj.metadata.add(tag, Literal(float(metadata), datatype=XSD.float))
+                else:
+                    obj.metadata.add(tag, s_m)
+
+
 class XmlCtsTextMetadata(cts.CtsTextMetadata):
     """ Represents a CTS CtsTextMetadata
 
@@ -118,10 +158,10 @@ def parse_metadata(obj, xml):
 
         # Added for commentary
         for child in xml.xpath("ti:about", namespaces=XPATH_NAMESPACES):
-            #lg = child.get("{http://www.w3.org/XML/1998/namespace}lang")
-            #if lg is not None:
             obj.set_link(RDF_NAMESPACES.CTS.term("about"), child.get('urn'))
 
+        __parse_structured_metadata__(obj, xml)
+
         """
         online = xml.xpath("ti:online", namespaces=NS)
         if len(online) > 0:
@@ -160,6 +200,7 @@ def parse(resource, parent=None):
         XmlCtsTranslationMetadata.parse_metadata(o, xml)
         return o
 
+
 class XmlCtsCommentaryMetadata(cts.CtsCommentaryMetadata, XmlCtsTextMetadata):
     """ Create a commentary subtyped PrototypeText object
     """
@@ -174,6 +215,7 @@ def parse(resource, parent=None):
         XmlCtsCommentaryMetadata.parse_metadata(o, xml)
         return o
 
+
 class XmlCtsWorkMetadata(cts.CtsWorkMetadata):
     """ Represents a CTS Textgroup in XML
     """
@@ -202,9 +244,11 @@ def parse(resource, parent=None):
         # Parse children
         xpathDict(xml=xml, xpath='ti:edition', cls=XmlCtsEditionMetadata, parent=o)
         xpathDict(xml=xml, xpath='ti:translation', cls=XmlCtsTranslationMetadata, parent=o)
-       # Added for commentary
+        # Added for commentary
         xpathDict(xml=xml, xpath='ti:commentary', cls=XmlCtsCommentaryMetadata, parent=o)
 
+        __parse_structured_metadata__(o, xml)
+
         return o
 
 
@@ -229,6 +273,8 @@ def parse(resource, parent=None):
 
         # Parse Works
         xpathDict(xml=xml, xpath='ti:work', cls=XmlCtsWorkMetadata, parent=o)
+
+        __parse_structured_metadata__(o, xml)
         return o