Skip to content

Commit

Permalink
Added Metadata Namespace Property
Browse files Browse the repository at this point in the history
  • Loading branch information
PonteIneptique committed Dec 15, 2016
1 parent 2154180 commit da959b8
Show file tree
Hide file tree
Showing 10 changed files with 177 additions and 75 deletions.
90 changes: 65 additions & 25 deletions MyCapytain/common/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,11 @@
"""
from __future__ import unicode_literals
from six import text_type

from random import randint
from types import GeneratorType
from collections import defaultdict, OrderedDict
from MyCapytain.common.utils import Mimetypes, NS, RDF_PREFIX
from MyCapytain.common.utils import Mimetypes, RDF_PREFIX, Namespace, RDF_MAPPING
from MyCapytain.errors import UnknownNamespace


class Metadatum(object):
Expand All @@ -21,6 +23,8 @@ class Metadatum(object):
:type name: text_type
:param children: List of tuples, where first element is the key, and second the value
:type children: List
:param namespace: Object representing a namespace
:type namespace: Namespace
:Example:
>>> a = Metadatum("label", [("lat", "Amores"), ("fre", "Les Amours")])
Expand All @@ -32,17 +36,50 @@ class Metadatum(object):
"""

def __init__(self, name, children=None, namespace=None):
    """ Initiate a Metadatum object

    When no namespace is given, the name itself is inspected: a full URI
    ("http://purl.org/dc/elements/1.1/title") or a prefixed name ("dc:title")
    is split into a Namespace and a local name.

    :param name: Name of the metadata field (plain, prefixed or full URI)
    :type name: text_type
    :param children: List of tuples, where first element is the key, and second the value
    :type children: List
    :param namespace: Object representing a namespace. When given, name is kept as is
    :type namespace: Namespace
    :raises UnknownNamespace: When name uses a prefix which is not in RDF_PREFIX
    """
    self.name = name
    self.children = OrderedDict()
    self.default = None
    self.__namespace__ = namespace

    if namespace is None:
        if "//" in name:
            # Full URI: cut after the last "/" or "#" so that the namespace URI keeps
            # its trailing separator. RDF_MAPPING keys carry that separator and the
            # export code concatenates uri + name.
            cut = max(name.rfind("/"), name.rfind("#")) + 1
            uri, self.name = name[:cut], name[cut:]
            if uri not in RDF_MAPPING:
                # Unknown namespace URI: fall back on a generated prefix
                prefix = "ns{}".format(randint(1, 4096))
            else:
                prefix = RDF_MAPPING[uri]
            self.namespace = Namespace(uri, prefix)
        elif ":" in name:
            # Prefixed name: only the first colon separates prefix and local name.
            # This branch is exclusive with the URI one, as every URI contains ":".
            prefix, self.name = name.split(":", 1)

            if prefix not in RDF_PREFIX:
                raise UnknownNamespace(
                    "%s is unknown. Update MyCapytain.common.utils.RDF_PREFIX to support this prefix" % prefix
                )
            self.namespace = Namespace(RDF_PREFIX[prefix], prefix)

    if isinstance(children, list):
        for tup in children:
            self[tup[0]] = tup[1]

@property
def namespace(self):
    """ Namespace attached to this metadata entry

    :return: Namespace currently stored on the entry
    :rtype: Namespace
    """
    return self.__namespace__

@namespace.setter
def namespace(self, namespace):
    """ Replace the namespace of the metadata entry

    :param namespace: Namespace to set, or None
    :type namespace: Namespace
    :raises TypeError: When the value is neither None nor a Namespace
    """
    # Accept the two valid kinds of value first, reject everything else afterwards
    if namespace is None or isinstance(namespace, Namespace):
        self.__namespace__ = namespace
        return
    raise TypeError("Only None and Namespace value are accepted")

def __getitem__(self, key):
""" Add an iterable access method
Expand Down Expand Up @@ -208,7 +245,7 @@ def __init__(self, keys=None):
self.metadata = defaultdict(Metadatum)
self.__keys__ = []

if keys is not None:
if keys is not None and isinstance(keys, (list, set, GeneratorType)):
for key in keys:
self[key] = Metadatum(name=key)

Expand Down Expand Up @@ -370,46 +407,49 @@ def keys(self):
"""
return self.__keys__

def export(self, mime=Mimetypes.JSON.Std):
if mime == Mimetypes.JSON.Std:
def export(self, output=Mimetypes.JSON.Std):
""" Export a set of Metadata
:param output: Mimetype to export to
:return: Formatted Export
"""
if output == Mimetypes.JSON.Std:
return {
key: getattr(value, "__getstate__")() for key, value in self.metadata.items()
}
elif mime == Mimetypes.JSON.DTS:
elif output == Mimetypes.JSON.DTS.Std:
descs = {

}
for key in sorted(self.metadata.keys()):
metadatum = self.metadata[key]
ks = key.split(":")
if len(ks) == 2:
ns, k = tuple(ks)
if metadatum.namespace is not None:
ns = metadatum.namespace.uri
else:
ns, k = RDF_PREFIX["cts"], key
if ns in RDF_PREFIX:
ns = RDF_PREFIX[ns]
ns = ""

for lang, value in metadatum:
if lang not in descs:
descs[lang] = {"@language": lang}
descs[lang][ns+k] = value
descs[lang][ns+metadatum.name] = value
return [value for value in descs.values()]

elif mime == Mimetypes.XML.RDF:
elif output == Mimetypes.XML.RDF:
out = ""
for key in sorted(self.metadata.keys()):
metadatum = self.metadata[key]
ks = key.split(":")
if len(ks) == 2:
ns, k = tuple(ks)
if ns in RDF_PREFIX:
ns = RDF_PREFIX[ns]
if metadatum.namespace is None:
out += "".join([
"<{0} xml:lang=\"{1}\">{2}</{0}>".format(metadatum.name, lang, value)
for lang, value in metadatum
])
else:
ns, k = NS["ti"], key
out += "".join([
"<{1} xmlns=\"{0}/\" xml:lang=\"{2}\">{3}</{1}>".format(ns, k, lang, value)
for lang, value in metadatum
])
out += "".join([
"<{1} xmlns=\"{0}\" xml:lang=\"{2}\">{3}</{1}>".format(
metadatum.namespace.uri, metadatum.name, lang, value
)
for lang, value in metadatum
])
return """<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
<rdf:Description>
"""+out+"""
Expand Down
18 changes: 13 additions & 5 deletions MyCapytain/common/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,25 @@
from six import text_type
from functools import reduce

from collections import OrderedDict
from collections import OrderedDict, namedtuple
from lxml import etree
from io import IOBase, StringIO
import re
from copy import copy
from lxml.objectify import ObjectifiedElement, parse


__strip = re.compile("([ ]{2,})+")
__parser__ = etree.XMLParser(collect_ids=False, resolve_entities=False)

""" Namespace """
#: Lightweight (uri, prefix) pair describing a namespace
Namespace = namedtuple("Namespace", "uri prefix")


class NAMESPACES:
    """ Ready-made Namespace objects, exposed as class attributes """
    #: CTS namespace, bound to the "ti" prefix
    CTS = Namespace(uri="http://chs.harvard.edu/xmlns/cts/", prefix="ti")
    #: TEI namespace, bound to the "tei" prefix
    TEI = Namespace(uri="http://www.tei-c.org/ns/1.0/", prefix="tei")
    #: Dublin Core elements namespace, bound to the "dc" prefix
    DC = Namespace(uri="http://purl.org/dc/elements/1.1/", prefix="dc")


def xmliter(node):
""" Provides a simple XML Iter method which complies with either _Element or _ObjectifiedElement
Expand Down Expand Up @@ -79,12 +87,12 @@ def normalize(string):
"dbpedia": "http://dbpedia.org/resource/",

"tei": "http://www.tei-c.org/ns/1.0/",
"cts": "http://chs.harvard.edu/xmlns/cts/"
"ti": "http://chs.harvard.edu/xmlns/cts/"
}

#: Mapping of known domains to RDF Classical Prefixes
RDF_MAPPING = {
'http://chs.harvard.edu/xmlns/cts/': 'cts',
'http://chs.harvard.edu/xmlns/cts/': 'ti',
'http://dbpedia.org/ontology/': 'dbo',
'http://dbpedia.org/property/': 'dbp',
'http://dbpedia.org/resource/': 'dbpedia',
Expand Down Expand Up @@ -434,4 +442,4 @@ class MyCapytain:
"""
ReadableText = "Capitains/ReadableText"

PLAINTEXT = "text/plain"
PLAINTEXT = "text/plain"
5 changes: 5 additions & 0 deletions MyCapytain/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,8 @@ class MissingAttribute(Exception):
class UnknownObjectError(ValueError):
""" This error is thrown when an object does not exist in an inventory or in an API
"""


class UnknownNamespace(ValueError):
    """ Raised when a namespace, or the prefix standing for it, is not known
    """
25 changes: 15 additions & 10 deletions MyCapytain/resources/prototypes/cts/inventory.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@

from MyCapytain.resources.prototypes.metadata import Collection
from MyCapytain.common.reference import URN
from MyCapytain.common.metadata import Metadata
from MyCapytain.common.utils import RDF_PREFIX
from MyCapytain.common.metadata import Metadata, Metadatum
from MyCapytain.common.utils import RDF_PREFIX, NAMESPACES
from MyCapytain.errors import InvalidURN
from collections import defaultdict
from copy import copy, deepcopy
Expand All @@ -32,7 +32,7 @@ def __init__(self, resource=None):
super(CTSCollection, self).__init__()

if hasattr(type(self), "CTSMODEL"):
self.properties[RDF_PREFIX["cts"]+"model"] = RDF_PREFIX["cts"] + type(self).CTSMODEL
self.properties[RDF_PREFIX["ti"]+"model"] = RDF_PREFIX["ti"] + type(self).CTSMODEL

self.resource = None
if resource is not None:
Expand Down Expand Up @@ -165,7 +165,7 @@ def TEXT_URI(self):
:return: CTS Ontology Edition or Translation object
:rtype: str
"""
return RDF_PREFIX["cts"] + self.subtype
return RDF_PREFIX["ti"] + self.subtype

def __init__(self, resource=None, urn=None, parents=None, subtype="Edition"):
super(Text, self).__init__()
Expand All @@ -177,7 +177,10 @@ def __init__(self, resource=None, urn=None, parents=None, subtype="Edition"):
self.parents = list()
self.subtype = subtype
self.validate = None
self.metadata = Metadata(keys=["label", "description", "namespaceMapping"])
self.metadata = Metadata()
self.metadata["label"] = Metadatum(name="label", namespace=NAMESPACES.CTS)
self.metadata["description"] = Metadatum(name="description", namespace=NAMESPACES.CTS)
self.metadata["namespaceMapping"] = Metadatum(name="namespaceMapping", namespace=NAMESPACES.CTS)

if urn is not None:
self.urn = URN(urn)
Expand Down Expand Up @@ -264,7 +267,7 @@ class Work(CTSCollection):
"""

DC_TITLE_KEY = "title"
TYPE_URI = RDF_PREFIX["cts"] + "Work"
TYPE_URI = RDF_PREFIX["ti"] + "Work"

def __init__(self, resource=None, urn=None, parents=None):
super(Work, self).__init__()
Expand All @@ -273,7 +276,8 @@ def __init__(self, resource=None, urn=None, parents=None):
self.urn = None
self.texts = defaultdict(Text)
self.parents = list()
self.metadata = Metadata(keys=["title"])
self.metadata = Metadata()
self.metadata["title"] = Metadatum(name="title", namespace=NAMESPACES.CTS)

if urn is not None:
self.urn = URN(urn)
Expand Down Expand Up @@ -356,7 +360,7 @@ class TextGroup(CTSCollection):
:type parents: Tuple.<TextInventory>
"""
DC_TITLE_KEY = "groupname"
TYPE_URI = RDF_PREFIX["cts"] + "TextGroup"
TYPE_URI = RDF_PREFIX["ti"] + "TextGroup"

@property
def members(self):
Expand All @@ -368,7 +372,8 @@ def __init__(self, resource=None, urn=None, parents=None):
self.urn = None
self.works = defaultdict(Work)
self.parents = list()
self.metadata = Metadata(keys=["groupname"])
self.metadata = Metadata()
self.metadata["groupname"] = Metadatum(name="groupname", namespace=NAMESPACES.CTS)

if urn is not None:
self.urn = URN(urn)
Expand Down Expand Up @@ -428,7 +433,7 @@ class TextInventory(CTSCollection):
:param id: Identifier of the TextInventory
:type id: str
"""
TYPE_URI = RDF_PREFIX["cts"] + "TextInventory"
TYPE_URI = RDF_PREFIX["ti"] + "TextInventory"

@property
def members(self):
Expand Down
13 changes: 9 additions & 4 deletions MyCapytain/resources/prototypes/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from copy import deepcopy

from MyCapytain.common.metadata import Metadata
from MyCapytain.common.utils import RDF_PREFIX, Mimetypes
from MyCapytain.common.utils import RDF_PREFIX, Mimetypes, NAMESPACES


class Collection(object):
Expand All @@ -33,14 +33,19 @@ def title(self):
:rtype: Metadata
"""
if self.__title__ is not None:
return self.__title__
if hasattr(type(self), "DC_TITLE_KEY") and self.DC_TITLE_KEY:
__title = Metadata(keys="dc:title")
__title["dc:title"] = deepcopy(self.metadata[type(self).DC_TITLE_KEY])
return __title
self.__title__ = Metadata(keys=["dc:title"])
self.__title__["dc:title"] = deepcopy(self.metadata[type(self).DC_TITLE_KEY])
self.__title__["dc:title"].namespace = NAMESPACES.DC
self.__title__["dc:title"].name = "title"
return self.__title__

def __init__(self):
self.metadata = Metadata()
self.__id__ = None
self.__title__ = None
self.properties = {
RDF_PREFIX["dts"]+"model": "http://w3id.org/dts-ontology/collection",
RDF_PREFIX["rdf"]+"type": self.TYPE_URI
Expand Down

0 comments on commit da959b8

Please sign in to comment.