Skip to content

Commit

Permalink
Merge branch 'dts-parser'
Browse files Browse the repository at this point in the history
  • Loading branch information
PonteIneptique committed Feb 7, 2017
2 parents 733ed1a + 3a850d1 commit f1fa902
Show file tree
Hide file tree
Showing 11 changed files with 486 additions and 128 deletions.
2 changes: 1 addition & 1 deletion MyCapytain/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,4 @@
"""

__version__ = "2.0.0b3"
__version__ = "2.0.0b4"
2 changes: 1 addition & 1 deletion MyCapytain/common/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ class MyCapytain:
:cvar ReadableText: MyCapytain.resources.prototypes.text.CitableText
"""
ReadableText = "Capitains/ReadableText"
TextualElement = "Capitains/TextualElement"

PLAINTEXT = "text/plain"

Expand Down
64 changes: 41 additions & 23 deletions MyCapytain/resolvers/cts/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,20 @@
"""
import io
import logging
import os.path
from glob import glob
from math import ceil

from MyCapytain.common.reference import URN, Reference
from MyCapytain.common.utils import xmlparser
from MyCapytain.errors import InvalidURN
from MyCapytain.resolvers.prototypes import Resolver
from MyCapytain.resources.collections.cts import TextInventory, TextGroup, Work, Citation, Text as InventoryText, \
Translation, Edition
from MyCapytain.resources.prototypes.cts.inventory import TextInventoryCollection
from MyCapytain.resources.texts.locals.tei import Text
from MyCapytain.resolvers.prototypes import Resolver
from MyCapytain.errors import InvalidURN
from MyCapytain.common.reference import URN, Reference
from glob import glob
import os.path
from math import ceil
import logging
from MyCapytain.resolvers.utils import CollectionDispatcher


class CTSCapitainsLocalResolver(Resolver):
Expand Down Expand Up @@ -45,10 +47,18 @@ def inventory(self):
def texts(self):
return self.__texts__

def __init__(self, resource, name=None, logger=None):
def __init__(self, resource, name=None, logger=None, dispatcher=None):
""" Initiate the XMLResolver
"""
self.__inventory__ = TextInventory()
if dispatcher is None:
inventory_collection = TextInventoryCollection(identifier="defaultTic")
ti = TextInventory("default")
ti.parent = inventory_collection
ti.set_label("Default collection", "eng")
self.dispatcher = CollectionDispatcher(inventory_collection)
else:
self.dispatcher = dispatcher
self.__inventory__ = self.dispatcher.collection
self.__texts__ = []
self.name = name

Expand Down Expand Up @@ -85,26 +95,24 @@ def parse(self, resource):
textgroup = TextGroup.parse(
resource=__xml__
)
str_urn = str(textgroup.urn)
if str_urn in self.inventory.textgroups:
self.inventory.textgroups[str_urn].update(textgroup)
tg_urn = str(textgroup.urn)
if tg_urn in self.inventory:
self.inventory[tg_urn].update(textgroup)
else:
self.inventory.textgroups[str_urn] = textgroup
self.dispatcher.dispatch(textgroup, path=__cts__)

for __subcts__ in glob("{parent}/*/__cts__.xml".format(parent=os.path.dirname(__cts__))):
with io.open(__subcts__) as __xml__:
work = Work.parse(
resource=__xml__,
parent=self.inventory.textgroups[str_urn]
parent=self.inventory[tg_urn]
)
work_urn = str(work.urn)
if work_urn in self.inventory.textgroups[str_urn].works:
self.inventory.textgroups[str_urn].works[work_urn].update(work)
else:
self.inventory.textgroups[str_urn].works[work_urn] = work
if work_urn in self.inventory[tg_urn].works:
self.inventory[work_urn].update(work)

for __textkey__ in work.texts:
__text__ = self.inventory.textgroups[str_urn].works[work_urn].texts[__textkey__]
__text__ = self.inventory[__textkey__]
__text__.path = "{directory}/{textgroup}.{work}.{version}.xml".format(
directory=os.path.dirname(__subcts__),
textgroup=__text__.urn.textgroup,
Expand Down Expand Up @@ -133,7 +141,7 @@ def parse(self, resource):
del t
__text__.citation = cites[-1]
self.logger.info("%s has been parsed ", __text__.path)
if __text__.citation:
if __text__.citation.isEmpty() is False:
self.texts.append(__text__)
else:
self.logger.error("%s has no passages", __text__.path)
Expand Down Expand Up @@ -256,8 +264,16 @@ def getMetadata(self, objectId=None, **filters):
"""
if objectId is None:
return self.inventory
elif objectId in self.inventory.children.keys():
return self.inventory[objectId]
texts, _, _ = self.__getTextMetadata__(urn=objectId)
inventory = TextInventory()

# We store inventory names and if there is only one we recreate the inventory
inv_names = [text.parent.parent.parent.id for text in texts]
if len(set(inv_names)) == 1:
inventory = TextInventory(name=inv_names[0])
else:
inventory = TextInventory()
# For each text we found using the filter
for text in texts:
tg_urn = str(text.parent.parent.urn)
Expand All @@ -271,9 +287,11 @@ def getMetadata(self, objectId=None, **filters):
Work(urn=wk_urn, parent=inventory.textgroups[tg_urn])

if isinstance(text, Edition):
Edition(urn=txt_urn, parent=inventory.textgroups[tg_urn].works[wk_urn])
x = Edition(urn=txt_urn, parent=inventory.textgroups[tg_urn].works[wk_urn])
x.citation = text.citation
elif isinstance(text, Translation):
Translation(urn=txt_urn, parent=inventory.textgroups[tg_urn].works[wk_urn])
x = Translation(urn=txt_urn, parent=inventory.textgroups[tg_urn].works[wk_urn])
x.citation = text.citation

return inventory[objectId]

Expand Down
31 changes: 31 additions & 0 deletions MyCapytain/resolvers/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
class CollectionDispatcher:
    """ Attach collections to one of the inventories of a root collection.

    The dispatcher keeps an ordered list of ``(inventory_name, predicate)``
    rules. The list is seeded with a catch-all predicate bound to the default
    inventory; rules registered afterwards are evaluated first, because
    :meth:`dispatch` walks the list from the most recently added rule back
    to the oldest one.

    :param collection: Root collection; its ``children`` mapping is looked up \
    by inventory name when a rule matches
    :param default_inventory_name: Name of the fallback inventory. When it is \
    ``None``, the ``id`` of the first value of ``collection.children`` is used
    """
    def __init__(self, collection, default_inventory_name=None):
        self.collection = collection

        fallback = default_inventory_name
        if fallback is None:
            fallback = list(collection.children.values())[0].id

        def accept_everything(x, **k):
            # Catch-all rule: whatever no other predicate claims ends up here
            return True

        self.__methods__ = [(fallback, accept_everything)]

    @property
    def methods(self):
        """ Registered rules, oldest first, as ``(inventory_name, predicate)`` tuples
        """
        return self.__methods__

    def add(self, func, inventory_name=None):
        """ Register a new rule

        :param func: Predicate called as ``func(collection, **kwargs)``; only \
        the literal ``True`` counts as a match
        :param inventory_name: Key in the root collection's ``children`` \
        under which matching collections are attached
        """
        rule = (inventory_name, func)
        self.methods.append(rule)

    def inventory(self, inventory_name):
        """ Decorator flavour of :meth:`add`

        :param inventory_name: Inventory the decorated predicate feeds
        :return: Decorator which registers the function and returns it untouched
        """
        def decorator(f):
            self.add(func=f, inventory_name=inventory_name)
            return f

        return decorator

    def dispatch(self, collection, **kwargs):
        """ Set ``collection.parent`` to the inventory of the first matching rule

        :param collection: Object to attach
        :param kwargs: Extra keyword arguments forwarded to every predicate
        :raises Exception: When no rule returned ``True``
        """
        for target, accepts in reversed(self.methods):
            if accepts(collection, **kwargs) is not True:
                continue
            collection.parent = self.collection.children[target]
            return
        raise Exception("Text not dispatched %s" % collection.id)
75 changes: 0 additions & 75 deletions MyCapytain/resources/collections/cts.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,28 +84,8 @@ class Text(cts.PrototypeText):
""" Represents a CTS PrototypeText
"""
EXPORT_TO = [Mimetypes.PYTHON.MyCapytain.ReadableText, Mimetypes.PYTHON.ETREE]
DEFAULT_EXPORT = Mimetypes.PYTHON.ETREE

def __export__(self, output=Mimetypes.PYTHON.ETREE, domain="", **kwargs):
""" Create a {format} version of the PrototypeWork
:param output: Format to be chosen (Only XML for now)
:type output: basestring, citation
:param domain: Domain to prefix IDs
:type domain: str
:rtype: lxml.etree._Element
:returns: XML representation of the object
"""
if output == Mimetypes.PYTHON.ETREE:
return xmlparser(self.export(Mimetypes.XML.CTS))
elif output == Mimetypes.PYTHON.MyCapytain.ReadableText:
complete_metadata = self.metadata
for parent in self.parents:
if isinstance(parent, cts.PrototypeCTSCollection) and hasattr(parent, "metadata"):
complete_metadata = complete_metadata + parent.metadata
return text.CitableText(urn=self.urn, citation=self.citation, metadata=complete_metadata, **kwargs)

@staticmethod
def __findCitations(obj, xml, xpath="ti:citation"):
""" Find citation in current xml. Used as a loop for xmlparser()
Expand Down Expand Up @@ -176,27 +156,8 @@ def parse(resource, parent=None):


class Work(cts.PrototypeWork):

""" Represents a CTS Textgroup in XML
:cvar EXPORT_TO: List of exportable supported formats
:cvar DEFAULT_EXPORT: Default export (CTS XML Inventory)
"""
EXPORT_TO = [Mimetypes.PYTHON.ETREE]
DEFAULT_EXPORT = Mimetypes.PYTHON.ETREE

def __export__(self, output=Mimetypes.PYTHON.ETREE, domain="", **kwargs):
""" Create a {format} version of the PrototypeWork
:param output: Format to be chosen (Only XML for now)
:type output: basestring
:param domain: Domain to prefix IDs
:type domain: str
:rtype: lxml.etree._Element
:returns: XML representation of the object
"""
if output == Mimetypes.PYTHON.ETREE:
return xmlparser(self.export(Mimetypes.XML.CTS))

@staticmethod
def parse(resource, parent=None):
Expand Down Expand Up @@ -227,26 +188,8 @@ def parse(resource, parent=None):


class TextGroup(cts.PrototypeTextGroup):

""" Represents a CTS Textgroup in XML
:cvar EXPORT_TO: List of exportable supported formats
:cvar DEFAULT_EXPORT: Default export (CTS XML Inventory)
"""
EXPORT_TO = [Mimetypes.PYTHON.ETREE]
DEFAULT_EXPORT = Mimetypes.PYTHON.ETREE

def __export__(self, output=Mimetypes.PYTHON.ETREE, domain="", **kwargs):
""" Create a {output} version of the Textgroup
:param output: Format to be chosen
:type output: basestring
:param domain: Domain to prefix IDs when necessary
:type domain: str
:returns: Desired output formatted resource
"""
if output == Mimetypes.PYTHON.ETREE:
return xmlparser(self.export(Mimetypes.XML.CTS))

@staticmethod
def parse(resource, parent=None):
Expand All @@ -269,26 +212,8 @@ def parse(resource, parent=None):


class TextInventory(cts.PrototypeTextInventory):

""" Represents a CTS Inventory file
:cvar EXPORT_TO: List of exportable supported formats
:cvar DEFAULT_EXPORT: Default export (CTS XML Inventory)
"""
EXPORT_TO = [Mimetypes.PYTHON.ETREE]
DEFAULT_EXPORT = Mimetypes.PYTHON.ETREE

def __export__(self, output=Mimetypes.PYTHON.ETREE, domain="", **kwargs):
""" Create a {output} version of the PrototypeTextInventory
:param output: Format to be chosen
:type output: basestring
:param domain: Domain to prefix IDs when necessary
:type domain: str
:returns: Desired output formatted resource
"""
if output == Mimetypes.PYTHON.ETREE:
return xmlparser(self.export(output=Mimetypes.XML.CTS))

@staticmethod
def parse(resource):
Expand Down
63 changes: 63 additions & 0 deletions MyCapytain/resources/collections/dts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
from MyCapytain.resources.prototypes.metadata import Collection
from rdflib import URIRef


class DTSCollection(Collection):
    """ Collection built from the dict representation of a DTS JSON-LD response
    """
    @staticmethod
    def parse(resource, mimetype="application/json+ld"):
        """ Given a dict representation of a json object, generate a DTS Collection

        The following keys are read from ``resource``: ``@id``, ``type``,
        ``version``, ``label`` (list of dicts with ``value`` and ``lang``),
        ``metadata`` (dict), ``members`` (dict with a ``contents`` list) and
        ``parents`` (list). A missing key raises ``KeyError``.

        :param resource: Parsed JSON object describing the collection
        :type resource: dict
        :param mimetype: Mimetype of the source document (currently unused)
        :type mimetype: str
        :return: The populated collection
        :rtype: DTSCollection
        """
        obj = DTSCollection(identifier=resource["@id"])
        obj.type = resource["type"]
        obj.version = resource["version"]

        for label in resource["label"]:
            obj.set_label(label["value"], label["lang"])

        for key, value in resource["metadata"].items():
            term = URIRef(key)
            # Treat a single value as a one-item list so that both shapes share
            # one code path. Each item is inspected on its own, which makes an
            # empty list a no-op instead of an IndexError on ``value[0]``.
            values = value if isinstance(value, list) else [value]
            for subvalue in values:
                if isinstance(subvalue, dict):
                    # Language-tagged literal
                    obj.metadata.add(term, subvalue["@value"], subvalue["@lang"])
                elif subvalue.startswith(("http", "urn")):
                    # Anything looking like a URL or a URN is stored as a URI
                    obj.metadata.add(term, URIRef(subvalue))
                else:
                    obj.metadata.add(term, subvalue)

        for member in resource["members"]["contents"]:
            subobj = DTSCollectionShort.parse(member)
            # The parent is the collection being built, not the raw member dict
            subobj.parent = obj

        # NOTE(review): every entry of "parents" is assigned in turn as the
        # parent of ``obj`` itself, exactly as before; if "parents" is meant
        # to describe a chain of ancestors, this loop should link each entry
        # to the next one instead - confirm against the DTS payload ordering.
        for member in resource["parents"]:
            obj.parent = DTSCollectionShort.parse(member)

        return obj


class DTSCollectionShort(DTSCollection):
    """ Abbreviated DTS collection, as found in the member and parent lists
    of a full DTS collection
    """
    @staticmethod
    def parse(resource, mimetype="application/json+ld"):
        """ Given a dict representation of a json object, generate a short DTS Collection

        Only ``@id``, ``type``, ``model`` and ``label`` (list of dicts with
        ``value`` and ``lang``) are read from ``resource``. A missing key
        raises ``KeyError``.

        :param resource: Parsed JSON object describing the collection
        :type resource: dict
        :param mimetype: Mimetype of the source document (currently unused; \
        accepted so the signature matches ``DTSCollection.parse``)
        :type mimetype: str
        :return: The populated collection
        :rtype: DTSCollectionShort
        """
        obj = DTSCollectionShort(identifier=resource["@id"])
        obj.type = resource["type"]
        obj.model = resource["model"]
        for label in resource["label"]:
            obj.set_label(label["value"], label["lang"])
        return obj

0 comments on commit f1fa902

Please sign in to comment.