Skip to content

Commit

Permalink
(CtsReferenceSet/CtsReference) Breaking change and massive rework of …
Browse files Browse the repository at this point in the history
…getReffs and getSiblings functions

These changes are related to the needs behind MyCapytain's adoption of the upcoming DTS. Reuse of native objects was preferred.

- Introduction of `Reference` and `ReferenceSet`
    - `Reference` is a tuple derivative that has two properties (`start`, `end`) and one method `is_range()`
        - `start` and `end` are strings or string derivatives
    - `ReferenceSet` is a list derivative and contains References
        - It has a `.citation` property about the structure of the current set
- (Breaking) The Resolver Prototype and its derivations now return ReferenceSet objects from `getReffs()`
- (cts.Reference) Renamed CtsReference.
    - Parsing of sub-information is now done on the fly to avoid a performance hit with the move to ReferenceSet in `.getReffs()`
    - **Warning** : `len(CtsReference("1.1"))` should be replaced with `CtsReference("1.1").depth`
    - To support this change, CtsReference is now a tuple. It still has a special `str(ref)` behavior, as CTS URNs do work with range identifiers
    - `CtsReference.start` and `CtsReference.end` are now `CtsSinglePassageId`
- (cts.Reference) Introduction of CtsSinglePassageId
    - `CtsSinglePassageId` is a derivation of str
    - It has a `list` property to support the former syntax: `CtsReference("1.1").start.list` and returns the same type
    - It has a `depth` and `len` which are equivalent in this specific case
    - `subreference` is now parsed on the fly as `CtsWordReference`
- (cts.Reference) Introduction of `CtsWordReference`
    - Has `.word` and `.counter` properties for `@Achilles[1]`

- Update examples
- Update documentation
  • Loading branch information
PonteIneptique committed Aug 30, 2018
1 parent 162ee5a commit 6f0c245
Show file tree
Hide file tree
Showing 11 changed files with 150 additions and 131 deletions.
2 changes: 1 addition & 1 deletion MyCapytain/common/reference/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@
"""
from ._base import NodeId, BaseCitationSet, BaseReference, BaseReferenceSet
from ._capitains_cts import Citation, CtsReference, URN
from ._capitains_cts import Citation, CtsReference, CtsReferenceSet, URN
from ._dts_1 import DtsCitation, DtsCitationSet
19 changes: 17 additions & 2 deletions MyCapytain/common/reference/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def citation(self):
return self._citation

def __new__(cls, *refs, citation=None):
obj = list.__new__(BaseReferenceSet, refs)
obj = list.__new__(cls, refs)
obj._citation = None

if citation:
Expand Down Expand Up @@ -130,7 +130,7 @@ def depth(self):
:return: Depth of the citation scheme
"""
if len(self.children):
return 1 + max([child.depth for child in self.children])
return max([child.depth for child in self.children])
else:
return 0

Expand Down Expand Up @@ -321,6 +321,21 @@ def __export__(self, output=None, context=False, namespace_manager=None, **kwarg

return _out

@property
def depth(self):
""" Depth of the citation scheme
.. example:: If we have a Book, Poem, Line system, and the citation we are looking at is Poem, depth is 1
:rtype: int
:return: Depth of the citation scheme
"""
if len(self.children):
return 1 + max([child.depth for child in self.children])
else:
return 1


class NodeId(object):
""" Collection of directional references for a Tree
Expand Down
100 changes: 52 additions & 48 deletions MyCapytain/common/reference/_capitains_cts.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
import re
from copy import copy
from typing import List
from typing import Optional, List, Union
from lxml.etree import _Element

from MyCapytain.common.constants import Mimetypes, get_graph, RDF_NAMESPACES, XPATH_NAMESPACES
from MyCapytain.common.utils import make_xml_node

from ._base import BaseCitation, BaseReference
from ._base import BaseCitation, BaseReference, BaseReferenceSet

REFSDECL_SPLITTER = re.compile(r"/+[*()|\sa-zA-Z0-9:\[\]@=\\{$'\".\s]+")
REFSDECL_REPLACER = re.compile(r"\$[0-9]+")
Expand All @@ -27,18 +27,19 @@ def __childOrNone__(liste):


class CtsWordReference(str):
def __new__(cls, word_reference):
def __new__(cls, word_reference: str):
word, counter = tuple(SUBREFERENCE.findall(word_reference)[0])

if len(counter) and word:
word, counter = str(word), int(counter)
elif len(counter) == 0 and word:
word, counter = str(word), 0
if counter:
counter = int(counter)
else:
counter = 0

obj = str.__new__(cls, "@"+word_reference)
obj.counter = counter
obj.word = word
return word

return obj

def tuple(self):
return self.word, self.counter
Expand All @@ -58,30 +59,34 @@ def __new__(cls, str_repr: str):
# Parsing the reference
temp_str_repr = str_repr
subreference = temp_str_repr.split("@")

if len(subreference) == 2:
obj._sub_reference = CtsWordReference(subreference[1])
temp_str_repr = subreference[0]

obj._list = temp_str_repr.split(".")
obj._list = temp_str_repr
return obj

@property
def list(self):
return self._list
def list(self) -> List[str]:
return list(iter(self))

@property
def subreference(self):
return self._sub_reference
def subreference(self) -> Optional[CtsWordReference]:
subref = self.split("@")
if len(subref) == 2:
return CtsWordReference(subref[1])

def __iter__(self):
return iter(self.list)
def __iter__(self) -> List[str]:
subref = self.split("@")[0]
yield from subref.split(".")

def __len__(self):
return len(self.list)
def __len__(self) -> int:
return self.count(".") + 1

@property
def depth(self):
return len(self.list)
def depth(self) -> int:
return self.count(".") + 1


class CtsReference(BaseReference):
Expand Down Expand Up @@ -125,26 +130,29 @@ def parent(self):
:rtype: CtsReference
"""
if len(self.parsed[0][1]) == 1 and len(self.parsed[1][1]) <= 1:
if self.start.depth == 1 and (self.end is None or self.end.depth <= 1):
return None
else:
if len(self.parsed[0][1]) > 1 and len(self.parsed[1][1]) == 0:
if self.start.depth > 1 and (self.end is None or self.end.depth == 0):
return CtsReference("{0}{1}".format(
".".join(list(self.parsed[0][1])[0:-1]),
self.parsed[0][3] or ""
".".join(self.start.list[:-1]),
self.start.subreference or ""
))
elif len(self.parsed[0][1]) > 1 and len(self.parsed[1][1]) > 1:
first = list(self.parsed[0][1])[0:-1]
last = list(self.parsed[1][1])[0:-1]
if first == last and self.parsed[1][3] is None \
and self.parsed[0][3] is None:
return CtsReference(".".join(first))
elif self.start.depth > 1 and self.end is not None and self.end.depth > 1:
_start = self.start.list[0:-1]
_end = self.end.list[0:-1]
if _start == _end and \
self.start.subreference is None and \
self.end.subreference is None:
return CtsReference(
".".join(_start)
)
else:
return CtsReference("{0}{1}-{2}{3}".format(
".".join(first),
self.parsed[0][3] or "",
".".join(list(self.parsed[1][1])[0:-1]),
self.parsed[1][3] or ""
".".join(_start),
self.start.subreference or "",
".".join(_end),
self.end.subreference or ""
))

@property
Expand Down Expand Up @@ -195,6 +203,7 @@ def subreference(self):
if not self.end:
return self.start.subreference

@property
def depth(self):
""" Return depth of highest reference level
Expand Down Expand Up @@ -225,22 +234,17 @@ def __str__(self):
"""
return self._str_repr

def __eq__(self, other):
""" Equality checker for Reference object

:param other: An object to be checked against
:rtype: boolean
:returns: Equality between other and self
class CtsReferenceSet(BaseReferenceSet):
def __contains__(self, item):
return BaseReferenceSet.__contains__(self, item) or \
CtsReference(item)

:Example:
>>> a = CtsReference(reference="1.1@Achiles[1]-1.2@Zeus[1]")
>>> b = CtsReference(reference="1.1")
>>> c = CtsReference(reference="1.1")
>>> (a == b) == False
>>> (c == b) == True
"""
return (isinstance(other, type(self))
and str(self) == str(other))
def index(self, obj: Union[str, CtsReference], *args, **kwargs) -> int:
_o = obj
if not isinstance(obj, CtsReference):
_o = CtsReference(obj)
return super(CtsReferenceSet, self).index(_o)


class URN(object):
Expand Down Expand Up @@ -768,7 +772,7 @@ def match(self, passageId):
passageId = CtsReference(passageId)

if self.is_root():
return self[len(passageId)-1]
return self[passageId.depth-1]
return self.root.match(passageId)

def fill(self, passage=None, xpath=None):
Expand Down
2 changes: 1 addition & 1 deletion MyCapytain/resolvers/prototypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def getTextualNode(
"""
raise NotImplementedError()

def getSiblings(self, textId: str, subreference: Union[str, BaseReference]) -> Tuple[str, str]:
def getSiblings(self, textId: str, subreference: Union[str, BaseReference]) -> Tuple[BaseReference, BaseReference]:
""" Retrieve the siblings of a textual node
:param textId: CtsTextMetadata Identifier
Expand Down
2 changes: 1 addition & 1 deletion MyCapytain/resources/texts/base/tei.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ def __export__(self, output=Mimetypes.PLAINTEXT, exclude=None, _preformatted=Fal
reffs = self.getReffs(level=len(self.citation))
text = nested_ordered_dictionary()
for reff in reffs:
_r = reff.split(".")
_r = str(reff).split(".") # Only works for non range of course
nested_set(text, _r, self.getTextualNode(_r).export(
Mimetypes.PLAINTEXT,
exclude=exclude,
Expand Down
39 changes: 19 additions & 20 deletions MyCapytain/resources/texts/local/capitains/cts.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from MyCapytain.errors import DuplicateReference, MissingAttribute, RefsDeclError, EmptyReference, CitationDepthError, MissingRefsDecl
from MyCapytain.common.utils import copyNode, passageLoop, normalizeXpath
from MyCapytain.common.constants import XPATH_NAMESPACES, RDF_NAMESPACES
from MyCapytain.common.reference._capitains_cts import CtsReference, URN, Citation
from MyCapytain.common.reference import CtsReference, URN, Citation, CtsReferenceSet

from MyCapytain.resources.prototypes import text
from MyCapytain.resources.texts.base.tei import TEIResource
Expand Down Expand Up @@ -68,7 +68,7 @@ def getTextualNode(self, subreference=None, simple=False):
else:
start, end = subreference.start.list, subreference.end.list

if len(start) > len(self.citation):
if len(start) > self.citation.root.depth:
raise CitationDepthError("URN is deeper than citation scheme")

if simple is True:
Expand Down Expand Up @@ -120,7 +120,7 @@ def _getSimplePassage(self, reference=None):
)

resource = self.resource.xpath(
self.citation[reference.depth()-1].fill(reference),
self.citation[reference.depth-1].fill(reference),
namespaces=XPATH_NAMESPACES
)

Expand All @@ -147,7 +147,7 @@ def textObject(self):
text = self
return text

def getReffs(self, level=1, subreference=None):
def getReffs(self, level=1, subreference=None) -> CtsReferenceSet:
""" CtsReference available at a given level
:param level: Depth required. If not set, should retrieve first encountered level (1 based)
Expand All @@ -166,7 +166,7 @@ def getReffs(self, level=1, subreference=None):
subreference = CtsReference(subreference)
return self.getValidReff(level, subreference)

def getValidReff(self, level=None, reference=None, _debug=False):
def getValidReff(self, level: int=None, reference: CtsReference=None, _debug: bool=False) -> CtsReferenceSet:
""" Retrieve valid passages directly
:param level: Depth required. If not set, should retrieve first encountered level (1 based)
Expand All @@ -176,7 +176,6 @@ def getValidReff(self, level=None, reference=None, _debug=False):
:param _debug: Check on passages duplicates
:type _debug: bool
:returns: List of levels
:rtype: list(basestring, str)
.. note:: GetValidReff works for now as a loop using CapitainsCtsPassage, subinstances of CtsTextMetadata, to retrieve the valid \
informations. Maybe something is more powerfull ?
Expand Down Expand Up @@ -213,7 +212,7 @@ def getValidReff(self, level=None, reference=None, _debug=False):
if level <= len(passages[0]) and reference is not None:
level = len(passages[0]) + 1
if level > len(self.citation):
return []
return CtsReferenceSet()

nodes = [None] * (level - depth)

Expand Down Expand Up @@ -256,7 +255,7 @@ def getValidReff(self, level=None, reference=None, _debug=False):
print(empties)
warnings.warn(message, EmptyReference)

return passages
return CtsReferenceSet([CtsReference(reff) for reff in passages])

def xpath(self, *args, **kwargs):
""" Perform XPath on the passage XML
Expand Down Expand Up @@ -305,7 +304,7 @@ def __init__(self, resource, reference, citation, text, urn=None):
self.__children__ = None
self.__depth__ = 0
if reference is not None:
self.__depth__ = len(reference)
self.__depth__ = reference.depth
self.__prevnext__ = None

@property
Expand Down Expand Up @@ -391,29 +390,29 @@ def siblingsId(self):
if self.__prevnext__ is not None:
return self.__prevnext__

document_references = list(map(str, self.__text__.getReffs(level=self.depth)))
document_references = self.__text__.getReffs(level=self.depth)

range_length = 1
if self.reference.end is not None:
range_length = len(self.getReffs())

start = document_references.index(str(self.reference.start))
start = document_references.index(self.reference.start)

if start == 0:
# If the passage is already at the beginning
_prev = None
elif start - range_length < 0:
_prev = CtsReference(document_references[0])
_prev = document_references[0]
else:
_prev = CtsReference(document_references[start - 1])
_prev = document_references[start - 1]

if start + 1 == len(document_references):
# If the passage is already at the end
_next = None
elif start + range_length > len(document_references):
_next = CtsReference(document_references[-1])
_next = document_references[-1]
else:
_next = CtsReference(document_references[start + 1])
_next = document_references[start + 1]

self.__prevnext__ = (_prev, _next)
return self.__prevnext__
Expand Down Expand Up @@ -533,10 +532,10 @@ def __init__(self, reference, urn=None, citation=None, resource=None, text=None)
self.__children__ = None
self.__depth__ = self.__depth_2__ = 1

if self.reference.start:
self.__depth_2__ = self.__depth__ = len(CtsReference(self.reference.start))
if self.reference and self.reference.end:
self.__depth_2__ = len(CtsReference(self.reference.end))
if self.reference and self.reference.start:
self.__depth_2__ = self.__depth__ = self.reference.start.depth
if self.reference.is_range() and self.reference.end:
self.__depth_2__ = self.reference.end.depth

self.__prevnext__ = None # For caching purpose

Expand Down Expand Up @@ -603,7 +602,7 @@ def siblingsId(self):
if self.__prevnext__:
return self.__prevnext__

document_references = list(map(str, self.__text__.getReffs(level=self.depth)))
document_references = self.__text__.getReffs(level=self.depth)

if self.reference.end:
start, end = self.reference.start, self.reference.end
Expand Down
Loading

0 comments on commit 6f0c245

Please sign in to comment.