Skip to content

Commit

Permalink
Major improvements [ Fixed #42 #41 #9 #38 ] (#43)
Browse files Browse the repository at this point in the history
- Remove the resolver-specific handling of .texts: it now reuses .readableDescendants of .inventory
- This apparently also fixes the bugs reported in #41 and #40
- Empty textgroups can now be removed during parsing through the .REMOVE_EMPTY constant
- Caching does not crash anymore (MyCapytain fix)

Fixed #42
Fixed #38 
Fixed #9
Fixed #41
  • Loading branch information
PonteIneptique committed Feb 23, 2017
1 parent 0abb239 commit 3b65b35
Show file tree
Hide file tree
Showing 6 changed files with 35 additions and 47 deletions.
55 changes: 27 additions & 28 deletions capitains_nautilus/cts/resolver.py
Expand Up @@ -36,6 +36,7 @@ class NautilusCTSResolver(CTSCapitainsLocalResolver):
"""
TIMEOUT = None
NautilusCTSResolver = False
REMOVE_EMPTY = True

def __init__(self, resource, name=None, logger=None, cache=None, dispatcher=None):
""" Initiate the XMLResolver
Expand Down Expand Up @@ -66,23 +67,18 @@ def __init__(self, resource, name=None, logger=None, cache=None, dispatcher=None

self.__cache__ = cache
self.__resources__ = resource
self.__parsed__ = False

self.inventory_cache_key = _cache_key("Nautilus", self.name, "Inventory", "Resources")
self.texts_metadata_cache_key = _cache_key("Nautilus", self.name, "Inventory", "TextsMetadata")
self.texts_parsed_cache_key = _cache_key("Nautilus", self.name, "Inventory", "TextsParsed")

# Parse if no Cache
# self.get_or(self.inventory_cache_key, self.parse)

@property
def cache(self):
return self.__cache__

@property
def inventory(self):
if self.__parsed__ is False:
self.__inventory__ = self.get_or(self.inventory_cache_key, self.parse, self.__resources__, ret="inventory")
if self.__inventory__ is None or len(self.__inventory__.readableDescendants) == 0:
self.__inventory__ = self.get_or(self.inventory_cache_key, self.parse, self.__resources__)
return self.__inventory__

@inventory.setter
Expand All @@ -96,14 +92,7 @@ def texts(self):
:rtype: list
"""
if self.__texts__ is None:
self.__texts__ = self.get_or(self.texts_metadata_cache_key, self.parse, self.__resources__, ret="texts")
return self.__texts__

@texts.setter
def texts(self, value):
self.__texts__ = value
self.cache.set(self.texts_metadata_cache_key, value)
return self.inventory.readableDescendants

def xmlparse(self, file):
""" Parse a XML file
Expand Down Expand Up @@ -159,16 +148,15 @@ def flush(self):
for text in self.texts:
self.cache.delete(_cache_key(self.texts_parsed_cache_key, str(text.id)))
self.cache.delete(self.inventory_cache_key)
self.cache.delete(self.texts_metadata_cache_key)

def parse(self, resource=None, ret="inventory"):
def parse(self, resource=None):
""" Parse a list of directories ans
:param resource: List of folders
:param ret: Return a specific item ("inventory" or "texts")
"""
if resource is None:
resource = self.__resources__
textlists = []
removing = []
for folder in resource:
textgroups = glob("{base_folder}/data/*/__cts__.xml".format(base_folder=folder))
for __cts__ in textgroups:
Expand Down Expand Up @@ -222,32 +210,43 @@ def parse(self, resource=None, ret="inventory"):
del t
__text__.citation = cites[-1]
self.logger.info("%s has been parsed ", __text__.path)
if __text__.citation.isEmpty() is False:
textlists.append(__text__)
else:
if __text__.citation.isEmpty() is True:
removing.append(__textkey__)
self.logger.error("%s has no passages", __text__.path)
except Exception as E:
removing.append(__textkey__)
self.logger.error(
"%s does not accept parsing at some level (most probably citation) ",
__text__.path
)
else:
removing.append(__textkey__)
self.logger.error("%s is not present", __text__.path)
except MyCapytain.errors.UndispatchedTextError as E:
self.logger.error("Error dispatching %s ", __cts__)
if self.RAISE_ON_UNDISPATCHED is True:
raise UndispatchedTextError(E)
except Exception as E:
print(E)
self.logger.error("Error parsing %s ", __cts__)

for removable in removing:
del self.dispatcher.collection[removable]

removing = []

if self.REMOVE_EMPTY is True:
# Find resource with no readable descendants
for item in self.dispatcher.collection.descendants:
if item.readable != True and len(item.readableDescendants) == 0:
removing.append(item.id)

# Remove them only if they have not been removed before
for removable in removing:
if removable in self.dispatcher.collection:
del self.dispatcher.collection[removable]

self.inventory = self.dispatcher.collection
self.texts = textlists
self.__parsed__ = True
if ret == "texts":
return self.texts
else:
return self.inventory
return self.inventory

def __getText__(self, urn):
""" Returns a PrototypeText object
Expand Down
4 changes: 2 additions & 2 deletions capitains_nautilus/manager.py
Expand Up @@ -60,8 +60,8 @@ def flush():
def parse():
""" Preprocess the inventory and cache it """
resolver.logger.setLevel(logging.INFO)
ret = resolver.parse(ret="texts")
click.echo("Preprocessed %s texts" % len(ret))
ret = resolver.parse()
click.echo("Preprocessed %s texts" % len(ret.readableDescendants))

@CLI.command()
def reset():
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Expand Up @@ -3,5 +3,5 @@ Flask>=0.12
Werkzeug>=0.11.3
redis>=2.10.5
flask-caching>=1.2.0
MyCapytain>=2.0.0b8
MyCapytain>=2.0.0b14
logassert
4 changes: 2 additions & 2 deletions setup.py
Expand Up @@ -2,15 +2,15 @@

setup(
name='capitains_nautilus',
version="1.0.0b2",
version="1.0.0b4",
description='Resolver for Capitains Guidelines Repository',
url='http://github.com/Capitains/nautilus',
author='Thibault Clerice',
author_email='leponteineptique@gmail.com',
license='MIT',
packages=find_packages(exclude=["*.tests", "*.tests.*", "tests.*", "tests"]),
install_requires=[
"MyCapytain>=2.0.0b13",
"MyCapytain>=2.0.0b17",
"tornado>=4.3",
"Flask>=0.12",
"Werkzeug>=0.11.3",
Expand Down
3 changes: 3 additions & 0 deletions tests/cts/test_resolver.py
Expand Up @@ -612,6 +612,7 @@ def dispatchGreekLit(collection, path=None, **kwargs):
["./tests/testing_data/latinLit2"],
dispatcher=dispatcher
)
resolver.REMOVE_EMPTY = False
resolver.parse()
latin_stuff = resolver.getMetadata("urn:perseus:latinLit")
greek_stuff = resolver.getMetadata("urn:perseus:greekLit")
Expand Down Expand Up @@ -668,6 +669,7 @@ def dispatchLatinLit(collection, path=None, **kwargs):
["./tests/testing_data/latinLit2"],
dispatcher=dispatcher
)
resolver.REMOVE_EMPTY = False
resolver.parse()
except UndispatchedTextError as E:
self.fail("UndispatchedTextError should not have been raised")
Expand Down Expand Up @@ -706,6 +708,7 @@ def dispatchGreekLit(collection, path=None, **kwargs):
["./tests/testing_data/latinLit2"],
dispatcher=dispatcher
)
resolver.REMOVE_EMPTY = False
resolver.parse()

all = resolver.getMetadata().export(Mimetypes.XML.CTS)
Expand Down
14 changes: 0 additions & 14 deletions tests/test_manager.py
Expand Up @@ -65,10 +65,6 @@ def test_flush_cache(self):
self.cache_manager.get(self.resolver.inventory_cache_key), None,
"There should not be inventory anymore in cache"
)
self.assertIs(
self.cache_manager.get(self.resolver.texts_metadata_cache_key), None,
"There should not be inventory anymore in cache"
)

def test_process_cache(self):
""" Simulate python manager.py
Expand All @@ -81,11 +77,6 @@ def test_process_cache(self):
True,
"There should not be inventory in cache"
)
self.assertEqual(
self.cache_manager.get(self.resolver.texts_metadata_cache_key) is None,
True,
"There should not be texts metadata in cache"
)

# Running the tested command
out = self.cmd("parse")
Expand All @@ -99,8 +90,3 @@ def test_process_cache(self):
True,
"There should be inventory in cache"
)
self.assertEqual(
len(self.cache_manager.get(self.resolver.texts_metadata_cache_key)) > 0,
True,
"There should be texts metadata in cache"
)

0 comments on commit 3b65b35

Please sign in to comment.