Skip to content

Commit

Permalink
Added test for language attribute
Browse files Browse the repository at this point in the history
Wrote unit tests for this new test

Updated old unit tests to work with the new output
  • Loading branch information
sonofmun committed Apr 5, 2017
1 parent 0ae34a5 commit dcfc30a
Show file tree
Hide file tree
Showing 11 changed files with 14,020 additions and 8 deletions.
5 changes: 5 additions & 0 deletions HookTest/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -373,6 +373,7 @@ def unit(self, filepath):
additional["citations"] = unit.citation
additional["duplicates"] = unit.duplicates
additional["forbiddens"] = unit.forbiddens
additional['language'] = unit.lang
if self.countwords:
additional["words"] = unit.count
return self.cover(filepath, results, testtype=texttype, logs=logs, additional=additional), filepath, additional
Expand Down Expand Up @@ -484,6 +485,7 @@ def end(self):
"""
total_units = 0
total_words = 0
language_words = defaultdict(int)
show = list(HookTest.units.CTSUnit.readable.values())
if not self.verbose:
show.remove("Duplicate passages")
Expand Down Expand Up @@ -533,6 +535,7 @@ def end(self):
for x in unit.additional['citations']:
total_units += x[1]
total_words += unit.additional['words']
language_words[unit.additional['language']] += unit.additional['words']
else:
display_table = PT(["Identifier", "Nodes", "Failed Tests"])
display_table.align['Identifier', 'Nodes', "Failed Tests"] = "c"
Expand Down Expand Up @@ -590,6 +593,8 @@ def end(self):
results_table.add_row(["Total Citation Units", "{:,}".format(total_units)])
if self.countwords is True:
results_table.add_row(["Total Words", "{:,}".format(total_words)])
for l, words in language_words.items():
results_table.add_row(["Words in {}".format(l.upper()), "{:,}".format(words)])
print(results_table, flush=True)
#print(black('#*# texts={texts} texts_passing={t_pass} metadata={meta} metadata_passing={m_pass} coverage_units={cov} total_nodes={nodes} words={words}'.format(
# texts=num_texts, t_pass=t_pass, meta=self.m_files, m_pass=self.m_passing, cov=cov, nodes="{:,}".format(total_units), words="{:,}".format(total_words))))
Expand Down
32 changes: 30 additions & 2 deletions HookTest/units.py
Original file line number Diff line number Diff line change
Expand Up @@ -383,7 +383,7 @@ class CTSUnit(TESTUnit):
"unique_passage", "inventory" ) yield at least one boolean (might be more) which represents the success of it.
"""

tests = ["parsable", "has_urn", "naming_convention", "refsDecl", "passages", "unique_passage", "inventory", "duplicate", "forbidden"]
tests = ["parsable", "has_urn", "naming_convention", "refsDecl", "passages", "unique_passage", "inventory", "duplicate", "forbidden", 'language']
readable = {
"parsable": "File parsing",
"refsDecl": "RefsDecl parsing",
Expand All @@ -396,7 +396,8 @@ class CTSUnit(TESTUnit):
"naming_convention": "Naming conventions",
"inventory": "Available in inventory",
"unique_passage": "Unique nodes found by XPath",
"count_words": "Word Counting"
"count_words": "Word Counting",
"language": "Correct xml:lang attribute"
}
splitter = re.compile(r'\S+', re.MULTILINE)

Expand All @@ -411,6 +412,7 @@ def __init__(self, path, countwords=False, *args, **kwargs):
self.duplicates = list()
self.forbiddens = list()
self.test_status = defaultdict(bool)
self.lang = ''
super(CTSUnit, self).__init__(path, *args, **kwargs)

def parsable(self):
Expand Down Expand Up @@ -630,6 +632,32 @@ def count_words(self):
status = self.count > 0
yield status

def language(self):
    """ Tests to make sure an xml:lang attribute is on the correct node

    The node that must carry the attribute depends on ``self.scheme``:

    - ``"epidoc"``: the first ``tei:div`` of type ``edition`` or ``translation`` under ``/tei:TEI/tei:text/tei:body``
    - ``"tei"``: the ``/tei:TEI/tei:text/tei:body`` node itself

    As a side effect, ``self.lang`` is set to the value of the attribute, or to ``'UNK'`` when the node or \
    the attribute is missing or the attribute is empty.

    For any other scheme nothing is yielded and ``self.lang`` is left untouched.

    :returns: Generator yielding a single status (True when a non-empty xml:lang was found)
    """
    # Only the location of the node differs between schemes; the check itself is shared.
    xpaths = {
        "epidoc": '/tei:TEI/tei:text/tei:body/tei:div[@type="edition" or @type="translation"]',
        "tei": '/tei:TEI/tei:text/tei:body'
    }
    if self.scheme not in xpaths:
        return

    try:
        nodes = self.xml.xpath(xpaths[self.scheme], namespaces=TESTUnit.NS)
        found = nodes[0].get('{http://www.w3.org/XML/1998/namespace}lang')
    except Exception:
        # Best effort: a missing node (IndexError), an unparsed document or an XPath
        # failure all mean "no language found". Unlike a bare except, this lets
        # KeyboardInterrupt and SystemExit propagate.
        found = None

    if found:
        self.lang = found
        yield True
    else:
        # Covers both a missing attribute (None) and an empty one ('')
        self.lang = 'UNK'
        yield False

def test(self, scheme, inventory=None):
""" Test a file with various checks
Expand Down
Loading

0 comments on commit dcfc30a

Please sign in to comment.