-
Notifications
You must be signed in to change notification settings - Fork 13
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
More robust labbcat parser and added MAUS parser
- Loading branch information
1 parent
8c87e9f
commit 9417a4a
Showing
12 changed files
with
307 additions
and
16 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
from polyglotdb.structure import Hierarchy | ||
|
||
from ..types.parsing import * | ||
|
||
from ..parsers import MausParser | ||
|
||
|
||
def inspect_maus(path):
    """
    Build a :class:`~polyglotdb.io.parsers.maus.MausParser` for parsing
    a MAUS TextGrid file.

    Parameters
    ----------
    path : str
        Full path to the file to be parsed (not read by this function)

    Returns
    -------
    :class:`~polyglotdb.io.parsers.maus.MausParser`
        Parser set up with a word tier and a phone tier for MAUS TextGrids
    """
    # One orthography tier per annotation level, named after the tier
    # labels declared on MausParser.
    word_tier = OrthographyTier(MausParser.word_label, 'word')
    phone_tier = OrthographyTier(MausParser.phone_label, 'phone')

    # Both tiers are flagged as label tiers.
    word_tier.label = True
    phone_tier.label = True

    # Hierarchy mapping: 'phone' points to 'word', 'word' points to None.
    hierarchy = Hierarchy({'phone': 'word', 'word': None})

    return MausParser([word_tier, phone_tier], hierarchy)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
from collections import Counter | ||
from textgrid import TextGrid, IntervalTier | ||
from .aligner import AlignerParser | ||
|
||
|
||
class MausParser(AlignerParser):
    """
    :class:`AlignerParser` subclass carrying the settings for MAUS output.

    Only class-level attributes are set here; every method is inherited
    from :class:`AlignerParser`.
    """
    # Name of this parser.
    name = 'Maus'
    # Labels for the word and phone tiers.
    # NOTE(review): presumably compared against TextGrid tier names by
    # AlignerParser -- confirm how case is handled there.
    word_label = 'ort'
    phone_label = 'mau'
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,158 @@ | ||
File type = "ooTextFile" | ||
Object class = "TextGrid" | ||
|
||
xmin = 0 | ||
xmax = 5 | ||
tiers? <exists> | ||
size = 3 | ||
item []: | ||
item [1]: | ||
class = "IntervalTier" | ||
name = "ORT" | ||
xmin = 0 | ||
xmax = 5 | ||
intervals: size = 6 | ||
intervals [1]: | ||
xmin = 0 | ||
xmax = 0.5359299313286737 | ||
text = "ASPIRIN" | ||
intervals [2]: | ||
xmin = 0.5359299313286737 | ||
xmax = 0.7277498521584862 | ||
text = "" | ||
intervals [3]: | ||
xmin = 0.7277498521584862 | ||
xmax = 1.5463606347329 | ||
text = "PLANET" | ||
intervals [4]: | ||
xmin = 1.5463606347329 | ||
xmax = 2.9389864791485665 | ||
text = "" | ||
intervals [5]: | ||
xmin = 2.9389864791485665 | ||
xmax = 4.29344727410947 | ||
text = "JURASSIC" | ||
intervals [6]: | ||
xmin = 4.29344727410947 | ||
xmax = 5 | ||
text = "" | ||
item [2]: | ||
class = "IntervalTier" | ||
name = "KAN" | ||
xmin = 0 | ||
xmax = 5 | ||
intervals: size = 6 | ||
intervals [1]: | ||
xmin = 0 | ||
xmax = 0.5359299313286737 | ||
text = "AESPERIHN" | ||
intervals [2]: | ||
xmin = 0.5359299313286737 | ||
xmax = 0.7277498521584862 | ||
text = "" | ||
intervals [3]: | ||
xmin = 0.7277498521584862 | ||
xmax = 1.5463606347329 | ||
text = "PLAENEHT" | ||
intervals [4]: | ||
xmin = 1.5463606347329 | ||
xmax = 2.9389864791485665 | ||
text = "" | ||
intervals [5]: | ||
xmin = 2.9389864791485665 | ||
xmax = 4.29344727410947 | ||
text = "JHERARSIHK" | ||
intervals [6]: | ||
xmin = 4.29344727410947 | ||
xmax = 5 | ||
text = "" | ||
item [3]: | ||
class = "IntervalTier" | ||
name = "MAU" | ||
xmin = 0 | ||
xmax = 5 | ||
intervals: size = 21 | ||
intervals [1]: | ||
xmin = 0 | ||
xmax = 0.08225269466768025 | ||
text = "AE" | ||
intervals [2]: | ||
xmin = 0.08225269466768025 | ||
xmax = 0.14677708921589916 | ||
text = "S" | ||
intervals [3]: | ||
xmin = 0.14677708921589916 | ||
xmax = 0.2024776349370112 | ||
text = "P" | ||
intervals [4]: | ||
xmin = 0.2024776349370112 | ||
xmax = 0.2874071798979148 | ||
text = "ER" | ||
intervals [5]: | ||
xmin = 0.2874071798979148 | ||
xmax = 0.4065291390638574 | ||
text = "IH" | ||
intervals [6]: | ||
xmin = 0.4065291390638574 | ||
xmax = 0.5359299313286737 | ||
text = "N" | ||
intervals [7]: | ||
xmin = 0.5359299313286737 | ||
xmax = 0.7277498521584862 | ||
text = "<p:>" | ||
intervals [8]: | ||
xmin = 0.7277498521584862 | ||
xmax = 0.8227025752063423 | ||
text = "P" | ||
intervals [9]: | ||
xmin = 0.8227025752063423 | ||
xmax = 0.9230738556146828 | ||
text = "L" | ||
intervals [10]: | ||
xmin = 0.9230738556146828 | ||
xmax = 1.0471592297458732 | ||
text = "AE" | ||
intervals [11]: | ||
xmin = 1.0471592297458732 | ||
xmax = 1.2057865782471762 | ||
text = "N" | ||
intervals [12]: | ||
xmin = 1.2057865782471762 | ||
xmax = 1.3988082713401389 | ||
text = "EH" | ||
intervals [13]: | ||
xmin = 1.3988082713401389 | ||
xmax = 1.5463606347329 | ||
text = "T" | ||
intervals [14]: | ||
xmin = 1.5463606347329 | ||
xmax = 2.9389864791485665 | ||
text = "<p:>" | ||
intervals [15]: | ||
xmin = 2.9389864791485665 | ||
xmax = 3.4038857361685495 | ||
text = "JH" | ||
intervals [16]: | ||
xmin = 3.4038857361685495 | ||
xmax = 3.594149976503041 | ||
text = "ER" | ||
intervals [17]: | ||
xmin = 3.594149976503041 | ||
xmax = 3.8572109696611645 | ||
text = "AE" | ||
intervals [18]: | ||
xmin = 3.8572109696611645 | ||
xmax = 4.023007695334491 | ||
text = "S" | ||
intervals [19]: | ||
xmin = 4.023007695334491 | ||
xmax = 4.186800389187662 | ||
text = "IH" | ||
intervals [20]: | ||
xmin = 4.186800389187662 | ||
xmax = 4.29344727410947 | ||
text = "K" | ||
intervals [21]: | ||
xmin = 4.29344727410947 | ||
xmax = 5 | ||
text = "<p:>" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
import pytest | ||
import os | ||
|
||
from polyglotdb.io import inspect_maus | ||
|
||
from polyglotdb import CorpusContext | ||
|
||
from polyglotdb.exceptions import TextGridError, GraphQueryError, ParseError | ||
from textgrid import TextGrid, IntervalTier | ||
from polyglotdb.io.parsers.mfa import MfaParser | ||
|
||
def test_load_aus(maus_test_dir, graph_db):
    """
    Load the MAUS fixture TextGrid and check the imported corpus.

    Verifies that the word type gets a ``transcription`` property, that
    words can be queried by label and speaker, that the word following
    'PLANET' is 'JURASSIC' once pauses and utterances are encoded, and
    that the speaker 'maus' is linked to exactly the 'maus_test' discourse.
    """
    # NOTE(review): the corpus name 'test_mfa' looks copied from the MFA
    # tests; it is kept unchanged here -- confirm whether it should differ.
    with CorpusContext('test_mfa', **graph_db) as c:
        c.reset()
        test_file_path = os.path.join(maus_test_dir, "maus_test.TextGrid")
        parser = inspect_maus(test_file_path)
        c.load(parser, test_file_path)
        assert c.hierarchy.has_type_property('word', 'transcription')

        # Exactly one 'JURASSIC' token spoken by 'maus'.
        q = c.query_graph(c.word).filter(c.word.label == 'JURASSIC')
        q = q.filter(c.word.speaker.name == 'maus')
        q = q.order_by(c.word.begin)
        q = q.columns(c.word.label)
        results = q.all()
        assert len(results) == 1

        c.encode_pauses('<SIL>')

        c.encode_utterances(min_pause_length=0)

        # The word after 'PLANET' must be 'JURASSIC'.
        q = c.query_graph(c.word).filter(c.word.label == 'PLANET')
        q = q.filter(c.word.speaker.name == 'maus')
        q = q.order_by(c.word.begin)
        q = q.columns(c.word.label, c.word.following.label.column_name('following'))
        results = q.all()
        assert len(results) == 1
        assert results[0]['following'] == 'JURASSIC'

        # The speaker 'maus' appears in the single loaded discourse only.
        q = c.query_speakers().filter(c.speaker.name == 'maus')
        q = q.columns(c.speaker.discourses.name.column_name('discourses'))

        s = q.get()

        assert len(s['discourses']) == 1
        assert s['discourses'] == ['maus_test']
|
||
|
||
def test_mismatch_parser(timit_test_dir, graph_db):
    """
    Loading the TIMIT test directory with a MAUS parser must raise
    :class:`ParseError`.
    """
    with CorpusContext('test_mismatch', **graph_db) as corpus:
        corpus.reset()
        maus_parser = inspect_maus(timit_test_dir)
        # Only the load call is expected to raise.
        with pytest.raises(ParseError):
            corpus.load(maus_parser, timit_test_dir)
|
||
|