Skip to content

Commit

Permalink
Partial fix for #591
Browse files Browse the repository at this point in the history
Loads with spelling+transcription. An all-spelling corpus is
problematic.
  • Loading branch information
jsmackie committed Nov 26, 2016
1 parent 344d202 commit 88ecded
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 8 deletions.
14 changes: 12 additions & 2 deletions corpustools/corpus/classes/lexicon.py
Original file line number Diff line number Diff line change
Expand Up @@ -979,6 +979,13 @@ def __init__(self, update=False, **kwargs):
self.descriptors.append('Frequency')
self.Frequency = 0

if self._transcription_name is None:
for d in self.descriptors:
if isinstance(getattr(self,d,None), Transcription):
self._transcription_name = d
break


def initDefaults(self):
for attribute, default_value in Word.word_attributes.items():
if isinstance(default_value, list):
Expand All @@ -1002,8 +1009,11 @@ def frequency(self):

@property
def transcription(self):
#return self._transcription
return getattr(self, self._transcription_name, self._transcription)
try:
value = getattr(self, self._transcription_name, self._transcription)
except (TypeError, AttributeError):
value = self.Transcription
return value

@transcription.setter
def transcription(self, value):
Expand Down
5 changes: 2 additions & 3 deletions corpustools/corpus/classes/spontaneous.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@

from collections import OrderedDict

from .lexicon import Transcription, Corpus, Attribute
from .lexicon import Transcription, Corpus, Attribute, Word

import os
import wave
Expand Down Expand Up @@ -359,7 +359,7 @@ def find_wordtype(self, wordtype):
return list(x for x in self if x.wordtype == wordtype)


class WordToken(object):
class WordToken():
"""
WordToken objects are individual productions of Words
Expand Down Expand Up @@ -434,7 +434,6 @@ def __init__(self,**kwargs):
self._transcription = None
self._freq_names = ['abs_freq', 'freq_per_mil', 'sfreq', 'lowercase_freq', 'log10_freq']


for key, value in kwargs.items():
if not all([letter.isupper() for letter in key]):
key = key.capitalize()
Expand Down
6 changes: 5 additions & 1 deletion corpustools/corpus/io/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -454,7 +454,7 @@ def data_to_discourse2(corpus_name=None, wav_path=None, annotation_types=None, c

discourse = Discourse(discourse_kwargs)

if not 'frequency' in [a.name.lower() for a in discourse.lexicon.attributes]:
if not 'Frequency' in [a.name for a in discourse.lexicon.attributes]:
# running text will not have a frequency attribute supplied by the user
# textgrids are also unlikely to have this attribute
discourse.lexicon.add_attribute(Attribute('frequency', 'numeric', 'Frequency'))
Expand All @@ -479,6 +479,10 @@ def data_to_discourse2(corpus_name=None, wav_path=None, annotation_types=None, c
try:
#word_kwargs[at.output_name] = (at.attribute, annotations[at][n][0])
word_kwargs[at.attribute.name] = (at.attribute, annotations[at][n][0])
if at.attribute.att_type == 'tier':
#word_kwargs['_transcription_name'] = at.attribute.name
print('found a tier')
print(at.attribute.name)
except IndexError:
#word_kwargs[at.output_name] = (at.attribute, None)
word_kwargs[at.attribute.name] = (at.attribute, None)
Expand Down
7 changes: 5 additions & 2 deletions corpustools/corpus/io/textgrid.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ class PCTTextGrid(TextGrid):
def __init__(self):
super().__init__()

def name_filter(self, name):
    """
    Normalise a tier name read from a TextGrid file.

    Parameters
    ----------
    name : str
        Tier name as it appears in the file.

    Returns
    -------
    str
        ``name`` unchanged when every character in it is uppercase
        (this includes the empty string); otherwise ``name.capitalize()``.
    """
    # Checked character by character on purpose: a single character that
    # is not uppercase (a digit, for instance) sends the name through
    # capitalize().
    if all(char.isupper() for char in name):
        return name
    return name.capitalize()

def read(self, f):
"""
Read the tiers contained in the Praat-formatted TextGrid file
Expand All @@ -35,7 +38,7 @@ def read(self, f):
source.readline()
if source.readline().rstrip().split()[2] == '"IntervalTier"':
inam = source.readline().rstrip().split(' = ')[1].strip('"')
inam = inam.lower()
inam = self.name_filter(inam)
imin = round(float(source.readline().rstrip().split()[2]), 5)
imax = round(float(source.readline().rstrip().split()[2]), 5)
itie = IntervalTier(inam)
Expand All @@ -49,7 +52,7 @@ def read(self, f):
self.append(itie)
else: # pointTier
inam = source.readline().rstrip().split(' = ')[1].strip('"')
inam = inam.lower()
inam = self.name_filter(inam)
imin = round(float(source.readline().rstrip().split()[2]), 5)
imax = round(float(source.readline().rstrip().split()[2]), 5)
itie = PointTier(inam)
Expand Down

0 comments on commit 88ecded

Please sign in to comment.