Skip to content

Commit

Permalink
Minor bug fixes for running text
Browse files: browse the repository at this point in the history
  • Loading branch information
jsmackie committed Nov 28, 2016
1 parent 907be02 commit d8a512e
Show file tree
Hide file tree
Showing 7 changed files with 36 additions and 34 deletions.
27 changes: 17 additions & 10 deletions corpustools/corpus/classes/lexicon.py
Original file line number Diff line number Diff line change
Expand Up @@ -968,11 +968,12 @@ def __init__(self, update=False, **kwargs):
if key not in self.descriptors:
self.descriptors.append(key)

if self._spelling is None and self._transcription is None:
if self.spelling is None and self.transcription is None:
raise(ValueError('Words must be specified with at least a spelling or a transcription.'))
if self._spelling is None:
if self.spelling is None:
self.Spelling = ''.join(map(str,self._transcription))
self._spelling = self.Spelling
self._spelling_name = 'Spelling'
if not 'Spelling' in self.descriptors:
self.descriptors.append('Spelling')
if not 'Frequency' in self.descriptors:
Expand All @@ -984,7 +985,8 @@ def __init__(self, update=False, **kwargs):
if isinstance(getattr(self,d,None), Transcription):
self._transcription_name = d
break

else:
self._transcription = None

def initDefaults(self):
for attribute, default_value in Word.word_attributes.items():
Expand Down Expand Up @@ -1012,27 +1014,32 @@ def transcription(self):
try:
value = getattr(self, self._transcription_name, self._transcription)
except (TypeError, AttributeError):
value = self.Transcription
value = None #transcription doesn't exist
return value

@transcription.setter
def transcription(self, value):
setattr(self, self._transcription_name, value)
#self._transcription = value
if self._transcription_name is not None:
setattr(self, self._transcription_name, value)
self._transcription = value

@transcription.deleter
def transcription(self):
del self._transcription

@property
def spelling(self):
#return self._spelling
return getattr(self, self._spelling_name, self._transcription)
try:
value = getattr(self, self._spelling_name, self._spelling)
except (TypeError, AttributeError):
value = None #spelling doesn't exist
return value

@spelling.setter
def spelling(self, value):
setattr(self, self._spelling_name, value)
#self._spelling = value
if self._spelling_name is not None:
setattr(self, self._spelling_name, value)
self._spelling = value

@spelling.deleter
def spelling(self):
Expand Down
4 changes: 2 additions & 2 deletions corpustools/corpus/classes/spontaneous.py
Original file line number Diff line number Diff line change
Expand Up @@ -448,15 +448,15 @@ def __init__(self,**kwargs):
setattr(self, '_frequency', value)
elif att.att_type == 'tier':
value = Transcription(value)
#self._transcription = value
self._transcription = value

setattr(self, key, value)

elif isinstance(value, list):
# probably a transcription
value = Transcription(value)
setattr(self, key, value)
#self._transcription = value
self._transcription = value

elif isinstance(value, str):
try:
Expand Down
12 changes: 3 additions & 9 deletions corpustools/corpus/io/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -464,7 +464,7 @@ def data_to_discourse2(corpus_name=None, wav_path=None, annotation_types=None, c

ind = 0
limit = max([len(list(v)) for v in annotations.values()])
for n in range(limit):#len(list(annotations.values())[0])):
for n in range(limit):
if stop_check is not None and stop_check():
return
if call_back is not None:
Expand All @@ -477,14 +477,9 @@ def data_to_discourse2(corpus_name=None, wav_path=None, annotation_types=None, c
continue
else:
try:
#word_kwargs[at.output_name] = (at.attribute, annotations[at][n][0])
word_kwargs[at.attribute.name] = (at.attribute, annotations[at][n][0])
except IndexError:
#word_kwargs[at.output_name] = (at.attribute, None)
word_kwargs[at.attribute.name] = (at.attribute, None)
#word_kwargs[at.output_name] = (at.attribute, annotations[at][n][0])
# word_kwargs = {at.output_name: (at.attribute, annotations[at][n][0])
# for at in annotations if not at.token and not at.ignored}
word = Word(**word_kwargs)
try:
word = discourse.lexicon.find(word.spelling)
Expand All @@ -509,14 +504,13 @@ def data_to_discourse2(corpus_name=None, wav_path=None, annotation_types=None, c
word_token_kwargs['end'] = end if end is not None else ind + 1
if at.token:
word_token_kwargs['_transcription'] = (at.attribute, annotations[at][n][0])
# word_token_kwargs['begin'] = begin if begin is not None else ind
# word_token_kwargs['end'] = end if end is not None else ind + 1
word_token_kwargs['begin'] = begin if begin is not None else ind
word_token_kwargs['end'] = end if end is not None else ind + 1
word_token = WordToken(**word_token_kwargs)
discourse.add_word(word_token)
if any(a.token for a in annotations):
word.wordtokens.append(word_token)
ind += 1

return discourse

def data_to_discourse(data, lexicon = None, call_back=None, stop_check=None):
Expand Down
12 changes: 6 additions & 6 deletions corpustools/corpus/io/text_spelling.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def inspect_discourse_spelling(path, support_corpus_path = None):
list of AnnotationTypes
Autodetected AnnotationTypes for the text file
"""
a = AnnotationType('spelling', None, None, anchor = True, token = False)
a = AnnotationType('Spelling', None, None, anchor = True, token = False)
if os.path.isdir(path):
for root, subdirs, files in os.walk(path):
for filename in files:
Expand All @@ -46,7 +46,7 @@ def inspect_discourse_spelling(path, support_corpus_path = None):
a.add(trial, save = False)
annotation_types = [a]
if support_corpus_path is not None:
annotation_types += [AnnotationType('transcription', None, None, base = True)]
annotation_types += [AnnotationType('Transcription', None, None, base = True)]
return annotation_types

def spelling_text_to_data(corpus_name, path, annotation_types = None,
Expand All @@ -59,9 +59,9 @@ def spelling_text_to_data(corpus_name, path, annotation_types = None,
support = load_binary(support_corpus_path)
if annotation_types is None:
annotation_types = inspect_discourse_spelling(path, support_corpus_path)

for a in annotation_types:
a.reset()

data = DiscourseData(name, annotation_types)

lines = text_to_lines(path)
Expand All @@ -82,7 +82,7 @@ def spelling_text_to_data(corpus_name, path, annotation_types = None,
annotations = {}
for word in line:
spell = word.strip()
spell = ''.join(x for x in spell if not x in data['spelling'].ignored_characters)
spell = ''.join(x for x in spell if not x in data['Spelling'].ignored_characters)
if spell == '':
continue
word = Annotation(spell)
Expand All @@ -99,7 +99,7 @@ def spelling_text_to_data(corpus_name, path, annotation_types = None,
word.begins.append(level_count)
word.ends.append(level_count + len(tier_elements))
annotations[n] = tier_elements
annotations['spelling'] = [word]
annotations['Spelling'] = [word]
data.add_annotations(**annotations)

return data
Expand Down Expand Up @@ -163,7 +163,7 @@ def load_directory_spelling(corpus_name, path, annotation_types = None,
return corpus

def load_discourse_spelling(corpus_name, path, annotation_types = None,
lexicon = None,
lexicon = None, feature_system_path=None,
support_corpus_path = None, ignore_case = False,
stop_check = None, call_back = None):
"""
Expand Down
10 changes: 5 additions & 5 deletions corpustools/corpus/io/text_transcription.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def inspect_discourse_transcription(path):
trans_delimiters = ['.', ';', ',']

att = Attribute('transcription','tier','Transcription')
a = AnnotationType('transcription', None, None, attribute = att,
a = AnnotationType('Transcription', None, None, attribute = att,
base = True)

if os.path.isdir(path):
Expand Down Expand Up @@ -75,8 +75,8 @@ def transcription_text_to_data(corpus_name, path, annotation_types = None,

for a in annotation_types:
a.reset()
a = AnnotationType('spelling', None, None,
attribute = Attribute('spelling','spelling','Spelling'), anchor = True)
a = AnnotationType('Spelling', None, None,
attribute = Attribute('Spelling','spelling','Spelling'), anchor = True)

annotation_types.append(a)

Expand All @@ -88,7 +88,7 @@ def transcription_text_to_data(corpus_name, path, annotation_types = None,
call_back(0, len(lines))
cur = 0
trans_check = False
n = 'transcription'
n = 'Transcription'

for line in lines:
if stop_check is not None and stop_check():
Expand Down Expand Up @@ -118,7 +118,7 @@ def transcription_text_to_data(corpus_name, path, annotation_types = None,
tier_elements[0].begin = level_count
tier_elements[-1].end = level_count + len(tier_elements)
annotations[n] = tier_elements
annotations['spelling'] = [word]
annotations['Spelling'] = [word]
data.add_annotations(**annotations)
#if data[n].delimiter and not trans_check:
# raise(DelimiterError('The transcription delimiter specified does not create multiple segments. Please specify another delimiter.'))
Expand Down
2 changes: 1 addition & 1 deletion corpustools/gui/featuregui.py
Original file line number Diff line number Diff line change
Expand Up @@ -1202,7 +1202,7 @@ def __init__(self, parent, settings, current_system):
layout.addWidget(acFrame)

note = QLabel('This window is only for adding and removing transcription/features systems.\n'
'When loading a corpus, you will be asked which one of these systems you want to use with your'
'When loading a corpus, you will be asked which one of these systems you want to use with your '
'corpus. If you have an existing corpus and you want to change systems, go to '
'the Features menu and select View/change feature system...')
note.setWordWrap(True)
Expand Down
3 changes: 2 additions & 1 deletion corpustools/gui/iogui.py
Original file line number Diff line number Diff line change
Expand Up @@ -924,7 +924,8 @@ def generateKwargs(self):
kwargs['word_path'] = kwargs.pop('path')
kwargs['phone_path'] = phone_path

if not any(['Transcription' in x.name for x in kwargs['annotation_types']]):
if (not self.textType == 'spelling' and
not any(['Transcription' in x.name for x in kwargs['annotation_types']])):
alert = QMessageBox()
alert.setWindowTitle('No transcription selected')
alert.setText('You did not select any transcription column for your corpus. '
Expand Down

0 comments on commit d8a512e

Please sign in to comment.