Minor bug fixes for running text

PhonologicalCorpusTools · Nov 28, 2016 · d8a512e
1 parent 907be02
commit d8a512e
Show file tree

Hide file tree

Showing 7 changed files with 36 additions and 34 deletions.
diff --git a/corpustools/corpus/classes/lexicon.py b/corpustools/corpus/classes/lexicon.py
@@ -968,11 +968,12 @@ def __init__(self, update=False, **kwargs):
             if key not in self.descriptors:
                 self.descriptors.append(key)
 
-        if self._spelling is None and self._transcription is None:
+        if self.spelling is None and self.transcription is None:
             raise(ValueError('Words must be specified with at least a spelling or a transcription.'))
-        if self._spelling is None:
+        if self.spelling is None:
             self.Spelling = ''.join(map(str,self._transcription))
             self._spelling = self.Spelling
+            self._spelling_name = 'Spelling'
             if not 'Spelling' in self.descriptors:
                 self.descriptors.append('Spelling')
         if not 'Frequency' in self.descriptors:
@@ -984,7 +985,8 @@ def __init__(self, update=False, **kwargs):
                 if isinstance(getattr(self,d,None), Transcription):
                     self._transcription_name = d
                     break
-
+            else:
+                self._transcription = None
 
     def initDefaults(self):
         for attribute, default_value in Word.word_attributes.items():
@@ -1012,27 +1014,32 @@ def transcription(self):
         try:
             value = getattr(self, self._transcription_name, self._transcription)
         except (TypeError, AttributeError):
-            value = self.Transcription
+            value = None #transcription doesn't exist
         return value
 
     @transcription.setter
     def transcription(self, value):
-        setattr(self, self._transcription_name, value)
-        #self._transcription = value
+        if self._transcription_name is not None:
+            setattr(self, self._transcription_name, value)
+        self._transcription = value
 
     @transcription.deleter
     def transcription(self):
         del self._transcription
 
     @property
     def spelling(self):
-        #return self._spelling
-        return getattr(self, self._spelling_name, self._transcription)
+        try:
+            value = getattr(self, self._spelling_name, self._spelling)
+        except (TypeError, AttributeError):
+            value = None #spelling doesn't exist
+        return value
 
     @spelling.setter
     def spelling(self, value):
-        setattr(self, self._spelling_name, value)
-        #self._spelling = value
+        if self._spelling_name is not None:
+            setattr(self, self._spelling_name, value)
+        self._spelling = value
 
     @spelling.deleter
     def spelling(self):

diff --git a/corpustools/corpus/classes/spontaneous.py b/corpustools/corpus/classes/spontaneous.py
@@ -448,15 +448,15 @@ def __init__(self,**kwargs):
                         setattr(self, '_frequency', value)
                 elif att.att_type == 'tier':
                     value = Transcription(value)
-                    #self._transcription = value
+                    self._transcription = value
 
                 setattr(self, key, value)
 
             elif isinstance(value, list):
                 # probably a transcription
                 value = Transcription(value)
                 setattr(self, key, value)
-                #self._transcription = value
+                self._transcription = value
 
             elif isinstance(value, str):
                 try:

diff --git a/corpustools/corpus/io/helper.py b/corpustools/corpus/io/helper.py
@@ -464,7 +464,7 @@ def data_to_discourse2(corpus_name=None, wav_path=None, annotation_types=None, c
 
     ind = 0
     limit = max([len(list(v)) for v in annotations.values()])
-    for n in range(limit):#len(list(annotations.values())[0])):
+    for n in range(limit):
         if stop_check is not None and stop_check():
             return
         if call_back is not None:
@@ -477,14 +477,9 @@ def data_to_discourse2(corpus_name=None, wav_path=None, annotation_types=None, c
                 continue
             else:
                 try:
-                    #word_kwargs[at.output_name] = (at.attribute, annotations[at][n][0])
                     word_kwargs[at.attribute.name] = (at.attribute, annotations[at][n][0])
                 except IndexError:
-                    #word_kwargs[at.output_name] = (at.attribute, None)
                     word_kwargs[at.attribute.name] = (at.attribute, None)
-                #word_kwargs[at.output_name] = (at.attribute, annotations[at][n][0])
-        # word_kwargs = {at.output_name: (at.attribute, annotations[at][n][0])
-        #                for at in annotations if not at.token and not at.ignored}
         word = Word(**word_kwargs)
         try:
             word = discourse.lexicon.find(word.spelling)
@@ -509,14 +504,13 @@ def data_to_discourse2(corpus_name=None, wav_path=None, annotation_types=None, c
                     word_token_kwargs['end'] = end if end is not None else ind + 1
                 if at.token:
                     word_token_kwargs['_transcription'] = (at.attribute, annotations[at][n][0])
-        # word_token_kwargs['begin'] = begin if begin is not None else ind
-        # word_token_kwargs['end'] = end if end is not None else ind + 1
+        word_token_kwargs['begin'] = begin if begin is not None else ind
+        word_token_kwargs['end'] = end if end is not None else ind + 1
         word_token = WordToken(**word_token_kwargs)
         discourse.add_word(word_token)
         if any(a.token for a in annotations):
             word.wordtokens.append(word_token)
         ind += 1
-
     return discourse
 
 def data_to_discourse(data, lexicon = None, call_back=None, stop_check=None):

diff --git a/corpustools/corpus/io/text_spelling.py b/corpustools/corpus/io/text_spelling.py
@@ -26,7 +26,7 @@ def inspect_discourse_spelling(path, support_corpus_path = None):
     list of AnnotationTypes
         Autodetected AnnotationTypes for the text file
     """
-    a = AnnotationType('spelling', None, None, anchor = True, token = False)
+    a = AnnotationType('Spelling', None, None, anchor = True, token = False)
     if os.path.isdir(path):
         for root, subdirs, files in os.walk(path):
             for filename in files:
@@ -46,7 +46,7 @@ def inspect_discourse_spelling(path, support_corpus_path = None):
                 a.add(trial, save = False)
     annotation_types = [a]
     if support_corpus_path is not None:
-        annotation_types += [AnnotationType('transcription', None, None, base = True)]
+        annotation_types += [AnnotationType('Transcription', None, None, base = True)]
     return annotation_types
 
 def spelling_text_to_data(corpus_name, path, annotation_types = None,
@@ -59,9 +59,9 @@ def spelling_text_to_data(corpus_name, path, annotation_types = None,
         support = load_binary(support_corpus_path)
     if annotation_types is None:
         annotation_types = inspect_discourse_spelling(path, support_corpus_path)
-
     for a in annotation_types:
         a.reset()
+
     data = DiscourseData(name, annotation_types)
 
     lines = text_to_lines(path)
@@ -82,7 +82,7 @@ def spelling_text_to_data(corpus_name, path, annotation_types = None,
         annotations = {}
         for word in line:
             spell = word.strip()
-            spell = ''.join(x for x in spell if not x in data['spelling'].ignored_characters)
+            spell = ''.join(x for x in spell if not x in data['Spelling'].ignored_characters)
             if spell == '':
                 continue
             word = Annotation(spell)
@@ -99,7 +99,7 @@ def spelling_text_to_data(corpus_name, path, annotation_types = None,
                 word.begins.append(level_count)
                 word.ends.append(level_count + len(tier_elements))
                 annotations[n] = tier_elements
-            annotations['spelling'] = [word]
+            annotations['Spelling'] = [word]
             data.add_annotations(**annotations)
 
     return data
@@ -163,7 +163,7 @@ def load_directory_spelling(corpus_name, path, annotation_types = None,
     return corpus
 
 def load_discourse_spelling(corpus_name, path, annotation_types = None,
-                            lexicon = None,
+                            lexicon = None, feature_system_path=None,
                             support_corpus_path = None, ignore_case = False,
                             stop_check = None, call_back = None):
     """

diff --git a/corpustools/corpus/io/text_transcription.py b/corpustools/corpus/io/text_transcription.py
@@ -30,7 +30,7 @@ def inspect_discourse_transcription(path):
     trans_delimiters = ['.', ';', ',']
 
     att = Attribute('transcription','tier','Transcription')
-    a = AnnotationType('transcription', None, None, attribute = att,
+    a = AnnotationType('Transcription', None, None, attribute = att,
                                             base = True)
 
     if os.path.isdir(path):
@@ -75,8 +75,8 @@ def transcription_text_to_data(corpus_name, path, annotation_types = None,
 
     for a in annotation_types:
         a.reset()
-    a = AnnotationType('spelling', None, None,
-                attribute = Attribute('spelling','spelling','Spelling'), anchor = True)
+    a = AnnotationType('Spelling', None, None,
+                attribute = Attribute('Spelling','spelling','Spelling'), anchor = True)
 
     annotation_types.append(a)
 
@@ -88,7 +88,7 @@ def transcription_text_to_data(corpus_name, path, annotation_types = None,
         call_back(0, len(lines))
         cur = 0
     trans_check = False
-    n = 'transcription'
+    n = 'Transcription'
 
     for line in lines:
         if stop_check is not None and stop_check():
@@ -118,7 +118,7 @@ def transcription_text_to_data(corpus_name, path, annotation_types = None,
             tier_elements[0].begin = level_count
             tier_elements[-1].end = level_count + len(tier_elements)
             annotations[n] = tier_elements
-            annotations['spelling'] = [word]
+            annotations['Spelling'] = [word]
             data.add_annotations(**annotations)
     #if data[n].delimiter and not trans_check:
     #    raise(DelimiterError('The transcription delimiter specified does not create multiple segments. Please specify another delimiter.'))

diff --git a/corpustools/gui/featuregui.py b/corpustools/gui/featuregui.py
@@ -1202,7 +1202,7 @@ def __init__(self, parent, settings, current_system):
         layout.addWidget(acFrame)
 
         note = QLabel('This window is only for adding and removing transcription/features systems.\n'
-                      'When loading a corpus, you will be asked which one of these systems you want to use with your'
+                      'When loading a corpus, you will be asked which one of these systems you want to use with your '
                       'corpus. If you have an existing corpus and you want to change systems, go to '
                       'the Features menu and select View/change feature system...')
         note.setWordWrap(True)

diff --git a/corpustools/gui/iogui.py b/corpustools/gui/iogui.py
@@ -924,7 +924,8 @@ def generateKwargs(self):
                 kwargs['word_path'] = kwargs.pop('path')
                 kwargs['phone_path'] = phone_path
 
-        if not any(['Transcription' in x.name for x in kwargs['annotation_types']]):
+        if (not self.textType == 'spelling' and
+                not any(['Transcription' in x.name for x in kwargs['annotation_types']])):
             alert = QMessageBox()
             alert.setWindowTitle('No transcription selected')
             alert.setText('You did not select any transcription column for your corpus. '