Switch from pygettext to xgettext (#248)

* Switch from pygettext to xgettext

  This commit switches the update_translations.py script to use xgettext instead of pygettext, mostly because xgettext is installed system-wide, while pygettext moves around, especially when updating Python versions. It also contains a few tweaks to the Naomi TTI system, which was scoring Hacker News higher than News when responding to the word "news" because every template in Hacker News contains the word "news" while one of the templates in News does not. I have therefore started penalizing the intent in proportion to the weight of the words that do not appear in the incoming request. This seems to work well so far, and it penalizes "Hacker News" into a lower ranking if the word "Hacker" is not detected in the incoming request.

* Skip WER if transcription is blank

  jiWER is suddenly throwing an error if the transcription is blank, so skip it.

* Fixes to update_translations.py

  Fixed a couple of bugs introduced when updating update_translations.py to Python 3, and added some fixes to hopefully prevent msgcat from duplicating the header information when updating translations.

* Flake8 issues

  Fixed some formatting issues, unused variables, and an unused import.

* Codacy subprocess complaint

  Codacy does not like subprocess. Added a nosec check.

* Fixed plurals

  Fixed a bug which was preventing the plurals information from being written to the .po files.

* Better recovery after canceling setup

  I started setting up Naomi, but canceled the process before it completed. At that point, it had built the Pocketsphinx model, but had not set the locations of the hmm dir or fst file in the profile. Because it did not have to create them the next time I ran it, it did not set the default location of the fst file.

* STTTrainer fix for new intents

  This fixes the Adapt Pocketsphinx STTTrainer plugin to extract intent templates from the new intent structure.
NaomiProject · Apr 8, 2020 · f18c800 · f18c800
1 parent faffb05
commit f18c800
Show file tree

Hide file tree

Showing 10 changed files with 235 additions and 85 deletions.
diff --git a/NaomiSTTTrainer.py b/NaomiSTTTrainer.py
@@ -484,10 +484,12 @@ def application(environ, start_response):
                                     recording_type = "unclear"
                                     print("Setting recording_type to unclear")
                             # calculate the word error rate
-                            WER = wer(
-                                transcription,
-                                verified_transcription
-                            )
+                            WER = 0
+                            if(len(transcription) > 0):
+                                WER = wer(
+                                    transcription,
+                                    verified_transcription
+                                )
                             c.execute(
                                 " ".join([
                                     "update audiolog set ",
@@ -947,6 +949,9 @@ def application(environ, start_response):
                         '<body>SQLite error: {}</body>',
                         '</html>'
                     ]).format(e))
+        # Save (commit) the changes
+        conn.commit()
+        conn.close()
         return [line.encode("UTF-8") for line in ret]
 
 

diff --git a/naomi/application.py b/naomi/application.py
@@ -1281,7 +1281,7 @@ def install_plugins(self, plugins):
                         else:
                             required_file = os.path.join(
                                 install_dir,
-                                "python_required.txt"
+                                "python_requirements.txt"
                             )
                             if os.path.isfile(required_file):
                                 # Install any python packages required

diff --git a/naomi/commandline.py b/naomi/commandline.py
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 from blessings import Terminal
 from getpass import getpass
 from naomi import app_utils

diff --git a/plugins/speechhandler/hackernews/hackernews.py b/plugins/speechhandler/hackernews/hackernews.py
@@ -36,7 +36,8 @@ def intents(self):
                         'templates': [
                             "READ HACKER NEWS",
                             "WHAT IS IN HACKER NEWS",
-                            "WHAT ARE THE HACKER NEWS HEADLINES"
+                            "WHAT ARE THE HACKER NEWS HEADLINES",
+                            "WHAT IS HAPPENING IN HACKER NEWS"
                         ]
                     },
                     'fr-FR': {

diff --git a/plugins/speechhandler/news/news.py b/plugins/speechhandler/news/news.py
@@ -47,27 +47,45 @@ def intents(self):
             'NewsIntent': {
                 'locale': {
                     'en-US': {
+                        'keywords': {
+                            'NewsKeyword': [
+                                'NEWS',
+                                'HEADLINES'
+                            ]
+                        },
                         'templates': [
-                            "READ THE NEWS",
-                            "WHAT IS IN THE NEWS",
-                            "WHAT IS HAPPENING IN THE NEWS",
-                            "WHAT ARE TODAY'S HEADLINES"
+                            "READ THE {NewsKeyword}",
+                            "WHAT IS IN THE {NewsKeyword}",
+                            "WHAT IS HAPPENING IN THE {NewsKeyword}",
+                            "WHAT ARE TODAY'S {NewsKeyword}"
                         ]
                     },
                     'fr-FR': {
+                        'keywords': {
+                            'NewsKeyword': [
+                                "NOUVELLES",
+                                "TITRES D'AUJOURD'HUI"
+                            ]
+                        },
                         'templates': [
-                            "LIRE LES NOUVELLES",
-                            "CE QUI EST DANS LES NOUVELLES",
-                            "CE QUI SE PASSE DANS LES NOUVELLES",
-                            "QUELS SONT LES TITRES D'AUJOURD'HUI"
+                            "LIRE LES {NewsKeyword}",
+                            "CE QUI EST DANS LES {NewsKeyword}",
+                            "CE QUI SE PASSE DANS LES {NewsKeyword}",
+                            "QUELS SONT LES {NewsKeyword}"
                         ]
                     },
                     'de-DE': {
+                        'keywords':{
+                            'NewsKeyword': [
+                                "NACHRICHTEN",
+                                "SCHLAGZEILEN"
+                            ]
+                        },
                         'templates': [
-                            "LIES DIE NACHRICHTEN",
-                            "WAS IST IN DEN NACHRICHTEN",
-                            "WAS PASSIERT IN DEN NACHRICHTEN",
-                            "WAS SIND HEUTE SCHLAGZEILEN"
+                            "LIES DIE {NewsKeyword}",
+                            "WAS IST IN DEN {NewsKeyword}",
+                            "WAS PASSIERT IN DEN {NewsKeyword}",
+                            "WAS SIND HEUTE {NewsKeyword}"
                         ]
                     }
                 },

diff --git a/plugins/speechhandler/wwis_weather/wwis_weather.py b/plugins/speechhandler/wwis_weather/wwis_weather.py
@@ -313,15 +313,15 @@ def handle(self, intent, mic):
                 tomorrow.month,
                 tomorrow.day
             )
-            if("today" in text.lower()):
+            if(_("today") in text.lower()):
                 if(todaydate in forecast.keys()):
                     mic.say(
                         _("The weather today in {} is {}").format(
                             city, forecast[todaydate]["weather"]
                         )
                     )
                     snark = False
-            elif("tomorrow" in text.lower()):
+            elif(_("tomorrow") in text.lower()):
                 if(tomorrowdate in forecast.keys()):
                     mic.say(
                         _("The weather tomorrow in {} will be {}").format(
@@ -334,9 +334,9 @@ def handle(self, intent, mic):
                 first = True
                 for day in sorted(forecast.keys()):
                     if(day == todaydate):
-                        DOW = "today"
+                        DOW = _("today")
                     elif(day == tomorrowdate):
-                        DOW = "tomorrow"
+                        DOW = _("tomorrow")
                     else:
                         DOW = WEEKDAY_NAMES[datetime.datetime.strptime(day, "%Y-%m-%d").weekday()]
                     if(first):

diff --git a/plugins/stt/pocketsphinx-stt/sphinxplugin.py b/plugins/stt/pocketsphinx-stt/sphinxplugin.py
@@ -66,6 +66,8 @@ def check_pocketsphinx_model(directory):
         FilesExist = False
     if(not os.path.isfile(os.path.join(directory, "variances"))):
         FilesExist = False
+    if(not os.path.isfile(os.path.join(directory, "model", "train.fst"))):
+        FilesExist = False
     return FilesExist
 
 
@@ -159,6 +161,7 @@ def __del__(self):
             os.remove(self._logfile)
 
     def settings(self):
+        language = profile.get(['language'])
         # Get the defaults for settings
         # hmm_dir
         hmm_dir = profile.get(
@@ -254,7 +257,7 @@ def settings(self):
                     fst_model = path
         # If either the hmm dir or fst model is missing, then
         # download the standard model
-        if not(hmm_dir and fst_model):
+        if not(hmm_dir and os.path.isdir(hmm_dir) and fst_model and os.path.isfile(fst_model)):
             # Start by checking to see if we have a copy of the standard
             # model for this user's chosen language and download it if not.
             # Check for the files we need
@@ -269,6 +272,7 @@ def settings(self):
             if not os.path.isdir(standard_dir):
                 os.mkdir(standard_dir)
             hmm_dir = standard_dir
+            fst_model = os.path.join(hmm_dir, "train", "model.fst")
             formatteddict_path = os.path.join(
                 hmm_dir,
                 "cmudict.formatted.dict"
@@ -287,7 +291,7 @@ def settings(self):
                 ]
                 completedprocess = run_command(cmd)
                 self._logger.info(process_completedprocess(completedprocess))
-
+            if(not os.path.isfile(formatteddict_path)):
                 print("Formatting the g2p dictionary")
                 with open(os.path.join(standard_dir, "cmudict.dict"), "r") as in_file:
                     with open(formatteddict_path, "w+") as out_file:
@@ -301,6 +305,7 @@ def settings(self):
                             # replace the first whitespace with a tab
                             line = line.replace(' ', '\t', 1)
                             print(line, file=out_file)
+            if(not os.path.isfile(fst_model)):
                 # Use phonetisaurus to prepare an fst model
                 print("Training an FST model")
                 cmd = [
@@ -311,7 +316,6 @@ def settings(self):
                 ]
                 completedprocess = run_command(cmd)
                 self._logger.info(process_completedprocess(completedprocess))
-                fst_model = os.path.join(hmm_dir, "train", "model.fst")
 
         phonetisaurus_executable = profile.get_profile_var(
             ['pocketsphinx', 'phonetisaurus_executable']

diff --git a/plugins/stt_trainer/pocketsphinx_adapt/pocketsphinx_adapt.py b/plugins/stt_trainer/pocketsphinx_adapt/pocketsphinx_adapt.py
@@ -398,7 +398,7 @@ def HandleCommand(self, command, description):
                             if(hasattr(plugin, "intents")):
                                 intents = plugin.intents()
                                 for intent in intents:
-                                    for template in intents[intent]['templates']:
+                                    for template in intents[intent]['locale'][self.language]['templates']:
                                         phrases.extend([
                                             word.upper() for word in template.split()
                                         ])
@@ -411,6 +411,13 @@ def HandleCommand(self, command, description):
                                     info.name, message
                                 )
                             )
+                            self._logger.warning(
+                                "Plugin '{}' skipped! (Reason: {})".format(
+                                    info.name, message
+                                ),
+                                exc_info=True
+                            )
+
                     # Get the set of all words in words_used that do not appear
                     # in phrases
                     print("Phrases:")

diff --git a/plugins/tti/naomi_tti/naomi_tti.py b/plugins/tti/naomi_tti/naomi_tti.py
@@ -85,7 +85,7 @@ def add_intents(self, intents):
                     if(
                         word not in profile.get(
                             ['naomi_tti', 'words_to_ignore'],
-                            ['ANY', 'ARE', 'DO', 'IS', 'THE', 'TO', 'WHAT']
+                            ['ANY', 'ARE', 'DO', 'IN', 'IS', 'THE', 'TO', 'WHAT']
                         )
                     ):
                         try:
@@ -229,16 +229,25 @@ def determine_intent(self, phrase):
             intentscores = {}
             for intent in self.intent_map['intents']:
                 score = 0
+                # build up a score based on the words that match.
                 for word in words:
                     if word in self.intent_map['intents'][intent]['words']:
                         intents_count = len(self.intent_map['intents'])
                         word_appears_in = len(self.words[word])
-                        # print("Word: {} Weight: {} Intents: {} Appears in: {}".format(word, weight, intents_count, word_appears_in))
                         score += self.intent_map['intents'][intent]['words'][word] * (intents_count - word_appears_in) / intents_count
-                intentscores[intent] = score / len(words)
+                # penalize the variant if it does not contain important words
+                # highscore would be if the variant contains at least each of
+                # the keywords
+                highscore = sum(self.intent_map['intents'][intent]['words'].values())
+                penalty = 0
+                for word in self.intent_map['intents'][intent]['words']:
+                    if word not in words:
+                        penalty += self.intent_map['intents'][intent]['words'][word]
+                intentscores[intent] = score * (highscore - penalty) / highscore
             # list intents and scores
             for intent in intentscores.keys():
                 self._logger.info("\t{}: {}".format(intent, intentscores[intent]))
+                # print("\t{}: {}".format(intent, intentscores[intent]))
             # Take the intent with the highest score
             # print("==========intentscores============")
             # pprint(intentscores)