Skip to content

Commit

Permalink
Switch from pygettext to xgettext (#248)
Browse files Browse the repository at this point in the history
* Switch from pygettext to xgettext

This commit switches the update_translations.py script to use
xgettext instead of pygettext, mostly because xgettext is installed
system-wide, while the location of pygettext moves around, especially
when updating Python versions.

It also contains a few tweaks to the Naomi TTI system, which was
scoring Hacker News higher than News when responding to the word
"news" because every template in Hacker News contains the word
"news" while one of the templates in News does not. I have therefore
started penalizing each intent in proportion to the weight of its
words that do not appear in the incoming request, which seems to
work well so far, and pushes "Hacker News" into a lower ranking if
the word "Hacker" is not detected in the incoming request.

* Skip WER if transcription is blank

jiWER has suddenly started throwing an error when the transcription
is blank, so skip the WER calculation in that case and leave the WER
at 0.

* Fixes to update_translations.py

Fixed a couple of bugs introduced when updating update_translations.py
to Python 3, and added some fixes to hopefully prevent msgcat from
duplicating the header information when updating translations.

* Flake8 issues

Fixed some formatting issues, unused variables, unused import

* Codacy subprocess complaint

Codacy does not like subprocess. Added a nosec check.

* Fixed plurals

Fixed a bug that was preventing the plurals information from
being written to the .po files.

* Better recovery after canceling setup

I started setting up Naomi, but canceled the process before it
completed. At that point, it had built the Pocketsphinx model,
but had not set the locations of the hmm dir or fst file in the
profile. Because the model files already existed the next time I
ran it, it skipped the step that creates them, and so never set
the default location of the fst file.

* STTTrainer fix for new intents

This fixes the Adapt Pocketsphinx STTTrainer plugin to extract
intent templates from the new intent structure.
  • Loading branch information
aaronchantrill committed Apr 8, 2020
1 parent faffb05 commit f18c800
Show file tree
Hide file tree
Showing 10 changed files with 235 additions and 85 deletions.
13 changes: 9 additions & 4 deletions NaomiSTTTrainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -484,10 +484,12 @@ def application(environ, start_response):
recording_type = "unclear"
print("Setting recording_type to unclear")
# calculate the word error rate
WER = wer(
transcription,
verified_transcription
)
WER = 0
if(len(transcription) > 0):
WER = wer(
transcription,
verified_transcription
)
c.execute(
" ".join([
"update audiolog set ",
Expand Down Expand Up @@ -947,6 +949,9 @@ def application(environ, start_response):
'<body>SQLite error: {}</body>',
'</html>'
]).format(e))
# Save (commit) the changes
conn.commit()
conn.close()
return [line.encode("UTF-8") for line in ret]


Expand Down
2 changes: 1 addition & 1 deletion naomi/application.py
Original file line number Diff line number Diff line change
Expand Up @@ -1281,7 +1281,7 @@ def install_plugins(self, plugins):
else:
required_file = os.path.join(
install_dir,
"python_required.txt"
"python_requirements.txt"
)
if os.path.isfile(required_file):
# Install any python packages required
Expand Down
1 change: 1 addition & 0 deletions naomi/commandline.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# -*- coding: utf-8 -*-
from blessings import Terminal
from getpass import getpass
from naomi import app_utils
Expand Down
3 changes: 2 additions & 1 deletion plugins/speechhandler/hackernews/hackernews.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ def intents(self):
'templates': [
"READ HACKER NEWS",
"WHAT IS IN HACKER NEWS",
"WHAT ARE THE HACKER NEWS HEADLINES"
"WHAT ARE THE HACKER NEWS HEADLINES",
"WHAT IS HAPPENING IN HACKER NEWS"
]
},
'fr-FR': {
Expand Down
42 changes: 30 additions & 12 deletions plugins/speechhandler/news/news.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,27 +47,45 @@ def intents(self):
'NewsIntent': {
'locale': {
'en-US': {
'keywords': {
'NewsKeyword': [
'NEWS',
'HEADLINES'
]
},
'templates': [
"READ THE NEWS",
"WHAT IS IN THE NEWS",
"WHAT IS HAPPENING IN THE NEWS",
"WHAT ARE TODAY'S HEADLINES"
"READ THE {NewsKeyword}",
"WHAT IS IN THE {NewsKeyword}",
"WHAT IS HAPPENING IN THE {NewsKeyword}",
"WHAT ARE TODAY'S {NewsKeyword}"
]
},
'fr-FR': {
'keywords': {
'NewsKeyword': [
"NOUVELLES",
"TITRES D'AUJOURD'HUI"
]
},
'templates': [
"LIRE LES NOUVELLES",
"CE QUI EST DANS LES NOUVELLES",
"CE QUI SE PASSE DANS LES NOUVELLES",
"QUELS SONT LES TITRES D'AUJOURD'HUI"
"LIRE LES {NewsKeyword}",
"CE QUI EST DANS LES {NewsKeyword}",
"CE QUI SE PASSE DANS LES {NewsKeyword}",
"QUELS SONT LES {NewsKeyword}"
]
},
'de-DE': {
'keywords':{
'NewsKeyword': [
"NACHRICHTEN",
"SCHLAGZEILEN"
]
},
'templates': [
"LIES DIE NACHRICHTEN",
"WAS IST IN DEN NACHRICHTEN",
"WAS PASSIERT IN DEN NACHRICHTEN",
"WAS SIND HEUTE SCHLAGZEILEN"
"LIES DIE {NewsKeyword}",
"WAS IST IN DEN {NewsKeyword}",
"WAS PASSIERT IN DEN {NewsKeyword}",
"WAS SIND HEUTE {NewsKeyword}"
]
}
},
Expand Down
8 changes: 4 additions & 4 deletions plugins/speechhandler/wwis_weather/wwis_weather.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,15 +313,15 @@ def handle(self, intent, mic):
tomorrow.month,
tomorrow.day
)
if("today" in text.lower()):
if(_("today") in text.lower()):
if(todaydate in forecast.keys()):
mic.say(
_("The weather today in {} is {}").format(
city, forecast[todaydate]["weather"]
)
)
snark = False
elif("tomorrow" in text.lower()):
elif(_("tomorrow") in text.lower()):
if(tomorrowdate in forecast.keys()):
mic.say(
_("The weather tomorrow in {} will be {}").format(
Expand All @@ -334,9 +334,9 @@ def handle(self, intent, mic):
first = True
for day in sorted(forecast.keys()):
if(day == todaydate):
DOW = "today"
DOW = _("today")
elif(day == tomorrowdate):
DOW = "tomorrow"
DOW = _("tomorrow")
else:
DOW = WEEKDAY_NAMES[datetime.datetime.strptime(day, "%Y-%m-%d").weekday()]
if(first):
Expand Down
10 changes: 7 additions & 3 deletions plugins/stt/pocketsphinx-stt/sphinxplugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ def check_pocketsphinx_model(directory):
FilesExist = False
if(not os.path.isfile(os.path.join(directory, "variances"))):
FilesExist = False
if(not os.path.isfile(os.path.join(directory, "model", "train.fst"))):
FilesExist = False
return FilesExist


Expand Down Expand Up @@ -159,6 +161,7 @@ def __del__(self):
os.remove(self._logfile)

def settings(self):
language = profile.get(['language'])
# Get the defaults for settings
# hmm_dir
hmm_dir = profile.get(
Expand Down Expand Up @@ -254,7 +257,7 @@ def settings(self):
fst_model = path
# If either the hmm dir or fst model is missing, then
# download the standard model
if not(hmm_dir and fst_model):
if not(hmm_dir and os.path.isdir(hmm_dir) and fst_model and os.path.isfile(fst_model)):
# Start by checking to see if we have a copy of the standard
# model for this user's chosen language and download it if not.
# Check for the files we need
Expand All @@ -269,6 +272,7 @@ def settings(self):
if not os.path.isdir(standard_dir):
os.mkdir(standard_dir)
hmm_dir = standard_dir
fst_model = os.path.join(hmm_dir, "train", "model.fst")
formatteddict_path = os.path.join(
hmm_dir,
"cmudict.formatted.dict"
Expand All @@ -287,7 +291,7 @@ def settings(self):
]
completedprocess = run_command(cmd)
self._logger.info(process_completedprocess(completedprocess))

if(not os.path.isfile(formatteddict_path)):
print("Formatting the g2p dictionary")
with open(os.path.join(standard_dir, "cmudict.dict"), "r") as in_file:
with open(formatteddict_path, "w+") as out_file:
Expand All @@ -301,6 +305,7 @@ def settings(self):
# replace the first whitespace with a tab
line = line.replace(' ', '\t', 1)
print(line, file=out_file)
if(not os.path.isfile(fst_model)):
# Use phonetisaurus to prepare an fst model
print("Training an FST model")
cmd = [
Expand All @@ -311,7 +316,6 @@ def settings(self):
]
completedprocess = run_command(cmd)
self._logger.info(process_completedprocess(completedprocess))
fst_model = os.path.join(hmm_dir, "train", "model.fst")

phonetisaurus_executable = profile.get_profile_var(
['pocketsphinx', 'phonetisaurus_executable']
Expand Down
9 changes: 8 additions & 1 deletion plugins/stt_trainer/pocketsphinx_adapt/pocketsphinx_adapt.py
Original file line number Diff line number Diff line change
Expand Up @@ -398,7 +398,7 @@ def HandleCommand(self, command, description):
if(hasattr(plugin, "intents")):
intents = plugin.intents()
for intent in intents:
for template in intents[intent]['templates']:
for template in intents[intent]['locale'][self.language]['templates']:
phrases.extend([
word.upper() for word in template.split()
])
Expand All @@ -411,6 +411,13 @@ def HandleCommand(self, command, description):
info.name, message
)
)
self._logger.warning(
"Plugin '{}' skipped! (Reason: {})".format(
info.name, message
),
exc_info=True
)

# Get the set of all words in words_used that do not appear
# in phrases
print("Phrases:")
Expand Down
15 changes: 12 additions & 3 deletions plugins/tti/naomi_tti/naomi_tti.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def add_intents(self, intents):
if(
word not in profile.get(
['naomi_tti', 'words_to_ignore'],
['ANY', 'ARE', 'DO', 'IS', 'THE', 'TO', 'WHAT']
['ANY', 'ARE', 'DO', 'IN', 'IS', 'THE', 'TO', 'WHAT']
)
):
try:
Expand Down Expand Up @@ -229,16 +229,25 @@ def determine_intent(self, phrase):
intentscores = {}
for intent in self.intent_map['intents']:
score = 0
# build up a score based on the words that match.
for word in words:
if word in self.intent_map['intents'][intent]['words']:
intents_count = len(self.intent_map['intents'])
word_appears_in = len(self.words[word])
# print("Word: {} Weight: {} Intents: {} Appears in: {}".format(word, weight, intents_count, word_appears_in))
score += self.intent_map['intents'][intent]['words'][word] * (intents_count - word_appears_in) / intents_count
intentscores[intent] = score / len(words)
# penalize the variant if it does not contain important words
# highscore would be if the variant contains at least each of
# the keywords
highscore = sum(self.intent_map['intents'][intent]['words'].values())
penalty = 0
for word in self.intent_map['intents'][intent]['words']:
if word not in words:
penalty += self.intent_map['intents'][intent]['words'][word]
intentscores[intent] = score * (highscore - penalty) / highscore
# list intents and scores
for intent in intentscores.keys():
self._logger.info("\t{}: {}".format(intent, intentscores[intent]))
# print("\t{}: {}".format(intent, intentscores[intent]))
# Take the intent with the highest score
# print("==========intentscores============")
# pprint(intentscores)
Expand Down
Loading

0 comments on commit f18c800

Please sign in to comment.