Speaker Recognition #367

Draft: wants to merge 6 commits into base naomi-dev

Changes from all commits
78 changes: 38 additions & 40 deletions naomi/application.py
@@ -349,7 +349,7 @@ def __init__(
vad_slug,
category='vad'
)
vad_plugin = vad_info.plugin_class(input_device)
vad_plugin = vad_info.plugin_class(input_device, vad_info)

# Initialize Brain
tti_slug = profile.get_profile_var(['tti_engine'], 'Naomi TTI')
@@ -500,40 +500,46 @@ def __init__(
tts_plugin_info = profile.get_arg('plugins').get_plugin(tts_slug, category='tts')
tts_plugin = tts_plugin_info.plugin_class(tts_plugin_info)

# Initialize Speaker Recognition Engine
sr_slug = profile.get_profile_var(['sr_engine'], 'default_sr')
sr_plugin_info = profile.get_arg('plugins').get_plugin(sr_slug, category='sr')
sr_plugin = sr_plugin_info.plugin_class(sr_plugin_info)
profile.set_arg('sr_plugin', sr_plugin)

# audiolog for training
if(save_audio):
save_passive_audio = True
save_active_audio = True
save_noise = True

# Instead of passing the following values to mic, store them here and
# pick them up when needed.
profile.set_arg('input_device', input_device)
profile.set_arg('output_device', output_device)
profile.set_arg('sr_plugin', sr_plugin)
profile.set_arg('active_stt_reply', active_stt_reply)
profile.set_arg('active_stt_response', active_stt_response)
profile.set_arg('passive_stt_plugin', passive_stt_plugin)
profile.set_arg('active_stt_plugin', active_stt_plugin)
profile.set_arg('special_stt_slug', special_stt_slug)
profile.set_arg('tts_plugin', tts_plugin)
profile.set_arg('vad_plugin', vad_plugin)
profile.set_arg('keyword', keyword)
profile.set_arg('print_transcript', print_transcript)
profile.set_arg('passive_listen', passive_listen)
profile.set_arg('save_passive_audio', save_passive_audio)
profile.set_arg('save_active_audio', save_active_audio)
profile.set_arg('save_noise', save_noise)

# Initialize Mic
if use_mic == USE_TEXT_MIC:
self.mic = local_mic.Mic()
self._logger.info('Using local text input and output')
elif use_mic == USE_BATCH_MIC:
self.mic = batch_mic.Mic(
passive_stt_plugin,
active_stt_plugin,
special_stt_slug,
profile.get_arg('plugins'),
batch_file,
keyword=keyword
)
self.mic = batch_mic.Mic()
self._logger.info('Using batched mode')
else:
self.mic = mic.Mic(
input_device,
output_device,
active_stt_reply,
active_stt_response,
passive_stt_plugin,
active_stt_plugin,
special_stt_slug,
profile.get_arg('plugins'),
tts_plugin,
vad_plugin,
keyword=keyword,
print_transcript=print_transcript,
passive_listen=passive_listen,
save_audio=save_audio,
save_passive_audio=save_passive_audio,
save_active_audio=save_active_audio,
save_noise=save_noise
)
self.mic = mic.Mic()

self.conversation = conversation.Conversation(
self.mic, self.brain
@@ -930,15 +936,18 @@ def validate_input_device(self, input_device_slug):
input_device = audio_engine.get_device_by_slug(
profile.get_profile_var(['audio', 'input_device'])
)
profile.set_arg('input_device', input_device)
output_device = audio_engine.get_device_by_slug(
profile.get_profile_var(["audio", "output_device"])
)
profile.set_arg('output_device', output_device)
vad_slug = profile.get_profile_var(['vad_engine'], 'snr_vad')
vad_info = profile.get_arg('plugins').get_plugin(
vad_slug,
category='vad'
)
vad_plugin = vad_info.plugin_class(input_device)
profile.set_arg('vad_plugin', vad_plugin)

filename = os.path.join(
os.path.dirname(
@@ -953,18 +962,7 @@ def validate_input_device(self, input_device_slug):
filename
)
visualizations.load_visualizations(self)
testMic = mic.Mic(
input_device,
output_device,
profile.get(['active_stt', 'reply']),
profile.get(['active_stt', 'response']),
None,
None,
None,
None,
None,
vad_plugin
)
testMic = mic.Mic()

visualizations.run_visualization(
"output",
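Taken together, the application.py changes replace the long Mic constructor calls with a shared-state pattern: every value the mic used to receive as a parameter is now stashed with profile.set_arg(), and mic.Mic(), batch_mic.Mic() and the testMic used in validate_input_device() are built with no arguments. Below is a minimal sketch of the receiving side, assuming the refactored Mic.__init__ pulls each value back out with profile.get_arg(); the attribute names and import path are illustrative, not taken from this diff.

    # Sketch only: assumes the refactored Mic reads its configuration from
    # the profile args that application.py now sets; attribute names are
    # illustrative, not part of this pull request.
    from naomi import profile


    class Mic(object):
        def __init__(self):
            # Pick up the values stored earlier with profile.set_arg()
            self._input_device = profile.get_arg('input_device')
            self._output_device = profile.get_arg('output_device')
            self._vad_plugin = profile.get_arg('vad_plugin')
            self._tts_plugin = profile.get_arg('tts_plugin')
            self._sr_plugin = profile.get_arg('sr_plugin')
            self.passive_stt_plugin = profile.get_arg('passive_stt_plugin')
            self.active_stt_plugin = profile.get_arg('active_stt_plugin')
            self._keyword = profile.get_arg('keyword')
            self.passive_listen = profile.get_arg('passive_listen')

The trade-off is implicit coupling: a Mic built this way only works after application.py has populated the profile args, which is why the old testMic call with its explicit None placeholders could simply be dropped.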
8 changes: 1 addition & 7 deletions naomi/batch_mic.py
@@ -20,13 +20,7 @@ def parse_batch_file(fp):

class Mic(object):
def __init__(
self,
passive_stt_engine,
active_stt_engine,
special_stt_slug,
plugins,
batch_file,
keyword='JASPER'
self
):
self._logger = logging.getLogger(__name__)
self._keyword = keyword
7 changes: 4 additions & 3 deletions naomi/brain.py
@@ -104,7 +104,7 @@ def get_all_phrases(self):
phrases.extend(self.get_plugin_phrases())
return sorted(list(set(phrases)))

def query(self, texts):
def query(self, sr_response):
"""
Passes user input to the appropriate module, testing it against
each candidate module's isValid function.
@@ -119,14 +119,15 @@ def query(self, texts):
Returns:
A tuple containing a text and the module that can handle it
"""
for text in texts:
for text in sr_response['utterance']:
# convert text to upper case and remove any punctuation
text = self._intentparser.cleantext(text)
intents = self._intentparser.determine_intent(text)
for intent in intents:
# Add the intent to the response so the handler method
# can find out which intent activated it
intents[intent]['intent'] = intent
intents[intent]['user'] = sr_response['speaker']
if(profile.get_arg("print_transcript")):
print("{} {}".format(intent, intents[intent]['score']))
if(profile.get_arg('save_active_audio')):
@@ -164,7 +165,7 @@ def query(self, texts):
return(intents[intent])
self._logger.debug(
"No module was able to handle any of these phrases: {}".format(
str(texts)
str(sr_response.get('utterance'))
)
)
return (None)
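The brain.query() signature change means the method no longer receives a bare list of transcriptions: it now expects the structure produced by the speaker-recognition step, reads the candidate transcriptions from its 'utterance' key, and copies its 'speaker' value into the matched intent as 'user' so plugin handlers can tell who spoke. A hedged example of calling it, with invented values and assuming a brain and mic already constructed as in application.py:

    # Example input for the new brain.query(); the values are invented.
    sr_response = {
        'utterance': ["WHAT TIME IS IT"],  # candidate transcriptions
        'speaker': "alice"                 # label supplied by the sr engine
    }

    intent = brain.query(sr_response)
    if intent:
        # query() stamped the speaker into the intent as 'user', so the
        # handler can personalize its reply.
        print(intent['user'])
        intent['action'](intent, mic)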
25 changes: 10 additions & 15 deletions naomi/conversation.py
@@ -37,12 +37,7 @@ def askName(self):
self.mic.say(salutation)

def greet(self):
if profile.get(['first_name']):
salutation = self.gettext("How can I be of service, {}?").format(
profile.get(["first_name"])
)
else:
salutation = self.gettext("How can I be of service?")
salutation = self.gettext("How can I be of service?")
self.mic.say(salutation)

def handleForever(self):
@@ -51,23 +46,23 @@ def handleForever(self):
"""
self._logger.debug('Starting to handle conversation.')
while True:
utterance = self.mic.listen()
sr_response = self.mic.listen()
# if listen() returns False, just ignore it
if not isinstance(utterance, bool):
if not isinstance(sr_response, bool):
handled = False
while(" ".join(utterance) != "" and not handled):
utterance, handled = self.handleRequest(utterance)
while(" ".join(sr_response['utterance']) != "" and not handled):
sr_response, handled = self.handleRequest(sr_response)

def handleRequest(self, utterance):
def handleRequest(self, sr_response):
handled = False
intent = self.brain.query(utterance)
intent = self.brain.query(sr_response)
if intent:
try:
self._logger.info(intent)
intent['action'](intent, self.mic)
handled = True
except Unexpected as e:
utterance = e.utterance
sr_response = e.sr_response
except Exception as e:
self._logger.error(
'Failed to service intent {}: {}'.format(intent, str(e)),
@@ -84,14 +79,14 @@ def handleRequest(self, utterance):
"Handling of phrase '{}'",
"by plugin '{}' completed"
]).format(
utterance,
sr_response['utterance'],
intent
)
)
else:
self.say_i_do_not_understand()
handled = True
return utterance, handled
return sr_response, handled

def say_i_do_not_understand(self):
self.mic.say(
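What the diff does not show is the speaker-recognition plugin itself: application.py only instantiates it from the new 'sr' category and stores it with profile.set_arg('sr_plugin', ...), and conversation.py simply treats mic.listen() as returning the combined structure (or False when nothing usable was heard). The following is a hypothetical sketch of how such a plugin and the mic's hand-off could fit together; the class name, the recognize_speaker() method, and the hand-off shown in the trailing comment are assumptions, not part of this pull request.

    # Hypothetical 'sr' plugin sketch; names are illustrative only.
    class DefaultSpeakerRecognition(object):
        def __init__(self, info):
            # Mirrors the plugin_class(plugin_info) convention that
            # application.py uses for the tts and sr engines.
            self._info = info

        def recognize_speaker(self, audio_data):
            # A real engine would compare audio_data against enrolled
            # voice prints; this stub labels every speaker as unknown.
            return "unknown"


    # Inside the mic, the hand-off to the conversation loop could then
    # look like this (assumed, not shown in the diff):
    #   sr_plugin = profile.get_arg('sr_plugin')
    #   sr_response = {
    #       'utterance': transcriptions,
    #       'speaker': sr_plugin.recognize_speaker(audio_data)
    #   }
    #   return sr_response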