Speaker Recognition #367

Draft: wants to merge 6 commits into base naomi-dev

Changes from all commits
78 changes: 38 additions & 40 deletions naomi/application.py
@@ -349,7 +349,7 @@ def __init__(
vad_slug,
category='vad'
)
vad_plugin = vad_info.plugin_class(input_device)
vad_plugin = vad_info.plugin_class(input_device, vad_info)

# Initialize Brain
tti_slug = profile.get_profile_var(['tti_engine'], 'Naomi TTI')
@@ -500,40 +500,46 @@ def __init__(
tts_plugin_info = profile.get_arg('plugins').get_plugin(tts_slug, category='tts')
tts_plugin = tts_plugin_info.plugin_class(tts_plugin_info)

# Initialize Speaker Recognition Engine
sr_slug = profile.get_profile_var(['sr_engine'], 'default_sr')
sr_plugin_info = profile.get_arg('plugins').get_plugin(sr_slug, category='sr')
sr_plugin = sr_plugin_info.plugin_class(sr_plugin_info)
profile.set_arg('sr_plugin', sr_plugin)

# audiolog for training
if(save_audio):
save_passive_audio = True
save_active_audio = True
save_noise = True

# Instead of passing the following values to mic, store them here and
# pick them up when needed.
profile.set_arg('input_device', input_device)
profile.set_arg('output_device', output_device)
profile.set_arg('sr_plugin', sr_plugin)
profile.set_arg('active_stt_reply', active_stt_reply)
profile.set_arg('active_stt_response', active_stt_response)
profile.set_arg('passive_stt_plugin', passive_stt_plugin)
profile.set_arg('active_stt_plugin', active_stt_plugin)
profile.set_arg('special_stt_slug', special_stt_slug)
profile.set_arg('tts_plugin', tts_plugin)
profile.set_arg('vad_plugin', vad_plugin)
profile.set_arg('keyword', keyword)
profile.set_arg('print_transcript', print_transcript)
profile.set_arg('passive_listen', passive_listen)
profile.set_arg('save_passive_audio', save_passive_audio)
profile.set_arg('save_active_audio', save_active_audio)
profile.set_arg('save_noise', save_noise)

# Initialize Mic
if use_mic == USE_TEXT_MIC:
self.mic = local_mic.Mic()
self._logger.info('Using local text input and output')
elif use_mic == USE_BATCH_MIC:
self.mic = batch_mic.Mic(
passive_stt_plugin,
active_stt_plugin,
special_stt_slug,
profile.get_arg('plugins'),
batch_file,
keyword=keyword
)
self.mic = batch_mic.Mic()
self._logger.info('Using batched mode')
else:
self.mic = mic.Mic(
input_device,
output_device,
active_stt_reply,
active_stt_response,
passive_stt_plugin,
active_stt_plugin,
special_stt_slug,
profile.get_arg('plugins'),
tts_plugin,
vad_plugin,
keyword=keyword,
print_transcript=print_transcript,
passive_listen=passive_listen,
save_audio=save_audio,
save_passive_audio=save_passive_audio,
save_active_audio=save_active_audio,
save_noise=save_noise
)
self.mic = mic.Mic()

self.conversation = conversation.Conversation(
self.mic, self.brain
@@ -930,15 +936,18 @@ def validate_input_device(self, input_device_slug):
input_device = audio_engine.get_device_by_slug(
profile.get_profile_var(['audio', 'input_device'])
)
profile.set_arg('input_device', input_device)
output_device = audio_engine.get_device_by_slug(
profile.get_profile_var(["audio", "output_device"])
)
profile.set_arg('output_device', output_device)
vad_slug = profile.get_profile_var(['vad_engine'], 'snr_vad')
vad_info = profile.get_arg('plugins').get_plugin(
vad_slug,
category='vad'
)
vad_plugin = vad_info.plugin_class(input_device)
profile.set_arg('vad_plugin', vad_plugin)

filename = os.path.join(
os.path.dirname(
@@ -953,18 +962,7 @@ def validate_input_device(self, input_device_slug):
filename
)
visualizations.load_visualizations(self)
testMic = mic.Mic(
input_device,
output_device,
profile.get(['active_stt', 'reply']),
profile.get(['active_stt', 'response']),
None,
None,
None,
None,
None,
vad_plugin
)
testMic = mic.Mic()

visualizations.run_visualization(
"output",
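Taken together, the application.py changes replace the long Mic constructor calls with a shared-state pattern: every value the mic used to receive as a parameter is now stashed with profile.set_arg(), and mic.Mic(), batch_mic.Mic() and the testMic used in validate_input_device() are built with no arguments. Below is a minimal sketch of the receiving side, assuming the refactored Mic.__init__ pulls each value back out with profile.get_arg(); the attribute names and import path are illustrative, not taken from this diff.

    # Sketch only: assumes the refactored Mic reads its configuration from
    # the profile args that application.py now sets; attribute names are
    # illustrative, not part of this pull request.
    from naomi import profile


    class Mic(object):
        def __init__(self):
            # Pick up the values stored earlier with profile.set_arg()
            self._input_device = profile.get_arg('input_device')
            self._output_device = profile.get_arg('output_device')
            self._vad_plugin = profile.get_arg('vad_plugin')
            self._tts_plugin = profile.get_arg('tts_plugin')
            self._sr_plugin = profile.get_arg('sr_plugin')
            self.passive_stt_plugin = profile.get_arg('passive_stt_plugin')
            self.active_stt_plugin = profile.get_arg('active_stt_plugin')
            self._keyword = profile.get_arg('keyword')
            self.passive_listen = profile.get_arg('passive_listen')

The trade-off is implicit coupling: a Mic built this way only works after application.py has populated the profile args, which is why the old testMic call with its explicit None placeholders could simply be dropped.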
8 changes: 1 addition & 7 deletions naomi/batch_mic.py
@@ -20,13 +20,7 @@ def parse_batch_file(fp):

class Mic(object):
def __init__(
self,
passive_stt_engine,
active_stt_engine,
special_stt_slug,
plugins,
batch_file,
keyword='JASPER'
self
):
self._logger = logging.getLogger(__name__)
self._keyword = keyword
7 changes: 4 additions & 3 deletions naomi/brain.py
@@ -104,7 +104,7 @@ def get_all_phrases(self):
phrases.extend(self.get_plugin_phrases())
return sorted(list(set(phrases)))

def query(self, texts):
def query(self, sr_response):
"""
Passes user input to the appropriate module, testing it against
each candidate module's isValid function.
@@ -119,14 +119,15 @@ def query(self, texts):
Returns:
A tuple containing a text and the module that can handle it
"""
for text in texts:
for text in sr_response['utterance']:
# convert text to upper case and remove any punctuation
text = self._intentparser.cleantext(text)
intents = self._intentparser.determine_intent(text)
for intent in intents:
# Add the intent to the response so the handler method
# can find out which intent activated it
intents[intent]['intent'] = intent
intents[intent]['user'] = sr_response['speaker']
if(profile.get_arg("print_transcript")):
print("{} {}".format(intent, intents[intent]['score']))
if(profile.get_arg('save_active_audio')):
@@ -164,7 +165,7 @@ def query(self, texts):
return(intents[intent])
self._logger.debug(
"No module was able to handle any of these phrases: {}".format(
str(texts)
str(sr_response.get('utterance'))
)
)
return (None)
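The brain.query() signature change means the method no longer receives a bare list of transcriptions: it now expects the structure produced by the speaker-recognition step, reads the candidate transcriptions from its 'utterance' key, and copies its 'speaker' value into the matched intent as 'user' so plugin handlers can tell who spoke. A hedged example of calling it, with invented values and assuming a brain and mic already constructed as in application.py:

    # Example input for the new brain.query(); the values are invented.
    sr_response = {
        'utterance': ["WHAT TIME IS IT"],  # candidate transcriptions
        'speaker': "alice"                 # label supplied by the sr engine
    }

    intent = brain.query(sr_response)
    if intent:
        # query() stamped the speaker into the intent as 'user', so the
        # handler can personalize its reply.
        print(intent['user'])
        intent['action'](intent, mic)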
25 changes: 10 additions & 15 deletions naomi/conversation.py
@@ -37,12 +37,7 @@ def askName(self):
self.mic.say(salutation)

def greet(self):
if profile.get(['first_name']):
salutation = self.gettext("How can I be of service, {}?").format(
profile.get(["first_name"])
)
else:
salutation = self.gettext("How can I be of service?")
salutation = self.gettext("How can I be of service?")
self.mic.say(salutation)

def handleForever(self):
@@ -51,23 +46,23 @@ def handleForever(self):
"""
self._logger.debug('Starting to handle conversation.')
while True:
utterance = self.mic.listen()
sr_response = self.mic.listen()
# if listen() returns False, just ignore it
if not isinstance(utterance, bool):
if not isinstance(sr_response, bool):
handled = False
while(" ".join(utterance) != "" and not handled):
utterance, handled = self.handleRequest(utterance)
while(" ".join(sr_response['utterance']) != "" and not handled):
sr_response, handled = self.handleRequest(sr_response)

def handleRequest(self, utterance):
def handleRequest(self, sr_response):
handled = False
intent = self.brain.query(utterance)
intent = self.brain.query(sr_response)
if intent:
try:
self._logger.info(intent)
intent['action'](intent, self.mic)
handled = True
except Unexpected as e:
utterance = e.utterance
sr_response = e.sr_response
except Exception as e:
self._logger.error(
'Failed to service intent {}: {}'.format(intent, str(e)),
@@ -84,14 +79,14 @@ def handleRequest(self, utterance):
"Handling of phrase '{}'",
"by plugin '{}' completed"
]).format(
utterance,
sr_response['utterance'],
intent
)
)
else:
self.say_i_do_not_understand()
handled = True
return utterance, handled
return sr_response, handled

def say_i_do_not_understand(self):
self.mic.say(
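What the diff does not show is the speaker-recognition plugin itself: application.py only instantiates it from the new 'sr' category and stores it with profile.set_arg('sr_plugin', ...), and conversation.py simply treats mic.listen() as returning the combined structure (or False when nothing usable was heard). The following is a hypothetical sketch of how such a plugin and the mic's hand-off could fit together; the class name, the recognize_speaker() method, and the hand-off shown in the trailing comment are assumptions, not part of this pull request.

    # Hypothetical 'sr' plugin sketch; names are illustrative only.
    class DefaultSpeakerRecognition(object):
        def __init__(self, info):
            # Mirrors the plugin_class(plugin_info) convention that
            # application.py uses for the tts and sr engines.
            self._info = info

        def recognize_speaker(self, audio_data):
            # A real engine would compare audio_data against enrolled
            # voice prints; this stub labels every speaker as unknown.
            return "unknown"


    # Inside the mic, the hand-off to the conversation loop could then
    # look like this (assumed, not shown in the diff):
    #   sr_plugin = profile.get_arg('sr_plugin')
    #   sr_response = {
    #       'utterance': transcriptions,
    #       'speaker': sr_plugin.recognize_speaker(audio_data)
    #   }
    #   return sr_response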