Merge pull request #2871 from forslund/bugfix/mimic-phonemes

Fix loading Mimic phonemes from cache
MycroftAI · Apr 13, 2021 · 1ba9eda
2 parents 356288a + 344999d
commit 1ba9eda
Show file tree

Hide file tree

Showing 3 changed files with 32 additions and 21 deletions.
diff --git a/mycroft/tts/cache.py b/mycroft/tts/cache.py
@@ -126,11 +126,8 @@ def save(self, phonemes):
         Arguments:
             phonemes: instructions for how to make the mouth on a device move
         """
-        if type(phonemes) == str:
-            rec = phonemes
-        else:
-            rec = json.dumps(phonemes)
         try:
+            rec = json.dumps(phonemes)
             with open(self.path, "w") as phoneme_file:
                 phoneme_file.write(rec)
         except Exception:

diff --git a/mycroft/tts/mimic_tts.py b/mycroft/tts/mimic_tts.py
@@ -91,6 +91,19 @@ def make_executable(dest):
                           .format(voice))
 
 
+def parse_phonemes(phonemes):
+    """Parse mimic phoneme string into a list of phone, duration pairs.
+
+    Arguments:
+        phonemes (bytes): phoneme output from mimic
+    Returns:
+        (list) list of phoneme duration pairs
+    """
+    phon_str = phonemes.decode()
+    pairs = phon_str.split(' ')
+    return [pair.split(':') for pair in pairs if ':' in pair]
+
+
 class Mimic(TTS):
     """TTS interface for local mimic v1."""
     def __init__(self, lang, config):
@@ -157,24 +170,20 @@ def get_tts(self, sentence, wav_file):
         """
         phonemes = subprocess.check_output(self.args + ['-o', wav_file,
                                                         '-t', sentence])
-        return wav_file, phonemes.decode()
+        return wav_file, parse_phonemes(phonemes)
 
-    def viseme(self, output):
+    def viseme(self, phoneme_pairs):
         """Convert phoneme string to visemes.
 
         Arguments:
-            output (str): Phoneme output from mimic
+            phoneme_pairs (list): Phoneme output from mimic
 
         Returns:
             (list) list of tuples of viseme and duration
         """
         visemes = []
-        pairs = str(output).split(" ")
-        for pair in pairs:
-            pho_dur = pair.split(":")  # phoneme:duration
-            if len(pho_dur) == 2:
-                visemes.append((VISIMES.get(pho_dur[0], '4'),
-                                float(pho_dur[1])))
+        for phon, dur in phoneme_pairs:
+            visemes.append((VISIMES.get(phon, '4'), float(dur)))
         return visemes
 
 

diff --git a/test/unittests/tts/test_mimic_tts.py b/test/unittests/tts/test_mimic_tts.py
@@ -21,22 +21,27 @@ class TestMimic(unittest.TestCase):
     @mock.patch('mycroft.tts.mimic_tts.subprocess')
     def test_get_tts(self, mock_subprocess, _, mock_device_api):
         mock_device_api.return_value = device_instance_mock
+        mock_subprocess.check_output().decode.return_value = 's:1 pau:2'
+        mock_subprocess.check_output.reset_mock()
         m = Mimic('en-US', {})
         wav, phonemes = m.get_tts('hello', 'abc.wav')
         mock_subprocess.check_output.assert_called_once_with(
             m.args + ['-o', 'abc.wav', '-t', 'hello'])
-        self.assertEqual(phonemes, mock_subprocess.check_output().decode())
+        self.assertEqual(phonemes, [['s', '1'], ['pau', '2']])
 
     def test_viseme(self, _, mock_device_api):
         mock_device_api.return_value = device_instance_mock
         m = Mimic('en-US', {})
-        viseme_string = ('pau:0.206 m:0.287 ah:0.401 ch:0.513 dh:0.578 '
-                         'iy:0.699 s:0.835 ey:1.013 m:1.118 w:1.213 ey:1.345 '
-                         'dh:1.415 ae:1.491 t:1.539 b:1.616 r:1.671 ih:1.744 '
-                         'k:1.819 s:1.923 d:1.978 ow:2.118 n:2.206 t:2.301 '
-                         'pau:2.408')
-
-        vis = m.viseme(viseme_string)
+        phoneme_list = (['pau', 0.206], ['m', 0.287], ['ah', 0.401],
+                        ['ch', 0.513], ['dh', 0.578], ['iy', 0.699],
+                        ['s', 0.835], ['ey', 1.013], ['m', 1.118],
+                        ['w', 1.213], ['ey', 1.345], ['dh', 1.415],
+                        ['ae', 1.491], ['t', 1.539], ['b', 1.616],
+                        ['r', 1.671], ['ih', 1.744], ['k', 1.819],
+                        ['s', 1.923], ['d', 1.978], ['ow', 2.118],
+                        ['n', 2.206], ['t', 2.301], ['pau', 2.408])
+
+        vis = m.viseme(phoneme_list)
         self.assertEqual(vis,
                          [('4', 0.206), ('4', 0.287), ('0', 0.401),
                           ('3', 0.513), ('3', 0.578), ('0', 0.699),