In [2]:
from pathlib import Path

from paraphone.tasks.phonemize import PhonemizedWordsCSV
from paraphone.tasks.synth import GoogleSpeakSynthesizer, VOICES

# Synthesizer built with the "fr" code and the first entry of VOICES["fr"].
# NOTE(review): the JSON file is presumably a Google Cloud credentials file kept
# next to the repository — confirm, and make sure it stays out of version control.
french_voice = VOICES["fr"][0]
credentials_path = Path("../morphologicalstudy-1224861510d4.json")
synth = GoogleSpeakSynthesizer("fr", french_voice, credentials_path)

# Candidate words to look up in the phonemized dictionary and synthesize.
# NOTE(review): these look like near-minimal pairs around the French "eu" and
# "o" vowels (e.g. "tomme" / "tome" / "taume") — confirm the intent.
words = set((
    "jeune", "jeuner", "os", "osseux", "oeuf", "euphémisme",
    "bonne", "beaune",
    "tomme", "tome", "taume",
    "comme", "côme",
    "gomme", "somme", "chaume", "nonne", "môme",
    "bol", "tôle", "col", "folle", "vol", "geole", "molle", "grolle",
))
# Load the phonemized word list and expose it as a word -> phonemes mapping.
pho_word_csv = PhonemizedWordsCSV(Path("../workspaces/large_fr/phonemized/all.csv"))
pho_dict = pho_word_csv.to_dict()

# Keep only the candidate words present in the dictionary (missing words are
# skipped silently) and store their phonemes as one space-separated string.
tested_words = dict(
    (word, " ".join(pho_dict[word]))
    for word in words
    if word in pho_dict
)
# TODO:

In [6]:
# SSML payload with two parts separated by a one-second break: the plain word
# as text, then an empty <phoneme> element carrying the IPA string in "ph".
# Placeholders: {word} and {phonemes}.
pho_ssml_template = "\n".join([
    "",
    "<speak>",
    "{word}",
    '<break time="1s"/>',
    '<phoneme alphabet="ipa" ph="{phonemes}"></phoneme>',
    "</speak>",
    "",
])

In [6]:
# Scratch directory that receives every synthesized audio file. It is created
# (with any missing parents) on first run and reused as-is afterwards.
OUTPUT_DIR = Path("/tmp") / "synth_tests"
OUTPUT_DIR.mkdir(exist_ok=True, parents=True)

In [4]:
import nest_asyncio

# Patch asyncio so that loop.run_until_complete() can be called from the
# synthesis cells below. NOTE(review): presumably required because the notebook
# kernel already runs an event loop — confirm; this cell must run before them.
nest_asyncio.apply()

In [9]:
from asyncio import get_event_loop

# Synthesize every looked-up word with the word + phonemes template and store
# the audio returned by the synthesizer as <word>.ogg inside OUTPUT_DIR.
loop = get_event_loop()
for word, phonemes in tested_words.items():
    # Phonemes are stored space-separated; the "ph" attribute receives them
    # with the spaces removed.
    ssml = pho_ssml_template.format(word=word, phonemes=phonemes.replace(" ", ""))
    audio_output = loop.run_until_complete(synth.synth_ssml(ssml))
    (OUTPUT_DIR / f"{word}.ogg").write_bytes(audio_output)

In [11]:
from asyncio import get_event_loop

# Phoneme-only SSML payload: a single empty <phoneme> element whose "ph"
# attribute is filled from {phonemes}.
# NOTE: this rebinds pho_ssml_template from the earlier cell (which also spoke
# the plain word); re-running that earlier loop afterwards uses this version.
pho_ssml_template = "\n".join([
    "",
    "<speak>",
    '<phoneme alphabet="ipa" ph="{phonemes}"></phoneme>',
    "</speak>",
    "",
])

# Text-only SSML payload: the orthographic {word} with no phoneme hint.
text_ssml_template = "\n".join([
    "",
    "<speak>",
    "{word}",
    "</speak>",
    "",
])

# Hand-written word -> space-separated IPA phonemes used for the text-vs-phoneme
# comparison. NOTE: this rebinds tested_words from the first cell.
tested_words = dict(
    chien="ʃ j ɛ̃",
    lapin="l a p ɛ̃",
    escargot="ɛ s k a ʁ ɡ o",
    pomme="p ɔ m",
    poire="p w a ʁ",
    devenir="d ə v ə n i ʁ",
)

def _synth_to_file(ssml, destination):
    """Synthesize ``ssml`` with ``synth`` and write the returned audio to ``destination``.

    Relies on the notebook-level ``loop`` and ``synth`` objects.
    """
    destination.write_bytes(loop.run_until_complete(synth.synth_ssml(ssml)))


# For every word, produce two recordings in OUTPUT_DIR: one driven by the IPA
# phonemes (<word>_pho.ogg) and one driven by the spelling (<word>_txt.ogg).
loop = get_event_loop()
for word, phonemes in tested_words.items():
    # Phonemes are stored space-separated; "ph" receives them without spaces.
    # `word` is still passed, although the phoneme-only template does not use it.
    _synth_to_file(
        pho_ssml_template.format(word=word, phonemes=phonemes.replace(" ", "")),
        OUTPUT_DIR / f"{word}_pho.ogg",
    )
    _synth_to_file(
        text_ssml_template.format(word=word),
        OUTPUT_DIR / f"{word}_txt.ogg",
    )


In [15]:
from pydub import AudioSegment
import numpy as np

def _load_samples(path):
    """Decode the OGG file at ``path`` and return its samples as an int64 array.

    ``get_array_of_samples()`` yields fixed-width integers (the decoder's
    sample width). Subtracting two such arrays in their native dtype can wrap
    around silently, and ``np.abs`` of the most negative value stays negative,
    so the samples are widened to 64 bits before any arithmetic.
    """
    samples = AudioSegment.from_ogg(path).get_array_of_samples()
    return np.array(samples, dtype=np.int64)


# Crude comparison of the two renderings of each word: sum of absolute
# sample-wise differences over the common prefix of both signals (the longer
# recording is truncated to the length of the shorter one).
for word in tested_words:
    array_text = _load_samples(OUTPUT_DIR / f"{word}_txt.ogg")
    array_pho = _load_samples(OUTPUT_DIR / f"{word}_pho.ogg")
    min_len = min(len(array_pho), len(array_text))
    print(f"{word}: ", np.abs(array_text[:min_len] - array_pho[:min_len]).sum())

chien:  3929395009052
lapin:  5480830304602
escargot:  5812454365175
pomme:  4941393474969
poire:  4395417474030
devenir:  7814065350023


array.array