From 43fe2a48fbe20d9574eb389f6db407e47b42f204 Mon Sep 17 00:00:00 2001 From: dsborets Date: Mon, 3 May 2021 09:52:53 -0700 Subject: [PATCH 1/2] Add support of the Google Cloud TTS https://cloud.google.com/text-to-speech --- mycroft/tts/google_cloud_tts.py | 95 +++++++++++++++++++++++++++++++++ mycroft/tts/tts.py | 2 + mycroft/util/__init__.py | 3 -- mycroft/util/format.py | 2 - requirements/requirements.txt | 1 + 5 files changed, 98 insertions(+), 5 deletions(-) create mode 100644 mycroft/tts/google_cloud_tts.py diff --git a/mycroft/tts/google_cloud_tts.py b/mycroft/tts/google_cloud_tts.py new file mode 100644 index 000000000000..904abe027db8 --- /dev/null +++ b/mycroft/tts/google_cloud_tts.py @@ -0,0 +1,95 @@ +# Copyright 2019 Mycroft AI Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from .tts import TTS, TTSValidator +from mycroft.configuration import Configuration + +from google.cloud import texttospeech +from google.oauth2 import service_account + +VOICE_LIST = { + "male": texttospeech.SsmlVoiceGender.MALE, + "female": texttospeech.SsmlVoiceGender.FEMALE +} + +OUTPUT_FILE_FORMAT = { + "wav": texttospeech.AudioEncoding.LINEAR16, + "mp3": texttospeech.AudioEncoding.MP3 +} + + +class GoogleCloudTTS(TTS): + + def __init__(self, lang, config): + self.config = Configuration.get().get("tts", {}).get("google_cloud", {}) + + self.type = self.config.get("file_format", "wav").lower() + super(GoogleCloudTTS, self).__init__(lang, config, GoogleCloudTTSValidator(self), + audio_ext=self.type) + self.lang = self.config.get("lang", lang) + + voice_gender = self.config.get("voice_gender", "male").lower() + + service_account_info = self.config.get("service_account_info", {}) + + credentials = service_account.Credentials.from_service_account_info(service_account_info) + + # Instantiates a client + self.client = texttospeech.TextToSpeechClient(credentials=credentials) + + # Select the language code and the ssml voice gender + self.voice = texttospeech.VoiceSelectionParams( + language_code=self.lang, ssml_gender=VOICE_LIST.get(voice_gender) + ) + + # Select the type of audio file you want returned + self.audio_config = texttospeech.AudioConfig(audio_encoding=OUTPUT_FILE_FORMAT.get(self.type)) + + def get_tts(self, sentence, audio_file): + with open(audio_file, "wb") as out: + # Set the text input to be synthesized + synthesis_input = texttospeech.SynthesisInput(text=sentence) + + # Perform the text-to-speech request on the text input with the selected + # voice parameters and audio file type + response = self.client.synthesize_speech( + input=synthesis_input, voice=self.voice, audio_config=self.audio_config + ) + + out.write(response.audio_content) + return audio_file, None # No phonemes + + +class GoogleCloudTTSValidator(TTSValidator): + def __init__(self, 
tts): + super(GoogleCloudTTSValidator, self).__init__(tts) + + def validate_lang(self): + pass + + def validate_connection(self): + try: + synthesis_input = texttospeech.SynthesisInput(text="Test") + + self.tts.client.synthesize_speech( + input=synthesis_input, voice=self.tts.voice, audio_config=self.tts.audio_config + ) + except Exception as ex: + raise Exception( + 'Error connecting to Google Cloud TTS server. Please check your ' + 'internet connection and configuration', ex) + + def get_tts_class(self): + return GoogleCloudTTS diff --git a/mycroft/tts/tts.py b/mycroft/tts/tts.py index e1d1f6f66ca6..58a321bbe522 100644 --- a/mycroft/tts/tts.py +++ b/mycroft/tts/tts.py @@ -537,6 +537,7 @@ class TTSFactory: from mycroft.tts.espeak_tts import ESpeak from mycroft.tts.fa_tts import FATTS from mycroft.tts.google_tts import GoogleTTS + from mycroft.tts.google_cloud_tts import GoogleCloudTTS from mycroft.tts.mary_tts import MaryTTS from mycroft.tts.mimic_tts import Mimic from mycroft.tts.spdsay_tts import SpdSay @@ -553,6 +554,7 @@ class TTSFactory: "mimic": Mimic, "mimic2": Mimic2, "google": GoogleTTS, + "google_cloud": GoogleCloudTTS, "marytts": MaryTTS, "fatts": FATTS, "festival": Festival, diff --git a/mycroft/util/__init__.py b/mycroft/util/__init__.py index 3f396dc32366..dbe67ffd7272 100644 --- a/mycroft/util/__init__.py +++ b/mycroft/util/__init__.py @@ -18,9 +18,6 @@ """ from __future__ import absolute_import -import os - -import mycroft.audio from mycroft.util.format import nice_number from .string_utils import camel_case_split from .audio_utils import (play_audio_file, play_wav, play_ogg, play_mp3, diff --git a/mycroft/util/format.py b/mycroft/util/format.py index d8b1faa907a0..a6f488e50eb1 100644 --- a/mycroft/util/format.py +++ b/mycroft/util/format.py @@ -35,8 +35,6 @@ date_time_format, expand_options, _translate_word) -from padatious.util import expand_parentheses - def nice_number(number, lang=None, speech=True, denominators=None): """Format a float to 
human readable functions diff --git a/requirements/requirements.txt b/requirements/requirements.txt index 57b291e639b9..54d8cf373c72 100644 --- a/requirements/requirements.txt +++ b/requirements/requirements.txt @@ -1,5 +1,6 @@ requests>=2.20.0,<2.26.0 gTTS>=2.2.2,<2.3.0 +google-cloud-texttospeech==2.3.0 PyAudio==0.2.11 pyee==8.1.0 SpeechRecognition==3.8.1 From 1f999712bf0db5b80852c56a5bd6ef4d9cfdbac5 Mon Sep 17 00:00:00 2001 From: dsborets Date: Mon, 3 May 2021 10:12:25 -0700 Subject: [PATCH 2/2] PEP8 fixes --- mycroft/tts/google_cloud_tts.py | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/mycroft/tts/google_cloud_tts.py b/mycroft/tts/google_cloud_tts.py index 904abe027db8..16a6b9f89fdf 100644 --- a/mycroft/tts/google_cloud_tts.py +++ b/mycroft/tts/google_cloud_tts.py @@ -33,10 +33,12 @@ class GoogleCloudTTS(TTS): def __init__(self, lang, config): - self.config = Configuration.get().get("tts", {}).get("google_cloud", {}) + self.config = (Configuration.get().get("tts", {}) + .get("google_cloud", {})) self.type = self.config.get("file_format", "wav").lower() - super(GoogleCloudTTS, self).__init__(lang, config, GoogleCloudTTSValidator(self), + super(GoogleCloudTTS, self).__init__(lang, config, + GoogleCloudTTSValidator(self), audio_ext=self.type) self.lang = self.config.get("lang", lang) @@ -44,7 +46,8 @@ def __init__(self, lang, config): service_account_info = self.config.get("service_account_info", {}) - credentials = service_account.Credentials.from_service_account_info(service_account_info) + credentials = (service_account.Credentials + .from_service_account_info(service_account_info)) # Instantiates a client self.client = texttospeech.TextToSpeechClient(credentials=credentials) @@ -55,17 +58,20 @@ def __init__(self, lang, config): ) # Select the type of audio file you want returned - self.audio_config = texttospeech.AudioConfig(audio_encoding=OUTPUT_FILE_FORMAT.get(self.type)) + self.audio_config = 
texttospeech.AudioConfig( + audio_encoding=OUTPUT_FILE_FORMAT.get(self.type) + ) def get_tts(self, sentence, audio_file): with open(audio_file, "wb") as out: # Set the text input to be synthesized synthesis_input = texttospeech.SynthesisInput(text=sentence) - # Perform the text-to-speech request on the text input with the selected - # voice parameters and audio file type + # Perform the text-to-speech request on the text input + # with the selected voice parameters and audio file type response = self.client.synthesize_speech( - input=synthesis_input, voice=self.voice, audio_config=self.audio_config + input=synthesis_input, voice=self.voice, + audio_config=self.audio_config ) out.write(response.audio_content) @@ -84,12 +90,14 @@ def validate_connection(self): synthesis_input = texttospeech.SynthesisInput(text="Test") self.tts.client.synthesize_speech( - input=synthesis_input, voice=self.tts.voice, audio_config=self.tts.audio_config + input=synthesis_input, voice=self.tts.voice, + audio_config=self.tts.audio_config ) except Exception as ex: raise Exception( - 'Error connecting to Google Cloud TTS server. Please check your ' - 'internet connection and configuration', ex) + 'Error connecting to Google Cloud TTS server. ' + 'Please check your internet connection ' + 'and configuration settings', ex) def get_tts_class(self): return GoogleCloudTTS