### Playsound is required to play synthesized speech

In [1]:
from google.cloud import texttospeech
from google.cloud import translate as trans
from google.cloud import speech
from google.cloud.speech import enums
from google.cloud.speech import types
from google.oauth2 import service_account

import pyaudio
from playsound import playsound

In [2]:
def record_voice(player, seconds=3):
    """Record mono 16-bit audio at 22050 Hz through a stream opened on ``player``.

    Args:
        player: Object exposing ``open(...)`` in the style of
            ``pyaudio.PyAudio``; it is used to open the input stream.
        seconds: Requested recording length in seconds.

    Returns:
        list: The buffers returned by ``stream.read``, in capture order.
            ``int(RATE / CHUNK * seconds)`` buffers are read, so the recording
            can be up to one chunk shorter than the requested duration.

    Raises:
        Whatever ``stream.read`` raises; the stream is stopped and closed
        before the exception propagates.
    """
    CHUNK = 1024              # frames fetched per read() call
    FORMAT = pyaudio.paInt16  # 16-bit samples
    CHANNELS = 1
    RATE = 22050              # sampling rate in Hz

    stream = player.open(
        input=True,
        format=FORMAT,
        channels=CHANNELS,
        rate=RATE,
        frames_per_buffer=CHUNK)

    frames = []
    try:
        print("Start recording... ", end="")
        for _ in range(int(RATE / CHUNK * seconds)):
            # A silence detector (e.g. average magnitude) could end the loop early here.
            frames.append(stream.read(CHUNK))
        print("recorded", seconds, "second(s)")
    finally:
        # Release the audio device even when a read fails part-way through.
        stream.stop_stream()
        stream.close()

    return frames

In [3]:
def get_response_text(responses):
    """Return the latest transcript found in an iterable of recognition responses.

    For every response only the first result and its first alternative are
    inspected; responses without results, or whose first result has no
    alternatives, are skipped.

    Args:
        responses: Iterable of objects with a ``results`` sequence whose items
            expose an ``alternatives`` sequence of objects with a
            ``transcript`` attribute.

    Returns:
        str: The transcript taken from the last response that carried one, or
            an empty string when no response carried a transcript (previously
            this case raised ``IndexError``).
    """
    latest = ""
    for response in responses:
        if not response.results:
            continue

        alternatives = response.results[0].alternatives
        if alternatives:
            # Later responses supersede earlier ones; only the newest is kept.
            latest = alternatives[0].transcript
    return latest

In [4]:
def speech_to_text(client, language_code, player):
    """Record a short utterance and return its transcript from streaming recognition.

    Args:
        client: Speech client exposing ``streaming_recognize``.
        language_code: Language tag of the spoken input (e.g. ``'ru-RU'``).
        player: Audio object handed to ``record_voice`` to open the input stream.

    Returns:
        Whatever ``get_response_text`` extracts from the streamed responses.
    """
    # Must equal the rate record_voice() captures at. The original code read a
    # global ``RATE`` that is not defined at module level (it is local to
    # record_voice), which raises NameError on a fresh kernel.
    sample_rate_hertz = 22050

    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=sample_rate_hertz,
        language_code=language_code)
    streaming_config = types.StreamingRecognitionConfig(
        config=config,
        interim_results=True)

    frames = record_voice(player)

    # One request per recorded buffer, generated lazily as the client consumes them.
    requests = (types.StreamingRecognizeRequest(audio_content=content)
                for content in frames)

    responses = client.streaming_recognize(streaming_config, requests)
    return get_response_text(responses)

In [5]:
def translate(text, language_to, key_path="../lab6/yandex.translate.key"):
    """Translate ``text`` with the Yandex translator, detecting the source language.

    Args:
        text: Text to translate.
        language_to: Target language tag; only its first two characters are
            passed to the translator (``'en-US'`` -> ``'en'``).
        key_path: Path of the file holding the Yandex Translate API key.

    Returns:
        The value returned by ``Translater.translate()``.
    """
    # The Google Translation API was tried first, but every request to it came
    # back with "403 permission denied", so Yandex had to be used instead.
    import locale
    locale.setlocale(locale.LC_ALL, '')  # NOTE(review): kept from the original; purpose not evident here

    # TODO: put your own key in this file.
    # Read via a context manager so the handle is closed, and strip surrounding
    # whitespace so a trailing newline in the file does not end up in the key.
    with open(key_path) as key_file:
        api_key = key_file.read().strip()

    from yandex.Translater import Translater
    tr = Translater()
    tr.set_key(api_key)
    tr.set_text(text)
    tr.set_from_lang(tr.detect_lang())
    tr.set_to_lang(language_to[:2])

    return tr.translate()

In [6]:
def text_to_speech(client, text, language_to, player, out_path='out.mp3'):
    """Synthesize ``text`` as MP3 speech, save it to ``out_path`` and play it.

    Args:
        client: Text-to-speech client exposing ``synthesize_speech``.
        text: Text to be spoken.
        language_to: Language code used to select the voice (e.g. ``'en-US'``).
        player: Unused; kept so existing callers keep working.
        out_path: File the MP3 audio is written to before playback.

    Returns:
        None.
    """
    synthesis_input = texttospeech.types.SynthesisInput(text=text)

    voice = texttospeech.types.VoiceSelectionParams(
        language_code=language_to,
        ssml_gender=texttospeech.enums.SsmlVoiceGender.FEMALE)

    audio_config = texttospeech.types.AudioConfig(
        audio_encoding=texttospeech.enums.AudioEncoding.MP3)
    response = client.synthesize_speech(synthesis_input, voice, audio_config)

    # playsound() takes a file path, so the audio is written to disk first.
    with open(out_path, 'wb') as audio_file:
        audio_file.write(response.audio_content)
    playsound(out_path)

In [120]:
def main(language_from='ru-RU', language_to="en-US",
         credentials_path="/home/kn/practicalAI/lab7/googleSpeech.json"):
    """Run one record -> recognize -> translate -> speak round trip.

    Args:
        language_from: Language tag of the spoken input.
        language_to: Language tag of the translated, spoken output.
        credentials_path: Service-account JSON file used for both Google
            clients. The default is the original hard-coded location; pass
            your own path when running elsewhere.
    """
    credentials = service_account.Credentials.from_service_account_file(credentials_path)
    clientSpeech = speech.SpeechClient(credentials=credentials)
    clientText = texttospeech.TextToSpeechClient(credentials=credentials)
    player = pyaudio.PyAudio()

    try:
        text = speech_to_text(clientSpeech, language_from, player)
        if not text:
            # Nothing to translate or speak; avoid sending empty text downstream.
            print("Nothing was recognized")
            return
        translated_text = translate(text, language_to)
        text_to_speech(clientText, translated_text, language_to, player)
    finally:
        # Release the audio backend even if one of the steps above fails.
        player.terminate()


if __name__ == '__main__':
    main()

Start recording... recorded 3 second(s)
