# Stream Subtitles with Google Cloud Speech API

## Google Speech-to-Text Python Library Installation

You need to configure some information before using this API.

See the [official client library installation guide](https://cloud.google.com/speech-to-text/docs/transcribe-client-libraries#client-libraries-install-python) for the setup steps.

If you read Japanese, there is a detailed explanation on my [blog](https://ryo-udon.hatenadiary.jp/).

日本人の方は[こちらのブログ](https://ryo-udon.hatenadiary.jp/)に日本語の説明を載せているので確認してください。


The Python library can be installed with pip:

    pip install --upgrade google-cloud-speech


In [1]:
# Import the Speech-to-Text client library installed with pip above.
from google.cloud import speech

# Create the API client; the sample below sends its recognition request
# through this object.
client = speech.SpeechClient()

In [4]:
# Quick-start sample from the introduction page: transcribe a LINEAR16
# recording referenced by a Cloud Storage (gs://) URI.
gcs_uri = "gs://cloud-samples-data/speech/brooklyn_bridge.raw"

# Describe the audio first, then wrap the URI; the two objects are
# independent of each other.
config = speech.RecognitionConfig(
    encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
    sample_rate_hertz=16000,
    language_code="en-US",
)
audio = speech.RecognitionAudio(uri=gcs_uri)

response = client.recognize(audio=audio, config=config)

# Print the first alternative of every result in the response.
for result in response.results:
    print("Transcript: {}".format(result.alternatives[0].transcript))

Transcript: how old is the Brooklyn Bridge


## Install PyAudio

### Windows

    python -m pip install pyaudio

### Mac

    brew install portaudio

    pip install pyaudio

### Linux

    sudo apt install python3-pyaudio

### PyAudio Check

**Caution**

This code will produce **SOOO MANY** graphs. Be careful.

Press Alt+Delete to clear outputs.

In [None]:
import pyaudio
import matplotlib.pyplot as plt
import numpy as np

# Capture settings passed to PyAudio.open() in GrabAudio()
# ("MIKE" presumably stands for microphone).
MIKE_AUDIO_FORMAT = pyaudio.paInt16  # sample format; read back as int16 in ReadPlotData
MIKE_AUDIO_RATE = 44100  # sample rate handed to PyAudio
MIKE_AUDIO_CHANNEL = 1  # number of input channels
MIKE_AUDIO_BUFFER_SIZE = 2**11  # frames per buffer (2048)
MIKE_AUDIO_DEVICE_INDEX = 1  # input device index; NOTE(review): likely machine-specific -- confirm

def GrabAudio():
    """Open the microphone input stream.

    Creates a PyAudio instance and opens an input stream configured from
    the module-level ``MIKE_AUDIO_*`` constants.

    Returns:
        tuple: ``(audio, stream)`` -- the PyAudio instance and the opened
        input stream. Pass both to ``ReleaseAudio`` when finished.

    Raises:
        Exception: Whatever ``PyAudio.open`` raises (for example when the
            configured device index is not available). The PyAudio
            instance is terminated before the exception propagates.
    """
    audio = pyaudio.PyAudio()
    try:
        stream = audio.open(
            format=MIKE_AUDIO_FORMAT,
            rate=MIKE_AUDIO_RATE,
            channels=MIKE_AUDIO_CHANNEL,
            input_device_index=MIKE_AUDIO_DEVICE_INDEX,
            input=True,
            frames_per_buffer=MIKE_AUDIO_BUFFER_SIZE,
        )
    except Exception:
        # The caller never receives `audio` when opening fails, so it
        # could not terminate it; release it here instead of leaking it.
        audio.terminate()
        raise
    return (audio, stream)

def ReleaseAudio(audio, stream):
    """Stop and close ``stream``, then terminate ``audio``.

    Every cleanup step is attempted even if an earlier one raises, so a
    failure while stopping the stream does not leave the stream open or
    the PyAudio instance alive.

    Args:
        audio: The PyAudio instance returned by ``GrabAudio``.
        stream: The input stream returned by ``GrabAudio``.

    Raises:
        Exception: Any error raised by a cleanup step propagates after
            the remaining steps have run.
    """
    try:
        try:
            stream.stop_stream()
        finally:
            # Close the stream even when stopping it failed.
            stream.close()
    finally:
        # Always release the PyAudio instance itself.
        audio.terminate()

def ReadPlotData(stream):
    """Read one chunk of audio from ``stream`` and plot it.

    Args:
        stream: Open input stream to read from (the main loop passes the
            stream returned by ``GrabAudio``).
    """
    # NOTE(review): 10024 frames per read is kept exactly as written; it
    # differs from MIKE_AUDIO_BUFFER_SIZE -- confirm it is intended.
    raw = stream.read(10024)
    samples = np.frombuffer(raw, dtype='int16')

    # Draw the chunk, pause briefly, then clear the axes for the next one.
    plt.plot(samples)
    plt.draw()
    plt.pause(0.001)
    plt.cla()

if __name__=='__main__':
    # Plot microphone input until the user interrupts (Ctrl+C, or the
    # notebook's "interrupt kernel" command).
    (audio,stream) = GrabAudio()

    try:
        while True:
            ReadPlotData(stream)
    except KeyboardInterrupt:
        # Normal way to stop the loop; nothing else to do.
        pass
    finally:
        # Release the microphone on every exit path, including errors
        # raised while reading or plotting.
        ReleaseAudio(audio,stream)

## Now Combine PyAudio & Google Cloud Speech (Now Editing)

In [None]:

from google.cloud import speech

import pyaudio
import matplotlib.pyplot as plt
import numpy as np

# Capture settings passed to PyAudio.open() in GrabAudio()
# ("MIKE" presumably stands for microphone).
MIKE_AUDIO_FORMAT = pyaudio.paInt16  # sample format
MIKE_AUDIO_RATE = 44100  # sample rate; also used for the recognition config below
MIKE_AUDIO_CHANNEL = 1  # number of input channels
MIKE_AUDIO_BUFFER_SIZE = 2**11  # frames per buffer (2048)
MIKE_AUDIO_DEVICE_INDEX = 1  # input device index; NOTE(review): likely machine-specific -- confirm


# Recognition settings: LINEAR16 audio at the microphone's sample rate,
# with language code "ja".
config = speech.RecognitionConfig(
    encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
    sample_rate_hertz=MIKE_AUDIO_RATE,
    language_code="ja"
)

# True while transcription should keep running; CreateSubtitles is meant
# to clear it when the transcript is "エンド".
stream_continue_flag = True

def GrabAudio():
    """Open the microphone input stream.

    Creates a PyAudio instance and opens an input stream configured from
    the module-level ``MIKE_AUDIO_*`` constants.

    Returns:
        tuple: ``(audio, stream)`` -- the PyAudio instance and the opened
        input stream. Pass both to ``ReleaseAudio`` when finished.

    Raises:
        Exception: Whatever ``PyAudio.open`` raises (for example when the
            configured device index is not available). The PyAudio
            instance is terminated before the exception propagates.
    """
    audio = pyaudio.PyAudio()
    try:
        stream = audio.open(
            format=MIKE_AUDIO_FORMAT,
            rate=MIKE_AUDIO_RATE,
            channels=MIKE_AUDIO_CHANNEL,
            input_device_index=MIKE_AUDIO_DEVICE_INDEX,
            input=True,
            frames_per_buffer=MIKE_AUDIO_BUFFER_SIZE,
        )
    except Exception:
        # The caller never receives `audio` when opening fails, so it
        # could not terminate it; release it here instead of leaking it.
        audio.terminate()
        raise
    return (audio, stream)

def ReleaseAudio(audio, stream):
    """Stop and close ``stream``, then terminate ``audio``.

    Every cleanup step is attempted even if an earlier one raises, so a
    failure while stopping the stream does not leave the stream open or
    the PyAudio instance alive.

    Args:
        audio: The PyAudio instance returned by ``GrabAudio``.
        stream: The input stream returned by ``GrabAudio``.

    Raises:
        Exception: Any error raised by a cleanup step propagates after
            the remaining steps have run.
    """
    try:
        try:
            stream.stop_stream()
        finally:
            # Close the stream even when stopping it failed.
            stream.close()
    finally:
        # Always release the PyAudio instance itself.
        audio.terminate()

def CreateSubtitles(response):
    """Print the transcripts in ``response`` and watch for the stop word.

    The first alternative of each result is printed: final results as-is,
    non-final results behind a four-space indent. When a transcript is
    exactly "エンド", the module-level ``stream_continue_flag`` is set to
    ``False``.

    Args:
        response: Object exposing ``results``; each result needs
            ``is_final`` and ``alternatives`` (items with ``transcript``).
    """
    # Without this declaration the assignment below would only create a
    # local variable and the module-level flag would never change.
    global stream_continue_flag

    for result in response.results:
        # Skip results that carry no alternatives instead of raising
        # IndexError on the [0] lookup.
        if not result.alternatives:
            continue

        transcript = result.alternatives[0].transcript

        if result.is_final:
            print(transcript)
        else:
            print("    ", transcript)

        if transcript == "エンド":
            stream_continue_flag = False






if __name__=='__main__':
    # Stream microphone audio to the Speech API and print subtitles.
    # The previous version called client.recognize() with an `audio`
    # object this cell never defines, and never used the microphone.

    client = speech.SpeechClient()
    print("")

    # interim_results=True requests non-final results as well, which
    # CreateSubtitles prints with an indent.
    streaming_config = speech.StreamingRecognitionConfig(
        config=config,
        interim_results=True,
    )

    (audio, stream) = GrabAudio()

    def _MicRequests():
        """Yield one streaming request per microphone buffer until stopped."""
        while stream_continue_flag:
            # exception_on_overflow=False: tolerate dropped frames rather
            # than raising while the network call lags behind.
            chunk = stream.read(MIKE_AUDIO_BUFFER_SIZE,
                                exception_on_overflow=False)
            yield speech.StreamingRecognizeRequest(audio_content=chunk)

    try:
        responses = client.streaming_recognize(
            config=streaming_config,
            requests=_MicRequests(),
        )
        for response in responses:
            CreateSubtitles(response)
            if not stream_continue_flag:
                break
    except KeyboardInterrupt:
        # Ctrl+C / "interrupt kernel" is an expected way to stop.
        pass
    finally:
        # Stop the request generator before the stream is closed, then
        # release the microphone on every exit path.
        stream_continue_flag = False
        ReleaseAudio(audio, stream)
    