# Using PyAudio to record a WAV file (default RECORD_SECONDS)

In [1]:
"""Source: https://people.csail.mit.edu/hubert/pyaudio/"""
"""PyAudio example: Record a few seconds of audio and save to a WAV file."""

import pyaudio
import wave

# Recording parameters.
CHUNK = 1024                  # frames requested per stream.read() call
FORMAT = pyaudio.paInt16      # 16-bit samples, i.e. 2 bytes each
CHANNELS = 2
RATE = 44100                  # sampling rate in Hz (frames per second)
RECORD_SECONDS = 5            # fixed recording duration in seconds
WAVE_OUTPUT_FILENAME = "output.wav"

p = pyaudio.PyAudio()         # create the PyAudio instance

frames = []                   # one bytes object per stream.read(CHUNK) call

try:
    # Query the sample width before the instance is terminated; it is needed
    # below for the WAV header.
    sample_width = p.get_sample_size(FORMAT)

    stream = p.open(format=FORMAT,
                    channels=CHANNELS,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=CHUNK)
    try:
        print("* recording")

        # int() drops the fractional part, so a trailing partial buffer is not
        # recorded (215 reads for the values above).
        for _ in range(int(RATE / CHUNK * RECORD_SECONDS)):
            frames.append(stream.read(CHUNK))

        print("* done recording")
    finally:
        # Release the stream even if a read fails part-way through.
        stream.stop_stream()
        stream.close()
finally:
    p.terminate()

# The context manager closes the file even if writing the frames fails.
with wave.open(WAVE_OUTPUT_FILENAME, 'wb') as wf:
    wf.setnchannels(CHANNELS)
    wf.setsampwidth(sample_width)
    wf.setframerate(RATE)
    wf.writeframes(b''.join(frames))   # the audio is stored as raw bytes

* recording
* done recording


# Try to understand what a frame is

In [None]:
# `frame` is never defined in this notebook and raises NameError; the list
# filled by the recording loop is `frames`. Show its type instead of dumping
# every recorded buffer into the output.
type(frames)

In [3]:
# Number of buffers collected: the recording loop appends one per stream.read(CHUNK) call.
len(frames)

215

In [None]:
# The bytes object returned by the first stream.read(CHUNK) call.
frames[0]

In [5]:
# Size in bytes of one buffer.
# With the settings above: CHUNK (1024) * CHANNELS (2) * 2 bytes per 16-bit sample = 4096.
len(frames[0])

4096

In [6]:
# The un-truncated loop bound; the recording loop wraps this in int(),
# which drops the fractional part.
RATE / CHUNK * RECORD_SECONDS

215.33203125

In [7]:
# Buffers per second: RATE (44100) / CHUNK (1024).
44100 / 1024

43.06640625

In [8]:
# Whole buffers per second (43) times RECORD_SECONDS (5).
# For these values it equals the truncated loop count, int(215.33...).
43 * 5

215

# Speech-to-Text Application feat. Google Cloud Speech-to-Text API

In [12]:
import os

from google.cloud import speech, storage

YOUR_SERVICE = 'YOUR_SERVICE ACCOUNT KEY'   # path to your service-account key (a JSON file)
YOUR_AUDIO = './output.wav'                 # local audio file to transcribe
YOUR_BUCKET = 'YOUR_BUCKET NAME'            # bucket name; the service account needs Cloud Storage access

# Must be set before the clients are created.
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = YOUR_SERVICE
storage_client = storage.Client()
speech_client = speech.SpeechClient()

# Upload the file to GCS (Google Cloud Storage).
# Use the bare file name as the object name so the local path prefix ("./")
# does not end up in the object name and the gs:// URI.
blob_name = os.path.basename(YOUR_AUDIO)
bucket = storage_client.bucket(YOUR_BUCKET)
bucket.blob(blob_name).upload_from_filename(YOUR_AUDIO)
uri = f'gs://{YOUR_BUCKET}/{blob_name}'

# Transcribe the audio.
audio = speech.RecognitionAudio(uri=uri)
config = speech.RecognitionConfig(
    # For WAV or FLAC files the encoding does not actually need to be specified.
    encoding=speech.RecognitionConfig.AudioEncoding.ENCODING_UNSPECIFIED,
    sample_rate_hertz=44100,   # same value as RATE used for the recording
    audio_channel_count=2,     # same value as CHANNELS; take care, the default is 1
    language_code="zh-TW",
    max_alternatives=10
)
response = speech_client.recognize(config=config, audio=audio)

# Print the first alternative of each result with its confidence.
for r in response.results:
    print(f'{r.alternatives[0].transcript}, {r.alternatives[0].confidence:.3f}')

這是5秒鐘的測試聽一下聲音看怎麼樣嘿嘿嘿, 0.926


# Using PyAudio to record WAV file (optional recording time)

In [26]:
"""Source: https://people.csail.mit.edu/hubert/pyaudio/"""
"""PyAudio example: Record a few seconds of audio and save to a WAV file."""

import pyaudio
import wave
import threading

# Recording parameters.
CHUNK = 1024                  # frames requested per stream.read() call
FORMAT = pyaudio.paInt16      # 16-bit samples
CHANNELS = 2
RATE = 44100                  # sampling rate in Hz
WAVE_OUTPUT_FILENAME = "output2.wav"

# Set by the input thread and polled by the recording loop. The stream itself
# is only ever touched from the main thread, so a read can never run
# against a stream that another thread has just stopped.
stop_requested = threading.Event()


def user_stop():
    """Wait for the user to press Enter, then ask the recording loop to stop."""
    input('[enter] to stop...')
    stop_requested.set()


p = pyaudio.PyAudio()

frames = []                   # one bytes object per stream.read(CHUNK) call

try:
    # Query the sample width before the instance is terminated; it is needed
    # below for the WAV header.
    sample_width = p.get_sample_size(FORMAT)

    stream = p.open(format=FORMAT,
                    channels=CHANNELS,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=CHUNK)
    try:
        print("* recording")

        user_stop_thread = threading.Thread(target=user_stop)
        user_stop_thread.start()

        # Keep reading until the user presses Enter; the read in progress
        # finishes before the flag is checked again.
        while not stop_requested.is_set():
            frames.append(stream.read(CHUNK))

        print("* done recording")
        user_stop_thread.join()
    finally:
        # Release the stream even if a read fails part-way through.
        stream.stop_stream()
        stream.close()
finally:
    p.terminate()

# The context manager closes the file even if writing the frames fails.
with wave.open(WAVE_OUTPUT_FILENAME, 'wb') as wf:
    wf.setnchannels(CHANNELS)
    wf.setsampwidth(sample_width)
    wf.setframerate(RATE)
    wf.writeframes(b''.join(frames))

* recording
[enter] to stop...
* done recording
