In [1]:
import os
import pyaudio
import wave
import soundfile as sf

In [2]:
p = pyaudio.PyAudio()
info = p.get_host_api_info_by_index(0)
numdevices = info.get('deviceCount')

for i in range(0, numdevices):
    if (p.get_device_info_by_host_api_device_index(0, i).get('maxInputChannels')) > 0:
        print("Input Device id ", i, " - ", p.get_device_info_by_host_api_device_index(0, i).get('name'))

Input Device id  0  -  Microsoft Sound Mapper - Input
Input Device id  1  -  Microphone Array (Intel® Smart 


In [19]:
SOUND_DEVICE_ID = 1
CHUNK = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 16000
MAX_RECORD_SECONDS = 12
WAVE_OUTPUT_DIR = "../data/custom/audio/rachel/"
TRANSCRIPT_DIR = "../data/custom/transcripts/"
TRANSCRIPT_FILE = "../data/custom/source.txt"

In [20]:
if not os.path.exists(WAVE_OUTPUT_DIR):
    os.makedirs(WAVE_OUTPUT_DIR)

if not os.path.exists(TRANSCRIPT_DIR):
    os.makedirs(TRANSCRIPT_DIR)

In [21]:
def record_audio(filename):
    p = pyaudio.PyAudio()

    stream = p.open(format=FORMAT,
                    channels=CHANNELS,
                    input_device_index=SOUND_DEVICE_ID,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=CHUNK)

    print("* recording")

    frames = []

    for _ in range(0, int(RATE / CHUNK * MAX_RECORD_SECONDS)):
        data = stream.read(CHUNK)
        frames.append(data)

    print("* done recording")

    stream.stop_stream()
    stream.close()
    p.terminate()

    wf = wave.open(filename, 'wb')
    wf.setnchannels(CHANNELS)
    wf.setsampwidth(p.get_sample_size(FORMAT))
    wf.setframerate(RATE)
    wf.writeframes(b''.join(frames))
    wf.close()

In [22]:
def trim_audio(filename):
    data, samplerate = sf.read(filename)
    if len(data) > samplerate * MAX_RECORD_SECONDS:
        data = data[:samplerate * MAX_RECORD_SECONDS]
        sf.write(filename, data, samplerate)

In [23]:
def write_transcript(filename, transcript):
    with open(filename, 'w') as f:
        f.write(transcript)

In [24]:
def read_transcripts(file):
    with open(file, 'r') as f:
        lines = f.readlines()

    for i, line in enumerate(lines):
        audio_filename = os.path.join(WAVE_OUTPUT_DIR, f'speech_{i+1:02}.wav')
        transcript_filename = os.path.join(TRANSCRIPT_DIR, f'speech_{i+1:02}.txt')

        while not (os.path.exists(audio_filename) and os.path.exists(transcript_filename)):
            print(f"Please read the following line: {line}")
            input("Press Enter when you're ready to start recording...")

            record_audio(audio_filename)
            trim_audio(audio_filename)

            print("Recording complete. Press 'r' to redo the recording or any other key to save it.")
            user_input = input()

            if user_input.lower() != 'r':
                write_transcript(transcript_filename, line)
                print(f"Recording for line {i+1} saved!\n")
            else:
                print("Redoing the recording...\n")



In [25]:
read_transcripts(TRANSCRIPT_FILE)




Please read the following line: Attention, students. The National University of Singapore is hosting a career fair this week, bringing together top employers from various industries. Dress professionally and prepare your resumes.

* recording
* done recording
Recording complete. Press 'r' to redo the recording or any other key to save it.
Redoing the recording...

Please read the following line: What's the latest buzz? Did you hear about that new café that just opened?

* recording
* done recording
Recording complete. Press 'r' to redo the recording or any other key to save it.
Recording for line 2 saved!

Please read the following line: Hello, shoppers. The Great Singapore Sale is back. Enjoy huge discounts at major shopping malls across the city.

* recording
* done recording
Recording complete. Press 'r' to redo the recording or any other key to save it.
Recording for line 3 saved!

Please read the following line: Hello, everyone! I'm delighted to announce that our hawker center has