# Speech-to-Text with Whisper API and your Device Microphone

Learn how to record audio from your device's microphone, transcribe the audio using OpenAI's Whisper API, and copy the transcription result to your clipboard (optional).

### Setup

`brew install portaudio`
`pip install pyaudio openai simpleaudio`

(`wave`, `tempfile`, and `os` are part of the Python standard library and do not need to be installed with pip.)

In [43]:
import simpleaudio as sa
import openai
import pyaudio
import wave
import tempfile
import os

def transcribe_audio(runPlayback=False):
    """
    Record audio from the microphone and transcribe it with OpenAI's Whisper API.

    Recording starts as soon as the input stream is opened and continues
    until the user presses Enter. The audio is written to a temporary WAV
    file (mono, 16-bit, 16 kHz), which is always removed before returning,
    even when recording, playback, or transcription fails.

    Args:
        runPlayback (bool): When true, play the recording back through
            simpleaudio before sending it for transcription (debug aid).

    Returns:
        str: The transcription of the recorded audio, with surrounding
        whitespace stripped.
    """
    # Keep the WAV header and the capture stream in agreement.
    channels = 1
    sample_rate = 16000

    # Reserve a temporary path, then close our handle right away: the file
    # is reopened by name below, and keeping the handle open would leak a
    # descriptor (and block the reopen on Windows).
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
        temp_file_name = temp_file.name

    try:
        # Open the wave file for writing
        with wave.open(temp_file_name, 'wb') as wav_file:
            wav_file.setnchannels(channels)
            wav_file.setsampwidth(2)  # 16 bits per sample divided by 8
            wav_file.setframerate(sample_rate)

            def callback(data_input, frame_count, time_info, status):
                # Invoked by PyAudio for every captured buffer; append it
                # to the WAV file and ask the stream to keep running.
                wav_file.writeframes(data_input)
                return None, pyaudio.paContinue

            # Initialize PyAudio and start recording audio
            audio = pyaudio.PyAudio()
            try:
                stream = audio.open(format=pyaudio.paInt16,
                                    channels=channels,
                                    rate=sample_rate,
                                    input=True,
                                    frames_per_buffer=1024,
                                    stream_callback=callback)
                try:
                    input("Press Enter to stop recording...")
                finally:
                    # Stop the stream before the WAV file is closed so the
                    # callback never writes to a closed file.
                    stream.stop_stream()
                    stream.close()
            finally:
                audio.terminate()

        # Play the recorded audio - debug
        if runPlayback:
            playback = sa.WaveObject.from_wave_file(temp_file_name)
            play_obj = playback.play()
            play_obj.wait_done()  # Wait until playback is finished

        # Transcribe the audio to text using OpenAI's API
        with open(temp_file_name, 'rb') as audio_file:
            response = openai.Audio.transcribe(
                file=audio_file,
                model="whisper-1",
                prompt="Carl talks to Hadavand"
            )

        return response['text'].strip()
    finally:
        # Delete the temporary audio file, whether or not the steps above
        # succeeded.
        os.remove(temp_file_name)


In [44]:
# Record from the microphone, play the recording back, then transcribe it;
# the returned transcription is shown as the cell's output.
transcribe_audio(runPlayback=True)

NameError: name 'temp_file' is not defined