# Audio Transcription with OpenAI's Whisper

In [1]:
# Install or upgrade (-U), quietly (-q), the packages this notebook relies on.
%pip install -qU langchain-openai langchain-community pydub librosa

Note: you may need to restart the kernel to use updated packages.


In [2]:
import os
from dotenv import load_dotenv, find_dotenv

# Load environment variables from the .env file that find_dotenv() locates.
dotenv_path = find_dotenv()
load_dotenv(dotenv_path)

# API key handed to the hosted Whisper parser; None if the variable is unset.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

## Load the test audio

In [12]:
# Parser switch read by the parser-setup cell: True selects
# OpenAIWhisperParserLocal, False selects OpenAIWhisperParser.
local = False

In [None]:
from langchain_community.document_loaders.parsers.audio import (
    OpenAIWhisperParser,
    OpenAIWhisperParserLocal,
)

# Build the Whisper parser. `local=True` runs the model on this machine through
# OpenAIWhisperParserLocal; `local=False` calls the hosted API through
# OpenAIWhisperParser.
if local:
    # The local parser loads a Hugging Face checkpoint, so it needs a hub model
    # id such as "openai/whisper-base" rather than the hosted API name
    # "whisper-1". Its constructor also has no `language` keyword (passing one
    # raises TypeError); forcing a language locally is done through the
    # `forced_decoder_ids` argument instead.
    # NOTE(review): without forced_decoder_ids the language is presumably
    # auto-detected by the multilingual checkpoint -- confirm for French audio.
    parser = OpenAIWhisperParserLocal(
        device="cpu",  # Use "cuda" for GPU support if available
        lang_model="openai/whisper-base",  # Hugging Face model id
        batch_size=1,  # Number of audio chunks processed per batch
        chunk_length=30,  # seconds
    )
else:
    parser = OpenAIWhisperParser(
        api_key=OPENAI_API_KEY,
        language="fr",  # Language of the audio
        response_format="json",  # Format requested from the transcription API
        model="whisper-1",  # Hosted Whisper model name
    )

In [None]:
from pathlib import Path
from langchain_community.document_loaders.generic import GenericLoader
from langchain_community.document_loaders.blob_loaders import FileSystemBlobLoader

# Path of the sample recording, resolved relative to the kernel's working
# directory: <cwd>/../data/audio/test_whisper.m4a.
audio_path = Path.cwd().parent / "data" / "audio" / "test_whisper.m4a"

# Fail fast with an explicit message. Because the path depends on where the
# kernel was started, a wrong working directory is the most likely cause.
if not audio_path.is_file():
    raise FileNotFoundError(f"Audio file not found: {audio_path}")

# The blob loader supplies the raw file; GenericLoader passes each blob to
# `parser` to turn it into documents.
blob_loader = FileSystemBlobLoader(audio_path)
loader = GenericLoader(blob_loader, parser)

# Run the transcription and collect the resulting documents.
docs = loader.load()

Transcribing part 1!


In [16]:
# Print the transcribed text of every document returned by the loader.
for doc in docs:
    transcript = doc.page_content
    print("Transcription :", transcript)

Transcription : Bonjour, je m'appelle Joachim Jasmin. Ceci est un enregistrement.
