<a href="https://colab.research.google.com/github/JayaManasa/speech-to-text-transcriber/blob/main/whisper_inference.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
import torch
from transformers import WhisperProcessor, WhisperForConditionalGeneration
import librosa

# Load audio file
audio_path = "/content/sample_data/WhatsApp Ptt 2024-10-29 at 3.59.39 PM.ogg"
audio, sr = librosa.load(audio_path, sr=16000)

# Initialize Whisper model and processor
processor = WhisperProcessor.from_pretrained("openai/whisper-base")
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-base")

# Process audio
input_features = processor(audio, sampling_rate=sr, return_tensors="pt").input_features

# Generate transcription
predicted_ids = model.generate(input_features)
transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)

print("Transcription:", transcription[0])

Transcription:  ہاپی بات دیا سائل جا


In [7]:
from transformers import pipeline
import torch

# Initialize pipeline
transcriber = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-base",
    torch_dtype=torch.float16,
    device="cuda" if torch.cuda.is_available() else "cpu"
)

# Transcribe audio
result = transcriber("/content/sample_data/WhatsApp Ptt 2024-10-29 at 3.59.39 PM.ogg")
print("Transcription:", result["text"])

Transcription:  ہاپی بات دیا سائل جا




In [12]:
import os
from transformers import WhisperProcessor, WhisperForConditionalGeneration
import librosa
import torch

def batch_transcribe_indian_english():
    # Initialize model and processor once
    processor = WhisperProcessor.from_pretrained("openai/whisper-base")
    model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-base")
    model.config.forced_decoder_ids = processor.get_decoder_prompt_ids(language="en", task="transcribe")

    # Directory containing audio files
    directory = "/content/sample_data/whatsapp"

    for filename in os.listdir(directory):
        if filename.endswith(".ogg"):
            file_path = os.path.join(directory, filename)
            try:
                # Load and process audio
                audio, sampling_rate = librosa.load(file_path, sr=16000)
                input_features = processor(
                    audio,
                    sampling_rate=sampling_rate,
                    return_tensors="pt"
                ).input_features

                # Generate transcription
                predicted_ids = model.generate(
                    input_features,
                    language="en",
                    task="transcribe",
                    temperature=0.2,
                    no_repeat_ngram_size=3,
                    num_beams=5
                )

                # Decode and save
                transcription = processor.batch_decode(
                    predicted_ids,
                    skip_special_tokens=True,
                    normalize=True
                )[0]

                # Save to file
                output_filename = os.path.splitext(filename)[0] + ".txt"
                output_path = os.path.join(directory, output_filename)
                with open(output_path, 'w', encoding='utf-8') as f:
                    f.write(transcription)

                print(f"Transcribed {filename}: {transcription}")

            except Exception as e:
                print(f"Error processing {filename}: {str(e)}")

# Run batch transcription
batch_transcribe_indian_english()

Transcribed WhatsApp Ptt 2024-11-07 at 9.41.07 PM.ogg: all the pictures of the bride
Transcribed WhatsApp Ptt 2024-10-30 at 8.06.52 AM.ogg: thank you for the new album happy geri geri san toshom
Transcribed WhatsApp Ptt 2024-11-07 at 9.41.29 PM.ogg: you may pictures of dancing photos
Transcribed WhatsApp Ptt 2024-11-07 at 9.41.03 PM.ogg: one pictures from red tarbun
Transcribed WhatsApp Ptt 2024-10-29 at 3.59.39 PM.ogg: happy birthday saeeda
Transcribed WhatsApp Ptt 2024-10-29 at 6.34.26 PM.ogg: i am jan madinath subhaka angelo chappinapadad nagaru lata viji kiran vanajak neelima narendar nayana andarki dhanyavadalu
Transcribed WhatsApp Ptt 2024-11-07 at 9.40.54 PM.ogg: me photos of red dress
