In [None]:
# Register the third-party PPA that packages FFmpeg 4 (-y skips the confirmation prompt).
!add-apt-repository -y ppa:jonathonf/ffmpeg-4
# Refresh the package index so packages from the newly added PPA are visible to apt.
!apt update
# Install ffmpeg non-interactively; presumably needed for audio decoding downstream — confirm.
!apt install -y ffmpeg

In [None]:
# Version specifiers must be quoted. Unquoted, the shell treats ">" as an output
# redirect, so `pip install datasets>=2.6.1` installs an unconstrained `datasets`
# and writes pip's output to a file named "=2.6.1".
!pip install "datasets>=2.6.1"
# NOTE(review): transformers is installed again from @main in the last line of this
# cell; one of the two installs is likely redundant — confirm before removing.
!pip install git+https://github.com/huggingface/transformers
!pip install librosa
# Under PEP 440, "0.30" means minor version 30 (newer than any 0.4.x release), which
# would likely be unsatisfiable once the specifier is actually applied.
# 0.3.0 is presumably the intended minimum — confirm.
!pip install "evaluate>=0.3.0"
!pip install jiwer
!pip install gradio
!pip install -q bitsandbytes datasets accelerate
!pip install -q git+https://github.com/huggingface/transformers.git@main git+https://github.com/huggingface/peft

In [None]:
import torch
import os
import nltk
import csv
#import fastwer
from jiwer import cer
#import gradio as gr
from transformers import (
    AutomaticSpeechRecognitionPipeline,
    WhisperForConditionalGeneration,
    WhisperTokenizer,
    WhisperProcessor,
)
from peft import PeftModel, PeftConfig


# --- Settings ---------------------------------------------------------------
peft_model_id = "yaygomii/FYP_Whisper_PEFT_TAMIL"
language = "Tamil"
task = "transcribe"

# The adapter config names the base checkpoint the adapter is applied to.
peft_config = PeftConfig.from_pretrained(peft_model_id)
base_checkpoint = peft_config.base_model_name_or_path

# --- Model: base Whisper weights wrapped with the PEFT adapter ----------------
model = PeftModel.from_pretrained(
    WhisperForConditionalGeneration.from_pretrained(base_checkpoint, device_map="auto"),
    peft_model_id,
)

# --- Pre/post-processing objects, all taken from the base checkpoint ---------
prompt_options = {"language": language, "task": task}
processor = WhisperProcessor.from_pretrained(base_checkpoint, **prompt_options)
tokenizer = WhisperTokenizer.from_pretrained(base_checkpoint, **prompt_options)
feature_extractor = processor.feature_extractor

# Decoder prompt ids for the chosen language/task, passed to generation by transcribe().
forced_decoder_ids = processor.get_decoder_prompt_ids(**prompt_options)

# --- Speech-recognition pipeline combining the pieces above -------------------
pipe = AutomaticSpeechRecognitionPipeline(
    model=model,
    tokenizer=tokenizer,
    feature_extractor=feature_extractor,
)

def transcribe(audio):
    """Transcribe one audio input to text with the PEFT Whisper pipeline.

    Args:
        audio: Input forwarded unchanged to ``pipe`` (presumably a file path or
            other audio input accepted by the ASR pipeline — confirm with callers).

    Returns:
        The value stored under the ``"text"`` key of the pipeline result.
    """
    # torch.cuda.amp.autocast() is deprecated; torch.autocast("cuda") is the
    # supported replacement and enables the same CUDA mixed-precision context.
    with torch.autocast("cuda"):
        result = pipe(
            audio,
            generate_kwargs={"forced_decoder_ids": forced_decoder_ids},
            max_new_tokens=255,
        )
    return result["text"]



In [None]:
# Mount Google Drive at /content/drive so files under it can be read and written.
from google.colab import drive
drive.mount('/content/drive')

**Transcription of each audio file, saved as a separate text file in the given `save_dir`**

In [None]:
import os
import re

# Path to audio directory
src = '/content/drive/MyDrive/Project 2/Testing'

# Directory to save transcription text files
save_dir = '/content/drive/MyDrive/SSNCSE/Whisper_PEFT/Peft/'
os.makedirs(save_dir, exist_ok=True)

# Set the specific audio number you want to transcribe
target_audio_number = "50"

# Only files named "Audio - <target_audio_number>_*.wav" are selected.
audio_prefix = f"Audio - {target_audio_number}_"


def _natural_key(name):
    """Return the digit groups of ``name`` as a list of ints, for numeric ordering.

    Comparing the groups separately (e.g. [50, 2] < [50, 10]) avoids the
    mis-ordering that results from gluing every digit into one integer when a
    name contains more than one numeric part.
    """
    return [int(group) for group in re.findall(r"\d+", name)]


audio_files = sorted(
    (f for f in os.listdir(src) if f.endswith(".wav") and f.startswith(audio_prefix)),
    key=_natural_key,
)

# Make an empty selection visible instead of silently doing nothing.
if not audio_files:
    print(f"No files matching '{audio_prefix}*.wav' found in {src}")

# Transcription loop for matching files only
for file in audio_files:
    path = os.path.join(src, file)
    print(f"Transcribing: {path}")

    # NOTE(review): unlike transcribe(), this call does not pass forced_decoder_ids,
    # so the language/task prompt is not forced here — confirm this is intended.
    transcription = pipe(path, return_timestamps=True)

    print("Transcription:", transcription)

    # Name the output after the audio file: "<audio name>.txt" inside save_dir.
    filename_wo_ext = os.path.splitext(file)[0]
    txt_filename = f"{filename_wo_ext}.txt"
    txt_path = os.path.join(save_dir, txt_filename)

    # Only the plain text is saved; the rest of the pipeline result is not written.
    with open(txt_path, "w", encoding="utf-8") as txt_file:
        txt_file.write(transcription['text'])
