
## Model link: 
https://huggingface.co/ishmamzarif/augmented_normal_v2_extra_dataset_bangla-whisper-epoch-6

# Configuration Cell

In [7]:
# Directory that is scanned for the test audio clips.
DATA_DIR = "/kaggle/input/shobdotori/Test"

# Hugging Face Hub identifier of the fine-tuned Whisper checkpoint to load.
MODEL_ID = "ishmamzarif/augmented_normal_v2_extra_dataset_bangla-whisper-epoch-6"

# Output location and file name for the predictions CSV.
OUTPUT_DIR = "./"
OUTPUT_FILE = "Labyrinth_predictions_hidden.csv"

# Importing Dependencies and setting seed

In [None]:
# This cell may show an attribute warning caused by the Kaggle environment.
# The warning is harmless: the notebook still runs to completion and
# correctly generates the "TeamName_predictions_hidden.csv" file.

# Imports for the whole notebook, plus log/warning suppression.
# Statement order matters here: the environment variables are set and the
# stderr redirection is installed *before* the libraries they target are imported.

import os
# Set ahead of the heavy imports below so the values are already present in the
# environment when those libraries load.
# NOTE(review): presumably these lower TensorFlow / transformers log verbosity —
# confirm against each library's documentation.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ['TRANSFORMERS_NO_ADVISORY_WARNINGS'] = '1'

import sys
import io
import warnings
# Blanket filter: every Python warning raised after this point is hidden,
# including ones that might point at a real problem.
warnings.filterwarnings('ignore')

# NOTE(review): `datasets` and `soundfile` are not referenced in the cells
# visible here — confirm whether they are still needed.
import torch
import datasets
import librosa
import soundfile
import random
import numpy as np
import pandas as pd
from tqdm.auto import tqdm
import time

# Suppress transformers import warnings
import logging
logging.getLogger('transformers').setLevel(logging.ERROR)

# Temporarily replace sys.stderr with an in-memory buffer so that anything
# written to stderr while transformers is imported is discarded.
old_stderr = sys.stderr
sys.stderr = io.StringIO()

try:
    from transformers import (
        WhisperProcessor,
        WhisperForConditionalGeneration
    )
finally:
    # Always restore the real stderr, even if the import itself fails.
    sys.stderr = old_stderr

def set_seed(seed: int = 42):
    """Seed every random number generator used by this notebook.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch.
    When CUDA is available, all CUDA devices are seeded as well. A
    confirmation line is printed once seeding is done.

    Args:
        seed: Integer seed applied to every generator. Defaults to 42.
    """
    # Same seed, same order: Python -> NumPy -> PyTorch.
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

    cuda_ready = torch.cuda.is_available()
    if cuda_ready:
        torch.cuda.manual_seed_all(seed)

    print(f"Global seed set to {seed}")


set_seed(42)

Global seed set to 42


# Loading the model

In [8]:
# Load the Whisper processor and model named by MODEL_ID and put the model
# on the GPU when one is available.
print(f"Loading {MODEL_ID}...")

# Pick the compute device up front: CUDA if PyTorch can see it, else CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

processor = WhisperProcessor.from_pretrained(MODEL_ID)
model = WhisperForConditionalGeneration.from_pretrained(MODEL_ID).to(device)
model.eval()  # evaluation mode for inference

print("Model and processor loaded successfully")
param_millions = model.num_parameters() / 1e6
print(f"Model parameters: {param_millions:.1f}M")

Loading ishmamzarif/augmented_normal_v2_extra_dataset_bangla-whisper-epoch-6...
Model and processor loaded successfully
Model parameters: 241.7M


# Loading test files

In [9]:
# Discover the test audio clips in DATA_DIR.
print("Loading test files...\n")

# Collect WAV file names in a stable (sorted) order. The extension check is
# case-insensitive so clips named e.g. "clip.WAV" are not silently left out
# of the submission.
test_files = sorted(
    name for name in os.listdir(DATA_DIR)
    if name.lower().endswith('.wav')
)

print(f"Found {len(test_files)} test files")

# Only warn (do not raise) when nothing is found, so the notebook keeps its
# original best-effort behaviour.
if not test_files:
    print("\nWARNING: No test files found! Check DATA_DIR")
else:
    print("\nTest data loaded successfully!")

Loading test files...

Found 450 test files

Test data loaded successfully!


# Inference

In [10]:
# Transcribe every file in `test_files` and write the predictions CSV.
#
# Reads:  DATA_DIR, OUTPUT_DIR, OUTPUT_FILE, test_files, processor, model, device
# Writes: predictions (list of {'audio', 'text'} records), errors (list of
#         messages), submission_df, and the CSV at OUTPUT_DIR/OUTPUT_FILE.
# A file that fails to process is logged and gets an empty transcription so
# the output always has one row per input file.
start_time = time.time()

print("Generating test predictions...\n")
print(f"Processing {len(test_files)} test files...\n")

# Audio is resampled to this rate on load, and the same value is passed to
# the feature extractor so the two always agree.
TARGET_SAMPLE_RATE = 16000
# Upper bound passed to `model.generate(max_length=...)`.
MAX_GENERATION_LENGTH = 225
# Fixed column order; also guarantees a header row when there are no predictions.
SUBMISSION_COLUMNS = ['audio', 'text']

predictions = []
errors = []

for audio_file in tqdm(test_files, desc="Transcribing"):
    audio_path = os.path.join(DATA_DIR, audio_file)

    try:
        # Load and resample audio to the target rate (the returned rate is
        # therefore already known and not needed).
        audio_array, _loaded_rate = librosa.load(audio_path, sr=TARGET_SAMPLE_RATE)

        # Extract features
        input_features = processor.feature_extractor(
            audio_array,
            sampling_rate=TARGET_SAMPLE_RATE,
            return_tensors="pt"
        ).input_features

        input_features = input_features.to(device)

        # Generate transcription (no gradients needed for inference)
        with torch.no_grad():
            predicted_ids = model.generate(
                input_features,
                language="bn",
                task="transcribe",
                max_length=MAX_GENERATION_LENGTH
            )

        # Decode prediction; one clip per call, so take the single result
        transcription = processor.tokenizer.batch_decode(
            predicted_ids,
            skip_special_tokens=True
        )[0]

        predictions.append({
            'audio': audio_file,
            'text': transcription
        })

    except Exception as e:
        # Deliberate best-effort: record the failure and keep going so one
        # bad clip does not abort the whole run.
        error_msg = f"Error processing {audio_file}: {str(e)}"
        errors.append(error_msg)
        print(f"\n{error_msg}")

        # Add empty prediction
        predictions.append({
            'audio': audio_file,
            'text': ""
        })

print(f"  Total predictions: {len(predictions)}")
print(f"  Errors: {len(errors)}")

# Explicit columns keep the 'audio,text' header even when `predictions` is empty.
submission_df = pd.DataFrame(predictions, columns=SUBMISSION_COLUMNS)

# Honour the configured OUTPUT_DIR instead of always writing to the working
# directory; create it when it does not exist yet.
if OUTPUT_DIR:
    os.makedirs(OUTPUT_DIR, exist_ok=True)
output_path = os.path.join(OUTPUT_DIR, OUTPUT_FILE)
submission_df.to_csv(output_path, index=False, encoding='utf-8')

end_time = time.time()
total_time = end_time - start_time
print(f"Total Execution Time: {total_time:.2f} seconds")
print(f"Output file ’{OUTPUT_FILE}’ generated successfully!")

Generating test predictions...

Processing 450 test files...



Transcribing:   0%|          | 0/450 [00:00<?, ?it/s]

  Total predictions: 450
  Errors: 0
Total Execution Time: 368.16 seconds
Output file ’Labyrinth_predictions_hidden.csv’ generated successfully!
