In [1]:
!pip install gradio
!pip install -U openai-whisper
!pip install sentence_transformers
!pip install transformers



# ASR model used to generate text files from audio
It was run on Kaggle so that the transcription could use a GPU and finish faster.
## Run this on a single MP3 file at a time, not on the entire folder at once, because processing the whole folder in one go would take too much compute

In [None]:
import torch
import whisper
import librosa
import concurrent.futures

# Pick the compute device: use CUDA when PyTorch can see a GPU, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load the "medium" Whisper checkpoint once, directly onto the selected device.
model_m = whisper.load_model("medium", device=device)

# Audio files to transcribe (paths follow the Kaggle environment layout).
# Append more entries here for batch processing.
input_file_paths = [
    r"/kaggle/input/sandalwood/audiocorpus/SandalWoodNewsStories_112.mp3",  # Kannada audio file
]

# Output text file for each input, matched by position with `input_file_paths`.
# Transcriptions are appended, so all of them can be collected in all_transcription.txt.
output_file_paths = [
    r'/kaggle/working/all_transcription.txt',  # transcription file in the Kaggle working directory
]

def transcribe_audio(input_file_path, output_file_path, language="en"):
    """Transcribe one audio file with Whisper and append the text to a file.

    Uses the module-level ``model_m`` (the loaded Whisper model) and ``device``.

    Args:
        input_file_path: Path of the audio file to transcribe.
        output_file_path: Text file the transcription is appended to (opened in
            append mode, so it is created if missing). One line is added per call.
        language: Language code passed to ``model_m.transcribe``. Defaults to
            ``"en"``, the value that used to be hard-coded; pass ``"kn"`` to
            decode the audio as Kannada.

    Returns:
        The transcribed text, i.e. ``result["text"]``.
    """
    # Load the audio with librosa, resampled to 16 kHz; the returned rate is not needed.
    audio, _ = librosa.load(input_file_path, sr=16000)

    # Hand Whisper a float32 tensor that already lives on the model's device.
    whisper_audio = torch.tensor(audio, dtype=torch.float32).to(device)

    # Perform transcription (fp16 is explicitly disabled).
    result = model_m.transcribe(whisper_audio, language=language, fp16=False)
    transcription = result["text"]

    # Append text and newline in a single write so the line is emitted as one unit.
    with open(output_file_path, 'a', encoding='utf-8') as f:
        f.write(f"{transcription}\n")

    return transcription

# Each input file needs exactly one output path; pairing lists of different
# lengths would silently skip the unmatched entries, so fail loudly instead.
if len(input_file_paths) != len(output_file_paths):
    raise ValueError(
        f"input_file_paths has {len(input_file_paths)} entries but "
        f"output_file_paths has {len(output_file_paths)}; they must match one-to-one"
    )

# Transcribe the files one after another, in order. All calls share the single
# `model_m` instance (and possibly the same append-mode output file), so running
# them from several threads at once could corrupt decoder state or interleave
# the written lines.
transcriptions = [
    transcribe_audio(audio_path, text_path)
    for audio_path, text_path in zip(input_file_paths, output_file_paths)
]

# # Print transcriptions (optional)
# for transcription in transcriptions:
#     print("Transcription:", transcription)
