In [None]:
# Hindi Audio Transcription and Translation
# Complete solution for Google Colab

# Step 1: Install required packages
# NOTE: the lines starting with "!" are notebook shell escapes rather than
# Python statements, so this cell must be run in a notebook environment
# (the header above targets Google Colab).
# The packages installed here back the imports that follow: torch,
# transformers, whisper (from openai-whisper), librosa and soundfile.
!pip install torch torchvision torchaudio
!pip install transformers
!pip install openai-whisper
!pip install librosa
!pip install soundfile

import whisper
import torch
from transformers import MarianMTModel, MarianTokenizer
import librosa
import soundfile as sf
import numpy as np
import os
from google.colab import files
import warnings
warnings.filterwarnings("ignore")

class HindiTranscriptionTranslator:
    """Transcribe Hindi audio with Whisper and translate the text to English.

    The constructor loads the Whisper "base" checkpoint and the
    "Helsinki-NLP/opus-mt-hi-en" MarianMT model with its tokenizer.
    ``process_audio_file`` runs the full pipeline; ``transcribe_audio`` and
    ``translate_text`` can also be used on their own.
    """

    # Characters treated as sentence terminators when long text has to be
    # split before translation ("।" is the Devanagari danda).
    _SENTENCE_ENDINGS = ("।", ".", "?", "!")

    def __init__(self):
        """Initialize the transcription and translation models"""
        print("Loading models... This may take a few minutes on first run.")

        # Load Whisper model for speech-to-text
        print("Loading Whisper model...")
        self.whisper_model = whisper.load_model("base")  # You can use "small", "medium", "large" for better accuracy

        # Load MarianMT model for Hindi to English translation
        print("Loading translation model...")
        self.translation_model_name = "Helsinki-NLP/opus-mt-hi-en"
        self.tokenizer = MarianTokenizer.from_pretrained(self.translation_model_name)
        self.translation_model = MarianMTModel.from_pretrained(self.translation_model_name)

        print("Models loaded successfully!")

    def preprocess_audio(self, audio_path):
        """Load an audio file resampled to 16 kHz and return the waveform.

        This helper is not called by the pipeline in this class:
        ``transcribe_audio`` hands the file path straight to Whisper.

        Args:
            audio_path: Path of the audio file to load.

        Returns:
            The audio samples returned by ``librosa.load``.
        """
        # The sample rate returned by librosa is the one requested, so it is discarded.
        audio, _ = librosa.load(audio_path, sr=16000)  # Whisper expects 16kHz
        return audio

    def transcribe_audio(self, audio_path):
        """Transcribe Hindi audio to Hindi text.

        Args:
            audio_path: Path of the audio file to transcribe.

        Returns:
            The transcription with surrounding whitespace removed.
        """
        print(f"Transcribing audio: {audio_path}")

        # Transcribe with Whisper, forcing Hindi instead of language detection
        result = self.whisper_model.transcribe(audio_path, language="hi")
        # Strip so the printed and saved text does not carry stray padding.
        hindi_text = result["text"].strip()

        print(f"Hindi Transcription: {hindi_text}")
        return hindi_text

    @staticmethod
    def _split_into_chunks(text, max_chars=400):
        """Split text into pieces of at most ``max_chars`` characters.

        Text that already fits is returned unchanged as a single chunk.
        Longer text is cut at sentence terminators first; a sentence that is
        itself too long is cut at spaces. A single word longer than
        ``max_chars`` is kept whole (the tokenizer's truncation still applies
        to it).

        Args:
            text: Text to split.
            max_chars: Upper bound on the length of each chunk. The default
                is meant to stay under the 512-token limit used in
                ``translate_text`` -- TODO confirm against real transcripts.

        Returns:
            A list of chunks in their original order.
        """
        if len(text) <= max_chars:
            return [text]

        # Group words into sentences; a sentence ends at a word that ends
        # with one of the terminator characters.
        sentences = []
        pending = []
        for word in text.split():
            pending.append(word)
            if word.endswith(HindiTranscriptionTranslator._SENTENCE_ENDINGS):
                sentences.append(" ".join(pending))
                pending = []
        if pending:
            sentences.append(" ".join(pending))

        # Over-long sentences fall back to word granularity.
        units = []
        for sentence in sentences:
            if len(sentence) <= max_chars:
                units.append(sentence)
            else:
                units.extend(sentence.split(" "))

        # Greedily pack consecutive units into chunks that respect the limit.
        chunks = []
        buffer = ""
        for unit in units:
            candidate = f"{buffer} {unit}" if buffer else unit
            if len(candidate) <= max_chars:
                buffer = candidate
            else:
                if buffer:
                    chunks.append(buffer)
                buffer = unit
        if buffer:
            chunks.append(buffer)
        return chunks

    def translate_text(self, hindi_text):
        """Translate Hindi text to English.

        Long input is translated chunk by chunk so that nothing beyond the
        tokenizer's 512-token truncation limit is silently dropped.

        Args:
            hindi_text: Hindi text to translate. ``None``, empty and
                whitespace-only values are accepted.

        Returns:
            The English translation, or an empty string when there is no
            text to translate.
        """
        text = hindi_text.strip() if hindi_text else ""
        if not text:
            # Nothing to translate; skip the model call entirely.
            print("No text to translate.")
            return ""

        print("Translating to English...")

        translated_parts = []
        for chunk in self._split_into_chunks(text):
            # Tokenize the Hindi text (truncation stays on as a safety net)
            inputs = self.tokenizer(chunk, return_tensors="pt", padding=True, truncation=True, max_length=512)

            # Generate translation
            with torch.no_grad():
                outputs = self.translation_model.generate(**inputs, max_length=512, num_beams=4, early_stopping=True)

            # Decode the translation
            translated_parts.append(self.tokenizer.decode(outputs[0], skip_special_tokens=True))

        english_text = " ".join(translated_parts)

        print(f"English Translation: {english_text}")
        return english_text

    def process_audio_file(self, audio_path):
        """Complete pipeline: audio -> Hindi text -> English translation.

        Args:
            audio_path: Path of the audio file to process.

        Returns:
            A dict with the keys "hindi_text" and "english_text", or ``None``
            when any step raises (the error is printed, not re-raised).
        """
        try:
            # Step 1: Transcribe audio to Hindi text
            hindi_text = self.transcribe_audio(audio_path)

            # Step 2: Translate Hindi text to English
            english_text = self.translate_text(hindi_text)

            return {
                "hindi_text": hindi_text,
                "english_text": english_text
            }

        except Exception as e:
            # Deliberate best-effort boundary: callers test the result for None.
            print(f"Error processing audio: {str(e)}")
            return None

# Usage example and testing functions

def upload_and_process():
    """Prompt for an audio upload, run the pipeline on it and export the result.

    Only the first uploaded file is processed. On success the Hindi and
    English texts are printed, written to 'transcription_results.txt' and
    offered for download.

    Returns:
        The dict returned by ``process_audio_file``, or ``None`` when nothing
        was uploaded or processing failed.
    """
    print("Please upload your Hindi audio file (supported formats: wav, mp3, m4a, etc.)")
    uploads = files.upload()

    if not uploads:
        print("No file uploaded!")
        return

    # The upload mapping is keyed by file name; take the first one.
    audio_file = next(iter(uploads))
    print(f"Processing file: {audio_file}")

    # Build the processor and run the whole pipeline on the upload.
    outcome = HindiTranscriptionTranslator().process_audio_file(audio_file)
    if not outcome:
        return outcome

    rule = "="*50
    print("\n" + rule)
    print("RESULTS:")
    print(rule)
    print(f"Hindi Text: {outcome['hindi_text']}")
    print(f"English Translation: {outcome['english_text']}")
    print(rule)

    # Persist both texts, then hand the file to the browser.
    with open("transcription_results.txt", "w", encoding="utf-8") as out:
        out.writelines([
            f"Hindi Transcription: {outcome['hindi_text']}\n",
            f"English Translation: {outcome['english_text']}\n",
        ])

    print("Results saved to 'transcription_results.txt'")
    files.download("transcription_results.txt")

    return outcome

def test_with_sample_text():
    """Run the translator on a few built-in Hindi sentences and print the results."""
    print("Testing translation with sample Hindi text...")

    translator = HindiTranscriptionTranslator()

    # Fixed sample sentences; each is printed next to its translation.
    samples = (
        "नमस्ते, मेरा नाम राहुल है।",
        "आज मौसम बहुत अच्छा है।",
        "मैं भारत से हूँ और हिंदी बोलता हूँ।",
    )

    for sentence in samples:
        print(f"\nHindi: {sentence}")
        translation = translator.translate_text(sentence)
        print(f"English: {translation}")

def create_sample_audio():
    """Create a sample Hindi audio file for testing (using text-to-speech).

    gTTS is imported if it is already installed; otherwise it is installed
    with pip for the running interpreter and then imported. This replaces the
    notebook-only ``!pip`` escape, which is a syntax error outside IPython and
    reinstalled the package on every call.

    Returns:
        The file name "sample_hindi_audio.mp3" on success, or ``None`` when
        the install, the import or the synthesis fails (the error is printed).
    """
    try:
        try:
            from gtts import gTTS
        except ImportError:
            import importlib
            import subprocess
            import sys

            # Install into the interpreter that is running this code.
            subprocess.check_call([sys.executable, "-m", "pip", "install", "gTTS"])
            # Let the import system see the freshly installed package.
            importlib.invalidate_caches()
            from gtts import gTTS

        # Sample Hindi text
        hindi_text = "नमस्ते, मेरा नाम राहुल है और मैं भारत से हूँ।"

        # Create TTS audio
        tts = gTTS(text=hindi_text, lang='hi')
        tts.save("sample_hindi_audio.mp3")

        print("Sample audio created: sample_hindi_audio.mp3")
        return "sample_hindi_audio.mp3"

    except Exception as e:
        # Best-effort helper: report the problem and let the caller skip the demo.
        print(f"Could not create sample audio: {e}")
        return None

# Main execution
if __name__ == "__main__":
    # Interactive menu: pick one of the three demo flows defined above.
    print("Hindi Audio Transcription and Translation System")
    print("=" * 50)

    print("\nChoose an option:")
    print("1. Upload and process your Hindi audio file")
    print("2. Test translation with sample Hindi text")
    print("3. Create and process sample audio")

    # Strip surrounding whitespace so an answer such as " 1" still selects
    # option 1 instead of falling through to the invalid-choice branch.
    choice = input("\nEnter your choice (1/2/3): ").strip()

    if choice == "1":
        upload_and_process()
    elif choice == "2":
        test_with_sample_text()
    elif choice == "3":
        sample_file = create_sample_audio()
        # create_sample_audio returns None on failure; only process a real file.
        if sample_file:
            processor = HindiTranscriptionTranslator()
            result = processor.process_audio_file(sample_file)
    else:
        print("Invalid choice. Running upload and process by default...")
        upload_and_process()

# Alternative: direct calls for custom usage (reference only, not executed).
#
#     # Initialize the processor
#     processor = HindiTranscriptionTranslator()
#
#     # Process an audio file
#     result = processor.process_audio_file("your_audio_file.wav")
#
#     # Or just translate text
#     english_text = processor.translate_text("आपका हिंदी टेक्स्ट यहाँ")

# Closing banner: blank line, rule, message, rule.
print(
    "\n" + "="*50,
    "Setup complete! Run the cells above to start processing.",
    "="*50,
    sep="\n",
)

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

2025-08-01 16:32:16.760030: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1754065937.122558      36 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1754065937.222158      36 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Hindi Audio Transcription and Translation System

Choose an option:
1. Upload and process your Hindi audio file
2. Test translation with sample Hindi text
3. Create and process sample audio



Enter your choice (1/2/3):  1


Please upload your Hindi audio file (supported formats: wav, mp3, m4a, etc.)


In [None]:
if __name__ == "__main__":
    # Interactive menu: pick one of the three demo flows.
    print("Hindi Audio Transcription and Translation System")
    print("=" * 50)

    print("\nChoose an option:")
    print("1. Upload and process your Hindi audio file")
    print("2. Test translation with sample Hindi text")
    print("3. Create and process sample audio")

    # Strip surrounding whitespace so an answer such as " 1" still selects
    # option 1 instead of falling through to the invalid-choice branch.
    choice = input("\nEnter your choice (1/2/3): ").strip()

    if choice == "1":
        upload_and_process()
    elif choice == "2":
        test_with_sample_text()
    elif choice == "3":
        sample_file = create_sample_audio()
        # create_sample_audio returns None on failure; only process a real file.
        if sample_file:
            processor = HindiTranscriptionTranslator()
            result = processor.process_audio_file(sample_file)
    else:
        print("Invalid choice. Running upload and process by default...")
        upload_and_process()

# Alternative: direct calls for custom usage (reference only, not executed).
#
#     # Initialize the processor
#     processor = HindiTranscriptionTranslator()
#
#     # Process an audio file
#     result = processor.process_audio_file("your_audio_file.wav")
#
#     # Or just translate text
#     english_text = processor.translate_text("आपका हिंदी टेक्स्ट यहाँ")

# Closing banner: blank line, rule, message, rule.
print(
    "\n" + "="*50,
    "Setup complete! Run the cells above to start processing.",
    "="*50,
    sep="\n",
)