In [9]:
!pip install -q fastapi uvicorn python-multipart
!pip install -q faster-whisper
!pip install -q pyngrok nest-asyncio
!apt update &>/dev/null
!apt install ffmpeg &>/dev/null

📦 Installing packages...
✅ All packages installed successfully!


In [10]:
import torch
import os
import tempfile
import subprocess
import logging
import threading
import time
import nest_asyncio
import asyncio
from pathlib import Path
from pyngrok import ngrok
from IPython.display import display, HTML

from fastapi import FastAPI, File, UploadFile, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse, HTMLResponse
import uvicorn
from faster_whisper import WhisperModel

nest_asyncio.apply()

print("System Information:")
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name()}")
    print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB")
else:
    print("Using CPU (slower but works)")

print("All imports successful!")

🔍 System Information:
✅ PyTorch version: 2.6.0+cu124
✅ CUDA available: True
✅ GPU: Tesla T4
✅ GPU Memory: 14.7 GB
✅ All imports successful!


In [11]:
print("Setting up ngrok...")

# Option 1: Set your token directly here (easiest)
NGROK_TOKEN = ""  # Paste your token between the quotes

# Option 2: Enter interactively
if not NGROK_TOKEN:
    try:
        import getpass
        NGROK_TOKEN = getpass.getpass("Enter your ngrok token (or press Enter to skip): ")
    except:
        pass

# Apply token
if NGROK_TOKEN and len(NGROK_TOKEN) > 10:
    try:
        ngrok.set_auth_token(NGROK_TOKEN)
        print("ngrok authenticated successfully!")
    except Exception as e:
        print(f"ngrok issue: {e}")
        print("You can still run the app locally")
else:
    print("No ngrok token - app will run locally only")
    print("Get free token at: https://dashboard.ngrok.com/get-started/your-authtoken")

🌐 Setting up ngrok...
Enter your ngrok token (or press Enter to skip): ··········
✅ ngrok authenticated successfully!


In [12]:
# Cell 4: Load Whisper AI model
print("Loading Whisper model...")
print("This may take 1-2 minutes for first download...")

try:
    # Determine device and settings
    device = "cuda" if torch.cuda.is_available() else "cpu"
    compute_type = "float16" if device == "cuda" else "int8"

    print(f"Using device: {device}")
    print(f"Compute type: {compute_type}")

    # Load the model
    model = WhisperModel("large-v3", device=device, compute_type=compute_type)

    print("Whisper large-v3 model loaded successfully!")
    print("Ready for Arabic, English, and 90+ languages!")

except Exception as e:
    print(f"Error loading model: {e}")
    print("Trying with smaller model...")
    try:
        model = WhisperModel("base", device="cpu", compute_type="int8")
        print("Backup model loaded (may be less accurate)")
    except:
        model = None
        print("Model loading failed completely")

🤖 Loading Whisper model...
⏳ This may take 1-2 minutes for first download...
📱 Using device: cuda
⚙️ Compute type: float16


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json: 0.00B [00:00, ?B/s]

vocabulary.json: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

preprocessor_config.json:   0%|          | 0.00/340 [00:00<?, ?B/s]

model.bin:   0%|          | 0.00/3.09G [00:00<?, ?B/s]

✅ Whisper large-v3 model loaded successfully!
🌍 Ready for Arabic, English, and 90+ languages!


In [13]:
# Cell 5: Audio processing functions
def convert_to_mp3(input_path: str, output_path: str) -> bool:
    """Convert audio to MP3 format"""
    try:
        cmd = [
            'ffmpeg', '-i', input_path,
            '-acodec', 'libmp3lame',
            '-ar', '16000',  # 16kHz sample rate
            '-ac', '1',      # mono
            '-b:a', '64k',   # 64kbps bitrate
            '-y',            # overwrite
            output_path
        ]
        result = subprocess.run(cmd, capture_output=True, text=True)
        return result.returncode == 0
    except Exception as e:
        print(f"FFmpeg error: {e}")
        return False

def transcribe_audio(audio_path: str) -> dict:
    """Transcribe audio using Whisper"""
    try:
        if not model:
            raise Exception("Whisper model not loaded")

        # Transcribe with optimal settings
        segments, info = model.transcribe(
            audio_path,
            task="transcribe",        # Keep original language
            vad_filter=True,          # Voice activity detection
            beam_size=5,              # Balance speed/accuracy
            language=None,            # Auto-detect language
            temperature=0.0           # Deterministic results
        )

        # Language name mapping
        language_names = {
            'en': 'English',
            'ar': 'Arabic (العربية)',
            'fr': 'French',
            'es': 'Spanish',
            'de': 'German',
            'zh': 'Chinese',
            'ja': 'Japanese',
            'ko': 'Korean',
            'ru': 'Russian',
            'hi': 'Hindi',
            'ur': 'Urdu',
            'tr': 'Turkish',
            'fa': 'Persian'
        }

        # Process segments
        full_text = ""
        segments_list = []

        for segment in segments:
            full_text += segment.text + " "
            segments_list.append({
                "start": round(segment.start, 2),
                "end": round(segment.end, 2),
                "text": segment.text.strip()
            })

        # Check for Arabic characters
        contains_arabic = any('\u0600' <= char <= '\u06FF' for char in full_text)

        return {
            "transcription": full_text.strip(),
            "language": info.language,
            "language_name": language_names.get(info.language, info.language.upper()),
            "language_probability": round(info.language_probability, 2),
            "duration": round(info.duration, 2),
            "contains_arabic": contains_arabic,
            "segments": segments_list,
            "word_count": len(full_text.split())
        }

    except Exception as e:
        print(f"Transcription error: {e}")
        raise Exception(f"Transcription failed: {str(e)}")

print("Helper functions created successfully!")

✅ Helper functions created successfully!


In [16]:
# Cell 6: Create FastAPI App (UPDATED WITH WHITE BACKGROUND + ANIMATIONS)
app = FastAPI(title="Speech-to-Text API", version="1.0.0")

# Add CORS middleware for browser access
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Frontend HTML with WHITE background and COOL animations
HTML_CONTENT = '''<!DOCTYPE html>
<html>
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>🎙️ Speech to Text - Colab</title>
    <style>
        * { margin: 0; padding: 0; box-sizing: border-box; }

        body {
            font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
            background: white;
            min-height: 100vh; display: flex; align-items: center; justify-content: center;
            color: #333; overflow: hidden; position: relative;
        }

        /* Animated background elements */
        .bg-animation {
            position: absolute; top: 0; left: 0; width: 100%; height: 100%;
            pointer-events: none; z-index: -1;
        }

        .floating-circle {
            position: absolute; border-radius: 50%; opacity: 0.1;
            animation: float 8s ease-in-out infinite;
        }

        .floating-circle:nth-child(1) {
            width: 100px; height: 100px; background: #667eea;
            top: 10%; left: 10%; animation-delay: 0s;
        }
        .floating-circle:nth-child(2) {
            width: 150px; height: 150px; background: #764ba2;
            top: 15%; right: 15%; animation-delay: 2s;
        }
        .floating-circle:nth-child(3) {
            width: 80px; height: 80px; background: #52c7f2;
            bottom: 20%; left: 20%; animation-delay: 4s;
        }
        .floating-circle:nth-child(4) {
            width: 120px; height: 120px; background: #f093fb;
            bottom: 15%; right: 25%; animation-delay: 1s;
        }
        .floating-circle:nth-child(5) {
            width: 90px; height: 90px; background: #a8e6cf;
            top: 60%; left: 5%; animation-delay: 3s;
        }
        .floating-circle:nth-child(6) {
            width: 110px; height: 110px; background: #ffd93d;
            top: 70%; right: 10%; animation-delay: 5s;
        }

        @keyframes float {
            0%, 100% { transform: translateY(0px) rotate(0deg) scale(1); }
            25% { transform: translateY(-30px) rotate(90deg) scale(1.1); }
            50% { transform: translateY(0px) rotate(180deg) scale(0.9); }
            75% { transform: translateY(-15px) rotate(270deg) scale(1.05); }
        }

        /* Particle system */
        .particles {
            position: absolute; top: 0; left: 0; width: 100%; height: 100%;
            pointer-events: none; z-index: -1;
        }

        .particle {
            position: absolute; width: 4px; height: 4px; background: #667eea;
            border-radius: 50%; opacity: 0.3;
            animation: particleFloat 12s linear infinite;
        }

        @keyframes particleFloat {
            0% { transform: translateY(100vh) rotate(0deg); opacity: 0; }
            10% { opacity: 0.3; }
            90% { opacity: 0.3; }
            100% { transform: translateY(-100px) rotate(360deg); opacity: 0; }
        }

        .container {
            text-align: center;
            background: rgba(255, 255, 255, 0.95);
            backdrop-filter: blur(20px);
            border-radius: 30px; padding: 60px 40px;
            box-shadow: 0 25px 80px rgba(0, 0, 0, 0.1);
            border: 2px solid rgba(102, 126, 234, 0.2);
            max-width: 500px; width: 90%; position: relative;
            animation: containerEntrance 1s ease-out;
            transition: transform 0.3s ease, box-shadow 0.3s ease;
        }

        .container:hover {
            transform: translateY(-10px);
            box-shadow: 0 35px 100px rgba(0, 0, 0, 0.15);
        }

        @keyframes containerEntrance {
            0% { opacity: 0; transform: translateY(50px) scale(0.9); }
            100% { opacity: 1; transform: translateY(0) scale(1); }
        }

        h1 {
            margin-bottom: 20px; font-size: 2.8em; font-weight: 300;
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            -webkit-background-clip: text; -webkit-text-fill-color: transparent;
            background-clip: text; animation: titleGlow 3s ease-in-out infinite alternate;
        }

        @keyframes titleGlow {
            0% { filter: brightness(1) drop-shadow(0 0 5px rgba(102, 126, 234, 0.3)); }
            100% { filter: brightness(1.2) drop-shadow(0 0 15px rgba(102, 126, 234, 0.6)); }
        }

        .colab-badge {
            background: linear-gradient(135deg, #F9AB00 0%, #FF6D01 100%);
            color: white; padding: 10px 25px; border-radius: 25px;
            font-size: 14px; margin-bottom: 35px; display: inline-block;
            font-weight: 600; animation: badgePulse 4s ease-in-out infinite;
            box-shadow: 0 5px 15px rgba(249, 171, 0, 0.3);
        }

        @keyframes badgePulse {
            0%, 100% { transform: scale(1); }
            50% { transform: scale(1.05); }
        }

        .mic-container {
            margin: 50px 0; position: relative;
            animation: micEntrance 1.2s ease-out 0.5s both;
        }

        @keyframes micEntrance {
            0% { opacity: 0; transform: scale(0.3) rotate(-180deg); }
            60% { transform: scale(1.1) rotate(10deg); }
            100% { opacity: 1; transform: scale(1) rotate(0deg); }
        }

        .mic-icon {
            width: 160px; height: 160px; border-radius: 50%;
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            display: flex; align-items: center; justify-content: center;
            margin: 0 auto; cursor: pointer;
            border: 4px solid rgba(102, 126, 234, 0.3);
            position: relative; overflow: hidden;
            animation: micBreathe 4s ease-in-out infinite;
            transition: all 0.4s cubic-bezier(0.68, -0.55, 0.265, 1.55);
        }

        @keyframes micBreathe {
            0%, 100% { transform: scale(1); }
            50% { transform: scale(1.05); }
        }

        .mic-icon::before {
            content: '';
            position: absolute; top: 0; left: -100%;
            width: 100%; height: 100%;
            background: linear-gradient(90deg, transparent, rgba(255, 255, 255, 0.4), transparent);
            animation: shimmer 3s ease-in-out infinite;
        }

        @keyframes shimmer {
            0% { left: -100%; }
            100% { left: 100%; }
        }

        .mic-icon:hover {
            transform: scale(1.15) rotate(5deg);
            box-shadow: 0 20px 40px rgba(102, 126, 234, 0.4);
            border-color: rgba(102, 126, 234, 0.6);
        }

        .mic-icon.recording {
            background: linear-gradient(135deg, #ff453a 0%, #ff6b35 100%);
            border-color: rgba(255, 69, 58, 1);
            animation: recordingPulse 1s infinite, micShake 0.1s infinite;
        }

        @keyframes recordingPulse {
            0% { box-shadow: 0 0 0 0 rgba(255, 69, 58, 0.7); }
            70% { box-shadow: 0 0 0 50px rgba(255, 69, 58, 0); }
            100% { box-shadow: 0 0 0 0 rgba(255, 69, 58, 0); }
        }

        @keyframes micShake {
            0%, 100% { transform: translateX(0) scale(1.15); }
            25% { transform: translateX(-2px) scale(1.15); }
            75% { transform: translateX(2px) scale(1.15); }
        }

        .mic-svg { width: 70px; height: 70px; fill: white; z-index: 2; position: relative; }

        /* Sound waves animation */
        .sound-waves {
            position: absolute; top: 50%; left: 50%;
            transform: translate(-50%, -50%); display: none;
        }

        .sound-waves.active { display: block; }

        .wave {
            position: absolute; border: 3px solid rgba(255, 69, 58, 0.6);
            border-radius: 50%; top: 50%; left: 50%;
            transform: translate(-50%, -50%);
            animation: waveExpand 2s ease-out infinite;
        }

        .wave-1 { width: 180px; height: 180px; animation-delay: 0s; }
        .wave-2 { width: 240px; height: 240px; animation-delay: 0.5s; }
        .wave-3 { width: 300px; height: 300px; animation-delay: 1s; }

        @keyframes waveExpand {
            0% { transform: translate(-50%, -50%) scale(0.8); opacity: 1; }
            100% { transform: translate(-50%, -50%) scale(1.4); opacity: 0; }
        }

        .controls {
            display: flex; gap: 30px; justify-content: center; margin-top: 40px;
            animation: controlsSlide 1s ease-out 0.8s both;
        }

        @keyframes controlsSlide {
            0% { opacity: 0; transform: translateY(30px); }
            100% { opacity: 1; transform: translateY(0); }
        }

        .btn {
            padding: 18px 35px; border: none; border-radius: 50px;
            font-size: 16px; font-weight: 600; cursor: pointer;
            min-width: 140px; position: relative; overflow: hidden;
            transition: all 0.3s cubic-bezier(0.68, -0.55, 0.265, 1.55);
            transform: translateY(0);
        }

        .btn::before {
            content: ''; position: absolute; top: 0; left: -100%;
            width: 100%; height: 100%;
            background: linear-gradient(90deg, transparent, rgba(255, 255, 255, 0.4), transparent);
            transition: left 0.6s;
        }

        .btn:hover::before { left: 100%; }

        .btn-record {
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            color: white; box-shadow: 0 10px 30px rgba(102, 126, 234, 0.3);
            border: 2px solid transparent;
        }

        .btn-record:hover {
            transform: translateY(-5px) scale(1.05);
            box-shadow: 0 15px 40px rgba(102, 126, 234, 0.4);
        }

        .btn-record.recording {
            background: linear-gradient(135deg, #ff453a 0%, #ff6b35 100%);
            animation: recordingButton 2s ease-in-out infinite;
        }

        @keyframes recordingButton {
            0%, 100% { transform: translateY(-5px) scale(1.05); }
            50% { transform: translateY(-5px) scale(1.1); }
        }

        .btn-send {
            background: linear-gradient(135deg, #52c7f2 0%, #4facfe 100%);
            color: white; box-shadow: 0 10px 30px rgba(82, 199, 242, 0.3);
            border: 2px solid transparent;
        }

        .btn-send:hover:not(:disabled) {
            transform: translateY(-5px) scale(1.05);
            box-shadow: 0 15px 40px rgba(82, 199, 242, 0.4);
        }

        .btn-send:disabled {
            opacity: 0.5; cursor: not-allowed; transform: none;
        }

        .status {
            margin-top: 25px; font-size: 15px; color: #666;
            animation: statusFade 1s ease-out 1s both;
            transition: all 0.3s ease;
        }

        @keyframes statusFade {
            0% { opacity: 0; }
            100% { opacity: 1; }
        }

        .response {
            margin-top: 25px; padding: 20px;
            background: linear-gradient(135deg, rgba(102, 126, 234, 0.1) 0%, rgba(118, 75, 162, 0.1) 100%);
            border-radius: 15px; text-align: left; display: none;
            border: 2px solid rgba(102, 126, 234, 0.2); color: #333;
            animation: responseSlideIn 0.5s ease-out;
        }

        @keyframes responseSlideIn {
            0% { opacity: 0; transform: translateY(20px); }
            100% { opacity: 1; transform: translateY(0); }
        }

        .response.visible { display: block; }

        .lang-support {
            margin-top: 20px; font-size: 13px; color: #777;
            background: linear-gradient(135deg, rgba(102, 126, 234, 0.1) 0%, rgba(118, 75, 162, 0.1) 100%);
            padding: 10px 20px; border-radius: 25px; display: inline-block;
            animation: langSupportGlow 4s ease-in-out infinite alternate;
            border: 1px solid rgba(102, 126, 234, 0.2);
        }

        @keyframes langSupportGlow {
            0% { box-shadow: 0 0 5px rgba(102, 126, 234, 0.2); }
            100% { box-shadow: 0 0 15px rgba(102, 126, 234, 0.4); }
        }

        /* Decorative rotating gradient */
        .container::before {
            content: ''; position: absolute; top: -2px; left: -2px; right: -2px; bottom: -2px;
            background: conic-gradient(from 0deg, #667eea, #764ba2, #52c7f2, #f093fb, #667eea);
            border-radius: 32px; z-index: -1; opacity: 0.3;
            animation: rotateGradient 10s linear infinite;
        }

        @keyframes rotateGradient {
            0% { transform: rotate(0deg); }
            100% { transform: rotate(360deg); }
        }

        /* Mobile responsiveness */
        @media (max-width: 600px) {
            .container { padding: 40px 20px; }
            h1 { font-size: 2.2em; }
            .mic-icon { width: 120px; height: 120px; }
            .controls { gap: 20px; }
            .btn { padding: 15px 25px; min-width: 110px; }
        }
    </style>
</head>
<body>
    <!-- Animated background -->
    <div class="bg-animation">
        <div class="floating-circle"></div>
        <div class="floating-circle"></div>
        <div class="floating-circle"></div>
        <div class="floating-circle"></div>
        <div class="floating-circle"></div>
        <div class="floating-circle"></div>
    </div>

    <!-- Particle system -->
    <div class="particles" id="particles"></div>

    <div class="container">
        <h1>🎙️ Speech to Text</h1>
        <div class="colab-badge">🚀 Powered by Google Colab</div>

        <div class="mic-container">
            <div class="mic-icon" id="micIcon">
                <svg class="mic-svg" viewBox="0 0 24 24">
                    <path d="M12 14c1.66 0 3-1.34 3-3V5c0-1.66-1.34-3-3-3S9 3.34 9 5v6c0 1.66 1.34 3 3 3z"/>
                    <path d="M17 11c0 2.76-2.24 5-5 5s-5-2.24-5-5H5c0 3.53 2.61 6.43 6 6.92V21h2v-3.08c3.39-.49 6-3.39 6-6.92h-2z"/>
                </svg>
            </div>

            <!-- Sound waves -->
            <div class="sound-waves" id="soundWaves">
                <div class="wave wave-1"></div>
                <div class="wave wave-2"></div>
                <div class="wave wave-3"></div>
            </div>
        </div>

        <div class="controls">
            <button class="btn btn-record" id="recordBtn">
                <span id="recordText">Record</span>
            </button>
            <button class="btn btn-send" id="sendBtn" disabled>Send</button>
        </div>

        <div class="status" id="status">Ready to record! 🎤</div>
        <div class="lang-support">🌍 Arabic • English • 90+ Languages</div>

        <div class="response" id="response">
            <strong>📝 Transcription:</strong>
            <p id="transcriptionText"></p>
            <div id="transcriptionDetails"></div>
        </div>
    </div>

    <script>
        // Create floating particles
        function createParticles() {
            const particles = document.getElementById('particles');
            const colors = ['#667eea', '#764ba2', '#52c7f2', '#f093fb', '#a8e6cf', '#ffd93d'];

            setInterval(() => {
                const particle = document.createElement('div');
                particle.className = 'particle';
                particle.style.left = Math.random() * 100 + '%';
                particle.style.backgroundColor = colors[Math.floor(Math.random() * colors.length)];
                particle.style.animationDuration = (Math.random() * 8 + 8) + 's';
                particle.style.opacity = Math.random() * 0.5 + 0.1;
                particles.appendChild(particle);

                setTimeout(() => {
                    particle.remove();
                }, 12000);
            }, 500);
        }

        class SpeechApp {
            constructor() {
                this.mediaRecorder = null;
                this.audioChunks = [];
                this.isRecording = false;
                this.recordedBlob = null;
                this.initElements();
                this.setupEvents();
                this.checkMic();
                createParticles(); // Start particle animation
            }

            initElements() {
                this.micIcon = document.getElementById('micIcon');
                this.recordBtn = document.getElementById('recordBtn');
                this.sendBtn = document.getElementById('sendBtn');
                this.status = document.getElementById('status');
                this.recordText = document.getElementById('recordText');
                this.response = document.getElementById('response');
                this.transcriptionText = document.getElementById('transcriptionText');
                this.soundWaves = document.getElementById('soundWaves');
            }

            setupEvents() {
                this.recordBtn.onclick = () => this.toggleRecording();
                this.micIcon.onclick = () => this.toggleRecording();
                this.sendBtn.onclick = () => this.sendAudio();
            }

            async checkMic() {
                try {
                    const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
                    stream.getTracks().forEach(track => track.stop());
                    this.status.textContent = '✅ Microphone ready! Click Record to start';
                } catch (error) {
                    this.status.textContent = '❌ Please allow microphone access';
                    this.recordBtn.disabled = true;
                }
            }

            async toggleRecording() {
                if (!this.isRecording) {
                    await this.startRecording();
                } else {
                    this.stopRecording();
                }
            }

            async startRecording() {
                try {
                    const stream = await navigator.mediaDevices.getUserMedia({
                        audio: {
                            echoCancellation: true,
                            noiseSuppression: true,
                            autoGainControl: true
                        }
                    });

                    this.mediaRecorder = new MediaRecorder(stream, {
                        mimeType: 'audio/webm;codecs=opus'
                    });
                    this.audioChunks = [];

                    this.mediaRecorder.ondataavailable = (event) => {
                        this.audioChunks.push(event.data);
                    };

                    this.mediaRecorder.onstop = () => {
                        this.recordedBlob = new Blob(this.audioChunks, { type: 'audio/webm' });
                        this.sendBtn.disabled = false;
                        stream.getTracks().forEach(track => track.stop());
                    };

                    this.mediaRecorder.start();
                    this.isRecording = true;
                    this.updateUI();

                } catch (error) {
                    console.error('Recording error:', error);
                    this.status.textContent = '❌ Error accessing microphone';
                }
            }

            stopRecording() {
                if (this.mediaRecorder && this.isRecording) {
                    this.mediaRecorder.stop();
                    this.isRecording = false;
                    this.updateUI();
                }
            }

            updateUI() {
                if (this.isRecording) {
                    this.micIcon.classList.add('recording');
                    this.recordBtn.classList.add('recording');
                    this.soundWaves.classList.add('active');
                    this.recordText.textContent = 'Stop';
                    this.status.textContent = '🎙️ Recording... (speak in Arabic or English)';
                    this.sendBtn.disabled = true;
                    this.response.classList.remove('visible');
                } else {
                    this.micIcon.classList.remove('recording');
                    this.recordBtn.classList.remove('recording');
                    this.soundWaves.classList.remove('active');
                    this.recordText.textContent = 'Record';
                    this.status.textContent = this.recordedBlob ?
                        '✅ Recording ready! Click Send to transcribe' :
                        '✅ Ready to record';
                }
            }

            async sendAudio() {
                if (!this.recordedBlob) return;

                this.sendBtn.disabled = true;
                this.status.textContent = '🔄 Converting to MP3 and transcribing...';

                try {
                    const formData = new FormData();
                    formData.append('audio', this.recordedBlob, 'recording.webm');

                    const response = await fetch('/api/transcribe', {
                        method: 'POST',
                        body: formData
                    });

                    if (response.ok) {
                        const data = await response.json();
                        if (data.success) {
                            this.displayResult(data);
                        } else {
                            throw new Error('Transcription failed');
                        }
                    } else {
                        const errorData = await response.json();
                        throw new Error(errorData.detail || 'Server error');
                    }
                } catch (error) {
                    console.error('Send error:', error);
                    this.status.textContent = `❌ Error: ${error.message}`;
                }

                this.sendBtn.disabled = false;
            }

            displayResult(data) {
                this.transcriptionText.textContent = data.transcription;
                this.response.classList.add('visible');

                const details = document.getElementById('transcriptionDetails');
                if (details && data.language) {
                    const confidence = Math.round(data.language_probability * 100);
                    details.innerHTML = `
                        <small style="color: #666; margin-top: 15px; display: block; line-height: 1.5;">
                            <strong>🌍 Language:</strong> ${data.language_name} (${confidence}% confidence)<br>
                            <strong>⏱️ Duration:</strong> ${data.duration}s | <strong>📊 Words:</strong> ${data.word_count}
                            ${data.contains_arabic ? '<br><span style="color: #667eea;">🇸🇦 Arabic text detected</span>' : ''}
                        </small>
                    `;
                }

                this.status.textContent = `✅ Transcribed in ${data.language_name}!`;
            }
        }

        // Start the app when page loads
        window.addEventListener('DOMContentLoaded', () => {
            new SpeechApp();
        });
    </script>
</body>
</html>'''

# API Routes (same as before)
@app.get("/")
async def home():
    """Serve the main web interface"""
    return HTMLResponse(content=HTML_CONTENT)

@app.get("/health")
async def health():
    """Health check endpoint"""
    return {
        "status": "running",
        "model_loaded": model is not None,
        "cuda_available": torch.cuda.is_available(),
        "message": "Speech-to-Text API is healthy!"
    }

@app.post("/api/transcribe")
async def transcribe(audio: UploadFile = File(...)):
    """Main transcription endpoint"""
    if not model:
        raise HTTPException(status_code=500, detail="Whisper model not loaded")

    with tempfile.TemporaryDirectory() as temp_dir:
        try:
            # Save uploaded audio file
            input_path = os.path.join(temp_dir, "input.webm")
            with open(input_path, "wb") as f:
                content = await audio.read()
                f.write(content)

            print(f"📥 Received audio: {len(content)} bytes")

            # Convert to MP3
            mp3_path = os.path.join(temp_dir, "audio.mp3")
            if not convert_to_mp3(input_path, mp3_path):
                raise HTTPException(status_code=400, detail="Audio conversion to MP3 failed")

            print("🔄 Converted to MP3 successfully")

            # Transcribe the audio
            result = transcribe_audio(mp3_path)

            print(f"✅ Transcription complete: {result['transcription'][:50]}...")

            return JSONResponse(content={"success": True, **result})

        except Exception as e:
            print(f"❌ Transcription error: {e}")
            raise HTTPException(status_code=500, detail=str(e))

print("✅ FastAPI app created with WHITE BACKGROUND + AMAZING ANIMATIONS!")
print("📡 API endpoints ready:")
print("   • GET  /        → Web interface")
print("   • GET  /health  → Health check")
print("   • POST /api/transcribe → Audio transcription")

✅ FastAPI app created with WHITE BACKGROUND + AMAZING ANIMATIONS!
📡 API endpoints ready:
   • GET  /        → Web interface
   • GET  /health  → Health check
   • POST /api/transcribe → Audio transcription


In [18]:
# Cell 7: Start the server with public access
def run_server():
    """Run the FastAPI server"""
    uvicorn.run(app, host="0.0.0.0", port=8000, log_level="warning")

def start_app():
    """Start the complete application"""
    print("🚀 Starting Speech-to-Text application...")

    # Start server in background thread
    server_thread = threading.Thread(target=run_server, daemon=True)
    server_thread.start()

    # Wait for server to start
    time.sleep(4)
    print("✅ Server started successfully!")

    # Try to create public URL with ngrok
    try:
        public_url = ngrok.connect(8000)
        print(f"\n🌐 PUBLIC URL: {public_url}")

        # Display beautiful link in Colab
        display(HTML(f'''
        <div style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
                    color: white; padding: 30px; border-radius: 15px; text-align: center;
                    box-shadow: 0 10px 30px rgba(0,0,0,0.3); margin: 20px 0;">
            <h2 style="margin-bottom: 15px;">🎙️ Your Speech-to-Text App is Live!</h2>
            <p style="font-size: 22px; margin: 20px 0;">
                <a href="{public_url}" target="_blank"
                   style="color: #FFD700; text-decoration: none; font-weight: bold;
                          background: rgba(255,215,0,0.2); padding: 10px 20px; border-radius: 25px;
                          border: 2px solid #FFD700;">
                   🌐 CLICK HERE TO OPEN YOUR APP
                </a>
            </p>
            <p style="font-size: 16px; opacity: 0.9;">
                ✨ Supports Arabic & English • 🤖 Whisper AI • 🚀 Google Colab
            </p>
            <p style="font-size: 14px; margin-top: 15px; opacity: 0.8;">
                📱 Works on phone/tablet • 🔗 Share with others • 🎯 Real-time transcription
            </p>
        </div>
        '''))

        return public_url

    except Exception as e:
        print(f"\n⚠️ ngrok failed: {e}")
        print("📱 App running locally in Colab only")

        display(HTML(f'''
        <div style="background: #e67e22; color: white; padding: 25px; border-radius: 10px; text-align: center;">
            <h3>🏠 Local App Running</h3>
            <p style="font-size: 16px; margin: 10px 0;">
                Your app is running at: <strong>http://localhost:8000</strong>
            </p>
            <p>⚠️ Only accessible within this Colab session</p>
            <p>💡 For public access, add your ngrok token in Cell 3</p>
        </div>
        '''))

        return "http://localhost:8000"

# Start the application
app_url = start_app()

print(f"\n📋 How to Test:")
print(f"1. 🌐 Click the link above to open your app")
print(f"2. 🎤 Allow microphone access when prompted")
print(f"3. 🔴 Click 'Record' and speak clearly")
print(f"4. ⏹️ Click 'Record' again to stop")
print(f"5. 📤 Click 'Send' to get your transcription")
print(f"\n🧪 Test Examples:")
print(f"   English: 'Hello, this is a test'")
print(f"   Arabic: 'مرحبا، هذا اختبار'")
print(f"   Mixed: 'Hello مرحبا world'")
print(f"\n🔄 Keep this cell running to maintain the server!")

🚀 Starting Speech-to-Text application...


ERROR:    [Errno 98] error while attempting to bind on address ('0.0.0.0', 8000): address already in use


✅ Server started successfully!

🌐 PUBLIC URL: NgrokTunnel: "https://3001f1efc36e.ngrok-free.app" -> "http://localhost:8000"



📋 How to Test:
1. 🌐 Click the link above to open your app
2. 🎤 Allow microphone access when prompted
3. 🔴 Click 'Record' and speak clearly
4. ⏹️ Click 'Record' again to stop
5. 📤 Click 'Send' to get your transcription

🧪 Test Examples:
   English: 'Hello, this is a test'
   Arabic: 'مرحبا، هذا اختبار'
   Mixed: 'Hello مرحبا world'

🔄 Keep this cell running to maintain the server!
