In [None]:
# First, let's install the correct whisper implementation
!pip install openai-whisper gradio torch torchaudio

# Alternative: if the above doesn't work, try this:
!pip install git+https://github.com/openai/whisper.git
!pip install gradio

Collecting git+https://github.com/openai/whisper.git
  Cloning https://github.com/openai/whisper.git to /tmp/pip-req-build-r3vvq1yh
  Running command git clone --filter=blob:none --quiet https://github.com/openai/whisper.git /tmp/pip-req-build-r3vvq1yh
  Resolved https://github.com/openai/whisper.git to commit c0d2f624c09dc18e709e37c2ad90c039a4eb72a2
  Installing build dependencies ... done
  Getting requirements to build wheel ... done
  Preparing metadata (pyproject.toml) ... done


In [None]:
# Imports used by this cell. They are declared here explicitly so the notebook
# survives "Restart Kernel -> Run All" instead of relying on names left over
# from an earlier kernel session.
import gradio as gr
import torch
import whisper

# Load the default ("base") model once at start-up; transcribe_audio reuses
# this instance whenever the "base" size is selected.
model = whisper.load_model("base")
print("✅ Whisper model loaded!")

def transcribe_audio(audio_file, model_size="base", language="en"):
    """Transcribe an audio file with Whisper and return a formatted report.

    Args:
        audio_file: Value handed over by the audio input (a file path, since
            the UI component is configured with ``type="filepath"``), or
            ``None`` when nothing was uploaded or recorded.
        model_size: Name of the Whisper checkpoint to use. ``"base"`` reuses
            the module-level ``model``; any other value is loaded on demand
            with ``whisper.load_model``.
        language: Language code forwarded unchanged to ``transcribe``.
            Defaults to ``"en"``, matching the previous hard-coded value.

    Returns:
        A text report containing the transcription, its duration, the model
        size and the compute device. If ``audio_file`` is ``None`` or any
        step fails, a single line starting with ``❌`` is returned instead;
        exceptions are never propagated to the caller.
    """
    if audio_file is None:
        return "❌ Please upload or record an audio file first"

    try:
        # Use different model if selected
        # NOTE(review): a non-"base" model is re-loaded on every call. Caching
        # would avoid that but keeps several checkpoints in memory at once.
        if model_size != "base":
            current_model = whisper.load_model(model_size)
        else:
            current_model = model

        # Transcribe
        result = current_model.transcribe(audio_file, language=language)

        text = result['text'].strip()

        # Whisper's result dict carries 'text', 'segments' and 'language' but
        # no 'duration' key, so the old `result.get('duration', 0)` always
        # reported 0.00. Derive the duration from the latest segment end
        # timestamp instead (trailing silence after the last segment is not
        # counted). An explicit 'duration' is still honoured if present.
        duration = result.get('duration')
        if duration is None:
            segments = result.get('segments') or []
            duration = max((segment['end'] for segment in segments), default=0.0)

        output = f"""
🎯 **Transcription Result:**
{text}

---
📊 **Details:**
• Duration: {duration:.2f} seconds
• Model: Whisper {model_size}
• Device: {'GPU' if torch.cuda.is_available() else 'CPU'}
"""
        return output

    except Exception as e:
        # Deliberate best-effort: the UI shows the message instead of crashing.
        return f"❌ Error: {str(e)}"

# Build the UI: a title, then one row with two columns
# (left: inputs and the action button, right: the result box).
with gr.Blocks() as demo:
    gr.Markdown("# 🎤 Whisper Speech Recognition")

    with gr.Row():
        # Left column: audio source, model choice, trigger button.
        with gr.Column():
            audio_input = gr.Audio(sources=["upload", "microphone"], type="filepath", label="Upload or Record Audio")
            model_selector = gr.Dropdown(label="Model Size", value="base", choices=["tiny", "base", "small", "medium", "large"])
            transcribe_btn = gr.Button("Transcribe", variant="primary")

        # Right column: where the report returned by transcribe_audio is shown.
        with gr.Column():
            output_text = gr.Textbox(lines=10, label="Transcription Result")

    # Wire the button: (audio, model size) -> transcribe_audio -> result box.
    transcribe_btn.click(transcribe_audio, [audio_input, model_selector], output_text)

# Start the app without a public share link. With debug=True the cell keeps
# running so errors and logs stay visible; interrupt the kernel to stop it.
print("🎯 Launching in Colab...")
demo.launch(debug=True, share=False)

✅ Whisper model loaded!
🎯 Launching in Colab...
Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
Note: opening Chrome Inspector may crash demo inside Colab notebooks.
* To create a public link, set `share=True` in `launch()`.


<IPython.core.display.Javascript object>

Keyboard interruption in main thread... closing server.


