In [1]:
pip install gradio openai-whisper requests

Collecting gradio
  Downloading gradio-5.25.2-py3-none-any.whl.metadata (16 kB)
Collecting openai-whisper
  Downloading openai-whisper-20240930.tar.gz (800 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m800.5/800.5 kB[0m [31m13.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting aiofiles<25.0,>=22.0 (from gradio)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.8.0 (from gradio)
  Downloading gradio_client-1.8.0-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pyd

In [2]:

import whisper
import requests
import gradio as gr
import os

# 🔑 Gemini API Key
# Read from the environment so the secret is never stored in the notebook.
# Set it before running this cell (e.g. `%env GEMINI_API_KEY=...` or the host's
# secret manager). Any key that was ever committed in this cell should be
# treated as compromised and revoked.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "")
if not GEMINI_API_KEY:
    print("⚠️ GEMINI_API_KEY is not set; Gemini requests will fail until it is provided.")

# Gemini API URL (for gemini-2.0-flash)
GEMINI_URL = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key={GEMINI_API_KEY}"

# Load Whisper Model
# Loaded once when this cell runs and kept in a module-level name, so
# transcribe_audio() reuses the same model object on every call.
# NOTE(review): the first load appears to download the "base" weights (the cell
# output shows a ~139M download) — confirm caching behaviour on your host.
whisper_model = whisper.load_model("base")

# 🔁 Function to transcribe audio
def transcribe_audio(audio_path):
    """Transcribe the audio file at `audio_path` with the module-level Whisper model.

    Returns:
        The "text" entry of the model's transcription result. If anything goes
        wrong while transcribing, the exception is not raised; instead a string
        of the form "❌ Whisper Error: <message>" is returned so the failure
        can be displayed directly in the UI.
    """
    try:
        # Indexing stays inside the try so that a result without "text" is
        # reported the same way as a failed transcription.
        return whisper_model.transcribe(audio_path)["text"]
    except Exception as err:
        return "❌ Whisper Error: " + str(err)

# 🧠 Ask Gemini a question
def ask_gemini(question, context, timeout=60):
    """Ask Gemini `question`, supplying `context` as background text.

    Posts a single user turn to GEMINI_URL as JSON and returns the text of the
    first part of the first candidate in the response.

    Args:
        question: The question to ask.
        context: Text placed ahead of the question in the prompt.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The answer text on success. On failure a displayable string is returned
        instead of raising: "❌ Network Error: ..." when the request could not
        be completed, "❌ API Error: <status> - <body>" for a non-200 reply,
        and "⚠️ Error parsing Gemini response" when a 200 reply does not have
        the expected shape.
    """
    headers = {"Content-Type": "application/json"}
    prompt = f"Context:\n{context}\n\nQuestion: {question}"
    payload = {"contents": [{"role": "user", "parts": [{"text": prompt}]}]}

    try:
        # Without a timeout a stalled connection would block the UI forever.
        response = requests.post(GEMINI_URL, headers=headers, json=payload, timeout=timeout)
    except requests.RequestException as exc:
        # Report only the exception type: requests' messages can embed the
        # request URL, and GEMINI_URL carries the API key in its query string.
        return f"❌ Network Error: {type(exc).__name__}"

    if response.status_code != 200:
        return f"❌ API Error: {response.status_code} - {response.text}"

    try:
        return response.json()["candidates"][0]["content"]["parts"][0]["text"]
    except (KeyError, IndexError, TypeError, ValueError):
        # ValueError covers a body that is not valid JSON; the others cover a
        # JSON body lacking the expected candidates/content/parts structure.
        return "⚠️ Error parsing Gemini response"

# 🎯 Main Function
def audio_chat(audio_path, question):
    """Transcribe an uploaded audio file and answer a question about it.

    Args:
        audio_path: Path of the audio file to transcribe; falsy when nothing
            was uploaded.
        question: The question to ask about the audio's content.

    Returns:
        A string containing the transcription followed by the answer, or an
        error message (starting with "❌") when no file was supplied or the
        transcription failed.
    """
    if not audio_path:
        return "❌ Please upload an audio file."

    # Step 1: Transcribe the audio
    transcript = transcribe_audio(audio_path)

    # transcribe_audio signals failure by returning "❌ Whisper Error: ...".
    # Match that exact prefix instead of searching for "❌" anywhere, so a
    # genuine transcript that merely contains the character is not mistaken
    # for an error.
    whisper_error_prefix = "❌ Whisper Error:"
    if transcript.startswith(whisper_error_prefix):
        return transcript

    # Step 2: Ask Gemini with the transcript as context
    answer = ask_gemini(question, transcript)

    return f"📜 Transcription:\n{transcript}\n\n💬 Answer:\n{answer}"

# 🎛️ Gradio UI
# Input components, in the order audio_chat receives them: the uploaded clip
# (passed as a file path) followed by the free-text question.
audio_input = gr.Audio(label="Upload Audio", type="filepath")
question_input = gr.Textbox(
    label="Ask a Question About the Audio",
    placeholder="e.g., What is the main topic?",
    lines=2,
)

# Single text area showing whatever string audio_chat returns.
answer_output = gr.Textbox(label="Answer", lines=12)

demo = gr.Interface(
    fn=audio_chat,
    inputs=[audio_input, question_input],
    outputs=answer_output,
    title="🔊 Audio Q&A Chatbot",
    description="Upload an audio file and ask questions about its content using Whisper for transcription and Gemini for answers.",
)

# Start the app; this is what serves the interface from the notebook.
demo.launch()


100%|████████████████████████████████████████| 139M/139M [00:01<00:00, 137MiB/s]


It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://44dd492f73c626fba2.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


