In [None]:
pip install gradio openai-whisper requests
pip install -q google-generativeai gradio Pillow

import whisper
import requests
import gradio as gr
import os

# 🔑 Gemini API Key
# python-dotenv is treated as optional: when it is installed, a local .env
# file is loaded so GEMINI_API_KEY can be kept out of the source; when it is
# missing, only variables already present in the process environment are used.
try:
    from dotenv import load_dotenv
except ImportError:
    pass
else:
    load_dotenv()
GOOGLE_API_KEY = os.getenv("GEMINI_API_KEY")

# Gemini API URL (for gemini-2.0-flash)
# The key is passed as a query parameter. An unset key is rendered as an empty
# string so the URL never carries the literal text "None".
GEMINI_URL = (
    "https://generativelanguage.googleapis.com/v1beta/models/"
    f"gemini-2.0-flash:generateContent?key={GOOGLE_API_KEY or ''}"
)

# Load Whisper Model
# The "base" model is loaded once at module level and kept in a global so that
# transcribe_audio() can reuse it instead of reloading it on each call.
whisper_model = whisper.load_model("base")

# 🔁 Function to transcribe audio
def transcribe_audio(audio_path):
    """Transcribe the audio file at *audio_path* with the global Whisper model.

    Returns the value stored under the ``"text"`` key of the transcription
    result. Any exception raised along the way is not propagated; instead a
    string beginning with "❌ Whisper Error: " followed by the exception
    message is returned.
    """
    try:
        transcription = whisper_model.transcribe(audio_path)
        text = transcription["text"]
    except Exception as err:
        return f"❌ Whisper Error: {err!s}"
    return text

# 🧠 Ask Gemini a question
def ask_gemini(question, context, *, timeout=60):
    """Ask Gemini a question about the supplied context text.

    The prompt is sent as a single user message of the form
    ``Context:\\n<context>\\n\\nQuestion: <question>`` to ``GEMINI_URL``.

    Args:
        question: The question to ask.
        context: Text the model should use as context for the question.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The text of the first part of the first candidate on success.
        On any failure a human-readable error string is returned instead:
        "❌ API Error: ..." for transport or non-200 responses, and
        "⚠️ Error parsing Gemini response" when the body does not have the
        expected shape.
    """
    headers = {"Content-Type": "application/json"}
    data = {
        "contents": [
            {"role": "user", "parts": [{"text": f"Context:\n{context}\n\nQuestion: {question}"}]}
        ]
    }

    try:
        # Without a timeout, requests waits indefinitely on a stalled server.
        response = requests.post(GEMINI_URL, headers=headers, json=data, timeout=timeout)
    except requests.RequestException as exc:
        # Report only the exception type: the exception text can include the
        # request URL, and GEMINI_URL carries the API key as a query parameter.
        return f"❌ API Error: request failed ({type(exc).__name__})"

    if response.status_code != 200:
        return f"❌ API Error: {response.status_code} - {response.text}"

    try:
        return response.json()["candidates"][0]["content"]["parts"][0]["text"]
    except (ValueError, KeyError, IndexError, TypeError):
        # ValueError covers a non-JSON body; the others cover a JSON body
        # that lacks the expected candidates/content/parts structure.
        return "⚠️ Error parsing Gemini response"

# 🎯 Main Function
def audio_chat(audio_path, question):
    """Transcribe an audio file and answer a question about its content.

    Returns a prompt to upload a file when *audio_path* is falsy, the
    transcription result unchanged when it contains the "❌" marker, and
    otherwise a combined string holding the transcription followed by the
    answer produced by ask_gemini().
    """
    # Nothing to work with until a file has been provided.
    if not audio_path:
        return "❌ Please upload an audio file."

    # Step 1: speech -> text.
    transcript_text = transcribe_audio(audio_path)
    transcription_failed = "❌" in transcript_text
    if transcription_failed:
        return transcript_text

    # Step 2: the transcript serves as the context for the question.
    gemini_reply = ask_gemini(question, transcript_text)
    return f"📜 Transcription:\n{transcript_text}\n\n💬 Answer:\n{gemini_reply}"

# 🎛️ Gradio UI
# Two inputs (an audio file passed as a path, and a free-text question) are
# handed to audio_chat; its string result is displayed in one text box.
audio_input = gr.Audio(label="Upload Audio", type="filepath")
question_input = gr.Textbox(
    label="Ask a Question About the Audio",
    placeholder="e.g., What is the main topic?",
    lines=2,
)
answer_output = gr.Textbox(label="Answer", lines=12)

demo = gr.Interface(
    fn=audio_chat,
    inputs=[audio_input, question_input],
    outputs=answer_output,
    title="🔊 Audio Q&A Chatbot",
    description="Upload an audio file and ask questions about its content using Whisper for transcription and Gemini for answers.",
)
demo.launch()