In [19]:
# -*- coding: utf-8 -*-
"""Instant-Response Chatbot with Sub-Second Replies"""

!pip install gradio transformers --quiet
!pip install google-generativeai --quiet

import gradio as gr
from transformers import pipeline
import google.generativeai as genai
import os
import time
from typing import List, Tuple

# 1. LIGHTWEIGHT CONFIGURATION
# ============================
# Gemini API key, read from the GEMINI_KEY environment variable.
# os.getenv returns None when the variable is not set.
GOOGLE_API_KEY = os.getenv('GEMINI_KEY')  # Set this in your environment
# Sent to Gemini as max_output_tokens. generate_instant_response also reuses
# this number as a *character* cap when slicing the reply text.
MAX_RESPONSE_TOKENS = 120  # Shorter responses
# Per-request timeout handed to model.generate_content via request_options.
TIMEOUT_SECONDS = 3  # Fail fast if response takes too long
# Maximum number of entries kept in response_cache before the oldest is evicted.
CACHE_SIZE = 10  # Number of responses to cache

# 2. OPTIMIZED MODEL LOADING
# ==========================
print("⚡ Loading optimized models...")
start_time = time.time()

# Sentiment classifier pinned to CPU (device=-1).
# NOTE(review): nothing in the visible code calls emotion_pipe, so this load
# only adds start-up time — confirm whether it is still needed.
emotion_pipe = pipeline(
    "text-classification",
    model="finiteautomata/bertweet-base-sentiment-analysis",
    device=-1  # Force CPU for faster cold starts
)

# Surface a missing key up front. Without it, generation requests are expected
# to fail and the chat falls back to canned replies with no visible reason.
if not GOOGLE_API_KEY:
    print(
        "⚠️ GEMINI_KEY is not set; Gemini requests will likely fail and "
        "only canned replies will be returned."
    )

# Configure Gemini
genai.configure(api_key=GOOGLE_API_KEY)
generation_config = {
    "temperature": 0.8,
    "top_p": 1.0,
    # top_k=1 limits sampling to the single most likely token, which makes
    # temperature/top_p largely irrelevant — presumably chosen for
    # deterministic, short answers; TODO confirm this is intended.
    "top_k": 1,
    "max_output_tokens": MAX_RESPONSE_TOKENS
}
model = genai.GenerativeModel(
    'gemini-pro',
    generation_config=generation_config
)

print(f"✅ Models loaded in {time.time()-start_time:.2f}s")

# 3. RESPONSE CACHING SYSTEM
# ==========================
response_cache = {}

def get_cached_response(prompt: str) -> str:
    """Look up a previously stored reply for ``prompt``.

    Returns the cached reply text, or ``None`` when ``prompt`` has no entry
    in ``response_cache``.
    """
    return response_cache.get(prompt)

# 4. ULTRA-FAST RESPONSE GENERATION
# =================================
def generate_instant_response(user_input: str) -> str:
    """Return a reply to ``user_input`` using the cheapest path that works.

    Paths are tried in this order:
      1. A cached reply, keyed by the first 100 characters of the input.
      2. A canned reply for greetings ("hi", "hello", "hey" as whole words)
         or for any input containing "thank".
      3. A Gemini request with a ``TIMEOUT_SECONDS`` timeout. A non-empty
         reply is cut to ``MAX_RESPONSE_TOKENS`` characters, cached, and
         returned; the oldest cache entry is evicted beyond ``CACHE_SIZE``.
      4. A canned fallback picked from the input length.

    Args:
        user_input: Raw text typed by the user.

    Returns:
        The reply text. Exceptions raised while generating are reported and
        swallowed so the caller always gets a string back.
    """
    cache_key = user_input[:100]  # First 100 chars as key

    # Check cache first
    cached = get_cached_response(cache_key)
    if cached:
        return cached

    # Fast path - simple response patterns
    lower_input = user_input.lower()
    # Compare whole words: a substring test also fires on "this", "think",
    # "which", "they", ... and would answer real questions with a greeting.
    words = set(
        "".join(ch if ch.isalnum() else " " for ch in lower_input).split()
    )
    if words & {"hi", "hello", "hey"}:
        return "Hello! How can I help you today?"
    if "thank" in lower_input:
        return "You're welcome! Is there anything else?"

    # Medium path - try quick generation
    try:
        prompt = f"Brief response to: {user_input[:150]}"  # Truncate input
        response = model.generate_content(
            prompt,
            request_options={"timeout": TIMEOUT_SECONDS}
        )
        text = response.text
        if text:
            # NOTE(review): this slices by characters although the constant is
            # named for tokens — confirm the intended limit.
            reply = text[:MAX_RESPONSE_TOKENS]
            # Cache successful responses, dropping the oldest when full
            response_cache[cache_key] = reply
            if len(response_cache) > CACHE_SIZE:
                response_cache.pop(next(iter(response_cache)))
            return reply
    except Exception as exc:
        # Best effort: keep the chat responsive, but say why generation failed
        # instead of hiding it (a bare except would also trap Ctrl-C).
        print(f"Generation failed, using fallback reply: {exc!r}")

    # Fallback path
    fallbacks = [
        "I appreciate your message. Could you tell me more?",
        "That's interesting. What else is on your mind?",
        "Thanks for sharing. How can I assist you further?"
    ]
    return fallbacks[len(user_input) % len(fallbacks)]

# 5. STREAMLINED CHAT INTERFACE
# =============================
def chat_fn(message: str, history: List[Tuple[str, str]]):
    """Handle one submitted message for the Gradio chat UI.

    Args:
        message: Text from the input box.
        history: Existing ``(user, bot)`` pairs shown in the chatbot; ``None``
            is treated as an empty conversation.

    Returns:
        A ``(textbox_value, history)`` pair: an empty string to clear the
        input box, and the history with the new exchange appended in place.
        Blank or whitespace-only messages leave the history unchanged.
    """
    # A cleared chatbot may hand back None; .append would fail on it.
    if history is None:
        history = []

    # Skip blank submits so no empty prompt is sent and no empty bubble shown.
    if not message or not message.strip():
        return "", history

    start_time = time.time()
    response = generate_instant_response(message)
    history.append((message, response))
    print(f"Generated response in {time.time()-start_time:.3f}s")
    return "", history

# 6. MINIMAL GRADIO UI
# ====================
with gr.Blocks() as app:
    gr.Markdown("## ⚡ Instant Chat")

    # Conversation display: compact layout, fixed height, no label.
    chatbot = gr.Chatbot(height=300, layout="compact", show_label=False)

    # Input box: at most two lines, rendered without its container.
    msg = gr.Textbox(
        placeholder="Type your message...",
        max_lines=2,
        container=False,
    )

    clear_btn = gr.Button("Clear", size="sm")

    # Submitting the textbox runs chat_fn, which returns the new textbox
    # value and the updated conversation.
    msg.submit(
        fn=chat_fn,
        inputs=[msg, chatbot],
        outputs=[msg, chatbot],
    )

    # The Clear button resets the chatbot by writing None to it, unqueued.
    clear_btn.click(
        fn=lambda: None,
        inputs=None,
        outputs=chatbot,
        queue=False,
    )

# 7. PERFORMANCE-OPTIMIZED LAUNCH
# ===============================
print("\n🚀 Launching ultra-fast chat interface...")

# Launch settings, gathered in one place:
#   share       - request a public share link
#   max_threads - single thread for stability
#   show_error  - surface errors in the UI
# enable_queue is deliberately absent: launch() does not accept it.
launch_options = {
    "share": True,
    "max_threads": 1,
    "show_error": True,
}
app.launch(**launch_options)

⚡ Loading optimized models...


emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0
Device set to use cpu
  chatbot = gr.Chatbot(


✅ Models loaded in 0.77s

🚀 Launching ultra-fast chat interface...
Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://ac86f60e39dcc6ca56.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


