In [1]:
!pip install -q gradio transformers gTTS torch --upgrade

import os
import time
import uuid

import gradio as gr
from gtts import gTTS, gTTSError
from transformers import pipeline

# --- text-generation backend ---
# FLAN-T5 Large: the larger, more instruction-tuned free checkpoint.
llm = pipeline(task="text2text-generation", model="google/flan-t5-large")

# --- persona ---
# Preamble that respond() hands to build_prompt() ahead of the conversation.
PERSONA = "\n".join(
    [
        "You are Riya, a youthful and witty 21-year-old personal assistant.",
        "Be enthusiastic, kind, practical, and a little witty.",
        'Answer directly to the user as "I" (Riya).',
    ]
)

def build_prompt(user_message, history, persona, max_turns=3):
    """Assemble the text prompt sent to the model.

    The prompt is the persona, a blank line, the most recent conversation
    turns rendered as ``User: ...`` / ``Riya: ...`` lines, and finally the
    new user message followed by an open ``Riya:`` cue for the model to
    complete.

    Args:
        user_message: The message the user just sent.
        history: Sequence of ``(user_text, bot_text)`` pairs, oldest first.
            ``None`` or an empty sequence means there is no prior context.
        persona: Instruction text placed at the top of the prompt.
        max_turns: Maximum number of trailing history turns to include.
            Zero or a negative value includes no history at all.

    Returns:
        The assembled prompt string.
    """
    # Keep only the last few turns to avoid long, confusing context.
    # ``history[-0:]`` would return the *entire* list, so a non-positive
    # ``max_turns`` has to be handled explicitly instead of by slicing.
    recent = history[-max_turns:] if history and max_turns > 0 else []
    turns = "".join(f"User: {user}\nRiya: {bot}\n" for user, bot in recent)
    return f"{persona}\n\n{turns}User: {user_message}\nRiya:"

def respond(user_message, history):
    """Generate Riya's reply to ``user_message`` and synthesize it as speech.

    Args:
        user_message: The text the user just submitted.
        history: Earlier conversation turns; forwarded unchanged to
            ``build_prompt``.

    Returns:
        A ``(reply, audio_file)`` tuple. ``reply`` is the cleaned model
        output (or a fallback sentence when the model produced nothing
        usable). ``audio_file`` is the path of the MP3 written for this
        reply, or ``None`` when speech synthesis failed.
    """
    prompt = build_prompt(user_message, history, PERSONA)
    raw = llm(
        prompt,
        max_new_tokens=120,        # keep replies short
        do_sample=True,            # temperature only takes effect when sampling
        temperature=0.7,           # less deterministic output
    )[0]["generated_text"]

    # Clean the reply: drop anything after a hallucinated "User:" turn and
    # strip any "Riya:" speaker label the model echoed back.
    reply = raw.split("User:")[0]
    reply = reply.replace("Riya:", "").strip()
    if not reply:
        reply = "Hmm, can you ask that differently?"

    # TTS audio. A random suffix keeps two replies produced within the same
    # second from overwriting each other's file.
    audio_file = f"riya_{uuid.uuid4().hex}.mp3"
    try:
        gTTS(reply).save(audio_file)
    except gTTSError:
        # Speech is best-effort: still return the text reply, and do not
        # leave a partially written file behind.
        if os.path.exists(audio_file):
            os.remove(audio_file)
        audio_file = None
    return reply, audio_file

# --- Gradio UI ---
with gr.Blocks() as demo:
    gr.Markdown("## Riya — your enthusiastic personal assistant (Text + Voice)")
    # NOTE(review): the captured run output shows a warning pointing at this
    # line; presumably about the chatbot's default history format. Confirm
    # against the installed Gradio version before changing it.
    chat = gr.Chatbot()
    msg = gr.Textbox(placeholder="Ask Riya anything…")
    audio = gr.Audio(label="Riya's Voice", type="filepath")

    def on_submit(u, hist):
        """Handle a submitted message.

        Args:
            u: Current textbox content.
            hist: Current chatbot history as a list of ``(user, bot)`` pairs;
                ``None`` is treated as an empty history.

        Returns:
            ``(textbox_value, history, audio_path)`` matching the outputs
            ``[msg, chat, audio]``. The textbox value is always ``""`` so the
            input is cleared after each submit.
        """
        hist = hist or []
        text = (u or "").strip()
        if not text:
            # Nothing meaningful was typed: skip the model and TTS entirely.
            return "", hist, None
        r, a = respond(text, hist)
        # Build a new list rather than mutating the one handed in.
        return "", hist + [(text, r)], a

    msg.submit(on_submit, [msg, chat], [msg, chat, audio])

demo.launch(share=True)


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.1/40.1 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.4/60.4 MB[0m [31m9.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m325.4/325.4 kB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.6/11.6 MB[0m [31m30.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m98.2/98.2 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[?25h

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/662 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/3.13G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json: 0.00B [00:00, ?B/s]

Device set to use cpu
  chat = gr.Chatbot()


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://34660f197818289865.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


