<a href="https://colab.research.google.com/github/RedHerring10/Project1/blob/main/Chatbot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip -q install --upgrade transformers accelerate torch gradio sentencepiece safetensors

In [None]:
# 1) Imports & Model Choice
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch, os, time
import gradio as gr

# Pick a SMALL instruct/chat model (change this string if you like):
# Good CPU-friendly picks:
#   - "Qwen/Qwen2.5-0.5B-Instruct"       (tiny, supports chat templates)
#   - "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
# Default to the smallest of the suggested chat models so the notebook runs
# end-to-end as-is; swap in any other causal-LM checkpoint id if you like.
MODEL_ID = "Qwen/Qwen2.5-0.5B-Instruct"

# Device setup: prefer CUDA, then Apple MPS, otherwise fall back to CPU.
# Half precision is only selected on an accelerator; CPU stays in float32.
if torch.cuda.is_available():
    device = "cuda"
    dtype = torch.float16
elif torch.backends.mps.is_available():
    device = "mps"
    dtype = torch.float16
else:
    device = "cpu"
    dtype = torch.float32

# Load the tokenizer and the weights for MODEL_ID (downloads on first use).
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype=dtype)

# Move the model to the selected device unconditionally (not just on CPU),
# and switch to eval mode since the notebook only runs inference.
model = model.to(device)
model.eval()

print(f"Loaded {MODEL_ID} on {device}.")

In [None]:
# 2) Generation settings
MAX_HISTORY = 6        # keep last N turns
MAX_NEW_TOKENS = 256   # generation length
TEMPERATURE = 0.7      # sampling temperature passed to model.generate
TOP_P = 0.95           # nucleus-sampling threshold passed to model.generate

def generate_reply(history):
    """Generate the assistant's next reply for a Gradio chat history.

    Args:
        history: List of ``[user, assistant]`` pairs from the Gradio Chatbot.
            The assistant slot of a pair may be ``None`` while its reply is
            still pending.

    Returns:
        The newly generated text, decoded without special tokens and with
        surrounding whitespace stripped. Returns ``""`` when ``history`` is
        empty, since there is no user turn to answer.
    """
    # Nothing to answer: avoid indexing into an empty history below.
    if not history:
        return ""

    # Convert the last MAX_HISTORY turns to the role/content dicts expected
    # by chat templates.
    messages = []
    for user_msg, bot_msg in history[-MAX_HISTORY:]:
        messages.append({"role": "user", "content": user_msg})
        if bot_msg is not None:
            messages.append({"role": "assistant", "content": bot_msg})

    # Make sure the prompt ends on a user turn: if the newest pair already
    # carries an assistant reply, repeat its user message as the final turn.
    if messages[-1]["role"] != "user":
        messages.append({"role": "user", "content": history[-1][0]})

    # Build model input via the chat template if available. return_dict=True
    # yields both input_ids and attention_mask in one mapping.
    try:
        encoded = tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            return_tensors="pt",
            return_dict=True,
        )
    except Exception:
        # Deliberate best-effort fallback: plain "User:/Assistant:" transcript
        # when the chat template cannot be applied.
        lines = []
        for m in messages:
            speaker = "User" if m["role"] == "user" else "Assistant"
            lines.append(f"{speaker}: {m['content']}")
        prompt = "\n".join(lines) + "\nAssistant:"
        encoded = tokenizer(prompt, return_tensors="pt")

    input_ids = encoded["input_ids"].to(model.device)
    attention_mask = encoded.get("attention_mask")
    if attention_mask is not None:
        attention_mask = attention_mask.to(model.device)

    # Fall back to pad_token_id only when the tokenizer has no EOS id.
    pad_id = tokenizer.eos_token_id
    if pad_id is None:
        pad_id = tokenizer.pad_token_id

    with torch.no_grad():
        outputs = model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            max_new_tokens=MAX_NEW_TOKENS,
            do_sample=True,
            temperature=TEMPERATURE,
            top_p=TOP_P,
            pad_token_id=pad_id,
        )

    # Slice off the prompt so only the newly generated tokens are decoded.
    gen_ids = outputs[0][input_ids.shape[-1]:]
    text = tokenizer.decode(gen_ids, skip_special_tokens=True)

    return text.strip()

In [None]:
# 3) Wire up a tiny Gradio UI (one component, one function)
with gr.Blocks() as demo:
    # Layout: heading, conversation pane, input box, and a reset button.
    gr.Markdown("""## 🤖 Tiny Chatbot\nType below. The model runs locally on your machine.""")
    chat = gr.Chatbot(height=400)
    msg = gr.Textbox(placeholder="Ask me anything...", label="Your message")
    clear = gr.Button("Clear chat")

    def user_submit(user_message, history):
        """Append the typed message as a new unanswered turn and clear the textbox."""
        turns = history if history else []
        turns.append([user_message, None])
        return "", turns

    def bot_reply(history):
        """Fill the assistant slot of the newest turn with a generated reply."""
        history[-1][1] = generate_reply(history)
        return history

    # On submit: first record the user turn, then generate the reply for it.
    submit_event = msg.submit(
        user_submit, [msg, chat], [msg, chat], queue=False
    )
    submit_event.then(bot_reply, inputs=[chat], outputs=[chat])

    # The clear button resets the chat to an empty history.
    clear.click(lambda: [], outputs=[chat], queue=False)

demo.launch()