<a href="https://colab.research.google.com/github/Prianka-Mukhopadhyay/QA-Chatbot-HuggingFace/blob/main/TinyLlama_DialoGPT_BlenderBot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install Hugging Face libraries
!pip install -q transformers accelerate

# Import required libraries
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# TinyLlama chat checkpoint: very lightweight, suitable for a CPU-only runtime
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# device_map="cpu" forces the model weights onto the CPU
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="cpu")
tokenizer = AutoTokenizer.from_pretrained(model_name)



## Using TinyLlama

In [None]:
# Wrap the already-loaded model and tokenizer in a text-generation pipeline.
# No device argument is passed here.
chatbot = pipeline(task="text-generation", model=model, tokenizer=tokenizer)

# Function to chat with the model
def ask_bot(prompt, max_new_tokens=200):
    """Generate a sampled completion for ``prompt`` using the ``chatbot`` pipeline.

    Args:
        prompt: Text passed to the text-generation pipeline.
        max_new_tokens: Upper bound on the number of newly generated tokens.

    Returns:
        The generated continuation only (the prompt is not repeated), with
        leading and trailing whitespace removed.
    """
    response = chatbot(
        prompt,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        # By default the pipeline returns prompt + completion; callers that
        # print the result as an answer would repeat the context and question.
        return_full_text=False,
    )
    return response[0]['generated_text'].strip()

# Smoke test: ask one question about a single sentence of context
context = "Hugging Face is a company that develops tools for natural language processing."
question = "What does Hugging Face specialize in?"
prompt = f"Context: {context}\n\nQuestion: {question}\nAnswer:"

print(ask_bot(prompt=prompt))


In [None]:
# Context/question pairs used to spot-check the model
tests = [
    {"context": "The Eiffel Tower is located in Paris, France.",
     "question": "Where is the Eiffel Tower located?"},
    {"context": "Python is a programming language often used for machine learning.",
     "question": "What is Python commonly used for?"},
    {"context": "Cristiano Ronaldo is a professional football player from Portugal.",
     "question": "What sport does Cristiano Ronaldo play?"},
]

separator = "-" * 50
for case in tests:
    # Same prompt layout as the single-question example
    prompt = f"Context: {case['context']}\n\nQuestion: {case['question']}\nAnswer:"
    print("Q:", case['question'])
    answer = ask_bot(prompt)
    print("A:", answer)
    print(separator)


## Using DialoGPT-small

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Load DialoGPT-small
tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-small")
model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-small")

# Start chatting
chat_history_ids = None  # token ids of the whole conversation so far (None before the first turn)
print("Chatbot is ready! Type 'quit' to stop.\n")

for step in range(5):  # 5 turns of dialogue
    user_input = input("You: ")
    # Ignore surrounding whitespace so "quit " or " Quit" also ends the chat
    if user_input.strip().lower() == "quit":
        break

    # Encode user input & append to chat history
    new_input_ids = tokenizer.encode(user_input + tokenizer.eos_token, return_tensors='pt')
    if chat_history_ids is None:
        bot_input_ids = new_input_ids
    else:
        bot_input_ids = torch.cat([chat_history_ids, new_input_ids], dim=-1)

    # Generate response.
    # The input is a single unpadded sequence, so every position is a real token.
    # Passing the all-ones mask explicitly matters because pad_token_id is set to
    # the EOS id, which also appears inside the history as the turn separator.
    # NOTE(review): max_length counts the history too; a conversation approaching
    # 1000 tokens would leave no room for a reply.
    chat_history_ids = model.generate(
        bot_input_ids,
        attention_mask=torch.ones_like(bot_input_ids),
        max_length=1000,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Decode & print only the newly generated tokens (everything after the input)
    reply_ids = chat_history_ids[:, bot_input_ids.shape[-1]:][0]
    bot_reply = tokenizer.decode(reply_ids, skip_special_tokens=True)
    print(f"Bot: {bot_reply}\n")


## **BlenderBot Small**

In [None]:
!pip install transformers gradio -q

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import gradio as gr

# Small BlenderBot checkpoint: light, dialogue-tuned
MODEL_ID = "facebook/blenderbot_small-90M"

# Sequence-to-sequence model and the tokenizer from the same checkpoint
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_ID)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

In [None]:
def _recent_turns(history, limit=3):
    """Return the last ``limit`` (user, bot) pairs from a chat history.

    Accepts either a list of (user, bot) pairs or a list of
    ``{"role": ..., "content": ...}`` dicts; ``None`` is treated as empty.
    """
    pairs = []
    pending_user = None
    for item in history or []:
        if isinstance(item, dict):
            # Message-dict form: pair each assistant reply with the user
            # message that preceded it.
            if item.get("role") == "user":
                pending_user = item.get("content")
            elif item.get("role") == "assistant":
                pairs.append((pending_user if pending_user is not None else "", item.get("content")))
                pending_user = None
        else:
            user_text, bot_text = item
            pairs.append((user_text, bot_text))
    return pairs[-limit:] if limit > 0 else []


def chat_fn(message, history):
    """Produce the bot's reply to ``message`` given the previous turns.

    Args:
        message: The newest user message.
        history: Earlier turns, as (user, bot) pairs or role/content dicts.
            NOTE(review): which of the two forms Gradio passes depends on the
            installed version and ChatInterface settings — confirm.

    Returns:
        The decoded reply, stripped of surrounding whitespace.
    """
    # Build a short context from the last few turns to keep RAM low
    past = "\n".join(f"User: {u}\nBot: {b}" for u, b in _recent_turns(history, limit=3))
    prompt = (past + "\n" if past else "") + f"User: {message}\nBot:"

    inputs = tokenizer([prompt], return_tensors="pt", padding=False)
    # Keep input small on CPU: retain only the LAST 256 tokens. Cutting from
    # the left drops the oldest context first; cutting from the right would
    # discard the newest user message, which is the part that matters most.
    max_input_tokens = 256
    inputs = {name: ids[:, -max_input_tokens:] for name, ids in inputs.items()}

    output_ids = model.generate(
        **inputs,
        max_new_tokens=120,      # cap output length
        do_sample=True,
        top_p=0.9,
        temperature=0.8,
        no_repeat_ngram_size=3,
        pad_token_id=tokenizer.eos_token_id,
    )
    reply = tokenizer.batch_decode(output_ids, skip_special_tokens=True)[0]
    # If the model echoed the prompt, grab only the part after "Bot:"
    reply = reply.split("Bot:")[-1].strip() if "Bot:" in reply else reply.strip()
    return reply

# Ready-made chat UI around chat_fn, with a few starter prompts, then launch it
demo = gr.ChatInterface(
    chat_fn,
    examples=["hey", "what are you doing?", "tell me a joke"],
    description="Lightweight conversational bot that runs on CPU.",
    title="BlenderBot Small (CPU)",
)
demo.launch()

In [None]:
# Chat function
def chat(history, user_input):
    """Generate a reply to ``user_input`` and append the exchange to the history.

    Args:
        history: List of (user, bot) tuples; ``None`` is treated as empty.
            The list passed in is never modified.
        user_input: The user's message. Blank input produces no new turn.

    Returns:
        ``(history, history)`` — the same updated list twice.
    """
    history = list(history or [])

    # An empty message tokenizes to nothing to generate from; skip it
    # instead of calling the model.
    if not user_input or not user_input.strip():
        return history, history

    # truncation=True caps over-long messages at the tokenizer's configured
    # maximum length.
    # NOTE(review): assumes the checkpoint's tokenizer defines that maximum — confirm.
    inputs = tokenizer(user_input, return_tensors="pt", truncation=True)
    reply_ids = model.generate(**inputs)
    reply = tokenizer.decode(reply_ids[0], skip_special_tokens=True)

    # Append to chat history
    history.append((user_input, reply))
    return history, history

# Gradio UI
with gr.Blocks() as demo:
    # Named chat_window rather than chatbot so it does not overwrite the
    # TinyLlama pipeline object `chatbot` that ask_bot() calls.
    # NOTE(review): history is kept as (user, bot) tuples; newer Gradio
    # releases prefer role/content dicts — confirm against the installed version.
    chat_window = gr.Chatbot()
    state = gr.State([])  # keeps track of conversation history
    msg = gr.Textbox(label="Type your message here...")

    def respond(user_message, history):
        """Handle a submitted message.

        Returns the new textbox value (cleared), the updated history to store
        in ``state``, and the same history to display in the chat window.
        """
        updated_history = chat(history, user_message)[0]
        return "", updated_history, updated_history

    # `state` must be an output as well as an input; otherwise it stays at its
    # initial [] and every submit starts from an empty conversation.
    msg.submit(respond, [msg, state], [msg, state, chat_window])

# debug takes a boolean; the string 'True' only worked because it is truthy
demo.launch(debug=True)
