<a href="https://colab.research.google.com/github/IkramAlgo/RAG-Model/blob/main/RAG_Model_Implementation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
import torch

# Checkpoint to load from the Hugging Face Hub (the XL variant of FLAN-T5).
model_name = "google/flan-t5-xl"

# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the tokenizer and the seq2seq model, placing the model on the chosen
# device straight away (`.to()` on a module returns the module itself).
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)

# ✅ Function to Generate More Accurate Answers
def generate_response(question: str, *, max_length: int = 100, num_beams: int = 5) -> str:
    """Generate an answer to *question* with the module-level seq2seq model.

    The question is wrapped in a fixed instruction prompt, tokenized, moved to
    the module-level ``device`` and passed to ``model.generate``. The first
    returned sequence is decoded back to text with special tokens removed.

    Args:
        question: The user's question, inserted verbatim into the prompt.
        max_length: Forwarded to ``model.generate`` as ``max_length``.
        num_beams: Forwarded to ``model.generate`` as ``num_beams``.

    Returns:
        The decoded text of the first generated sequence.
    """
    # Fixed instruction prompt; the question is inserted unchanged.
    input_text = f"Provide a factual answer based on general knowledge.\nQuestion: {question}\nAnswer:"

    inputs = tokenizer(input_text, return_tensors="pt").to(device)

    # Inference only: no gradients are needed, so skip autograd bookkeeping.
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_length=max_length,
            num_beams=num_beams,
            # Only request early stopping when beam search is actually in use;
            # with the default of 5 beams this is True, as before.
            early_stopping=num_beams > 1,
        )

    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# ✅ Interactive chat: read a question, print the model's answer, repeat
# until the user types 'exit' or the input stream ends.
print("🤖 AI Chatbot (Type 'exit' to quit)")
while True:
    try:
        question = input("\nYou: ")  # User enters question
    except (EOFError, KeyboardInterrupt):
        # stdin was closed or the prompt was interrupted: leave the chat
        # cleanly instead of ending with a traceback.
        print("\nGoodbye! 👋")
        break

    # Compare against a trimmed, lower-cased copy so " Exit " also quits.
    command = question.strip().lower()
    if command == "exit":
        print("Goodbye! 👋")
        break

    # Nothing was typed: prompt again rather than running generation on an
    # empty question.
    if not command:
        continue

    # The question is passed on exactly as typed.
    response = generate_response(question)
    print("AI:", response)
