# RAG (Retrieval-Augmented Generation) based QA system

In [16]:
import numpy as np
from rank_bm25 import BM25Okapi
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from sentence_transformers import SentenceTransformer
from datasets import load_dataset
import torch

# Load sample dataset
dataset = load_dataset("wikipedia", "20220301.simple", split="train[:10000]")  # 1000 articles
docs = dataset["text"][:5000]  # Taking 500 docs for efficiency

# Embed using Sentence Transformer
embedder = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")
doc_embeddings = embedder.encode(docs, convert_to_numpy=True)

# Tokenize documents for BM25
tokenized_docs = [doc.split() for doc in docs]
bm25 = BM25Okapi(tokenized_docs)

def retrieve_documents(query, k=3):
    tokenized_query = query.split()
    scores = bm25.get_scores(tokenized_query)
    top_indices = np.argsort(scores)[::-1][:k]
    return [docs[i] for i in top_indices]

# Load HuggingFace Model for Generation
tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-cnn")
model = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-large-cnn")

def generate_answer(question):
    retrieved_docs = retrieve_documents(question)
    if not retrieved_docs:
        return "No relevant documents found."

    context = " ".join(retrieved_docs)  # Combine retrieved documents
    input_text = f"Context: {context} Question: {question}"

    inputs = tokenizer(input_text, return_tensors="pt", max_length=512, truncation=True)
    with torch.no_grad():
        output = model.generate(**inputs, max_length=100, num_return_sequences=1, do_sample=True, temperature=0.7)

    return tokenizer.decode(output[0], skip_special_tokens=True)

# Example Usage



KeyboardInterrupt: 

In [15]:
if __name__ == "__main__":
    question = input("Enter your question: ")
    answer = generate_answer(question)
    print("Answer:", answer)

Enter your question: What is the capital of France?
Answer: Nova Scotia is a small province on the east coast of Canada. The capital and largest city is Halifax. There are over 900,000 people who live in Nova Scotia, called Nova Scotians. The name 'Nova' is Latin for 'New Scotland' and means 'love of wisdom'
