In [None]:
# Import necessary libraries
from datasets import load_dataset
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
# NOTE(review): `HuggingFaceLLM` does not appear to be exported by
# `langchain.llms` (it looks like a LlamaIndex name) and is unused in the code
# below; this line likely raises ImportError and should be dropped -- confirm.
from langchain.llms import HuggingFaceLLM
from langchain.llms import HuggingFacePipeline
from langchain.vectorstores import FAISS
from transformers import (
    AutoModelForCausalLM,
    AutoModelForSeq2SeqLM,
    AutoTokenizer,
    pipeline,
)

# Load a small dataset for demonstration purposes
dataset = load_dataset('ag_news', split='test[:100]')

# Prepare documents: keep only the raw text of each news item
documents = [{"text": item['text']} for item in dataset]

# Initialize the embedding model
embedding_model_name = "sentence-transformers/all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)

# Compute document embeddings (one vector per text, in the same order)
document_texts = [doc['text'] for doc in documents]
document_embeddings = embeddings.embed_documents(document_texts)

# Create a FAISS index from the precomputed vectors.
# `from_embeddings` expects (text, vector) pairs plus the Embeddings object;
# the Embeddings object is what the store uses to embed incoming queries.
# The pairs are materialised as a list so they can be iterated more than once.
index = FAISS.from_embeddings(
    text_embeddings=list(zip(document_texts, document_embeddings)),
    embedding=embeddings,
)

# Initialize the inference model (Mistral) using Hugging Face.
# Mistral is a decoder-only (causal) language model, so it has to be loaded
# with AutoModelForCausalLM; AutoModelForSeq2SeqLM does not accept its config.
# NOTE(review): the Mistral checkpoints on the Hub may require accepting the
# model terms / an access token -- confirm for the target environment.
generation_model_name = "mistralai/Mistral-7B-v0.1"
tokenizer = AutoTokenizer.from_pretrained(generation_model_name)
model = AutoModelForCausalLM.from_pretrained(generation_model_name)

# Define a simple generation function
def generate_answer(question, context):
    """Generate an answer for ``question`` given retrieved ``context``.

    The prompt is ``question + " " + context``, tokenized with the
    module-level ``tokenizer`` and fed to the module-level ``model``.

    Args:
        question: The user question (str).
        context: Text of the retrieved documents, already joined (str).

    Returns:
        The decoded generated text with special tokens removed. For
        decoder-only models the echoed prompt is stripped, so only newly
        generated text is returned.
    """
    prompt = question + " " + context
    inputs = tokenizer(prompt, return_tensors='pt', truncation=True)
    # Keep the inputs on the same device as the model weights.
    inputs = inputs.to(model.device)

    outputs = model.generate(
        input_ids=inputs['input_ids'],
        # Pass the mask explicitly so generate() does not have to guess it.
        attention_mask=inputs.get('attention_mask'),
        # Budget new tokens only: `max_length` also counts the prompt on
        # decoder-only models, which can leave no room for the answer when
        # the retrieved context is long.
        max_new_tokens=150,
    )

    generated = outputs[0]
    if not model.config.is_encoder_decoder:
        # Decoder-only models return prompt + continuation; drop the prompt.
        prompt_length = inputs['input_ids'].shape[1]
        generated = generated[prompt_length:]

    return tokenizer.decode(generated, skip_special_tokens=True)

# Create the RetrievalQA chain.
# The chain needs a LangChain LLM object (not a plain function) and a
# retriever (not the vector store itself), so wrap the already-loaded model in
# a transformers pipeline and expose the FAISS store through `as_retriever`.
generation_task = (
    "text2text-generation"
    if model.config.is_encoder_decoder
    else "text-generation"
)
llm = HuggingFacePipeline(
    pipeline=pipeline(
        generation_task,
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=150,
    )
)
qa = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=index.as_retriever(search_kwargs={"k": 3}),
    return_source_documents=True,
)

# Ask a question
question = "What is the news about?"

# Retrieve documents. The retriever returns Document objects, whose text
# lives in `page_content`.
retrieved_docs = qa.retriever.get_relevant_documents(question)
context = " ".join(doc.page_content for doc in retrieved_docs[:3])

# Get the answer from the RAG model.
# Generation goes through `generate_answer` directly; the chain's own LLM is
# not invoked here.
answer = generate_answer(question, context)

# Print the result
print("Question:", question)
print("Answer:", answer)
print("Source Document:", context)
