In [9]:
# Install required libraries
# NOTE(review): no versions are pinned here, so a fresh run may resolve to
# different releases than the ones that produced the saved outputs.
# NOTE(review): in Jupyter, `%pip install ...` is generally preferred over
# `!pip install ...` because it targets the running kernel's environment.
!pip install PyPDF2
!pip install sentence-transformers
!pip install transformers
!pip install torch
!pip install faiss-cpu

# Import libraries
import PyPDF2
from sentence_transformers import SentenceTransformer
import numpy as np
import faiss
from transformers import pipeline
import torch



In [10]:
# Function to extract text from a PDF
def extract_text_from_pdf(pdf_path):
    """Extract the text of every page of a PDF as a single string.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The text of all pages in page order, each page followed by a newline.
        A page that yields no text contributes only its newline.
    """
    with open(pdf_path, 'rb') as file:
        reader = PyPDF2.PdfReader(file)
        # Coerce a missing result to "" so a single text-less page (e.g. a
        # scanned image) cannot abort the whole extraction with a TypeError.
        page_texts = [(page.extract_text() or "") + "\n" for page in reader.pages]
    # Join once instead of growing a string page by page.
    return "".join(page_texts)

# Function to chunk text into smaller pieces
def chunk_text(text, chunk_size=500):
    """Split text into consecutive chunks of whitespace-separated words.

    Args:
        text: The text to split.
        chunk_size: Maximum number of words per chunk.

    Returns:
        A list of strings; each holds up to `chunk_size` words joined by single
        spaces. The last chunk may be shorter. Empty text gives an empty list.
    """
    tokens = text.split()
    window_starts = range(0, len(tokens), chunk_size)
    return [" ".join(tokens[start:start + chunk_size]) for start in window_starts]

# Load and preprocess the papers (replace with your PDF file paths)
pdf_paths = ["/1706.03762v7.pdf", "/2005.11401v4.pdf", "/1706.03762v7.pdf"]  # Add your file paths here
all_chunks = []
chunk_sources = []  # To track which chunk came from which paper

# The list above names "/1706.03762v7.pdf" twice. Indexing the same paper twice
# stores each of its chunks twice, so identical chunks can occupy several of
# the top-k retrieval slots. Keep only the first occurrence of every path.
# TODO(review): the third entry was probably meant to be a different paper.
unique_pdf_paths = list(dict.fromkeys(pdf_paths))

for idx, pdf_path in enumerate(unique_pdf_paths):
    text = extract_text_from_pdf(pdf_path)
    chunks = chunk_text(text)
    all_chunks.extend(chunks)
    # One source label per chunk, kept parallel to all_chunks
    chunk_sources.extend([f"Paper {idx+1}"] * len(chunks))

# Fail early with a clear message: without any chunk there are no embeddings
# from which to read the vector dimension for the index.
if not all_chunks:
    raise ValueError("No text could be extracted from the configured PDFs; nothing to index.")

# Load the embedding model
embedder = SentenceTransformer('all-MiniLM-L6-v2')

# Convert chunks to embeddings (one row per chunk)
embeddings = embedder.encode(all_chunks, convert_to_numpy=True)

# Create a FAISS index for retrieval; row i of the index corresponds to all_chunks[i]
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)

In [11]:
# Function to retrieve relevant chunks
def retrieve_chunks(query, top_k=3):
    """Return the indexed chunks closest to the query, with their source labels.

    Args:
        query: Question text to embed and search for.
        top_k: Maximum number of chunks to return.

    Returns:
        A `(relevant_chunks, sources)` pair of equal-length lists, in the order
        the index search returned them. Fewer than `top_k` items are returned
        when the search yields fewer valid hits.
    """
    query_embedding = embedder.encode([query], convert_to_numpy=True)
    _distances, indices = index.search(query_embedding, top_k)
    # FAISS fills result slots it cannot satisfy with -1 (e.g. when the index
    # holds fewer than top_k vectors). Used as a Python list index, -1 would
    # silently select the last chunk, so drop those slots.
    hit_ids = [int(idx) for idx in indices[0] if idx >= 0]
    relevant_chunks = [all_chunks[idx] for idx in hit_ids]
    sources = [chunk_sources[idx] for idx in hit_ids]
    return relevant_chunks, sources

In [12]:
# Load the question-answering pipeline used to produce answers.
# NOTE: this is a "question-answering" (extractive) pipeline, not a generative LLM:
# the answer is taken from the context passed in (DistilBERT/SQuAD, per the checkpoint name).
qa_model = pipeline("question-answering", model="distilbert-base-uncased-distilled-squad")

# Function to generate an answer
def generate_answer(query, chunks, sources):
    """Answer a question from the retrieved chunks using the QA model.

    Args:
        query: The question to answer.
        chunks: Retrieved text chunks used as context.
        sources: Source label of each chunk, parallel to `chunks`.

    Returns:
        `(answer, answer_source, context)`: the answer text, the label of the
        chunk the answer was taken from (None if it cannot be determined), and
        the combined context passed to the model. When no chunks are given the
        result is `("", None, "")` and the model is not called.
    """
    # Nothing was retrieved, so there is no context to take an answer from.
    if len(chunks) == 0:
        return "", None, ""

    # Combine the chunks into a single context, one labelled segment per line
    segments = [f"[{source}] {chunk}" for chunk, source in zip(chunks, sources)]
    context = "\n".join(segments)

    # Use the QA model to generate an answer
    result = qa_model(question=query, context=context)
    answer = result['answer']

    # Identify the source of the answer
    answer_source = None
    start = result.get('start')
    if start is not None:
        # Map the answer's character offset in the context back to its segment.
        # A plain text search would credit the first chunk that merely contains
        # the same words, even when the answer was taken from a later chunk.
        segment_end = 0
        for segment, source in zip(segments, sources):
            segment_end += len(segment) + 1  # +1 for the joining newline
            if start < segment_end:
                answer_source = source
                break
    else:
        # No offset available: fall back to the first chunk containing the text.
        for chunk, source in zip(chunks, sources):
            if answer in chunk:
                answer_source = source
                break

    return answer, answer_source, context

# Function to run the full RAG pipeline
def rag_pipeline(query):
    """Run retrieval followed by answer extraction for a single question.

    Args:
        query: The question to answer.

    Returns:
        The `(answer, source, context)` triple produced by `generate_answer`
        for the chunks that `retrieve_chunks` returned for this query.
    """
    retrieved, origins = retrieve_chunks(query)
    return generate_answer(query, retrieved, origins)

Device set to use cpu


In [13]:
# Sample questions used to exercise the pipeline end to end.
# NOTE(review): the last question is about GPT-3, but none of the configured
# pdf_paths looks like the GPT-3 paper — confirm the intended paper list.
questions = [
    "What are the main components of a RAG model, and how do they interact?",
    "What are the two sub-layers in each encoder layer of the Transformer model?",
    "Explain how positional encoding is implemented in Transformers and why it is necessary.",
    "Describe the concept of multi-head attention in the Transformer architecture. Why is it beneficial?",
    "What is few-shot learning, and how does GPT-3 implement it during inference?",
]

# Run every question through the RAG system and show what came back.
for question in questions:
    # Print the question before running the pipeline so it is visible even if the call fails.
    print(f"\nQuestion: {question}")
    answer, source, context = rag_pipeline(question)
    print(
        f"Answer: {answer}",
        f"Source: {source}",
        f"Context:\n{context}\n",
        sep="\n",
    )


Question: What are the main components of a RAG model, and how do they interact?
Answer: language modeling
Source: Paper 2
Context:
[Paper 2] retriever p(zjx)with parameters that returns (top-K truncated) distributions over text passages given a query xand (ii) a generator p(yijx;z;y 1:i 1)parametrized 1Code to run experiments with RAG has been open-sourced as part of the HuggingFace Transform- ers Library [ 66] and can be found at https://github.com/huggingface/transformers/blob/master/ examples/rag/ . An interactive demo of RAG models can be found at https://huggingface.co/rag/ 2 bythat generates a current token based on a context of the previous i 1tokensy1:i 1, the original inputxand a retrieved passage z. To train the retriever and generator end-to-end, we treat the retrieved document as a latent variable. We propose two models that marginalize over the latent documents in different ways to produce a distribution over generated text. In one approach, RAG-Sequence , the model 