# Re-ranking Retrieval RAG


## Overview
This notebook demonstrates how to implement a Retrieval-Augmented Generation (RAG) system with reranking capabilities using LangChain. The system enhances the quality of retrieved documents through a two-stage process: initial retrieval followed by reranking.

### Table of Contents

##### 1. Document Processing
The system processes documents through multiple stages:
- Document loading and text splitting
- Embedding generation using HuggingFace sentence transformers
- Vector store creation with FAISS for efficient similarity search

##### 2. Reranking System
Implements a sophisticated retrieval process:
- Initial retrieval using FAISS vector similarity
- Document reranking using a HuggingFace cross-encoder model (`cross-encoder/ms-marco-MiniLM-L-6-v2`)
- Top-K filtering for most relevant context selection

##### 3. Response Generation
Leverages modern language models for response generation:
- Uses GPT-4 for text generation
- Implements contextual prompt templates
- Processes retrieved context for coherent responses


### Environment

`(1) Packages`

In [1]:
# pypdf is needed by PyPDFLoader, which the Document Processing cell uses to read the PDF.
pip install langchain langchain-community faiss-cpu sentence-transformers cohere huggingface-hub pypdf

### Document Processing

In [20]:
import os
import re
from typing import List, Dict, Any

from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import Document
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.retrievers import MultiQueryRetriever
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

In [21]:

class DocumentProcessor:
    """Turns a PDF file into a list of overlapping text chunks."""

    def __init__(self, chunk_size: int = 500, chunk_overlap: int = 50):
        """Configure the splitter.

        Both arguments are forwarded unchanged to
        ``RecursiveCharacterTextSplitter``.
        """
        splitter_options = {
            "chunk_size": chunk_size,
            "chunk_overlap": chunk_overlap,
        }
        self.text_splitter = RecursiveCharacterTextSplitter(**splitter_options)

    def load_and_split(self, file_path: str) -> List[Document]:
        """Load ``file_path`` with ``PyPDFLoader`` and return the split chunks."""
        pages = PyPDFLoader(file_path).load()
        return self.text_splitter.split_documents(pages)

In [11]:
class CustomReranker:
    """Scores (query, passage) pairs with a sequence-classification model.

    The tokenizer and model are loaded once in the constructor and reused
    for every call to :meth:`compute_relevance_scores`.
    """

    def __init__(self, model_name: str = "cross-encoder/ms-marco-MiniLM-L-6-v2"):
        """Load the tokenizer and model identified by ``model_name``."""
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForSequenceClassification.from_pretrained(model_name)
        # This class only runs inference, so make evaluation mode explicit
        # (a no-op if the loader already returned the model in eval mode).
        self.model.eval()

    def compute_relevance_scores(self, query: str, passages: List[str]) -> List[float]:
        """Compute relevance scores between query and passages.

        Args:
            query: The search query paired with every passage.
            passages: Candidate texts to score.

        Returns:
            One score per passage, in the same order as ``passages``. An
            empty ``passages`` list yields an empty list. Callers in this
            file sort these descending, i.e. treat higher as more relevant.
        """
        # Nothing to score: return early instead of handing the tokenizer
        # an empty batch.
        if not passages:
            return []

        pairs = [[query, passage] for passage in passages]
        features = self.tokenizer(
            pairs,
            padding=True,
            truncation=True,
            return_tensors="pt",
            max_length=512,
        )

        # Inference only, so skip gradient tracking.
        with torch.no_grad():
            # NOTE(review): squeeze(-1) assumes the model emits a single
            # logit per pair -- confirm when swapping in another model.
            scores = self.model(**features).logits.squeeze(-1)

        return scores.tolist()

### Load Embedding and Chat Model

In [None]:
# get_llm / get_embedding_model come from the project-local llm_call module
# (not shown here). Import just the two names this notebook uses rather than
# a wildcard, so the origin of each name stays visible and nothing else in
# the namespace is silently shadowed.
from llm_call import get_embedding_model, get_llm

chat_llm = get_llm()
embedding_model = get_embedding_model()

### Enhanced Retriever with Reranking

In [18]:
class EnhancedRetriever:
    """Two-stage retriever: FAISS similarity search, then reranking."""

    def __init__(self, embeddings=None, reranker=None):
        """Set up the retriever with an empty index.

        Args:
            embeddings: Embedding model handed to FAISS when indexing.
                Defaults to the notebook-level ``embedding_model``.
            reranker: Object exposing
                ``compute_relevance_scores(query, passages)``. Defaults to
                a new ``CustomReranker``.
        """
        self.embeddings = embedding_model if embeddings is None else embeddings
        # Stays None until index_documents() has been called.
        self.vectorstore = None
        self.reranker = CustomReranker() if reranker is None else reranker

    def index_documents(self, documents: List[Document]):
        """Create FAISS index from documents (replaces any previous index)."""
        self.vectorstore = FAISS.from_documents(documents, self.embeddings)

    def retrieve_and_rerank(self, query: str, k: int = 5, rerank_k: int = 3) -> List[Document]:
        """Retrieve documents and rerank them.

        Args:
            query: Text used for both the similarity search and the reranker.
            k: Number of candidates requested from the vector store.
            rerank_k: Maximum number of documents kept after reranking.

        Returns:
            Up to ``rerank_k`` documents ordered by descending reranker
            score; an empty list when the search returns nothing.

        Raises:
            RuntimeError: If no documents have been indexed yet.
        """
        if self.vectorstore is None:
            raise RuntimeError(
                "No documents indexed; call index_documents() before "
                "retrieve_and_rerank()."
            )

        # Initial retrieval
        docs = self.vectorstore.similarity_search(query, k=k)
        if not docs:
            # Nothing to rerank; do not send an empty batch to the reranker.
            return []

        # Score each candidate's text against the query.
        passages = [doc.page_content for doc in docs]
        relevance_scores = self.reranker.compute_relevance_scores(query, passages)

        # sorted() is stable, so equally-scored documents keep their
        # similarity-search order.
        ranked = sorted(
            zip(docs, relevance_scores),
            key=lambda pair: pair[1],
            reverse=True,
        )
        return [doc for doc, _ in ranked[:rerank_k]]


### Query Generator

In [14]:

def setup_query_generator():
    """Build the LLMChain that rewrites a question into alternative phrasings.

    The chain runs on the module-level ``chat_llm`` and takes a single input
    variable, ``query``. The template ends with "1." -- presumably so the
    model's output starts directly with the first rewritten question.
    """
    template_text = """Generate three different versions of the given question to retrieve relevant documents. 
    Make the questions diverse while preserving the original meaning.
    
    Original Question: {query}
    
    Generated Questions:
    1."""

    query_prompt = PromptTemplate(
        template=template_text,
        input_variables=["query"],
    )
    return LLMChain(prompt=query_prompt, llm=chat_llm)

## Complete RAG Pipeline with Re-ranking

In [16]:
class RAGPipelineWithReranking:
    """Multi-query retrieval pipeline with a final rerank on the original query.

    Flow: generate query variations with the LLM chain, retrieve and rerank
    for the original query and for each variation, merge the hits, then
    rerank the merged pool against the original query.
    """

    # List markers the LLM may put in front of a variation ("2. ", "3) ", "- ").
    # Whitespace after the marker is required so text such as "3.5 ..." is
    # left untouched.
    _LIST_MARKER = re.compile(r"^(?:\d+[.)]|[-*])\s+")

    def __init__(self):
        self.document_processor = DocumentProcessor()
        self.retriever = EnhancedRetriever()
        self.query_generator = setup_query_generator()

    def index_documents(self, file_path: str):
        """Process and index documents"""
        documents = self.document_processor.load_and_split(file_path)
        self.retriever.index_documents(documents)

    def process_query(self, query: str, k: int = 5, rerank_k: int = 3):
        """Process query with re-ranking.

        Args:
            query: The user's question.
            k: Candidates fetched from the vector store per search query.
            rerank_k: Documents kept per search query, and the maximum
                number of documents in the final result.

        Returns:
            A dict with keys ``"original_query"``, ``"query_variations"``
            (cleaned, non-empty variations) and ``"retrieved_documents"``
            (up to ``rerank_k`` unique documents, best match first).
        """
        raw_output = self.query_generator.run(query)
        query_variations = self._parse_variations(raw_output)

        # Search the original question too, so an empty or unusable LLM
        # response still yields results. dict.fromkeys drops repeated
        # queries while keeping their order.
        search_queries = list(dict.fromkeys([query, *query_variations]))

        # Merge hits from all searches, keeping the first document seen for
        # each distinct text.
        pool = {}
        for search_query in search_queries:
            hits = self.retriever.retrieve_and_rerank(
                search_query, k=k, rerank_k=rerank_k
            )
            for doc in hits:
                pool.setdefault(doc.page_content, doc)

        # Scores from different search queries are not comparable, so the
        # merged pool is ranked once more against the original question
        # before truncating.
        ranked_docs = self._rank_against_query(query, list(pool.values()))

        return {
            "original_query": query,
            "query_variations": query_variations,
            "retrieved_documents": ranked_docs[:rerank_k],
        }

    @classmethod
    def _parse_variations(cls, raw_output: str) -> List[str]:
        """Split LLM output into clean query strings (no blanks, no list markers)."""
        cleaned = (
            cls._LIST_MARKER.sub("", line.strip()).strip()
            for line in raw_output.splitlines()
        )
        return [line for line in cleaned if line]

    def _rank_against_query(self, query: str, docs: list) -> list:
        """Order ``docs`` by descending reranker score for ``query``."""
        if not docs:
            return []
        scores = self.retriever.reranker.compute_relevance_scores(
            query, [doc.page_content for doc in docs]
        )
        ranked = sorted(zip(docs, scores), key=lambda pair: pair[1], reverse=True)
        return [doc for doc, _ in ranked]


In [2]:
# Build the pipeline and index the PDF.
pipeline = RAGPipelineWithReranking()
file_path = "./papers/2005.14165v4.pdf"
pipeline.index_documents(file_path)

# Run one example question through the pipeline.
query = "What is the difference between GPT2 and GPT3?"
result = pipeline.process_query(query)

# Report the original query, the non-blank variations, then the kept chunks.
print(f"Original Query: {result['original_query']}\n")
print("Query Variations:")
for variation in (v.strip() for v in result['query_variations']):
    if variation:
        print(f"- {variation}")
print("\nRetrieved Documents:")
for rank, doc in enumerate(result['retrieved_documents'], start=1):
    print(f"\n{rank}. {doc.page_content}")
