# Reasoning-Enhanced RAG System

A chatbot that combines retrieval-augmented generation with chain-of-thought reasoning

### Installation and Dependencies

In [None]:
!pip install faiss-cpu

Collecting faiss-cpu
  Downloading faiss_cpu-1.11.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.8 kB)
Downloading faiss_cpu-1.11.0-cp311-cp311-manylinux_2_28_x86_64.whl (31.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.3/31.3 MB[0m [31m21.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.11.0


In [None]:
!pip install docx2txt

Collecting docx2txt
  Downloading docx2txt-0.9-py3-none-any.whl.metadata (529 bytes)
Downloading docx2txt-0.9-py3-none-any.whl (4.0 kB)
Installing collected packages: docx2txt
Successfully installed docx2txt-0.9


In [None]:
import os
import numpy as np
from typing import List, Dict, Any, Optional, Tuple
import torch
from sentence_transformers import SentenceTransformer
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, AutoModelForCausalLM
import faiss
import json
import logging
from tqdm import tqdm
import glob  # For finding files matching a pattern

In [None]:
# Configure logging once: INFO level, each record as "time - level - message".
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
# Logger named after the current module, shared by the code below.
logger = logging.getLogger(__name__)

### DocumentStore: Manages document embeddings and retrieval

Uses SentenceTransformer for embedding documents

Implements FAISS for efficient vector similarity search

In [None]:
class DocumentStore:
    """Store documents with vector embeddings and retrieve them by similarity.

    Each document is a dict that must contain a ``"text"`` key; any other keys
    are kept untouched and returned with search results.

    Attributes are kept aligned: ``documents[i]`` corresponds to row ``i`` of
    ``document_embeddings`` and to vector ``i`` in the FAISS ``index``.
    """

    def __init__(self, embedding_model_name: str = "all-MiniLM-L6-v2"):
        """Initialize an empty store backed by the given embedding model.

        Args:
            embedding_model_name: Name passed to ``SentenceTransformer``.
        """
        self.embedding_model = SentenceTransformer(embedding_model_name)
        self.documents = []
        self.document_embeddings = None  # 2-D array of embeddings, or None while empty
        self.index = None  # FAISS index, or None while empty

    def add_documents(self, documents: List[Dict[str, Any]]):
        """Embed ``documents``, append them to the store and rebuild the index.

        An empty list is a no-op.

        Args:
            documents: Dicts with at least a ``"text"`` key.

        Raises:
            KeyError: If a document has no ``"text"`` key.
        """
        if not documents:
            return

        # Extract text for embedding
        texts = [doc["text"] for doc in documents]

        # FAISS works on float32 matrices, so normalise the dtype up front.
        new_embeddings = np.asarray(
            self.embedding_model.encode(texts, show_progress_bar=True),
            dtype=np.float32,
        )

        if self.document_embeddings is None:
            self.document_embeddings = new_embeddings
        else:
            self.document_embeddings = np.vstack([self.document_embeddings, new_embeddings])

        # Only record the documents once their embeddings exist, so a failed
        # encode cannot leave ``documents`` and the embeddings out of sync.
        self.documents.extend(documents)

        # Build or update FAISS index
        self._build_index()

    def _build_index(self):
        """Rebuild the FAISS index from all stored embeddings."""
        # Second dimension is the embedding length (384 for the default model).
        vector_dimension = self.document_embeddings.shape[1]
        # Exact (brute-force) index using L2 (Euclidean) distance.
        self.index = faiss.IndexFlatL2(vector_dimension)
        self.index.add(np.ascontiguousarray(self.document_embeddings, dtype=np.float32))

    def search(self, query: str, top_k: int = 5) -> List[Dict[str, Any]]:
        """Return up to ``top_k`` stored documents closest to ``query``.

        Args:
            query: Free-text query to embed and search for.
            top_k: Maximum number of documents to return.

        Returns:
            Copies of the matching documents, nearest first, each with an
            added ``"score"`` key holding the distance reported by the index
            (lower means more similar). Empty if the store is empty or
            ``top_k`` is not positive.
        """
        if self.index is None or not self.documents or top_k <= 0:
            return []

        # Never ask for more neighbours than there are documents.
        k = min(top_k, len(self.documents))
        query_embedding = np.asarray(self.embedding_model.encode([query]), dtype=np.float32)

        # Search the index
        distances, indices = self.index.search(query_embedding, k)

        results = []
        for distance, idx in zip(distances[0], indices[0]):
            idx = int(idx)
            # FAISS pads missing neighbours with label -1; without the lower
            # bound that would silently select the last document.
            if not 0 <= idx < len(self.documents):
                continue
            doc = self.documents[idx].copy()
            doc["score"] = float(distance)
            results.append(doc)

        return results

### ReasoningModule: Generates chain-of-thought reasoning

Uses a language model to analyze context

Produces step-by-step reasoning about retrieved documents

#### Reasoning Module with flan-t5-base

In [None]:
class ReasoningModule:
    """Produce step-by-step reasoning text from a query and retrieved documents."""

    def __init__(self, model_name: str = "google/flan-t5-base"):
        """Load the tokenizer and seq2seq model identified by ``model_name``."""
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

    def generate_reasoning(self, query: str, context: List[Dict[str, Any]]) -> str:
        """Return the model's reasoning for ``query`` given ``context`` documents.

        Args:
            query: The user's question.
            context: Documents (dicts with a ``"text"`` key) to reason over.

        Returns:
            The decoded first generated sequence, special tokens removed.
        """
        encoded = self.tokenizer(
            self._create_reasoning_prompt(query, context),
            return_tensors="pt",
            padding=True,
            truncation=True,
        )

        # Beam search settings used for the reasoning pass.
        generation_options = {
            "max_length": 512,
            "num_beams": 3,
            "early_stopping": True,
        }
        generated = self.model.generate(**encoded, **generation_options)

        return self.tokenizer.decode(generated[0], skip_special_tokens=True)

    def _create_reasoning_prompt(self, query: str, context: List[Dict[str, Any]]) -> str:
        """Assemble the reasoning prompt: instructions, numbered documents, question."""
        numbered_docs = []
        for position, doc in enumerate(context, start=1):
            numbered_docs.append(f"Document {position}: {doc['text']}")
        context_str = "\n\n".join(numbered_docs)

        # The prompt starts and ends with a newline, hence the empty first
        # and last entries.
        prompt_lines = [
            "",
            "Given the following context information and question, reason step by step to find the answer.",
            "",
            "Context:",
            context_str,
            "",
            f"Question: {query}",
            "",
            "Let's think about this step by step:",
            "",
        ]
        return "\n".join(prompt_lines)



### RAGReasoner: Orchestrates the entire pipeline

Initial document retrieval

Reasoning generation

Query refinement based on reasoning

Final document retrieval with refined query

Answer generation

#### RAGReasoner with Flan-T5-Base

In [None]:
class RAGReasoner:
    """Combine retrieval and reasoning into a single query pipeline.

    The pipeline is: retrieve -> reason -> refine the query -> retrieve again
    -> generate the final answer.
    """

    def __init__(
        self,
        document_store: "DocumentStore",
        reasoning_module: "ReasoningModule",
        model_name: str = "google/flan-t5-base",
        retrieval_k: int = 5
    ):
        """Initialize with components and parameters.

        Args:
            document_store: Store used for both retrieval passes.
            reasoning_module: Component producing the reasoning text.
            model_name: Seq2seq model used for query refinement and answering.
            retrieval_k: Number of documents requested per retrieval pass.
        """
        # Annotations above are quoted so this class can be defined without
        # the collaborating classes being in scope yet.
        self.document_store = document_store
        self.reasoning_module = reasoning_module
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
        self.retrieval_k = retrieval_k

    def process_query(self, query: str) -> Dict[str, Any]:
        """Run ``query`` through the full pipeline.

        Returns:
            Dict with keys ``"query"``, ``"refined_query"``, ``"reasoning"``,
            ``"initial_docs"``, ``"refined_docs"`` and ``"answer"``.
        """
        # 1. Initial document retrieval
        initial_docs = self.document_store.search(query, self.retrieval_k)

        # 2. Generate reasoning based on retrieved documents
        reasoning = self.reasoning_module.generate_reasoning(query, initial_docs)

        # 3. Use reasoning to refine the query
        refined_query = self._refine_query(query, reasoning)

        # 4. Retrieve documents again with the refined query
        refined_docs = self.document_store.search(refined_query, self.retrieval_k)

        # 5. Generate the final answer
        answer = self._generate_answer(query, reasoning, refined_docs)

        return {
            "query": query,
            "refined_query": refined_query,
            "reasoning": reasoning,
            "initial_docs": initial_docs,
            "refined_docs": refined_docs,
            "answer": answer
        }

    def _generate_text(self, prompt: str, max_length: int, num_beams: int) -> str:
        """Tokenize ``prompt``, run beam-search generation and decode the first output."""
        inputs = self.tokenizer(prompt, return_tensors="pt", padding=True, truncation=True)
        outputs = self.model.generate(
            **inputs,
            max_length=max_length,
            num_beams=num_beams,
            early_stopping=True
        )
        return self.tokenizer.decode(outputs[0], skip_special_tokens=True)

    def _refine_query(self, original_query: str, reasoning: str) -> str:
        """Ask the model for a refined search query based on ``reasoning``.

        Returns:
            The generated query with surrounding whitespace removed, or
            ``original_query`` when the model produces nothing usable.
        """
        prompt = f"""
Original query: {original_query}

Reasoning process:
{reasoning}

Based on this reasoning, provide a refined and expanded search query that would better retrieve relevant information:
"""

        refined_query = self._generate_text(prompt, max_length=128, num_beams=3).strip()
        # A blank refinement would make the second retrieval pass meaningless,
        # so fall back to what the user actually asked.
        return refined_query or original_query

    def _generate_answer(self, query: str, reasoning: str, documents: List[Dict[str, Any]]) -> str:
        """Generate the final answer from the query, reasoning and documents.

        Args:
            query: The original user query.
            reasoning: Reasoning text from the reasoning module.
            documents: Retrieved documents (dicts with a ``"text"`` key).
        """
        # Create context string from documents
        context_str = "\n\n".join([f"Document {i+1}: {doc['text']}" for i, doc in enumerate(documents)])

        prompt = f"""
Query: {query}

Reasoning process:
{reasoning}

Retrieved documents:
{context_str}

Based on the reasoning and documents, provide a comprehensive and accurate answer to the query:
"""

        return self._generate_text(prompt, max_length=256, num_beams=5)



### ReasoningRAGChatbot: Provides the chat interface

Tracks conversation history

Handles user inputs

Returns responses or detailed results

In [None]:
class ReasoningRAGChatbot:
    """Chatbot interface for the RAG Reasoning system.

    Keeps ``conversation_history`` as a list of ``{"role", "content"}`` dicts,
    always stored as complete user/assistant pairs.
    """

    def __init__(self, rag_reasoner: "RAGReasoner"):
        """Initialize with the reasoner that answers queries."""
        # Annotation is quoted so the class does not need RAGReasoner in scope
        # at definition time.
        self.rag_reasoner = rag_reasoner
        self.conversation_history = []

    def _run_turn(self, user_input: str) -> Tuple[Dict[str, Any], str]:
        """Answer ``user_input`` and record the exchange; return (result, response)."""
        result = self.rag_reasoner.process_query(user_input)
        response = f"Answer: {result['answer']}"

        # Record the turn only after it succeeded, so a failing query never
        # leaves an unanswered user message in the history.
        self.conversation_history.extend([
            {"role": "user", "content": user_input},
            {"role": "assistant", "content": response},
        ])
        return result, response

    def chat(self, user_input: str) -> str:
        """Process user input and return the formatted answer string.

        Raises:
            Whatever ``rag_reasoner.process_query`` raises; the history is left
            unchanged in that case.
        """
        _, response = self._run_turn(user_input)
        return response

    def get_detailed_response(self, user_input: str) -> Dict[str, Any]:
        """Process user input and return the full pipeline result dict.

        The history is updated exactly as for ``chat``.
        """
        result, _ = self._run_turn(user_input)
        return result

    def clear_history(self):
        """Clear conversation history"""
        self.conversation_history = []



### Prepare Dataset

In [None]:
class Dataset:
    """Utility class for loading datasets"""

    @staticmethod
    def load_json(filepath: str) -> List[Dict[str, Any]]:
        """Load documents from a JSON file.

        Args:
            filepath: Path to the JSON file.

        Returns:
            The parsed JSON content, returned as-is.

        Raises:
            OSError: If the file cannot be opened.
            json.JSONDecodeError: If the file is not valid JSON.
        """
        # Read as UTF-8 explicitly; the platform default encoding would
        # otherwise corrupt or reject non-ASCII text on some systems.
        with open(filepath, 'r', encoding='utf-8') as f:
            return json.load(f)

    @staticmethod
    def create_documents_from_texts(texts: List[str], metadata: Optional[List[Dict[str, Any]]] = None) -> List[Dict[str, Any]]:
        """Create document dicts from text strings.

        Args:
            texts: One string per document.
            metadata: Optional per-document dicts merged into the document at
                the same position; may be shorter than ``texts``.

        Returns:
            One dict per text with ``"id"`` (its position) and ``"text"``,
            plus any metadata keys. Metadata is merged last, so its keys win
            on conflict.
        """
        documents = []

        for i, text in enumerate(texts):
            doc = {"id": i, "text": text}

            if metadata and i < len(metadata):
                doc.update(metadata[i])

            documents.append(doc)

        return documents


### Evaluation

In [None]:
class Evaluation:
    """Evaluation metrics for the RAG system"""

    @staticmethod
    def evaluate_retrieval(retrieved_docs: List[Dict[str, Any]], relevant_ids: List[int]) -> Dict[str, float]:
        """Compute set-based precision, recall and F1 for one retrieval result.

        Ids are compared as sets, so a document id repeated in either input
        counts once.

        Args:
            retrieved_docs: Retrieved documents; each must have an ``"id"`` key.
            relevant_ids: Ids of the documents considered relevant.

        Returns:
            Dict with ``"precision"``, ``"recall"`` and ``"f1"``. Precision is
            0.0 when nothing was retrieved; recall is 1.0 when there are no
            relevant ids.
        """
        retrieved = {doc["id"] for doc in retrieved_docs}
        relevant = set(relevant_ids)
        hits = len(retrieved & relevant)

        # Denominators use the deduplicated sets to match the set-based
        # numerator; raw list lengths would deflate the scores on duplicates.
        precision = hits / len(retrieved) if retrieved else 0.0
        recall = hits / len(relevant) if relevant else 1.0

        if precision + recall == 0:
            f1 = 0.0
        else:
            f1 = 2 * (precision * recall) / (precision + recall)

        return {
            "precision": precision,
            "recall": recall,
            "f1": f1
        }


### Main function for sample texts

In [None]:
# Main execution
def main():
    """Build the demo system, answer one sample query, then start a chat loop.

    The chat loop ends when the user types ``exit`` or ``quit``, or when input
    is closed or interrupted (EOF / Ctrl-C). Blank lines are ignored.
    """
    # Sample documents
    sample_texts = ["OpenAI's GPT-4 is a multimodal large language model capable of understanding both text and image inputs.",
                    "Retrieval-Augmented Generation (RAG) enhances chatbot performance by fetching relevant external information in real-time before generating a response.",
                    "Vector databases like FAISS, Pinecone, and Weaviate are commonly used for storing and retrieving document embeddings in RAG pipelines.",
                    "Fine-tuning or prompt engineering can help tailor the responses of LLMs to domain-specific use cases, such as legal, healthcare, or customer support.",
                    "Embedding models like text-embedding-ada-002 from OpenAI or all-MiniLM-L6-v2 from Sentence Transformers are popular for generating vector representations of text.",
                    "LangChain and LlamaIndex are popular frameworks for building RAG-based applications and connecting LLMs with knowledge sources.",
                    "Chunking strategies, such as splitting by sentence or paragraph, play a critical role in retrieval accuracy and context relevance.",
                    "RAG systems can outperform standard chatbots in enterprise environments by reducing hallucinations and increasing factual accuracy.",
                    "Prompt engineering involves crafting inputs to guide language models toward more accurate, useful, and reliable outputs.",
                    "Document pre-processing, metadata tagging, and semantic search are foundational components for building effective RAG systems."
                  ]

    # Create documents
    documents = Dataset.create_documents_from_texts(sample_texts)

    # Initialize components
    document_store = DocumentStore()
    document_store.add_documents(documents)

    reasoning_module = ReasoningModule()

    rag_reasoner = RAGReasoner(document_store, reasoning_module)

    chatbot = ReasoningRAGChatbot(rag_reasoner)

    # Demo query
    query = "How does RAG relate to NLP?"
    print(f"Query: {query}")

    result = chatbot.get_detailed_response(query)
    print("\nDetailed Response:")
    print(f"Reasoning: {result['reasoning']}")
    print(f"Refined Query: {result['refined_query']}")
    print(f"Answer: {result['answer']}")

    # Simple chat interface
    print("\n--- Simple Chat Interface ---")
    while True:
        try:
            user_input = input("\nYou: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or Ctrl-C ends the session cleanly instead of
            # surfacing a traceback.
            print()
            break

        if not user_input:
            # Nothing to answer; don't run the whole pipeline on a blank line.
            continue
        if user_input.lower() in ("exit", "quit"):
            break

        response = chatbot.chat(user_input)
        print(f"Bot: {response}")


In [None]:
# Run the demo only when this module is the entry point, not when it is imported.
if __name__ == "__main__":
    main()

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Query: How does RAG relate to NLP?

Detailed Response:
Reasoning: Document pre-processing, metadata tagging, and semantic search are foundational components for building effective RAG systems. Document 3: Vector databases like FAISS, Pinecone, and Weaviate are commonly used for storing and retrieving document embeddings in RAG pipelines. Document 4: RAG systems can outperform standard chatbots in enterprise environments by reducing hallucinations and increasing factual accuracy. Document 5: Chunking strategies, such as splitting by sentence or paragraph, play a critical role in retrieval accuracy and context relevance. Therefore, the final answer is document pre-processing.
Refined Query: Document pre-processing, metadata tagging, and semantic search are foundational components for building effective RAG systems
Answer: document pre-processing

--- Simple Chat Interface ---

You: What is the role of RAG?
Bot: Answer: Enhances chatbot performance by fetching relevant external informatio