## Data Ingestion

In [3]:
## Document structure

from langchain_core.documents import Document

In [4]:
doc = Document(
    page_content="this is the main text context I am using to create RAG",
    metadata = {
        "source": "example.txt",
        "pages":1,
        "author": "Diptiranjan Nayak",
        "date_created":"2025-01-01"
    }
)

doc

Document(metadata={'source': 'example.txt', 'pages': 1, 'author': 'Diptiranjan Nayak', 'date_created': '2025-01-01'}, page_content='this is the main text context I am using to create RAG')

In [5]:
## Creating a simple txt file

import os
os.makedirs("data/text_files", exist_ok=True)

In [6]:
sample_texts={
    "data/text_files/python_intro.txt":"""Python Programming Introduction

Python is a high-level, interpreted programming language known for its simplicity and readability.
Created by Guido van Rossum and first released in 1991, Python has become one of the most popular
programming languages in the world.

Key Features:
- Easy to learn and use
- Extensive standard library
- Cross-platform compatibility
- Strong community support

Python is widely used in web development, data science, artificial intelligence, and automation.""",
    
    "data/text_files/machine_learning.txt": """Machine Learning Basics

Machine learning is a subset of artificial intelligence that enables systems to learn and improve
from experience without being explicitly programmed. It focuses on developing computer programs
that can access data and use it to learn for themselves.

Types of Machine Learning:
1. Supervised Learning: Learning with labeled data
2. Unsupervised Learning: Finding patterns in unlabeled data
3. Reinforcement Learning: Learning through rewards and penalties

Applications include image recognition, speech processing, and recommendation systems
    
    
    """

}

for filepath,content in sample_texts.items():
    with open(filepath,'w',encoding="utf-8") as f:
        f.write(content)

print("‚úÖ Sample text files created!")

‚úÖ Sample text files created!


In [7]:
## TextLoader - to read the file using it

# from langchain.document_loaders import TextLoader
from langchain_community.document_loaders import TextLoader

loader = TextLoader("data/text_files/python_intro.txt", encoding = "utf-8")
document=loader.load()
print(document)

[Document(metadata={'source': 'data/text_files/python_intro.txt'}, page_content='Python Programming Introduction\n\nPython is a high-level, interpreted programming language known for its simplicity and readability.\nCreated by Guido van Rossum and first released in 1991, Python has become one of the most popular\nprogramming languages in the world.\n\nKey Features:\n- Easy to learn and use\n- Extensive standard library\n- Cross-platform compatibility\n- Strong community support\n\nPython is widely used in web development, data science, artificial intelligence, and automation.')]


In [8]:
## Directory Loader

from langchain_community.document_loaders import DirectoryLoader

# to load all the text files from the directory
dir_loader = DirectoryLoader(
    "data/text_files",
    glob = "**/*.txt",  # Pattern to match the files
    loader_cls = TextLoader,  # loader class to use
    loader_kwargs = {'encoding':'utf-8'},
    show_progress = False
)

documents = dir_loader.load()
documents

[Document(metadata={'source': 'data\\text_files\\machine_learning.txt'}, page_content='Machine Learning Basics\n\nMachine learning is a subset of artificial intelligence that enables systems to learn and improve\nfrom experience without being explicitly programmed. It focuses on developing computer programs\nthat can access data and use it to learn for themselves.\n\nTypes of Machine Learning:\n1. Supervised Learning: Learning with labeled data\n2. Unsupervised Learning: Finding patterns in unlabeled data\n3. Reinforcement Learning: Learning through rewards and penalties\n\nApplications include image recognition, speech processing, and recommendation systems\n\n\n    '),
 Document(metadata={'source': 'data\\text_files\\python_intro.txt'}, page_content='Python Programming Introduction\n\nPython is a high-level, interpreted programming language known for its simplicity and readability.\nCreated by Guido van Rossum and first released in 1991, Python has become one of the most popular\npro

In [9]:
from langchain_community.document_loaders import PyMuPDFLoader
# we can also use PyPDFLoader

# to load all the text files from the directory
dir_loader = DirectoryLoader(
    "data/pdf",
    glob = "**/*.pdf",  # Pattern to match the files
    loader_cls = PyMuPDFLoader,  # loader class to use
    show_progress = False
)

pdf_documents = dir_loader.load()
pdf_documents

[Document(metadata={'producer': 'Microsoft¬Æ Word 2016', 'creator': 'Microsoft¬Æ Word 2016', 'creationdate': '2025-12-26T10:34:43+05:30', 'source': 'data\\pdf\\ch-7_CN.pdf', 'file_path': 'data\\pdf\\ch-7_CN.pdf', 'total_pages': 15, 'format': 'PDF 1.5', 'title': '', 'author': 'ANIMA PANDA', 'subject': '', 'keywords': '', 'moddate': '2025-12-26T10:34:43+05:30', 'trapped': '', 'modDate': "D:20251226103443+05'30'", 'creationDate': "D:20251226103443+05'30'", 'page': 0}, page_content='Data Center Architecture and Cloud \nComputing \nChapter-7 \n \nWhen designing a network for an organization, whether it‚Äôs a small, medium, or large \nnetwork, it‚Äôs important to consider the following factors: \n\uf0b7 Fault tolerance and redundancy \n\uf0b7 Scalability \n\uf0b7 Security \n\uf0b7 QoS \nFault Tolerance \n\uf0b7 \nAbility of a network/device to continue functioning even if a component fails. \n\uf0b7 \nEnsures continuous service even during hardware failures like damaged routers, \nswitches, 

## Chunking

In [10]:
# Creating Data Chunks 

from langchain_text_splitters import RecursiveCharacterTextSplitter

def split_documents(documents,chunk_size=1000,chunk_overlap=200):
    """
    Split documents into smaller chunks for better RAG performance.
    
    Parameters:
    - chunk_size: Maximum characters per chunk (adjust based on your LLM)
    - chunk_overlap: Characters to overlap between chunks (preserves context)
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size, # Each chunk: ~1000 characters
        chunk_overlap=chunk_overlap, # 200 chars overlap for context
        length_function=len, # How to measure length
        separators=["\n\n", "\n", " ", ""] # Split hierarchy
    )
    # Actually split the documents
    split_docs = text_splitter.split_documents(documents)
    print(f"Split {len(documents)} documents into {len(split_docs)} chunks")
    
    # Show what a chunk looks like
    if split_docs:
        print(f"\nExample chunk:")
        print(f"Content: {split_docs[0].page_content[:200]}...")
        print(f"Metadata: {split_docs[0].metadata}")
    
    return split_docs

In [11]:
chunks = split_documents(pdf_documents)
chunks

Split 90 documents into 164 chunks

Example chunk:
Content: Data Center Architecture and Cloud 
Computing 
Chapter-7 
 
When designing a network for an organization, whether it‚Äôs a small, medium, or large 
network, it‚Äôs important to consider the following fact...
Metadata: {'producer': 'Microsoft¬Æ Word 2016', 'creator': 'Microsoft¬Æ Word 2016', 'creationdate': '2025-12-26T10:34:43+05:30', 'source': 'data\\pdf\\ch-7_CN.pdf', 'file_path': 'data\\pdf\\ch-7_CN.pdf', 'total_pages': 15, 'format': 'PDF 1.5', 'title': '', 'author': 'ANIMA PANDA', 'subject': '', 'keywords': '', 'moddate': '2025-12-26T10:34:43+05:30', 'trapped': '', 'modDate': "D:20251226103443+05'30'", 'creationDate': "D:20251226103443+05'30'", 'page': 0}


[Document(metadata={'producer': 'Microsoft¬Æ Word 2016', 'creator': 'Microsoft¬Æ Word 2016', 'creationdate': '2025-12-26T10:34:43+05:30', 'source': 'data\\pdf\\ch-7_CN.pdf', 'file_path': 'data\\pdf\\ch-7_CN.pdf', 'total_pages': 15, 'format': 'PDF 1.5', 'title': '', 'author': 'ANIMA PANDA', 'subject': '', 'keywords': '', 'moddate': '2025-12-26T10:34:43+05:30', 'trapped': '', 'modDate': "D:20251226103443+05'30'", 'creationDate': "D:20251226103443+05'30'", 'page': 0}, page_content='Data Center Architecture and Cloud \nComputing \nChapter-7 \n \nWhen designing a network for an organization, whether it‚Äôs a small, medium, or large \nnetwork, it‚Äôs important to consider the following factors: \n\uf0b7 Fault tolerance and redundancy \n\uf0b7 Scalability \n\uf0b7 Security \n\uf0b7 QoS \nFault Tolerance \n\uf0b7 \nAbility of a network/device to continue functioning even if a component fails. \n\uf0b7 \nEnsures continuous service even during hardware failures like damaged routers, \nswitches, 

## Embeddings and VectorStoreDB

In [12]:
%pip install langchain-huggingface sentence-transformers langchain-chroma

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.3 -> 26.0.1
[notice] To update, run: C:\Users\drnay\Documents\AI Engg\rag_pipeline_env\Scripts\python.exe -m pip install --upgrade pip


In [13]:
import numpy as np
from sentence_transformers import SentenceTransformer
import chromadb
from chromadb.config import Settings
import uuid
from typing import List, Dict, Any, Tuple
from sklearn.metrics.pairwise import cosine_similarity

In [14]:
class EmbeddingManager:
    """Handles document embedding generation using SentenceTransformer"""
    
    def __init__(self, model_name: str = "all-MiniLM-L6-v2"):
        """
        Initialize the embedding manager
        
        Args:
            model_name: HuggingFace model name for sentence embeddings
        """
        self.model_name = model_name
        self.model = None
        self._load_model()

    def _load_model(self):
        """Load the SentenceTransformer model"""
        try:
            print(f"Loading embedding model: {self.model_name}")
            self.model = SentenceTransformer(self.model_name)
            print(f"Model loaded successfully. Embedding dimension: {self.model.get_sentence_embedding_dimension()}")
        except Exception as e:
            print(f"Error loading model {self.model_name}: {e}")
            raise

    def generate_embeddings(self, texts: List[str]) -> np.ndarray:
        """
        Generate embeddings for a list of texts
        
        Args:
            texts: List of text strings to embed
            
        Returns:
            numpy array of embeddings with shape (len(texts), embedding_dim)
        """
        if not self.model:
            raise ValueError("Model not loaded")
        
        print(f"Generating embeddings for {len(texts)} texts...")
        embeddings = self.model.encode(texts, show_progress_bar=True)
        print(f"Generated embeddings with shape: {embeddings.shape}")
        return embeddings


## initialize the embedding manager

embedding_manager=EmbeddingManager()
embedding_manager

Loading embedding model: all-MiniLM-L6-v2
Model loaded successfully. Embedding dimension: 384


<__main__.EmbeddingManager at 0x192244cfa10>

## Vector Store

In [15]:
class VectorStore:
    """Messages document embeddings in a ChromaDB Vector Store"""

    def __init__(self, collection_name = "pdf_documents", persist_directory: str = "data/vector_store"):
        """
        Initialize the vector store

        Args:
            collection_name: Name of the chromaDB collection
            persist_directory: Directory to persist the vector store
        """

        self.collection_name = collection_name
        self.persist_directory  = persist_directory
        self.client = None
        self.collection = None
        self._initialize_store()


    def _initialize_store(self):
        """Initialize the ChromaDB client and collection"""

        try:
            # Create persitent ChromaDB client
            os.makedirs(self.persist_directory, exist_ok = True)
            self.client = chromadb.PersistentClient(path = self.persist_directory)

            # Get or create collection
            self.collection = self.client.get_or_create_collection(
                name = self.collection_name,
                metadata = {"description": "PDF Document embeddings for RAG"}
            )

            print(f"Vector store initialized. Collection: {self.collection_name}")
            print(f"Existing documents in collection: {self.collection.count()}")

        except Exception as e:
            print(f"Error initializing the vector store: {e}")
            raise
            
    def add_documents(self, documents: List[Any], embeddings: np.ndarray):
        """
        Add documents and their embeddings to the vector store

        Args:
            documents: List of LangChain documents
            embeddings: Corresponding embeddings for the documents
        """

        if(len(documents) != len(embeddings)):
            raise ValueError("Number of documents must match number of embeddings")

        print(f"Adding {len(documents)} documents to vector store...")

        ids = []
        metadatas = []
        documents_text = []
        embeddings_list = []

        for i, (doc, embedding) in enumerate(zip(documents, embeddings)):
            # Generate unique id
            doc_id  = f"doc_{uuid.uuid4().hex[:8]}_{i}"
            ids.append(doc_id)

            # Perpare metadata
            metadata = dict(doc.metadata)
            metadata["doc_index"] = i
            metadata["content_length"] = len(doc.page_content)
            metadatas.append(metadata)

            # Document content
            documents_text.append(doc.page_content)

            # Embedding
            embeddings_list.append(embedding.tolist())

        # Add to collection
        try:
            self.collection.add(
                ids = ids,
                embeddings = embeddings_list,
                metadatas = metadatas,
                documents = documents_text
            )

            print(f"Successfully added {len(documents)} documents to vector store")
            print(f"Total documents in collection: {self.collection.count()}")

        except Exception as e:
            print(f"Error adding documents to the vector store: {e}")
            raise


vectorstore = VectorStore()
vectorstore

Vector store initialized. Collection: pdf_documents
Existing documents in collection: 492


<__main__.VectorStore at 0x19227395010>

## Extract the text and coverting them to Embeddings

In [16]:
 # Convert the text to embeddings
texts = [doc.page_content for doc in chunks]

# Genrate the embeddings
embeddings = embedding_manager.generate_embeddings(texts)

# Store in the vector db
vectorstore.add_documents(chunks, embeddings)

# This is the end of the ingestion pipeline, after this will be the start of retrieval pipeline

Generating embeddings for 164 texts...


Batches:   0%|          | 0/6 [00:00<?, ?it/s]

Generated embeddings with shape: (164, 384)
Adding 164 documents to vector store...
Successfully added 164 documents to vector store
Total documents in collection: 656


## Retriever Pipeline from Vector Store

In [17]:
class RAGRetriever:
    """Handles query based retrieval from the vector store"""
    
    def __init__(self, vector_store: VectorStore, embedding_manager: EmbeddingManager):
        """
        Initialize the retriever

        Args:
            vector_store = vector store containing document embeddings
            embedding_manager = Manager for generating query emebeddings
        """

        self.vector_store = vector_store
        self.embedding_manager = embedding_manager

    def retrieve(self, query: str, top_k: int = 5, score_threshold: float = 0.0) -> List[Dict[str, Any]]:
        """
        Retrieve relevant documents for the query

        Args:
            query: the search query 
            top_k: Number of top results to return 
            score_threshold: Minimum similarity score threshold

        returns:
            list of dictionaries containing retrieved documents and metadata
        """
        
        print(f"Retrieving documents for query {query}")
        print(f"Top k: {top_k}, Score threshold: {score_threshold}")

        query_embeddings = self.embedding_manager.generate_embeddings([query])[0]

        try:
            results = self.vector_store.collection.query(
                query_embeddings = [query_embeddings.tolist()],
                n_results = top_k
            )

            retrieved_docs = []

            if results['documents'] and results['documents'][0]:
                documents = results['documents'][0]
                metadatas = results['metadatas'][0]
                distances = results['distances'][0]
                ids = results['ids'][0]
                
                for i, (doc_id, document, metadata, distance) in enumerate(zip(ids, documents, metadatas, distances)):
                    # Convert distance to similarity score (ChromaDB uses cosine distance)
                    similarity_score = 1 - distance
                    
                    if similarity_score >= score_threshold:
                        retrieved_docs.append({
                            'id': doc_id,
                            'content': document,
                            'metadata': metadata,
                            'similarity_score': similarity_score,
                            'distance': distance,
                            'rank': i + 1
                        })
                
                print(f"Retrieved {len(retrieved_docs)} documents (after filtering)")
            else:
                print("No documents found")
            
            return retrieved_docs
            
        except Exception as e:
            print(f"Error during retrieval: {e}")
            return []

rag_retriever = RAGRetriever(vectorstore,embedding_manager)            

In [18]:
rag_retriever

<__main__.RAGRetriever at 0x1922768c6e0>

In [19]:
rag_retriever.retrieve("Main Responsibilities of IANA")

Retrieving documents for query Main Responsibilities of IANA
Top k: 5, Score threshold: 0.0
Generating embeddings for 1 texts...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Generated embeddings with shape: (1, 384)
Retrieved 4 documents (after filtering)


[{'id': 'doc_d74681a6_34',
  'content': 'Understanding IPv4 and IPv6 \nAddressing \nChapter-4 \nWhat is IANA? \nThe Internet Assigned Numbers Authority (IANA) is a global organization responsible for \ncoordinating and managing key elements of the Internet‚Äôs addressing system. \nIANA operates under ICANN (Internet Corporation for Assigned Names and Numbers). \nMain Responsibilities of IANA \n1. IPv4 and IPv6 address allocation \no IANA manages the entire global pool of public IPv4 and IPv6 addresses. \no It allocates large address blocks to the world‚Äôs Regional Internet Registries \n(RIRs). \n2. Autonomous System Number (ASN) allocation \no ASNs are used by organizations that run routers participating in BGP. \no IANA assigns ASN blocks to RIRs. \n3. Protocol assignments \no Manages protocol numbers, port numbers, and other identifiers (e.g., \nTCP/UDP ports, ICMP types). \n4. DNS Root Zone Management \no Maintains the world‚Äôs root DNS servers. \no Ensures proper functioning of T

## Integration of VectorDB Context pipeline with LLM Output

In [20]:
pip install langchain

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.3 -> 26.0.1
[notice] To update, run: C:\Users\drnay\Documents\AI Engg\rag_pipeline_env\Scripts\python.exe -m pip install --upgrade pip


In [21]:
pip install -U langchain-groq

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.3 -> 26.0.1
[notice] To update, run: C:\Users\drnay\Documents\AI Engg\rag_pipeline_env\Scripts\python.exe -m pip install --upgrade pip


In [26]:
# Simple RAG pipeline with Groq LLM

# from langchain.chat_models import ChatOpenAI
from langchain_groq import ChatGroq
import os
from dotenv import load_dotenv

load_dotenv()

# aimlapi_api_key = os.getenv("AIMLAPI_API_KEY")
groq_api_key = os.getenv("GROQ_API_KEY")

# llm = ChatOpenAI(
#     model="gpt-4o-mini",
#     api_key=aimlapi_api_key,
#     base_url="https://api.aimlapi.com/v1",
#     temperature=0.2,
#     max_tokens=1024,
# )

llm = ChatGroq(
    model_name="llama-3.1-8b-instant",
    api_key=groq_api_key,
    temperature=0.2,
    max_tokens=1024,
)

# Simple RAG function: retireve context + generate response
def rag_simple(query, retriever, llm, top_k=3):
    results = retriever.retrieve(query, top_k=top_k)

    # retrieve the context
    context = "\n\n".join([doc["content"] for doc in results]) if results else ""

    if not context:
        return "No relevant context found for the query"

    # generating response
    prompt = f"""
Use the following context to answer the query concisely.

Context:
{context}

Query: {query}

Answer:
"""

    response = llm.invoke(prompt)
    return response.content


In [27]:
answer = rag_simple("How SFTP Works?", rag_retriever, llm)
print(answer)

Retrieving documents for query How SFTP Works?
Top k: 3, Score threshold: 0.0
Generating embeddings for 1 texts...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Generated embeddings with shape: (1, 384)
Retrieved 3 documents (after filtering)
SFTP (SSH File Transfer Protocol) works by establishing two connections: 

1. Control Connection (Port 22): This connection is used for authentication and commands.
2. Data Connection (Port 20): This connection is used to transfer files or directory listings.

Once both connections are set up, data can be transferred in both directions over the data connection. SFTP sends all data through an encrypted SSH tunnel, protecting it from hackers.


## Enhanced RAG Pipeline Features

In [33]:
# RAG Pipelines with advanced features

def rag_advanced(query, retriever, llm, top_k=5, min_score=0.2, return_context=False):
    """
    RAG Pipeline with extra features:
    - Returns answer, sources, confidence score, and optionally full context
    :param query:
    :param retriever:
    :param llm:
    :param top_k:
    :param min_score:
    :param return_context:
    :return:
    """

    results = retriever.retrieve(query, top_k=top_k, score_threshold = min_score)
    if not results:
        return {'answer': 'No relevant context found.', 'sources': [], 'confidence': 0.0, 'context': ''}

    # Prepare context and sources
    context  = "\n\n".join([doc['content'] for doc in results])
    sources = [{
        'source': doc['metadata'].get('source_file', doc['metadata'].get('source', 'unknown')),
        'page': doc['metadata'].get('page', 'unknown'),
        'score': doc['similarity_score'],
        'preview': doc['content'][:200] + '...'
    } for doc in results]

    confidence = max([doc['similarity_score'] for doc in results])

    prompt = f"""Use the following context to answer the query concisely.\nContext: \n{context}\n\nQuery: {query}\n\nAnswer:"""

    response = llm.invoke([prompt.format(context=context, query=query)])

    output = {
        'answer': response.content,
        'sources': sources,
        'confidence': confidence
    }

    if return_context:
        output['context'] = context

    return output

result = rag_advanced("What is a Hub?", rag_retriever, llm, top_k=3, min_score=0.3, return_context=True)
print("Answer:" ,result['answer'])
print("Sources:",result['sources'])
print("Confidence:",result['confidence'])
print("Context:",result['context'][:200])

Retrieving documents for query What is a Hub?
Top k: 3, Score threshold: 0.3
Generating embeddings for 1 texts...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Generated embeddings with shape: (1, 384)
Retrieved 3 documents (after filtering)
Answer: A network hub is an old networking device that works at OSI Layer 1, acting like a repeater by sending incoming electrical signals to all other ports, not just the intended device.
Sources: [{'source': 'data\\pdf\\chapter-8_CN.pdf', 'page': 0, 'score': 0.49643439054489136, 'preview': 'Networking Devices \n \nNetworking devices are tools that connect different parts of a network and help users share \ninformation and resources. \nHUB \nA network hub is an old networking device that works...'}, {'source': 'data\\pdf\\chapter-8_CN.pdf', 'page': 0, 'score': 0.49643439054489136, 'preview': 'Networking Devices \n \nNetworking devices are tools that connect different parts of a network and help users share \ninformation and resources. \nHUB \nA network hub is an old networking device that works...'}, {'source': 'data\\pdf\\chapter-8_CN.pdf', 'page': 0, 'score': 0.49643439054489136, 'preview': 'Networking

In [35]:
# --- Advanced RAG Pipeline: Streaming, Citations, History, Summarization ---
from typing import List, Dict, Any
import time

class AdvancedRAGPipeline:
    def __init__(self, retriever, llm):
        self.retriever = retriever
        self.llm = llm
        self.history = []  # Store query history

    def query(self, question: str, top_k: int = 5, min_score: float = 0.2, stream: bool = False, summarize: bool = False) -> Dict[str, Any]:
        # Retrieve relevant documents
        results = self.retriever.retrieve(question, top_k=top_k, score_threshold=min_score)
        if not results:
            answer = "No relevant context found."
            sources = []
            context = ""
        else:
            context = "\n\n".join([doc['content'] for doc in results])
            sources = [{
                'source': doc['metadata'].get('source_file', doc['metadata'].get('source', 'unknown')),
                'page': doc['metadata'].get('page', 'unknown'),
                'score': doc['similarity_score'],
                'preview': doc['content'][:120] + '...'
            } for doc in results]
            # Streaming answer simulation
            prompt = f"""Use the following context to answer the question concisely.\nContext:\n{context}\n\nQuestion: {question}\n\nAnswer:"""
            if stream:
                print("Streaming answer:")
                for i in range(0, len(prompt), 80):
                    print(prompt[i:i+80], end='', flush=True)
                    time.sleep(0.05)
                print()
            response = self.llm.invoke([prompt.format(context=context, question=question)])
            answer = response.content

        # Add citations to answer
        citations = [f"[{i+1}] {src['source']} (page {src['page']})" for i, src in enumerate(sources)]
        answer_with_citations = answer + "\n\nCitations:\n" + "\n".join(citations) if citations else answer

        # Optionally summarize answer
        summary = None
        if summarize and answer:
            summary_prompt = f"Summarize the following answer in 2 sentences:\n{answer}"
            summary_resp = self.llm.invoke([summary_prompt])
            summary = summary_resp.content

        # Store query history
        self.history.append({
            'question': question,
            'answer': answer,
            'sources': sources,
            'summary': summary
        })

        return {
            'question': question,
            'answer': answer_with_citations,
            'sources': sources,
            'summary': summary,
            'history': self.history
        }

# Example usage:
adv_rag = AdvancedRAGPipeline(rag_retriever, llm)
result = adv_rag.query("What is a Hub?", top_k=3, min_score=0.1, stream=True, summarize=True)
print("\nFinal Answer:", result['answer'])
print("Summary:", result['summary'])
print("History:", result['history'][-1])

Retrieving documents for query What is a Hub?
Top k: 3, Score threshold: 0.1
Generating embeddings for 1 texts...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Generated embeddings with shape: (1, 384)
Retrieved 3 documents (after filtering)
Streaming answer:
Use the following context to answer the question concisely.
Context:
Networking Devices 
 
Networking devices are tools that connect different parts of a network and help users share 
information and resources. 
HUB 
A network hub is an old networking device that works at OSI Layer 1. 
It acts like a repeater, meaning it takes an incoming electrical signal and sends it out to 
all other ports, not just the intended device. 
+ 
 
All devices connected to a hub share one collision domain. 
A collision domain is a part of the network where two or more devices can send data at 
the same time and cause a collision. 
When a collision happens, the data is lost, and the sender has to send it again.

Networking Devices 
 
Networking devices are tools that connect different parts of a network and help users share 
information and resources. 
HUB 
A network hub is an old networking device that work