### RAG PIPLINES

In [2]:
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
# from langchain.text_splitter import RecursiveCharacterTextSplitter
from pathlib import Path

In [3]:
### Read all tyhe pdf's inside the directory

def process_all_pdfs(pdf_directory):
    """Processes all the pdf files in the directory"""
    all_documents=[]
    pdf_dir = Path(pdf_directory)

    # Find all the PDF files "**/*.pdf"
    pdf_files = list(pdf_dir.glob("**/*.pdf"))

    print(f"Found {len(pdf_files)} PDF Files to Process")

    for pdf_file in pdf_files:
        print(f"\n processing : {pdf_file.name}")

        try:
            loader = PyMuPDFLoader(str(pdf_file))

            document = loader.load()

            # Add source information to metadata

            for doc in document:
                doc.metadata['source_file']=pdf_file.name
                doc.metadata['file_type']='pdf'
            all_documents.extend(document)
            print(f" Loaded {len(document)} pages")
        
        except Exception as e:
            print(f" Error: {e}")
    
    print(f"\nTotal documents loaded : {len(all_documents)}")

    return all_documents


In [4]:
all_pdf_documents = process_all_pdfs("../data")

Found 3 PDF Files to Process

 processing : 1Notes.pdf
 Loaded 2 pages

 processing : 2Notes.pdf
 Loaded 4 pages

 processing : 3notes.pdf
 Loaded 5 pages

Total documents loaded : 11


In [5]:
all_pdf_documents

[Document(metadata={'producer': 'Skia/PDF m137 Google Docs Renderer', 'creator': '', 'creationdate': '', 'source': '..\\data\\pdf\\1Notes.pdf', 'file_path': '..\\data\\pdf\\1Notes.pdf', 'total_pages': 2, 'format': 'PDF 1.4', 'title': 'Untitled document', 'author': '', 'subject': '', 'keywords': '', 'moddate': '', 'trapped': '', 'modDate': '', 'creationDate': '', 'page': 0, 'source_file': '1Notes.pdf', 'file_type': 'pdf'}, page_content='Lecture 1 Notes :\u200b\n\u200b\n1.\u2060 \u2060What is Low‑Level Design (LLD)? \n \nDefinition: Designing the internal structure (“skeleton”) of an application by identifying \nclasses/objects, their relationships, data flows, and how DSA solutions plug into this \nstructure. \n●\u200b DSA: Solves isolated problems (e.g. “find shortest path in an array/graph”) using \nalgorithms like binary search, quicksort, Dijkstra’s, heaps, etc. \n●\u200b LLD: Determines which objects exist in the system and how they interact, then \napplies DSA inside that structur

In [6]:
###  Text spliting for getting into Chunking

def split_documents(documents,chunk_size=1000,chunk_overlap=200):
    """Split documents ointo smaller chunks for better Rag Performance"""
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
        separators=["\n\n","\n"," ",""]
    )

    split_doc = text_splitter.split_documents(documents)

    print(f"Split {len(documents)} documents into {len(split_doc)} chunks")

    # Show example of a chunk

    if split_doc:
        print(f"\nExample chunk:")
        print(f"Content: {split_doc[0].page_content[:200]}...")
        print(f"Metadata: {split_doc[0].metadata}")
    return split_doc

In [7]:
chunks = split_documents(all_pdf_documents)
chunks

Split 11 documents into 18 chunks

Example chunk:
Content: Lecture 1 Notes :​
​
1.⁠ ⁠What is Low‑Level Design (LLD)? 
 
Definition: Designing the internal structure (“skeleton”) of an application by identifying 
classes/objects, their relationships, data flow...
Metadata: {'producer': 'Skia/PDF m137 Google Docs Renderer', 'creator': '', 'creationdate': '', 'source': '..\\data\\pdf\\1Notes.pdf', 'file_path': '..\\data\\pdf\\1Notes.pdf', 'total_pages': 2, 'format': 'PDF 1.4', 'title': 'Untitled document', 'author': '', 'subject': '', 'keywords': '', 'moddate': '', 'trapped': '', 'modDate': '', 'creationDate': '', 'page': 0, 'source_file': '1Notes.pdf', 'file_type': 'pdf'}


[Document(metadata={'producer': 'Skia/PDF m137 Google Docs Renderer', 'creator': '', 'creationdate': '', 'source': '..\\data\\pdf\\1Notes.pdf', 'file_path': '..\\data\\pdf\\1Notes.pdf', 'total_pages': 2, 'format': 'PDF 1.4', 'title': 'Untitled document', 'author': '', 'subject': '', 'keywords': '', 'moddate': '', 'trapped': '', 'modDate': '', 'creationDate': '', 'page': 0, 'source_file': '1Notes.pdf', 'file_type': 'pdf'}, page_content='Lecture 1 Notes :\u200b\n\u200b\n1.\u2060 \u2060What is Low‑Level Design (LLD)? \n \nDefinition: Designing the internal structure (“skeleton”) of an application by identifying \nclasses/objects, their relationships, data flows, and how DSA solutions plug into this \nstructure. \n●\u200b DSA: Solves isolated problems (e.g. “find shortest path in an array/graph”) using \nalgorithms like binary search, quicksort, Dijkstra’s, heaps, etc. \n●\u200b LLD: Determines which objects exist in the system and how they interact, then \napplies DSA inside that structur

## Embedding and VectorStoreDB

In [8]:
import numpy as np
from sentence_transformers import SentenceTransformer
import chromadb
from chromadb.config import Settings
import uuid
from typing import List,Dict,Any,Tuple
from sklearn.metrics.pairwise import cosine_similarity


In [11]:
class EmbeddingManager:
    """Handles document Embedding generation using Sentence Transformer"""

    def __init__(self,model_name:str="sentence-transformers/all-MiniLM-L6-v2"):
        """Initialize the embeding manager
            Args:
                model_name:HuggingFace model for sentence embedding
        """
        self.model_name=model_name
        self.model=None
        self._load_model()

    

    def _load_model(self):
        """Load The SentenceTransformer model"""
        try:
            print(f"Loading the embedding model : {self.model_name}")
            self.model=SentenceTransformer(self.model_name, token=False)
            print(f"Model Loaded Sucessfully. Embedding dimension: {self.model.get_sentence_embedding_dimension()}")
        except Exception as e:
            print(f"Error loading model {self.model_name} : {e}")
            raise

    def generate_embedding(self,texts:List[str])->np.ndarray:
        """Generate embedding for a list of text
            Args:
                texts:List of text strings to embed
            Return : 
                numpy array of embedding with shape (len(texts),embedding_dim)
        """
        if not self.model:
            raise ValueError("Model Not Loaded")

        print(f"Generating embedding gor {len(texts)} texts...")
        embeddings = self.model.encode(texts,show_progress_bar=True)
        print(f"Generated embeddings with shape : {embeddings.shape}")

        return embeddings
    
    # def get_sentence_embedding_dimension(self) -> int:
    #     """Get the embedding dimension of the model"""
    #     if not self.model:
    #         raise ValueError("Model not loaded")
    #     return self.model.get_sentence_embedding_dimension()

embedding_manager = EmbeddingManager()
embedding_manager    

Loading the embedding model : sentence-transformers/all-MiniLM-L6-v2


To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


Model Loaded Sucessfully. Embedding dimension: 384


<__main__.EmbeddingManager at 0x15cafe7a510>

### VectorStore

In [13]:
import os


class VectorStore:
    """Manages document embedddings in a chromaDB vector store"""

    def __init__(self,collection_name:str="pdf_documents",persist_directory: str="../data/vector_store"):
        """Initialize the vector stroe
        
           Args:
               collection_name: Name  of the chromaDB collection
               persist_directory: Directpory to persist the vector store

        """

        self.collection_name = collection_name
        self.persist_directory = persist_directory
        self.client = None
        self.collection = None
        self._initialize_store()

    def _initialize_store(self):
        """Initialize ChromaDB client and collection"""

        try:
            # Create persistent chroma client
            os.makedirs(self.persist_directory,exist_ok=True)
            self.client = chromadb.PersistentClient(path=self.persist_directory)

            # Get or create collection
            self.collection = self.client.get_or_create_collection(
                name=self.collection_name,
                metadata={"description":"PDF document embeddings for RAG"}
            )

            print(f"Vector store initialized. Collection: {self.collection_name}")
            print(f"Existing documents in collection: {self.collection.count()}")
        except Exception as e:
            print(f"Error initializing vector store : {e}")
            raise

    def add_documents(self,documents:List[Any], embeddings: np.ndarray):
        """Add documents and their embeddings to the vector stroe
        Args:
            documents: List of langchain documents
            embeddings: Corresponding embeddings for the documents
        """
        if len(documents)!= len(embeddings):
            raise ValueError("Number of documents must match number of embeddings")
        print(f"Adding {len(documents)} documents to vector store...")

        #Prepare data for chromadb
        ids=[]
        metadatas=[]
        documnets_text =[]
        embeddings_list = []

        for i, (doc, embedding) in enumerate(zip(documents,embeddings)):
            # generate uuid
            doc_id = f"doc_{uuid.uuid4().hex[:8]}_{i}"
            ids.append(doc_id)

            #Prepare metadata
            metadata = dict(doc.metadata)
            metadata['doc_index']=i
            metadata['content_length'] = len(doc.page_content)
            metadatas.append(metadata)

            # Document cointent
            documnets_text.append(doc.page_content)


            #Embedding
            embeddings_list.append(embedding.tolist())

        try:
            #add to collection

            self.collection.add(
                ids=ids,
                embeddings=embeddings_list,
                metadatas=metadatas,
                documents=documnets_text
            )  
            print(f"Successfully added {len(documents)} documents to vector store")
            print(f"Total documents in collection: {self.collection.count()}")
        except Exception as e:
            print(f"Error in adding document sto vector stroe: {e}")
            raise

vectorstore = VectorStore()
vectorstore  


Vector store initialized. Collection: pdf_documents
Existing documents in collection: 0


<__main__.VectorStore at 0x15cb1159010>

In [14]:
chunks

[Document(metadata={'producer': 'Skia/PDF m137 Google Docs Renderer', 'creator': '', 'creationdate': '', 'source': '..\\data\\pdf\\1Notes.pdf', 'file_path': '..\\data\\pdf\\1Notes.pdf', 'total_pages': 2, 'format': 'PDF 1.4', 'title': 'Untitled document', 'author': '', 'subject': '', 'keywords': '', 'moddate': '', 'trapped': '', 'modDate': '', 'creationDate': '', 'page': 0, 'source_file': '1Notes.pdf', 'file_type': 'pdf'}, page_content='Lecture 1 Notes :\u200b\n\u200b\n1.\u2060 \u2060What is Low‑Level Design (LLD)? \n \nDefinition: Designing the internal structure (“skeleton”) of an application by identifying \nclasses/objects, their relationships, data flows, and how DSA solutions plug into this \nstructure. \n●\u200b DSA: Solves isolated problems (e.g. “find shortest path in an array/graph”) using \nalgorithms like binary search, quicksort, Dijkstra’s, heaps, etc. \n●\u200b LLD: Determines which objects exist in the system and how they interact, then \napplies DSA inside that structur

In [15]:
### Convert text to embedding
texts = [doc.page_content for doc in chunks]
texts

['Lecture 1 Notes :\u200b\n\u200b\n1.\u2060 \u2060What is Low‑Level Design (LLD)? \n \nDefinition: Designing the internal structure (“skeleton”) of an application by identifying \nclasses/objects, their relationships, data flows, and how DSA solutions plug into this \nstructure. \n●\u200b DSA: Solves isolated problems (e.g. “find shortest path in an array/graph”) using \nalgorithms like binary search, quicksort, Dijkstra’s, heaps, etc. \n●\u200b LLD: Determines which objects exist in the system and how they interact, then \napplies DSA inside that structure. \n \n2.\u2060 \u2060Illustrative Story: Two Approaches to Building “QuickRide” \n●\u200b Scenario: Build a ride‑booking app (“QuickRide”) like Uber/Ola. \n \nAnurag’s DSA‑First Approach:\u200b\n \n1.\u200b Problem decomposition: \n●\u200b Map city intersections to graph nodes, roads to edges. \n●\u200b Use Dijkstra’s algorithm to compute shortest route. \n●\u200b Use a min‑heap (priority queue) to match riders to closest drivers.\u

In [16]:
texts = [doc.page_content for doc in chunks]

In [17]:
## Genrate the embedding
embeddings  =embedding_manager.generate_embedding(texts)


## store into vectorstroe
vectorstore.add_documents(chunks,embeddings)

Generating embedding gor 18 texts...


Batches: 100%|██████████| 1/1 [00:03<00:00,  3.05s/it]


Generated embeddings with shape : (18, 384)
Adding 18 documents to vector store...
Successfully added 18 documents to vector store
Total documents in collection: 18


## Retriever Pipeline From VectorStore

In [22]:
class RAGRetriever:
    """Handles query based retrieval from the vector store"""

    def __init__(self,vector_store:VectorStore, embedding_manager:EmbeddingManager):
        
        """ 
           Initialize the retriever

           Args:
               vector_store: Vector Store contaings document embeddings
               embedding_manager: Manager for generating query embeddings
        """
        self.vector_store=vector_store
        self.embedding_manager=embedding_manager

    
    def retrieve(self,query: str, top_k:int =5, score_threshold: float = 0.0 )-> List[Dict[str,Any]]:
        """ 
        Retrieve relevant documents for a query

        Args:
            query: The search query
            top_k: Number of top results to return
            score_thresholds: Minimum similarity score threshoild
        Return:
             List of dictionaries conatining retrived documents and metatdata

        """
        print(f"Retrieving documents for query: '{query}'")
        print(f"Top K: {top_k}, Score threshold: {score_threshold}")

        # Generate query embedding
        query_embedding = self.embedding_manager.generate_embedding([query])[0]

        # Search in vecotr store
        try:
            results = self.vector_store.collection.query(
                query_embeddings=[query_embedding.tolist()],
                n_results=top_k
            )

            # Process results
            retrived_docs = []

            if results['documents'] and results['documents'][0]:
                documents = results['documents'][0]
                metadatas = results['metadatas'][0]
                distances = results['distances'][0]
                ids = results['ids'][0]

                for i, (doc_id, document, metadata, distance) in enumerate(zip(ids,documents,metadatas,distances)):
                    # convert distance to similarity store (chromaDB uses cosine distance)
                    similarity_score = 1 - distance

                    if similarity_score >=score_threshold:
                        retrived_docs.append({
                            'id':doc_id,
                            'content': document,
                            'metadata': metadata,
                            'similarity_score' : similarity_score,
                            'distance': distance,
                            'rank': i+1
                        }) 
                print(f"Retrieved {len(retrived_docs)} documents (after filtering)")
            else:
                print("No document found")

            return retrived_docs
        
        except Exception as e:
            print(f"Error during retrieval : {e}")
            return []
        

rag_retriever = RAGRetriever(vectorstore,embedding_manager)

In [23]:
rag_retriever

<__main__.RAGRetriever at 0x15cb56242f0>

In [24]:
rag_retriever.retrieve("What is LowLevel Design (LLD)?")

Retrieving documents for query: 'What is LowLevel Design (LLD)?'
Top K: 5, Score threshold: 0.0
Generating embedding gor 1 texts...


Batches: 100%|██████████| 1/1 [00:00<00:00, 107.73it/s]

Generated embeddings with shape : (1, 384)





Retrieved 2 documents (after filtering)


[{'id': 'doc_8f4fe405_0',
  'content': 'Lecture 1 Notes :\u200b\n\u200b\n1.\u2060 \u2060What is Low‑Level Design (LLD)? \n \nDefinition: Designing the internal structure (“skeleton”) of an application by identifying \nclasses/objects, their relationships, data flows, and how DSA solutions plug into this \nstructure. \n●\u200b DSA: Solves isolated problems (e.g. “find shortest path in an array/graph”) using \nalgorithms like binary search, quicksort, Dijkstra’s, heaps, etc. \n●\u200b LLD: Determines which objects exist in the system and how they interact, then \napplies DSA inside that structure. \n \n2.\u2060 \u2060Illustrative Story: Two Approaches to Building “QuickRide” \n●\u200b Scenario: Build a ride‑booking app (“QuickRide”) like Uber/Ola. \n \nAnurag’s DSA‑First Approach:\u200b\n \n1.\u200b Problem decomposition: \n●\u200b Map city intersections to graph nodes, roads to edges. \n●\u200b Use Dijkstra’s algorithm to compute shortest route. \n●\u200b Use a min‑heap (priority queue)

## Integration Vectordb Context pipeline With LLM output

In [70]:
GEMINI_API_KEY=<Gemini_api_key>

SyntaxError: invalid syntax (2167862482.py, line 1)

In [34]:
# Initialize Gemini
import google.generativeai as genai
genai.configure(api_key="GEMINI_API_KEY")
gemini_llm = genai.GenerativeModel("gemini-2.5-flash")

In [41]:
import google.generativeai as genai

class GeminiRAG:
    def __init__(self, api_key: str, model_name="gemini-2.5-flash"):
        genai.configure(api_key=api_key)
        self.model = genai.GenerativeModel(model_name)

    def build_prompt(self, query: str, context: str) -> str:
        return f"""
You are a helpful AI assistant. Answer the question ONLY using the context.
If the context does not include the answer, say "The context does not provide that information."

### Context:
{context}

### Question:
{query}

### Answer:
"""

    def generate(self, query: str, context: str) -> str:
        """Generate RAG-based response using Gemini."""
        prompt = self.build_prompt(query, context)
        response = self.model.generate_content(prompt)
        return response.text

    def generate_stream(self, query: str, context: str):
        """Streaming version of Gemini RAG."""
        prompt = self.build_prompt(query, context)
        stream = self.model.generate_content(prompt, stream=True)
        for chunk in stream:
            if chunk.text:
                yield chunk.text

    def generate_json(self, query: str, context: str) -> dict:
        """Return structured JSON output."""
        prompt = self.build_prompt(query, context)
        response = self.model.generate_content(
            prompt,
            generation_config={"response_mime_type": "application/json"}
        )
        return response.text


In [None]:

def generate_response_with_gemini(query, context):
    prompt = f"""
You are a helpful assistant. Use ONLY the provided context to answer the question.

### Context:
{context}

### Query:
{query}

### Answer:
"""
    return gemini_llm.generate_content(prompt).text

In [None]:
## Simple RAG pipeline with Gemini LL

gemini_rag = GeminiRAG(api_key=api_key)

if gemini_rag:
    query="What is LowLevel Design (LLD)?"
    retrieved_docs = rag_retriever.retrieve(query, top_k=3,score_threshold=0.1)

    if retrieved_docs:

        #combine top retrievied documents as context
        combined_context= "\n\n".join([doc['content'] for doc in retrieved_docs])

        #Generate response  using llm
        response  = gemini_rag.generate(query , combined_context)

        print(f"\nResponse:\n{response}")
    else:
        print(f"No relevant documents found for the query")

Retrieving documents for query: 'What is LowLevel Design (LLD)?'
Top K: 3, Score threshold: 0.1
Generating embedding gor 1 texts...


Batches: 100%|██████████| 1/1 [00:00<00:00, 97.69it/s]

Generated embeddings with shape : (1, 384)
Retrieved 1 documents (after filtering)






Response:
Low-Level Design (LLD) is the process of designing the internal structure (“skeleton”) of an application by identifying classes/objects, their relationships, data flows, and how DSA solutions plug into this structure.


In [None]:
import google.generativeai as genai
genai.configure(api_key=GEMINI_API_KEY)
llm = genai.GenerativeModel("gemini-2.5-flash")

In [52]:
print(llm)

genai.GenerativeModel(
    model_name='models/gemini-2.5-flash',
    generation_config={},
    safety_settings={},
    tools=None,
    system_instruction=None,
    cached_content=None
)


In [None]:
from langchain_groq import ChatGroq
import os
from dotenv import load_dotenv
load_dotenv()

### Initalize the groq LLM
# groq_api_key = os.getenv("GEMINI_API_KEY")

# llm = ChatGroq(groq_api_key=groq_api_key,model_name="gemma2-9b-it",temperature=0.1,max_tokens=1024)


## Simple RAG funtion
def rag_simple(query,retriever,llm,top_k=3):
    ## retrieve the context
    results = retriever.retrieve(query,top_k=top_k)
    context = "\n\n\n".join([doc['content'] for doc in results]) if results else ""

    if not context:
        print("No relevant context found to answer the question")

    ## generate the answer using groq llm
    prompt = f"""Use the following context to answer the question concisely.

    Context:
    {context}

   Question: {query}

   Answer:

"""
    # response = llm.invoke([prompt.format(context=context,query=query)])
    response = llm.generate_content([prompt.format(context=context,query=query)])
    # print("rsponse===",response.text)

    return response.text

In [63]:
answer = rag_simple("What is LowLevel Design (LLD)?",rag_retriever,llm)
answer

Retrieving documents for query: 'What is LowLevel Design (LLD)?'
Top K: 3, Score threshold: 0.0
Generating embedding gor 1 texts...


Batches: 100%|██████████| 1/1 [00:00<00:00, 105.69it/s]

Generated embeddings with shape : (1, 384)
Retrieved 2 documents (after filtering)





rsponse=== Low-Level Design (LLD) is designing the internal structure ("skeleton") of an application by identifying classes/objects, their relationships, data flows, and how Data Structures and Algorithms (DSA) solutions plug into this structure.


'Low-Level Design (LLD) is designing the internal structure ("skeleton") of an application by identifying classes/objects, their relationships, data flows, and how Data Structures and Algorithms (DSA) solutions plug into this structure.'

### Enhanced RAG Pipeline Features

In [64]:
# --- Enhanced RAG Pipline Features---
def rag_advanced(query,retriever,llm,top_k=5,score_threshold=0.2, return_context=False):
    """
       RAG pipeline with extra features:
       - Retrun answer,sources,confidence score, and optionally full context
        """
    results = retriever.retrieve(query,top_k=top_k,score_threshold=score_threshold)
    if not results:
        return {'answer':'No relevant context found.', 'sources':[],'confidence':0.0,'context':''}
    
    # Prepare context and sources
    context = "\n\n\n".join([doc['content'] for doc in results])
    sources = [{
        'sources': doc['metadata'].get('source_files', doc['metadata'].get('source','unknown')),
        'page': doc['metadata'].get('page','unknown'),
        'score':doc['similarity_score'],
        'preview':doc['content'][:300] + '...'
    } for doc in results]

    confidence = max([doc['similarity_score'] for doc in results])

    #Generate answer
    prompt = f"""Use the following context to answer the question concisely.\nContext:\n{context}\n\nQuestion: {query}\n\nAnswer:"""
    response  = llm.generate_content([prompt.format(context=context,query=query)])

    output = {
        'answer':response.text,
        'sources':sources,
        'confidence':confidence
    }

    if return_context:
        output['context']=context
    
    return output

In [66]:
## Example
result= rag_advanced("What is LowLevel Design (LLD)?",rag_retriever,llm,top_k=3,score_threshold=0.1,return_context=True)
print("Answer: ",result['answer'])
print("sources: ",result['sources'])
print("Confidence: ",result['confidence'])
print("Contrext Preview: ",result['context'][:300])

Retrieving documents for query: 'What is LowLevel Design (LLD)?'
Top K: 3, Score threshold: 0.1
Generating embedding gor 1 texts...


Batches: 100%|██████████| 1/1 [00:00<00:00, 120.21it/s]

Generated embeddings with shape : (1, 384)
Retrieved 1 documents (after filtering)





Answer:  Low-Level Design (LLD) is designing the internal structure ("skeleton") of an application by identifying classes/objects, their relationships, data flows, and how DSA solutions plug into this structure.
sources:  [{'sources': '..\\data\\pdf\\1Notes.pdf', 'page': 0, 'score': 0.11530083417892456, 'preview': 'Lecture 1 Notes :\u200b\n\u200b\n1.\u2060 \u2060What is Low‑Level Design (LLD)? \n \nDefinition: Designing the internal structure (“skeleton”) of an application by identifying \nclasses/objects, their relationships, data flows, and how DSA solutions plug into this \nstructure. \n●\u200b DSA: Solves isolated problems (e.g. “find sh...'}]
Confidence:  0.11530083417892456
Contrext Preview:  Lecture 1 Notes :​
​
1.⁠ ⁠What is Low‑Level Design (LLD)? 
 
Definition: Designing the internal structure (“skeleton”) of an application by identifying 
classes/objects, their relationships, data flows, and how DSA solutions plug into this 
structure. 
●​ DSA: Solves isolated problems (e.g. 

### Advance RAG Pipeline


In [67]:
# --- Advanced RAG Pipeline: Streaming, Citations, History, Summarization ---
from typing import List, Dict, Any
import time

class AdvancedRAGPipeline:
    def __init__(self, retriever, llm):
        self.retriever = retriever
        self.llm = llm
        self.history = []  # Store query history

    def query(self, question: str, top_k: int = 5, min_score: float = 0.2, stream: bool = False, summarize: bool = False) -> Dict[str, Any]:
        # Retrieve relevant documents
        results = self.retriever.retrieve(question, top_k=top_k, score_threshold=min_score)
        if not results:
            answer = "No relevant context found."
            sources = []
            context = ""
        else:
            context = "\n\n".join([doc['content'] for doc in results])
            sources = [{
                'source': doc['metadata'].get('source_file', doc['metadata'].get('source', 'unknown')),
                'page': doc['metadata'].get('page', 'unknown'),
                'score': doc['similarity_score'],
                'preview': doc['content'][:120] + '...'
            } for doc in results]
            # Streaming answer simulation
            prompt = f"""Use the following context to answer the question concisely.\nContext:\n{context}\n\nQuestion: {question}\n\nAnswer:"""
            if stream:
                print("Streaming answer:")
                for i in range(0, len(prompt), 80):
                    print(prompt[i:i+80], end='', flush=True)
                    time.sleep(0.05)
                print()
            response = self.llm.generate_content([prompt.format(context=context, question=question)])
            answer = response.text

        # Add citations to answer
        citations = [f"[{i+1}] {src['source']} (page {src['page']})" for i, src in enumerate(sources)]
        answer_with_citations = answer + "\n\nCitations:\n" + "\n".join(citations) if citations else answer

        # Optionally summarize answer
        summary = None
        if summarize and answer:
            summary_prompt = f"Summarize the following answer in 2 sentences:\n{answer}"
            summary_resp = self.llm.generate_content([summary_prompt])
            summary = summary_resp.text

        # Store query history
        self.history.append({
            'question': question,
            'answer': answer,
            'sources': sources,
            'summary': summary
        })

        return {
            'question': question,
            'answer': answer_with_citations,
            'sources': sources,
            'summary': summary,
            'history': self.history
        }



In [69]:
# Example usage:
adv_rag = AdvancedRAGPipeline(rag_retriever, llm)
result = adv_rag.query("What is LowLevel Design (LLD)?", top_k=3, min_score=0.1, stream=True, summarize=True)
print("\nFinal Answer:", result['answer'])
print("Summary:", result['summary'])
print("History:", result['history'][-1])

Retrieving documents for query: 'What is LowLevel Design (LLD)?'
Top K: 3, Score threshold: 0.1
Generating embedding gor 1 texts...


Batches: 100%|██████████| 1/1 [00:00<00:00, 109.03it/s]

Generated embeddings with shape : (1, 384)
Retrieved 1 documents (after filtering)
Streaming answer:
Use the following context to answer the question concisely.
Context:
Lecture 1 Notes :​
​
1.⁠ ⁠What is Low‑Level Design (LLD)? 
 
Definition: Designing the internal structure (“skeleton”) of an application by identifying 
classes/objects, their relationships, data flows, and how DSA solutions plug into this 
structure.




 
●​ DSA: Solves isolated problems (e.g. “find shortest path in an array/graph”) using 
algorithms like binary search, quicksort, Dijkstra’s, heaps, etc. 
●​ LLD: Determines which objects exist in the system and how they interact, then 
applies DSA inside that structure. 
 
2.⁠ ⁠Illustrative Story: Two Approaches to Building “QuickRide” 
●​ Scenario: Build a ride‑booking app (“QuickRide”) like Uber/Ola. 
 
Anurag’s DSA‑First Approach:​
 
1.​ Problem decomposition: 
●​ Map city intersections to graph nodes, roads to edges. 
●​ Use Dijkstra’s algorithm to compute shortest route. 
●​ Use a min‑heap (priority queue) to match riders to closest drivers.​
 
2.​ Gaps:

Question: What is LowLevel Design (LLD)?

Answer:

Final Answer: Low-Level Design (LLD) is the process of designing the internal structure ("skeleton") of an application by identifying classes/objects, their relationships, data flows, and how Data Structures and Algorithms (DSA) solutions plug into this structure.

Citations:
[1