In [1]:
# Loading and chunking libs

import os
from langchain_community.document_loaders import PyPDFLoader, PyMuPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from pathlib import Path

# Embedding and storing libs

import numpy as np
from sentence_transformers import SentenceTransformer
import chromadb
from chromadb.config import Settings
import uuid
from typing import List, Dict, Any, Tuple
from sklearn.metrics.pairwise import cosine_similarity


  from .autonotebook import tqdm as notebook_tqdm


### Loading all pdf documents

In [2]:
### Read all the pdfs in teh directory

def process_pdfs(pdf_directory):

    all_docs = []
    pdf_dir = Path(pdf_directory)

    pdf_files = list(pdf_dir.glob("**/*.pdf"))

    print(f"Found {len(pdf_files)} in the directory {pdf_directory}")

    # return pdf_files

    for pdf_file in pdf_files:
        print(f"Processing file {pdf_file}")

        try:
            loader = PyMuPDFLoader(str(pdf_file))
            documents = loader.load()
            print(f"Loaded {len(documents)} documents from {pdf_file} using PyMuPDFLoader")
            print(documents)

            # Add Metadata
            for doc in documents:
                doc.metadata['source_file'] = pdf_file.name
                doc.metadata['file_path'] = 'pdf'

            all_docs.extend(documents)
            print(f'Loaded {len(documents)} pages')
            # break

        except Exception as e:
            print(f"Failed to load {pdf_file} with PyMuPDFLoader due to {e}, trying PyPDFLoader")

    print(f"Total documents loaded: {len(all_docs)}")
    return all_docs


all_pdf_documents = process_pdfs("../data/pdfs")


Found 3 in the directory ../data/pdfs
Processing file ..\data\pdfs\Building a Personal Portfolio Q&A Chatbot.pdf
Loaded 9 documents from ..\data\pdfs\Building a Personal Portfolio Q&A Chatbot.pdf using PyMuPDFLoader
[Document(metadata={'producer': 'WeasyPrint 65.1', 'creator': 'ChatGPT', 'creationdate': '', 'source': '..\\data\\pdfs\\Building a Personal Portfolio Q&A Chatbot.pdf', 'file_path': '..\\data\\pdfs\\Building a Personal Portfolio Q&A Chatbot.pdf', 'total_pages': 9, 'format': 'PDF 1.7', 'title': 'Building a Personal Portfolio Q&A Chatbot', 'author': 'ChatGPT Deep Research', 'subject': '', 'keywords': '', 'moddate': '', 'trapped': '', 'modDate': '', 'creationDate': '', 'page': 0}, page_content='Building a Personal Portfolio Q&A Chatbot\nFramework and Hosting Considerations\nChoosing the right framework for your portfolio site is important for ease of development and deployment.\nNext.js is a React-based framework that provides server-side rendering (SSR), static site generation

In [3]:
all_pdf_documents

[Document(metadata={'producer': 'WeasyPrint 65.1', 'creator': 'ChatGPT', 'creationdate': '', 'source': '..\\data\\pdfs\\Building a Personal Portfolio Q&A Chatbot.pdf', 'file_path': 'pdf', 'total_pages': 9, 'format': 'PDF 1.7', 'title': 'Building a Personal Portfolio Q&A Chatbot', 'author': 'ChatGPT Deep Research', 'subject': '', 'keywords': '', 'moddate': '', 'trapped': '', 'modDate': '', 'creationDate': '', 'page': 0, 'source_file': 'Building a Personal Portfolio Q&A Chatbot.pdf'}, page_content='Building a Personal Portfolio Q&A Chatbot\nFramework and Hosting Considerations\nChoosing the right framework for your portfolio site is important for ease of development and deployment.\nNext.js is a React-based framework that provides server-side rendering (SSR), static site generation (SSG),\nbuilt-in routing, and easy integration of backend logic via API routes\n. These features can improve\nperformance and SEO (since pages can be pre-rendered or SSR) and simplify development (routing and\

### Chunking the content

In [4]:
### Text splitting into chunks

def split_documents(documents, chunk_size = 1000, chunk_overlap = 200):
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size = chunk_size,
        chunk_overlap = chunk_overlap,
        length_function = len,
        separators = ["\n\n", "\n", " ", ""]
    )

    split_docs = text_splitter.split_documents(documents)
    print(f"Split {len(documents)} documents into {len(split_docs)} chunks")

    # Example chunk
    if split_docs:
        print("Example Chunk")
        print(f"Content: {split_docs[0].page_content}")
        print(f"Metadata: {split_docs[0].metadata}")

    return split_docs

chunked_pdf_documents = split_documents(all_pdf_documents)
chunked_pdf_documents

Split 28 documents into 98 chunks
Example Chunk
Content: Building a Personal Portfolio Q&A Chatbot
Framework and Hosting Considerations
Choosing the right framework for your portfolio site is important for ease of development and deployment.
Next.js is a React-based framework that provides server-side rendering (SSR), static site generation (SSG),
built-in routing, and easy integration of backend logic via API routes
. These features can improve
performance and SEO (since pages can be pre-rendered or SSR) and simplify development (routing and
configuration work out of the box). For example, Next.js allows you to “easily create your custom backend
functionalities with API Routes to power your own front end”, all without extra client-side bloat
. This
means you could host your Q&A model’s API or inference logic within the same Next.js project if needed. 
By contrast, React (without Next.js) typically means using a tool like Create React App or Vite to build a
Metadata: {'producer': 'Weas

[Document(metadata={'producer': 'WeasyPrint 65.1', 'creator': 'ChatGPT', 'creationdate': '', 'source': '..\\data\\pdfs\\Building a Personal Portfolio Q&A Chatbot.pdf', 'file_path': 'pdf', 'total_pages': 9, 'format': 'PDF 1.7', 'title': 'Building a Personal Portfolio Q&A Chatbot', 'author': 'ChatGPT Deep Research', 'subject': '', 'keywords': '', 'moddate': '', 'trapped': '', 'modDate': '', 'creationDate': '', 'page': 0, 'source_file': 'Building a Personal Portfolio Q&A Chatbot.pdf'}, page_content='Building a Personal Portfolio Q&A Chatbot\nFramework and Hosting Considerations\nChoosing the right framework for your portfolio site is important for ease of development and deployment.\nNext.js is a React-based framework that provides server-side rendering (SSR), static site generation (SSG),\nbuilt-in routing, and easy integration of backend logic via API routes\n. These features can improve\nperformance and SEO (since pages can be pre-rendered or SSR) and simplify development (routing and\

In [5]:
chunked_pdf_documents[40].page_content

'data in a structured way, then pursue a retrieval-based solution as the primary path (it’s quick to set up and\nvery effective). Meanwhile, plan out a fine-tuning experiment on a small LLM as an educational first choice –\n12\n3\n8'

### Embedding and vectorStoreDB

In [6]:
class EmbeddingManager:
    '''
    Handles document embedding generation using SentenceTransformer.

    Args:
        model_name (str): Name of the pre-trained SentenceTransformer model to use.
    '''

    def __init__(self, model_name: str = 'all-MiniLM-L6-v2'):
        
        self.model_name = model_name
        self.model = None
        self._load_model()

    def _load_model(self):
        ''' Loads the SentenceTransformer model'''
        try:
            print(f"Loading embedding model: {self.model_name}")
            self.model = SentenceTransformer(self.model_name)
            print(f"Model loaded successfully. Embedding dimension: {self.model.get_sentence_embedding_dimension()}")
        except Exception as e:
            print(f"Error loading model {self.model_name}: {e}")

    def generate_embeddings(self, texts: List[str]) -> np.ndarray:
        '''Generates embeddings for a list of texts.

        Args:
            texts (List[str]): List of text strings to embed.

        Returns:
            np.ndarray: Array of embeddings.

        '''

        if not self.model:
            raise ValueError("Model not loaded.")
        
        print(f"Generating embedding for {len(texts)} texts")
        embeddings = self.model.encode(texts, show_progress_bar=True)
        print(f"Generated embedding with shape: {embeddings.shape}")
        return embeddings
    
# Initializing the embedding manager
embedding_manager = EmbeddingManager(model_name='all-MiniLM-L6-v2')
embedding_manager

Loading embedding model: all-MiniLM-L6-v2
Model loaded successfully. Embedding dimension: 384


<__main__.EmbeddingManager at 0x1c9f493bcb0>

In [7]:
text_trials = ["This is example 1", "This is example 2"]

embeddings_trials = embedding_manager.generate_embeddings(text_trials)

Generating embedding for 2 texts


Batches: 100%|██████████| 1/1 [00:00<00:00,  5.58it/s]

Generated embedding with shape: (2, 384)





In [8]:
embeddings_trials[0]

array([-8.03826470e-03,  2.22325660e-02,  1.47959348e-02,  5.55793904e-02,
        1.93927288e-02, -2.86618173e-02,  7.33681917e-02, -6.11751620e-03,
       -3.03268861e-02, -1.17029417e-02,  4.74827848e-02, -2.76739988e-02,
        8.52716342e-02, -6.68997294e-04, -3.39977965e-02,  1.82092097e-02,
        3.26722972e-02, -2.81444751e-02, -8.22492093e-02, -1.60418451e-02,
        1.11098230e-01, -2.68107168e-02, -2.23269947e-02,  1.21751614e-02,
       -3.89853958e-04, -7.64439106e-02,  3.06463195e-03,  8.84773582e-02,
        1.40486524e-01, -1.27440825e-01,  1.06240846e-02,  5.18650515e-03,
        3.44171971e-02,  3.47072370e-02,  5.72374370e-03,  5.25662415e-02,
        1.13464743e-02,  1.06352657e-01, -9.13371816e-02,  6.94956183e-02,
       -1.48934859e-03, -7.01464638e-02,  3.17046884e-03, -1.56533532e-02,
        6.59741908e-02, -6.73016980e-02,  8.53713416e-03,  2.51697060e-02,
       -4.86696512e-02, -1.35161430e-01, -9.16747525e-02, -2.18729172e-02,
       -1.38859287e-01,  

In [9]:
text_trial2 = ["This is example 1"]

embeddings_trials = embedding_manager.generate_embeddings(text_trial2)[0]

Generating embedding for 1 texts


Batches: 100%|██████████| 1/1 [00:00<00:00, 89.46it/s]

Generated embedding with shape: (1, 384)





In [10]:
print(embeddings_trials)
print("--------------------------------------")
print(embeddings_trials.shape)
print("--------------------------------------")
print(type(embeddings_trials))
# print("--------------------------------------")
# print(embeddings_trials.tolist())
print("--------------------------------------")    
embeddings_trials = embeddings_trials.tolist()
print(type(embeddings_trials))

[-8.03826470e-03  2.22325660e-02  1.47959348e-02  5.55793904e-02
  1.93927288e-02 -2.86618173e-02  7.33681917e-02 -6.11751620e-03
 -3.03268861e-02 -1.17029417e-02  4.74827848e-02 -2.76739988e-02
  8.52716342e-02 -6.68997294e-04 -3.39977965e-02  1.82092097e-02
  3.26722972e-02 -2.81444751e-02 -8.22492093e-02 -1.60418451e-02
  1.11098230e-01 -2.68107168e-02 -2.23269947e-02  1.21751614e-02
 -3.89853958e-04 -7.64439106e-02  3.06463195e-03  8.84773582e-02
  1.40486524e-01 -1.27440825e-01  1.06240846e-02  5.18650515e-03
  3.44171971e-02  3.47072370e-02  5.72374370e-03  5.25662415e-02
  1.13464743e-02  1.06352657e-01 -9.13371816e-02  6.94956183e-02
 -1.48934859e-03 -7.01464638e-02  3.17046884e-03 -1.56533532e-02
  6.59741908e-02 -6.73016980e-02  8.53713416e-03  2.51697060e-02
 -4.86696512e-02 -1.35161430e-01 -9.16747525e-02 -2.18729172e-02
 -1.38859287e-01  1.23401331e-02  2.59330645e-02 -3.34522799e-02
 -5.77293038e-02  4.64849323e-02  5.85254887e-03  1.48342224e-02
  4.99756001e-02 -8.93104

### VectorStore

In [11]:
class VectorStore:
    ''' 
    Manages the embedding in a ChromaDB Vector Store.
    
    '''

    def __init__(self, collection_name: str = 'pdf_documents', persistent_directory: str = "../data/vector_store"):
        '''
        Initialize the Vector Store
        
        Args:
            Collection _name (str): Name of chromaDB collection
            persistent_directory (str): Directory to persist the vector store
        '''

        self.collection_name = collection_name
        self.persistent_directory = persistent_directory
        self.client = None
        self.collection = None
        self.initialize_store()

    def initialize_store(self):
        ''' 
        Initialize ChromaDB client and collection

        '''
        try:
            # Create persistent ChromaDB client
            os.makedirs(self.persistent_directory, exist_ok=True)
            self.client = chromadb.PersistentClient(path=self.persistent_directory)

            # Create or get collection
            self.collection = self.client.get_or_create_collection(
                name = self.collection_name,
                metadata= {"description": "PDF Document Embeddings",
                           "hnsw:space": "cosine"   }
            )
            print(f"Vector Store initialized with collection: {self.collection_name}")
            print(f"Existing documents in collection: {self.collection.count()}")
        
        except Exception as e:
            print(f"Error initializing Vector Store: {e}")
            raise 

    def add_documents(self, documents: List[Any], embeddings: np.ndarray):
        ''' 
        Add documents and their embedding to the vector store
        
        Args:
            documents (List[Any]): List of documents to add
            embeddings (np.ndarray): Corresponding embeddings
        '''

        if len(documents) != len(embeddings):
            raise ValueError("Number of documents must match number of embeddings")
        
        print (f"Adding {len(documents)} documents to the vector store")

        #Prepare data for ChromaDB
        ids = []
        metadatas = []
        document_texts = []
        embeddings_list = []

        for i, (doc, embedding) in enumerate(zip(documents, embeddings)):
            # Generate unique ID
            doc_id = f"doc_{uuid.uuid4().hex[:8]}_{i}"
            ids.append(doc_id)

            # Prepare metadata
            metadata = dict(doc.metadata)
            metadata['doc_idx'] = i
            metadata['content_length'] = len(doc.page_content)
            metadatas.append(metadata)

            # Document text
            document_texts.append(doc.page_content)

            # Embedding
            embeddings_list.append(embedding.tolist())
        
        # Add to collection

        try:
            self.collection.add(
                ids =ids,
                embeddings= embeddings_list,
                metadatas= metadatas,
                documents= document_texts
            )

            print(f"Successfully added {len(documents)} documents to the vector store.")
            print(f"Total documents in collection now: {self.collection.count()}")
        
        except Exception as e:
            print(f"Error adding documents to vector store: {e}")
            raise

# Initialize Vector Store
vector_store = VectorStore()
vector_store

Vector Store initialized with collection: pdf_documents
Existing documents in collection: 0


<__main__.VectorStore at 0x1c9f4c87380>

In [12]:
len(chunked_pdf_documents)

98

In [13]:
### Converting teh chunked texts into embeddings

texts = [doc.page_content for doc in chunked_pdf_documents]

### Generate teh embeddings

embeddings = embedding_manager.generate_embeddings(texts)

### Store in Vector Store

vector_store.add_documents(chunked_pdf_documents, embeddings)

### Minor issues with current setup:
# Will it realize duplicates?
# No. Chroma only cares about id. With random UUIDs, every run looks “new”.

# Will it store duplicates with new ids?
# Yes, exactly that.

# Is the current code capable of handling redundancies?
# No. It always inserts new rows. To handle redundancies you need deterministic IDs + upsert() or a clear-then-rebuild strategy.

Generating embedding for 98 texts


Batches: 100%|██████████| 4/4 [00:03<00:00,  1.06it/s]


Generated embedding with shape: (98, 384)
Adding 98 documents to the vector store
Successfully added 98 documents to the vector store.
Total documents in collection now: 98


In [14]:
texts

['Building a Personal Portfolio Q&A Chatbot\nFramework and Hosting Considerations\nChoosing the right framework for your portfolio site is important for ease of development and deployment.\nNext.js is a React-based framework that provides server-side rendering (SSR), static site generation (SSG),\nbuilt-in routing, and easy integration of backend logic via API routes\n. These features can improve\nperformance and SEO (since pages can be pre-rendered or SSR) and simplify development (routing and\nconfiguration work out of the box). For example, Next.js allows you to “easily create your custom backend\nfunctionalities with API Routes to power your own front end”, all without extra client-side bloat\n. This\nmeans you could host your Q&A model’s API or inference logic within the same Next.js project if needed. \nBy contrast, React (without Next.js) typically means using a tool like Create React App or Vite to build a',
 'By contrast, React (without Next.js) typically means using a tool li

### Retriver Pipeline from VectorStore

In [15]:
class RAGRetriever:
    ''' Handles query based retrival from the vectore store'''

    def __init__(self, vector_store: VectorStore, embedding_manager: EmbeddingManager):
        
        ''' Initializes teh retriver

        Args:
            vectorestore: VectoreStore containing the document embeddings
            embedding manager: Manager for generating query embeddings
        '''

        self.vector_store = vector_store
        self.embedding_manager = embedding_manager

    def retrieve(self, query: str, top_k: int = 5, score_threshold: float = 0.0) -> List[Dict[str, Any]]:
        '''
        Retrieve relevant documents for a given query
        
        Args:
            query (str): The search query
            top_k (int): Number of top retrieved results
            score_threshold (float): Minimum similarity score threshold
            
        Return:
            List of dictionries containing retrieved documents and metadata'''
        
        print(f"Generating embedding for query: {query}")
        print(f"Top_k: {top_k}, Score Threshold: {score_threshold}")

        # Generate query embedding
        query_embedding = self.embedding_manager.generate_embeddings([query])[0]

        # Search the vectore store

        try:

            results = self.vector_store.collection.query(
                query_embeddings= [query_embedding.tolist()],
                n_results= top_k
            )

            # Process Result

            retrived_docs = []

            if results['documents'] and results['documents'][0]:
                documents = results['documents'][0]
                metadatas = results['metadatas'][0]
                distances = results['distances'][0]
                ids = results['ids'][0]

                print(f"The distances retrieved are: {distances}")

                for i, (doc_id, document, metadata, distance) in enumerate(zip(ids, documents, metadatas,distances)):
                    # Convert distance to similarity scores
                    similarity_score = 1 - distance

                    if similarity_score>= score_threshold:
                        retrived_docs.append({
                            'id': doc_id,
                            'content': document,
                            'metadata': metadata,
                            'similarity_score': similarity_score,
                            'distance': distance,
                            'rank': i+1 
                        })
                
                print(f"Retrieved {len(retrived_docs)} documents after applying score threshold")

            else:
                print("No documents retrieved from vector store.")
            
            return retrived_docs

        except Exception as e:
            print(f"Error during retrieval: {e}")
            return []

rag_retriver = RAGRetriever(vector_store,embedding_manager)

In [16]:
rag_retriver

<__main__.RAGRetriever at 0x1c9f4df9e80>

In [17]:
rag_retriver.retrieve("What is RAG?", top_k=10)

Generating embedding for query: What is RAG?
Top_k: 10, Score Threshold: 0.0
Generating embedding for 1 texts


Batches: 100%|██████████| 1/1 [00:00<00:00, 94.70it/s]

Generated embedding with shape: (1, 384)
The distances retrieved are: [0.612400233745575, 0.6758878231048584, 0.6773046255111694, 0.6946336030960083, 0.6964412927627563, 0.738471269607544, 0.7537255883216858, 0.7552218437194824, 0.7956720590591431, 0.8051016330718994]
Retrieved 10 documents after applying score threshold





[{'id': 'doc_a9c74e25_89',
  'content': '37.\u200bHow to Use Pytest Fixtures in a RAG-Based LangChain Streamlit App? - Stack \nOverflow, accessed November 20, 2025, \nhttps://stackoverflow.com/questions/79717950/how-to-use-pytest-fixtures-in-a-\nrag-based-langchain-streamlit-app \n38.\u200bEvaluate RAG pipeline using Ragas in Python with watsonx - IBM, accessed \nNovember 20, 2025, \nhttps://www.ibm.com/think/tutorials/evaluate-rag-pipeline-using-ragas-in-python\n-with-watsonx \n39.\u200bRun your first experiment - Ragas, accessed November 20, 2025, \nhttps://docs.ragas.io/en/stable/getstarted/experiments_quickstart/ \n40.\u200bRAG Evaluation: The Definitive Guide to Unit Testing ... - Confident AI, accessed \nNovember 20, 2025, \nhttps://www.confident-ai.com/blog/how-to-evaluate-rag-applications-in-ci-cd-pi\npelines-with-deepeval \n41.\u200bA Complete Guide to Unit Testing RAG in Continuous Development Workflow, \naccessed November 20, 2025, \nhttps://blog.griffinai.io/news/complete-g

In [22]:
rag_retriver.retrieve("What all has Piyush worked on?", top_k=10)

Generating embedding for query: What all has Piyush worked on?
Top_k: 10, Score Threshold: 0.0
Generating embedding for 1 texts


Batches: 100%|██████████| 1/1 [00:00<00:00,  4.16it/s]


Generated embedding with shape: (1, 384)
The distances retrieved are: [0.7024816274642944, 0.7451099157333374, 0.7696207761764526, 0.7750554084777832, 0.8066123127937317, 0.8125970363616943, 0.8330579400062561, 0.8337186574935913, 0.8364413380622864, 0.8369515538215637]
Retrieved 10 documents after applying score threshold


[{'id': 'doc_db3818c1_90',
  'content': 'Piyush Hemnani | Artificial Intelligence Graduate \nPiyushdeepak97@gmail.com | https://www.linkedin.com/in/piyush-hemnani-05b328189/ | +1-940-843-8403 | Redmond, WA \n \nPERSONAL SUMMARY \n AI/ML Engineer (MS, 4.0 GPA) focused on GenAI, NLP, and Computer Vision with a track record of productionizing models into enterprise workflows. \nBuilt a multi-agent LLM pipeline (Fal.ai STT + GPT-4.1 + n8n + Jira) that cuts BA ticket update time by 80-90%; delivered OCR automation with 96% \naccuracy / 93% field precision and 4× throughput vs. manual entry. Comfortable across PyTorch/TensorFlow/Hugging Face, MLflow/Docker/K8s, and \nAWS/GCP; strong at turning ambiguous requirements into measurable business impact.             \nEDUCATION \nUniversity of North Texas                                                                                                                                                                                    May 2025',
  'me

In [19]:
rag_retriver.retrieve("Who is piyush?", top_k=10)

Generating embedding for query: Who is piyush?
Top_k: 10, Score Threshold: 0.0
Generating embedding for 1 texts


Batches: 100%|██████████| 1/1 [00:00<00:00, 48.55it/s]

Generated embedding with shape: (1, 384)
The distances retrieved are: [0.7671207189559937, 0.8440251350402832, 0.8935506343841553, 0.9024507999420166, 0.9025073647499084, 0.9193494915962219, 0.9202035069465637, 0.9387513399124146, 0.9439467191696167, 0.9444816708564758]
Retrieved 10 documents after applying score threshold





[{'id': 'doc_db3818c1_90',
  'content': 'Piyush Hemnani | Artificial Intelligence Graduate \nPiyushdeepak97@gmail.com | https://www.linkedin.com/in/piyush-hemnani-05b328189/ | +1-940-843-8403 | Redmond, WA \n \nPERSONAL SUMMARY \n AI/ML Engineer (MS, 4.0 GPA) focused on GenAI, NLP, and Computer Vision with a track record of productionizing models into enterprise workflows. \nBuilt a multi-agent LLM pipeline (Fal.ai STT + GPT-4.1 + n8n + Jira) that cuts BA ticket update time by 80-90%; delivered OCR automation with 96% \naccuracy / 93% field precision and 4× throughput vs. manual entry. Comfortable across PyTorch/TensorFlow/Hugging Face, MLflow/Docker/K8s, and \nAWS/GCP; strong at turning ambiguous requirements into measurable business impact.             \nEDUCATION \nUniversity of North Texas                                                                                                                                                                                    May 2025',
  'me

### RAG Pipeline - Vector Store to LLM Output Generation

In [31]:
import os
from dotenv import load_dotenv
load_dotenv()

# Langchain imports
from langchain_openai import ChatOpenAI
from langchain_core.prompts import PromptTemplate
from langchain_core.prompts import HumanMessagePromptTemplate, SystemMessagePromptTemplate, ChatPromptTemplate
from langchain_core.messages import AIMessage
from langchain_core.runnables import RunnableSequence
from langchain_core.runnables import RunnableLambda, RunnablePassthrough, RunnableParallel

In [34]:
class OpenAI_LLM:
    ''' Handle Output generation using OpenAI LLMs '''

    def __init__(self, model_name: str = "gpt-4.1-mini", api_key: str = None):

        '''
        Initializing the OpenAI LLM
        
        Args:
            model_name: str - Name of the model to use
            api_key: str - OpenAPI Key
        '''
        
        self.model_name = model_name
        self.api_key = api_key or os.getenv("OPENAI_API_KEY")

        if not self.api_key:
            raise ValueError("OpenAI API key not provided. Set it via argument or environment variable")
        
        self.llm = ChatOpenAI(
            model=self.model_name, 
            openai_api_key=self.api_key,
            temperature=0.1,
            max_tokens=1024
            )
        
        print (f"Initialized OpenAI LLM with model: {self.model_name}")

        self.system_prompt = SystemMessagePromptTemplate.from_template(
            '''You are Piyush Hemnani's Personal Portfolio Agent.
Your job is to respond using only the retrieved context from the vector store.

Rules:

Ground every answer ONLY in retrieved text. No guessing or hallucinating.

If the question asks about personal details not found in retrieved docs, reply:

“That information is not available in my current portfolio dataset.”

Be structured and professional when answering recruiter or job-related queries.

When the user asks about “projects,” “skills,” or “experience,” summarize the retrieved info clearly and concisely.

If multiple retrieved chunks overlap, merge them into a clean narrative.

NEVER add new facts not found in retrieval.

If asked non-portfolio questions (e.g., unrelated trivia), politely redirect:

“I can only answer questions about Piyush based on the portfolio data I have.”

Tone:

Professional

Helpful

Concise

Resume-aware (structured, keyword-rich)'''
        )

        self.human_prompt = HumanMessagePromptTemplate.from_template(
                        '''Use the following context to answer the question.
                        ------------------------------------
                        Question:
                        {query}
                        ------------------------------------
                        Retrived Context:
                        {context}
                        ------------------------------------
            '''
        )


        self.chat_prompt = ChatPromptTemplate.from_messages(
            [self.system_prompt, self.human_prompt]
        )

        self.chains: RunnableSequence = self.chat_prompt | self.llm 

    def generate_response(self, query: str, context: str, max_length: int = 500) -> str:
        ''' 
        Generate response from LLM with retrieved context
        
        Args:
            query (str): The user query
            context (str): The retrieved context to condition the response
            max_length (int): Maximum length of the response
            
        Returns:
            str: Generated response from the LLM
        '''

        try:
            response = self.chains.invoke({"query": query, "context": context})
            return response.content
    
        except Exception as e:
            return f"Error generating response: {e}"
        

        


In [35]:
openai_llm = OpenAI_LLM()
openai_llm

Initialized OpenAI LLM with model: gpt-4.1-mini


<__main__.OpenAI_LLM at 0x1c9fa2f9fd0>

In [None]:
ret_docs = rag_retriver.retrieve("Who is Piyush Hemnani?", top_k=5)
for i, doc in enumerate(ret_docs):
    print(i)
    print(doc['content'])

context = "\n\n".join([doc['content'] for doc in ret_docs])

In [36]:
def format_docs(docs):
    if not docs:
        return ""

    try:
        return "\n\n".join(doc["content"] for doc in docs)
    
    except Exception as e:
        raise RuntimeError(f"Failed to format docs: {e}")  # don't pollute LLM context

In [37]:
retriever_runnable = RunnableLambda(lambda q: rag_retriver.retrieve(q, top_k=5))

# RAG Chain

rag_chain = (
    RunnablePassthrough()
    |{
        "query":RunnablePassthrough(),
        "context": RunnablePassthrough()
        | retriever_runnable
        | RunnableLambda(format_docs),
    }
    | openai_llm.chains
)

rag_chain_text = rag_chain | RunnableLambda(lambda msg: msg.content)

while True:
    q = input("Insert query (or type 'exit'): ")

    if q.lower() in ["exit", "quit"]:
        break

    answer = rag_chain_text.invoke(q)
    print("\n--- ANSWER ---\n")
    print(answer)
    print("\n--------------\n")


Generating embedding for query: Who is piyush?
Top_k: 5, Score Threshold: 0.0
Generating embedding for 1 texts


Batches: 100%|██████████| 1/1 [00:00<00:00, 74.25it/s]

Generated embedding with shape: (1, 384)
The distances retrieved are: [0.7671207189559937, 0.8440251350402832, 0.8935506343841553, 0.9024507999420166, 0.9025073647499084]
Retrieved 5 documents after applying score threshold






--- ANSWER ---

Piyush Hemnani is an Artificial Intelligence graduate with a Master of Science in Artificial Intelligence (concentration in Machine Learning) from the University of North Texas, maintaining a 4.0 GPA. He is an AI/ML Engineer specializing in Generative AI, Natural Language Processing (NLP), and Computer Vision, with experience in productionizing models into enterprise workflows. Piyush has developed solutions such as a multi-agent large language model pipeline that significantly reduces business analyst ticket update time and delivered OCR automation with high accuracy and throughput. He is proficient in technologies including PyTorch, TensorFlow, Hugging Face, MLflow, Docker, Kubernetes, AWS, and GCP. His background also includes a Bachelor of Science in Mechanical Engineering with Honors from Birla Institute of Technology and Science.

--------------

