# Installing packages

In [1]:
# %pip install -qU langchain_community pypdf langchain-openai langchain-text-splitters langgraph langchain  faiss-cpu pinecone python-dotenv

# Setting Environment Variables

In [None]:
# from dotenv import dotenv_values

# env_vars = dotenv_values(".env")  # Loads all variables from .env
# print(env_vars)  # This should print a dictionary with your environment variables


In [None]:
# import os
# from dotenv import load_dotenv, find_dotenv

# # Load environment variables from .env file
# load_dotenv(find_dotenv(".env"))

# # Access the environment variables safely
# OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
# PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")

# # Ensure API keys are loaded correctly
# if not OPENAI_API_KEY or not PINECONE_API_KEY:
#     raise ValueError("Missing API Key(s)! Check your .env file.")

# print("Environment variables loaded successfully!")


In [26]:
# import os

# os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
# os.environ["PINECONE_API_KEY"] = os.getenv("PINECONE_API_KEY")
# os.environ["LANGSMITH_API_KEY"] = os.getenv("LANGSMITH_API_KEY")
# os.environ["LANGSMITH_PROJECT"] = os.getenv("LANGSMITH_PROJECT")
# os.environ["LANGSMITH_TRACING_V2"] = "true"
# os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'

# print(os.environ["PINECONE_API_KEY"])


In [24]:

import os
from dotenv import load_dotenv

# Read key/value pairs from the local .env file into the process environment.
load_dotenv(dotenv_path=".env")

# Load API Keys Safely (os.getenv returns None for any variable that is not set)
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
LANGSMITH_API_KEY = os.getenv("LANGSMITH_API_KEY")
LANGSMITH_PROJECT = os.getenv("LANGSMITH_PROJECT")
LANGSMITH_TRACING_V2 = os.getenv("LANGSMITH_TRACING_V2")

# Fail fast with a clear message: the OpenAI and Pinecone keys are passed to the
# clients created later in this notebook, and a missing key would otherwise only
# surface there as a less obvious error. The LangSmith values are left optional.
_missing_keys = [
    name
    for name, value in (
        ("OPENAI_API_KEY", OPENAI_API_KEY),
        ("PINECONE_API_KEY", PINECONE_API_KEY),
    )
    if not value
]
if _missing_keys:
    raise ValueError(f"Missing API key(s): {', '.join(_missing_keys)}. Check your .env file.")



# Langsmith Tracing enabling
- Why use LangSmith?
    - Because we want to see the logs and inner workings of every step of our RAG pipeline.
    - It provides observability and monitoring for our LLM calls whenever they are invoked.
    - **TODO:** How does `tracing_is_enabled` work? Does it always report tracing as enabled, or is it linked to `LANGSMITH_TRACING_V2`?


In [2]:
from langsmith import utils
# Ask LangSmith whether tracing is currently enabled; the captured output below shows True.
# NOTE(review): presumably this reflects the LangSmith tracing environment variables
# loaded from .env — confirm against the langsmith documentation.
utils.tracing_is_enabled()

True

# **INDEXING THE DOCUMENTS(STATIC INDEXING):**

# **INITIALIZING PINECONE CLIENT & LOGGING**

In [25]:
import time
import logging
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_core.runnables import RunnableLambda, RunnablePassthrough, RunnableSequence
from pinecone import Pinecone, ServerlessSpec

# Name of the Pinecone index shared by the indexing and retrieval cells
index_name = "testprojectv1"

# Show INFO-level log records (pipeline progress messages) in the cell output;
# configured before the client is built so its start-up messages are shown too
logging.basicConfig(level=logging.INFO)

# Pinecone client authenticated with the key read from the environment
pc = Pinecone(api_key=PINECONE_API_KEY)


INFO:pinecone_plugin_interface.logging:Discovering subpackages in _NamespacePath(['/Users/swethavoora/Desktop/AI Projects/RAG_Pipeline/myenv/lib/python3.13/site-packages/pinecone_plugins'])
INFO:pinecone_plugin_interface.logging:Looking for plugins in pinecone_plugins.inference
INFO:pinecone_plugin_interface.logging:Installing plugin inference into Pinecone


# **Function to check if index exists, if not, create it**

In [6]:

def ensure_index():
    """Create the Pinecone index named by ``index_name`` if it does not exist yet.

    Uses the module-level ``pc`` client. A new index is created as a serverless
    index (AWS, us-east-1) with dimension 1536 and the cosine metric. After
    creation the function blocks until Pinecone reports the index as ready, so
    that an upsert issued right afterwards does not hit a half-initialized index.

    Raises:
        TimeoutError: if the new index is not ready within the wait budget.
    """
    existing_indexes = [index["name"] for index in pc.list_indexes()]
    if index_name in existing_indexes:
        logging.info(f"Index '{index_name}' already exists. Skipping creation.")
        return

    logging.info(f"Creating Pinecone index: {index_name}")
    pc.create_index(
        name=index_name,
        dimension=1536,
        metric='cosine',
        spec=ServerlessSpec(cloud='aws', region='us-east-1')
    )

    # A fixed sleep is only a guess at how long creation takes; poll the index
    # status instead, bounded by a deadline so the cell cannot hang forever.
    max_wait_seconds = 120
    deadline = time.monotonic() + max_wait_seconds
    while not pc.describe_index(index_name).status["ready"]:
        if time.monotonic() >= deadline:
            raise TimeoutError(
                f"Index '{index_name}' was not ready after {max_wait_seconds} seconds."
            )
        time.sleep(1)


In [38]:

# Open a handle to the index and print its statistics (dimension, per-namespace
# vector counts and total vector count, as seen in the output below).
index = pc.Index(index_name)
print(index.describe_index_stats())

INFO:pinecone_plugin_interface.logging:Discovering subpackages in _NamespacePath(['/Users/swethavoora/Desktop/AI Projects/RAG_Pipeline/myenv/lib/python3.13/site-packages/pinecone_plugins'])
INFO:pinecone_plugin_interface.logging:Looking for plugins in pinecone_plugins.inference


{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'ns1': {'vector_count': 49}, 'ns2': {'vector_count': 49}},
 'total_vector_count': 98}


In [35]:
# DESTRUCTIVE: delete_all=True removes every vector stored in this one namespace.
# NOTE(review): this namespace is not the one the indexing pipeline writes to ('ns1');
# presumably it is left over from another experiment — confirm before re-running.
index.delete(namespace='conv_vector_cda9c6d0-19ab-4e01-af89-10e7e7ba297d', delete_all=True)

{}

# **Function to load and split documents**

In [30]:

def load_and_split_documents(filepath):
    """Load a PDF and cut it into overlapping text chunks.

    Args:
        filepath: Path of the PDF file handed to ``PyPDFLoader``.

    Returns:
        A dict with three entries: ``"all_splits"`` (the chunk documents),
        ``"total_Splits:"`` (how many chunks were produced; the key really does
        end with a colon) and ``"message"`` (a fixed status string).
    """
    logging.info("Loading document...")
    pages = PyPDFLoader(filepath).load()

    logging.info("Splitting documents into chunks...")
    # 1000-character chunks that overlap by 200 characters; add_start_index
    # records each chunk's start offset in its metadata.
    chunker = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200, add_start_index=True)
    chunks = chunker.split_documents(pages)

    result = {"all_splits": chunks}
    result["total_Splits:"] = len(chunks)
    result["message"] = "Documents loaded and split successfully!"
    return result

# **Function to embed documents**

In [37]:

def embed_documents(inputs):
    """Embed every chunk produced by ``load_and_split_documents``.

    Args:
        inputs: Dict whose ``"all_splits"`` entry is the list of chunk documents;
            each chunk's ``page_content`` is the text that gets embedded.

    Returns:
        A dict with ``"all_embeddings"`` (one vector per chunk, in chunk order),
        ``"norms"`` (the L2 norm of each of the first five vectors, as a quick
        sanity check) and ``"message"`` (a fixed status string).
    """
    splits = inputs["all_splits"]
    logging.info("Generating embeddings...")
    embeddings_model = OpenAIEmbeddings(model="text-embedding-3-small", openai_api_key=OPENAI_API_KEY)
    embeddings = embeddings_model.embed_documents([split.page_content for split in splits])

    # Compute real L2 norms (to ensure embeddings aren't garbage). The previous
    # code summed only the first five components, which is not a norm and can be
    # near zero or negative even for perfectly good vectors.
    # NOTE(review): OpenAI embeddings are expected to be roughly unit length, so
    # values close to 1.0 are the healthy case — confirm for the chosen model.
    norms = [sum(x * x for x in e) ** 0.5 for e in embeddings[:5]]

    return {"all_embeddings": embeddings, "norms": norms, "message": "Embeddings generated successfully!"}


# **Function to upsert embeddings into Pinecone**

In [38]:

def upsert_embeddings(data):  # This function expects a dictionary
    """Write chunk embeddings into the ``ns1`` namespace of the Pinecone index.

    Args:
        data: Dict with two entries, as produced by the indexing chain:
            ``data["splits"]["all_splits"]`` (the chunk documents) and
            ``data["embeddings"]["all_embeddings"]`` (one vector per chunk).

    Returns:
        A summary string stating how many vectors were upserted.

    Raises:
        ValueError: if the number of chunks and the number of embeddings differ.
    """
    splits = data["splits"]["all_splits"]
    embeddings = data["embeddings"]["all_embeddings"]
    # zip() below would silently drop the surplus items on a mismatch, storing
    # fewer vectors than expected without any error.
    if len(splits) != len(embeddings):
        raise ValueError(
            f"Got {len(splits)} splits but {len(embeddings)} embeddings; they must match one-to-one."
        )
    logging.info(f"Upserting {len(embeddings)} documents into Pinecone...")
    index = pc.Index(index_name)

    vectors = [
        {
            "id": f"doc_{split.metadata.get('source')}_{i}_{split.metadata.get('page_label', 'no_label')}",  # Ensure ID is valid and unique(because vectors of same Id get overwritten by the latest vector)
            "values": emb,
            "metadata": {"text": split.page_content}
        }
        for i, (split, emb) in enumerate(zip(splits, embeddings)) if len(emb) > 0
    ]

    BATCH_SIZE = 100  # Recommended batch size
    # Ceiling division: `len // BATCH_SIZE + 1` over-counts by one whenever the
    # number of vectors is an exact multiple of BATCH_SIZE (e.g. 100 -> "of 2").
    total_batches = (len(vectors) + BATCH_SIZE - 1) // BATCH_SIZE
    for batch_number, start in enumerate(range(0, len(vectors), BATCH_SIZE), start=1):
        batch = vectors[start:start + BATCH_SIZE]
        index.upsert(vectors=batch, namespace='ns1')
        logging.info(f"Upserted batch {batch_number} of {total_batches}")

    logging.info(f"Upserted {len(vectors)} vectors into the vector store.")
    return f"Upserted {len(vectors)} vectors into the vector store."



# **Using LangChain's RunnableSequence for an Indexing Chain**

In [39]:
from langchain_core.runnables import RunnableLambda, RunnableSequence

# Wrap each plain function in RunnableLambda so it can take part in a `|` pipeline
upsert_runnable = RunnableLambda(upsert_embeddings)
embed_runnable = RunnableLambda(embed_documents)
load_split_runnable = RunnableLambda(load_and_split_documents)

# A straight load -> embed -> upsert sequence is not enough here: upsert_embeddings
# reads BOTH the splits and the embeddings. The dict step therefore fans the
# load/split output out to two branches — "splits" passes it through untouched,
# "embeddings" runs the embedding step on it — and hands the combined dict on.
indexing_chain = load_split_runnable | {"splits": RunnablePassthrough(), "embeddings": embed_runnable} | upsert_runnable

# **Run the Indexing Pipeline**

In [40]:

def run_indexing_pipeline(filepath):
    """Index one PDF end to end: ensure the index exists, then load, embed and upsert.

    Args:
        filepath: Path of the PDF, passed as the input of ``indexing_chain``.

    Returns:
        None. The summary string produced by the chain is not returned.
    """
    # The index must exist before the chain's final step writes vectors to it.
    ensure_index()

    _summary = indexing_chain.invoke(filepath)

    logging.info("Indexing pipeline completed successfully!")


In [41]:
# Index the sample PDF; the path is relative to the notebook's working directory.
run_indexing_pipeline("./data/BOFA_safedepositbox_disclosures.pdf")

INFO:root:Index 'testprojectv1' already exists. Skipping creation.
INFO:root:Loading document...
INFO:root:Splitting documents into chunks...
INFO:root:Generating embeddings...
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:root:Upserting 49 documents into Pinecone...
INFO:pinecone_plugin_interface.logging:Discovering subpackages in _NamespacePath(['/Users/swethavoora/Desktop/RAGProject/myenv/lib/python3.13/site-packages/pinecone_plugins'])
INFO:pinecone_plugin_interface.logging:Looking for plugins in pinecone_plugins.inference
INFO:root:Upserted batch 1 of 1
INFO:root:Upserted 49 vectors into the vector store.
INFO:root:Indexing pipeline completed successfully!


In [17]:
# DESTRUCTIVE cleanup: drops the whole index. It is disabled by default because
# the retrieval cells further down query this same index, so running the
# notebook top to bottom would otherwise delete it right before it is needed.
DELETE_INDEX = False  # set to True only when you really want to drop the index
if DELETE_INDEX:
    pc.delete_index(index_name)

# IGNORE THE BELOW CODE TILL RETRIEVAL & GENERATION

# Initializing a Pinecone Client
- We are going to use Pinecone database for Indexing our documents

In [10]:
# from pinecone import Pinecone

# pc = Pinecone(api_key=PINECONE_API_KEY)

# Creating an Index
- What is an Index?
  - An index defines the dimension of vectors to be stored and the similarity metric to be used when querying them.
- Is it different from Vector store?
- How does one know the dimension to be used?
  - This can be known from the embedding model that you are willing to choose. Each embedding model provides vectors of fixed length. Meaning, when they convert the text into numerical representation, the numerical representation will be a series of numbers of this fixed length. 
  - Eg : 1536 for OpenAI's text-embedding-3-small model
- We are going to use static Indexing, meaning, we are going to Index our curated set of documents only once(or periodically when we want to update our Index). All the end users will use the RAG, which will retrieve from the same Index.
- An advantage of Pinecone is that it is serverless, so we don't have to worry about provisioning, scaling, or maintaining the servers that host the index.


In [25]:
# from pinecone import ServerlessSpec
# index_name = "testprojectv1"

# pc.create_index(
#     name=index_name,
#     dimension=1536, # Replace with your model dimensions
#     metric="cosine", # Replace with your model metric
#     spec=ServerlessSpec(
#         cloud="aws",
#         region="us-east-1"
#     ) 
# )

# Document Loading

- document loaders return a list of document objects.
- each page in the original document is instantiated as a document object with page content, metadata and other optional attributes

# Splitting the documents into chunks
- because all models have a limit on the context window (len(input) + len(output))
- so, having these chunks will help us retrieve docs and stay within the approved context window length. instead of taking the entire document as the model's context

# Defining the Embeddings model, which can be used to embed chunks of text into numerical representation

- Each numerical representation stores the semantic meaning of a chunk of information, which can later be used to retrieve relevant information through similarity search.

# Creating a function to embed splits

# Embedding the splits

# Storing the embeddings in Pinecone
- Upsert the generated vector embeddings into the `ns1` namespace of your index:

In [20]:
# print(index.describe_index_stats())

# This marks the completion of Indexing part of the RAG pipeline
- Choosing a source of information
- Loading the source of information(the document)
- Once loaded, we split the document into chunks to suit the context length for most of the llm models
- Once split, we define our desired embedding model
- Using the embedding model, we embed the chunks and store the resulting vectors in our Pinecone index

# RETRIEVAL AND GENERATION

### CREATING MODELS, PROMPTS AND CHAINS
- These will be used as part of our retrieval and generation stages of a RAG pipeline.

# CHAINING
- To implement chain of retriever -> prompt -> model invocation -> output parser, we might need runnables.
- What are runnables? These are classes or functions that implement an `invoke` method, which takes an input, processes it, and returns an output
- All these runnables can be chained using the '|' pipe operator
- This '|' operator ensures that the output of one runnable becomes the input of the following runnable in the chain.

# Defining the Embedding Model(same as the one we used for Indexing)

In [42]:
# Handle to the Pinecone index that the retriever will query
index = pc.Index(index_name)
# Same embedding model as in indexing, so questions and chunks share one vector space
embeddings_model = OpenAIEmbeddings(model="text-embedding-3-small", openai_api_key=OPENAI_API_KEY)

INFO:pinecone_plugin_interface.logging:Discovering subpackages in _NamespacePath(['/Users/swethavoora/Desktop/RAGProject/myenv/lib/python3.13/site-packages/pinecone_plugins'])
INFO:pinecone_plugin_interface.logging:Looking for plugins in pinecone_plugins.inference


# **Creating the retriever**
- This retriever will have the question parameter (It takes one input argument)
- This retriever will retrieve the relevant chunks or docs from the vector store index using 'similarity search'.
- We use include_metadata in the index.query, because our page_content(the text) is actually part of our metadata key.
- We can always modify the way we want our documents to be.
- Knowing the structure of the payload/documents helps us write correct code for the RAG chain
- Always wrap the question in `[]` when calling `embed_documents`, because it expects a list of texts and returns one vector per text.
- If it is not a list, each character of the string will be treated as a separate document and embedded separately.

In [43]:
def retriever(question):
    """Fetch the three chunks most similar to ``question`` from the ``ns1`` namespace.

    Args:
        question: The user's question as a plain string.

    Returns:
        The Pinecone query response; its ``matches`` carry the chunk text under
        ``metadata["text"]`` because ``include_metadata=True`` is requested.
    """
    # embed_query takes the raw string and returns ONE flat vector, which is what
    # index.query expects for `vector`. embed_documents([question]) returns a
    # list of vectors (a nested list), i.e. the wrong shape for a single query.
    query_vector = embeddings_model.embed_query(question)
    similar_docs = index.query(vector=query_vector, top_k=3, namespace="ns1", include_metadata=True)
    return similar_docs

# **Modify the retrieved_docs to be a context of certain format**

In [44]:
def formatContext(retrieved_docs):
    """Join the text of every retrieved match into one newline-separated string.

    Args:
        retrieved_docs: Mapping with a ``'matches'`` entry; each match exposes a
            ``metadata`` mapping that holds the chunk text under ``"text"``.

    Returns:
        The match texts in their original order, separated by ``"\\n"``
        (an empty string when there are no matches).
    """
    chunk_texts = []
    for match in retrieved_docs['matches']:
        chunk_texts.append(match.metadata["text"])
    return "\n".join(chunk_texts)

# Converting the retriever and formatcontext into runnables as '|' can only be used to chain runnables
- Runnables are components or classes that implement the 'invoke' method inside them
- Or, the regular functions need to be wrapped by the runnablelambda wrapper to convert them into runnables
- OR WE COULD DIRECTLY USE THE FUNCTIONS ENCLOSED IN A LIST, AS AN ARGUMENT TO RUNNABLESEQUENCE.

In [48]:
from langchain_core.runnables import RunnableLambda

# RunnableLambda gives a plain function the Runnable interface that `|` requires
formatContext_runnable = RunnableLambda(formatContext)  # formats the matches into one context string
retriever_runnable = RunnableLambda(retriever)  # looks up matching chunks for a question


# **Prompt Templating**

- prompt templating is something we use to provide the model with instructions on how it should behave, what will it receive as part of the prompt.
- in this case, the context is the formatted context created from the retrieved chunks from the vector store

In [45]:
from langchain.prompts import ChatPromptTemplate


# Prompt template with two placeholders: {context} (the formatted retrieved
# chunks) and {question} (the user's question). It tells the model to answer
# from the context only and to admit when it does not know.
prompt = ChatPromptTemplate.from_template("""
    Answer the user question based on the following context.
    If you dont know the answer, just say you dont know.
                                          
    Context: {context} 
                                          
    Question: {question}""")

# **Creating a gpt-3.5-turbo model**
- This is the LLM model that will be invoked by the prompt

In [46]:
from langchain_openai.chat_models import ChatOpenAI

# Chat model that receives the filled-in prompt; authenticated with the OpenAI key read from the environment.
model = ChatOpenAI(openai_api_key = OPENAI_API_KEY, model="gpt-3.5-turbo")

# String Parser 
- To get only content as the output from the AIMessage object

In [47]:
def outputParser(response):
    """Return only the ``content`` attribute of a model response (e.g. an AIMessage)."""
    text = response.content
    return text

# USING RUNNABLE SEQUENCE TO CHAIN THE FUNCTIONS THEMSELVES(BELOW COMMENTS)

In [4]:
# from langchain_core.runnables import RunnableSequence

# indexing_chain = RunnableSequence([load_and_split_documents, embed_documents, upsert_embeddings])


# Defining the RAG-pipeline Chain
- Outer pipeline:
    - Inner pipeline (retriever | formatContext):
        - retriever will fetch relevant docs
        - formatContext will format the retrieved documents
    - Dictionary of {context, question} are passed to prompt
    - The output of the prompt is passed to the model
    - The output of the model is passed to the outputParser
    - The User receives the output of the entire chain (The content of the AIMessage object)

when we create a chain like below: 
- the input (your question string) is passed individually to each key in that dictionary. In other words:

-- “context” key:

The retriever receives the same input string (e.g. “What is the locking and unlocking system like at BOFA?”).
The retriever internally uses that string to embed and do a similarity search in the Pinecone index.
Whatever the retriever returns is piped to formatContext.

-- “question” key:

RunnablePassthrough() also receives the same input string. It simply passes it along with no change.


In [49]:
from langchain_core.runnables import RunnablePassthrough
# RAG chain: the dict step feeds the same input question to both entries —
# "context" retrieves and formats matching chunks, "question" passes the string
# through unchanged — then the filled prompt goes to the model and the answer
# text is extracted from the model's response.
rag_chain = {
    "context": retriever_runnable | formatContext_runnable,
    "question": RunnablePassthrough(),
} | prompt | model | outputParser

# Invoking the Chain

In [51]:

# Question: the chain takes the plain question string and returns the answer text
rag_chain.invoke("What is the locking and unlocking system like at BOFA?")

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


'At Bank of America (BOFA), renters are prohibited from placing additional locks on the box, including the sleeve inside the box. If unauthorized locks are discovered, the bank reserves the right to remove them without notice. The bank is not responsible for any damage to the lock or contents in this situation. The bank also mentions that they are not liable for any delays caused by the failure of the locks on the box to operate. Additionally, renters receive two keys and agree to return them upon surrendering the box or terminating the agreement. The bank may charge a key deposit and reserves the right to drill open the box if needed, with two bank employees present to remove, examine, and inventory the contents.'