In [37]:
import os
from dotenv import load_dotenv, find_dotenv

# Read the local ".env" file so the os.getenv lookups below can see its values.
load_dotenv(dotenv_path=".env")


# os.getenv returns None for any variable that is not set.
# PINECONE_API_KEY is the only one of these passed explicitly to a client in this notebook.
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
# NOTE(review): the LANGSMITH_* names are not referenced again in the visible cells;
# presumably LangSmith reads the same variables straight from the environment — confirm.
LANGSMITH_API_KEY = os.getenv("LANGSMITH_API_KEY")
LANGSMITH_PROJECT = os.getenv("LANGSMITH_PROJECT")
LANGSMITH_TRACING_V2 = os.getenv("LANGSMITH_TRACING_V2")

In [18]:
from langsmith import utils
# Sanity check: ask LangSmith whether tracing is enabled; the cell displays the returned value.
utils.tracing_is_enabled()

True

In [19]:
import time
import logging
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.runnables import RunnableLambda, RunnablePassthrough, RunnableSequence
from pinecone import Pinecone, ServerlessSpec

# Configure logging at INFO level so the pipeline's progress messages are shown
logging.basicConfig(level=logging.INFO)

# Initialize the Pinecone client with the API key read from the environment
pc = Pinecone(api_key=PINECONE_API_KEY)

# Name of the Pinecone index used by both the indexing and the retrieval cells
index_name = "testprojectv2"

In [20]:
from sentence_transformers import SentenceTransformer

# Sentence-transformers model used to embed both the document chunks and the user question.
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
st_model = SentenceTransformer(EMBEDDING_MODEL_NAME)
# Hard-coded to the model's output size; this value is passed as the Pinecone index
# dimension, so it must be updated by hand if EMBEDDING_MODEL_NAME changes.
EMBEDDING_DIM = 384  # all-MiniLM-L6-v2 output size


INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: cpu
INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: all-MiniLM-L6-v2
'(ReadTimeoutError("HTTPSConnectionPool(host='huggingface.co', port=443): Read timed out. (read timeout=10)"), '(Request ID: 0c7ac3fd-5029-4338-9f61-c027a611b302)')' thrown while requesting HEAD https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2/resolve/main/./modules.json
Retrying in 1s [Retry 1/5].


In [21]:
def ensure_index(timeout=60, poll_interval=1):
    """Create the Pinecone index ``index_name`` if it does not exist yet.

    When the index is already listed by ``pc.list_indexes()`` nothing is created.
    Otherwise a serverless cosine index of dimension ``EMBEDDING_DIM`` is created
    and this function polls ``pc.describe_index`` until the index reports ready,
    instead of sleeping for a fixed, arbitrary amount of time.

    Args:
        timeout: Maximum number of seconds to wait for a newly created index
            to become ready.
        poll_interval: Seconds to sleep between readiness checks.

    Raises:
        TimeoutError: If the new index is not ready within ``timeout`` seconds.
    """
    existing_indexes = [index["name"] for index in pc.list_indexes()]
    if index_name in existing_indexes:
        logging.info(f"Index '{index_name}' already exists. Skipping creation.")
        return

    logging.info(f"Creating Pinecone index: {index_name}")
    pc.create_index(
        name=index_name,
        dimension=EMBEDDING_DIM,
        metric='cosine',
        spec=ServerlessSpec(cloud='aws', region='us-east-1')
    )

    # Poll the control plane rather than guessing how long provisioning takes:
    # returns as soon as the index is ready and fails loudly if it never is.
    deadline = time.monotonic() + timeout
    while not pc.describe_index(index_name).status["ready"]:
        if time.monotonic() >= deadline:
            raise TimeoutError(
                f"Pinecone index '{index_name}' was not ready after {timeout} seconds."
            )
        time.sleep(poll_interval)

In [22]:
def load_and_split_documents(filepath):
    """Load a PDF and cut it into overlapping text chunks.

    Args:
        filepath: Path of the PDF handed to ``PyPDFLoader``.

    Returns:
        dict with three entries:
        - ``"all_splits"``: the chunks produced by the text splitter;
        - ``"total_Splits:"``: the number of chunks (NOTE(review): the key
          really does end with a colon — kept as-is for compatibility);
        - ``"message"``: a fixed status string.
    """
    logging.info("Loading document...")
    # The caller-supplied path is used directly, so any PDF location works.
    pages = PyPDFLoader(filepath).load()

    logging.info("Splitting documents into chunks...")
    splitter_options = {
        "chunk_size": 1000,
        "chunk_overlap": 200,
        "add_start_index": True,
    }
    chunks = RecursiveCharacterTextSplitter(**splitter_options).split_documents(pages)

    result = {
        "all_splits": chunks,
        "total_Splits:": len(chunks),
        "message": "Documents loaded and split successfully!",
    }
    return result

In [23]:
from sentence_transformers import SentenceTransformer

# NOTE(review): this instantiates the same model a second time — an earlier cell already
# assigns st_model from EMBEDDING_MODEL_NAME. Kept so this cell also works when run on its own.
st_model = SentenceTransformer(EMBEDDING_MODEL_NAME)

def embed_documents(inputs):
    """Embed the text of every chunk with the module-level ``st_model``.

    Args:
        inputs: Mapping with an ``"all_splits"`` entry; each item must expose
            a ``page_content`` attribute.

    Returns:
        dict with three entries:
        - ``"all_embeddings"``: the array returned by ``st_model.encode``;
        - ``"norms"``: for each of the first five embeddings, the sum of its
          first five components as a float (NOTE(review): despite the key
          name this is a quick sample value, not a vector norm);
        - ``"message"``: a fixed status string.
    """
    chunks = inputs["all_splits"]
    logging.info("Generating embeddings (sentence-transformers)...")
    vectors = st_model.encode(
        [chunk.page_content for chunk in chunks],
        convert_to_numpy=True,
    )  # shape: (N, EMBEDDING_DIM)

    sample_sums = []
    for vector in vectors[:5]:
        sample_sums.append(float(vector[:5].sum()))

    return {
        "all_embeddings": vectors,
        "norms": sample_sums,
        "message": "Embeddings generated successfully with sentence-transformers!"
    }


INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: cpu
INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: all-MiniLM-L6-v2


In [24]:
def upsert_embeddings(data, batch_size=100, namespace='ns1'):
    """Upsert chunk embeddings into the Pinecone index ``index_name`` in batches.

    Args:
        data: Dictionary with two entries:
            ``data["splits"]["all_splits"]`` — the document chunks, and
            ``data["embeddings"]["all_embeddings"]`` — one embedding per chunk,
            in the same order.
        batch_size: Number of vectors sent per upsert request.
        namespace: Pinecone namespace the vectors are written to.

    Returns:
        A summary string stating how many vectors were upserted.
    """
    splits = data["splits"]["all_splits"]
    embeddings = data["embeddings"]["all_embeddings"]
    logging.info(f"Upserting {len(embeddings)} documents into Pinecone...")
    index = pc.Index(index_name)

    vectors = [
        {
            # IDs must be unique: a vector with an existing ID overwrites the stored one.
            "id": f"doc_{split.metadata.get('source')}_{i}_{split.metadata.get('page_label', 'no_label')}",
            # Send a plain list of floats rather than relying on the client to convert arrays.
            "values": emb.tolist() if hasattr(emb, "tolist") else list(emb),
            "metadata": {"text": split.page_content}
        }
        for i, (split, emb) in enumerate(zip(splits, embeddings)) if len(emb) > 0
    ]

    # Ceiling division: the previous "len // size + 1" over-counted by one whenever
    # the number of vectors was an exact multiple of the batch size (or zero).
    total_batches = (len(vectors) + batch_size - 1) // batch_size
    for batch_number, start in enumerate(range(0, len(vectors), batch_size), start=1):
        batch = vectors[start:start + batch_size]
        index.upsert(vectors=batch, namespace=namespace)
        logging.info(f"Upserted batch {batch_number} of {total_batches}")

    summary = f"Upserted {len(vectors)} vectors into the vector store."
    logging.info(summary)
    return summary

In [25]:
from langchain_core.runnables import RunnableLambda, RunnableSequence

# Turn the plain functions into Runnables so they can be composed with "|"
load_split_runnable = RunnableLambda(load_and_split_documents)
embed_runnable = RunnableLambda(embed_documents)
upsert_runnable = RunnableLambda(upsert_embeddings)

# Indexing chain built with LangChain's | operator:
#   1. load and split the PDF;
#   2. the dict step feeds that same result to both branches — "splits" passes it
#      through unchanged, "embeddings" runs embed_documents on it;
#   3. upsert_embeddings receives the resulting {"splits": ..., "embeddings": ...} dict.
indexing_chain = (
    load_split_runnable 
    | {
        "splits": RunnablePassthrough(),
        "embeddings": embed_runnable
    }
    | upsert_runnable
)

In [26]:
def run_indexing_pipeline(filepath):
    """Run the full indexing flow for one PDF.

    Calls ``ensure_index()`` first, then invokes ``indexing_chain`` with
    ``filepath`` and logs a completion message. The chain's return value is
    discarded, so this function returns ``None``.

    Args:
        filepath: Passed unchanged as the input of ``indexing_chain``.
    """
    ensure_index()
    
    indexing_chain.invoke(filepath)
    
    logging.info("Indexing pipeline completed successfully!")

In [27]:
# Run the indexing pipeline on the PDF. The relative path means the file must exist
# in the notebook's working directory.
run_indexing_pipeline("Euthyphro.pdf")

INFO:root:Creating Pinecone index: testprojectv2
INFO:root:Loading document...
INFO:root:Splitting documents into chunks...
INFO:root:Generating embeddings (sentence-transformers)...
Batches: 100%|██████████| 2/2 [00:02<00:00,  1.33s/it]
INFO:root:Upserting 53 documents into Pinecone...
INFO:root:Upserted batch 1 of 1
INFO:root:Upserted 53 vectors into the vector store.
INFO:root:Indexing pipeline completed successfully!


In [28]:
# Handle to the Pinecone index. The retriever function defined later reads this
# module-level `index`, so this cell must run before any query is made.
index = pc.Index(index_name)
# Print the index statistics to confirm the upsert landed.
print(index.describe_index_stats())

{'_response_info': {'raw_headers': {'connection': 'keep-alive',
                                    'content-length': '175',
                                    'content-type': 'application/json',
                                    'date': 'Sat, 27 Dec 2025 10:33:01 GMT',
                                    'grpc-status': '0',
                                    'server': 'envoy',
                                    'x-envoy-upstream-service-time': '89',
                                    'x-pinecone-request-id': '6702328674259901553',
                                    'x-pinecone-request-latency-ms': '88',
                                    'x-pinecone-response-duration-ms': '90'}},
 'dimension': 384,
 'index_fullness': 0.0,
 'memoryFullness': 0.0,
 'metric': 'cosine',
 'namespaces': {'ns1': {'vector_count': 53}},
 'storageFullness': 0.0,
 'total_vector_count': 53,
 'vector_type': 'dense'}


In [29]:
# reuse st_model and the Pinecone `index` handle from the cells above

def retriever(question, top_k=3, namespace="ns1"):
    """Return the Pinecone matches most similar to ``question``.

    The question is embedded with the module-level ``st_model`` and used to
    query the module-level ``index``.

    Args:
        question: The user question as a string.
        top_k: Number of matches to request.
        namespace: Pinecone namespace to search.

    Returns:
        The response of ``index.query``, with metadata included for each match.
    """
    # encode() is given a one-element list, so it returns one row per input;
    # take that single row and convert it to the plain list Pinecone expects.
    embedded_question = st_model.encode([question], convert_to_numpy=True)[0].tolist()
    similar_docs = index.query(
        vector=embedded_question,
        top_k=top_k,
        namespace=namespace,
        include_metadata=True
    )
    return similar_docs

In [30]:
def formatContext(retrieved_docs):
    """Join the stored text of every match into one newline-separated string.

    Args:
        retrieved_docs: Query result whose ``'matches'`` entry is an iterable
            of items exposing ``["metadata"]["text"]``.

    Returns:
        The match texts in their original order, separated by ``"\\n"``;
        an empty string when there are no matches.
    """
    chunk_texts = []
    for match in retrieved_docs['matches']:
        chunk_texts.append(match["metadata"]["text"])
    return "\n".join(chunk_texts)

In [31]:
from langchain_core.runnables import RunnableLambda

# Wrap retriever and formatContext as Runnables so they can be piped together
# with "|" when the RAG chain is assembled.
retriever_runnable = RunnableLambda(retriever)
formatContext_runnable = RunnableLambda(formatContext)

In [33]:
from langchain_core.prompts import ChatPromptTemplate

# Prompt template with two placeholders, {context} and {question}; the RAG chain
# supplies both as keys of its input dict.
prompt = ChatPromptTemplate.from_template("""
    Answer the user question based on the following context.
    If you dont know the answer, just say you dont know.
                                          
    Context: {context} 
                                          
    Question: {question}""")

In [38]:
from langchain_google_genai import ChatGoogleGenerativeAI

# Chat model that produces the final answer from the filled-in prompt.
model = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash",
    temperature=1.0,  # set explicitly; NOTE(review): confirm this sampling temperature is intended for context-grounded answers
    max_tokens=None,
    timeout=None,
    max_retries=2,
)

In [39]:
def outputParser(response):
    """Return the ``content`` attribute of a model response.

    Args:
        response: Any object exposing a ``content`` attribute.

    Returns:
        The value of ``response.content``, unchanged.
    """
    answer_text = response.content
    return answer_text

In [40]:
from langchain_core.runnables import RunnablePassthrough
# RAG chain. The input question is fanned out into a dict:
#   "context"  — retrieve matches for the question and join their text;
#   "question" — the question itself, passed through unchanged.
# That dict fills the prompt, the prompt goes to the model, and outputParser
# reduces the model response to its content.
rag_chain = (
    {"context": retriever_runnable | formatContext_runnable, "question": RunnablePassthrough()}
    | prompt
    | model
    | outputParser
)

In [44]:
# Ask a question; the chain input is the raw question string and the cell
# displays the value returned by outputParser.
rag_chain.invoke("What is the book about?")

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches: 100%|██████████| 1/1 [00:00<00:00, 30.63it/s]
INFO:google_genai.models:AFC is enabled with max remote calls: 10.
INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent "HTTP/1.1 200 OK"


"Based on the provided context, the book is a dialogue between Socrates and Euthyphro. It covers:\n\n*   A philosophical discussion about the relationship between fear and reverence.\n*   Socrates' accusation by Meletus for corrupting the youth.\n*   Euthyphro's personal case of prosecuting his own father for murder and the debate surrounding what is considered holy and unholy in religious matters."