In [1]:
from dotenv import load_dotenv
import os

load_dotenv()

os.environ["LANGSMITH_TRACING"] = "true"
os.environ["LANGSMITH_API_KEY"] = os.getenv("LANGSMITH_API_KEY")

In [2]:
import getpass
import os

if not os.environ.get("GOOGLE_API_KEY"):
  os.environ["GOOGLE_API_KEY"] = os.getenv("GOOGLE_API_KEY")

from langchain.chat_models import init_chat_model

llm = init_chat_model("gemini-2.5-flash", model_provider="google_genai")

In [3]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

embeddings = GoogleGenerativeAIEmbeddings(model="models/gemini-embedding-001")

In [4]:
from langchain_chroma import Chroma

vector_store = Chroma(
    collection_name="example_collection",
    embedding_function=embeddings,
    persist_directory="./chroma_langchain_db",  # Where to save data locally, remove if not necessary
)

In [5]:
from langchain_community.document_loaders import DirectoryLoader

loader = DirectoryLoader(
    path=r"C:\Users\tejus\OneDrive\Desktop\Full_Stack\ML\RAG_Chatbot\rag_env\data",
    glob="*.pdf"
)

docs = loader.load()

print(f"No. of docs = {len(docs)}")
print(docs[0].page_content[100:500])



  from .autonotebook import tqdm as notebook_tqdm


No. of docs = 1
mprehensive, well-structured guide to the essential aspects of building generative AI systems. A must-read for any professional looking to scale AI across the enterprise.” Vittorio Cretella, former global CIO at P&G and Mars

“Chip Huyen gets generative AI. She is a remarkable teacher and writer whose work has been instrumental in helping teams bring AI into production. Drawing on her deep experti


In [6]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
  chunk_size = 1000,
  chunk_overlap = 200,
  add_start_index = True
)

splits = text_splitter.split_documents(docs)

print(f"Split data into {len(splits)} sub-documents.")


Split data into 1425 sub-documents.


In [7]:
document_ids = vector_store.add_documents(documents=splits)

print(document_ids[:3])

['d72ed5bc-cf87-4265-9a64-5fde351a56dd', '3a981f8c-0f72-4f30-9aca-2774d3d36755', '7e5d4628-bb21-48bb-9a6e-93caf7ac5100']


In [8]:
from langchain import hub

prompt = hub.pull("rlm/rag-prompt")

example_messages = prompt.invoke(
    {"context": "(context goes here)", "question": "(question goes here)"}
).to_messages()

assert len(example_messages) == 1
print(example_messages[0].content)

You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
Question: (question goes here) 
Context: (context goes here) 
Answer:


In [9]:
from langchain_core.documents import Document
from typing_extensions import List, TypedDict


class State(TypedDict):
    question: str
    context: List[Document]
    answer: str

In [10]:
def retrieve(state: State):
    retrieved_docs = vector_store.similarity_search(state["question"])
    return {"context": retrieved_docs}


def generate(state: State):
    docs_content = "\n\n".join(doc.page_content for doc in state["context"])
    messages = prompt.invoke({"question": state["question"], "context": docs_content})
    response = llm.invoke(messages)
    return {"answer": response.content}

In [11]:
from langgraph.graph import START, StateGraph

graph_builder = StateGraph(State).add_sequence([retrieve, generate])
graph_builder.add_edge(START, "retrieve")
graph = graph_builder.compile()

In [12]:
result = graph.invoke({"question":"Explain Finetuning?"})

print(f"Question: {result['question']}")
print(f"Context: {result['context']}")
print(f"Answer: {result['answer']}")



Question: Explain Finetuning?
Context: [Document(id='0bd14be2-5e2b-4255-b584-fdade8f27878', metadata={'start_index': 674694, 'source': 'C:\\Users\\tejus\\OneDrive\\Desktop\\Full_Stack\\ML\\RAG_Chatbot\\rag_env\\data\\AI Engineering Building Applications with Foundation Models (Chip Huyen) (Z-Library).pdf'}, page_content='There’s a lot to discuss. Let’s dive in!\n\nFinetuning Overview To finetune, you start with a base model that has some, but not all, of the capabilities you need. The goal of finetuning is to get this model to perform well enough for your specific task.\n\nFinetuning is one way to do transfer learning, a concept first introduced by Bozinov‐ ski and Fulgosi in 1976. Transfer learning focuses on how to transfer the knowledge gained from one task to accelerate learning for a new, related task. This is conceptu‐ ally similar to how humans transfer skills: for example, knowing how to play the piano can make it easier to learn another musical instrument.\n\nAn early large-sc