In [29]:
from langchain_google_genai import ChatGoogleGenerativeAI,GoogleGenerativeAIEmbeddings
from langchain_core.prompts import ChatPromptTemplate
from langgraph.graph import StateGraph, MessagesState,START,END
from langgraph.graph.message import add_messages
from langgraph.prebuilt import create_react_agent
from pydantic import BaseModel, Field
from typing_extensions import Annotated, Literal,TypedDict, List
from langchain_core.tools import tool

from dotenv import load_dotenv
import os
load_dotenv()

True

In [30]:
# Create the shared chat model and send one prompt as a smoke test.
# Every later cell depends on `llm`, so a failure here is reported and then
# re-raised: continuing would only defer the error to a less obvious place.
try:
    llm = ChatGoogleGenerativeAI(
        model="gemini-2.5-flash",
        temperature=0)
    response = llm.invoke("Are you ready to help me")
    print(response.content)
except Exception as e:
    print(f"Error initializing LLM: {e}")
    raise
    

Yes, I am! I'm ready to help you with whatever you need.

Please tell me how I can assist you.


## Document Load

In [31]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import CharacterTextSplitter, RecursiveCharacterTextSplitter

In [35]:
# Load the cover-letter PDF, then cut the loaded documents into chunks.
file_path = './data/cover_letter.pdf'
loader = PyPDFLoader(file_path, mode='single')
docs = loader.load()

# Split on single newlines: 1000-character chunk size with 100 characters of overlap.
spliter = CharacterTextSplitter(
    separator="\n",
    chunk_size=1000,
    chunk_overlap=100,
)
texts = spliter.split_documents(docs)

In [36]:
# texts
# Exploratory only: prints the CharacterTextSplitter API help. Nothing is
# assigned here, so later cells do not depend on this one.
help(CharacterTextSplitter)

Help on class CharacterTextSplitter in module langchain_text_splitters.character:

class CharacterTextSplitter(langchain_text_splitters.base.TextSplitter)
 |  CharacterTextSplitter(
 |      separator: 'str' = '\n\n',
 |      is_separator_regex: 'bool' = False,
 |      **kwargs: 'Any'
 |  ) -> 'None'
 |
 |  Splitting text that looks at characters.
 |
 |  Method resolution order:
 |      CharacterTextSplitter
 |      langchain_text_splitters.base.TextSplitter
 |      langchain_core.documents.transformers.BaseDocumentTransformer
 |      abc.ABC
 |      builtins.object
 |
 |  Methods defined here:
 |
 |  __init__(
 |      self,
 |      separator: 'str' = '\n\n',
 |      is_separator_regex: 'bool' = False,
 |      **kwargs: 'Any'
 |  ) -> 'None'
 |      Create a new TextSplitter.
 |
 |  split_text(self, text: 'str') -> 'list[str]'
 |      Split into chunks without re-inserting lookaround separators.
 |
 |  ----------------------------------------------------------------------
 |  Data and o

In [37]:
# docs=loader.load()

In [38]:
# docs

In [39]:
# spliter = CharacterTextSplitter(chunk_size=300, chunk_overlap=0,separator="\n\n")
# texts = spliter.split_documents(docs)

In [40]:
# text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0,is_separator_regex=True, separators=["\n\n", "\n", " ", ""])

In [41]:
# text_splitter.split_documents(docs)

## Create vector store

In [43]:
# Embed the chunks and index them in a Chroma collection persisted on disk.
embeddings = GoogleGenerativeAIEmbeddings(model='models/gemini-embedding-001')

# Give every chunk a stable id. Without explicit ids, each run of this cell
# stores the same chunks again under fresh random ids, so the persisted
# collection keeps growing and similarity search starts returning repeats.
# With stable ids a re-run overwrites the existing entries instead.
# NOTE(review): ids are positional; if the chunking changes and yields fewer
# chunks, stale higher-numbered entries remain until the directory is rebuilt.
chunk_ids = [f"{file_path}#chunk-{i}" for i in range(len(texts))]
vectorstore = Chroma.from_documents(
    texts,
    embedding=embeddings,
    ids=chunk_ids,
    collection_name="sample_collection",
    persist_directory="./data/chroma_db2",
)

## Create retrieval tool

In [44]:
from langchain.tools.retriever import create_retriever_tool

# Expose the vector store's default retriever as a tool the agent can call.
# Arguments are, in order: the retriever, the tool name, and the description
# string attached to the tool.
retriver_tool = create_retriever_tool(
    vectorstore.as_retriever(),
    "retriever_tool",
    """This tool is used to retrieve relevant documents from the vector store based on the user's query.
                   the informmation is about a person named 'Natdanai intraraksa' who is a software engineer and has worked at various companies including Invitrace and others.
    """,
)

In [45]:
# Query the vector store directly with a sample question to inspect what it returns.
a = vectorstore.similarity_search("Which framework that I use to build the agent?")

In [46]:
a

[Document(id='e46a77e1-0ddb-4169-92c8-0facb5e9d480', metadata={'total_pages': 2, 'producer': 'Skia/PDF m140 Google Docs Renderer', 'creationdate': '', 'source': './data/cover_letter.pdf', 'creator': 'PyPDF', 'title': 'Cover letter for apexanalytix'}, page_content='networks.\n \nWe\n \nfine-tuned\n \nlarge\n \nlanguage\n \nmodels\n \nusing\n \nthe\n \nUnsloth\n \nframework\n \nto\n \ngenerate\n \nstructured\n \nmedical\n \ndocumentation\n \nfrom\n \nunstructured\n \nnotes,\n \nsignificantly\n \nreducing\n \nthe\n \nworkload\n \nfor\n \nhealthcare\n \nprofessionals.\n \nThis\n \nsolution\n \nintegrated\n \nretrieval-augmented\n \ngeneration\n \n(RAG)\n \nto\n \nprovide\n \nreal-time,\n \ncontextually\n \nrelevant\n \ncontent\n \nand\n \nimprove\n \naccuracy.\n \n Previously  at  iBotnoi,  I  collaborated  with  Phramongkutklao  and  \nSamutprakarn\n \nhospitals\n \nto\n \ndevelop\n \nAI-powered\n \nvirtual\n \nassistants\n \n(Nurse\n \nAI).\n \nThese\n \nassistants\n \nused\n \nfunction-

In [47]:
@tool
def retrieve_documents(query: str) -> list:
    """Search the indexed cover letter of Natdanai Intraraksa and return the passages most similar to the query."""
    # `@tool` hands the docstring above to the model as the tool description,
    # so it is kept to one plain sentence saying what the index contains.
    return vectorstore.similarity_search(query)

@tool
def multiply_numbers(a: int, b: int) -> int:
    """Multiply two numbers."""
    # Return the plain product so the result matches the description above,
    # which is what the model is told this tool does.
    return a * b

# Locally defined tools collected in one list.
# NOTE(review): query_vectorstore and Agent bind [retriver_tool] rather than
# this list — confirm whether `tools` is still needed.
tools = [retrieve_documents, multiply_numbers]

def query_vectorstore(state: MessagesState):
    """Run a ReAct agent equipped with the retriever tool over the conversation.

    Args:
        state: Graph state; its 'messages' entry is handed to the agent unchanged.

    Returns:
        A dict whose 'messages' key holds the message list produced by the agent.
    """
    system_prompt = ''' you are AI agent that answer the question by using tools'''
    react_agent = create_react_agent(llm, tools=[retriver_tool], prompt=system_prompt)
    outcome = react_agent.invoke({'messages': state['messages']})
    return {'messages': outcome['messages']}

def Agent(state: MessagesState):
    """Ask the chat model, with the retriever tool bound, for a single reply.

    Args:
        state: Mapping whose 'messages' entry is sent to the model.

    Returns:
        The model's response object itself (not a state-update dict).
    """
    return llm.bind_tools([retriver_tool]).invoke(state['messages'])

    

In [48]:
# Build a minimal state holding only the question, then run the same input
# through the single tool-bound model call and through the ReAct agent.
state = MessagesState(messages="What does Natdanai do at Samutprakarn hospitals?")

r = Agent(state)
r2 = query_vectorstore(state)

In [49]:
r

AIMessage(content='', additional_kwargs={'function_call': {'name': 'retriever_tool', 'arguments': '{"query": "What does Natdanai do at Samutprakarn hospitals?"}'}}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.5-flash', 'safety_ratings': []}, id='run--c7c51ca5-473b-4f18-9807-34dd6871c67c-0', tool_calls=[{'name': 'retriever_tool', 'args': {'query': 'What does Natdanai do at Samutprakarn hospitals?'}, 'id': 'db8b8d01-da13-4537-96c0-c77e8a396e9e', 'type': 'tool_call'}], usage_metadata={'input_tokens': 110, 'output_tokens': 95, 'total_tokens': 205, 'input_token_details': {'cache_read': 0}, 'output_token_details': {'reasoning': 66}})

In [50]:
r2['messages']

[HumanMessage(content='What does Natdanai do at Samutprakarn hospitals?', additional_kwargs={}, response_metadata={}, id='5d40f843-e043-403e-a991-d4cb59267349'),
 AIMessage(content='', additional_kwargs={'function_call': {'name': 'retriever_tool', 'arguments': '{"query": "What does Natdanai do at Samutprakarn hospitals?"}'}}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.5-flash', 'safety_ratings': []}, id='run--7f708c71-b12d-4952-a815-bc79707fae5f-0', tool_calls=[{'name': 'retriever_tool', 'args': {'query': 'What does Natdanai do at Samutprakarn hospitals?'}, 'id': '5ec96b22-7d58-44c6-a89d-5ce2d5d67468', 'type': 'tool_call'}], usage_metadata={'input_tokens': 121, 'output_tokens': 186, 'total_tokens': 307, 'input_token_details': {'cache_read': 0}, 'output_token_details': {'reasoning': 157}}),
 ToolMessage(content="networks.\n \nWe\n \nfine-tuned\n \nlarge\n \nlanguage\n \nmodels\n \nusing\n \nthe\n \nUn

## Grade Document


In [51]:
class GradeDocuments(BaseModel):
    """Grade documents using a binary score for relevance check."""

    # This model is the structured-output schema used by grade_documents.
    # The field is a plain `str`; the allowed values ('yes' / 'no') are only
    # stated in the description text, not enforced by the type.
    binary_score: str = Field(
        description="Relevance score: 'yes' if relevant, or 'no' if not relevant"
    )

In [52]:
def grade_documents(state: MessagesState) -> Literal["generate_answer", "rewrite_question"]:
    """Route to answer generation or question rewriting based on document relevance.

    The model is asked whether the retrieved documents are relevant to the
    question and its structured verdict selects the next node.

    Args:
        state: Graph state. 'messages' is normally a list of message objects;
            a bare string is also accepted and is then used as both the
            question and the documents.

    Returns:
        "generate_answer" when the verdict is yes, otherwise "rewrite_question"
        (including when the model returns no structured verdict at all).
    """
    prompt = ChatPromptTemplate.from_messages([
        ("system", "You are an expert at determining the relevance of documents to a given question."),
        ("user", """Given the question and the retrieved documents, determine if the documents are relevant to answer the question.
                    Respond with 'yes' if relevant, or 'no' if not relevant.
                    Question: {question}
                    Documents: {documents}""")
    ])

    messages = state['messages']
    if isinstance(messages, str):
        question = messages
        documents = messages
    else:
        question = messages[0].content
        # Grade what the retriever returned: the most recent tool message.
        # After a ReAct agent run the final message is the agent's own answer,
        # so taking messages[-1] would grade that answer instead of the documents.
        documents = next(
            (m.content for m in reversed(messages) if getattr(m, "type", None) == "tool"),
            None,
        )
        if documents is None:
            # No tool call happened; fall back to the last message.
            last = messages[-1]
            documents = last.content if hasattr(last, 'content') else str(messages)

    formatted_prompt = prompt.format_messages(
        question=question,
        documents=documents
    )

    llm_with_structured_output = llm.with_structured_output(GradeDocuments)
    response = llm_with_structured_output.invoke(formatted_prompt)

    # Tolerate a missing verdict and case/whitespace variations such as "Yes".
    verdict = (getattr(response, "binary_score", "") or "").strip().lower()
    if verdict == "yes":
        return "generate_answer"
    return "rewrite_question"
    
   

# Manual check of the router using the agent result `r2` from the earlier cell.
grade_documents(r2)

'generate_answer'

## Rewrite node


In [53]:

def rewrite_question(state: MessagesState):
    """Ask the model to rephrase the first message of the conversation.

    Args:
        state: Graph state; the content of state["messages"][0] is used as the question.

    Returns:
        A state update with one user-role message carrying the model's rewritten question.
    """
    template = (
    "Look at the input and try to reason about the underlying semantic intent / meaning.\n"
    "Here is the initial question:"
    "\n ------- \n"
    "{question}"
    "\n ------- \n"
    "Formulate an improved question:"
    )
    original_question = state["messages"][0].content
    rewrite_request = {"role": "user", "content": template.format(question=original_question)}
    rewritten = llm.invoke([rewrite_request])
    return {"messages": [{"role": "user", "content": rewritten.content}]}

In [54]:
def generate_answer(state: MessagesState):
    """Generate a concise answer to the first question from the retrieved context.

    Args:
        state: Graph state; state["messages"][0] is used as the question and
            the most recent tool message as the context.

    Returns:
        A state update whose 'messages' list contains the model's answer.
    """

    GENERATE_PROMPT = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer the question. "
    "If you don't know the answer, just say that you don't know. "
    "Use three sentences maximum and keep the answer concise.\n"
    "Question: {question} \n"
    "Context: {context}"
    )

    messages = state["messages"]
    question = messages[0].content

    # Use the retriever output (latest tool message) as context. After a ReAct
    # agent run the last message is the agent's own answer, not the documents.
    context = next(
        (m.content for m in reversed(messages) if getattr(m, "type", None) == "tool"),
        None,
    )
    if context is None:
        # No tool message present; fall back to the last message.
        context = messages[-1].content

    prompt = GENERATE_PROMPT.format(question=question, context=context)
    response = llm.invoke([{"role": "user", "content": prompt}])
    return {"messages": [response]}

## Construct Graph

In [55]:
# Assemble the workflow: the agent runs first, then grade_documents routes
# either to answer generation (which ends the run) or to a question rewrite
# that loops back to the agent.
workflow = StateGraph(MessagesState)

for node_name, node_fn in (
    ("agent", query_vectorstore),
    ("rewrite_question", rewrite_question),
    ("generate_answer", generate_answer),
):
    workflow.add_node(node_name, node_fn)

workflow.add_edge(START, "agent")

# Map each value grade_documents can return to the node it selects.
routes = {
    "generate_answer": "generate_answer",
    "rewrite_question": "rewrite_question",
}
workflow.add_conditional_edges('agent', grade_documents, routes)

workflow.add_edge("rewrite_question", "agent")
workflow.add_edge("generate_answer", END)
graph = workflow.compile()

In [60]:
# Run the compiled graph end to end on a sample question.
# Note: this rebinds `r`, which an earlier cell used for the Agent(...) result.
r = graph.invoke({'messages': "What does he do at previous company?"})

In [61]:
r

{'messages': [HumanMessage(content='What does he do at previous company?', additional_kwargs={}, response_metadata={}, id='13b2bfbe-91f2-498c-b987-c8b7b92eebd8'),
  AIMessage(content='', additional_kwargs={'function_call': {'name': 'retriever_tool', 'arguments': '{"query": "Natdanai Intraraksa\'s previous company roles and responsibilities"}'}}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.5-flash', 'safety_ratings': []}, id='run--2778207a-ecfe-4efe-b8f4-1181fbd67e47-0', tool_calls=[{'name': 'retriever_tool', 'args': {'query': "Natdanai Intraraksa's previous company roles and responsibilities"}, 'id': '02a8e811-8f65-4c5b-9731-22305697fb96', 'type': 'tool_call'}], usage_metadata={'input_tokens': 115, 'output_tokens': 124, 'total_tokens': 239, 'input_token_details': {'cache_read': 0}, 'output_token_details': {'reasoning': 94}}),
  ToolMessage(content="like\n \nLangGraph\n \nand\n \nCrewAI\n \nto\n \nmana