In [21]:
! pip install --upgrade langchain langchain-google-genai

Collecting langchain
  Downloading langchain-1.2.6-py3-none-any.whl.metadata (4.9 kB)
Collecting langchain-google-genai
  Downloading langchain_google_genai-4.2.0-py3-none-any.whl.metadata (2.7 kB)
Collecting langchain-core<2.0.0,>=1.2.7 (from langchain)
  Downloading langchain_core-1.2.7-py3-none-any.whl.metadata (3.7 kB)
Collecting uuid-utils<1.0,>=0.12.0 (from langchain-core<2.0.0,>=1.2.7->langchain)
  Downloading uuid_utils-0.14.0-cp39-abi3-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl.metadata (4.9 kB)
Collecting google-genai<2.0.0,>=1.56.0 (from langchain-google-genai)
  Downloading google_genai-1.59.0-py3-none-any.whl.metadata (53 kB)
Collecting google-auth<3.0.0,>=2.47.0 (from google-auth[requests]<3.0.0,>=2.47.0->google-genai<2.0.0,>=1.56.0->langchain-google-genai)
  Downloading google_auth-2.47.0-py3-none-any.whl.metadata (6.4 kB)
Downloading langchain-1.2.6-py3-none-any.whl (108 kB)
Downloading langchain_core-1.2.7-py3-none-any.whl (490 kB)
Downloading uu

In [33]:
import os
from dotenv import load_dotenv

# Load environment variables from a .env file. No explicit path is passed,
# so python-dotenv's default lookup is used. GOOGLE_API_KEY (read below via
# os.getenv) is presumably defined there — confirm.
load_dotenv()
from langchain.agents import create_agent
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_google_genai import GoogleGenerativeAIEmbeddings

from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS

from langchain.tools import tool
from langchain.messages import SystemMessage
from langchain_core.output_parsers import JsonOutputParser

# Both Gemini clients are given the same key, so read it from the environment once.
google_api_key = os.getenv('GOOGLE_API_KEY')

# Chat model handed to the agent further down.
gemini_llm = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash",
    temperature=0.1,
    google_api_key=google_api_key,
)

# Embedding model used to build and query the vector store.
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/text-embedding-004",
    google_api_key=google_api_key,
)

# Build the FAISS index from every PDF found directly inside `base_directory`.
base_directory = "base"

# os.listdir() returns entries in arbitrary order; sorting keeps the order in which
# documents are indexed stable between runs.
files = sorted(os.listdir(base_directory))

documents = []
for file in files:
    # Compare case-insensitively so files such as "paper.PDF" are not skipped.
    if file.lower().endswith(".pdf"):
        loader = PyPDFLoader(os.path.join(base_directory, file))
        documents.extend(loader.load())

# Building an index from an empty list cannot work and fails deep inside the
# vector-store construction; stop here with an actionable message instead.
if not documents:
    raise FileNotFoundError(
        f"No PDF documents could be loaded from {base_directory!r}; "
        "add at least one .pdf file before building the vector store."
    )

vectorstore = FAISS.from_documents(documents, embeddings)

@tool
def retriever(query: str) -> str:
    """Tool that retrieves relevant documents from the vector store
    about medical connection between non-invasive features such as ECG and glucose levels.
    Each returned passage starts with a "Source file:" line naming the PDF it was taken from.
    """
    # NOTE: with @tool the docstring above doubles as the tool description shown to the
    # model, so it describes the output format for the model as well as for maintainers.
    docs = vectorstore.similarity_search(query, k=3)
    if not docs:
        # Give the model an explicit signal rather than an empty tool result.
        return "No relevant documents found."

    passages = []
    for doc in docs:
        # The system prompt asks the agent to name the most relevant PDF file, so the
        # file name has to be part of the tool output. The loader is assumed to store
        # the originating path under metadata["source"]; fall back to "unknown" otherwise.
        source_name = os.path.basename(str(doc.metadata.get("source", "unknown")))
        passages.append(f"Source file: {source_name}\n{doc.page_content}")
    return "\n\n".join(passages)



from pydantic import BaseModel, Field
from typing import List
from langchain_core.output_parsers import PydanticOutputParser

# Schema of the JSON answer the agent is asked to produce.
# NOTE: the Field descriptions below are embedded in `format_instructions`
# (see its displayed value), i.e. they are part of the prompt text sent to the model.
class AgentResponse(BaseModel):
    # Recommendations for the user. The "2 items / 10 tokens" limits are only stated
    # in the description text; nothing in this class enforces them.
    recommend: List[str] = Field(description="List of 2 actionable recommendations (max 10 tokens each)")
    # Name of the PDF file the answer is based on.
    document: str = Field(description="Name of the most relevant document PDF file")

# Parser derived from AgentResponse. In the visible cells only its format
# instructions are used, to tell the model which JSON schema to follow.
parser = PydanticOutputParser(pydantic_object=AgentResponse)
format_instructions = parser.get_format_instructions()

# Prompt text: role, tool usage, and the required output format (with the
# schema instructions interpolated).
system_prompt_text = f"""
    You are a Research Analyst AI assistant.
    Use the "retriever" tool to find relevant information from the provided documents
    to answer user queries accurately and comprehensively.

    Return a dictionary in the following format:
    'recommend': a list of 2 actionable recommendations based on the retrieved information (maximum 10 tokens),
    'document': name of the most relevant document pdf file from which the information was retrieved

    {format_instructions}

    IMPORTANT: Return ONLY the JSON object, with NO markdown code blocks, NO backticks, NO extra text.
    """

# Wrap the text as the system message handed to the agent.
system_prompt = SystemMessage(content=system_prompt_text)

# Assemble the agent: Gemini chat model plus the retriever tool, steered by the system prompt.
agent = create_agent(
    model=gemini_llm,
    system_prompt=system_prompt,
    tools=[retriever],
)


In [34]:
# Display the format instructions that are interpolated into the system prompt.
format_instructions

'The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"recommend": {"description": "List of 2 actionable recommendations (max 10 tokens each)", "items": {"type": "string"}, "title": "Recommend", "type": "array"}, "document": {"description": "Name of the most relevant document PDF file", "title": "Document", "type": "string"}}, "required": ["recommend", "document"]}\n```'

In [35]:
from langchain.messages import HumanMessage
def query_to_invokable(msg: str) -> dict:
    """Wrap a user question in the input mapping passed to ``agent.invoke``.

    Args:
        msg: The user's question as plain text.

    Returns:
        A dict with a single ``'messages'`` key holding a one-element list
        that contains a ``HumanMessage`` built from ``msg``.
    """
    return {'messages': [HumanMessage(content=msg)]}
# Build the agent input from the user's question, then run the agent once.
query = query_to_invokable(
    msg="My heart is beating very fast and I feel dizzy. Can my glucose levels be the reason?",
)
response = agent.invoke(query)

In [36]:
# Show the content of the last message in the agent's result.
response['messages'][-1].content

[{'type': 'text',
  'text': '{"recommend": ["Check glucose levels", "Consult a doctor"], "document": "Health and Technology (2023) 13:145–154"}',
  'extras': {'signature': 'CskGAXLI2nzl+r9+TmEjunfawrc2eS0K8jK1lNrpaBffls4GcizATNK37vmaMwGHF4QkINwx4VXavElnvzqkyS6GtQHfvkyN8fX0mUDyG6H9z9XkT1rAc7ENotnGVcNA1btIEy7kCACSBL6MBZ9Szk2nepDGn717FnsCtfL5s/d9I2QpRRe5ed60MntoHAXbO8QEXl/BkdUgX32xBvVHc2WOZdCdTE9RsPjyCzyaK4rTaBRTu8HFazIFAoEiLY9aWnoZ38irUgpQl3WLW1DBapYx8h0iz3bfYATv2cIfn0d8red6/uaYw8CEf9m+dHDPoTIJlT5i1g12+9lDjWlY1wUoJfuF9nBzE4ubEQLSUcU8CmcnMdM08F5DrsrGZM9czsEpRDwo54KsD0sdiLXbOhcm/DesKPEKJBi/aksoyuPzBQx0mVV7epz8lTyc3+lMzb+3mQko9QNQI7ARR7FGlCwsmddCVHpGHLetinMUE35Wz4X0ulVSD+0U5ptO1rsD7D5CmCOnRjmQnoLwiF4gQegLaVZsikzrXeiKVQeUDBtidjDJYGFG306BgzYePggZG14dHyVFYbbHpJSluWXeZTrlqao1I5x5qRsod+jojbBNpfs8vyM72wioA8kAUagVvFIisA4EGe7/wdYw09WdFwQOUjarvsYrmg1ppFtFgfqVPfHQvTsM7gnGqgD5VwOBmRB2AfHF48m/oWhS0frZbK0ah9NY6V6ToHlIMXwreID90eQqEu3eLmPsuPU71mc8OP/rdSkNPK8y7n1xh4U5veIiI2x6ljQUK0Q4Dz43M2FL0JZOxIzKXEoWniX

In [38]:
import json

# Parse the agent's final answer as JSON.
# Reset first so that a failed parse cannot leave a stale value from an earlier run
# of this cell for the cells below to pick up.
parsed_response = None

final_content = response['messages'][-1].content
# Message content can be a plain string or a list of content blocks (as in the
# output displayed above); collect the text parts in either case.
if isinstance(final_content, str):
    final_message = final_content
else:
    final_message = "".join(
        block if isinstance(block, str) else block.get('text', '')
        for block in final_content
    )

try:
    parsed_response = json.loads(final_message)
except json.JSONDecodeError as e:
    # Keep the best-effort behaviour: report the problem and show the raw text.
    print(f"Failed to parse JSON: {e}")
    print(f"Raw content: {final_message}")

In [42]:
# Echo the user's question (the first message of the run) ...
print("HumanMessage:")
first_message = response['messages'][0]
print(first_message.content)

# ... followed by one bullet per parsed recommendation.
print("AI Recommendations:")
for recommendation in parsed_response['recommend']:
    print(f"- {recommendation}")

HumanMessage:
My heart is beating very fast and I feel dizzy. Can my glucose levels be the reason?
AI Recommendations:
- Check glucose levels
- Consult a doctor
