In [1]:
%reload_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import sqlite3
from langchain.chat_models import init_chat_model
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import JsonOutputParser

from dotenv import load_dotenv
load_dotenv()

True

In [3]:
# Vertex AI settings for the Gemini chat model, named once so they are easy to find and change.
VERTEX_MODEL_NAME = "gemini-2.5-flash"
VERTEX_PROJECT_ID = 'gen-lang-client-0311515393'
VERTEX_LOCATION = 'us-central1'

# Build the chat model through LangChain's provider-agnostic factory.
vertex_llm = init_chat_model(
    VERTEX_MODEL_NAME,
    model_provider="google_vertexai",
    project=VERTEX_PROJECT_ID,
    location=VERTEX_LOCATION,
)

# Last expression of the cell: display the concrete class that was instantiated.
type(vertex_llm)


langchain_google_vertexai.chat_models.ChatVertexAI

In [4]:

import os

# Build an OpenAI chat model through the same provider-agnostic factory.
# An explicitly passed api_key takes precedence over the environment, so a
# hard-coded placeholder would mask a real OPENAI_API_KEY (e.g. one loaded by
# load_dotenv() earlier in the notebook). Prefer the environment value and keep
# the placeholder only as a fallback so the cell still constructs without a key.
openai_llm = init_chat_model(
    "gpt-4o",  # or "gpt-3.5-turbo", "o1-preview", etc.
    model_provider="openai",
    api_key=os.environ.get("OPENAI_API_KEY") or "your-api-key-here",
)

# Last expression of the cell: display the concrete class that was instantiated.
type(openai_llm)

langchain_openai.chat_models.base.ChatOpenAI

In [5]:
from langchain.agents import create_agent

def get_weather(city: str) -> str:
    """Get weather for a given city."""
    # Stub tool for the demo agent: no lookup happens, every city gets the same sunny reply.
    # The docstring above is kept verbatim; presumably the agent framework shows it
    # to the model as the tool description (confirm before rewording it).
    sunny_reply = "It's always sunny in {}!".format(city)
    return sunny_reply

# The single user turn sent to the demo agent.
weather_request = {
    "messages": [{"role": "user", "content": "what is the weather in sf"}]
}

# Tool-calling agent backed by the Vertex AI chat model, with `get_weather`
# as its only tool.
agent = create_agent(
    system_prompt="You are a helpful assistant",
    tools=[get_weather],
    model=vertex_llm,  # alternative that was tried: model="gpt-4o"
)

# Run the agent; keeping the call as the cell's last expression displays the
# returned state (a dict holding the full message list).
agent.invoke(weather_request)

{'messages': [HumanMessage(content='what is the weather in sf', additional_kwargs={}, response_metadata={}, id='65607a88-c86c-4a9d-9926-6de5c478a753'),
  AIMessage(content='', additional_kwargs={'function_call': {'name': 'get_weather', 'arguments': '{"city": "sf"}'}, '__gemini_function_call_thought_signatures__': {'11563d10-0bd8-46c2-8d51-a77da39ab16e': 'CvIBAY89a1/RBfoXyuYwayGBmm362sATdVoCCTG2Jv+GM04eqoavjKlVBd3VSIlPjrnrEgKiiI0uQ12z8bb4wXPaFyrye76Y9anlhHgv6foDG82HWQz0iUsGOJok+g633oRYiObx+Ra9pubnEOZw/oXzoBCxgnCLEmS5HLIzleggV2DpRbdqpOnrJiqxqo2RF/eB/4bzWIPFTg5XZjSTUbiBaJvS+cbpl8ME2ktbsYarIQi1tlJ+sDjEXKDcbm39EXvIWHojaofOP7aYO23hjYWKIIPTFY6phHpIEDEsyWbZsj4v0c4zEPZ/ShlJFLSuzwDuv3A='}}, response_metadata={'is_blocked': False, 'safety_ratings': [], 'usage_metadata': {'prompt_token_count': 25, 'candidates_token_count': 5, 'total_token_count': 86, 'prompt_tokens_details': [{'modality': 1, 'token_count': 25}], 'candidates_tokens_details': [{'modality': 1, 'token_count': 5}], 'thoughts_token_coun

In [10]:
from typing import TypedDict
from dataclasses import dataclass
from langgraph.runtime import get_runtime
from langgraph.checkpoint.memory import InMemorySaver
from langchain_community.utilities import SQLDatabase
from langchain_core.tools import tool
from langchain.agents import create_agent


# Wrap the SQLite file `money_rag.db` in LangChain's SQLDatabase helper. This handle is
# later passed to the agent as its runtime context so tools can run SQL against it.
# NOTE(review): the three-slash URI is a relative path, presumably resolved against the
# kernel's working directory; confirm the notebook is started from the right folder.
db = SQLDatabase.from_uri("sqlite:///money_rag.db")

@dataclass
class RuntimeContext:
    """Per-run context object handed to the agent so that tools can reach the database.

    It is declared as the agent's `context_schema`, supplied on each `agent.stream(...)`
    call via `context=RuntimeContext(db=db)`, and read inside tools through
    `get_runtime(RuntimeContext).context`.
    """

    # Database wrapper that tools execute their SQL against.
    db: SQLDatabase

@tool
def execute_sql(query: str) -> str:
    """Execute a SQLite command and return results.

    Available Database Schema:

    Table: transactions
    Columns:
        - transaction_date (TIMESTAMP): Date and time of the transaction
        - description (TEXT): Transaction description (contains vendor/merchant name)
        - amount (REAL): Transaction amount (positive = spending, negative = payments/refunds)
        - category (TEXT): Transaction category
        - source_file (TEXT): Source file name
    
    """
    # NOTE: the docstring above is kept verbatim; presumably it is what the model
    # sees as the tool description, so rewording it would change agent behaviour.
    #
    # Args:
    #     query: SQL text produced by the model.
    # Returns:
    #     The database wrapper's result for the query, or a string starting with
    #     "Error:" (the prefix the system prompt tells the model to react to)
    #     when the query is rejected or fails.
    import re  # local import keeps this cell runnable on its own

    # The system prompt asks for read-only SELECTs, but the SQL comes from the
    # model and was previously executed unchecked. Reject anything that does not
    # start with SELECT (or WITH, for CTE-based SELECTs) before touching the DB.
    # This is a first line of defence, not a sandbox: a WITH-prefixed write
    # statement would still pass. For a hard guarantee, open the database
    # read-only as well.
    leading_keyword = re.match(r"\s*([A-Za-z]+)", query)
    if leading_keyword is None or leading_keyword.group(1).upper() not in ("SELECT", "WITH"):
        return "Error: only read-only SELECT queries are allowed (optionally starting with WITH)."

    # The database handle travels in the per-run context rather than a global.
    runtime = get_runtime(RuntimeContext)
    db = runtime.context.db
    try:
        return db.run(query)
    except Exception as e:
        # Deliberate best-effort: surface the failure as text so the model can
        # revise its SQL instead of aborting the whole agent run.
        return f"Error: {e}"

# Table definitions as reported by the database wrapper.
# NOTE(review): `schema_info` is not referenced again in this cell. The prompt below
# does not interpolate it; the schema the model sees is in `execute_sql`'s docstring.
schema_info = db.get_table_info()

# System prompt for the SQL agent: query rules plus the sign convention of the
# `amount` column. It is a plain string because it contains no placeholders.
SYSTEM = """You are a careful SQLite analyst.
Rules:
- Think step-by-step.
- When you need data, call the tool `execute_sql` with ONE SELECT query.
- Read-only; no INSERT/UPDATE/DELETE/ALTER/DROP/CREATE/REPLACE/TRUNCATE.
- Limit to 5 rows of output unless the user explicitly asks otherwise.
- If the tool returns 'Error:', revise the SQL and try again.
- Prefer explicit column lists; avoid SELECT *.
- **Spending Logic**:
    - Spending is represented by POSITIVE values (> 0) in the amount column.
    - NEGATIVE values (< 0) represent credit card payments or refunds and MUST NOT be included when calculating total spending or expenses.
"""

# In-memory checkpointer: conversation state is kept per thread id for the life of the kernel.
thread_checkpointer = InMemorySaver()

# SQL agent: Gemini model + the single `execute_sql` tool. `context_schema` declares the
# type of the per-run context object that tools read the database handle from.
agent = create_agent(
    model=vertex_llm,
    tools=[execute_sql],
    system_prompt=SYSTEM,
    context_schema=RuntimeContext,
    checkpointer=thread_checkpointer,
)

In [11]:
question = "how much total amount i spent for walmart?"
steps = []

# Thread id "1" names the conversation for the agent's checkpointer.
thread_config = {"configurable": {"thread_id": "1"}}
user_turn = {"messages": [{"role": "user", "content": question}]}

# Each streamed item is a state dict with a `messages` list: print only the newest
# message as it arrives, and keep every snapshot in `steps` for later inspection.
state_stream = agent.stream(
    user_turn,
    thread_config,
    stream_mode="values",
    context=RuntimeContext(db=db),
)
for snapshot in state_stream:
    snapshot["messages"][-1].pretty_print()
    steps.append(snapshot)


how much total amount i spent for walmart?
Tool Calls:
  execute_sql (d438ac6f-4bb2-4680-9c7f-740395f91ccf)
 Call ID: d438ac6f-4bb2-4680-9c7f-740395f91ccf
  Args:
    query: SELECT SUM(amount) FROM transactions WHERE description LIKE '%Walmart%' AND amount > 0
Name: execute_sql

[(1625.1,)]

[{'type': 'text', 'text': 'You spent 1625.1 in total for Walmart.', 'thought_signature': 'CuoCAY89a1+Jcd/Y+Uba4DpkXkaE0/pNRTJg6X/NqezO8uX1VTz5ciMtUkgobo40Y+BvmSTSNdFslUGh6sfjDB97t//OsGJDSh9m6WJ6p1TXqcA3I+JFE5ojZhsht/v/HQeD66DfC6x7mZQU3nllvCLFOCojp63B6FP4W8uunUoyDI9pKelrG2mNOYXIDXRLPl0G/5oA5e6YThevoJBaXMgF/JDe6s2XunLVQ/rcomlLT2w4GVIv/GZs6zuxqSjzSbDtLSKQtdYS6bBejakI99oWTJIqsRgQN/Ge4zJPUQLBUSV0BHQJKT0sQOMjSEPM5MuIMJCpajcb1bLLdg51H+l9xxEE2AIZAv9xHdQo6eSlEzEGKtEtQ4CZ8VeapM8dnotYj18cRzAZUkUXAUvBQSw73IxV9JWHqZ4uujRetw2UEdhFry9hBw3D79sbqiDTYznDVigxp7B0N4GEUi0pTc1R5bjs5unMBHP+1ug='}]


In [29]:
question = "what transcation did you take into account for this calculation?"
steps = []

# Same thread id as the previous question, so this turn continues that conversation.
thread_config = {"configurable": {"thread_id": "1"}}
user_turn = {"messages": [{"role": "user", "content": question}]}

# Print the newest message of every streamed state and collect the snapshots in `steps`.
state_stream = agent.stream(
    user_turn,
    thread_config,
    stream_mode="values",
    context=RuntimeContext(db=db),
)
for snapshot in state_stream:
    snapshot["messages"][-1].pretty_print()
    steps.append(snapshot)


what transcation did you take into account for this calculation?
Tool Calls:
  execute_sql (329f95cd-8c94-4322-9d85-16377d134fa5)
 Call ID: 329f95cd-8c94-4322-9d85-16377d134fa5
  Args:
    query: SELECT transaction_date, description, amount, category FROM transactions WHERE description LIKE '%Walmart%' AND amount > 0 LIMIT 5;
Name: execute_sql

[('2024-10-18 00:00:00', 'WALMART STORE 00332 HUNTSVILLE AL', 146.73, 'Merchandise'), ('2024-11-02 00:00:00', 'WALMART STORE 00332 HUNTSVILLE AL', 113.93, 'Merchandise'), ('2024-11-09 00:00:00', 'WALMART STORE 00332 HUNTSVILLE AL', 132.56, 'Merchandise'), ('2024-12-08 00:00:00', 'WALMART STORE 00332 HUNTSVILLE AL', 87.27, 'Merchandise'), ('2024-12-15 00:00:00', 'WALMART STORE 00332 HUNTSVILLE AL', 104.01, 'Merchandise')]

[{'type': 'text', 'text': 'The transactions taken into account for the calculation are:\n\n| transaction_date | description | amount | category |\n|---|---|---|---|\n| 2024-10-18 00:00:00 | WALMART STORE 00332 HUNTSVILLE AL | 

In [None]:
# Fetch the checkpointed state of conversation thread "1" from the agent.
thread_config = {"configurable": {"thread_id": "1"}}
state = agent.get_state(thread_config)

TypeError: tuple indices must be integers or slices, not str

In [None]:
# The snapshot returned by `get_state` is a named tuple, so string indexing fails.
# `.values` is its first field: the same object as `state[0]` (the dict holding
# `messages`), but self-describing instead of a magic positional index.
state.values

{'messages': [HumanMessage(content='how much total amount i spent for movie ticket in last 6 months?', additional_kwargs={}, response_metadata={}, id='9afa14d9-a99f-42eb-aa14-4d23b52b8582'),
  AIMessage(content='', additional_kwargs={'function_call': {'name': 'execute_sql', 'arguments': '{"query": "SELECT name FROM sqlite_master WHERE type=\'table\';"}'}, '__gemini_function_call_thought_signatures__': {'50c7a4f0-ab5d-4f57-a101-014774fe942c': 'CpEHAY89a1/KQi8HcFJ1I6MlrLMAWcrchfnwGFqGF2dNWNKKk23kMwModhY8HA9IaiF6a09aawThjAcHpZG2sVfBKAUpn1/AeFrYyOKjRgG2KYrHRJZO20h2et623xNT9GKauKHLJ/nhnM2JQKQ3iu1nYDk2TOPahY+dZxGfMBbAropiVA4v3INxa3pWDcb8YR1RonHb90HvEtPp7g+sY+2qZnJGfC+NMWpW05cSQ1MfhvsfW6LjZft5TbfVs5RlCGD6taJIJklKku47mbihxV39pfIbU2gGAX4QFhDO5+f7mOK+dIByDMlPZE8mJH2nnLThy25b5bDUFlIJ60ZB6pe4lJ92qrsko/wv35kx1gvjL7E5s3S1uOCfl6xk+prs8xhsfjkMdWkawCPZezzQ2nv0SZHTTVXfWJuEZLtC2h5KOZAug/LmfPaJoKn3XdYNajjPn5T2dwbjI/oc+fAhaaBJWtZgUPm6D/AtmetuL4YXW5o0OXZEdUBfTzgEl6P6DWk/OUXJGdolM4YfbnJ4MiaPdxeC5vpF0fd2vBDjH