In [1]:
## Load environment variables
import os
from dotenv import load_dotenv

# Merge variables from a local .env file (if one is found) into os.environ.
load_dotenv()

# Fail fast, with a readable message, when a required key is missing or empty.
# `os.environ.get` is used rather than `os.environ[...]`: indexing raises a
# bare KeyError for an unset variable, so the assertion message would never
# be shown in exactly the case it was written for.
for _required_var in ("LANGCHAIN_API_KEY", "GROQ_API_KEY", "OPENAI_API_KEY"):
    assert os.environ.get(_required_var), f"Please set the {_required_var} environment variable"

# Location of the raw input data, relative to the notebook's working directory.
DATA_DIR = "../../data/raw"
# CSV file that later cells read; kept as a plain string.
DATA_CSV_PATH = f"{DATA_DIR}/Fixed_Synthetic_Data.csv"

In [2]:
## Read CSV file with LangChain
from langchain_community.document_loaders.csv_loader import CSVLoader

# Point LangChain's CSVLoader at the CSV file, then load it eagerly;
# `data` holds whatever the loader returns for that file.
loader = CSVLoader(DATA_CSV_PATH)
data = loader.load()

In [None]:
## Load the CSV file into a SQLite database (table: orders)
import pandas as pd
from sqlalchemy import create_engine, MetaData, Table, text

# Build the SQLite database next to the raw CSV. `sqlite_db_path` is reused by
# a later cell to open the same database.
sqlite_db_path = DATA_DIR + "/orders.db"
engine = create_engine(f"sqlite:///{sqlite_db_path}")
df = pd.read_csv(DATA_CSV_PATH)

# try/finally guarantees the engine is disposed even if the write, the
# reflection or the sample query raises.
try:
    # Write the frame to the "orders" table. if_exists="replace" drops and
    # recreates the table, so re-running the cell does not duplicate rows.
    inserted_count = df.to_sql(name="orders", con=engine, if_exists="replace", index=False)
    if inserted_count is None:
        # to_sql is documented to return None in some cases; after a successful
        # "replace" every row of the frame has been written.
        inserted_count = len(df)
    print(f"Inserted {inserted_count} rows into the orders table")

    ## Check database
    # Reflect the table definition back from the database to confirm the schema.
    table = Table('orders', MetaData(), autoload_with=engine)
    print(f"Columns in table '{table.name}':")
    print(table.columns.values())

    # Open a connection only for the read, and release it as soon as the
    # sample rows have been fetched.
    with engine.connect() as conn:
        rows = conn.execute(text("SELECT * FROM orders LIMIT 5")).fetchall()
    print(f"Sample rows in table '{table.name}':")
    for row in rows:
        print(row)
finally:
    engine.dispose()

In [None]:
## Initialize LLM and SQL toolkit
from langchain_groq import ChatGroq
from langchain_openai import ChatOpenAI
from langchain_community.utilities import SQLDatabase
from langchain_community.agent_toolkits import SQLDatabaseToolkit

# Two chat models are created: one served by Groq, one by OpenAI.
groq_llm = ChatGroq(model="llama3-8b-8192")
openai_llm = ChatOpenAI(
    model="gpt-4o-mini",
    api_key=os.environ['OPENAI_API_KEY'],
)

# Wrap the SQLite file produced earlier so LangChain tools can query it.
db_uri = f"sqlite:///{sqlite_db_path}"
db = SQLDatabase.from_uri(db_uri)
print(db.get_usable_table_names())

# NOTE(review): the toolkit is given groq_llm, whereas the agent cell passes
# openai_llm as its model — confirm the split is intentional.
toolkit = SQLDatabaseToolkit(db=db, llm=groq_llm)
tools = toolkit.get_tools()
print(tools)

In [5]:
## Initialize agent
from langgraph.prebuilt import create_react_agent
from langchain import hub

## Pull the SQL-agent system prompt from the LangChain hub; it carries the
## rules the agent follows when generating SQL queries.
prompt_template = hub.pull("langchain-ai/sql-agent-system-prompt")

# Fill in the template's `dialect` and `top_k` fields.
prompt_values = {"dialect": "SQLite", "top_k": 5}
system_message = prompt_template.format(**prompt_values)

# NOTE(review): newer langgraph releases may prefer a `prompt=` keyword over
# `state_modifier=` — confirm against the installed version.
agent_executor = create_react_agent(openai_llm, tools, state_modifier=system_message)

In [None]:
# Show the prompt template object pulled from the hub (not the formatted
# system_message) for inspection.
print(prompt_template)

In [None]:
## Stream the agent's run for a sample compliance question
example_query = "You are working for a financial institution. You are tasked with compliance reporting for the orders of the day. Identify anomalies and provide reasons they are suspicious."

# Each streamed item is indexed by "messages"; only the last message of each
# item is pretty-printed as the run progresses.
stream_input = {"messages": [("user", example_query)]}
for step in agent_executor.stream(stream_input, stream_mode="values"):
    latest_message = step["messages"][-1]
    latest_message.pretty_print()

In [None]:
# Run the same question in a single blocking call instead of streaming.
invoke_input = {"messages": [("user", example_query)]}
output = agent_executor.invoke(invoke_input)

# Bare last expression: the notebook displays the last message of the run.
output["messages"][-1]

In [8]:
## Parse output
