In [1]:
## Load environment variables
import os
from dotenv import load_dotenv

## Pull variables from a local .env file (if one exists) into os.environ
load_dotenv()

## Fail fast with a readable message when a required key is missing or empty.
## os.environ.get() is used instead of os.environ[...] because indexing raises a
## bare KeyError for an unset variable before the assert message can be shown.
for required_key in ("LANGCHAIN_API_KEY", "GROQ_API_KEY", "OPENAI_API_KEY"):
    assert os.environ.get(required_key), f"Please set the {required_key} environment variable"

## Relative paths to the raw data directory and the order CSV used by later cells
DATA_DIR = "../../data/raw"
DATA_CSV_PATH = DATA_DIR + "/Fixed_Synthetic_Data.csv"

In [2]:
## (No longer used) Read CSV file with LangChain
## NOTE(review): `data` is not referenced by any later cell visible here, so this
## cell looks like a candidate for removal — confirm before deleting.
from langchain_community.document_loaders.csv_loader import CSVLoader

## Build a loader pointed at the CSV path defined in the first cell
loader = CSVLoader(file_path=DATA_CSV_PATH)

## Run the loader and keep whatever it returns
data = loader.load()

In [None]:
## Convert CSV file to SQL file
import pandas as pd
from sqlalchemy import create_engine, MetaData, Table, text

## SQLite database file created next to the raw CSV; later cells reuse this path
sqlite_db_path = DATA_DIR + "/orders.db"
engine = create_engine(f"sqlite:///{sqlite_db_path}")
df = pd.read_csv(DATA_CSV_PATH)

try:
    ## Write the frame through the engine, replacing any existing "orders" table.
    ## The read connection below is only opened after the table has been written.
    inserted_rows = df.to_sql(name="orders", con=engine, if_exists="replace", index=False)
    print(f"Inserted {inserted_rows} rows into the orders table")

    ## Check database: reflect the table definition back from the database file
    table = Table('orders', MetaData(), autoload_with=engine)
    print(f"Columns in table '{table.name}':")
    print(table.columns.values())

    ## Fetch a handful of rows; the connection is closed as soon as they are read
    with engine.connect() as conn:
        sample_rows = conn.execute(text("SELECT * FROM orders LIMIT 5")).fetchall()
    print(f"Sample rows in table '{table.name}':")
    for row in sample_rows:
        print(row)
finally:
    ## Release the engine's connections even if one of the steps above raised
    engine.dispose()

In [4]:
## Initialize LLM and SQL toolkit
from langchain_groq import ChatGroq
from langchain_openai import ChatOpenAI
from langchain_community.utilities import SQLDatabase
from langchain_community.agent_toolkits import SQLDatabaseToolkit

## Chat models: the OpenAI one is handed to the agent in a later cell,
## the Groq one is handed to the SQL toolkit below
openai_llm = ChatOpenAI(
    api_key=os.environ['OPENAI_API_KEY'],
    model="gpt-4o-mini",
)
groq_llm = ChatGroq(model="llama3-8b-8192")

## Database wrapper over the SQLite file produced by the conversion cell
db = SQLDatabase.from_uri(f"sqlite:///{sqlite_db_path}")
print(db.get_usable_table_names())

## Tools the agent can call against the database
toolkit = SQLDatabaseToolkit(llm=groq_llm, db=db)
tools = toolkit.get_tools()
print(tools)

['orders']
[QuerySQLDataBaseTool(description="Input to this tool is a detailed and correct SQL query, output is a result from the database. If the query is not correct, an error message will be returned. If an error is returned, rewrite the query, check the query, and try again. If you encounter an issue with Unknown column 'xxxx' in 'field list', use sql_db_schema to query the correct table fields.", db=<langchain_community.utilities.sql_database.SQLDatabase object at 0x0000017C41CAFB10>), InfoSQLDatabaseTool(description='Input to this tool is a comma-separated list of tables, output is the schema and sample rows for those tables. Be sure that the tables actually exist by calling sql_db_list_tables first! Example Input: table1, table2, table3', db=<langchain_community.utilities.sql_database.SQLDatabase object at 0x0000017C41CAFB10>), ListSQLDatabaseTool(db=<langchain_community.utilities.sql_database.SQLDatabase object at 0x0000017C41CAFB10>), QuerySQLCheckerTool(description='Use this 

In [5]:
## Initialize agent
from langgraph.prebuilt import create_react_agent
from langchain import hub

## Fetch the hub-hosted SQL agent prompt (rules for generating the SQL queries)
## and fill in its two variables: the SQL dialect and the default row limit
prompt_template = hub.pull("langchain-ai/sql-agent-system-prompt")
system_message = prompt_template.format(top_k=5, dialect="SQLite")

## Show the raw template first, then the rendered system message
for shown in (prompt_template, system_message):
    print(shown)

## ReAct agent: OpenAI model + SQL tools, steered by the rendered system message
agent_executor = create_react_agent(
    tools=tools,
    model=openai_llm,
    state_modifier=system_message,
)

input_variables=['dialect', 'top_k'] input_types={} partial_variables={} metadata={'lc_hub_owner': 'langchain-ai', 'lc_hub_repo': 'sql-agent-system-prompt', 'lc_hub_commit_hash': '31156d5fe3945188ee172151b086712d22b8c70f8f1c0505f5457594424ed352'} messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['dialect', 'top_k'], input_types={}, partial_variables={}, template='You are an agent designed to interact with a SQL database.\nGiven an input question, create a syntactically correct {dialect} query to run, then look at the results of the query and return the answer.\nUnless the user specifies a specific number of examples they wish to obtain, always limit your query to at most {top_k} results.\nYou can order the results by a relevant column to return the most interesting examples in the database.\nNever query for all the columns from a specific table, only ask for the relevant columns given the question.\nYou have access to tools for interacting with the database.\

In [None]:
## Issue query
example_query = (
    "You are working for a financial institution. "
    "You are tasked with compliance reporting for the orders of the day. "
    "Identify anomalies and provide reasons they are suspicious."
)

## Stream the agent run and pretty-print the newest message of every emitted state
agent_request = {"messages": [("user", example_query)]}
for state in agent_executor.stream(agent_request, stream_mode="values"):
    latest_message = state["messages"][-1]
    latest_message.pretty_print()

In [None]:
## Run the same example query in a single invoke() call rather than streaming it
output = agent_executor.invoke({"messages": [("user", example_query)]})
## Bare last expression: the notebook displays the last entry of the returned messages
output["messages"][-1]

In [8]:
## Parse output


Chain: SQL agent --> Report generator LLM

In [25]:
## Initialize SQL agent
## Imports
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnableLambda

## Prompt template for SQL agent, which contains rules for generating required SQL queries
sql_prompt_template = hub.pull("langchain-ai/sql-agent-system-prompt")
## Render from the template pulled just above. Formatting the older `prompt_template`
## name here would make this cell depend on a previous cell having been run.
sql_system_message = sql_prompt_template.format(dialect="SQLite", top_k=5)

## Configure agent
openai_llm = ChatOpenAI(model="gpt-4o-mini", api_key=os.environ['OPENAI_API_KEY'])
db = SQLDatabase.from_uri(f"sqlite:///{sqlite_db_path}")
toolkit = SQLDatabaseToolkit(db=db, llm=openai_llm)
tools = toolkit.get_tools()
agent_executor = create_react_agent(
    model=openai_llm, tools=tools, state_modifier=sql_system_message
)

## Helper functions to format agent input and output
##TODO: Replace with LangChain output parsers and ResponseSchemas
## Input: wrap the rendered prompt text as a single user message, the same
## {"messages": [("user", ...)]} form the other agent calls in this notebook use
agent_input_helper = RunnableLambda(lambda x: {"messages": [("user", x.text)]})
## Output: keep only the text content of the last message the agent produced
agent_output_parser = RunnableLambda(lambda x: x["messages"][-1].content)

## Prompt template to specify required metrics
required_insights = """
    total orders placed, total volume, total value of order, breakdown of buy sell for each sector, most frequently ordered sectors, trades created over time, 
    ratio of order capacity, order lifetime breakdown, price instruction breakdown
"""
insight_prompt = PromptTemplate.from_template("From the order data, provide the following insights: {insights}")

## Construct SQL chain: prompt -> agent input dict -> agent -> plain string
sql_chain = insight_prompt | agent_input_helper | agent_executor | agent_output_parser

## Testing
result = sql_chain.invoke({"insights": required_insights})
print(type(result))
print(result)

<class 'str'>
Here are the insights derived from the order data:

1. **Total Orders Placed**: 5,000
2. **Total Volume**: 40,391,051
3. **Total Value of Orders**: Approximately 3,327,730,537.63 AUD

4. **Breakdown of Buy/Sell for Each Sector**:
   - CBA: Buy - 2,875,619 units, Value: 228,186,790.82 AUD
   - BHP: Buy - 2,623,657 units, Value: 224,850,195.62 AUD
   - JBH: Sell - 2,796,448 units, Value: 224,520,815.98 AUD
   - IRE: Buy - 2,500,637 units, Value: 217,443,058.81 AUD
   - ACDC: Sell - 2,596,188 units, Value: 215,362,106.44 AUD

5. **Most Frequently Ordered Sectors**:
   - CBA: 664 orders
   - ACDC: 644 orders
   - BHP: 635 orders
   - JBH: 623 orders
   - IRE: 620 orders

6. **Trades Created Over Time**:
   - February 2024: 5,000 trades

7. **Ratio of Order Capacity**:
   - Agency: 2,476 orders
   - Principal: 2,524 orders

8. **Order Lifetime Breakdown**:
   - End of Day: 1,026 orders
   - Fill And Kill: 977 orders
   - Fill Or Kill: 956 orders
   - Good Till Cancel: 1,035 or

In [28]:
## Initialize report generator LLM
## Imports
from langchain_core.output_parsers import StrOutputParser

## Example report handed to the model as a reference it may follow
sample_report = """
    Today, the firm processed a total of 250 orders across multiple sectors, with a combined order volume of 1.5 million shares and an order value of $75 million. Of these, 60% were buy orders, with the remaining 40% being sell orders. The majority of buy orders were concentrated in the Technology and Healthcare sectors, representing approximately 55% of the total order volume, reflecting a continued focus on growth-oriented stocks. Sell orders were predominantly in the Energy and Consumer Goods sectors, reflecting a shift in market sentiment toward risk aversion in those industries.

    The firm handled orders in both agency and principal capacities, with 70% of the trades executed in an agency capacity, ensuring that clients’ interests were prioritized. The remaining 30% were principal trades, where the firm acted as the counterparty. A significant portion of the orders (around 65%) had a short lifetime, executed within the first two hours of placement, indicating high market activity and a swift decision-making process among clients.

    In terms of price instructions, the majority of orders (75%) were executed using limit price instructions, with clients specifying the maximum or minimum prices at which they were willing to trade. The remaining 25% followed market-on-close instructions, aiming to execute trades at the day’s closing price.

    Overall, today’s trading activity was characterized by increased demand in growth sectors, with careful execution through predominantly limit orders and short lifetimes, ensuring optimal timing and price conditions for clients.
"""

## Instruction text with one slot for the example report and one for the insights
template = """
    You are working at a financial institution and are responsible for end of day reporting.
    Given the insights, generate a report. You may use the following example: 
    {sample_report}
    Write a report based on these insights:
    {insights}
"""
report_prompt = PromptTemplate(
    template=template,
    input_variables=["sample_report", "insights"],
)

## Chat model that writes the report
openai_llm = ChatOpenAI(
    api_key=os.environ['OPENAI_API_KEY'],
    model="gpt-4o-mini",
    temperature=1,
)

## Report pipeline: filled prompt -> chat model -> plain string
report_chain = report_prompt | openai_llm | StrOutputParser()

## Testing
##TODO: Find a way to pass the SQL insights to the report chain
## For now the insights come from `result`, the string produced by the SQL chain cell
report = report_chain.invoke({"insights": result, "sample_report": sample_report})
print(report)


**End of Day Reporting - February 2024**

**Date**: February 29, 2024  
**Prepared By**: [Your Name]  
**Institution Name**: [Financial Institution Name]  

---

### Executive Summary

Today, our firm managed a total of **5,000 orders**, encompassing a significant trading volume of **40,391,051** shares and a total order value of **approximately 3.33 billion AUD**. The trading activity reflected a dynamic market environment with varied demand across multiple sectors, demonstrating a balance between buy and sell orders driven by client sentiment and market conditions.

---

### Order Overview

- **Total Orders Placed**: 5,000
- **Total Volume**: 40,391,051 shares
- **Total Value**: 3,327,730,537.63 AUD

---

### Buy/Sell Breakdown by Sector

The following insights provide a detailed view of the orders placed by sector:

| **Sector** | **Order Type** | **Units** | **Order Value (AUD)** |
|------------|----------------|-----------|------------------------|
| CBA        | Buy            | 

In [None]:
## Construct chain