In [1]:
%load_ext autoreload
%autoreload 2


In [2]:
import os
import json
import dotenv

In [3]:
dotenv.load_dotenv()

True

In [4]:
from langchain_core.tools import tool
from langchain_deepseek import ChatDeepSeek
from typing_extensions import Annotated, TypedDict, Literal
from langgraph.graph.message import add_messages
from langgraph.prebuilt import ToolNode, InjectedState
from langgraph.graph import StateGraph
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_experimental.utilities import PythonREPL

In [5]:
from src.utils_stock import get_top_nasdaq_stock_data

In [6]:
# Chat model shared by both agents below (each one binds its own tools to it).
# The API key is read from the DEEPSEEK_API_KEY environment variable;
# presumably it is supplied by the .env file loaded earlier in the notebook.
llm = ChatDeepSeek(model="deepseek-chat",
                   api_key=os.getenv("DEEPSEEK_API_KEY"))

In [7]:
class State(TypedDict):
    """Shared graph state handed to every node of the workflow."""
    # Conversation history; annotated with the add_messages reducer so that
    # the message lists returned by nodes are merged into the existing history.
    messages: Annotated[list, add_messages]
    # Look-back window passed to get_top_nasdaq_stock_data by the data acquisition tool.
    number_of_days_analysis: int
    # Supplied with the run input; not read by any code visible in this part of the notebook.
    email_address: str

### Data Acquisition Agent

In [8]:
@tool
def get_top_nasdaq_performance_stock_data(
    state: Annotated[State, InjectedState]
) -> str:
    """

    Get the day's top NASDAQ-100 gainer with symbol, percentage increase, sample data in markdown format, pandas dataframe info and csv data path for further python code analysis.

    Returns:
        str: Top nasdaq performance stock data information
    """
    # The docstring above doubles as the tool description shown to the model,
    # so its wording is deliberately left untouched.
    # `state` is injected from the graph state (InjectedState annotation); the
    # look-back window therefore comes from the run input, not from the model.
    lookback_days = state["number_of_days_analysis"]
    stock_payload = get_top_nasdaq_stock_data(lookback_days)

    # Serialise to pretty-printed JSON so the result travels as a plain string.
    return json.dumps(stock_payload, indent=4)

In [9]:
# Tools made available to the data acquisition agent.
data_acquisition_tools = [get_top_nasdaq_performance_stock_data]

In [10]:
# Graph node wrapping the data acquisition tools; registered in the workflow
# as "data_acquisition_tool".
data_acquisition_tools_node = ToolNode(data_acquisition_tools)

In [11]:
# Chat model with the data acquisition tools bound, so its replies can carry tool calls.
data_acquisition_agent = llm.bind_tools(data_acquisition_tools)

In [12]:
def data_acquisition_agent_node(state: State):
    """Run one turn of the data acquisition agent.

    Args:
        state: Current graph state; only ``messages`` is read here.

    Returns:
        dict: A copy of ``state`` whose ``messages`` entry is a one-element
        list holding the model reply.
    """
    model_reply = data_acquisition_agent.invoke(state["messages"])

    # Carry every other state key over unchanged and hand back only the new message.
    next_state = dict(state)
    next_state["messages"] = [model_reply]
    return next_state

In [13]:
# Define the conditional edge between the nodes
def data_acquisition_reflection(state: State) -> Literal["data_acquisition_tool", "python_analysis_agent"]:
    """Pick the node that follows the data acquisition agent.

    Returns:
        "data_acquisition_tool" when the latest message carries tool calls,
        otherwise "python_analysis_agent".
    """
    latest = state["messages"][-1]
    return "data_acquisition_tool" if latest.tool_calls else "python_analysis_agent"

In [14]:
# workflow = StateGraph(State)
# workflow.add_node("data_acquisition_agent", data_acquisition_agent_node)
# workflow.add_node("data_acquisition_tool", data_acquisition_tools_node)

# workflow.add_edge("__start__", "data_acquisition_agent")
# workflow.add_conditional_edges(
#     "data_acquisition_agent",
#     data_acquisition_reflection,
# )
# workflow.add_edge("data_acquisition_tool", "data_acquisition_agent")

# graph = workflow.compile()

# graph.get_graph().print_ascii()

In [15]:
# print(graph.get_graph().draw_mermaid())

In [16]:
# input_message = HumanMessage(
#     content="First, Let get the top performance stock data")

In [17]:
# events = graph.stream(
#     input={
#         "messages": [input_message],
#         "number_of_days_analysis": 10,
#         "email_address": "nelsonlin0321@gmail.com"
#     },
#     # Maximum number of steps to take in the graph
#     config={"recursion_limit": 15},
#     stream_mode="values",
# )
# for event in events:
#     if "messages" in event:
#         event["messages"][-1].pretty_print()

### Python Analysis Agent

In [18]:
# Single module-level REPL instance used by every python_repl_tool call.
repl = PythonREPL()


@tool
def python_repl_tool(
    code: Annotated[str, "The python code to execute to do comprehensive stock performance analysis"],
):
    """Use this to execute python code. If you want to see the output of a value,
    you should print it out with `print(...)`. This is visible to the user."""
    # The docstring above is the tool description given to the model; keep it verbatim.
    try:
        # Echo the code to the notebook before running it in the shared REPL.
        print(f"Executing codes:{code}")
        stdout = repl.run(code)
    except BaseException as exc:
        # Deliberately broad: anything raised while running the code is turned
        # into a message returned to the caller instead of propagating.
        return f"Failed to execute. Error: {exc!r}"
    else:
        return f"Successfully executed:\n```python\n{code}\n```\nStdout: {stdout}"

In [19]:
# Tools made available to the Python analysis agent, plus the graph node that
# wraps them (registered in the workflow as "python_analysis_tool").
python_analysis_tools = [python_repl_tool]
python_analysis_tools_node = ToolNode(python_analysis_tools)

In [20]:
# Chat model with the Python REPL tool bound, so its replies can carry tool calls.
python_analysis_agent = llm.bind_tools(python_analysis_tools)

In [21]:
def python_analysis_agent_node(state: State):
    """Run one turn of the Python analysis agent.

    The analysis system prompt is placed in front of the conversation history
    from ``state["messages"]`` and the tool-bound chat model is invoked on the
    combined list.

    Args:
        state: Current graph state; only ``messages`` is read here.

    Returns:
        dict: A copy of ``state`` whose ``messages`` entry is a one-element
        list holding the model reply.
    """
    messages = state["messages"]
    # The system prompt is supplied on every call and is never returned in the
    # state update, so it does not accumulate in the message history.
    system_message = SystemMessage(content="""
        You are an agent designed to write and execute python code for comprehensive stock analysis given data. 
        The data include the stock symbol, percentage increase, sample data in markdown format, pandas dataframe info, and csv data path for python code analysis. 
        You have access to a python REPL, which you can use to execute python code.
        If you get an error, debug your code and try again.

        Your tasks are to generate codes and execute codes with python REPL to fullfil below all requirements in a single python script. 
        Please call python REPL tool multiple times when necessarily to complete all below tasks.
                                 
        1. Data Loading and Preparation:
           - Read and clean the provided CSV data using pandas
           - Handle any missing values appropriately
           - Ensure proper data types for calculations

        2. Technical Analysis:
           - Calculate and analyze moving averages (5-day, 10-day)
           - Compute daily returns and cumulative returns
           - Calculate trading volume trends
           - Determine price momentum indicators
           - Analyze last n trading days performance (average change, trend direction)
           - Calculate volatility metrics (standard deviation, beta)

        3. Statistical Insights:
           - Perform descriptive statistics on price and volume data
           - Calculate risk metrics (Sharpe ratio if applicable)
           - Identify any statistically significant patterns
           - Analyze distribution of returns

        All the result should be using python print() to present in a clear, structured format using print statements so that is visible to the user in the console.
""")
    response = python_analysis_agent.invoke([system_message, *messages])
    # Carry the other state keys over unchanged and hand back only the new message.
    return {**state, "messages": [response]}

In [22]:
# Define the conditional edge between the nodes
def python_analysis_reflection(state: State) -> Literal["python_analysis_tool", "__end__"]:
    """Pick the node that follows the Python analysis agent.

    Returns:
        "python_analysis_tool" when the latest message carries tool calls,
        otherwise "__end__".
    """
    latest = state["messages"][-1]
    return "python_analysis_tool" if latest.tool_calls else "__end__"

In [23]:
# Assemble the two-stage workflow: a data acquisition agent followed by a
# Python analysis agent, each paired with its own tool-execution node.
workflow = StateGraph(State)
workflow.add_node("data_acquisition_agent", data_acquisition_agent_node)
workflow.add_node("data_acquisition_tool", data_acquisition_tools_node)

workflow.add_node("python_analysis_agent", python_analysis_agent_node)
workflow.add_node("python_analysis_tool", python_analysis_tools_node)


# Every run enters at the data acquisition agent.
workflow.add_edge("__start__", "data_acquisition_agent")
# data_acquisition_reflection routes to the tool node while the agent asks for
# tool calls, and hands over to the analysis agent otherwise. No explicit path
# map is passed; the targets presumably come from the router's Literal return
# annotation -- TODO confirm against the installed langgraph version.
workflow.add_conditional_edges(
    "data_acquisition_agent",
    data_acquisition_reflection,
)

# Same pattern for the analysis stage; the router returns "__end__" once the
# agent stops requesting tool calls.
workflow.add_conditional_edges(
    "python_analysis_agent",
    python_analysis_reflection,
)


# After a tool node has run, control goes back to the agent that requested it.
workflow.add_edge("data_acquisition_tool", "data_acquisition_agent")
workflow.add_edge("python_analysis_tool", "python_analysis_agent")

# Disabled: the hand-over to the analysis agent is done by the conditional edge above.
# workflow.add_edge("data_acquisition_agent", "python_analysis_agent")

graph = workflow.compile()

In [24]:
graph.get_graph().print_ascii()

                        +-----------+                          
                        | __start__ |                          
                        +-----------+                          
                              *                                
                              *                                
                              *                                
                  +------------------------+                   
                  | data_acquisition_agent |                   
                  +------------------------+                   
                     ***               ...                     
                  ***                     ...                  
                **                           ..                
+-----------------------+           +-----------------------+  
| data_acquisition_tool |           | python_analysis_agent |  
+-----------------------+           +-----------------------+  
                                 ....   

In [25]:
print(graph.get_graph().draw_mermaid())

---
config:
  flowchart:
    curve: linear
---
graph TD;
	__start__([<p>__start__</p>]):::first
	data_acquisition_agent(data_acquisition_agent)
	data_acquisition_tool(data_acquisition_tool)
	python_analysis_agent(python_analysis_agent)
	python_analysis_tool(python_analysis_tool)
	__end__([<p>__end__</p>]):::last
	__start__ --> data_acquisition_agent;
	data_acquisition_agent -.-> data_acquisition_tool;
	data_acquisition_agent -.-> python_analysis_agent;
	data_acquisition_tool --> data_acquisition_agent;
	python_analysis_agent -.-> __end__;
	python_analysis_agent -.-> python_analysis_tool;
	python_analysis_tool --> python_analysis_agent;
	classDef default fill:#f2f0ff,line-height:1.2
	classDef first fill-opacity:0
	classDef last fill:#bfb6fc



In [26]:
# Kick off the workflow with a single user request.
input_message = HumanMessage(
    content="First, Let get the top performance stock data")

# NOTE(review): the email address below is a hard-coded personal address;
# consider loading it from the environment before sharing this notebook.
events = graph.stream(
    input={
        "messages": [input_message],
        "number_of_days_analysis": 10,
        "email_address": "nelsonlin0321@gmail.com"
    },
    # Maximum number of steps to take in the graph
    config={"recursion_limit": 15},
    stream_mode="values",
)
# Pretty-print the most recent message of each streamed event.
for event in events:
    if "messages" in event:
        event["messages"][-1].pretty_print()


First, Let get the top performance stock data
Tool Calls:
  get_top_nasdaq_performance_stock_data (call_0_3b750c42-3952-4dae-9ffc-2fd91b3705f4)
 Call ID: call_0_3b750c42-3952-4dae-9ffc-2fd91b3705f4
  Args:


100%|██████████| 101/101 [00:02<00:00, 38.52it/s]


Name: get_top_nasdaq_performance_stock_data

{
    "symbol": "MNST",
    "percentage_increased": 3.7238540649033007,
    "sample_data_in_markdown": "| Date                      |   Open |   High |   Low |   Close |   Volume |   Dividends |   Stock Splits |\n|:--------------------------|-------:|-------:|------:|--------:|---------:|------------:|---------------:|\n| 2025-04-28 00:00:00-04:00 |  58.74 |  58.98 | 58.03 |   58.49 |  3057100 |           0 |              0 |\n| 2025-04-29 00:00:00-04:00 |  58.59 |  59.33 | 58.01 |   59.25 |  3448100 |           0 |              0 |\n| 2025-04-30 00:00:00-04:00 |  59.59 |  60.29 | 58.8  |   60.12 |  5228700 |           0 |              0 |\n| 2025-05-01 00:00:00-04:00 |  59.86 |  60.03 | 59.3  |   59.52 |  5164900 |           0 |              0 |\n| 2025-05-02 00:00:00-04:00 |  59.87 |  60.26 | 59.43 |   60.05 |  4161600 |           0 |              0 |",
    "data_csv_path": "/Volumes/mnt/Workspace/stock-analysis-agent-langgraph/data/mnst_p

Python REPL can execute arbitrary code. Use with caution.


Tool Calls:
  python_repl_tool (call_0_e919c807-b3ed-4c2e-81d8-65e30ad55813)
 Call ID: call_0_e919c807-b3ed-4c2e-81d8-65e30ad55813
  Args:
    code: import pandas as pd

# Load the data from the CSV file
data_path = "/Volumes/mnt/Workspace/stock-analysis-agent-langgraph/data/mnst_performance_in_the_past_10_days.csv"
df = pd.read_csv(data_path)

# Display the first few rows of the dataframe
print("First 5 rows of the data:")
print(df.head())

# Check for missing values
print("\nMissing values in the data:")
print(df.isnull().sum())

# Ensure proper data types
print("\nData types:")
print(df.dtypes)
Executing codes:import pandas as pd

# Load the data from the CSV file
data_path = "/Volumes/mnt/Workspace/stock-analysis-agent-langgraph/data/mnst_performance_in_the_past_10_days.csv"
df = pd.read_csv(data_path)

# Display the first few rows of the dataframe
print("First 5 rows of the data:")
print(df.head())

# Check for missing values
print("\nMissing values in the data:")
print(df.isnull(