In [1]:
from dotenv import load_dotenv
import os

load_dotenv()

True

In [2]:
CHAT_MODEL="deepseek-v3"
os.environ["OPENAI_API_KEY"]=os.environ.get("INFINI_API_KEY")
os.environ["OPENAI_BASE_URL"]=os.environ.get("INFINI_BASE_URL")
OPENAI_BASE=os.environ.get("INFINI_BASE_URL") 

In [3]:
from langchain_openai import ChatOpenAI

chat_model = ChatOpenAI(
    temperature=0,
    model=CHAT_MODEL,
    base_url=OPENAI_BASE)

In [4]:
from langchain.agents import tool, initialize_agent, Tool
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.agents import load_tools
from langchain_community.utilities import SerpAPIWrapper
import numpy as np
import pandas as pd

In [5]:
search = SerpAPIWrapper()

df = pd.read_csv("data.csv", header=0, index_col=0)

print(df)

       t   N    k
1  10.23   5  100
2  20.45  10  200
3  15.60   7  160
4  18.90   8  180


In [None]:
# tools for data processing
@tool
def data_accessor() -> str:
    """
    Use this tool to access the dataframe.
    """
    return df.to_string()

@tool
def data_processor(query: str) -> str:
    """
    Use this tool to process the dataframe.
    The input is a line of python code that will be executed on the dataframe.
    For example, you can use:
    df['column_name2'] = np.log(df['column_name1'])
    to create a new column in the dataframe.
    Do not type "" or '' around the code.
    The output will be the modified dataframe.
    If the input is not a valid python code, it will return an error message.
    """
    print(f"Executing query: {query}")
    try:
        exec(query)
        return df.to_string()
    except Exception as e:
        return str(e)
    
@tool
def saver(query: str) -> str:
    """
    Call this tool to save the dataframe to a csv file.
    The input is the name of the file.
    """
    try:
        df.to_csv(query)
        return f"Dataframe saved to {query}"
    except Exception as e:
        return str(e)

tools = [
    data_accessor,
    data_processor,
    saver
]

In [None]:
# tools for plotting

import matplotlib.pyplot as plt

@tool


In [7]:
from langchain.agents import create_react_agent, AgentExecutor
from langchain.memory import ConversationBufferMemory
from langchain.prompts import PromptTemplate

react_prompt = """
You need to process some data inside the pandas dataframe called df. The data is from a physics experiment.
You have access to the following tools:

{tools}

You have to think step by step and use the tools when you need to.

Use the following format:

Question: what you need to do with the data
Thought: you should always think about what to do
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I finish the task, output "Finish!"

Begin!

Task: {input}
Thought:{agent_scratchpad}
"""

prompt = PromptTemplate(
    input_variables=["input", "tools", "tool_names", "agent_scratchpad"],
    template=react_prompt,
)

llm = ChatOpenAI(
    model=CHAT_MODEL,
)

agent = create_react_agent(
    llm,
    tools,
    prompt=prompt,
)

agent_executor = AgentExecutor.from_agent_and_tools(
    agent=agent,
    tools=tools,
    verbose=True,
    memory=ConversationBufferMemory(memory_key="chat_history", return_messages=True)
)

  memory=ConversationBufferMemory(memory_key="chat_history", return_messages=True)


In [8]:
agent_executor.invoke({
    "input": "df中是我做弹簧振子的实验数据，t是总时间，N是周期数，k是弹簧筋度系数，请求出周期T，小球质量m，各单独一列，最后计算周期和小球质量的平均值和标准差，将全部数据保存到new_data.csv中"
})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: To solve this task, I need to perform several steps:
1. Access the dataframe to understand its structure and columns.
2. Calculate the period T by dividing total time t by the number of cycles N.
3. Calculate the mass m using the formula for a spring-mass system: T = 2π√(m/k), solving for m.
4. Compute the mean and standard deviation for both T and m.
5. Save the processed dataframe to a CSV file.

Let's start by accessing the data to verify the columns.

Action: data_accessor
Action Input: 
[0m[36;1m[1;3m       t   N    k
1  10.23   5  100
2  20.45  10  200
3  15.60   7  160
4  18.90   8  180[0m[32;1m[1;3mThought: I have accessed the dataframe and confirmed it contains columns 't' (total time), 'N' (number of cycles), and 'k' (spring constant). Now I need to:
1. Calculate period T = t/N
2. Calculate mass m using the formula m = (T^2 * k)/(4 * π^2)
3. Compute mean and standard deviation for T and m
4. Save the 

ValueError: An output parsing error occurred. In order to pass this error back to the agent and have it try again, pass `handle_parsing_errors=True` to the AgentExecutor. This is the error: Could not parse LLM output: `Thought: I have successfully completed all the required steps:
1. Calculated the period T and mass m
2. Computed their mean and standard deviation
3. Saved the processed data to a CSV file

Finish!`
For troubleshooting, visit: https://python.langchain.com/docs/troubleshooting/errors/OUTPUT_PARSING_FAILURE 

In [9]:
print(df)

          t     N      k         T          m
1     10.23   5.0  100.0  2.046000  10.603556
2     20.45  10.0  200.0  2.045000  21.186386
3     15.60   7.0  160.0  2.228571  20.128590
4     18.90   8.0  180.0  2.362500  25.448161
mean    NaN   NaN    NaN  2.170518  19.341673
std     NaN   NaN    NaN  0.154366   6.262750
