In [10]:
import os
import pandas as pd 
from dotenv import load_dotenv
from langchain.agents import Tool, AgentExecutor
from langchain_experimental.tools.python.tool import PythonAstREPLTool
from langchain_community.chat_models.openai import ChatOpenAI
from langchain_core.prompts import PromptTemplate
from langchain.agents import create_react_agent
from langchain_anthropic import ChatAnthropic


# Read variables from a local .env file into the process environment
# (presumably the LLM API key used further down -- confirm against your .env).
load_dotenv()

# Load the passenger table from the working directory and preview the
# first rows as this cell's output.
df = pd.read_csv(filepath_or_buffer="titanic.csv")
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,892,0,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q
1,893,1,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0,,S
2,894,0,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,240276,9.6875,,Q
3,895,0,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,,S
4,896,1,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.2875,,S


In [11]:
def dataframe_agent(df, model="claude-3-5-sonnet-20240620", temperature=0):
    """Build a ReAct agent that answers natural-language questions about ``df``.

    The agent is driven by an Anthropic chat model and is given a single
    tool: a Python AST REPL whose namespace contains the dataframe under the
    name ``df``.

    Args:
        df: The dataframe exposed to the agent's Python tool as ``df``.
        model: Anthropic model identifier passed to ``ChatAnthropic``.
        temperature: Sampling temperature passed to ``ChatAnthropic``.
            Defaults to 0 so repeated runs of the same query give more
            repeatable answers.

    Returns:
        An ``AgentExecutor`` wrapping the agent and its tool. Run it with
        ``executor.invoke({"input": "<question>"})``; the final answer is
        under the ``"output"`` key of the returned dict.
    """
    llm = ChatAnthropic(model=model, temperature=temperature)

    # ReAct prompt. create_react_agent requires the variables {tools},
    # {tool_names} and {agent_scratchpad}. The trailing "Thought:" primes the
    # model to start each turn in the format the output parser expects.
    template = """
        Answer the following questions as best you can. You are working with a pandas dataframe named 'df'. You are an expert data analyst. You have to understand the df and give the right answer to the user's query. You have access to the following tools:
        {tools}

        Use the following format:

        Question: the input question you must answer
        Thought: you should always think about what to do
        Action: the action to take, should be one of [{tool_names}]
        Action Input: the input to the action
        Observation: the result of the action
        ...(this Thought/Action/Action Input/Observation can repeat N times)
        Thought: I now know the final answer
        Final Answer: the final answer to the original input question

        Begin!

        Question: {input}
        Thought:{agent_scratchpad}"""

    prompt = PromptTemplate.from_template(template)

    # SECURITY: this tool executes Python code written by the LLM in this
    # process, with full access to `df`. Only use it with trusted queries
    # and data, or run it inside a sandbox.
    tools = [PythonAstREPLTool(locals={"df": df})]

    agent = create_react_agent(llm, tools, prompt)

    # handle_parsing_errors=True feeds malformed LLM output back to the model
    # as an observation instead of raising.
    return AgentExecutor(
        agent=agent,
        tools=tools,
        verbose=True,
        handle_parsing_errors=True,
    )

In [14]:
# Build the agent executor over the dataframe loaded earlier in the notebook.
agent = dataframe_agent(df)

# NOTE: input() blocks "Restart & Run All" until someone types a query.
user_query = input("Enter the Query:- ")

# Use invoke(): calling the executor directly (agent({...})) goes through the
# deprecated Chain.__call__ path. The returned dict has the same shape, with
# the final answer under "output".
result = agent.invoke({"input": user_query})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mTo answer this question, I need to analyze the 'Survived' column in the dataframe. Let's start by examining the data and then count the number of survivors.

Thought: First, I should check the structure of the dataframe and confirm that we have a 'Survived' column.

Action: python_repl_ast
Action Input: df.columns
[0m[36;1m[1;3mIndex(['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp',
       'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked'],
      dtype='object')[0m[32;1m[1;3mThought: Great, we can see that there is indeed a 'Survived' column in the dataframe. Now, let's count how many people survived.

Action: python_repl_ast
Action Input: df['Survived'].value_counts()
[0m[36;1m[1;3mSurvived
0    266
1    152
Name: count, dtype: int64[0m[32;1m[1;3mThought: The output shows that 152 people survived (value 1 in the 'Survived' column represents survivors).

Final Answer: 152 people survived according 

In [15]:
# Display just the agent's final answer text from the result dict.
result["output"]

'152 people survived according to the data in the dataframe.'