In [1]:
import pandas as pd
import ollama
import io
import gradio as gr
from langchain_openai import ChatOpenAI
from langchain_community.chat_models import ChatOllama
from langchain.agents import AgentExecutor, create_openai_tools_agent
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.messages import HumanMessage, AIMessage
from pprint import pprint
from contextlib import redirect_stdout


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
llm = ChatOllama(model="llama3:latest", temperature=0.0)

  llm = ChatOllama(model="llama3:latest", temperature=0.0)


In [3]:
df = pd.read_csv('train.csv')
df

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


In [4]:
from langchain.agents import tool

Get Data Tool

In [5]:
@tool
def get_dataframe_info() -> str:
    """
    Returns a full summary of the DataFrame including column names, non-null counts, and data types.
    Use this first to get a general understanding of the dataset's structure.
    """
    global df
    buffer = io.StringIO()
    df.info(buf=buffer)
    return buffer.getvalue()

@tool
def get_dataframe_head(n: int = 5) -> str:
    """
    Returns the first n rows of the DataFrame as a string.
    Useful for inspecting the actual data values. 'n' defaults to 5.
    """
    global df
    return df.head(n).to_string()

@tool
def get_descriptive_stats() -> str:
    """
    Returns descriptive statistics for the numerical columns in the DataFrame, 
    including count, mean, std, min, max, and percentiles.
    """
    global df
    return df.describe().to_string()

@tool
def get_value_counts(column: str) -> str:
    """
    Returns the count of unique values for a specified column.
    Best used for categorical columns like 'Sex', 'Pclass', or 'Embarked'.
    """
    global df
    if column not in df.columns:
        return f"Error: Column '{column}' not found in the DataFrame."
    return df[column].value_counts().to_string()


Edit Data Tool

In [6]:
@tool
def drop_columns(columns: list[str]) -> str:
    """
    Drops one or more specified columns from the DataFrame.
    The input should be a list of column name strings.
    """
    global df
    try:
        df.drop(columns=columns, axis=1, inplace=True)
        return f"Successfully dropped columns: {columns}. Use get_dataframe_info to see the new structure."
    except KeyError as e:
        return f"Error: One or more columns not found: {e}"
    except Exception as e:
        return f"An error occurred: {e}"

@tool
def fill_missing_age_with_median() -> str:
    """
    Calculates the median of the 'Age' column and fills any missing values in that column with the median.
    This should only be used for the 'Age' column.
    """
    global df
    if 'Age' not in df.columns:
        return "Error: 'Age' column not found."
    
    median_age = df['Age'].median()
    df['Age'].fillna(median_age, inplace=True)
    return f"Successfully filled missing 'Age' values with the median value of {median_age}."

@tool
def create_familysize_feature() -> str:
    """
    Creates a new column 'FamilySize' by adding the 'SibSp' and 'Parch' columns together.
    """
    global df
    if 'SibSp' in df.columns and 'Parch' in df.columns:
        df['FamilySize'] = df['SibSp'] + df['Parch']
        return "Successfully created the 'FamilySize' feature."
    else:
        return "Error: 'SibSp' or 'Parch' columns not found."

In [7]:
tools = [
    get_dataframe_info,
    get_dataframe_head,
    get_descriptive_stats,
    get_value_counts,
    drop_columns,
    fill_missing_age_with_median,
    create_familysize_feature,
]

In [8]:
llm = ChatOllama(model="llama3")

In [9]:
system_prompt = """
You are an expert Data Scientist assistant. Your goal is to help the user with Exploratory Data Analysis (EDA) and Feature Engineering.

You have access to a set of tools to inspect and manipulate a pandas DataFrame.

**Thinking Process:**
1.  **Understand the Goal:** Analyze the user's request.
2.  **Inspect:** ALWAYS use your inspection tools (`get_dataframe_info`, `get_dataframe_head`, etc.) to understand the current state of the DataFrame before taking any action. Do not assume the state.
3.  **Plan:** Decide which tool is appropriate for the user's request.
4.  **Act:** Execute the chosen tool.
5.  **Observe & Respond:** Analyze the output from the tool and provide a clear, helpful summary to the user.
"""

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        MessagesPlaceholder(variable_name="chat_history"),
        ("user", "{input}"),
        MessagesPlaceholder(variable_name="agent_scratchpad"),
    ]
)

In [10]:
prompt

ChatPromptTemplate(input_variables=['agent_scratchpad', 'chat_history', 'input'], input_types={'chat_history': list[typing.Annotated[typing.Union[typing.Annotated[langchain_core.messages.ai.AIMessage, Tag(tag='ai')], typing.Annotated[langchain_core.messages.human.HumanMessage, Tag(tag='human')], typing.Annotated[langchain_core.messages.chat.ChatMessage, Tag(tag='chat')], typing.Annotated[langchain_core.messages.system.SystemMessage, Tag(tag='system')], typing.Annotated[langchain_core.messages.function.FunctionMessage, Tag(tag='function')], typing.Annotated[langchain_core.messages.tool.ToolMessage, Tag(tag='tool')], typing.Annotated[langchain_core.messages.ai.AIMessageChunk, Tag(tag='AIMessageChunk')], typing.Annotated[langchain_core.messages.human.HumanMessageChunk, Tag(tag='HumanMessageChunk')], typing.Annotated[langchain_core.messages.chat.ChatMessageChunk, Tag(tag='ChatMessageChunk')], typing.Annotated[langchain_core.messages.system.SystemMessageChunk, Tag(tag='SystemMessageChunk')]

In [11]:
agent =  create_openai_tools_agent(llm, tools, prompt)
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

In [12]:
def chat(message, history):
  chat_history = []
  for human, assistant in history:
    chat_history.append(HumanMessage(content=human))
    chat_history.append(AIMessage(content=assistant))

  response = agent_executor.invoke({
    "input":message,
    "chat_history":chat_history
  })
  return response["output"]

In [13]:
gr.ChatInterface(chat, type="messages").launch()

* Running on local URL:  http://127.0.0.1:7860
* To create a public link, set `share=True` in `launch()`.






[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mPlease provide me with the DataFrame and the specific column you'd like me to describe its features. 

(Note: I'll be using my inspection tools (`get_dataframe_info`, `get_dataframe_head`, etc.) to understand the current state of the DataFrame before taking any action.)[0m

[1m> Finished chain.[0m


Traceback (most recent call last):
  File "c:\ai_agent_practice\.venv\Lib\site-packages\gradio\queueing.py", line 626, in process_events
    response = await route_utils.call_process_api(
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\ai_agent_practice\.venv\Lib\site-packages\gradio\route_utils.py", line 349, in call_process_api
    output = await app.get_blocks().process_api(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\ai_agent_practice\.venv\Lib\site-packages\gradio\blocks.py", line 2274, in process_api
    result = await self.call_function(
             ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\ai_agent_practice\.venv\Lib\site-packages\gradio\blocks.py", line 1779, in call_function
    prediction = await fn(*processed_input)
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\ai_agent_practice\.venv\Lib\site-packages\gradio\utils.py", line 876, in async_wrapper
    response = await f(*args, **kwargs)
               ^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\ai_