In [None]:
# ============================================
# DATA ANALYST AGENT WITH CODE EXECUTION
# ============================================
# This notebook demonstrates how to create an AI agent that can:
# - Execute Python code dynamically
# - Perform data analysis tasks
# - Handle data cleaning operations

# Import required libraries
from crewai import Agent, Task, Crew  # Core CrewAI components
from crewai_tools import CodeInterpreterTool  # Tool for executing Python code

import os

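# Set up API key - load_dotenv() below loads environment variables
# (such as OPENAI_API_KEY) from a .env file.
# Alternatively, set the key in code (never commit a real key):
# os.environ['OPENAI_API_KEY'] = "YOUR_OPENAI_API_KEY"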
from dotenv import load_dotenv
load_dotenv()

In [None]:
# ============================================
# AGENT DEFINITION
# ============================================
# Create a data analyst agent with code execution capabilities

data_analyst_agent = Agent(
    # Persona: the role, goal and backstory strings given to the agent
    role="General Data Analyst",
    goal="Perform data analysis tasks based on user inputs and instructions and execute them using Python",
    backstory="You are an experienced data analyst skilled in Python and data manipulation",

    # Tooling: hand the agent a CodeInterpreterTool instance and switch on
    # its code-execution flag so it is allowed to run Python
    tools=[CodeInterpreterTool()],
    allow_code_execution=True,

    # Retry limit used when errors occur
    max_retry_limit=3,
)

In [None]:
# ============================================
# TASK FACTORY FUNCTION
# ============================================
# This function creates dynamic tasks based on user requirements
# This pattern is useful when you need to create multiple similar tasks
# with different parameters

def create_data_analysis_task(dataframe_path: str, user_instruction: str, output_path: str):
    """
    Build a Task that asks the data analyst agent to transform a CSV file.

    The three arguments are interpolated verbatim into a natural-language
    prompt (the task description) and into the expected-output text.

    Parameters:
    - dataframe_path: Path to input CSV file
    - user_instruction: Natural language instruction for data operation
    - output_path: Path where the result should be saved

    Returns: Task object assigned to the module-level data_analyst_agent
    """
    # Adjacent string literals are joined with NO separator, so every
    # fragment that continues a sentence must end with its own space;
    # otherwise the prompt contains ",and save" and "arenot able".
    description = (
        f"Load the dataframe from '{dataframe_path}', perform the following task: '{user_instruction}', "
        f"and save the resulting dataframe to '{output_path}'. Execute the task using Python, if you are "
        "not able to execute the code on the machine please explain why."
    )

    return Task(
        # Description: instructions for what the agent should do
        description=description,

        # Expected output: what success looks like
        expected_output=f"Dataframe with the applied operations saved to '{output_path}'.",

        # Assign this task to the data analyst agent
        agent=data_analyst_agent,
    )

In [None]:
# ============================================
# CONFIGURATION: Define Task Parameters
# ============================================
# Set up the specific task you want the agent to perform

# Input CSV to read (relative to the working directory, or give a full path)
dataframe_path = "input_data.csv"

# Destination file for the processed data
output_path = "output_data.csv"

# Plain-English description of the operation the agent should carry out,
# e.g. "fix missing values", "remove duplicates", "normalize column X"
user_instruction = "fix missing values"

In [None]:
# ============================================
# CREATE THE TASK
# ============================================
# Build the task by handing the configured values to the factory by keyword

data_analysis_task = create_data_analysis_task(
    dataframe_path=dataframe_path,
    user_instruction=user_instruction,
    output_path=output_path,
)

In [None]:
# ============================================
# CREW SETUP
# ============================================
# Create a Crew to orchestrate the agent and task
# A Crew manages the execution flow and coordinates between agents

analyst_crew = Crew(
    tasks=[data_analysis_task],    # tasks to execute
    agents=[data_analyst_agent],   # participating agents (the list can hold several)
)

In [None]:
# ============================================
# EXECUTE THE CREW
# ============================================
# Start the crew execution - the agent will:
# 1. Read the description and understand the task
# 2. Generate Python code to solve the problem
# 3. Execute the code using CodeInterpreterTool
# 4. Save the results and provide output

# Run the crew created earlier in this notebook. The variable is named
# `analyst_crew` (not `analysis_crew`); whatever kickoff() returns is kept in `result`.
result = analyst_crew.kickoff()

# Print the text form of the returned result
print(result)

# WHAT TYPICALLY HAPPENS (the agent chooses its own steps, so details can vary between runs):
# - The agent loads the CSV file
# - Detects missing values in numeric and categorical columns
# - Applies a filling strategy (for example, mean for numeric, mode for categorical)
# - Saves the cleaned dataframe to output_data.csv