In [None]:
!pip install pyautogen --quiet
!pip install openai --quiet

In [32]:
import autogen
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from autogen import AssistantAgent, UserProxyAgent, config_list_from_json
from autogen.coding import LocalCommandLineCodeExecutor

In [None]:
# Build the LLM configuration list from "OAI_CONFIG_LIST.json"; the agents
# defined in this notebook all receive this same list.
config_list = autogen.config_list_from_json(env_or_file="OAI_CONFIG_LIST.json")

In [None]:
def load_dataset(file_path):
    """Read a CSV file into a pandas DataFrame.

    Args:
        file_path: Location of the CSV; handed to ``pd.read_csv`` unchanged.

    Returns:
        The DataFrame produced by ``pd.read_csv``.
    """
    frame = pd.read_csv(file_path)
    return frame


In [None]:

def preprocess_data(df):
    """Reduce a DataFrame to its complete numeric rows.

    Non-numeric columns are discarded first, and only then are rows with
    missing values removed. Filtering in this order means a gap in a column
    that is about to be dropped anyway (for example a sparsely filled text
    column) no longer costs an otherwise usable row.

    Args:
        df: Input DataFrame; it is not modified.

    Returns:
        A new DataFrame containing only the numeric columns of ``df`` and
        only the rows that have no missing value in those columns. The
        original index labels are kept.
    """
    numeric_columns = df.select_dtypes(include=["number"])  # Keep only numerical columns
    return numeric_columns.dropna()  # Remove rows with missing numeric values

In [None]:
def perform_eda(df, output_path="eda_correlation_matrix.png"):
    """Summarise a DataFrame and save its correlation heatmap.

    Args:
        df: DataFrame to analyse.
        output_path: Where the heatmap image is written. Defaults to
            ``"eda_correlation_matrix.png"``, a path relative to the current
            working directory.

    Returns:
        A ``(summary, output_path)`` tuple: the result of ``df.describe()``
        and the path the heatmap was saved to.
    """
    summary = df.describe()
    # Correlate numeric columns only: recent pandas versions raise when
    # ``corr`` is asked to handle text columns.
    correlation_matrix = df.select_dtypes(include=["number"]).corr()

    # Draw on an explicit figure/axes pair instead of pyplot's implicit
    # "current figure", which could belong to an unrelated plot.
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.heatmap(correlation_matrix, annot=True, cmap="coolwarm", ax=ax)
    ax.set_title("Correlation Matrix")
    # bbox_inches="tight" keeps long tick labels from being clipped in the file.
    # The figure is deliberately not closed, so the notebook can still render it.
    fig.savefig(output_path, bbox_inches="tight")

    return summary, output_path

In [None]:
def generate_report(summary, eda_visuals):
    """Compile EDA results into a plain-text report.

    The report is assembled line by line so that no source-code indentation
    leaks into the output; a multi-line ``summary`` (such as a DataFrame
    printout) therefore keeps its header and rows aligned.

    Args:
        summary: Summary statistics; inserted via f-string formatting.
        eda_visuals: Description or path of the generated visualisations;
            inserted via f-string formatting.

    Returns:
        The report text, terminated by a newline.
    """
    lines = [
        "Exploratory Data Analysis Report",
        "--------------------------------------",
        "Summary Statistics:",
        f"{summary}",
        "",
        "Visualizations:",
        f"{eda_visuals}",
    ]
    return "\n".join(lines) + "\n"

In [51]:
# Define Agents with their responsibilities
# Admin: its prompt asks it to oversee the workflow and coordinate the others.
admin_agent = AssistantAgent(
    name="Admin",
    system_message=(
        "You are responsible for overseeing the entire EDA workflow. Ensure that each agent completes its task correctly, "
        "coordinate between agents, and enforce project goals, standards, and best practices. Resolve any workflow issues."
    ),
    llm_config={"config_list": config_list},
)


In [52]:
# DataPrep: prompted to clean the dataset. Its termination predicate always
# returns False, so it never flags a message as the end of the conversation.
# NOTE(review): `preprocess_data` is registered in function_map, but llm_config
# declares no functions/tools, so the model may never be offered it — confirm.
data_prep_agent = AssistantAgent(
    name="DataPrep",
    system_message=(
        "Your role is to clean and preprocess the dataset. Handle missing values, standardize data types, remove outliers, "
        "and perform feature engineering if necessary. Ensure that the dataset is well-structured for EDA. "
        "Execute code to clean the dataset and pass the result to the next agent."
    ),
    llm_config={"config_list": config_list},
    is_termination_msg=lambda msg: False,
    function_map={"preprocess_data": preprocess_data},
)

In [None]:
# EDA: prompted to compute statistics and produce visualisations. Its
# termination predicate always returns False.
# NOTE(review): `perform_eda` is registered in function_map, but llm_config
# declares no functions/tools, so the model may never be offered it — confirm.
eda_agent = AssistantAgent(
    name="EDA",
    system_message=(
        "Perform exploratory data analysis on the given dataset. Compute descriptive statistics, analyze distributions, "
        "detect correlations, and generate appropriate visualizations such as histograms, box plots, and heatmaps. "
        "Execute EDA code and pass results to the next agent."
    ),
    llm_config={"config_list": config_list},
    is_termination_msg=lambda msg: False,
    function_map={"perform_eda": perform_eda},
)

In [None]:
# Report: prompted to turn the EDA findings into a structured report. Its
# termination predicate always returns False.
# NOTE(review): `generate_report` is registered in function_map, but llm_config
# declares no functions/tools, so the model may never be offered it — confirm.
report_agent = AssistantAgent(
    name="Report",
    system_message=(
        "Compile the findings from the EDA process into a structured, easy-to-read report. "
        "Summarize key insights with visualizations and actionable recommendations. "
        "Ensure clarity and coherence in the report structure. Execute report generation code."
    ),
    llm_config={"config_list": config_list},
    is_termination_msg=lambda msg: False,
    function_map={"generate_report": generate_report},
)

In [None]:
# Critic: prompted to review the report and suggest improvements. It has no
# function_map, and its termination predicate always returns False.
critic_agent = AssistantAgent(
    name="Critic",
    system_message=(
        "Review the EDA report for accuracy, clarity, and completeness. "
        "Identify any misleading interpretations, incorrect conclusions, or missing insights. "
        "Provide constructive feedback and suggest necessary improvements."
    ),
    llm_config={"config_list": config_list},
    is_termination_msg=lambda msg: False,
)


In [None]:
# Executor: prompted to verify that generated code runs and gives valid
# results. Its termination predicate always returns False.
# NOTE(review): despite its name, this agent is given no code-execution
# settings here; whether it can actually run code depends on AssistantAgent
# defaults — confirm.
executor_agent = AssistantAgent(
    name="Executor",
    system_message=(
        "Verify that all generated code runs correctly without errors and produces valid results. "
        "Check that statistical calculations, visualizations, and preprocessing steps are correctly implemented. "
        "Flag any discrepancies or issues for correction."
    ),
    llm_config={"config_list": config_list},
    is_termination_msg=lambda msg: False,
)

In [None]:
# User proxy: the agent that opens the conversation on the user's behalf.
# NOTE(review): code_execution_config only sets a work_dir; whether code runs
# in Docker or directly on the host is left to the installed autogen version's
# defaults — confirm this matches the target environment.
user_proxy = UserProxyAgent(
    name="User",
    system_message="You provide the dataset and initiate the EDA process. Monitor the workflow and provide additional instructions if needed.",
    code_execution_config={"work_dir": "eda_workspace"},
)

In [None]:
# Put the six assistant agents and the user proxy into one shared group chat.
# The chat starts with an empty message history and max_round is set to 10.
eda_group = autogen.GroupChat(
    agents=[
        admin_agent,
        data_prep_agent,
        eda_agent,
        report_agent,
        critic_agent,
        executor_agent,
        user_proxy,
    ],
    max_round=10,
    messages=[],
)

In [None]:
# Wrap the group chat in a GroupChatManager that is configured with the same
# LLM configuration list as the individual agents.
manager = autogen.GroupChatManager(
    llm_config={"config_list": config_list},
    groupchat=eda_group,
)

In [None]:
# Run the EDA pipeline directly in the notebook (this cell does not involve
# the agents): load -> preprocess -> analyse -> report.
file_path = "/content/train.csv"  # Replace with the actual dataset file path
dataset = load_dataset(file_path)
cleaned_dataset = preprocess_data(dataset)
summary, eda_visuals = perform_eda(cleaned_dataset)
report = generate_report(summary, eda_visuals)
# Show the report; without this the cell builds it but displays nothing.
print(report)

In [None]:
# Start the group conversation. The message carries only the dataset path,
# not the data or the locally generated report.
user_proxy.initiate_chat(
    manager,
    message=(
        f"Begin the EDA workflow on the provided dataset: {file_path}. "
        "Ensure data is well-prepared and insights are actionable."
    ),
)