In [1]:
# Install required packages
!pip install pyautogen google-generativeai pandas matplotlib seaborn ipywidgets -q

import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import google.generativeai as genai
from google.colab import files
from IPython.display import display, HTML, Markdown
import ipywidgets as widgets
from IPython.display import clear_output
import warnings
warnings.filterwarnings('ignore')

# Plot appearance: select matplotlib's "default" style first, then apply
# seaborn's "husl" palette on top of it (order kept as in the notebook).
plt.style.use("default")
sns.set_palette(palette="husl")

class AgenticEDA:
    """Notebook-driven EDA workflow that chains several Gemini-backed "agents".

    State held on the instance:

    * ``api_key`` - the Gemini API key, or ``None`` until one is supplied.
    * ``df`` - the uploaded dataset as a pandas DataFrame, or ``None``.
    * ``results`` - the latest text returned by each agent, keyed by
      ``'data_prep'``, ``'eda'``, ``'report'``, ``'critique'`` and
      ``'execution'``.
    """

    # Prefix gemini_call() puts on the string it returns when the API call
    # fails; used to tell failures apart from genuine model output.
    _API_ERROR_PREFIX = "Error calling Gemini API:"

    def __init__(self, api_key=None):
        """Create the system, configuring Gemini immediately if a key is given.

        Args:
            api_key: Optional Gemini API key. When falsy, nothing is
                configured here and the key can be supplied later through
                ``setup_api_key()``.
        """
        self.api_key = api_key
        self.df = None
        self.results = {}

        if api_key:
            genai.configure(api_key=api_key)
            print("✅ Gemini API configured successfully!")

    def setup_api_key(self):
        """Show a password field and Submit button for entering the API key.

        On submit, surrounding whitespace is stripped from the entered value.
        A non-empty key is stored on ``self.api_key`` and passed to
        ``genai.configure``; an empty (or whitespace-only) entry is rejected
        with a message.
        """
        print("🔑 Please enter your Gemini API Key:")
        print("Get your API key from: https://aistudio.google.com/app/apikey")

        api_input = widgets.Password(
            placeholder='Enter your Gemini API key',
            description='API Key:',
            style={'description_width': 'initial'}
        )

        submit_btn = widgets.Button(
            description='Submit',
            button_style='success',
            icon='check'
        )

        output = widgets.Output()

        def on_submit(b):
            with output:
                clear_output()
                # Strip so that a whitespace-only entry counts as missing and
                # stray spaces/newlines from a paste are not sent as the key.
                key = api_input.value.strip()
                if key:
                    self.api_key = key
                    genai.configure(api_key=key)
                    print("✅ API key configured successfully!")
                else:
                    print("❌ Please enter a valid API key")

        submit_btn.on_click(on_submit)
        display(widgets.VBox([api_input, submit_btn, output]))

    def gemini_call(self, prompt, model_name="gemini-1.5-flash"):
        """Send ``prompt`` to a Gemini model and return the response text.

        Args:
            prompt: Text prompt passed to ``generate_content``.
            model_name: Name of the Gemini model to instantiate.

        Returns:
            The response text. If anything raises during the call, the
            exception is not propagated; instead a string of the form
            ``"Error calling Gemini API: <message>"`` is returned.
        """
        try:
            model = genai.GenerativeModel(model_name)
            response = model.generate_content(prompt)
            return response.text
        except Exception as e:
            return f"{self._API_ERROR_PREFIX} {e}"

    @classmethod
    def _is_api_error(cls, response):
        """Return True if ``response`` is the failure string from gemini_call()."""
        return isinstance(response, str) and response.startswith(cls._API_ERROR_PREFIX)

    @staticmethod
    def _read_csv_bytes(data):
        """Parse raw CSV bytes into a DataFrame."""
        # Local import: the module-level import list does not include io.
        import io

        return pd.read_csv(io.BytesIO(data))

    def _require_dataset(self):
        """Return the loaded DataFrame, raising ValueError if there is none."""
        if self.df is None:
            raise ValueError("No dataset loaded. Upload a dataset before running this agent.")
        return self.df

    def _run_agent(self, result_key, prompt):
        """Send ``prompt`` to Gemini, store the reply under ``result_key``, return it."""
        response = self.gemini_call(prompt)
        self.results[result_key] = response
        return response

    def upload_dataset(self):
        """Prompt for a CSV upload and load it into ``self.df``.

        Only the first uploaded file is used. The file is parsed from the
        bytes returned by the upload call rather than re-read from disk by
        name. Results from any previously analysed dataset are discarded
        once the new dataset is loaded.

        Returns:
            True if a dataset was loaded, False if nothing was uploaded or
            the file could not be parsed as CSV (``self.df`` is then left
            unchanged).
        """
        print("📁 Upload your CSV file:")
        uploaded = files.upload()

        if not uploaded:
            return False

        filename, payload = next(iter(uploaded.items()))
        try:
            frame = self._read_csv_bytes(payload)
        except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
            print(f"❌ Could not read '{filename}' as CSV: {exc}")
            return False

        self.df = frame
        # Earlier agent output described a different dataset; drop it so it
        # cannot leak into a report about this one.
        self.results = {}
        print(f"✅ Dataset loaded successfully! Shape: {self.df.shape}")

        # Display basic info
        display(HTML("<h3>📊 Dataset Preview</h3>"))
        display(self.df.head())

        display(HTML("<h3>ℹ️ Dataset Info</h3>"))
        print(f"Rows: {self.df.shape[0]}")
        print(f"Columns: {self.df.shape[1]}")
        print(f"Memory usage: {self.df.memory_usage(deep=True).sum() / 1024**2:.2f} MB")

        return True

    def data_prep_agent(self):
        """Ask Gemini for data-cleaning code and notes about the dataset.

        The prompt contains the frame's shape, columns, dtypes, missing-value
        counts, duplicate count and head.

        Returns:
            The text from ``gemini_call`` (also stored in
            ``self.results['data_prep']``).

        Raises:
            ValueError: If no dataset has been loaded.
        """
        self._require_dataset()
        print("🧹 Data Preparation Agent working...")

        prompt = f"""You are a Data Cleaning Agent. Analyze this dataset and provide Python code for data preprocessing.

Dataset Info:
- Shape: {self.df.shape}
- Columns: {list(self.df.columns)}
- Data types: {self.df.dtypes.to_dict()}
- Missing values: {self.df.isnull().sum().to_dict()}
- Duplicates: {self.df.duplicated().sum()}

Dataset head:
{self.df.head().to_string()}

Provide:
1. Python code for data cleaning (handle missing values, fix data types, remove duplicates)
2. Brief explanation of the preprocessing steps
3. Any data quality issues found

Format your response with clear sections."""

        return self._run_agent('data_prep', prompt)

    def eda_agent(self):
        """Ask Gemini for an exploratory analysis of the dataset.

        The prompt contains the frame's shape, columns, numeric and
        object-typed column names, head and ``describe(include='all')``.

        Returns:
            The text from ``gemini_call`` (also stored in
            ``self.results['eda']``).

        Raises:
            ValueError: If no dataset has been loaded.
        """
        self._require_dataset()
        print("📊 EDA Agent analyzing data...")

        prompt = f"""You are an EDA Agent. Perform comprehensive exploratory data analysis.

Dataset Info:
- Shape: {self.df.shape}
- Columns: {list(self.df.columns)}
- Numeric columns: {self.df.select_dtypes(include=[np.number]).columns.tolist()}
- Categorical columns: {self.df.select_dtypes(include=['object']).columns.tolist()}

Dataset head:
{self.df.head().to_string()}

Summary statistics:
{self.df.describe(include='all').to_string()}

Provide:
1. Summary statistics interpretation
2. At least 5 key insights from the data
3. Correlation analysis (if applicable)
4. Distribution analysis
5. Suggestions for visualizations
6. Potential data issues or anomalies

Be specific and actionable in your insights."""

        return self._run_agent('eda', prompt)

    def report_generator_agent(self):
        """Ask Gemini to turn the stored prep and EDA results into a report.

        Missing inputs are replaced by placeholder sentences in the prompt.

        Returns:
            The text from ``gemini_call`` (also stored in
            ``self.results['report']``).
        """
        print("📄 Report Generator creating comprehensive report...")

        data_prep = self.results.get('data_prep', 'No data preparation analysis available')
        eda_insights = self.results.get('eda', 'No EDA analysis available')

        prompt = f"""You are a Report Generator Agent. Create a professional EDA report.

Data Preparation Analysis:
{data_prep}

EDA Insights:
{eda_insights}

Create a comprehensive report with:
1. **Executive Summary** - Key findings in 2-3 sentences
2. **Dataset Overview** - Basic information about the data
3. **Data Quality Assessment** - Issues found and how they were addressed
4. **Key Findings** - Main insights from the analysis
5. **Recommendations** - Next steps for analysis or modeling
6. **Conclusion** - Summary of the analysis

Use clear headings and bullet points. Make it professional and actionable."""

        return self._run_agent('report', prompt)

    def critic_agent(self):
        """Ask Gemini to critique the stored report.

        Returns:
            The text from ``gemini_call`` (also stored in
            ``self.results['critique']``).
        """
        print("🧐 Critic Agent reviewing the analysis...")

        report = self.results.get('report', 'No report available')

        prompt = f"""You are a Critic Agent. Review this EDA report and provide constructive feedback.

EDA Report:
{report}

Evaluate the report on:
1. **Completeness** - Are all important aspects covered?
2. **Clarity** - Is the report easy to understand?
3. **Accuracy** - Are the insights logical and well-supported?
4. **Actionability** - Are the recommendations practical?
5. **Missing Elements** - What additional analysis might be needed?

Provide:
- Strengths of the current analysis
- Areas for improvement
- Specific suggestions for enhancement
- Overall quality rating (1-10)

Be constructive and specific in your feedback."""

        return self._run_agent('critique', prompt)

    def executor_agent(self):
        """Ask Gemini to review the stored data-preparation output.

        Note that the preprocessing code is only sent to the model for
        review; nothing in this method executes it.

        Returns:
            The text from ``gemini_call`` (also stored in
            ``self.results['execution']``).
        """
        print("✅ Executor Agent validating the analysis...")

        data_prep = self.results.get('data_prep', 'No data preparation available')

        prompt = f"""You are an Executor Agent. Validate the data preprocessing code and analysis.

Data Preparation Code/Analysis:
{data_prep}

Validate:
1. **Code Quality** - Is the preprocessing code syntactically correct and efficient?
2. **Logic** - Are the preprocessing steps logical and appropriate?
3. **Completeness** - Are all necessary preprocessing steps included?
4. **Best Practices** - Does it follow data science best practices?

Provide:
- Code validation results
- Suggested improvements or corrections
- Alternative approaches if applicable
- Final validation status (PASS/FAIL/NEEDS_REVISION)

Be specific about any issues found."""

        return self._run_agent('execution', prompt)

    def create_basic_visualizations(self):
        """Draw up to three figures describing the loaded dataset.

        * Histograms of the first three numeric columns.
        * A correlation heatmap when there are at least two numeric columns.
        * Bar charts of the ten most frequent values for the first three
          object-typed columns.

        Does nothing when no dataset is loaded; prints a notice when no
        column qualifies for any figure.
        """
        if self.df is None:
            return

        print("📈 Creating basic visualizations...")

        palette = sns.color_palette()
        drew_anything = False

        # Numeric columns distribution
        numeric_cols = self.df.select_dtypes(include=[np.number]).columns
        if len(numeric_cols) > 0:
            drew_anything = True
            plt.figure(figsize=(15, 5))
            # Size the grid to the number of panels actually drawn (max 3).
            panels = min(3, len(numeric_cols))
            for i, col in enumerate(numeric_cols[:3], 1):
                plt.subplot(1, panels, i)
                plt.hist(self.df[col].dropna(), bins=30, alpha=0.7, color=palette[i-1])
                plt.title(f'Distribution of {col}')
                plt.xlabel(col)
                plt.ylabel('Frequency')
            plt.tight_layout()
            plt.show()

        # Correlation heatmap
        if len(numeric_cols) > 1:
            drew_anything = True
            plt.figure(figsize=(10, 8))
            correlation_matrix = self.df[numeric_cols].corr()
            sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', center=0,
                       square=True, fmt='.2f')
            plt.title('Correlation Heatmap')
            plt.tight_layout()
            plt.show()

        # Categorical columns
        categorical_cols = self.df.select_dtypes(include=['object']).columns
        if len(categorical_cols) > 0:
            drew_anything = True
            plt.figure(figsize=(15, 5))
            panels = min(3, len(categorical_cols))
            for i, col in enumerate(categorical_cols[:3], 1):
                plt.subplot(1, panels, i)
                value_counts = self.df[col].value_counts().head(10)
                positions = range(len(value_counts))
                plt.bar(positions, value_counts.values,
                       color=palette[i-1])
                plt.title(f'Top Values in {col}')
                plt.xlabel(col)
                plt.ylabel('Count')
                plt.xticks(positions, value_counts.index, rotation=45)
            plt.tight_layout()
            plt.show()

        if not drew_anything:
            print("ℹ️ No suitable columns found for basic visualizations")

    def _present_agent(self, agent, heading_html):
        """Run ``agent``, display its heading and output, and report success.

        Returns:
            False when the agent's output is the failure string produced by
            ``gemini_call``; True otherwise.
        """
        result = agent()
        display(HTML(heading_html))
        display(Markdown(result))
        return not self._is_api_error(result)

    def run_full_analysis(self):
        """Run every agent in order and display each result.

        Order: data preparation, EDA, basic visualizations, report, critic,
        executor. Requires an API key and a loaded dataset; otherwise a
        message is printed and nothing runs.

        If an agent's Gemini call fails, its error text is displayed and the
        run stops there, so that error text is never passed to later agents
        as if it were analysis. Any other exception is caught and printed.
        """
        if not self.api_key:
            print("❌ Please set up your API key first")
            return

        if self.df is None:
            print("❌ Please upload a dataset first")
            return

        print("🚀 Starting Multi-Agent EDA Analysis...")
        print("="*60)

        divider = "\n" + "="*60
        before_plots = (
            (self.data_prep_agent, "<h2>🧹 Data Preparation Agent</h2>"),
            (self.eda_agent, "<h2>📊 EDA Agent</h2>"),
        )
        after_plots = (
            (self.report_generator_agent, "<h2>📄 EDA Report</h2>"),
            (self.critic_agent, "<h2>🧐 Critic Review</h2>"),
            (self.executor_agent, "<h2>✅ Executor Validation</h2>"),
        )
        stop_notice = ("❌ Analysis stopped: the Gemini API call failed (see the message above). "
                       "Fix the issue and run the analysis again.")

        try:
            for agent, heading in before_plots:
                if not self._present_agent(agent, heading):
                    print(stop_notice)
                    return
                print(divider)

            display(HTML("<h2>📈 Basic Visualizations</h2>"))
            self.create_basic_visualizations()
            print(divider)

            for agent, heading in after_plots:
                if not self._present_agent(agent, heading):
                    print(stop_notice)
                    return
                print(divider)

            print("🎉 Multi-Agent EDA Analysis Complete!")

        except Exception as e:
            print(f"❌ Error during analysis: {e}")

    def display_menu(self):
        """Render the banner and the three-button control panel.

        The buttons trigger API-key setup, dataset upload (refused until an
        API key is set) and the full analysis. Each click clears the shared
        output area before writing to it.
        """
        display(HTML("""
        <div style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
                    padding: 20px; border-radius: 10px; margin-bottom: 20px;">
            <h1 style="color: white; text-align: center; margin: 0;">
                🔍 Agentic EDA System
            </h1>
            <p style="color: white; text-align: center; margin: 10px 0 0 0;">
                Multi-Agent Exploratory Data Analysis powered by Gemini AI
            </p>
        </div>
        """))

        # Create buttons
        setup_btn = widgets.Button(description="🔑 Setup API Key", button_style='info')
        upload_btn = widgets.Button(description="📁 Upload Dataset", button_style='warning')
        analyze_btn = widgets.Button(description="🚀 Run Full Analysis", button_style='success')

        output = widgets.Output()

        def on_setup(b):
            with output:
                clear_output()
                self.setup_api_key()

        def on_upload(b):
            with output:
                clear_output()
                if not self.api_key:
                    print("❌ Please setup API key first")
                    return
                self.upload_dataset()

        def on_analyze(b):
            with output:
                clear_output()
                self.run_full_analysis()

        setup_btn.on_click(on_setup)
        upload_btn.on_click(on_upload)
        analyze_btn.on_click(on_analyze)

        button_box = widgets.HBox([setup_btn, upload_btn, analyze_btn])
        display(widgets.VBox([button_box, output]))

# Entry point for the notebook cell: build the system, render its menu,
# then print the three usage steps between two 60-character rules.
print("🚀 Initializing Agentic EDA System...")
eda_system = AgenticEDA()
eda_system.display_menu()

print(
    "\n" + "=" * 60,
    "📋 Instructions:",
    "1. Click '🔑 Setup API Key' to configure your Gemini API key",
    "2. Click '📁 Upload Dataset' to upload your CSV file",
    "3. Click '🚀 Run Full Analysis' to start the multi-agent analysis",
    "=" * 60,
    sep="\n",
)

   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 114.2/114.2 kB 5.4 MB/s eta 0:00:00
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 101.4/101.4 kB 6.2 MB/s eta 0:00:00
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.6/1.6 MB 22.5 MB/s eta 0:00:00
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 65.6/65.6 kB 2.7 MB/s eta 0:00:00
🚀 Initializing Agentic EDA System...


VBox(children=(HBox(children=(Button(button_style='info', description='🔑 Setup API Key', style=ButtonStyle()),…


📋 Instructions:
1. Click '🔑 Setup API Key' to configure your Gemini API key
2. Click '📁 Upload Dataset' to upload your CSV file
3. Click '🚀 Run Full Analysis' to start the multi-agent analysis
