# In [1]:
import gradio as gr
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import ollama
import os
from pathlib import Path

# Function to Perform EDA and Generate Visualizations
def eda_analysis(file_path):
    """Run automated EDA on a CSV file and build a text report plus plots.

    The CSV is loaded with pandas, missing values are imputed (median for
    numeric columns, most frequent value for object columns), and the
    resulting frame is summarised, sent to ``generate_ai_insights`` and
    plotted with ``generate_visualizations``.

    Args:
        file_path: Path to the uploaded CSV file. May be empty/``None`` when
            nothing was uploaded.

    Returns:
        A ``(report, plot_paths)`` tuple. ``report`` is the text shown to the
        user; ``plot_paths`` is the list of image paths returned by
        ``generate_visualizations``. On any failure the report is an
        ``"Error: ..."`` message and the list is empty.
    """
    try:
        # Check if file_path is valid
        if not file_path or not os.path.exists(file_path):
            return "Error: Please upload a valid CSV file.", []

        # Load the dataset
        df = pd.read_csv(file_path)

        # Handle missing values.
        # NOTE: the filled Series is assigned back to the frame instead of
        # calling ``df[col].fillna(..., inplace=True)``. The in-place call
        # operates on a column selection (chained assignment), which warns in
        # pandas 2.x and does not modify ``df`` when Copy-on-Write is active.
        for col in df.select_dtypes(include=['number']).columns:
            column = df[col]
            # Median needs at least one real value; all-NaN columns fall back to 0.
            fill_value = column.median() if column.notna().any() else 0
            df[col] = column.fillna(fill_value)

        for col in df.select_dtypes(include=['object']).columns:
            column = df[col]
            # mode() ignores NaN, so it is non-empty whenever any value exists.
            fill_value = column.mode()[0] if column.notna().any() else "Unknown"
            df[col] = column.fillna(fill_value)

        # Data Summary
        summary = df.describe(include='all').to_string()

        # Missing Values (after filling)
        missing_values = df.isnull().sum().to_string()

        # Generate AI Insights
        insights = generate_ai_insights(summary)

        # Generate Data Visualizations
        plot_paths = generate_visualizations(df)

        return (
            f"\nData Loaded Successfully!\n\nSummary:\n{summary}\n\nMissing Values:\n{missing_values}\n\nAI Insights:\n{insights}",
            plot_paths
        )
    except Exception as e:
        # UI boundary: report the problem in the text box rather than crashing the app.
        return f"Error: An unexpected issue occurred - {str(e)}", []

# AI-Powered Insights using Mistral-7B (Ollama)
def generate_ai_insights(df_summary):
    """Ask the Ollama "mistral" model to comment on a dataset summary.

    Args:
        df_summary: Text summary of the dataset; it is embedded verbatim in
            the prompt sent to the model.

    Returns:
        The ``content`` of the model's reply message, or a string starting
        with ``"AI Insights unavailable: "`` followed by the error text if
        anything goes wrong while building the request or reading the reply.
    """
    try:
        user_message = {
            "role": "user",
            "content": f"Analyze the dataset summary and provide insights:\n\n{df_summary}",
        }
        reply = ollama.chat(model="mistral", messages=[user_message])
        # Kept inside the try so an unexpected reply shape is also reported as "unavailable".
        return reply['message']['content']
    except Exception as exc:
        return f"AI Insights unavailable: {exc!s}"

# Function to Generate Data Visualizations
def generate_visualizations(df):
    """Save a histogram per numeric column and a correlation heatmap as PNGs.

    Images are written to a ``plots`` directory (created if missing) relative
    to the current working directory.

    Args:
        df: DataFrame to visualise. Only numeric columns are plotted.

    Returns:
        List of image paths (as strings): one ``<column>_distribution.png``
        per numeric column, followed by ``correlation_heatmap.png`` when the
        frame has more than one numeric column and is not empty. Returns an
        empty list when there is nothing numeric to plot.
    """
    import re  # local import: only needed to build safe file names

    plot_paths = []
    output_dir = Path("plots")
    output_dir.mkdir(exist_ok=True)  # Create a directory for plots

    numeric_df = df.select_dtypes(include=['number'])
    used_stems = set()

    # Histograms for Numeric Columns.
    # .items() yields one Series per column even when labels are duplicated,
    # whereas df[col] would return a DataFrame for a duplicated label.
    for col, series in numeric_df.items():
        # Column names come from the uploaded file; replace characters that are
        # path separators or invalid in file names so the plot stays in output_dir.
        stem = re.sub(r'[\\/:*?"<>|\x00-\x1f]+', "_", str(col))

        # Avoid overwriting an earlier plot when two columns map to the same name.
        unique_stem = stem
        suffix = 1
        while unique_stem in used_stems:
            suffix += 1
            unique_stem = f"{stem}_{suffix}"
        used_stems.add(unique_stem)

        fig = plt.figure(figsize=(6, 4))
        try:
            sns.histplot(series, bins=30, kde=True, color="blue")
            plt.title(f"Distribution of {col}")
            path = output_dir / f"{unique_stem}_distribution.png"
            plt.savefig(path)
            plot_paths.append(str(path))
        finally:
            # Always release the figure, even if plotting or saving failed.
            plt.close(fig)

    # Correlation Heatmap (only numeric columns)
    if not numeric_df.empty and len(numeric_df.columns) > 1:  # Ensure >1 column for correlation
        fig = plt.figure(figsize=(8, 5))
        try:
            sns.heatmap(numeric_df.corr(), annot=True, cmap='coolwarm', fmt=".2f", linewidths=0.5)
            plt.title("Correlation Heatmap")
            path = output_dir / "correlation_heatmap.png"
            plt.savefig(path)
            plot_paths.append(str(path))
        finally:
            plt.close(fig)

    return plot_paths

# Gradio Interface
# Build the UI components first (input, then the two outputs) so the
# Interface call below only wires them together.
_csv_upload = gr.File(label="Upload CSV File", type="filepath")
_report_box = gr.Textbox(label="EDA Report")
_plot_gallery = gr.Gallery(label="Data Visualizations")

# eda_analysis receives the uploaded file's path and returns
# (report text, list of plot image paths), matching the two outputs.
demo = gr.Interface(
    title="📊 LLM-Powered Exploratory Data Analysis (EDA)",
    description="Upload any dataset CSV file and get automated EDA insights with AI-powered analysis and visualizations.",
    fn=eda_analysis,
    inputs=_csv_upload,
    outputs=[_report_box, _plot_gallery],
)

# Start the app only when run as a script; share=True asks Gradio to also
# create a share link in addition to the local URL.
if __name__ == "__main__":
    demo.launch(share=True)


# Output:
# * Running on local URL:  http://127.0.0.1:7860
#
# Could not create share link. Please check your internet connection or our status page: https://status.gradio.app.
