# AI Data Analyst Agent (Jupyter + Gradio)

This notebook:
- Lets the user upload a CSV
- Accepts natural language queries
- Returns text answers; if a matplotlib figure is produced it returns the figure image


In [1]:
!pip install google-generativeai pandas matplotlib pillow gradio




[notice] A new release of pip is available: 24.3.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
# Cell 2: Import Libraries
import os
import io
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
import gradio as gr

try:
    import google.generativeai as genai
    print(" All libraries imported successfully!")
except ImportError as e:
    print(f"❌ Error importing libraries: {e}")
    print("Please run: pip install google-generativeai")

 All libraries imported successfully!


In [3]:
# Cell 3: Configure Gemini API
GEMINI_API_KEY = ""

os.environ["GEMINI_API_KEY"] = GEMINI_API_KEY
genai.configure(api_key=GEMINI_API_KEY)

print(" Gemini API configured successfully!")
print(f"API Key: {GEMINI_API_KEY[:10]}...") 

 Gemini API configured successfully!
API Key: ...


In [4]:
# Cell 3.5: List Available Models (Run this to check available models)
print("\n" + "="*50)
print("AVAILABLE GEMINI MODELS:")
print("="*50)
try:
    available_models = []
    for model in genai.list_models():
        if 'generateContent' in model.supported_generation_methods:
            available_models.append(model.name)
            print(f"✓ {model.name}")
    
    if not available_models:
        print(" No models available. Check your API key!")
    else:
        print(f"\n✓ Found {len(available_models)} models")
except Exception as e:
    print(f" Error listing models: {e}")
    print("This usually means your API key is invalid or not configured properly")


AVAILABLE GEMINI MODELS:
 Error listing models: 
  No API_KEY or ADC found. Please either:
    - Set the `GOOGLE_API_KEY` environment variable.
    - Manually pass the key with `genai.configure(api_key=my_api_key)`.
    - Or set up Application Default Credentials, see https://ai.google.dev/gemini-api/docs/oauth for more information.
This usually means your API key is invalid or not configured properly


In [5]:
# Cell 4: Define Helper Function - Convert Figure to Image
def fig_to_pil(fig):
    """Convert matplotlib figure to PIL Image"""
    buf = io.BytesIO()
    fig.savefig(buf, format="png", bbox_inches="tight", dpi=100)
    buf.seek(0)
    img = Image.open(buf)
    return img

print("Helper function 'fig_to_pil' defined!")

Helper function 'fig_to_pil' defined!


In [6]:
# Cell 5: Define Main Analysis Function
def analyze_with_gemini(uploaded_file, query: str):
    """Analyze CSV data using Gemini AI"""
    if uploaded_file is None:
        return "Please upload a CSV file first.", None
    
    if not query or query.strip() == "":
        return "Please enter a question.", None
    
    try:
        # Read the CSV file
        if isinstance(uploaded_file, bytes):
            df = pd.read_csv(io.BytesIO(uploaded_file))
        else:
            df = pd.read_csv(uploaded_file)
        
        print(f"✓ CSV loaded: {df.shape[0]} rows, {df.shape[1]} columns")
        
        # Get dataframe info
        df_preview = df.head(20).to_string()
        df_info = f"Columns: {list(df.columns)}\nShape: {df.shape}\nData types:\n{df.dtypes.to_string()}"
        
        # Create prompt for Gemini
        prompt = f"""You are a data analyst. Here is information about a CSV dataframe:

{df_info}

Preview of first 20 rows:
{df_preview}

Question: {query}

Please provide a clear, concise answer based on the data. If the question requires visualization, mention what type of plot would be appropriate."""
        
        # Get the first available model dynamically
        model = None
        try:
            available_models = [m.name for m in genai.list_models() 
                              if 'generateContent' in m.supported_generation_methods]
            
            if not available_models:
                return "No models available. Please check your API key at https://makersuite.google.com/app/apikey", None
            
            # Use the first available model
            model_name = available_models[0]
            model = genai.GenerativeModel(model_name)
            print(f"✓ Using model: {model_name}")
            
        except Exception as e:
            return f"Error initializing model: {str(e)}\n\nPlease run Cell 3.5 to check available models.", None
        
        if model is None:
            error_msg = "Could not initialize any Gemini model.\n\n"
            error_msg += "Please check:\n"
            error_msg += "1. Your API key is valid and active\n"
            error_msg += "2. You have access to Gemini models\n"
            error_msg += "3. Run Cell 3.5 to see available models\n\n"
            error_msg += "Get a free API key at: https://makersuite.google.com/app/apikey"
            return error_msg, None
        
        # Generate response
        print("🤖 Generating AI response...")
        response = model.generate_content(prompt)
        text_result = response.text
        print("✓ AI response generated!")
        
        # Check if plotting is mentioned or needed
        plot_keywords = ['plot', 'chart', 'graph', 'visualize', 'visualization', 'show']
        should_plot = any(keyword in query.lower() or keyword in text_result.lower() 
                         for keyword in plot_keywords)
        
        img_pil = None
        if should_plot:
            numeric_cols = df.select_dtypes(include=['number']).columns.tolist()
            
            if len(numeric_cols) >= 1:
                print("📊 Generating plot...")
                fig, ax = plt.subplots(figsize=(10, 6))
                
                # Simple automatic plotting logic
                if len(numeric_cols) == 1:
                    df[numeric_cols[0]].plot(kind="line", ax=ax, 
                                            title=f"{numeric_cols[0]} Over Index")
                elif len(numeric_cols) >= 2:
                    df.plot(x=numeric_cols[0], y=numeric_cols[1], 
                           kind="scatter", ax=ax,
                           title=f"{numeric_cols[1]} vs {numeric_cols[0]}")
                
                ax.grid(True, alpha=0.3)
                plt.tight_layout()
                img_pil = fig_to_pil(fig)
                plt.close(fig)
                print("✓ Plot generated!")
        
        return text_result, img_pil
    
    except pd.errors.EmptyDataError:
        return "Error: The uploaded file is empty.", None
    except pd.errors.ParserError:
        return "Error: Could not parse the CSV file. Please check the file format.", None
    except Exception as e:
        import traceback
        error_details = traceback.format_exc()
        return f"Error: {str(e)}\n\nFull error details:\n{error_details}", None

print(" Main analysis function 'analyze_with_gemini' defined!")

 Main analysis function 'analyze_with_gemini' defined!


In [7]:
# Cell 6: Create Gradio Interface
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 📊 AI Data Analyst Agent")
    gr.Markdown("Upload a CSV file, ask a question in plain English, and get AI-powered analysis with automatic visualizations.")
    
    with gr.Row():
        with gr.Column():
            file_input = gr.File(label="Upload CSV File", file_count="single", file_types=[".csv"])
            query_input = gr.Textbox(
                label="Ask Your Question", 
                placeholder="e.g., What are the top 5 products by sales? Show a trend of revenue over time.",
                lines=3
            )
            analyze_btn = gr.Button("🔍 Analyze", variant="primary")
    
    with gr.Row():
        output_text = gr.Textbox(label="AI Analysis", lines=10, interactive=False)
    
    with gr.Row():
        output_image = gr.Image(label="Generated Visualization", type="pil")
    
    # Example queries
    gr.Markdown("### Example Questions:")
    gr.Markdown("""
    - What are the top 5 rows by a specific column?
    - Show me a summary of the data
    - What's the average value of column X?
    - Plot the relationship between column A and column B
    - Are there any missing values in the dataset?
    """)
    
    analyze_btn.click(
        fn=analyze_with_gemini, 
        inputs=[file_input, query_input], 
        outputs=[output_text, output_image]
    )

print(" Gradio interface created successfully!")

 Gradio interface created successfully!


In [None]:
# Cell 7: Launch the Application
demo.launch(share=False, debug=True)

* Running on local URL:  http://127.0.0.1:7860
* To create a public link, set `share=True` in `launch()`.
