In [None]:
# Confirmation message printed by the notebook's first cell.
print('Setup complete.')

# Lab 12: Catalog Visualizer

## Learning Objectives
- Create interactive data visualizations with AI assistance
- Build dashboard interfaces for complex datasets
- Implement real-time data exploration tools
- Design user-friendly visualization interfaces

## Lab Overview
Build an AI-powered catalog visualization system:
1. **Data Processing** - Clean and structure catalog data
2. **Visualization Engine** - Create interactive charts and graphs
3. **Dashboard Interface** - Build user-friendly exploration tools
4. **AI Insights** - Generate automated analysis and recommendations

## Exit Ticket
- [ ] Interactive catalog data visualizations
- [ ] Dashboard with filtering and search
- [ ] AI-generated insights and recommendations
- [ ] Export and sharing functionality

In [None]:
# Install required packages
!pip install asksageclient pip_system_certs streamlit plotly pandas numpy seaborn matplotlib rich tiktoken

In [None]:
# ================================
# 🔐 Cell 1 — Load secrets (Colab) + pricing + token utils
# ================================
import os, time, csv
from typing import Optional, Dict
import tiktoken

from google.colab import userdata

def _read_secret(name: str, *, required: bool = True) -> Optional[str]:
    """Fetch one secret from the Colab secret store.

    Args:
        name: Name of the secret as configured in Colab.
        required: When True, a missing or empty secret is an error.

    Returns:
        The secret value, or ``None`` when an optional secret is not defined.

    Raises:
        RuntimeError: If ``required`` is True and the secret is missing or empty.
    """
    try:
        value = userdata.get(name)
    except userdata.SecretNotFoundError:
        # userdata.get raises for undefined secrets instead of returning None;
        # translate that so optional secrets can fall back to a default.
        value = None
    if required and not value:
        # Explicit raise rather than `assert`, which is stripped under `python -O`.
        raise RuntimeError(f"{name} not provided.")
    return value


ASKSAGE_API_KEY = _read_secret("ASKSAGE_API_KEY")
ASKSAGE_BASE_URL = _read_secret("ASKSAGE_BASE_URL", required=False)  # optional
ASKSAGE_EMAIL = _read_secret("ASKSAGE_EMAIL")

print("✓ Secrets loaded")
print("  • EMAIL:", ASKSAGE_EMAIL)
print("  • BASE URL:", ASKSAGE_BASE_URL or "(default)")

# Pricing (USD per 1,000,000 tokens), keyed by model name.
# Each entry holds a separate rate for input and output tokens.
# NOTE(review): rates are hard-coded — confirm against current provider pricing.
PRICES_PER_M = {
    "gpt-5": {"input_per_m": 1.25, "output_per_m": 10.00},
    "gpt-5-mini": {"input_per_m": 0.25, "output_per_m": 2.00},
}

# Tokenizer: one shared tiktoken encoding ("o200k_base") reused for every count.
enc = tiktoken.get_encoding("o200k_base")

def count_tokens(text: str) -> int:
    """Return how many tokens the module-level ``enc`` encoding produces for *text*.

    Falsy input (``None`` or ``""``) is encoded as the empty string.
    """
    normalized = text if text else ""
    token_ids = enc.encode(normalized)
    return len(token_ids)

def cost_usd(
    model: str,
    input_tokens: int,
    output_tokens: int,
    prices: Optional[Dict[str, Dict[str, float]]] = None,
) -> float:
    """Return the USD cost of a request for the given token counts.

    Args:
        model: Model name used as the key into the price table.
        input_tokens: Number of prompt tokens.
        output_tokens: Number of completion tokens.
        prices: Optional price table mapping model name to a dict with
            ``"input_per_m"`` and ``"output_per_m"`` rates (USD per 1,000,000
            tokens). Defaults to the module-level ``PRICES_PER_M``.

    Returns:
        Input cost plus output cost, in USD.

    Raises:
        ValueError: If *model* is not present in the price table.
    """
    # Resolve the table at call time so the default tracks PRICES_PER_M edits.
    table = PRICES_PER_M if prices is None else prices
    if model not in table:
        raise ValueError(f"Unknown model: {model}")
    rates = table[model]
    return (input_tokens / 1_000_000) * rates["input_per_m"] + (output_tokens / 1_000_000) * rates["output_per_m"]

In [None]:
# ================================
# 🔧 Cell 2 — Import bootcamp_common and setup AskSage client
# ================================
import sys
sys.path.append('../../../')  # Adjust path to reach bootcamp_common

from bootcamp_common.ask_sage import AskSageClient

# Build the shared AskSage client from the credentials loaded in Cell 1.
client = AskSageClient(api_key=ASKSAGE_API_KEY, base_url=ASKSAGE_BASE_URL)

print("✓ AskSage client initialized")

In [None]:
import os
import json
import pandas as pd
import numpy as np
from pathlib import Path
from typing import Dict, List, Any, Optional
from dataclasses import dataclass
from datetime import datetime, timedelta

import openai
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
from rich.console import Console
from rich.table import Table

# Shared rich console used by the later cells for formatted output.
console = Console()
print("📊 Catalog Visualizer loading...")

## Sample Catalog Data

In [None]:
# Generate sample catalog data
# Seed NumPy's global RNG so every np.random draw below is repeatable.
# Keep the order of the np.random calls unchanged: the generated values depend on it.
np.random.seed(42)

# Top-level categories and the subcategories that belong to each of them.
categories = ['Electronics', 'Books', 'Clothing', 'Home & Garden', 'Sports', 'Toys']
subcategories = {
    'Electronics': ['Laptops', 'Phones', 'Tablets', 'Accessories'],
    'Books': ['Fiction', 'Non-fiction', 'Textbooks', 'Comics'],
    'Clothing': ['Men', 'Women', 'Kids', 'Accessories'],
    'Home & Garden': ['Furniture', 'Decor', 'Tools', 'Plants'],
    'Sports': ['Equipment', 'Apparel', 'Fitness', 'Outdoor'],
    'Toys': ['Educational', 'Action Figures', 'Board Games', 'Electronic']
}

# Generate sample catalog items
catalog_data = []
for i in range(500):
    # Pick a category first, then a subcategory valid for that category.
    category = np.random.choice(categories)
    subcategory = np.random.choice(subcategories[category])
    
    item = {
        'id': f'ITEM_{i:04d}',
        # i % 100 means display names repeat; 'id' is the unique key.
        'name': f'{subcategory} Product {i % 100}',
        'category': category,
        'subcategory': subcategory,
        'price': np.random.uniform(10, 500),
        'rating': np.random.uniform(1, 5),
        'reviews': np.random.randint(0, 1000),
        'stock': np.random.randint(0, 100),
        'sales_last_30d': np.random.randint(0, 50),
        # Dates are offsets from datetime.now(), so absolute values shift with the run date.
        'created_date': (datetime.now() - timedelta(days=np.random.randint(1, 365))).isoformat(),
        # A randomly sized sample of distinct tags (replace=False), possibly empty.
        'tags': np.random.choice(['premium', 'bestseller', 'new', 'sale', 'trending'], 
                               size=np.random.randint(0, 3), replace=False).tolist()
    }
    catalog_data.append(item)

# Create DataFrame
df = pd.DataFrame(catalog_data)
# Convert the ISO strings back to datetimes so min()/max() and .date() work below.
df['created_date'] = pd.to_datetime(df['created_date'])
# Derived column: revenue over the last 30 days = unit price * units sold.
df['revenue_30d'] = df['price'] * df['sales_last_30d']

console.print(f"📦 Generated {len(df)} catalog items")
console.print(f"Categories: {', '.join(categories)}")
console.print(f"Date range: {df['created_date'].min().date()} to {df['created_date'].max().date()}")

## Visualization Engine

In [None]:
class CatalogVisualizer:
    """Build interactive Plotly figures from a catalog DataFrame."""

    def __init__(self, data: pd.DataFrame):
        """Store the catalog data and probe for AI availability."""
        self.data = data
        self.setup_client()

    def setup_client(self):
        """Set ``self.has_api`` (and ``self.client`` on success).

        The OpenAI client is only created when the ``OPENAI_API_KEY``
        environment variable is set; the outcome is reported on the console.
        """
        if not os.getenv('OPENAI_API_KEY'):
            self.has_api = False
            console.print("💡 No API key, using template insights")
            return

        try:
            self.client = openai.OpenAI()
            self.has_api = True
            console.print("✅ AI insights enabled")
        except Exception as err:
            # Any failure while creating the client falls back to static insights.
            self.has_api = False
            console.print(f"⚠️ Using static insights: {err}")

    def create_category_overview(self) -> go.Figure:
        """Return a 2x2 grid of bar charts summarising each category.

        Panels: item count, mean price, mean rating and summed 30-day revenue,
        all rounded to two decimals.
        """
        aggregations = {
            'id': 'count',
            'price': 'mean',
            'rating': 'mean',
            'revenue_30d': 'sum',
        }
        per_category = self.data.groupby('category').agg(aggregations).round(2)

        panel_titles = ('Item Count by Category', 'Average Price',
                        'Average Rating', 'Total Revenue (30d)')
        grid = make_subplots(
            rows=2,
            cols=2,
            subplot_titles=panel_titles,
            specs=[[{"type": "bar"}, {"type": "bar"}] for _ in range(2)],
        )

        # One entry per panel: (stats column, trace name, bar colour, row, col).
        panels = [
            ('id', 'Count', 'lightblue', 1, 1),
            ('price', 'Avg Price', 'lightgreen', 1, 2),
            ('rating', 'Avg Rating', 'orange', 2, 1),
            ('revenue_30d', 'Revenue', 'lightcoral', 2, 2),
        ]
        for column, label, colour, grid_row, grid_col in panels:
            bars = go.Bar(
                x=per_category.index,
                y=per_category[column],
                name=label,
                marker_color=colour,
            )
            grid.add_trace(bars, row=grid_row, col=grid_col)

        grid.update_layout(height=600, showlegend=False, title_text="Catalog Category Overview")
        return grid

    def create_price_distribution(self) -> go.Figure:
        """Return a per-category box plot of item prices."""
        box_plot = px.box(
            self.data,
            x='category',
            y='price',
            color='category',
            title='Price Distribution by Category',
        )
        box_plot.update_layout(xaxis_tickangle=-45)
        return box_plot

    # TODO: Implement performance analytics method
    def create_performance_analytics(self) -> go.Figure:
        """Create performance analytics dashboard"""

        # TODO: Create a comprehensive performance visualization showing:
        # - Sales trends over time
        # - Rating vs sales correlation
        # - Stock levels and turnover
        # - Revenue by subcategory

        # HINT: Use make_subplots to create a multi-panel dashboard
        # HINT: Include scatter plots, time series, and bar charts

        pass  # Replace with your implementation

# Initialize visualizer
# Constructing it also runs setup_client(), which prints the AI-availability status.
visualizer = CatalogVisualizer(df)
print("📊 Visualizer ready!")

## Task 1: Basic Visualizations

In [None]:
# Create and display basic visualizations
console.print("\n📈 [bold blue]Creating Catalog Visualizations[/bold blue]")

# Category overview
# 2x2 bar-chart grid: count, average price, average rating, 30-day revenue.
category_fig = visualizer.create_category_overview()
category_fig.show()

# Price distribution
# Box plot of price per category.
price_fig = visualizer.create_price_distribution()
price_fig.show()

console.print("✅ Basic visualizations created")

## AI Insights Generator

In [None]:
class AIInsightsGenerator:
    """Generate AI-powered insights from catalog data.

    Works on the DataFrame held by a ``CatalogVisualizer`` and uses that
    object's ``has_api`` flag to decide whether AI analysis is available.
    """

    def __init__(self, visualizer: "CatalogVisualizer"):
        """Keep a reference to the visualizer and to its catalog DataFrame."""
        self.visualizer = visualizer
        self.data = visualizer.data

    def generate_summary_stats(self) -> Dict[str, Any]:
        """Generate key summary statistics.

        Returns:
            A dict with item and category counts, mean price and rating,
            summed 30-day revenue, the most frequent category (``None`` when
            no item has a category), the number of items with stock below 10,
            and the number of items with rating > 4 and 30-day sales > 20.
        """
        category_counts = self.data['category'].value_counts()
        # value_counts() sorts by frequency, so the first label is the largest
        # category; an empty catalog has no first label to index.
        top_category = category_counts.index[0] if len(category_counts) else None

        return {
            'total_items': len(self.data),
            'total_categories': self.data['category'].nunique(),
            'avg_price': self.data['price'].mean(),
            'avg_rating': self.data['rating'].mean(),
            'total_revenue_30d': self.data['revenue_30d'].sum(),
            'top_category': top_category,
            'low_stock_items': (self.data['stock'] < 10).sum(),
            'high_performers': ((self.data['rating'] > 4) & (self.data['sales_last_30d'] > 20)).sum()
        }

    # TODO: Implement AI-powered insights generation
    def generate_insights(self) -> List[str]:
        """Generate AI-powered business insights.

        Returns:
            A list of human-readable insight strings. Until the AI analysis is
            implemented these are template sentences filled from the summary
            statistics; a catalog without any categorized item yields a single
            explanatory message.
        """

        # TODO: Use AI to analyze the catalog data and generate insights
        # TODO: Include analysis of:
        # - Performance trends and patterns
        # - Inventory optimization opportunities
        # - Pricing strategy recommendations
        # - Product category performance

        stats = self.generate_summary_stats()

        top_category = stats['top_category']
        if top_category is None:
            # Nothing to rank or average; avoid emitting "None"/"nan" sentences.
            return ["No catalog items with a category; no insights available"]

        if self.visualizer.has_api:
            # TODO: Implement AI analysis here
            pass

        # Number of items in the leading category, reported next to its name.
        top_count = int((self.data['category'] == top_category).sum())

        # Mock insights for demonstration
        return [
            f"{top_category} category leads with {top_count} items",
            f"Average rating of {stats['avg_rating']:.2f} indicates good customer satisfaction",
            f"{stats['low_stock_items']} items need restocking (stock < 10)",
            f"{stats['high_performers']} products are high performers (rating > 4, sales > 20)",
            "Consider bundling slow-moving items with bestsellers"
        ]

    def identify_opportunities(self) -> Dict[str, List[str]]:
        """Identify business opportunities.

        Returns:
            A dict of up to five item names per opportunity type:
            ``restock_needed`` (stock < 10), ``marketing_opportunities``
            (rating > 4 but 30-day sales < 5) and ``pricing_optimization``
            (30-day sales > 30 and rating > 4).
        """

        # Low stock items
        low_stock = self.data[self.data['stock'] < 10]

        # High-rated but low-selling items
        underperformers = self.data[
            (self.data['rating'] > 4) & (self.data['sales_last_30d'] < 5)
        ]

        # High-selling items that could be repriced
        repricing_candidates = self.data[
            (self.data['sales_last_30d'] > 30) & (self.data['rating'] > 4)
        ]

        return {
            'restock_needed': low_stock['name'].tolist()[:5],
            'marketing_opportunities': underperformers['name'].tolist()[:5],
            'pricing_optimization': repricing_candidates['name'].tolist()[:5]
        }

# Initialize insights generator
# It shares the visualizer's DataFrame and its has_api flag.
insights_generator = AIInsightsGenerator(visualizer)
print("🤖 AI insights generator ready!")

## Task 2: Generate AI Insights

In [None]:
# Generate and display AI insights
console.print("\n🤖 [bold blue]AI-Powered Catalog Insights[/bold blue]")

# Summary statistics
# Rendered as a two-column rich table: metric name and value.
stats = insights_generator.generate_summary_stats()
stats_table = Table(title="Catalog Summary")
stats_table.add_column("Metric", style="cyan")
stats_table.add_column("Value", style="green")

for key, value in stats.items():
    # Floats are shown with two decimals; everything else is stringified as-is.
    if isinstance(value, float):
        value = f"{value:.2f}"
    # Turn snake_case keys into title-cased labels, e.g. 'avg_price' -> 'Avg Price'.
    stats_table.add_row(key.replace('_', ' ').title(), str(value))

console.print(stats_table)

# AI insights
# Printed as a numbered list starting at 1.
insights = insights_generator.generate_insights()
console.print("\n💡 [bold yellow]Key Insights:[/bold yellow]")
for i, insight in enumerate(insights, 1):
    console.print(f"  {i}. {insight}")

# Business opportunities
# One heading per opportunity type, followed by the item names it returned.
opportunities = insights_generator.identify_opportunities()
console.print("\n🎯 [bold green]Business Opportunities:[/bold green]")
for category, items in opportunities.items():
    console.print(f"  [bold]{category.replace('_', ' ').title()}:[/bold]")
    for item in items:
        console.print(f"    - {item}")

print("\n✅ AI insights generated!")

## Task 3: Build Interactive Dashboard

In [None]:
# TODO: Create a Streamlit dashboard for catalog visualization
# Save this code to a separate file: catalog_dashboard.py
#
# `dashboard_code` holds the full source of a standalone Streamlit app. The app
# reads catalog_data.csv, offers category / price / rating filters in the
# sidebar, and shows four metrics, a pie chart, a scatter plot and a data table.
# The generated script catches only FileNotFoundError when loading the CSV, so
# other failures (e.g. a malformed file) surface instead of being reported as
# a missing file.

dashboard_code = '''import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from datetime import datetime

st.set_page_config(page_title="Catalog Visualizer", layout="wide")

def load_data():
    # TODO: Load your catalog data here
    # For now, using sample data
    return pd.read_csv("catalog_data.csv")  # You'll need to save the data

def main():
    st.title("📊 Catalog Visualizer Dashboard")
    
    # Load data
    try:
        df = load_data()
    except FileNotFoundError:
        st.error("Please run the lab notebook first to generate catalog_data.csv")
        return
    
    # Sidebar filters
    with st.sidebar:
        st.header("Filters")
        
        # Category filter
        categories = st.multiselect(
            "Categories", 
            df['category'].unique(),
            default=df['category'].unique()
        )
        
        # Price range
        price_range = st.slider(
            "Price Range",
            min_value=float(df['price'].min()),
            max_value=float(df['price'].max()),
            value=(float(df['price'].min()), float(df['price'].max()))
        )
        
        # Rating filter
        min_rating = st.slider("Minimum Rating", 1.0, 5.0, 1.0)
    
    # Filter data
    filtered_df = df[
        (df['category'].isin(categories)) &
        (df['price'].between(price_range[0], price_range[1])) &
        (df['rating'] >= min_rating)
    ]
    
    # Main dashboard
    col1, col2, col3, col4 = st.columns(4)
    
    with col1:
        st.metric("Total Items", len(filtered_df))
    with col2:
        st.metric("Avg Price", f"${filtered_df['price'].mean():.2f}")
    with col3:
        st.metric("Avg Rating", f"{filtered_df['rating'].mean():.2f}")
    with col4:
        st.metric("Total Revenue (30d)", f"${filtered_df['revenue_30d'].sum():.0f}")
    
    # Visualizations
    col1, col2 = st.columns(2)
    
    with col1:
        # Category distribution
        fig1 = px.pie(filtered_df, names='category', title='Items by Category')
        st.plotly_chart(fig1, use_container_width=True)
    
    with col2:
        # Price vs Rating scatter
        fig2 = px.scatter(filtered_df, x='price', y='rating', 
                         size='sales_last_30d', color='category',
                         title='Price vs Rating (size = sales)')
        st.plotly_chart(fig2, use_container_width=True)
    
    # Data table
    st.subheader("Filtered Catalog Data")
    st.dataframe(filtered_df, use_container_width=True)

if __name__ == "__main__":
    main()
'''

# Save dashboard code
# The script contains non-ASCII characters (the emoji in its st.title call), so
# the encoding is pinned to UTF-8 instead of relying on the platform default,
# which can raise UnicodeEncodeError on non-UTF-8 locales.
Path("catalog_dashboard.py").write_text(dashboard_code, encoding="utf-8")

# Save sample data for dashboard
# index=False keeps the DataFrame index out of the CSV the dashboard reads back.
df.to_csv("catalog_data.csv", index=False)

console.print("✅ Dashboard code saved to catalog_dashboard.py")
console.print("✅ Sample data saved to catalog_data.csv")
console.print("\n🚀 Run dashboard with: streamlit run catalog_dashboard.py")

## Extension Ideas

🚀 **Advanced Visualization Features:**

1. **Real-time Updates**: Connect to live data sources
2. **Machine Learning**: Predictive analytics and recommendations
3. **Geographic Mapping**: Location-based catalog analysis
4. **Time Series**: Trend analysis and forecasting
5. **A/B Testing**: Compare different catalog versions
6. **Custom Metrics**: User-defined KPIs and calculations
7. **Export/Sharing**: PDF reports and collaborative features

## Deliverable Checklist

- [ ] Interactive catalog data visualizations
- [ ] Multi-panel dashboard with filtering capabilities
- [ ] AI-generated insights and recommendations
- [ ] Performance analytics and trend analysis
- [ ] Export functionality for charts and data
- [ ] User-friendly interface with intuitive controls

**Bonus Points:**
- [ ] Real-time data connections
- [ ] Predictive analytics integration
- [ ] Custom visualization types
- [ ] Mobile-responsive design

## Summary: Day 3 Labs Complete

🎉 **Congratulations!** You've completed all Day 3 labs:
- **Note Catalog Lab**: AI-powered knowledge management
- **Package Refactor Lab**: Legacy code modernization
- **Unit Tests Lab**: Comprehensive test generation
- **Catalog Visualizer Lab**: Interactive data exploration

Ready for integration and deployment? Let's build the complete notebook system!