<a href="https://colab.research.google.com/github/SaquibKhan-DS/311-Customer-Service-Optimization/blob/main/src/nyc_311_analyzer_py.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
"""
Professional Visualization Module for NYC 311 Service Analysis
Enhanced visualizations for business presentations and portfolio
"""

import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import pandas as pd
import numpy as np

class NYC311Visualizer:
    """
    Professional visualization class for NYC 311 analysis.
    Creates publication-ready charts for business presentations.
    """

    def __init__(self, analyzer):
        """
        Bind the visualizer to an analyzer and apply plot styling.

        Stores the analyzer and its ``processed_df`` attribute, defines the
        color lists used by the plotting methods, then selects a matplotlib
        style sheet and sets the seaborn default palette. The styling calls
        change matplotlib/seaborn defaults globally, not only for this
        instance.

        Parameters:
        -----------
        analyzer : NYC311Analyzer
            Initialized analyzer with processed data. It must expose a
            ``processed_df`` attribute, which is kept as ``self.df``.
        """
        self.analyzer = analyzer
        self.df = analyzer.processed_df

        # Professional color schemes
        self.colors_primary = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b']
        self.colors_sequential = ['#08519c', '#3182bd', '#6baed6', '#9ecae1', '#c6dbef', '#eff3ff']
        self.colors_diverging = ['#d73027', '#fc8d59', '#fee08b', '#e0f3f8', '#91bfdb', '#4575b4']

        # Set professional styling. Matplotlib 3.6 renamed its bundled seaborn
        # style sheets to "seaborn-v0_8-*"; older releases only know the
        # un-prefixed name, so fall back to it rather than failing here.
        try:
            plt.style.use('seaborn-v0_8-whitegrid')
        except OSError:
            plt.style.use('seaborn-whitegrid')
        sns.set_palette(self.colors_primary)

    def create_executive_dashboard(self, save_path=None):
        """
        Create a professional executive dashboard with key metrics.

        Builds a 2x2 Plotly figure:

        * top-left: daily request volume (bars) with average response time
          (line) on a secondary y-axis;
        * top-right: the ten most frequent values of ``Complaint Type``;
        * bottom-left: scatter of the first ten rows of the department
          analysis (total requests vs. average response hours), with marker
          size taken from ``sla_24h_compliance`` and marker color from
          ``efficiency_score``;
        * bottom-right: hourly request volume (bars) with average response
          time (line) on a secondary y-axis.

        Parameters:
        -----------
        save_path : str or os.PathLike, optional
            Path of the static image to write (for example
            ``dashboard.png``). An interactive HTML copy is written next to
            it, using the same name with the suffix replaced by ``.html``.
            If ``save_path`` itself ends in ``.html``, only the HTML file is
            written.

        Returns:
        --------
        plotly.graph_objects.Figure
        """
        # Local import: only the save branch needs it, and the file's
        # top-level imports are left untouched.
        from pathlib import Path

        # NOTE(review): the metrics result is not used by this method; the
        # call is kept in case the analyzer relies on it having run. Confirm
        # and drop it if it has no side effects.
        self.analyzer.calculate_service_metrics()
        temporal_data = self.analyzer.analyze_temporal_patterns()
        dept_data = self.analyzer.department_efficiency_analysis()

        # Create subplots; the two cells that overlay a line on bars need a
        # secondary y-axis.
        fig = make_subplots(
            rows=2, cols=2,
            subplot_titles=(
                'Daily Request Volume & Response Time',
                'Top 10 Complaint Types',
                'Department Performance Comparison',
                'Peak Hours Analysis'
            ),
            specs=[
                [{"secondary_y": True}, {"type": "bar"}],
                [{"type": "scatter"}, {"secondary_y": True}]
            ]
        )

        # Chart 1: Daily patterns with dual y-axis
        daily_data = temporal_data['daily_patterns']
        fig.add_trace(
            go.Bar(
                x=daily_data.index,
                y=daily_data['request_count'],
                name='Request Volume',
                marker_color='#1f77b4',
                opacity=0.7
            ),
            row=1, col=1, secondary_y=False
        )

        fig.add_trace(
            go.Scatter(
                x=daily_data.index,
                y=daily_data['avg_response_hours'],
                name='Avg Response Time',
                line=dict(color='#ff7f0e', width=3),
                marker=dict(size=8)
            ),
            row=1, col=1, secondary_y=True
        )

        # Chart 2: Top complaint types
        top_complaints = self.df['Complaint Type'].value_counts().head(10)
        fig.add_trace(
            go.Bar(
                x=top_complaints.values,
                y=top_complaints.index,
                orientation='h',
                name='Complaint Volume',
                marker_color='#2ca02c'
            ),
            row=1, col=2
        )

        # Chart 3: Department efficiency scatter
        # NOTE(review): head(10) takes the first ten rows as returned by the
        # analyzer; this is only the "top" ten if that frame is pre-sorted.
        top_depts = dept_data.head(10)
        fig.add_trace(
            go.Scatter(
                x=top_depts['total_requests'],
                y=top_depts['avg_response_hours'],
                mode='markers+text',
                text=top_depts.index,
                textposition="top center",
                marker=dict(
                    size=top_depts['sla_24h_compliance'],
                    color=top_depts['efficiency_score'],
                    colorscale='RdYlGn_r',
                    showscale=True,
                    colorbar=dict(title="Efficiency Score")
                ),
                name='Departments'
            ),
            row=2, col=1
        )

        # Chart 4: Hourly patterns
        hourly_data = temporal_data['hourly_patterns']
        fig.add_trace(
            go.Bar(
                x=hourly_data.index,
                y=hourly_data['request_count'],
                name='Hourly Requests',
                marker_color='#9467bd',
                opacity=0.7
            ),
            row=2, col=2, secondary_y=False
        )

        # add_trace assigns the axes from row/col/secondary_y, so no explicit
        # yaxis id is set on the trace itself.
        fig.add_trace(
            go.Scatter(
                x=hourly_data.index,
                y=hourly_data['avg_response_hours'],
                name='Response Time',
                line=dict(color='#d62728', width=2)
            ),
            row=2, col=2, secondary_y=True
        )

        # Update layout
        fig.update_layout(
            height=800,
            title_text="NYC 311 Service Performance Dashboard",
            title_x=0.5,
            title_font_size=24,
            showlegend=False,
            template='plotly_white'
        )

        # Update axis labels
        fig.update_yaxes(title_text="Request Count", row=1, col=1, secondary_y=False)
        fig.update_yaxes(title_text="Response Time (Hours)", row=1, col=1, secondary_y=True)
        fig.update_xaxes(title_text="Request Volume", row=1, col=2)
        fig.update_xaxes(title_text="Total Requests", row=2, col=1)
        fig.update_yaxes(title_text="Avg Response Time (Hours)", row=2, col=1)
        fig.update_xaxes(title_text="Hour of Day", row=2, col=2)
        fig.update_yaxes(title_text="Request Count", row=2, col=2, secondary_y=False)
        fig.update_yaxes(title_text="Response Time (Hours)", row=2, col=2, secondary_y=True)

        if save_path:
            image_path = Path(save_path)
            # Swap only the final suffix. A plain substring replace of '.png'
            # would also rewrite directory names and would leave any other
            # extension pointing both exports at the same file.
            html_path = image_path.with_suffix('.html')
            fig.write_html(str(html_path))
            # Writing the static image to an .html path would target the
            # interactive file that was just written, so skip it in that case.
            if image_path.suffix.lower() != '.html':
                fig.write_image(str(image_path), width=1200, height=800, scale=2)

        return fig

    def plot_response_time_distribution(self, save_path=None):
        """
        Create professional response time distribution plot.

        Draws a 2x2 matplotlib figure from ``self.df``:

        * histogram of ``elapsed_time_hours`` with the median marked;
        * box plot of ``elapsed_time_hours`` for the ten most frequent
          ``Complaint Type`` values;
        * monthly mean of ``elapsed_time_hours``, grouped by the month of
          ``created_dt``;
        * share of requests with ``elapsed_time_hours`` at or below 24, 48
          and 168 hours.

        Parameters:
        -----------
        save_path : str, optional
            Path to save the plot (written at 300 dpi with a tight bounding
            box)

        Returns:
        --------
        matplotlib.figure.Figure
        """
        fig, axes = plt.subplots(2, 2, figsize=(15, 10))
        fig.suptitle('Response Time Distribution Analysis', fontsize=16, fontweight='bold')

        elapsed = self.df['elapsed_time_hours']

        # Overall distribution
        sns.histplot(
            data=self.df,
            x='elapsed_time_hours',
            bins=50,
            ax=axes[0,0],
            color=self.colors_primary[0],
            alpha=0.7
        )
        # Compute the median once; it drives both the marker and its label.
        median_hours = elapsed.median()
        axes[0,0].set_title('Overall Response Time Distribution')
        axes[0,0].set_xlabel('Response Time (Hours)')
        axes[0,0].axvline(median_hours, color='red', linestyle='--', label=f'Median: {median_hours:.1f}h')
        axes[0,0].legend()

        # Box plot by complaint type (top 10)
        top_complaints = self.df['Complaint Type'].value_counts().head(10).index
        df_top = self.df[self.df['Complaint Type'].isin(top_complaints)]

        # NOTE(review): recent seaborn releases appear to warn when `palette`
        # is passed without `hue`; confirm against the seaborn version in use.
        sns.boxplot(
            data=df_top,
            y='Complaint Type',
            x='elapsed_time_hours',
            ax=axes[0,1],
            palette='husl'
        )
        axes[0,1].set_title('Response Time by Complaint Type (Top 10)')
        axes[0,1].set_xlabel('Response Time (Hours)')

        # Time series of response times
        monthly_response = self.df.groupby(self.df['created_dt'].dt.to_period('M'))['elapsed_time_hours'].mean()
        monthly_response.plot(kind='line', ax=axes[1,0], color=self.colors_primary[2], linewidth=2, marker='o')
        axes[1,0].set_title('Monthly Average Response Time Trend')
        axes[1,0].set_xlabel('Month')
        axes[1,0].set_ylabel('Avg Response Time (Hours)')
        axes[1,0].tick_params(axis='x', rotation=45)

        # SLA compliance visualization. Each label is paired with its
        # threshold in hours so the two cannot drift apart.
        sla_levels = [('24 Hours', 24), ('48 Hours', 48), ('1 Week', 168)]
        labels = [label for label, _ in sla_levels]
        # Rows with a missing elapsed time compare as False and therefore
        # count as non-compliant.
        sla_compliance = [(elapsed <= hours).mean() * 100 for _, hours in sla_levels]

        bars = axes[1,1].bar(labels, sla_compliance, color=self.colors_primary[:3], alpha=0.8)
        axes[1,1].set_title('SLA Compliance Rates')
        axes[1,1].set_ylabel('Compliance Rate (%)')
        # Leave headroom above 100% so the label of a near-100% bar stays
        # inside the axes instead of running into the subplot title.
        axes[1,1].set_ylim(0, 110)

        # Add percentage labels on bars
        for bar, pct in zip(bars, sla_compliance):
            axes[1,1].text(bar.get_x() + bar.get_width()/2, bar.get_height() + 1,
                          f'{pct:.1f}%', ha='center', va='bottom', fontweight='bold')

        fig.tight_layout()

        if save_path:
            # Save this figure explicitly rather than whichever figure pyplot
            # currently considers active.
            fig.savefig(save_path, dpi=300, bbox_inches='tight')

        return fig

    def plot_geographic_analysis(self, save_path=None):
        """
        Create professional geographic analysis visualizations.

        Parameters:
        -----------
        save_path : str, optional
            Path to save the plot

        Returns:
        --------
        matplotlib.figure.Figure
        """
        fig, axes = plt.subplots(2, 2, figsize=(16, 12))
        fig.suptitle('Geographic Distribution Analysis', fontsize=16, fontweight='bold')

        # City-wise request volume
        city_counts = self.df['City'].value_counts().head(15)
        city_counts.plot(kind='barh', ax=axes[0,0], color=self.colors_primary[0])
        axes[0,0].set_title('Request Volume by City (Top 15)')
        axes[0,0].set_xlabel('Number of Requests')

        # City-wise average response time
        city_response = self.df.groupby('City')['elapsed_time_hours'].mean().sort_values(ascending=False).head(15)
        city_response.plot(kind='barh