#  Interactive Metrics Dashboard
# Purpose: Real-time interactive dashboard for monitoring the pipeline
# Features:
# - Live metrics refreshing
# - Interactive filters
# - Drill-down capabilities
# - Export functionality

In [1]:
import json
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from pathlib import Path
from datetime import datetime
import ipywidgets as widgets
from IPython.display import display, clear_output

print("‚úì Dashboard initialized")

‚úì Dashboard initialized


#  Auto-Refresh Metrics

In [33]:
class MetricsDashboard:
    """Load pipeline metrics and events from disk and build Plotly figures from them.

    Attributes are refreshed by calling ``load_data()`` again:

    * ``metrics``     -- dict parsed from ``<data_dir>/metrics/latest_metrics.json``
    * ``events_df``   -- DataFrame built from the newest ``*.json`` file in
      ``<data_dir>/curated`` with ``created_at`` parsed as datetimes
    * ``last_update`` -- ``datetime`` of the last successful ``load_data()`` call
    """

    def __init__(self, data_dir='../data'):
        """Create an empty dashboard.

        Args:
            data_dir: Root directory containing the ``metrics`` and ``curated``
                sub-directories. Defaults to ``'../data'``.
        """
        self.data_dir = Path(data_dir)
        self.metrics = None
        self.events_df = None
        self.last_update = None

    def load_data(self):
        """Load the latest metrics and events from disk.

        Missing files/directories (or a curated directory without any
        ``*.json`` file) are not errors: the corresponding attribute simply
        keeps its previous value.

        Returns:
            bool: ``True`` when loading finished without an exception,
            ``False`` otherwise (the error is printed).
        """
        try:
            metrics_file = self.data_dir / 'metrics' / 'latest_metrics.json'
            if metrics_file.exists():
                with open(metrics_file, encoding='utf-8') as f:
                    self.metrics = json.load(f)

            curated_dir = self.data_dir / 'curated'
            # Materialise the glob first: max() on an empty iterator raises
            # ValueError, which previously turned "no curated files yet" into
            # a failed load even though the metrics had been read fine.
            candidates = list(curated_dir.glob('*.json')) if curated_dir.exists() else []
            if candidates:
                latest_file = max(candidates, key=lambda p: p.stat().st_mtime)
                with open(latest_file, encoding='utf-8') as f:
                    events = json.load(f)
                events_df = pd.DataFrame(events)
                events_df['created_at'] = pd.to_datetime(events_df['created_at'])
                # Publish only a fully converted frame so a failure above
                # never leaves a half-initialised events_df behind.
                self.events_df = events_df

            self.last_update = datetime.now()
            return True

        except Exception as e:
            print(f"Error loading data: {e}")
            return False

    def create_summary_cards(self):
        """Create a one-row figure with four KPI number indicators.

        Reads ``metrics['summary']['total_events']`` and the lengths of
        ``metrics['repos']``, ``metrics['contributors']`` and
        ``metrics['event_types']``.

        Returns:
            A Plotly figure, or ``None`` when no metrics are loaded.
        """
        if not self.metrics:
            return None

        summary = self.metrics['summary']
        kpis = (
            ('Total Events', summary['total_events']),
            ('Repositories', len(self.metrics['repos'])),
            ('Contributors', len(self.metrics['contributors'])),
            ('Event Types', len(self.metrics['event_types'])),
        )

        fig = make_subplots(
            rows=1, cols=len(kpis),
            subplot_titles=tuple(title for title, _ in kpis),
            specs=[[{'type': 'indicator'}] * len(kpis)]
        )

        for col, (_, value) in enumerate(kpis, start=1):
            fig.add_trace(go.Indicator(mode="number", value=value,
                                       number={'valueformat': ','},
                                       domain={'x': [0, 1], 'y': [0, 1]}), row=1, col=col)

        # metrics may have been assigned without load_data(); avoid calling
        # strftime on None in that case.
        updated = self.last_update.strftime('%H:%M:%S') if self.last_update else 'n/a'
        fig.update_layout(height=200, title_text=f"üìä Pipeline Metrics Overview (Updated: {updated})", showlegend=False)
        return fig

    def create_comprehensive_dashboard(self):
        """Create full dashboard with multiple charts.

        Returns:
            ``None`` (after printing a notice) when metrics or events are
            missing.
        """
        if not self.metrics or self.events_df is None:
            print("‚ö†Ô∏è  No data available. Run the pipeline first.")
            return None
        # NOTE(review): this version of the method builds no figure, so it
        # returns None even when data is loaded -- callers must handle None.
        return None

# Initialize dashboard

In [34]:
# Create the MetricsDashboard instance that the later cells use through the name `dashboard`.
dashboard = MetricsDashboard()
print("‚úì Dashboard class initialized")

‚úì Dashboard class initialized


#  Display Dashboard

In [35]:
# Nothing can be rendered unless the load step succeeds, so handle that case first.
if not dashboard.load_data():
    print("‚ö†Ô∏è  Failed to load data. Make sure the pipeline has run at least once.")
else:
    print("‚úì Data loaded successfully")

    # KPI cards: the builder returns None when no metrics are loaded.
    kpi_fig = dashboard.create_summary_cards()
    if kpi_fig:
        kpi_fig.show()

    # Full dashboard: shown only when a figure was actually produced.
    main_fig = dashboard.create_comprehensive_dashboard()
    if main_fig:
        main_fig.show()

‚úì Data loaded successfully


[stderr output removed: Jupyter server "Bad pipe message" log lines that echoed raw HTTP request headers, including session cookies (_xsrf, username-127-0-0-1-8888). Clear this output before sharing the notebook.]

In [7]:
#  Interactive Filters

# Build the filter widgets only when there are events to filter; an empty
# frame has no dates/types/repos to offer.
if dashboard.events_df is not None and not dashboard.events_df.empty:

    def _current_date_options():
        """Return every calendar day between the first and last event as date objects."""
        created = dashboard.events_df['created_at']
        return [ts.date() for ts in pd.date_range(created.min().date(), created.max().date())]

    # Date range selector (options are plain dates so they compare cleanly
    # with `created_at.dt.date`, whether or not the timestamps carry a timezone)
    date_options = _current_date_options()
    min_date, max_date = date_options[0], date_options[-1]
    date_range = widgets.SelectionRangeSlider(
        options=date_options,
        index=(0, len(date_options) - 1),
        description='Date Range:',
        layout=widgets.Layout(width='80%')
    )

    # Event type selector
    event_types = dashboard.events_df['event_type'].unique().tolist()
    event_type_selector = widgets.SelectMultiple(options=event_types, value=event_types, description='Event Types:')

    # Repository selector (first five repositories pre-selected)
    repos = dashboard.events_df['repo_name'].unique().tolist()
    repo_selector = widgets.SelectMultiple(options=repos, value=repos[:min(5,len(repos))], description='Repositories:')

    # Refresh button
    refresh_button = widgets.Button(description='üîÑ Refresh Data', button_style='success', tooltip='Reload metrics from disk')

    output = widgets.Output()

    # Update function
    def on_filter_change(*args):
        """Re-render the filtered bar chart and summary for the current widget values."""
        with output:
            clear_output(wait=True)

            events = dashboard.events_df
            start_date, end_date = date_range.value
            event_dates = events['created_at'].dt.date

            filtered_df = events[
                (event_dates >= start_date) & (event_dates <= end_date) &
                (events['event_type'].isin(event_type_selector.value)) &
                (events['repo_name'].isin(repo_selector.value))
            ]

            if len(filtered_df) == 0:
                print("‚ö†Ô∏è  No data matches the selected filters")
                return

            # Filtered chart: name the count column explicitly instead of
            # relying on the implicit integer column 0 from reset_index().
            counts = filtered_df.groupby('event_type').size().reset_index(name='count')
            fig = px.bar(
                counts,
                x='event_type', y='count',
                title=f'Filtered Events ({len(filtered_df):,} total)',
                labels={'count':'Count','event_type':'Event Type'}
            )
            fig.show()

            # Summary stats
            print("\n Filtered Summary:")
            print(f"   Total events: {len(filtered_df):,}")
            print(f"   Repositories: {filtered_df['repo_name'].nunique()}")
            print(f"   Contributors: {filtered_df['actor_login'].nunique()}")

    # Refresh handler
    def on_refresh_click(b):
        """Reload data from disk, resync the date slider, and re-render."""
        with output:
            clear_output(wait=True)
            print("üîÑ Refreshing data...")
            if dashboard.load_data():
                print(f"‚úì Data refreshed at {dashboard.last_update.strftime('%H:%M:%S')}")
                # Newly loaded events may fall outside the old slider bounds;
                # widen the slider so the date filter does not silently hide them.
                # The observer is detached meanwhile to avoid duplicate renders.
                # NOTE(review): event-type and repository options are still the
                # ones captured when the cell ran; re-run the cell to pick up new ones.
                new_options = _current_date_options()
                if list(date_range.options) != new_options:
                    date_range.unobserve(on_filter_change, 'value')
                    date_range.options = new_options
                    date_range.index = (0, len(new_options) - 1)
                    date_range.observe(on_filter_change, 'value')
                on_filter_change()
            else:
                print("‚ö†Ô∏è  Failed to refresh data")

    # Connect widgets
    date_range.observe(on_filter_change, 'value')
    event_type_selector.observe(on_filter_change, 'value')
    repo_selector.observe(on_filter_change, 'value')
    refresh_button.on_click(on_refresh_click)

    # Display widgets
    print("\n  Interactive Filters:")
    display(widgets.VBox([date_range, event_type_selector, repo_selector, refresh_button, output]))

    # Initial render
    on_filter_change()


üéõÔ∏è  Interactive Filters:


VBox(children=(SelectMultiple(description='Event Types:', index=(0, 1, 2), options=('ForkEvent', 'PullRequestE‚Ä¶

In [8]:
# -----------------------------
# 📥 Export Dashboard
# -----------------------------
# The export controls are offered only when metrics have been loaded.
if dashboard.metrics:
    export_button = widgets.Button(description='üì• Export HTML', button_style='info', tooltip='Export dashboard to HTML file')
    export_output = widgets.Output()

    def on_export_click(b):
        """Write the comprehensive dashboard to a timestamped HTML file under ../data/exports.

        Any failure is reported inside ``export_output`` instead of raising.
        """
        with export_output:
            clear_output(wait=True)
            try:
                fig = dashboard.create_comprehensive_dashboard()
                if fig is None:
                    # Report the real cause rather than the opaque
                    # "'NoneType' object has no attribute 'write_html'".
                    raise RuntimeError("no dashboard figure was produced")

                output_dir = Path('../data/exports')
                # parents=True: the data directory itself may not exist yet.
                output_dir.mkdir(parents=True, exist_ok=True)

                timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
                output_file = output_dir / f'dashboard_{timestamp}.html'

                fig.write_html(str(output_file))

                print(f"‚úì Dashboard exported to: {output_file}")
                print(f"  File size: {output_file.stat().st_size / 1024:.1f} KB")
                # as_uri() yields a valid file:// URL on every platform and
                # resolve() removes the '..' segments from the relative path.
                print(f"  Open in browser: {output_file.resolve().as_uri()}")

            except Exception as e:
                print(f"‚ö†Ô∏è  Export failed: {e}")

    export_button.on_click(on_export_click)
    display(widgets.VBox([export_button, export_output]))

VBox(children=(Button(button_style='info', description='üì• Export HTML', style=ButtonStyle(), tooltip='Export d‚Ä¶

In [9]:
# -----------------------------
# 🔔 Pipeline Status Check
# -----------------------------
def check_pipeline_health(data_dir='../data'):
    """Print a health report for the pipeline's metrics, checkpoint and run history.

    Args:
        data_dir: Root directory holding ``metrics/latest_metrics.json``,
            ``state/checkpoint.json`` and ``state/run_history.jsonl``.
            Defaults to ``'../data'``.

    Each of the three files is optional; a missing file is reported and the
    check continues. Nothing is returned -- the report goes to stdout.
    """
    data_root = Path(data_dir)

    print("\n PIPELINE HEALTH CHECK")
    print("=" * 60)

    # Metrics file
    metrics_file = data_root / 'metrics' / 'latest_metrics.json'
    if metrics_file.exists():
        print("‚úì Metrics file exists")
        age_minutes = (datetime.now().timestamp() - metrics_file.stat().st_mtime)/60
        print(f"  Age: {age_minutes:.1f} minutes")
        if age_minutes > 60:
            print("  ‚ö†Ô∏è  Metrics are over 1 hour old")
    else:
        print("‚ö†Ô∏è  Metrics file not found")

    # Checkpoint
    checkpoint_file = data_root / 'state' / 'checkpoint.json'
    if checkpoint_file.exists():
        print("\n‚úì Checkpoint file exists")
        with open(checkpoint_file, encoding='utf-8') as f:
            checkpoint = json.load(f)
        print(f"  Last processed: {checkpoint.get('last_processed_timestamp')}")
        print(f"  Status: {checkpoint.get('status')}")
        # The ',' format spec raises TypeError on None, so only apply it to
        # real numbers; a missing key is shown as None like the fields above.
        records = checkpoint.get('records_processed')
        records_text = f"{records:,}" if isinstance(records, (int, float)) else str(records)
        print(f"  Records: {records_text}")
    else:
        print("\n‚ö†Ô∏è  Checkpoint file not found")

    # Run history
    run_history_file = data_root / 'state' / 'run_history.jsonl'
    if run_history_file.exists():
        with open(run_history_file, encoding='utf-8') as f:
            # Skip blank lines (e.g. a trailing newline) that json.loads rejects.
            runs = [json.loads(line) for line in f if line.strip()]
        print(f"\n‚úì Run history: {len(runs)} executions")
        if runs:
            last_run = runs[-1]
            print(f"  Last run: {last_run.get('status')}")
            duration = last_run.get('duration_seconds')
            duration_text = f"{duration:.2f}s" if isinstance(duration, (int, float)) else "n/a"
            print(f"  Duration: {duration_text}")
            success_count = sum(1 for r in runs if r.get('status') == 'success')
            success_rate = success_count / len(runs) * 100
            print(f"  Success rate: {success_rate:.1f}% ({success_count}/{len(runs)})")
    else:
        print("\n‚ö†Ô∏è  Run history not found")

    print("\n" + "="*60)

# Run the health check once when this cell executes; the report is printed to the cell output.
check_pipeline_health()


üè• PIPELINE HEALTH CHECK
‚úì Metrics file exists
  Age: 65.4 minutes
  ‚ö†Ô∏è  Metrics are over 1 hour old

‚úì Checkpoint file exists
  Last processed: 2026-01-12T22:04:05+00:00
  Status: success
  Records: 14

‚úì Run history: 1 executions
  Last run: success
  Duration: 2.01s
  Success rate: 100.0% (1/1)

