# Drift Detector Performance Visualization

**Purpose**: Comprehensive visualization of drift detector performance across datasets

**Visualizations**:
- Pareto fronts for multi-objective optimization
- Scatter plots showing accuracy vs runtime trade-offs
- MTR vs Runtime plots for synthetic datasets
- Baseline comparisons
- Performance trends across different configurations

**Last Updated**: 2025-10-01

---

## 1. Setup and Configuration

In [None]:
# Standard imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
from statistics import mean, stdev, median
import re
import math

# Shared configuration
from eval_config import (
    REAL_DATASETS,
    SYNTHETIC_DATASETS,
    ALL_DETECTORS,
    DETECTOR_COLORS,
    BASE_PATH,
    setup_plot_style
)

# Setup plotting: apply the shared style imported from eval_config, then set
# the figure DPI for this notebook. The DPI is assigned after
# setup_plot_style(), so it takes precedence over any DPI that helper may set.
setup_plot_style()
plt.rcParams['figure.dpi'] = 200
# IPython magic (not plain Python): render figures inline in the notebook output.
%matplotlib inline

In [None]:
# ============================================================================
# CONFIGURATION
# ============================================================================

# Evaluation mode switch:
#   True  -> MTR evaluation on the synthetic datasets
#   False -> standard evaluation on the real datasets
MTR_MODE = False

# Mode-dependent settings. metric1/metric2 become the x/y axes of the plots
# below and are also part of the experiment directory name that
# load_experiment_results() builds.
if MTR_MODE:
    datasets, metric1, metric2, mode_name = SYNTHETIC_DATASETS, "RUNTIME", "MTR", "MTR"
else:
    datasets, metric1, metric2, mode_name = REAL_DATASETS, "ACCURACY", "RUNTIME", "Standard"

# Mode-independent settings, bound to the short names used by later cells
detectors, colors, base_path = ALL_DETECTORS, DETECTOR_COLORS, BASE_PATH
classifier = "HoeffdingTreeClassifier"

# Echo the active configuration
for config_line in (
    f"Mode: {mode_name}",
    f"Datasets: {datasets}",
    f"Metrics: {metric1} vs {metric2}",
    f"Detectors: {len(detectors)}",
):
    print(config_line)

## 2. Helper Functions

In [None]:
def is_pareto_efficient_mixed(points, maximize):
    """
    Compute a Pareto-efficiency mask for objectives with mixed directions.

    A point is efficient when no other point is at least as good in every
    objective and strictly better in at least one. Identical points do not
    dominate each other, so duplicates are all kept.

    Args:
        points: array-like of shape (n_points, n_objectives)
        maximize: list/array of bools, one per objective; True means the
            objective is maximized, False means it is minimized

    Returns:
        Boolean array of length n_points, True where the point is Pareto efficient

    Raises:
        ValueError: if the length of 'maximize' differs from the number of objectives
    """
    # astype() returns a fresh float array, so the caller's data is never modified
    costs = np.asarray(points).astype(float)
    maximize = np.asarray(maximize)

    if maximize.shape[0] != costs.shape[1]:
        raise ValueError("Length of 'maximize' must match number of objectives.")

    # Negate maximized objectives so that every column is a cost to minimize
    for column, flip in enumerate(maximize):
        if flip:
            costs[:, column] = -costs[:, column]

    efficient = np.ones(costs.shape[0], dtype=bool)

    for idx, candidate in enumerate(costs):
        if not efficient[idx]:
            # Already known to be dominated by an earlier efficient point
            continue

        # Does any point beat the candidate (no worse everywhere, better somewhere)?
        no_worse = (costs <= candidate).all(axis=1)
        somewhere_better = (costs < candidate).any(axis=1)
        if (no_worse & somewhere_better).any():
            efficient[idx] = False
            continue

        # The candidate is efficient: discard everything it dominates so those
        # points are skipped in later iterations. The candidate never matches
        # this mask itself because it is not strictly better than itself.
        beaten = (candidate <= costs).all(axis=1) & (candidate < costs).any(axis=1)
        efficient[beaten] = False

    return efficient


def pareto_front_simple(x, y):
    """
    Pareto front for the 2D case where x is maximized and y is minimized.

    Point i is dropped when some other point j has x[j] >= x[i] and
    y[j] <= y[i] without being identical to it; exact duplicates are all kept.

    Args:
        x: Array of first objective values (higher is better)
        y: Array of second objective values (lower is better)

    Returns:
        Indices of Pareto-efficient points, in ascending order
    """
    n_points = len(x)

    def beaten(i):
        # True as soon as one point is at least as good on both axes and differs on one
        return any(
            (x[j] >= x[i] and y[j] <= y[i]) and (x[j] != x[i] or y[j] != y[i])
            for j in range(n_points)
        )

    is_dominated = np.array([beaten(i) for i in range(n_points)], dtype=bool)
    return np.where(~is_dominated)[0]


def load_experiment_results(detector, dataset, metric1, metric2, base_path=BASE_PATH):
    """
    Load the results of the highest-numbered run for a detector-dataset combination.

    The experiment directory is
    ``{base_path}{detector}_{dataset}_{classifier}_{metric1}-{metric2}``, where
    ``classifier`` is the notebook-level global. Every entry of that directory
    whose name consists only of digits is treated as a run, and ``results.csv``
    of the run with the largest number is read.

    Args:
        detector: Detector name, used in the experiment directory name
        dataset: Dataset name, used in the experiment directory name
        metric1: First metric; column of results.csv and part of the directory name
        metric2: Second metric; column of results.csv and part of the directory name
        base_path: Prefix prepended verbatim to the directory name (no path
            separator is inserted, so it must already end with one if needed)

    Returns:
        DataFrame holding only the metric1 and metric2 columns, with rows
        containing missing values dropped. None if the experiment directory,
        a numbered run, or results.csv does not exist, or if loading fails
        (the error is printed).
    """
    exp_path = f"{base_path}{detector}_{dataset}_{classifier}_{metric1}-{metric2}"

    if not os.path.exists(exp_path):
        return None

    try:
        run_ids = [d for d in os.listdir(exp_path) if d.isdigit()]
        if not run_ids:
            return None

        # Compare run numbers numerically: a plain string max() ranks "9"
        # above "10" and would silently pick a stale run.
        latest_run = max(run_ids, key=int)

        run_path = os.path.join(exp_path, latest_run, "results.csv")
        if not os.path.exists(run_path):
            return None

        df = pd.read_csv(run_path)
        return df[[metric1, metric2]].dropna()
    except Exception as e:
        # Best-effort loader: report the problem and let callers skip this combination
        print(f"Error loading {detector} on {dataset}: {e}")
        return None

## 3. Pareto Front Visualization

Visualize Pareto-optimal configurations for each detector on each dataset.

In [None]:
# Plot Pareto fronts for all detectors on each dataset
plt.close('all')

# Optimization direction of (metric1, metric2). RUNTIME is a cost to minimize;
# the other metrics used here (ACCURACY, MTR) are maximized. Deriving the
# directions from the metric names keeps them right in both modes: in MTR mode
# metric1 is RUNTIME and metric2 is MTR, so a hard-coded [True, False] would
# maximize runtime and minimize MTR.
maximize = [metric != "RUNTIME" for metric in (metric1, metric2)]

for dataset in datasets:
    fig, ax = plt.subplots(figsize=(10, 6))

    # detector -> (x values, y values) of its Pareto front on this dataset
    pareto_data = {}

    for dd in detectors:
        df = load_experiment_results(dd, dataset, metric1, metric2, base_path)
        if df is None or len(df) == 0:
            continue

        # Columns are ordered (metric1, metric2), matching `maximize`
        objectives = df[[metric1, metric2]].to_numpy(dtype=float)
        pareto_points = objectives[is_pareto_efficient_mixed(objectives, maximize)]

        # Order the front along the x-axis (metric1)
        pareto_points = pareto_points[np.argsort(pareto_points[:, 0])]
        pareto_x, pareto_y = pareto_points[:, 0], pareto_points[:, 1]

        # Plot Pareto front
        ax.scatter(pareto_x, pareto_y,
                   color=colors.get(dd, '#808080'),
                   label=dd,
                   alpha=0.8,
                   s=60,
                   edgecolors='black',
                   linewidth=0.5)

        pareto_data[dd] = (pareto_x, pareto_y)

    # Formatting
    ax.set_xlabel(metric1, fontsize=12)
    ax.set_ylabel(metric2, fontsize=12)
    ax.set_title(f'Pareto Fronts: {dataset} ({mode_name} Mode)', fontsize=14, pad=15)

    # Legend: only when something was plotted; markers are drawn at a uniform
    # size and fully opaque so the entries stay readable
    if pareto_data:
        legend = ax.legend(ncol=3, fontsize=9, loc='best')
        for handle in legend.legend_handles:
            handle.set_sizes([50.0])
            handle.set_alpha(1.0)

    ax.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()

    print(f"\nPareto front sizes for {dataset}:")
    for dd, (px, py) in pareto_data.items():
        print(f"  {dd}: {len(px)} points")

## 4. Full Scatter Plots

Show all evaluated configurations (not just the Pareto front) for selected detectors on the first three datasets.

In [None]:
# Detectors to show in detail
selected_detectors = ["CSDDM", "BNDM", "D3", "IBDD", "OCDD", "SPLL"]

# One figure per dataset, limited to the first 3 datasets
for dataset in datasets[:3]:
    fig, ax = plt.subplots(figsize=(10, 6))

    for dd in selected_detectors:
        df = load_experiment_results(dd, dataset, metric1, metric2, base_path)
        if df is None or len(df) == 0:
            # Nothing recorded for this detector on this dataset
            continue

        x = df[metric1].values
        y = df[metric2].values

        # Every evaluated configuration becomes one point
        ax.scatter(
            x,
            y,
            s=30,
            alpha=0.5,
            label=dd,
            color=colors.get(dd, '#808080'),
        )

    ax.set_title(f'All Configurations: {dataset}', fontsize=14, pad=15)
    ax.set_xlabel(metric1, fontsize=12)
    ax.set_ylabel(metric2, fontsize=12)
    ax.legend(ncol=2, fontsize=10)
    ax.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()

## 5. Single Detector Analysis

Detailed view of a single detector's performance across configurations.

In [None]:
# Detector/dataset pair to inspect; edit target_detector to analyze another detector
target_detector = "CSDDM"
target_dataset = datasets[0]  # first dataset of the active mode

df = load_experiment_results(target_detector, target_dataset, metric1, metric2, base_path)

if df is None or len(df) == 0:
    print(f"No data found for {target_detector} on {target_dataset}")
else:
    x, y = df[metric1].values, df[metric2].values

    fig, ax = plt.subplots(figsize=(10, 6))

    # Each point is coloured by its row position in the loaded results,
    # which the colorbar labels as the evaluation order
    scatter = ax.scatter(x, y, c=range(len(x)), cmap='viridis', alpha=0.6, s=50)
    cbar = plt.colorbar(scatter, ax=ax)
    cbar.set_label('Evaluation Order', rotation=270, labelpad=20)

    ax.set_title(f'{target_detector} on {target_dataset}\n{len(df)} Configurations Evaluated',
                 fontsize=14, pad=15)
    ax.set_xlabel(metric1, fontsize=12)
    ax.set_ylabel(metric2, fontsize=12)
    ax.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()

    # Range and mean of both metrics over all configurations
    print(f"\n{target_detector} on {target_dataset} Statistics:")
    print(f"  {metric1}: {x.min():.3f} - {x.max():.3f} (mean: {x.mean():.3f})")
    print(f"  {metric2}: {y.min():.1f} - {y.max():.1f} (mean: {y.mean():.1f})")

## 6. Best Configuration Comparison

Compare the best configuration of each detector across datasets.

In [None]:
# Collect best results for each detector on each dataset


def _min_max_scale(values):
    """Rescale a Series to start at 0; the 1e-10 term avoids division by zero for constant columns."""
    low = values.min()
    return (values - low) / (values.max() - low + 1e-10)


# ACCURACY and MTR are benefits (higher is better); RUNTIME is a cost.
# In MTR mode metric1 is RUNTIME and metric2 is MTR, so the roles of the two
# metrics are swapped relative to standard mode. Scoring metric1 - metric2 in
# both modes would favour slow, low-MTR configurations in MTR mode.
if MTR_MODE:
    benefit_metric, cost_metric = metric2, metric1
else:
    benefit_metric, cost_metric = metric1, metric2

# dataset -> detector -> {metric1: value, metric2: value} of the best configuration
best_results = {}

for dataset in datasets:
    best_results[dataset] = {}

    for dd in detectors:
        df = load_experiment_results(dd, dataset, metric1, metric2, base_path)
        if df is None or len(df) == 0:
            continue

        # Simple scalarization: normalized benefit minus normalized cost
        score = _min_max_scale(df[benefit_metric]) - _min_max_scale(df[cost_metric])

        # argmax() is positional, hence .iloc (the index has gaps after dropna)
        best_idx = score.argmax()
        best_results[dataset][dd] = {
            metric1: df[metric1].iloc[best_idx],
            metric2: df[metric2].iloc[best_idx]
        }

# One scatter plot per dataset, one labelled point per detector
for dataset in datasets:
    dataset_best = best_results[dataset]
    if not dataset_best:
        # No detector produced results on this dataset
        continue

    dds_with_results = list(dataset_best.keys())
    x_vals = [dataset_best[dd][metric1] for dd in dds_with_results]
    y_vals = [dataset_best[dd][metric2] for dd in dds_with_results]
    colors_list = [colors.get(dd, '#808080') for dd in dds_with_results]

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.scatter(x_vals, y_vals, c=colors_list, s=100, alpha=0.7, edgecolors='black', linewidth=1)

    # Detector name next to each point, nudged 5 points up and to the right
    for dd, x, y in zip(dds_with_results, x_vals, y_vals):
        ax.annotate(dd, (x, y), fontsize=8, alpha=0.7,
                    xytext=(5, 5), textcoords='offset points')

    ax.set_title(f'Best Configurations: {dataset}', fontsize=14, pad=15)
    ax.set_xlabel(metric1, fontsize=12)
    ax.set_ylabel(metric2, fontsize=12)
    ax.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()

## 7. Performance Heatmap

Heatmap showing the `metric1` value of the best-scoring configuration (from Section 6) for each detector-dataset combination; combinations without results are NaN.

In [None]:
# Performance matrix: rows follow `detectors`, columns follow `datasets`.
# Every cell starts as NaN and is filled only where a best configuration exists.
performance_matrix = np.full((len(detectors), len(datasets)), np.nan)

for col, dataset in enumerate(datasets):
    if dataset not in best_results:
        continue
    for row, dd in enumerate(detectors):
        if dd in best_results[dataset]:
            performance_matrix[row, col] = best_results[dataset][dd][metric1]

# Annotated heatmap of the matrix
# NOTE(review): 'RdYlGn' paints high values green; in MTR mode metric1 is
# RUNTIME, where high is presumably undesirable - confirm the intended metric.
fig, ax = plt.subplots(figsize=(12, 10))
sns.heatmap(
    performance_matrix,
    ax=ax,
    cmap='RdYlGn',
    annot=True,
    fmt='.2f',
    xticklabels=datasets,
    yticklabels=detectors,
    cbar_kws={'label': f'Best {metric1}'},
)

ax.set_title(f'Best {metric1} by Detector and Dataset', fontsize=14, pad=20)
ax.set_xlabel('Dataset', fontsize=12)
ax.set_ylabel('Drift Detector', fontsize=12)
plt.xticks(rotation=45, ha='right')
plt.yticks(rotation=0)
plt.tight_layout()
plt.show()

## 8. Summary Statistics

In [None]:
# Calculate summary statistics: per detector, mean and (population) standard
# deviation of both metrics over the datasets that have a best configuration
summary_data = []

for dd in detectors:
    # Best-configuration records of this detector, one per dataset with results
    records = [
        best_results[dataset][dd]
        for dataset in datasets
        if dataset in best_results and dd in best_results[dataset]
    ]
    if not records:
        continue

    metric1_values = [record[metric1] for record in records]
    metric2_values = [record[metric2] for record in records]

    summary_data.append({
        'Detector': dd,
        f'{metric1} Mean': np.mean(metric1_values),
        f'{metric1} Std': np.std(metric1_values),
        f'{metric2} Mean': np.mean(metric2_values),
        f'{metric2} Std': np.std(metric2_values),
        'Datasets': len(metric1_values)
    })

summary_df = pd.DataFrame(summary_data)

print(f"\n{'='*80}")
print(f"SUMMARY: Best Configuration Performance ({mode_name} Mode)")
print(f"{'='*80}")

if summary_df.empty:
    # Without any rows the frame has no columns, so sort_values() would raise KeyError
    print("No best-configuration results available; nothing to summarize.")
else:
    summary_df = summary_df.sort_values(f'{metric1} Mean', ascending=False)
    print(summary_df.to_string(index=False, float_format='%.3f'))
    print(f"\nNote: Statistics computed across {len(datasets)} datasets")

## 9. Export Results

Save best configurations to CSV for further analysis.

In [None]:
# Flatten best_results into one row per (dataset, detector) pair that has a
# best configuration, keeping the dataset-major order of the nested loops
export_data = [
    {
        'Dataset': dataset,
        'Detector': dd,
        metric1: best_results[dataset][dd][metric1],
        metric2: best_results[dataset][dd][metric2],
    }
    for dataset in datasets
    for dd in detectors
    if dataset in best_results and dd in best_results[dataset]
]

# Write the table to a mode-specific CSV in the working directory
export_df = pd.DataFrame(export_data)
output_file = f'best_configurations_{mode_name.lower()}.csv'
export_df.to_csv(output_file, index=False)

# Short report plus a preview of the exported rows
print(f"\nBest configurations exported to: {output_file}")
print(f"Total configurations: {len(export_df)}")
print(f"\nFirst few rows:")
print(export_df.head(10).to_string(index=False))