# Final Results Analysis

This notebook creates the final deliverable visualizations:
- Accuracy vs dataset size Pareto curve
- Accuracy comparison (GA vs random baselines)
- Training efficiency plots (accuracy per sample)
- Summary tables


In [None]:
import sys
from pathlib import Path
import json
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# Put the parent of the current working directory at the front of sys.path so
# that the `config` module imported below can be found there.
# Path().absolute() resolves against the process working directory.
# NOTE(review): this assumes the kernel is started from the notebook's own
# folder, one level below the directory that holds config.py -- confirm.
sys.path.insert(0, str(Path().absolute().parent))
import config

# Plot defaults for the whole notebook: seaborn "whitegrid" style and a 14x8
# default figure size (figures created with an explicit figsize override it).
sns.set_style("whitegrid")
plt.rcParams['figure.figsize'] = (14, 8)


In [None]:
# Read the evaluation summary (evaluation.json under config.RESULTS_DIR).
# When the file is absent, eval_results is set to None so that the later
# `if eval_results:` cells are skipped instead of failing.
eval_path = config.RESULTS_DIR / "evaluation.json"
if not eval_path.exists():
    print(f"Evaluation results not found at {eval_path}")
    print("Run 'python training/evaluate_models.py' first.")
    eval_results = None
else:
    with eval_path.open('r') as results_file:
        eval_results = json.load(results_file)
    print(f"Loaded evaluation results from {eval_path}")


## Accuracy vs Dataset Size Pareto Curve


In [None]:
if eval_results:
    # Accuracy-vs-size figure: GA-selected subsets against the random baseline
    # for every k in config.K_VALUES, with the full-dataset accuracy drawn as a
    # horizontal reference line.
    results_by_key = eval_results['results']

    # A subset size is only plotted when a GA result exists for it. The random
    # baseline is optional per k and recorded as None when absent, so the
    # three random_* lists stay index-aligned with k_values.
    k_values = []
    ga_accs = []
    random_mean_accs = []
    random_std_accs = []

    for k in config.K_VALUES:
        ga_key = f'ga_k{k}'
        random_key = f'random_k{k}'

        if ga_key not in results_by_key:
            continue

        k_values.append(k)
        ga_accs.append(results_by_key[ga_key]['test_accuracy'])

        if random_key in results_by_key:
            random_mean_accs.append(results_by_key[random_key]['mean_accuracy'])
            random_std_accs.append(results_by_key[random_key]['std_accuracy'])
        else:
            random_mean_accs.append(None)
            random_std_accs.append(None)

    # Full dataset accuracy (optional reference line)
    full_acc = None
    if 'full_dataset' in results_by_key:
        full_acc = results_by_key['full_dataset']['test_accuracy']

    # Plot
    fig, ax = plt.subplots(figsize=(12, 8))

    # GA-selected
    ax.plot(k_values, ga_accs, 'o-', linewidth=3, markersize=10,
            label='GA-Selected', color='steelblue', zorder=3)

    # Random baseline: keep only the subset sizes that have a baseline mean.
    random_points = [
        (k, mean, std)
        for k, mean, std in zip(k_values, random_mean_accs, random_std_accs)
        if mean is not None
    ]
    valid_k = [point[0] for point in random_points]
    valid_mean = [point[1] for point in random_points]
    valid_std = [point[2] for point in random_points]

    if valid_k:
        ax.errorbar(valid_k, valid_mean, yerr=valid_std, fmt='s-',
                    linewidth=2, markersize=8, capsize=5, capthick=2,
                    label='Random Baseline (mean ± std)', color='orange', zorder=2)

    # Full dataset (horizontal line)
    if full_acc is not None:
        ax.axhline(y=full_acc, color='red', linestyle='--', linewidth=2,
                   label=f'Full Dataset ({full_acc:.2f}%)', zorder=1)

    ax.set_xlabel('Subset Size (k)', fontsize=13, fontweight='bold')
    ax.set_ylabel('Test Accuracy (%)', fontsize=13, fontweight='bold')
    ax.set_title('Accuracy vs Dataset Size', fontsize=16, fontweight='bold')
    ax.legend(fontsize=11, loc='best')
    ax.grid(True, alpha=0.3)

    plt.tight_layout()

    # Save into config.RESULTS_DIR (where evaluation.json was read from)
    # instead of a 'results/' path relative to the kernel's working directory:
    # savefig does not create missing directories, so the relative path fails
    # whenever the notebook is not run from the project root.
    figure_path = config.RESULTS_DIR / 'accuracy_vs_size.png'
    figure_path.parent.mkdir(parents=True, exist_ok=True)
    plt.savefig(figure_path, dpi=150, bbox_inches='tight')
    plt.show()


## Training Efficiency (Accuracy per Sample)


In [None]:
if eval_results:
    # Training-efficiency figure: the stored efficiency value of GA-selected
    # subsets and the mean efficiency of the random baseline, per subset size k.
    results_by_key = eval_results['results']

    # Missing efficiencies are kept as None (not 0) so they can be left out of
    # the plot instead of being drawn as a genuine zero-efficiency data point.
    k_values = []
    ga_efficiencies = []
    random_mean_efficiencies = []

    for k in config.K_VALUES:
        ga_key = f'ga_k{k}'
        random_key = f'random_k{k}'

        if ga_key not in results_by_key:
            continue

        k_values.append(k)
        ga_efficiencies.append(results_by_key[ga_key].get('training_efficiency'))

        if random_key in results_by_key:
            random_mean_efficiencies.append(results_by_key[random_key].get('mean_efficiency'))
        else:
            random_mean_efficiencies.append(None)

    # Plot
    fig, ax = plt.subplots(figsize=(12, 8))

    # GA-selected: only the subset sizes that actually report an efficiency.
    ga_points = [(k, eff) for k, eff in zip(k_values, ga_efficiencies) if eff is not None]
    if ga_points:
        ax.plot([k for k, _ in ga_points], [eff for _, eff in ga_points],
                'o-', linewidth=3, markersize=10,
                label='GA-Selected', color='steelblue', zorder=3)

    # Random baseline, filtered the same way as the GA series.
    random_points = [
        (k, eff) for k, eff in zip(k_values, random_mean_efficiencies) if eff is not None
    ]
    valid_k = [k for k, _ in random_points]
    valid_eff = [eff for _, eff in random_points]

    if valid_k:
        ax.plot(valid_k, valid_eff, 's-', linewidth=2, markersize=8,
                label='Random Baseline (mean)', color='orange', zorder=2)

    ax.set_xlabel('Subset Size (k)', fontsize=13, fontweight='bold')
    ax.set_ylabel('Training Efficiency (Accuracy / k)', fontsize=13, fontweight='bold')
    ax.set_title('Training Efficiency vs Dataset Size', fontsize=16, fontweight='bold')
    ax.legend(fontsize=11, loc='best')
    ax.grid(True, alpha=0.3)

    plt.tight_layout()

    # Save into config.RESULTS_DIR (where evaluation.json was read from)
    # instead of a 'results/' path relative to the kernel's working directory:
    # savefig does not create missing directories, so the relative path fails
    # whenever the notebook is not run from the project root.
    figure_path = config.RESULTS_DIR / 'training_efficiency.png'
    figure_path.parent.mkdir(parents=True, exist_ok=True)
    plt.savefig(figure_path, dpi=150, bbox_inches='tight')
    plt.show()


## Summary Table


In [None]:
if eval_results:
    # Summary table: one row per subset size k in config.K_VALUES (GA vs random
    # baseline), plus an optional final row for the full-dataset model.
    results_by_key = eval_results['results']
    has_full_dataset = 'full_dataset' in results_by_key

    rows = []

    for k in config.K_VALUES:
        ga_key = f'ga_k{k}'
        random_key = f'random_k{k}'

        row = {'k': k}

        if ga_key in results_by_key:
            ga_result = results_by_key[ga_key]
            # Efficiency is optional: a missing key or a stored null is shown
            # as 'N/A' (formatting None with ':.4f' would raise TypeError, and
            # a default of 0 would read as a measured value).
            ga_eff = ga_result.get('training_efficiency')
            row['GA Accuracy'] = f"{ga_result['test_accuracy']:.2f}%"
            row['GA Efficiency'] = f"{ga_eff:.4f}" if ga_eff is not None else 'N/A'
            row['GA Macro F1'] = f"{ga_result['f1_scores']['macro']:.4f}"
        else:
            row['GA Accuracy'] = 'N/A'
            row['GA Efficiency'] = 'N/A'
            row['GA Macro F1'] = 'N/A'

        if random_key in results_by_key:
            random_result = results_by_key[random_key]
            random_eff = random_result.get('mean_efficiency')
            row['Random Accuracy'] = f"{random_result['mean_accuracy']:.2f}% ± {random_result['std_accuracy']:.2f}%"
            row['Random Efficiency'] = f"{random_eff:.4f}" if random_eff is not None else 'N/A'
        else:
            row['Random Accuracy'] = 'N/A'
            row['Random Efficiency'] = 'N/A'

        # Keep every row on the same columns as the full-dataset row so the
        # table shows 'N/A' rather than NaN / empty CSV cells for these.
        if has_full_dataset:
            row['Full Accuracy'] = 'N/A'
            row['Full Macro F1'] = 'N/A'

        rows.append(row)

    # Full dataset row
    if has_full_dataset:
        full_result = results_by_key['full_dataset']
        rows.append({
            'k': 'Full',
            'GA Accuracy': 'N/A',
            'GA Efficiency': 'N/A',
            'GA Macro F1': 'N/A',
            'Random Accuracy': 'N/A',
            'Random Efficiency': 'N/A',
            'Full Accuracy': f"{full_result['test_accuracy']:.2f}%",
            'Full Macro F1': f"{full_result['f1_scores']['macro']:.4f}"
        })

    # Create DataFrame and display
    df = pd.DataFrame(rows)
    print("\n" + "="*80)
    print("SUMMARY TABLE")
    print("="*80)
    print(df.to_string(index=False))
    print("="*80)

    # Save to CSV inside config.RESULTS_DIR (where evaluation.json was read
    # from) rather than a 'results/' path relative to the kernel's working
    # directory, which fails when that folder does not exist there.
    summary_path = config.RESULTS_DIR / 'summary_table.csv'
    summary_path.parent.mkdir(parents=True, exist_ok=True)
    df.to_csv(summary_path, index=False)
    print(f"\nSaved summary table to {summary_path}")
