# 🔬 Notebook 6: Advanced Analysis (Going Beyond)

## Military Object Detection - Deep Dive Analysis

This notebook goes beyond standard metrics to provide advanced insights into model behavior.

### Objectives:
1. **Error Taxonomy**: Classify detection errors
2. **Confidence Calibration**: Analyze prediction reliability
3. **Object Size Analysis**: Performance by object scale
4. **Attention Visualization**: Understand model focus areas
5. **Class Similarity Analysis**: Which classes are confused
6. **Robustness Analysis**: Edge case identification

---

## 1. Setup & Imports

In [None]:
# Standard imports
import os
import sys
from pathlib import Path
import json
import warnings
from collections import Counter, defaultdict

# Data manipulation
import numpy as np
import pandas as pd

# Visualization
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import seaborn as sns
from PIL import Image

# Computer Vision
import cv2

# YAML
import yaml

# Deep Learning
import torch
import torch.nn.functional as F
from ultralytics import YOLO

# Metrics
from sklearn.metrics import confusion_matrix
from scipy import stats
from scipy.cluster.hierarchy import dendrogram, linkage
from scipy.spatial.distance import squareform

# Progress
from tqdm.notebook import tqdm

# Suppress warnings
# NOTE(review): with no category/module arguments this installs a blanket
# "ignore" filter, so every warning raised later in the session is hidden.
warnings.filterwarnings('ignore')

# Plotting style
# Select the matplotlib style sheet and the seaborn colour palette that the
# figures produced by the following cells are drawn with.
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_palette('husl')

print("‚úÖ All imports successful!")

In [None]:
# Define paths
# Every location is derived from PROJECT_ROOT, which is the parent of the
# directory the notebook is executed from.
PROJECT_ROOT = Path('..')
DATASET_ROOT = PROJECT_ROOT / 'military_object_dataset'
CONFIG_DIR = PROJECT_ROOT / 'config'
MODELS_DIR = PROJECT_ROOT / 'models'
RESULTS_DIR = PROJECT_ROOT / 'results'
FIGURES_DIR = PROJECT_ROOT / 'figures'

# Dataset paths
VAL_IMAGES = DATASET_ROOT / 'val' / 'images'
VAL_LABELS = DATASET_ROOT / 'val' / 'labels'

# Load configuration
# Read as UTF-8 explicitly so the class names decode the same way on every
# platform instead of depending on the locale's default encoding.
with open(CONFIG_DIR / 'dataset.yaml', 'r', encoding='utf-8') as f:
    dataset_config = yaml.safe_load(f)

CLASS_NAMES = dataset_config['names']
# `nc` is optional in dataset YAML files that only list `names`; fall back to
# the number of names rather than failing with a KeyError.
NUM_CLASSES = dataset_config.get('nc', len(CLASS_NAMES))

print(f"üìã Loaded configuration with {NUM_CLASSES} classes")

In [None]:
# Load best model
# Preference order: the exported project checkpoint, then the most recently
# written training-run checkpoint, then generic pretrained weights.
best_model_path = MODELS_DIR / 'best_model.pt'

if not best_model_path.exists():
    runs_dir = PROJECT_ROOT / 'runs' / 'detect'
    if runs_dir.exists():
        # Choose by modification time: sorting run directory names as strings
        # would rank e.g. 'train9' above 'train10' and pick a stale run.
        candidates = [p for p in runs_dir.glob('*/weights/best.pt') if p.is_file()]
        if candidates:
            best_model_path = max(candidates, key=lambda p: p.stat().st_mtime)

if best_model_path.exists():
    model = YOLO(str(best_model_path))
    print(f"‚úÖ Loaded model: {best_model_path}")
else:
    print("‚ö†Ô∏è No trained model found. Using pretrained for demo.")
    model = YOLO('yolov8n.pt')

## 2. Error Taxonomy Analysis

In [None]:
def calculate_iou(box1, box2):
    """
    Calculate IoU between two boxes [x1, y1, x2, y2].

    Args:
        box1: Indexable with at least four numbers (x1, y1, x2, y2).
        box2: Second box in the same layout and coordinate frame.

    Returns:
        Intersection area divided by union area, or 0 when the union is not
        positive (e.g. both boxes have zero area).
    """
    x1 = max(box1[0], box2[0])
    y1 = max(box1[1], box2[1])
    x2 = min(box1[2], box2[2])
    y2 = min(box1[3], box2[3])

    # Clamp each side at zero so disjoint boxes give an empty intersection.
    intersection = max(0, x2 - x1) * max(0, y2 - y1)

    area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
    area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])

    union = area1 + area2 - intersection

    return intersection / union if union > 0 else 0


def classify_errors(gt_boxes, gt_classes, pred_boxes, pred_classes, pred_confs,
                    iou_threshold=0.5, bg_iou_threshold=0.1):
    """
    Classify the detections of one image into error categories.

    Each prediction is compared with the ground-truth box it overlaps most
    (highest IoU, regardless of class) and filed under exactly one of:

    - true_positive: IoU >= iou_threshold, same class, box not yet claimed
    - duplicate: IoU >= iou_threshold, same class, box already claimed
    - classification_error: IoU >= iou_threshold, different class
    - localization_error: bg_iou_threshold <= IoU < iou_threshold
      (the class is not checked for this category)
    - background_error: IoU below bg_iou_threshold, or no overlap at all

    Predictions are processed from highest to lowest confidence, so when
    several predictions hit the same object the most confident one becomes
    the true positive and the others are duplicates, independent of the
    order in which the caller supplies them.

    Ground-truth boxes are only marked as matched by a true positive; every
    other box is reported under 'missed', including boxes that attracted a
    classification or localization error.

    Args:
        gt_boxes: Ground-truth boxes as [x1, y1, x2, y2].
        gt_classes: Class id for each ground-truth box.
        pred_boxes: Predicted boxes in the same coordinate frame.
        pred_classes: Class id for each prediction.
        pred_confs: Confidence for each prediction.
        iou_threshold: Minimum IoU for a prediction to count as a hit.
        bg_iou_threshold: Minimum IoU for a localization error; anything
            below is a background error.

    Returns:
        Dict mapping each category name to a list of record dicts. Records
        carry 'pred_idx' (position in the caller's input lists) and/or
        'gt_idx', plus 'iou', 'conf', 'pred_cls' and 'gt_cls' where relevant.
    """
    errors = {
        'true_positive': [],
        'localization_error': [],
        'classification_error': [],
        'duplicate': [],
        'background_error': [],
        'missed': []
    }

    gt_matched = [False] * len(gt_boxes)

    # Keep the caller's indices, then order by confidence. list.sort is stable
    # even with reverse=True, so ties keep their original relative order.
    ranked_preds = list(enumerate(zip(pred_boxes, pred_classes, pred_confs)))
    ranked_preds.sort(key=lambda item: item[1][2], reverse=True)

    # Match predictions to ground truth
    for pi, (pred_box, pred_cls, pred_conf) in ranked_preds:
        best_iou = 0
        best_gt_idx = -1

        for gi, (gt_box, _) in enumerate(zip(gt_boxes, gt_classes)):
            iou = calculate_iou(pred_box, gt_box)
            if iou > best_iou:
                best_iou = iou
                best_gt_idx = gi

        # best_gt_idx stays -1 when nothing overlaps; never index with it.
        overlaps_gt = best_gt_idx >= 0

        if overlaps_gt and best_iou >= iou_threshold:
            if pred_cls == gt_classes[best_gt_idx]:
                record = {
                    'pred_idx': pi, 'gt_idx': best_gt_idx,
                    'iou': best_iou, 'conf': pred_conf
                }
                if gt_matched[best_gt_idx]:
                    errors['duplicate'].append(record)
                else:
                    errors['true_positive'].append(record)
                    gt_matched[best_gt_idx] = True
            else:
                errors['classification_error'].append({
                    'pred_idx': pi, 'gt_idx': best_gt_idx,
                    'pred_cls': pred_cls, 'gt_cls': gt_classes[best_gt_idx],
                    'iou': best_iou, 'conf': pred_conf
                })
        elif overlaps_gt and best_iou >= bg_iou_threshold:
            errors['localization_error'].append({
                'pred_idx': pi, 'gt_idx': best_gt_idx,
                'iou': best_iou, 'conf': pred_conf
            })
        else:
            errors['background_error'].append({
                'pred_idx': pi, 'conf': pred_conf
            })

    # Find missed detections
    errors['missed'] = [
        {'gt_idx': gi, 'gt_cls': gt_classes[gi]}
        for gi, matched in enumerate(gt_matched)
        if not matched
    ]

    return errors

In [None]:
def parse_yolo_label(label_path, img_size=640):
    """Parse a YOLO format label file into corner-format boxes.

    Each usable line is read as ``class x_center y_center width height`` with
    the four box values multiplied by the image size. Lines with fewer than
    five whitespace-separated fields are skipped; fields after the fifth are
    ignored.

    Args:
        label_path: Path (or path string) of the label file.
        img_size: Size to scale the box values by. Either one number, used
            for both axes, or a ``(width, height)`` pair for non-square
            images.

    Returns:
        Tuple ``(boxes, classes)`` where ``boxes`` is a list of
        ``[x1, y1, x2, y2]`` lists and ``classes`` the matching list of int
        class ids. Both are empty when the file does not exist.

    Raises:
        ValueError: If a line with five or more fields has a non-numeric
            value.
    """
    boxes = []
    classes = []

    label_path = Path(label_path)
    if not label_path.exists():
        return boxes, classes

    # A scalar means a square image; a pair carries width and height.
    if isinstance(img_size, (tuple, list)):
        img_w, img_h = img_size
    else:
        img_w = img_h = img_size

    with open(label_path, 'r', encoding='utf-8') as f:
        for line in f:
            parts = line.strip().split()
            if len(parts) < 5:
                continue

            cls = int(parts[0])
            x_center = float(parts[1]) * img_w
            y_center = float(parts[2]) * img_h
            width = float(parts[3]) * img_w
            height = float(parts[4]) * img_h

            # Convert centre/size representation to corner coordinates.
            x1 = x_center - width / 2
            y1 = y_center - height / 2
            x2 = x_center + width / 2
            y2 = y_center + height / 2

            boxes.append([x1, y1, x2, y2])
            classes.append(cls)

    return boxes, classes

In [None]:
# Collect error statistics
print("üîç Analyzing detection errors...")

# Running totals per error category over all analysed images.
all_errors = {
    'true_positive': 0,
    'localization_error': 0,
    'classification_error': 0,
    'duplicate': 0,
    'background_error': 0,
    'missed': 0
}

classification_errors_detail = []  # (gt_class, pred_class)
missed_by_class = Counter()

# Sort before slicing: glob order depends on the filesystem, so an unsorted
# sample would not be reproducible between runs or machines.
val_images = sorted(VAL_IMAGES.glob('*.jpg'))[:300]  # Sample for speed

# Labels are normalised to [0, 1] while `boxes.xyxy` is in pixels of the
# original image, so the two only line up for 640x640 images. Mapping the
# normalised form of both onto one common frame keeps IoU correct for any
# image size (IoU is unchanged by per-axis scaling).
eval_frame = 640

for img_path in tqdm(val_images, desc="Analyzing"):
    # Get ground truth
    label_path = VAL_LABELS / f"{img_path.stem}.txt"
    gt_boxes, gt_classes = parse_yolo_label(label_path, img_size=eval_frame)

    # Images without annotations are skipped entirely.
    if not gt_boxes:
        continue

    # Get predictions
    results = model.predict(str(img_path), conf=0.25, verbose=False)
    detections = results[0].boxes

    if len(detections) > 0:
        pred_boxes = (detections.xyxyn.cpu().numpy() * eval_frame).tolist()
        pred_classes = detections.cls.cpu().numpy().astype(int).tolist()
        pred_confs = detections.conf.cpu().numpy().tolist()
    else:
        pred_boxes, pred_classes, pred_confs = [], [], []

    # Classify errors
    errors = classify_errors(gt_boxes, gt_classes, pred_boxes, pred_classes, pred_confs)

    for error_type, error_list in errors.items():
        all_errors[error_type] += len(error_list)

    # Keep per-class detail for the confusion and missed-detection plots.
    classification_errors_detail.extend(
        (err['gt_cls'], err['pred_cls']) for err in errors['classification_error']
    )
    missed_by_class.update(err['gt_cls'] for err in errors['missed'])

In [None]:
# Visualize error distribution
fig, axes = plt.subplots(1, 2, figsize=(16, 6))

# Error type distribution
error_types = list(all_errors.keys())
error_counts = list(all_errors.values())
colors = ['#2ecc71', '#f39c12', '#e74c3c', '#9b59b6', '#e67e22', '#3498db']

bars = axes[0].bar(error_types, error_counts, color=colors)
axes[0].set_xlabel('Error Type', fontsize=12)
axes[0].set_ylabel('Count', fontsize=12)
axes[0].set_title('Detection Error Taxonomy', fontsize=14, fontweight='bold')
axes[0].tick_params(axis='x', rotation=45)

# Scale the label gap with the data so it looks the same whether the counts
# are in the tens or in the thousands.
label_offset = max(error_counts, default=0) * 0.01
for bar, count in zip(bars, error_counts):
    axes[0].text(bar.get_x() + bar.get_width()/2, bar.get_height() + label_offset,
                 str(count), ha='center', va='bottom', fontsize=10)

# Pie chart
total = sum(error_counts)

if total > 0:
    axes[1].pie(error_counts, labels=error_types, autopct='%1.1f%%', colors=colors,
                explode=[0.05 if t == 'true_positive' else 0 for t in error_types])
else:
    # A pie of all-zero counts cannot be drawn; show a placeholder instead.
    axes[1].text(0.5, 0.5, 'No detections to plot', ha='center', va='center',
                 transform=axes[1].transAxes)
    axes[1].axis('off')
axes[1].set_title('Error Distribution', fontsize=14, fontweight='bold')

plt.tight_layout()
# Make sure the output directory exists before writing the figure.
FIGURES_DIR.mkdir(parents=True, exist_ok=True)
plt.savefig(FIGURES_DIR / '23_error_taxonomy.png', dpi=150, bbox_inches='tight')
plt.show()

print("üíæ Figure saved to: figures/23_error_taxonomy.png")

In [None]:
# Missed detections by class
# Horizontal bar chart with one bar per class that has at least one miss,
# ordered by class id.
fig, ax = plt.subplots(figsize=(12, 6))

class_ids = sorted(missed_by_class)
missed_classes = [CLASS_NAMES[class_id] for class_id in class_ids]
missed_counts = [missed_by_class[class_id] for class_id in class_ids]

bars = ax.barh(missed_classes, missed_counts, color='#e74c3c')
ax.set_title('Missed Detections by Class', fontsize=14, fontweight='bold')
ax.set_xlabel('Number of Missed Detections', fontsize=12)
ax.set_ylabel('Class', fontsize=12)

# Print the count just to the right of the end of each bar.
for bar, count in zip(bars, missed_counts):
    label_x = bar.get_width() + 1
    label_y = bar.get_y() + bar.get_height()/2
    ax.text(label_x, label_y, str(count), va='center', fontsize=10)

plt.tight_layout()
plt.savefig('../figures/24_missed_by_class.png', dpi=150, bbox_inches='tight')
plt.show()

print("üíæ Figure saved to: figures/24_missed_by_class.png")

## 3. Confidence Calibration Analysis

In [None]:
def collect_calibration_data(model, images_dir, labels_dir, n_samples=200):
    """
    Collect data for calibration analysis.

    Runs the model at a confidence threshold of 0.1 on up to ``n_samples``
    ``*.jpg`` images (taken in sorted filename order) and records one row per
    prediction. A prediction counts as correct when some ground-truth box of
    the same class overlaps it with IoU >= 0.5; matching is not one-to-one,
    so several predictions may be credited to the same ground-truth box.

    Args:
        model: Detector exposing ``predict(path, conf=..., verbose=...)``.
        images_dir: Directory (Path) containing the images.
        labels_dir: Directory (Path) containing ``<image stem>.txt`` labels.
        n_samples: Maximum number of images to process.

    Returns:
        DataFrame with columns 'confidence', 'correct' (0/1) and 'class_id'.
    """
    data = {
        'confidence': [],
        'correct': [],
        'class_id': []
    }

    # Labels are normalised while `boxes.xyxy` is in original-image pixels;
    # express both in one common frame so IoU is valid for any image size.
    frame = 640

    # Sorted so the sample does not depend on filesystem enumeration order.
    image_files = sorted(images_dir.glob('*.jpg'))[:n_samples]

    for img_path in tqdm(image_files, desc="Collecting calibration data"):
        label_path = labels_dir / f"{img_path.stem}.txt"
        gt_boxes, gt_classes = parse_yolo_label(label_path, img_size=frame)

        results = model.predict(str(img_path), conf=0.1, verbose=False)
        detections = results[0].boxes

        if len(detections) == 0:
            continue

        pred_boxes = detections.xyxyn.cpu().numpy() * frame
        pred_classes = detections.cls.cpu().numpy().astype(int)
        pred_confs = detections.conf.cpu().numpy()

        for pbox, pcls, pconf in zip(pred_boxes, pred_classes, pred_confs):
            # Check if this prediction is correct
            is_correct = any(
                calculate_iou(pbox, gbox) >= 0.5 and pcls == gcls
                for gbox, gcls in zip(gt_boxes, gt_classes)
            )

            data['confidence'].append(float(pconf))
            data['correct'].append(int(is_correct))
            data['class_id'].append(int(pcls))

    return pd.DataFrame(data)

In [None]:
# Collect calibration data
# One row per prediction on the validation sample; consumed by the
# calibration curve and ECE cells.
print("üîç Analyzing confidence calibration...")
calibration_df = collect_calibration_data(
    model,
    images_dir=VAL_IMAGES,
    labels_dir=VAL_LABELS,
    n_samples=200,
)

In [None]:
# Calculate calibration curve
def calculate_calibration_curve(df, n_bins=10):
    """Calculate expected accuracy vs confidence.

    Splits [0, 1] into ``n_bins`` equal-width bins and, for every non-empty
    bin, reports the mean confidence, the mean of 'correct' and the number of
    predictions. Bins are half-open ``[lower, upper)`` except the last one,
    which is closed so that a confidence of exactly 1.0 is not dropped.

    Args:
        df: DataFrame with numeric 'confidence' and 'correct' columns.
        n_bins: Number of equal-width confidence bins.

    Returns:
        Tuple ``(mean_confidence, mean_accuracy, counts)`` of equally long
        lists with one entry per non-empty bin, in increasing bin order.
    """
    bins = np.linspace(0, 1, n_bins + 1)
    confidence = df['confidence']

    mean_confidence = []
    mean_accuracy = []
    counts = []

    for i in range(n_bins):
        lower, upper = bins[i], bins[i + 1]
        if i == n_bins - 1:
            # Closed on the right so confidence == 1.0 lands in the top bin.
            mask = (confidence >= lower) & (confidence <= upper)
        else:
            mask = (confidence >= lower) & (confidence < upper)

        n_in_bin = int(mask.sum())
        if n_in_bin > 0:
            mean_confidence.append(confidence[mask].mean())
            mean_accuracy.append(df.loc[mask, 'correct'].mean())
            counts.append(n_in_bin)

    return mean_confidence, mean_accuracy, counts

In [None]:
# Calculate and plot calibration curve
# `conf`, `acc` and `counts` stay at notebook scope because the ECE cell
# reads them.
conf, acc, counts = calculate_calibration_curve(calibration_df)

fig, axes = plt.subplots(1, 2, figsize=(14, 5))
curve_ax, reliability_ax = axes
unit_range = [0, 1]

# Calibration curve
# Marker area is proportional to the number of predictions in each bin.
marker_sizes = [n / 2 for n in counts]
curve_ax.plot(unit_range, unit_range, 'k--', label='Perfect calibration', linewidth=2)
curve_ax.scatter(conf, acc, s=marker_sizes, c='#3498db', alpha=0.7, label='Model')
curve_ax.plot(conf, acc, 'b-', alpha=0.5)
curve_ax.set_title('Calibration Curve', fontsize=14, fontweight='bold')
curve_ax.set_xlabel('Mean Predicted Confidence', fontsize=12)
curve_ax.set_ylabel('Fraction of True Positives', fontsize=12)
curve_ax.set_xlim(unit_range)
curve_ax.set_ylim(unit_range)
curve_ax.legend()
curve_ax.grid(True, alpha=0.3)

# Reliability diagram with histogram
reliability_ax.bar(conf, acc, width=0.08, alpha=0.7, color='#3498db', label='Accuracy')
reliability_ax.plot(unit_range, unit_range, 'k--', label='Perfect calibration')
reliability_ax.set_title('Reliability Diagram', fontsize=14, fontweight='bold')
reliability_ax.set_xlabel('Confidence Bin', fontsize=12)
reliability_ax.set_ylabel('Accuracy', fontsize=12)
reliability_ax.legend()
reliability_ax.set_xlim(unit_range)
reliability_ax.set_ylim(unit_range)

plt.tight_layout()
plt.savefig('../figures/25_calibration.png', dpi=150, bbox_inches='tight')
plt.show()

print("üíæ Figure saved to: figures/25_calibration.png")

In [None]:
# Calculate Expected Calibration Error (ECE)
# ECE = count-weighted mean of |accuracy - confidence| over the non-empty
# bins; defined as 0 when there are no predictions at all.
total_samples = sum(counts)
if total_samples > 0:
    weighted_gap = sum(n * abs(bin_acc - bin_conf)
                       for n, bin_acc, bin_conf in zip(counts, acc, conf))
    ece = weighted_gap / total_samples
else:
    ece = 0

print("\nüìä Calibration Metrics:")
print(f"   Expected Calibration Error (ECE): {ece:.4f}")
print(f"   Total predictions analyzed: {len(calibration_df)}")

# Interpretation
if ece >= 0.1:
    print("   ‚ùå Model is poorly calibrated")
elif ece >= 0.05:
    print("   ‚ö†Ô∏è Model has slight calibration issues")
else:
    print("   ‚úÖ Model is well-calibrated")

## 4. Object Size Analysis

In [None]:
def collect_size_performance(model, images_dir, labels_dir, n_samples=200):
    """
    Analyze performance by object size.

    Runs the model at a confidence threshold of 0.25 on up to ``n_samples``
    ``*.jpg`` images (taken in sorted filename order) and records one row per
    ground-truth box. A box counts as detected when some prediction of the
    same class overlaps it with IoU >= 0.5.

    Boxes are measured in a common 640x640 frame, so 'area' is in pixels of
    that frame and the size category reflects the fraction of the image the
    object covers: below 1% is small, below 10% is medium, otherwise large.

    Args:
        model: Detector exposing ``predict(path, conf=..., verbose=...)``.
        images_dir: Directory (Path) containing the images.
        labels_dir: Directory (Path) containing ``<image stem>.txt`` labels.
        n_samples: Maximum number of images to process.

    Returns:
        DataFrame with columns 'area', 'detected' (0/1), 'class_id' and
        'size_category'.
    """
    data = {
        'area': [],
        'detected': [],
        'class_id': [],
        'size_category': []
    }

    # Labels are normalised while `boxes.xyxy` is in original-image pixels;
    # express both in one common frame so IoU is valid for any image size.
    frame = 640
    frame_area = frame * frame

    # Sorted so the sample does not depend on filesystem enumeration order.
    image_files = sorted(images_dir.glob('*.jpg'))[:n_samples]

    for img_path in tqdm(image_files, desc="Analyzing sizes"):
        label_path = labels_dir / f"{img_path.stem}.txt"
        gt_boxes, gt_classes = parse_yolo_label(label_path, img_size=frame)

        results = model.predict(str(img_path), conf=0.25, verbose=False)
        detections = results[0].boxes

        pred_boxes = []
        pred_classes = []
        if len(detections) > 0:
            pred_boxes = (detections.xyxyn.cpu().numpy() * frame).tolist()
            pred_classes = detections.cls.cpu().numpy().astype(int).tolist()

        for gbox, gcls in zip(gt_boxes, gt_classes):
            area = (gbox[2] - gbox[0]) * (gbox[3] - gbox[1])

            # Check if detected
            detected = any(
                calculate_iou(gbox, pbox) >= 0.5 and gcls == pcls
                for pbox, pcls in zip(pred_boxes, pred_classes)
            )

            # Categorize size by the fraction of the image the box covers
            relative_area = area / frame_area

            if relative_area < 0.01:
                size_cat = 'Small (<1%)'
            elif relative_area < 0.1:
                size_cat = 'Medium (1-10%)'
            else:
                size_cat = 'Large (>10%)'

            data['area'].append(area)
            data['detected'].append(int(detected))
            data['class_id'].append(gcls)
            data['size_category'].append(size_cat)

    return pd.DataFrame(data)

In [None]:
# Collect size performance data
# One row per ground-truth object on the validation sample; consumed by the
# size plot and the summary cells.
print("üîç Analyzing performance by object size...")
size_df = collect_size_performance(
    model,
    images_dir=VAL_IMAGES,
    labels_dir=VAL_LABELS,
    n_samples=200,
)

In [None]:
# Visualize size performance
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Detection rate by size category
# Only categories that occur in the data get a row; the categorical dtype is
# used to sort the rows small -> medium -> large.
size_order = ['Small (<1%)', 'Medium (1-10%)', 'Large (>10%)']
size_stats = size_df.groupby('size_category')['detected'].agg(['sum', 'count', 'mean']).reset_index()
size_stats['size_category'] = pd.Categorical(size_stats['size_category'], categories=size_order, ordered=True)
size_stats = size_stats.sort_values('size_category')

colors = ['#e74c3c', '#f39c12', '#2ecc71']
# Look colours up by category rather than by bar position, so a category
# missing from the data does not shift the colours of the remaining bars.
color_by_size = dict(zip(size_order, colors))
bar_labels = [str(cat) for cat in size_stats['size_category']]
bar_colors = [color_by_size[label] for label in bar_labels]

bars = axes[0].bar(bar_labels, size_stats['mean'], color=bar_colors)
axes[0].set_xlabel('Object Size', fontsize=12)
axes[0].set_ylabel('Detection Rate', fontsize=12)
axes[0].set_title('Detection Rate by Object Size', fontsize=14, fontweight='bold')
axes[0].set_ylim([0, 1])

for bar, mean, count in zip(bars, size_stats['mean'], size_stats['count']):
    axes[0].text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.02,
                 f'{mean:.2%}\n(n={count})', ha='center', fontsize=10)

# Scatter plot of detection vs area
axes[1].scatter(size_df['area'], size_df['detected'], alpha=0.5, c=size_df['detected'].map({0: '#e74c3c', 1: '#2ecc71'}))
axes[1].set_xlabel('Object Area (pixels¬≤)', fontsize=12)
axes[1].set_ylabel('Detected (0/1)', fontsize=12)
axes[1].set_title('Detection Success vs Object Area', fontsize=14, fontweight='bold')
axes[1].set_xscale('log')

plt.tight_layout()
# Make sure the output directory exists before writing the figure.
FIGURES_DIR.mkdir(parents=True, exist_ok=True)
plt.savefig(FIGURES_DIR / '26_size_analysis.png', dpi=150, bbox_inches='tight')
plt.show()

print("üíæ Figure saved to: figures/26_size_analysis.png")

## 5. Class Similarity / Confusion Analysis

In [None]:
# Create confusion similarity matrix
# Rows are true classes and columns predicted classes; only classification
# errors are counted, so correct detections do not appear.
if not classification_errors_detail:
    print("‚ö†Ô∏è No classification errors found for analysis")
else:
    confusion_counts = np.zeros((NUM_CLASSES, NUM_CLASSES))
    for true_id, predicted_id in classification_errors_detail:
        confusion_counts[true_id, predicted_id] += 1

    # Normalize
    # Rows without any error get a divisor of 1 to avoid dividing by zero.
    row_sums = confusion_counts.sum(axis=1, keepdims=True)
    row_sums = np.where(row_sums == 0, 1, row_sums)
    confusion_normalized = confusion_counts / row_sums

    # Plot
    fig, ax = plt.subplots(figsize=(12, 10))

    mask = confusion_counts == 0  # Mask zeros
    tick_labels = [CLASS_NAMES[i] for i in range(NUM_CLASSES)]

    sns.heatmap(
        confusion_counts,
        mask=mask,
        annot=True,
        fmt='.0f',
        cmap='Reds',
        xticklabels=tick_labels,
        yticklabels=tick_labels,
        ax=ax,
    )

    ax.set_title('Classification Confusion Matrix', fontsize=14, fontweight='bold')
    ax.set_xlabel('Predicted Class', fontsize=12)
    ax.set_ylabel('True Class', fontsize=12)
    ax.tick_params(axis='x', rotation=45)

    plt.tight_layout()
    plt.savefig('../figures/27_class_confusion.png', dpi=150, bbox_inches='tight')
    plt.show()

    print("üíæ Figure saved to: figures/27_class_confusion.png")

In [None]:
# Identify most confused class pairs
# Lists up to ten (true class -> predicted class) pairs, most frequent first.
if classification_errors_detail:
    print("\nüîç Most Confused Class Pairs:")
    print("=" * 60)

    confusion_pairs = Counter(classification_errors_detail)
    top_pairs = confusion_pairs.most_common(10)

    for pair, count in top_pairs:
        true_name = CLASS_NAMES[pair[0]]
        predicted_name = CLASS_NAMES[pair[1]]
        print(f"   {true_name:25s} ‚Üí {predicted_name:25s}: {count}")

## 6. Threshold Sensitivity Analysis

In [None]:
def evaluate_at_threshold(model, images_dir, labels_dir, conf_threshold, n_samples=100):
    """
    Evaluate detection performance at a specific confidence threshold.

    Runs the model on up to ``n_samples`` ``*.jpg`` images (taken in sorted
    filename order). Each prediction is matched to the first still-unmatched
    ground-truth box of the same class with IoU >= 0.5; matched predictions
    are true positives, unmatched predictions false positives, and
    ground-truth boxes left unmatched false negatives.

    Args:
        model: Detector exposing ``predict(path, conf=..., verbose=...)``.
        images_dir: Directory (Path) containing the images.
        labels_dir: Directory (Path) containing ``<image stem>.txt`` labels.
        conf_threshold: Confidence threshold passed to the model.
        n_samples: Maximum number of images to process.

    Returns:
        Dict with 'precision', 'recall' and 'f1'; each is 0 when its
        denominator is 0.
    """
    tp, fp, fn = 0, 0, 0

    # Labels are normalised while `boxes.xyxy` is in original-image pixels;
    # express both in one common frame so IoU is valid for any image size.
    frame = 640

    # Sorted so the sample does not depend on filesystem enumeration order.
    image_files = sorted(images_dir.glob('*.jpg'))[:n_samples]

    for img_path in image_files:
        label_path = labels_dir / f"{img_path.stem}.txt"
        gt_boxes, gt_classes = parse_yolo_label(label_path, img_size=frame)

        results = model.predict(str(img_path), conf=conf_threshold, verbose=False)
        detections = results[0].boxes

        pred_boxes = []
        pred_classes = []
        if len(detections) > 0:
            pred_boxes = (detections.xyxyn.cpu().numpy() * frame).tolist()
            pred_classes = detections.cls.cpu().numpy().astype(int).tolist()

        gt_matched = [False] * len(gt_boxes)

        for pbox, pcls in zip(pred_boxes, pred_classes):
            matched = False
            for gi, (gbox, gcls) in enumerate(zip(gt_boxes, gt_classes)):
                # Each ground-truth box can be claimed by one prediction only.
                if not gt_matched[gi] and calculate_iou(pbox, gbox) >= 0.5 and pcls == gcls:
                    tp += 1
                    gt_matched[gi] = True
                    matched = True
                    break
            if not matched:
                fp += 1

        fn += gt_matched.count(False)

    precision = tp / (tp + fp) if (tp + fp) > 0 else 0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0
    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0

    return {'precision': precision, 'recall': recall, 'f1': f1}

In [None]:
# Evaluate at different thresholds
# Sweeps the confidence threshold in steps of 0.1 starting at 0.1 (stop value
# 0.95 is exclusive) and stores one metrics dict per threshold.
print("üîç Analyzing threshold sensitivity...")

thresholds = np.arange(0.1, 0.95, 0.1)
threshold_results = []

for thresh in tqdm(thresholds, desc="Evaluating thresholds"):
    metrics = evaluate_at_threshold(model, VAL_IMAGES, VAL_LABELS, thresh, n_samples=100)
    threshold_results.append({**metrics, 'threshold': thresh})

In [None]:
# Plot threshold sensitivity
# Precision, recall and F1 against the confidence threshold, with the
# F1-maximising threshold marked by a vertical line.
thresh_df = pd.DataFrame(threshold_results)

fig, ax = plt.subplots(figsize=(12, 6))

metric_lines = (
    ('precision', 'b-o', 'Precision'),
    ('recall', 'g-o', 'Recall'),
    ('f1', 'r-o', 'F1 Score'),
)
for column, line_format, legend_label in metric_lines:
    ax.plot(thresh_df['threshold'], thresh_df[column], line_format,
            label=legend_label, linewidth=2)

# Find optimal threshold
optimal_idx = thresh_df['f1'].idxmax()
best_row = thresh_df.loc[optimal_idx]
optimal_thresh = best_row['threshold']
optimal_f1 = best_row['f1']

ax.axvline(optimal_thresh, color='red', linestyle='--', alpha=0.5,
           label=f'Optimal: {optimal_thresh:.2f} (F1={optimal_f1:.3f})')

ax.set_title('Threshold Sensitivity Analysis', fontsize=14, fontweight='bold')
ax.set_xlabel('Confidence Threshold', fontsize=12)
ax.set_ylabel('Score', fontsize=12)
ax.set_xlim([0, 1])
ax.set_ylim([0, 1])
ax.grid(True, alpha=0.3)
ax.legend()

plt.tight_layout()
plt.savefig('../figures/28_threshold_sensitivity.png', dpi=150, bbox_inches='tight')
plt.show()

print("üíæ Figure saved to: figures/28_threshold_sensitivity.png")
print(f"\nüìä Optimal confidence threshold: {optimal_thresh:.2f} (F1={optimal_f1:.3f})")

## 7. Summary Report

In [None]:
# Text summary of the results computed by the earlier cells.
banner = "=" * 70
print(banner)
print("üî¨ ADVANCED ANALYSIS SUMMARY")
print(banner)

print("\nüìä ERROR TAXONOMY:")
# NOTE: the total covers every category in all_errors, true positives
# included, so the percentages are shares of all outcomes.
total_errors = sum(all_errors.values())
for error_type, count in all_errors.items():
    pct = (count / total_errors * 100) if total_errors > 0 else 0
    print(f"   {error_type:25s}: {count:5d} ({pct:5.1f}%)")

print("\nüìè SIZE-BASED PERFORMANCE:")
for _, row in size_stats.iterrows():
    print(f"   {row['size_category']:20s}: {row['mean']:.1%} detection rate (n={int(row['count'])})")

print("\nüéØ CALIBRATION:")
print(f"   Expected Calibration Error: {ece:.4f}")
if ece >= 0.1:
    print("   Model needs confidence calibration")
elif ece >= 0.05:
    print("   Model has slight overconfidence")
else:
    print("   Model confidence is well-calibrated")

print("\n‚öôÔ∏è OPTIMAL SETTINGS:")
print(f"   Recommended confidence threshold: {optimal_thresh:.2f}")
print(f"   Expected F1 score at optimal: {optimal_f1:.3f}")

print("\nüí° RECOMMENDATIONS:")
n_missed = all_errors['missed']
n_background = all_errors['background_error']
# A category triggers advice once it exceeds 10% of all counted outcomes.
share_limit = total_errors * 0.1

if n_missed > n_background:
    print("   ‚Ä¢ Lower confidence threshold to reduce missed detections")
elif n_background > n_missed:
    print("   ‚Ä¢ Increase confidence threshold to reduce false positives")
if all_errors['classification_error'] > share_limit:
    print("   ‚Ä¢ Consider class-specific training or more training data")
if all_errors['localization_error'] > share_limit:
    print("   ‚Ä¢ Adjust box regression loss or NMS threshold")

print("\n" + banner)

In [None]:
# Save analysis results
# Convert every value to a built-in Python type first: the statistics come
# from pandas/numpy and the category column is categorical, neither of which
# the json module is guaranteed to serialise.
size_performance = [
    {
        'size_category': str(record['size_category']),
        'sum': int(record['sum']),
        'count': int(record['count']),
        'mean': float(record['mean']),
    }
    for record in size_stats.to_dict(orient='records')
]

analysis_results = {
    'error_taxonomy': {name: int(count) for name, count in all_errors.items()},
    'calibration_ece': float(ece),
    'optimal_threshold': float(optimal_thresh),
    'optimal_f1': float(optimal_f1),
    'size_performance': size_performance,
}

# Create the output directory on first use instead of failing on open().
RESULTS_DIR.mkdir(parents=True, exist_ok=True)

with open(RESULTS_DIR / 'advanced_analysis.json', 'w', encoding='utf-8') as f:
    json.dump(analysis_results, f, indent=2)

print(f"üíæ Analysis results saved to: {RESULTS_DIR / 'advanced_analysis.json'}")

In [None]:
# Closing banner: completion notice followed by a one-line recap of each
# notebook in the project.
closing_lines = (
    "\n‚úÖ Advanced Analysis Complete!",
    "\nüìÅ All notebooks completed successfully!",
    "\nüéâ Project Summary:",
    "   ‚Ä¢ Notebook 01: EDA - Dataset exploration and visualization",
    "   ‚Ä¢ Notebook 02: Preprocessing - Augmentation and class weights",
    "   ‚Ä¢ Notebook 03: Training - YOLOv8 model training pipeline",
    "   ‚Ä¢ Notebook 04: Evaluation - Comprehensive metrics and analysis",
    "   ‚Ä¢ Notebook 05: Inference - Production inference and export",
    "   ‚Ä¢ Notebook 06: Advanced - Error taxonomy and deep analysis",
)
for closing_line in closing_lines:
    print(closing_line)