# Task 03 Solutions: Imbalanced Data Evaluation

Solutions for evaluating models on imbalanced datasets

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    roc_auc_score, average_precision_score,
    roc_curve, precision_recall_curve, confusion_matrix
)

In [None]:
# Read the evaluation fixture: ground-truth labels, hard predictions, and
# predicted probabilities. The data is expected to be imbalanced
# (80% class 0, 20% class 1).
df = pd.read_csv('../fixtures/input/classification_data.csv')
y_true, y_pred, y_prob = (
    df[column].values
    for column in ('true_label', 'predicted_label', 'predicted_probability')
)

# Report the sample count and percentage share of each class.
print("Class distribution:")
for label in (0, 1):
    is_label = y_true == label
    print(f"  Class {label}: {np.sum(is_label)} ({np.mean(is_label)*100:.1f}%)")

## Task 3.1: Why Accuracy is Misleading

In [None]:
# Solution: Compare with naive baseline
# The baseline predicts the majority class (0) for every sample. Scoring it
# next to the real model shows how accuracy can look good while the
# positive class is never detected.
# Integer zeros keep the baseline in the same 0/1 label encoding as y_true.
y_pred_naive = np.zeros(len(y_true), dtype=int)  # Always predict majority class

accuracy_naive = accuracy_score(y_true, y_pred_naive)
# The baseline never predicts class 1, so precision is undefined;
# zero_division=0 reports F1 as 0 instead of emitting a warning.
f1_naive = f1_score(y_true, y_pred_naive, zero_division=0)

accuracy_model = accuracy_score(y_true, y_pred)
f1_model = f1_score(y_true, y_pred)

print("Naive Baseline (always predict 0):")
print(f"  Accuracy: {accuracy_naive:.4f}")
print(f"  F1-Score: {f1_naive:.4f}")

print("\nActual Model:")
print(f"  Accuracy: {accuracy_model:.4f}")
print(f"  F1-Score: {f1_model:.4f}")

print(f"\n💡 Insight: Naive gets {accuracy_naive:.1%} accuracy by doing nothing!")
print(f"   But F1={f1_naive:.2f} reveals it catches no positives.")

# Exercise check: the real model must beat the do-nothing baseline on F1.
assert f1_model > f1_naive, "model F1 should exceed the naive baseline F1"
print("\n✅ Demonstrated why accuracy misleads!")

## Task 3.2: ROC-AUC vs PR-AUC

In [None]:
# Solution: Calculate both
# ROC-AUC and PR-AUC (average precision) are both computed from the
# predicted probabilities, not from the hard 0/1 predictions.
roc_auc = roc_auc_score(y_true, y_prob)
pr_auc = average_precision_score(y_true, y_prob)

print(f"ROC-AUC: {roc_auc:.4f}")
print(f"PR-AUC:  {pr_auc:.4f}")

# Plot both curves side by side: ROC on the left, precision-recall on the right
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))

# ROC Curve (the diagonal is the reference line for a random classifier)
fpr, tpr, _ = roc_curve(y_true, y_prob)
ax1.plot(fpr, tpr, linewidth=2, label=f'Model (AUC={roc_auc:.3f})')
ax1.plot([0, 1], [0, 1], 'k--', label='Random')
ax1.set_xlabel('False Positive Rate')
ax1.set_ylabel('True Positive Rate')
ax1.set_title('ROC Curve')
ax1.legend()
ax1.grid(alpha=0.3)

# PR Curve
precision, recall, _ = precision_recall_curve(y_true, y_prob)
# The positive-class prevalence is used as the random-classifier reference
# for precision (horizontal line on the PR plot).
baseline_pr = np.mean(y_true)  # Random baseline
ax2.plot(recall, precision, linewidth=2, label=f'Model (AP={pr_auc:.3f})')
ax2.axhline(baseline_pr, color='red', linestyle='--', label=f'Random (AP={baseline_pr:.3f})')
ax2.set_xlabel('Recall')
ax2.set_ylabel('Precision')
ax2.set_title('Precision-Recall Curve')
ax2.legend()
ax2.grid(alpha=0.3)

plt.tight_layout()
plt.show()

print(f"\n💡 Insight: ROC-AUC={roc_auc:.3f} looks great, but PR-AUC={pr_auc:.3f}")
print(f"   is more realistic on imbalanced data (baseline={baseline_pr:.3f})")

# Exercise check: the model must beat the prevalence baseline on PR-AUC.
assert pr_auc > baseline_pr, "PR-AUC should exceed the positive-class prevalence"
print("\n✅ Both metrics calculated and plotted!")

## Task 3.3: Per-Class Analysis

In [None]:
# Solution: Analyze each class separately
# labels=[0, 1] pins the matrix to 2x2 even if one class is missing from
# both y_true and y_pred; without it the 4-way unpack below would raise.
cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
# Row = true class, column = predicted class, so ravel() yields tn, fp, fn, tp.
tn, fp, fn, tp = cm.ravel()

# Class 0 (negative) metrics: treat class 0 as the "target" class.
# Each ratio falls back to 0 when its denominator is empty.
class_0_precision = tn / (tn + fn) if (tn + fn) > 0 else 0
class_0_recall = tn / (tn + fp) if (tn + fp) > 0 else 0

# Class 1 (positive) metrics
class_1_precision = tp / (tp + fp) if (tp + fp) > 0 else 0
class_1_recall = tp / (tp + fn) if (tp + fn) > 0 else 0

print("Class 0 (Negative - Majority):")
print(f"  Precision: {class_0_precision:.4f}")
print(f"  Recall (Specificity): {class_0_recall:.4f}")

print("\nClass 1 (Positive - Minority):")
print(f"  Precision: {class_1_precision:.4f}")
print(f"  Recall (Sensitivity): {class_1_recall:.4f}")

# The comparison is made on recall: specificity (class 0) vs sensitivity (class 1).
print(f"\n💡 Insight: Model performs {'better' if class_0_recall > class_1_recall else 'worse'}")
print("   on majority class (common in imbalanced data)")

# Sanity check: a precision value must be a valid proportion.
assert 0.0 <= class_1_precision <= 1.0
print("\n✅ Per-class analysis complete!")

## Summary: Best Practices for Imbalanced Data

1. ❌ **Don't use accuracy** - misleading on imbalanced data
2. ✅ **Use F1-score** - balances precision and recall
3. ✅ **Use PR-AUC** - more realistic than ROC-AUC
4. ✅ **Compare to naive baseline** - always predict majority
5. ✅ **Analyze per-class metrics** - find which class struggles
6. ✅ **Consider business costs** - FP vs FN trade-off
7. ✅ **Use confusion matrix** - see actual error patterns