# Task 01 Solutions: Calculate Metrics

Complete solutions for all subtasks in task_01_calculate_metrics.ipynb

In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    confusion_matrix, classification_report
)

## Task 1.1: Calculate Precision from Confusion Matrix

**Solution:** Precision = TP / (TP + FP)

In [None]:
# Confusion-matrix counts given by the exercise. FN and TN are not needed for
# precision, but the recall and accuracy cells further down read them.
TP = 45
FP = 5
FN = 10
TN = 40

# Solution: precision is the share of positive predictions that were correct.
precision = TP / (TP + FP)
print(f"Precision: {precision}")

# Verify with a small tolerance instead of `==`: exact float equality is
# fragile, and the recall/F1 checks in this notebook already compare with
# abs(...) < tolerance.
assert abs(precision - 0.9) < 1e-9, f"Expected 0.9, got {precision}"
print("✅ Test passed!")

## Task 1.2: Calculate Recall

**Solution:** Recall = TP / (TP + FN)

In [None]:
# Solution: share of the actual positives (TP + FN) that were found
actual_positives = TP + FN
recall = TP / actual_positives
print(f"Recall: {recall}")

# Verify: the result must lie within 0.01 of the expected 0.8182
recall_error = abs(recall - 0.8182)
assert recall_error < 0.01, f"Expected ~0.8182, got {recall}"
print("✅ Test passed!")

## Task 1.3: Calculate F1-Score

**Solution:** F1 = 2 × (Precision × Recall) / (Precision + Recall)

In [None]:
# Solution: harmonic mean of the precision and recall computed above
pr_product = precision * recall
pr_sum = precision + recall
f1 = 2 * pr_product / pr_sum
print(f"F1-Score: {f1}")

# Verify: the result must lie within 0.01 of the expected 0.8571
f1_error = abs(f1 - 0.8571)
assert f1_error < 0.01, f"Expected ~0.8571, got {f1}"
print("✅ Test passed!")

## Task 1.4: Calculate Accuracy

**Solution:** Accuracy = (TP + TN) / Total, where Total = TP + TN + FP + FN

In [None]:
# Solution: correct predictions (TP + TN) divided by all predictions.
accuracy = (TP + TN) / (TP + TN + FP + FN)
print(f"Accuracy: {accuracy}")

# Verify with a small tolerance instead of `==`: exact float equality is
# fragile, and the recall/F1 checks in this notebook already compare with
# abs(...) < tolerance.
assert abs(accuracy - 0.85) < 1e-9, f"Expected 0.85, got {accuracy}"
print("✅ Test passed!")

## Task 1.5: Calculate Metrics using sklearn

**Solution:** Use sklearn functions on actual predictions

In [None]:
# Load data: one column of ground-truth labels and one of model predictions.
# The path is relative to the notebook's working directory.
df = pd.read_csv('../fixtures/input/classification_data.csv')
y_true = df['true_label'].values
y_pred = df['predicted_label'].values

# Solution: let sklearn compute each metric from the label arrays.
accuracy_sklearn = accuracy_score(y_true, y_pred)
precision_sklearn = precision_score(y_true, y_pred)
recall_sklearn = recall_score(y_true, y_pred)
f1_sklearn = f1_score(y_true, y_pred)

print(f"Accuracy:  {accuracy_sklearn:.4f}")
print(f"Precision: {precision_sklearn:.4f}")
print(f"Recall:    {recall_sklearn:.4f}")
print(f"F1-Score:  {f1_sklearn:.4f}")

# Verify all are calculated. The metric functions always return a number, so
# an `is not None` check could never fail; instead confirm each score is a
# valid proportion (this comparison also rejects NaN).
assert 0.0 <= accuracy_sklearn <= 1.0, f"Accuracy out of range: {accuracy_sklearn}"
assert 0.0 <= precision_sklearn <= 1.0, f"Precision out of range: {precision_sklearn}"
assert 0.0 <= recall_sklearn <= 1.0, f"Recall out of range: {recall_sklearn}"
assert 0.0 <= f1_sklearn <= 1.0, f"F1-Score out of range: {f1_sklearn}"
print("✅ All metrics calculated!")

## Task 1.6: Generate Classification Report

**Solution:** Use classification_report for comprehensive summary

In [None]:
# Solution: build the text report from the true and predicted labels
report = classification_report(y_true, y_pred)
print(report)

# Verify the report is text and names every expected metric column
assert isinstance(report, str)
assert all(
    column_name in report
    for column_name in ('precision', 'recall', 'f1-score')
)
print("✅ Report generated!")

## Task 1.7: Compare with Naive Baseline

**Solution:** The naive baseline always predicts the majority class

In [None]:
# Solution: Predict most frequent class
# NOTE(review): this import is kept local so the cell runs on its own; consider
# moving it to the notebook's import cell.
from collections import Counter

# most_common(1) returns [(label, count)] for the most frequent true label.
most_common_class = Counter(y_true).most_common(1)[0][0]
y_pred_naive = np.full(len(y_true), most_common_class)

# Calculate naive baseline metrics.
# zero_division=0 scores an undefined precision/recall as 0 — this happens when
# the constant prediction never predicts the positive class.
accuracy_naive = accuracy_score(y_true, y_pred_naive)
f1_naive = f1_score(y_true, y_pred_naive, zero_division=0)

print("Naive Baseline:")
print(f"  Accuracy: {accuracy_naive:.4f}")
print(f"  F1-Score: {f1_naive:.4f}")

print("\nModel Performance:")
print(f"  Accuracy: {accuracy_sklearn:.4f}")
print(f"  F1-Score: {f1_sklearn:.4f}")

# Use the "+" sign format spec rather than a hard-coded "+" prefix, so a
# negative difference prints as "-0.1234" instead of "+-0.1234". The model can
# trail the majority-class baseline on accuracy even when it wins on F1.
print("\nImprovement:")
print(f"  Accuracy: {accuracy_sklearn - accuracy_naive:+.4f}")
print(f"  F1-Score: {f1_sklearn - f1_naive:+.4f}")

# Verify model is better than naive
assert f1_sklearn > f1_naive, "Model should be better than naive baseline"
print("\n✅ Model beats naive baseline!")

## Task 1.8: Calculate Specificity

**Solution:** Specificity = TN / (TN + FP)

In [None]:
# Solution: Get confusion matrix and calculate
cm = confusion_matrix(y_true, y_pred)

# The four-way unpack below only works for a 2x2 (binary) matrix; fail with a
# clear message instead of an opaque "values to unpack" error otherwise.
assert cm.shape == (2, 2), f"Expected a 2x2 confusion matrix, got shape {cm.shape}"
tn, fp, fn, tp = cm.ravel()

specificity = tn / (tn + fp)  # true negative rate
sensitivity = tp / (tp + fn)  # Same as recall

print(f"Specificity (TNR): {specificity:.4f}")
print(f"Sensitivity (TPR): {sensitivity:.4f}")

# Verify it's reasonable: specificity is a proportion, and sensitivity must
# agree with the recall computed by sklearn in the Task 1.5 cell.
assert 0 <= specificity <= 1, f"Specificity out of range: {specificity}"
assert abs(sensitivity - recall_sklearn) < 0.001, (
    f"Sensitivity {sensitivity} does not match sklearn recall {recall_sklearn}"
)
print("✅ Specificity calculated correctly!")

## Summary

All tasks completed! Key takeaways:

1. **Precision** focuses on the quality of positive predictions
2. **Recall** focuses on finding all positives
3. **F1** balances both using their harmonic mean
4. **Accuracy** can be misleading on imbalanced data
5. Always compare against a **naive baseline**
6. **Specificity** matters when false positives (FP) are costly