# Understanding ML Metrics (Classification & Regression)

Learning notebook with explanations and exercises.

## Imports

In [1]:

import numpy as np
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    roc_auc_score, average_precision_score, confusion_matrix,
    mean_absolute_error, mean_squared_error, r2_score
)


## Classification: Imbalanced Data

In [2]:

y_true = np.array([1]*10 + [0]*90)


In [3]:

y_pred_A = np.zeros_like(y_true)
y_score_A = np.random.uniform(0.0, 0.3, size=100)


In [4]:

y_pred_B = np.array([1]*6 + [0]*4 + [1]*10 + [0]*80)
y_score_B = np.concatenate([
    np.random.uniform(0.6, 0.9, size=10),
    np.random.uniform(0.0, 0.4, size=90)
])


In [5]:

def print_classification_metrics(name, y_true, y_pred, y_score):
    print(name)
    print(confusion_matrix(y_true, y_pred))
    print("Accuracy:", accuracy_score(y_true, y_pred))
    print("Precision:", precision_score(y_true, y_pred, zero_division=0))
    print("Recall:", recall_score(y_true, y_pred))
    print("F1:", f1_score(y_true, y_pred, zero_division=0))
    print("ROC-AUC:", roc_auc_score(y_true, y_score))
    print("PR-AUC:", average_precision_score(y_true, y_score))


In [6]:

print_classification_metrics("Model A", y_true, y_pred_A, y_score_A)
print_classification_metrics("Model B", y_true, y_pred_B, y_score_B)


Model A
[[90  0]
 [10  0]]
Accuracy: 0.9
Precision: 0.0
Recall: 0.0
F1: 0.0
ROC-AUC: 0.56
PR-AUC: 0.1409267011264716
Model B
[[80 10]
 [ 4  6]]
Accuracy: 0.86
Precision: 0.375
Recall: 0.6
F1: 0.46153846153846156
ROC-AUC: 1.0
PR-AUC: 0.9999999999999999


## Regression with Outliers

In [7]:

y_true = np.array([50, 52, 49, 51, 50, 300])
y_pred_1 = np.array([49, 51, 50, 50, 52, 100])
y_pred_2 = np.array([45, 47, 46, 48, 44, 250])


In [8]:

def print_regression_metrics(name, y_true, y_pred):
    print(name)
    print("MAE:", mean_absolute_error(y_true, y_pred))
    print("RMSE:", mean_squared_error(y_true, y_pred, squared=False))
    print("R2:", r2_score(y_true, y_pred))


In [9]:

print_regression_metrics("Model 1", y_true, y_pred_1)
print_regression_metrics("Model 2", y_true, y_pred_2)


Model 1
MAE: 34.333333333333336


TypeError: got an unexpected keyword argument 'squared'