# ML Metrics Cheat Sheet & Demos
This notebook demonstrates **classification, regression, clustering, and recommendation** metrics with concise code.
Run cells top-to-bottom.

In [None]:

import numpy as np
import pandas as pd
from sklearn.datasets import make_classification, make_regression, make_blobs
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.metrics import (accuracy_score, precision_score, recall_score, f1_score, roc_curve, roc_auc_score,
                             confusion_matrix, mean_absolute_error, mean_squared_error, r2_score,
                             silhouette_score, davies_bouldin_score)
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
# Seed NumPy's legacy global RNG so any np.random.* call is repeatable across runs.
np.random.seed(42)


## 1) Classification Metrics

In [None]:

# Synthetic binary problem with an imbalanced class mix (roughly 70/30 via `weights`).
X, y = make_classification(
    n_samples=1000,
    n_features=10,
    n_informative=6,
    weights=[0.7, 0.3],
    random_state=42,
)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

clf = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# Keep the probability column for class 1, then cut it at 0.5 to get hard labels.
y_prob = clf.predict_proba(X_test)[:, 1]
y_pred = (y_prob >= 0.5).astype(int)

# Metrics that are computed from the hard labels.
for label, metric in (
    ("Accuracy:", accuracy_score),
    ("Precision:", precision_score),
    ("Recall:", recall_score),
    ("F1:", f1_score),
):
    print(label, metric(y_test, y_pred))

# ROC-AUC is computed from the probabilities rather than the thresholded labels.
print("ROC-AUC:", roc_auc_score(y_test, y_prob))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))


In [None]:

# ROC points for the test-set probabilities; the thresholds array is not needed here.
fpr, tpr, _ = roc_curve(y_test, y_prob)

fig, ax = plt.subplots()
ax.plot(fpr, tpr, label="ROC")
ax.plot([0, 1], [0, 1], '--')  # dashed diagonal as a visual reference
ax.set_xlabel("False Positive Rate")
ax.set_ylabel("True Positive Rate")
ax.set_title("ROC Curve")
ax.legend()
plt.show()


## 2) Regression Metrics

In [None]:

# Synthetic regression problem with additive noise, split 70/30 into train and test.
Xr, yr = make_regression(
    n_samples=800,
    n_features=5,
    noise=10.0,
    random_state=42,
)
Xr_train, Xr_test, yr_train, yr_test = train_test_split(
    Xr, yr, test_size=0.3, random_state=42
)

reg = LinearRegression()
reg.fit(Xr_train, yr_train)
yr_pred = reg.predict(Xr_test)

# RMSE is the square root of MSE, so compute MSE once and reuse it.
mse = mean_squared_error(yr_test, yr_pred)

print("MAE:", mean_absolute_error(yr_test, yr_pred))
print("MSE:", mse)
print("RMSE:", np.sqrt(mse))
print("R^2:", r2_score(yr_test, yr_pred))


## 3) Clustering Metrics

In [None]:

# Three synthetic blobs; the generating labels are discarded because the
# metrics below only need the points and the predicted cluster assignments.
Xc, _ = make_blobs(n_samples=500, centers=3, cluster_std=0.7, random_state=42)

kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
labels = kmeans.fit_predict(Xc)

# Both scores take (points, predicted labels) and need no ground truth.
for title, cluster_metric in (
    ("Silhouette Score:", silhouette_score),
    ("Davies-Bouldin Index:", davies_bouldin_score),
):
    print(title, cluster_metric(Xc, labels))


## 4) Recommendation Metrics

In [None]:

# Toy recommendation data: a binary relevance matrix and random model scores,
# one row per user and one column per item.
n_users = 5
n_items = 20
interaction_shape = (n_users, n_items)

rng = np.random.default_rng(42)
# Draw relevance before scores: both consume the same generator, so the
# order of these two lines determines the values.
relevance = (rng.random(interaction_shape) < 0.25).astype(int)
scores = rng.normal(size=interaction_shape)

def precision_at_k(rel, score, k=5):
    """Return the fraction of the ``k`` highest-scored items that are relevant.

    Parameters
    ----------
    rel : array-like
        Per-item relevance indicators for one user (expected to be 0/1).
    score : array-like
        Per-item scores aligned with ``rel``; higher scores rank first.
    k : int, default 5
        Ranking cutoff. Must be at least 1.

    Returns
    -------
    float
        ``sum(rel[top-k]) / k``. The denominator is always ``k``, even when
        fewer than ``k`` items are available.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1. A negative ``k`` would otherwise slice
        from the wrong end of the ranking and yield a negative value.
    """
    if k < 1:
        raise ValueError(f"k must be a positive integer, got {k}")
    # Coerce so plain Python sequences work as well as NumPy arrays.
    rel = np.asarray(rel)
    score = np.asarray(score)
    # A stable sort keeps tied scores in input order, so the top-k is deterministic.
    idx = np.argsort(-score, kind="stable")[:k]
    return rel[idx].sum() / k

def recall_at_k(rel, score, k=5):
    """Return the share of all relevant items that appear in the top ``k``.

    Parameters
    ----------
    rel : array-like
        Per-item relevance indicators for one user (expected to be 0/1).
    score : array-like
        Per-item scores aligned with ``rel``; higher scores rank first.
    k : int, default 5
        Ranking cutoff. Must be at least 1.

    Returns
    -------
    float
        ``sum(rel[top-k]) / sum(rel)``, or ``0.0`` when ``rel`` contains no
        relevant items (avoids a division by zero).

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1. A negative ``k`` would otherwise slice
        from the wrong end of the ranking.
    """
    if k < 1:
        raise ValueError(f"k must be a positive integer, got {k}")
    # Coerce so plain Python sequences work as well as NumPy arrays.
    rel = np.asarray(rel)
    score = np.asarray(score)
    total_rel = rel.sum()
    if not total_rel > 0:
        # Return a float here too so the result type does not depend on the data.
        return 0.0
    # A stable sort keeps tied scores in input order, so the top-k is deterministic.
    idx = np.argsort(-score, kind="stable")[:k]
    return rel[idx].sum() / total_rel

# Report each metric averaged over all users, at several ranking cutoffs.
for K in (3, 5, 10):
    precs, recs = [], []
    for u in range(n_users):
        user_rel, user_scores = relevance[u], scores[u]
        precs.append(precision_at_k(user_rel, user_scores, K))
        recs.append(recall_at_k(user_rel, user_scores, K))
    print(f"K={K} Precision@K={np.mean(precs):.3f}, Recall@K={np.mean(recs):.3f}")


### Cheat Sheet Formulas


**Classification**  
- Accuracy = (TP+TN)/(TP+TN+FP+FN)  
- Precision = TP/(TP+FP)  
- Recall = TP/(TP+FN)  
- F1 = 2·(Precision·Recall)/(Precision+Recall)  
- ROC-AUC = Area under the ROC curve (TPR vs FPR, traced across all decision thresholds)  

**Regression**  
- MAE = (1/n) Σ |y−ŷ|  
- MSE = (1/n) Σ (y−ŷ)²  
- RMSE = √MSE  
- R² = 1 − SS_res/SS_tot  

**Clustering**  
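- Silhouette = (b−a)/max(a,b) per sample, averaged over all samples, where a = mean distance to the other points in the same cluster and b = mean distance to the points in the nearest other cluster (range −1 to 1, higher better)  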
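- Davies–Bouldin = average, over clusters i, of the similarity to the most similar other cluster: max over j≠i of (s_i + s_j)/d_ij, where s = mean distance of a cluster's points to its centroid and d_ij = distance between centroids i and j (lower better)  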

**Recommendation**  
- Precision@K = #relevant@K / K  
- Recall@K = #relevant@K / #all_relevant  
