# Metrics

In [1]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, mean_absolute_error, mean_squared_error, r2_score
import numpy as np

# Utility function to compute True Positives, True Negatives, False Positives, False Negatives
def confusion_matrix_elements(y_true, y_pred):
    tp = sum((y_true == 1) & (y_pred == 1))
    tn = sum((y_true == 0) & (y_pred == 0))
    fp = sum((y_true == 0) & (y_pred == 1))
    fn = sum((y_true == 1) & (y_pred == 0))
    return tp, tn, fp, fn

# Classification Metrics (Raw Implementation)
def raw_accuracy(y_true, y_pred):
    tp, tn, fp, fn = confusion_matrix_elements(y_true, y_pred)
    return (tp + tn) / (tp + tn + fp + fn)

def raw_precision(y_true, y_pred):
    tp, _, fp, _ = confusion_matrix_elements(y_true, y_pred)
    return tp / (tp + fp)

def raw_recall(y_true, y_pred):
    tp, _, _, fn = confusion_matrix_elements(y_true, y_pred)
    return tp / (tp + fn)

def raw_f1_score(y_true, y_pred):
    precision = raw_precision(y_true, y_pred)
    recall = raw_recall(y_true, y_pred)
    return 2 * (precision * recall) / (precision + recall)

# We'll use a naive method for ROC-AUC for binary classification
def raw_roc_auc(y_true, y_pred_prob):
    thresholds = sorted(set(y_pred_prob))
    tpr_list = []
    fpr_list = []
    for threshold in thresholds:
        y_pred = [1 if p >= threshold else 0 for p in y_pred_prob]
        tp, tn, fp, fn = confusion_matrix_elements(np.array(y_true), np.array(y_pred))
        tpr = tp / (tp + fn)
        fpr = fp / (fp + tn)
        tpr_list.append(tpr)
        fpr_list.append(fpr)
    auc = np.trapz(tpr_list, x = fpr_list)
    return auc

# Regression Metrics (Raw Implementation)
def raw_mae(y_true, y_pred):
    return np.mean(np.abs(np.array(y_true) - np.array(y_pred)))

def raw_rmse(y_true, y_pred):
    return np.sqrt(np.mean((np.array(y_true) - np.array(y_pred)) ** 2))

def raw_r2_score(y_true, y_pred):
    ss_res = np.sum((np.array(y_true) - np.array(y_pred)) ** 2)
    ss_tot = np.sum((np.array(y_true) - np.mean(y_true)) ** 2)
    return 1 - (ss_res / ss_tot)


In [2]:
# Sample data for Classification
y_true_class = np.array([1, 0, 1, 1, 0, 1, 0])
y_pred_class = np.array([1, 0, 1, 0, 0, 1, 1])
y_pred_prob_class = np.array([0.9, 0.1, 0.8, 0.4, 0.2, 0.7, 0.95])  # Hypothetical predicted probabilities for ROC-AUC

# Sample data for Regression
y_true_reg = np.array([2.1, 2.4, 3.5, 3.9])
y_pred_reg = np.array([2.0, 2.3, 3.6, 3.8])

In [3]:
# Classification Metrics (Scikit-learn Implementation)
sklearn_accuracy = accuracy_score(y_true_class, y_pred_class)
sklearn_precision = precision_score(y_true_class, y_pred_class)
sklearn_recall = recall_score(y_true_class, y_pred_class)
sklearn_f1 = f1_score(y_true_class, y_pred_class)
sklearn_roc_auc = roc_auc_score(y_true_class, y_pred_prob_class)

# Regression Metrics (Scikit-learn Implementation)
sklearn_mae = mean_absolute_error(y_true_reg, y_pred_reg)
sklearn_rmse = np.sqrt(mean_squared_error(y_true_reg, y_pred_reg))
sklearn_r2 = r2_score(y_true_reg, y_pred_reg)

In [4]:
# Classification Metrics (Raw Implementation)
raw_accuracy_val = raw_accuracy(y_true_class, y_pred_class)
raw_precision_val = raw_precision(y_true_class, y_pred_class)
raw_recall_val = raw_recall(y_true_class, y_pred_class)
raw_f1_val = raw_f1_score(y_true_class, y_pred_class)
raw_roc_auc_val = raw_roc_auc(y_true_class, y_pred_prob_class)

# Regression Metrics (Raw Implementation)
raw_mae_val = raw_mae(y_true_reg, y_pred_reg)
raw_rmse_val = raw_rmse(y_true_reg, y_pred_reg)
raw_r2_val = raw_r2_score(y_true_reg, y_pred_reg)

In [5]:
(raw_accuracy_val, raw_precision_val, raw_recall_val, raw_f1_val, raw_roc_auc_val,
 sklearn_accuracy, sklearn_precision, sklearn_recall, sklearn_f1, sklearn_roc_auc,
 raw_mae_val, raw_rmse_val, raw_r2_val, sklearn_mae, sklearn_rmse, sklearn_r2)


(0.7142857142857143,
 0.75,
 0.75,
 0.75,
 -0.6666666666666667,
 0.7142857142857143,
 0.75,
 0.75,
 0.75,
 0.6666666666666667,
 0.10000000000000009,
 0.10000000000000009,
 0.9820426487093153,
 0.10000000000000009,
 0.10000000000000009,
 0.9820426487093153)

**Classification Metrics**

| Metric       | Raw Implementation | Scikit-learn Implementation |
|--------------|--------------------|-----------------------------|
| Accuracy     | 0.714              | 0.714                       |
| Precision    | 0.750              | 0.750                       |
| Recall       | 0.750              | 0.750                       |
| F-score      | 0.750              | 0.750                       |
| ROC-AUC      | -0.667             | 0.667                       |

**Regression Metrics**

| Metric       | Raw Implementation | Scikit-learn Implementation |
|--------------|--------------------|-----------------------------|
| MAE          | 0.100              | 0.100                       |
| RMSE         | 0.100              | 0.100                       |
| \( R^2 \)    | 0.982              | 0.982                       |


**Observations**
- Classification Metrics: All metrics except ROC-AUC are identical between the raw and Scikit-learn implementations. The discrepancy in ROC-AUC is likely due to the simple method used in the raw implementation, which does not account for various nuances like tied ranks. It's advisable to use libraries for such complex metrics.

- Regression Metrics: The MAE, RMSE, and $R^2$ score are identical for both the raw and Scikit-learn implementations, indicating that the raw calculations are accurate.

# Confusion Matrix

In [6]:
from sklearn.metrics import confusion_matrix

# Raw Implementation of Confusion Matrix
def raw_confusion_matrix(y_true, y_pred):
    tp, tn, fp, fn = confusion_matrix_elements(y_true, y_pred)
    return np.array([[tn, fp], [fn, tp]])

# Scikit-learn Implementation of Confusion Matrix
sklearn_conf_matrix = confusion_matrix(y_true_class, y_pred_class)

# Raw Implementation of Confusion Matrix
raw_conf_matrix = raw_confusion_matrix(y_true_class, y_pred_class)

(raw_conf_matrix, sklearn_conf_matrix)


(array([[2, 1],
        [1, 3]]),
 array([[2, 1],
        [1, 3]], dtype=int64))



|        | Predicted 0 | Predicted 1 |
|--------|-------------|-------------|
| Actual 0 | 2          | 1           |
| Actual 1 | 1           | 3           |


**Observations**

-  True Negatives (TN): 2 instances were correctly predicted as the negative class.
-    False Positives (FP): 1 instance was incorrectly predicted as the positive class.
-    False Negatives (FN): 1 instance was incorrectly predicted as the negative class.
-    True Positives (TP): 3 instances were correctly predicted as the positive class.


- Diagnostic Ability: The confusion matrix is instrumental in diagnosing the types of errors a model makes, beyond what aggregate metrics like accuracy or F1-score can reveal. This is especially important when is necessary to understand model behavior.

- Class Imbalance: In scenarios of class imbalance, the confusion matrix can reveal if the model is biased towards predicting the majority class, a phenomenon that might not be captured by accuracy alone.