# Binary Classifier Evaluation Metrics

In [2]:
import sklearn.metrics
import numpy as np
import jax
import jax.numpy as jnp
from typing import List
import pandas as pd
import matplotlib.pyplot as plt
import lr_model_jax

## Trying Out the Metrics

In [3]:
# Toy data set: N = 14 ground-truth labels and the matching predictions.
y_actual = np.array(
    [0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0]
)  # N, binary labels
y_pred = np.array(
    [0.06, 0.92, 0.86, 0.03, 0.40, 0.70, 0.23,
     0.4, 0.2, 0.8, 0.9, 0.65, 0.75, 0.4]
)  # N, predicted scores in [0, 1]

In [4]:
# Threshold-based metrics need hard 0/1 decisions, so the scores are cut once here.
toy_cutoff = 0.5
toy_hard_pred = y_pred > toy_cutoff

# Null model: predict the positive-class base rate for every example.
# In the real world this mean would be estimated on the TRAINING set.
null_ypred = np.mean(y_actual) * np.ones_like(y_actual)
toy_null_hard = null_ypred > toy_cutoff

# Baseline that answers "yes" for every example.
toy_always_one = np.ones_like(y_actual)

# Baseline that flags exactly one example, which happens to be a true positive.
yhat = np.zeros_like(y_actual)
yhat[1] = 1

# (label, metric, prediction) rows, evaluated and printed in order.
# The AUC rows take the raw scores: ranking metrics involve no thresholding.
toy_report = (
    ("Accuracy ", sklearn.metrics.accuracy_score, toy_hard_pred),
    ("Accuracy (Null) ", sklearn.metrics.accuracy_score, toy_null_hard),
    ("Balanced Accuracy ", sklearn.metrics.balanced_accuracy_score, toy_hard_pred),
    ("Balanced Accuracy (Null) ", sklearn.metrics.balanced_accuracy_score, toy_null_hard),
    ("Recall ", sklearn.metrics.recall_score, toy_hard_pred),
    ("Recall (Ones all the time) ", sklearn.metrics.recall_score, toy_always_one),
    ("Precision ", sklearn.metrics.precision_score, toy_hard_pred),
    ("Precision (Ones all the time) ", sklearn.metrics.precision_score, toy_always_one),
    ("Precision (Trivial) ", sklearn.metrics.precision_score, yhat),
    ("F1 ", sklearn.metrics.f1_score, toy_hard_pred),
    ("F1 (Ones all the time)", sklearn.metrics.f1_score, toy_always_one),
    ("AUC-ROC ", sklearn.metrics.roc_auc_score, y_pred),
    ("AUC-PR ", sklearn.metrics.average_precision_score, y_pred),
    ("AUC-PR (Null) ", sklearn.metrics.average_precision_score, null_ypred),
)

for toy_label, toy_metric, toy_prediction in toy_report:
    print(toy_label, toy_metric(y_actual, toy_prediction))


Accuracy  0.7142857142857143
Accuracy (Null)  0.6428571428571429
Balanced Accuracy  0.7333333333333334
Balanced Accuracy (Null)  0.5
Recall  0.6666666666666666
Recall (Ones all the time)  1.0
Precision  0.8571428571428571
Precision (Ones all the time)  0.6428571428571429
Precision (Trivial)  1.0
F1  0.75
F1 (Ones all the time) 0.782608695652174
AUC-ROC  0.7555555555555555
AUC-PR  0.8063492063492064
AUC-PR (Null)  0.6428571428571429


## Applying the Metrics to a Logistic Regression Model

In [7]:
# Fit the logistic regression model, then score it on the same rows it was
# fitted on, so every metric below is an in-sample estimate.
df = pd.read_csv("../data/nonseparable_binary_data.csv")
rng = jax.random.key(42)
best_Beta = lr_model_jax.optimize(rng,
                                  input_df = df[['x1','x2']],
                                  y = df.y.to_numpy(),
                                  learning_rate = 0.1,
                                  epochs = 100)
yhat = lr_model_jax.predict(Beta = best_Beta, input_df = df[['x1','x2']])

# base rate of the positive class
mu = np.mean(df.y)

# null model prediction: the base rate for every row
yhat_null = mu * np.ones(df.shape[0])

# hard decisions ...
# The comparison is strict, so a score exactly equal to the threshold is
# classified as negative.
threshold = 0.5
yhat_hard = yhat > threshold
yhat_null_hard = yhat_null > threshold

ytrue = df.y.to_numpy()

# Last expression of the cell: the metrics dict is displayed as the cell output.
dict(
    accuracy = sklearn.metrics.accuracy_score(ytrue, yhat_hard),
    accuracy_null = sklearn.metrics.accuracy_score(ytrue, yhat_null_hard),

    balanced_accuracy = sklearn.metrics.balanced_accuracy_score(ytrue, yhat_hard),
    balanced_accuracy_null = sklearn.metrics.balanced_accuracy_score(ytrue, yhat_null_hard),

    recall = sklearn.metrics.recall_score(ytrue, yhat_hard),
    recall_null = sklearn.metrics.recall_score(ytrue, yhat_null_hard),

    precision = sklearn.metrics.precision_score(ytrue, yhat_hard),
    # The null model may predict no positives at all, which leaves precision
    # undefined (0 / 0). zero_division=0 reports 0.0 explicitly instead of
    # falling back to it with an UndefinedMetricWarning.
    precision_null = sklearn.metrics.precision_score(ytrue, yhat_null_hard, zero_division=0),

    f1 = sklearn.metrics.f1_score(ytrue, yhat_hard),
    f1_null = sklearn.metrics.f1_score(ytrue, yhat_null_hard),

    # ranking metrics take the raw scores, not the thresholded decisions
    auc_roc = sklearn.metrics.roc_auc_score(ytrue, yhat),
    auc_pr = sklearn.metrics.average_precision_score(ytrue, yhat),
    auc_pr_null = sklearn.metrics.average_precision_score(ytrue, yhat_null),
)



  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


{'accuracy': 0.553,
 'accuracy_null': 0.5,
 'balanced_accuracy': 0.5529999999999999,
 'balanced_accuracy_null': 0.5,
 'recall': 0.6,
 'recall_null': 0.0,
 'precision': 0.5484460694698354,
 'precision_null': 0.0,
 'f1': 0.5730659025787965,
 'f1_null': 0.0,
 'auc_roc': 0.547216,
 'auc_pr': 0.46840853414104267,
 'auc_pr_null': 0.5}