In [4]:
import pandas as pd
from joblib import load
import tensorflow as tf
import tensorflow_addons as tfa
import numpy as np
from sklearn.metrics import accuracy_score, roc_auc_score
import matplotlib.pyplot as plt
import mplhep as hep
# Apply two mplhep style sheets (ROOT first, then firamath) to matplotlib.
# NOTE(review): no figure is drawn in the cells visible here — confirm later cells plot.
plt.style.use([hep.style.ROOT, hep.style.firamath])

In [32]:
# Version tag shared by the data-file paths here and the model paths further down.
version = '8.0.5'

# Training split: the first CSV column is used as the index.
train = pd.read_csv(f'../data_files/{version}/train.csv', index_col=[0])
# 'category' is the target; it and the three other listed columns are
# removed from the feature matrix.
y_train = train['category']
X_train = train.drop(columns=['category', 'Lb_M', 'IsSimulated', 'QSQR'])

# Test split, handled exactly like the training split.
test = pd.read_csv(f'../data_files/{version}/test.csv', index_col=[0])
df = test  # second name bound to the same test frame (not a copy)
y_test = test['category']
X_test = test.drop(columns=['category', 'Lb_M', 'IsSimulated', 'QSQR'])

In [30]:
# The neural network is restored through Keras from its saved-model path.
nn = tf.keras.models.load_model(f'../model_evaluation/models/NN_{version}')

# The remaining classifiers are joblib dumps. joblib deserialises via pickle,
# so only load model files from a trusted location.
knn, rfc, dtc, xgb = (
    load(f'../model_evaluation/models/KNN_{version}.joblib'),
    load(f'../model_evaluation/models/RFC_{version}.joblib'),
    load(f'../model_evaluation/models/DTC_{version}.joblib'),
    load(f'../model_evaluation/models/XGB_{version}.joblib'),
)

# Display name -> fitted model, in the order the models are reported.
# NOTE(review): the leading underscore in '_NN' presumably pads the name to
# three characters so printed rows line up — confirm.
models = {
    '_NN': nn,
    'KNN': knn,
    'RFC': rfc,
    'DTC': dtc,
    'XGB': xgb,
}

## Generate Predictions

In [33]:
def _positive_class_scores(model, features):
    """Return a 1-D array with one score per row of ``features``.

    If ``model`` exposes ``predict_proba`` and it yields a 2-D array with at
    least two columns, column 1 is returned. Otherwise the flattened output of
    ``model.predict`` is used, which is the path taken by models without
    ``predict_proba`` or whose ``predict_proba`` yields a single column.

    Unlike a bare ``except:``, any genuine failure inside ``predict_proba`` or
    ``predict`` (wrong feature count, interrupted run, ...) propagates instead
    of silently switching to a different kind of prediction.
    """
    if hasattr(model, 'predict_proba'):
        proba = np.asarray(model.predict_proba(features))
        if proba.ndim == 2 and proba.shape[1] > 1:
            return proba[:, 1]
    return np.asarray(model.predict(features)).flatten()


# Convert the feature frames once instead of once per model.
train_features = X_train.to_numpy()
test_features = X_test.to_numpy()

# Model name -> per-row score, for the train and test splits respectively.
train_preds = {}
test_preds = {}

for name, model in models.items():
    train_preds[name] = _positive_class_scores(model, train_features)
    test_preds[name] = _positive_class_scores(model, test_features)



In [35]:
# Threshold passed to the F1 metric for every model and both splits.
F1_THRESHOLD = 0.8


def _f1_at_threshold(y_true, y_score, threshold=F1_THRESHOLD):
    """Compute the F1 score of ``y_score`` against ``y_true``.

    Both inputs are 1-D array-likes of equal length. They are reshaped to
    column vectors of shape (n, 1) before being handed to a fresh
    ``tfa.metrics.F1Score(num_classes=1, threshold=threshold)``.

    Returns the first (and, with ``num_classes=1``, only) element of the
    metric result as a NumPy scalar.
    """
    metric = tfa.metrics.F1Score(num_classes=1, threshold=threshold)
    metric.update_state(
        np.asarray(y_true).reshape(-1, 1),
        np.asarray(y_score).reshape(-1, 1),
    )
    return metric.result().numpy()[0]


print("F1 Scores\n=========")

# Label arrays do not change between models, so extract them once.
train_labels = y_train.to_numpy()
test_labels = y_test.to_numpy()

for name, pred in train_preds.items():
    train_f1 = _f1_at_threshold(train_labels, pred)
    test_f1 = _f1_at_threshold(test_labels, test_preds[name])
    # The header is printed per model on purpose so the output text is
    # unchanged from the original cell.
    print("\nModel | Train | Test")
    print(f"{name}   : {train_f1:.3f}   {test_f1:.3f}\n")

F1 Scores

Model | Train | Test
_NN   : 0.931   0.924


Model | Train | Test
KNN   : 0.873   0.860


Model | Train | Test
RFC   : 0.842   0.821


Model | Train | Test
DTC   : 0.848   0.842


Model | Train | Test
XGB   : 0.799   0.798

