# Feature Evaluation
We will now assess how strongly each model relies on each feature by randomly shuffling a single column at a time and measuring how this changes the binary accuracy score. Features of greater importance should affect this score to a larger degree, so feature importance is taken to be proportional to the magnitude of the change in the performance metric.

In [23]:
import pandas as pd
from joblib import load
import tensorflow as tf
import numpy as np
from sklearn.metrics import accuracy_score, roc_auc_score
import matplotlib.pyplot as plt
import mplhep as hep
# Use the mplhep ROOT and firamath style sheets for matplotlib figures.
plt.style.use([hep.style.ROOT, hep.style.firamath])

In [24]:
version = '9.0.5'


def _read_split(split_name):
    """Load one data split and separate the model inputs from the target.

    Reads ``../data_files/{version}/{split_name}.csv`` with the first CSV
    column as the index.

    Returns a ``(frame, features, labels)`` tuple: the full frame, the frame
    without the 'category', 'Lb_M' and 'IsSimulated' columns, and the
    'category' column.
    """
    frame = pd.read_csv(f'../data_files/{version}/{split_name}.csv', index_col=[0])
    features = frame.drop(columns=['category', 'Lb_M', 'IsSimulated'])
    return frame, features, frame.category


train, X_train, y_train = _read_split('train')
val, X_val, y_val = _read_split('val')
test, X_test, y_test = _read_split('test')

# `df` has always ended up bound to the test frame after this cell; the alias
# is kept in case another cell reads it.
df = test

# Initial Accuracies

In [25]:
# Keras models.
# NOTE(review): `nn` and `old_nn` are loaded from the same directory, so the
# 'NN' and 'OLDNN' entries below are the same saved model - confirm whether a
# different (older) model path was intended for `old_nn`.
nn_model_dir = f'../neural_network/models/v{version}'
nn = tf.keras.models.load_model(nn_model_dir)
old_nn = tf.keras.models.load_model(nn_model_dir)

# Persisted joblib models.
# NOTE(security): joblib.load unpickles the file, which can execute arbitrary
# code - only load model files from a trusted source.
knn, rfc, dtc, xgb = (
    load(model_path)
    for model_path in (
        f'../classification_methods/models/KNN_{version}_tune.joblib',
        f'../classification_methods/models/RFC_{version}_tune.joblib',
        f'../classification_methods/models/DTC_{version}_tune.joblib',
        f'../classification_methods/models/XGB_{version}.joblib',
    )
)

# Display name -> model. Later cells iterate this dict, so its insertion
# order fixes the order of their per-model results.
models = dict(NN=nn, KNN=knn, RFC=rfc, DTC=dtc, OLDNN=old_nn, XGB=xgb)

https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations


In [68]:
# Every input feature column is shuffled in turn, in the column order of X_train.
cols_to_shuffle = X_train.columns.tolist()

In [76]:
# Baseline (unshuffled) binary accuracy of every model on the training set.
# `orig_accs` is filled in the iteration order of `models`.
orig_accs = []

# Converted once up front: the array is the same for every model.
X_train_values = X_train.to_numpy()

for name, model in models.items():
    try:
        # Classifiers exposing a two-column `predict_proba`: column 1 is
        # taken as the positive-class score.
        preds = model.predict_proba(X_train_values)[:, 1]
    except (AttributeError, IndexError):
        # No `predict_proba`, or it has no second column (presumably the
        # Keras models): use the raw `predict` output as the score instead.
        # Only these two errors are caught so that genuine failures and
        # keyboard interrupts are no longer silently swallowed.
        preds = model.predict(X_train_values).flatten()
    # Scores above 0.5 are classed as 1, everything else as 0.
    pred_class = np.where(preds > 0.5, 1, 0).flatten()
    orig_acc = accuracy_score(y_train, pred_class)
    orig_accs.append(orig_acc)
    # Label each line so the accuracy can be matched to its model.
    print(f"{name} accuracy: {orig_acc:.5f}")

Accuracy: 0.95140
Accuracy: 0.94146
Accuracy: 0.95360
Accuracy: 0.92442
Accuracy: 0.95140
Accuracy: 0.96276


# Randomly Shuffle

In [70]:
# Fixed seed for the column shuffles so that a fresh Restart & Run All
# reproduces the same accuracies.
PERMUTATION_SEED = 42

# One list per model (in `models` order), each holding the accuracy obtained
# after shuffling each column of `cols_to_shuffle` in turn.
all_new_accs = []

for name, model in models.items():
    # Re-seed for every model so that all models are scored on exactly the
    # same shuffled columns, which keeps the comparison between them fair.
    rng = np.random.default_rng(PERMUTATION_SEED)
    new_accs = []
    for col in cols_to_shuffle:
        # Start from a fresh copy each time so that only `col` is shuffled
        # and permutations of earlier columns are not carried across.
        dummy = X_train.copy()
        dummy[col] = rng.permutation(X_train[col].to_numpy())
        dummy_values = dummy.to_numpy()

        try:
            # Classifiers exposing a two-column `predict_proba`: column 1 is
            # taken as the positive-class score.
            preds = model.predict_proba(dummy_values)[:, 1]
        except (AttributeError, IndexError):
            # No `predict_proba`, or it has no second column (presumably the
            # Keras models): use the raw `predict` output instead. Only these
            # two errors are caught so real failures are not hidden.
            preds = model.predict(dummy_values).flatten()

        # Scores above 0.5 are classed as 1, everything else as 0.
        pred_class = np.where(preds > 0.5, 1, 0).flatten()
        new_acc = accuracy_score(y_train, pred_class)
        new_accs.append(new_acc)
    all_new_accs.append(new_accs)

# Plot Results

In [82]:
# Build one importance column per model, one row per shuffled feature.
# Guard against stale kernel state: the three collections must line up.
assert len(orig_accs) == len(all_new_accs) == len(models), (
    "orig_accs, all_new_accs and models are out of sync - re-run the cells above"
)

a = pd.DataFrame({'Feature': cols_to_shuffle})
for model_name, orig_acc, accs in zip(models, orig_accs, all_new_accs):
    # Drop in accuracy caused by shuffling each feature.
    change_in_acc = orig_acc - np.asarray(accs, dtype=float)
    # The importance of the feature is proportional to the change in
    # accuracy; normalise so that each model's importances sum to 1.
    total_change = change_in_acc.sum()
    if total_change == 0:
        # Relative importance is undefined when the changes sum to zero;
        # record NaN explicitly rather than dividing by zero.
        importance = np.full(change_in_acc.shape, np.nan)
    else:
        importance = change_in_acc / total_change
    a[model_name] = importance

# Rank features by the 'OLDNN' column and persist the table.
a = a.sort_values('OLDNN', ascending=False)
a.to_csv('permutation_importance.csv', index=False, header=True)