# Notebook to test an ONNX model in batch mode and single-record mode

# Imports

In [1]:
import joblib
import onnxruntime as ort
import numpy as np
from sklearn.metrics import accuracy_score, recall_score

## Load Preprocessed Data & Encoders:

In [2]:
# Restore the train/test splits and the encoder objects saved in the joblib dump.
# NOTE: joblib.load unpickles the file, so only load dumps from a trusted source.

data_folder = "data_preprocessed/"
file_joblib_dump = 'Data_Sachkonto_stratified_All3.pkl'

data = joblib.load(f"{data_folder}{file_joblib_dump}")

# Feature tables and targets of the stored split.
X_train, X_test = data["X_train"], data["X_test"]
y_train, y_test = data["y_train"], data["y_test"]

# Encoder objects stored alongside the data; kept for use in later cells.
target_label_encoder, column_encoders = (
    data["target_label_encoder"],
    data["column_encoders"],
)

# Load ONNX Model

In [3]:
import joblib

# Read the parameter dump that records which ONNX model file to use and
# which feature columns it was trained on.
data_folder = "data_preprocessed/"
file_joblib_dump = 'OnnxParams_Sachkonto_stratified_All3.pkl'

onnx_params = joblib.load(f"{data_folder}{file_joblib_dump}")

onnx_model_name, trained_features = (
    onnx_params["onnx_model_name"],
    onnx_params["trained_features"],
)

## Select Trained Features

In [4]:
# Show the trained feature list, then narrow both splits to exactly those columns.
print(trained_features)
X_train, X_test = (frame[trained_features] for frame in (X_train, X_test))

['Buchungskreis', 'Lieferant', 'Steuerkennzeichen']


In [None]:
# Batch evaluation: run the ONNX model on the whole test set in one call and
# report accuracy, macro recall and top-k accuracy.

# Imported first so that a missing helper module fails before inference runs.
from utils_bsak import top_k_accuracy_factory

# Load ONNX model
session = ort.InferenceSession(onnx_model_name)

# Author's note: X_test is assumed to be (23499, 3) and y_test (23499,).
# Cast the features to float32 (per the author's note, ONNX expects float32).
# This rebinds X_test to the converted copy.
X_test = X_test.astype(np.float32)

# Feed the features to the model's first declared input.
input_name = session.get_inputs()[0].name

# Run inference; passing None as the output list requests every model output.
outputs = session.run(None, {input_name: X_test.values})

# The first output is treated as the prediction.
y_pred = np.array(outputs[0])

# If that output is a 2-D matrix with more than one column, take the
# highest-scoring column per row as the predicted class.
if y_pred.ndim > 1 and y_pred.shape[1] > 1:
    y_pred = np.argmax(y_pred, axis=1)

# Calculate Accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")

# Calculate Recall (use average="micro" or "weighted" if needed)
recall = recall_score(y_test, y_pred, average="macro")
print(f"Recall: {recall:.4f}")

# NOTE: a plain confusion matrix can be obtained with
# sklearn.metrics.confusion_matrix(y_test, y_pred); it is not imported here.

# Calculate top-k accuracy.
top_k = 3
# NOTE(review): outputs[1] is presumably the per-class probability output of
# the model; confirm against the exported ONNX graph.
top_k_accuracy = top_k_accuracy_factory(top_k=top_k)
print(f"top_{top_k}_accuracy: {top_k_accuracy(y_true=y_test, y_pred_prob=outputs[1])}")

## Plot Straight Confusion Matrix

In [None]:
from utils_bsak import plot_confusion_matrix

# Confusion matrix of the batch predictions, labelled with the target
# encoder's class names.
plot_confusion_matrix(
    y_test,
    y_pred,
    labels=target_label_encoder.classes_,
    cmap="Blues",
)

In [None]:
from utils_bsak import plot_top_k_confusion_matrix

# Top-k variant of the confusion matrix, fed with the model's second output
# and the same top_k as the metric cell.
plot_top_k_confusion_matrix(
    y_test=y_test,
    y_pred_prob=outputs[1],
    labels=target_label_encoder.classes_,
    top_k=top_k,
    cmap="Blues",
    show_off_top_k_info=True,
)