In [4]:
from random import random

import numpy as np
import pandas as pd
import onnx
from sklearn import preprocessing
from onnxconverter_common import FloatTensorType
from skl2onnx import convert_sklearn
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.pipeline import Pipeline
import onnxruntime as rt

In [None]:
def split_data(data_path, feature_columns, target_column, test_size=0.2, random_state=42):
    """
    Split data into train and test sets.

    Parameters:
    - data_path (str): Path to the CSV file containing the dataset.
    - feature_columns (list | str | DataFrame | ndarray): Either the feature
      column name(s) to select from the CSV, or an already-built feature
      matrix with one row per CSV row (used as-is).
    - target_column (str): Name of the target column in the dataset.
    - test_size (float): Proportion of the dataset to include in the test split.
    - random_state (int): Random seed for reproducibility.

    Returns:
    - tuple: X_train, X_test, y_train, y_test

    Raises:
    - KeyError: If a requested feature or target column is not in the CSV.
    """
    data = pd.read_csv(data_path)

    if isinstance(feature_columns, (pd.DataFrame, np.ndarray)):
        # Backward-compatible path: the caller already built the feature matrix.
        X = feature_columns
    else:
        # Documented path: column names are resolved against the loaded CSV.
        # A bare string is treated as a single column name, not as characters.
        if isinstance(feature_columns, str):
            column_names = [feature_columns]
        else:
            column_names = list(feature_columns)
        X = data[column_names]

    y = data[target_column]
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )
    return X_train, X_test, y_train, y_test

def evaluate_onnx_model(onnx_model_path, X_test, y_test):
    """
    Evaluate an ONNX model's accuracy on a given test dataset.

    The features are cast to float32 and fed to the model's first declared
    input; the first output returned by the session is used as the predicted
    labels (presumably the label output of a converted classifier; verify
    for models with a different output order).

    Parameters:
    - onnx_model_path (str): Path to the ONNX model file.
    - X_test (DataFrame or array-like): Test features.
    - y_test (Series): True labels for the test set.

    Returns:
    - float: Accuracy of the ONNX model on the test dataset.
    """
    # Load ONNX model
    session = rt.InferenceSession(onnx_model_path)

    # Ask the model for its input name instead of assuming it was exported as 'X'.
    input_name = session.get_inputs()[0].name
    features = np.asarray(X_test, dtype=np.float32)
    y_pred_onnx = session.run(None, {input_name: features})[0]

    # Calculate accuracy
    accuracy = accuracy_score(y_test, y_pred_onnx)

    print(f'Accuracy of the ONNX model: {accuracy:.4f}')
    return accuracy

# Columns shuffled by default in the "irrelevant feature" test.
# Dummy example: replace with actual irrelevant feature names if known.
_DEFAULT_IRRELEVANT_FEATURES = (
    'afspraak_laatstejaar_aantal_woorden',
    'belemmering_hist_taal',
    'competentie_gedrevenheid_en_ambitie_tonen',
    'contacten_onderwerp_boolean__pre__intake',
    'deelname_act_actueel_projecten_uniek',
)


def metamorphic_test(onnx_model_path, X_test, y_test, irrelevant_features=None,
                     noise_std=0.01, scale_factor=1.1, random_state=None):
    """
    Perform metamorphic testing to evaluate the robustness of the ONNX model.

    Three transformed copies of X_test are scored against y_test:
    Gaussian noise added to every column, a per-column shuffle of the
    features assumed to be irrelevant, and a multiplication of every column
    by a constant factor.

    Parameters:
    - onnx_model_path (str): Path to the ONNX model file.
    - X_test (DataFrame): Test features.
    - y_test (Series): True labels for the test set.
    - irrelevant_features (list, optional): Columns to shuffle in test 2.
      Defaults to the built-in example list.
    - noise_std (float): Standard deviation of the noise added in test 1.
    - scale_factor (float): Multiplier applied to every feature in test 3.
    - random_state (int, optional): Seed for the noise and the shuffles.
      When None the global NumPy random state is used.

    Returns:
    - dict: Results of the metamorphic tests: "original_accuracy" plus a
      "tests" list whose entries hold "test_name", "accuracy", "robustness"
      (accuracy equal to the original accuracy) and "prediction_agreement"
      (fraction of predictions identical to the original predictions).

    Raises:
    - KeyError: If any of the irrelevant features is not a column of X_test.
    """
    if irrelevant_features is None:
        irrelevant_features = list(_DEFAULT_IRRELEVANT_FEATURES)
    else:
        irrelevant_features = list(irrelevant_features)

    # Assigning to an unknown column would silently append it and change the
    # width of the model input, so reject unknown names before doing any work.
    missing = [name for name in irrelevant_features if name not in X_test.columns]
    if missing:
        raise KeyError(f"Irrelevant features not found in X_test: {missing}")

    # Seeded generator when requested; otherwise keep using the global state.
    rng = np.random if random_state is None else np.random.RandomState(random_state)

    # Load ONNX model and look up its input name instead of assuming 'X'.
    session = rt.InferenceSession(onnx_model_path)
    input_name = session.get_inputs()[0].name

    def predict(features):
        # The model is fed float32 values; the first output is used as labels.
        return session.run(None, {input_name: features.to_numpy(dtype=np.float32)})[0]

    # Original predictions
    original_predictions = predict(X_test)

    # Initialize results dictionary
    results = {
        "original_accuracy": accuracy_score(y_test, original_predictions),
        "tests": []
    }

    def record(test_name, transformed):
        predictions = predict(transformed)
        accuracy = accuracy_score(y_test, predictions)
        # Equal accuracy can hide compensating label flips, so the share of
        # unchanged predictions is reported next to the accuracy-based flag.
        agreement = float(
            np.mean(np.asarray(predictions) == np.asarray(original_predictions))
        )
        results["tests"].append({
            "test_name": test_name,
            "accuracy": accuracy,
            "robustness": accuracy == results["original_accuracy"],
            "prediction_agreement": agreement,
        })

    # Metamorphic test 1: Adding small noise to every feature
    noise = rng.normal(0, noise_std, X_test.shape)
    record("Small Noise Addition", X_test + noise)

    # Metamorphic test 2: Shuffling irrelevant features
    X_test_shuffled = X_test.copy()
    for feature in irrelevant_features:
        X_test_shuffled[feature] = rng.permutation(X_test_shuffled[feature].values)
    record("Irrelevant Feature Shuffling", X_test_shuffled)

    # Metamorphic test 3: Scaling every feature
    record("Feature Scaling", X_test * scale_factor)

    # Print results
    print("Metamorphic Testing Results:")
    for test in results["tests"]:
        print(f"{test['test_name']}: Accuracy = {test['accuracy']:.4f}, Robustness = {test['robustness']}")

    return results

# Example usage
# Assumes the CSV dataset and the ONNX model file sit next to this notebook.
DATA_PATH = 'investigation_train_large_checked.csv'
MODEL_PATH = "model_1.onnx"

data = pd.read_csv(DATA_PATH)
target_column = "checked"
# Despite its name, `feature_columns` is the feature DataFrame itself:
# every column except the label and the 'Ja' / 'Nee' columns.
feature_columns = data.drop(columns=['checked', 'Ja', 'Nee'])

X_train, X_test, y_train, y_test = split_data(DATA_PATH, feature_columns, target_column)
accuracy = evaluate_onnx_model(MODEL_PATH, X_test, y_test)
results = metamorphic_test(MODEL_PATH, X_test, y_test)


Accuracy of the ONNX model: 0.9345
Metamorphic Testing Results:
Small Noise Addition: Accuracy = 0.9345, Robustness = True
Irrelevant Feature Shuffling: Accuracy = 0.9345, Robustness = True
Feature Scaling: Accuracy = 0.8533, Robustness = False
