In [17]:
import pandas as pd
import numpy as np
import onnxruntime as rt
import onnx
from skl2onnx.common.data_types import FloatTensorType
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from skl2onnx import convert_sklearn
from scipy.stats import ks_2samp

# Model 2

In [18]:
# Read the train/test splits from disk (edit these paths to use other data files)
train_data = pd.read_csv('data/train_data_2.csv')
test_data = pd.read_csv('data/test_data_2.csv')

# The 'checked' column is the prediction target
y_train = train_data['checked']
y_test = test_data['checked']

# Every remaining column is a feature, cast to float32 (the ONNX export later in
# this notebook declares a float tensor input)
X_train = train_data.drop(columns=['checked']).astype(np.float32)
X_test = test_data.drop(columns=['checked']).astype(np.float32)

In [19]:
# Fix the seed: a random forest draws bootstrap samples and feature subsets at
# random, so without random_state every re-run fits a different model and the
# metrics printed by the following cells are not reproducible.
model = RandomForestClassifier(random_state=42)

In [20]:
# Train on the training split and predict the held-out test split
# (fit() returns the fitted estimator, so the calls can be chained)
y_pred = model.fit(X_train, y_train).predict(X_test)

# Score the predictions: per-class report first, then the overall accuracy
original_accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(classification_report(y_test, y_pred))
print('Accuracy of the original model: ', original_accuracy)

              precision    recall  f1-score   support

           0       0.98      0.53      0.69      2278
           1       0.18      0.92      0.30       251

    accuracy                           0.57      2529
   macro avg       0.58      0.73      0.49      2529
weighted avg       0.90      0.57      0.65      2529

Accuracy of the original model:  0.5662317121391854


In [21]:
# Convert the fitted model to ONNX. The graph takes a single input named 'X':
# a float tensor with a dynamic batch dimension (None) and one column per
# training feature.
onnx_model = convert_sklearn(
    model, initial_types=[('X', FloatTensorType((None, X_train.shape[1])))],
    target_opset=12)

# Check the accuracy of the converted model.
# The execution provider is named explicitly: onnxruntime builds that ship
# providers other than CPU (e.g. onnxruntime-gpu) reject an InferenceSession
# created without `providers` since ORT 1.9. CPU-only builds behave as before.
sess = rt.InferenceSession(
    onnx_model.SerializeToString(), providers=['CPUExecutionProvider'])
y_pred_onnx = sess.run(None, {'X': X_test.values.astype(np.float32)})

# The first output of the session is used as the predicted labels
accuracy_onnx_model = accuracy_score(y_test, y_pred_onnx[0])
print('Accuracy of the ONNX model: ', accuracy_onnx_model)

Accuracy of the ONNX model:  0.5662317121391854


In [22]:
# Single source of truth for the artifact location, so the save and the reload
# below cannot drift apart
model_path = "model/model_2.onnx"

# Save the model
onnx.save(onnx_model, model_path)

# Load the model back from disk to verify the saved file round-trips.
# The execution provider is named explicitly: onnxruntime builds that ship
# providers other than CPU (e.g. onnxruntime-gpu) reject an InferenceSession
# created without `providers` since ORT 1.9. CPU-only builds behave as before.
new_session = rt.InferenceSession(model_path, providers=['CPUExecutionProvider'])

# Predict the target with the reloaded model
y_pred_onnx2 = new_session.run(None, {'X': X_test.values.astype(np.float32)})

# The first output of the session is used as the predicted labels
accuracy_onnx_model = accuracy_score(y_test, y_pred_onnx2[0])
print('Accuracy of the ONNX model: ', accuracy_onnx_model)

Accuracy of the ONNX model:  0.5662317121391854
