# Installing the Libraries

In [None]:
!pip install onnxruntime



# Importing libraries

In [None]:
import pandas as pd
import numpy as np
import onnxruntime as rt
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
import pickle

# Data loading

In [None]:
# Read the full dataset; the last column is taken as the target (y) and
# every column before it as a feature (X).
data = pd.read_csv("data_public.csv")
X, y = data.iloc[:, :-1], data.iloc[:, -1]

# Data pre-processing used in creating the ONNX model

In [None]:
def remove_outliers(X, y, threshold=1.5):
    """Drop every row that has at least one feature outside the IQR fences.

    For each column of ``X`` the lower fence is ``Q1 - threshold * IQR`` and
    the upper fence is ``Q3 + threshold * IQR``, where Q1/Q3 are the 25th and
    75th percentiles of that column. A row is removed when any of its values
    is strictly below the lower fence or strictly above the upper fence.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature table; quantiles are computed per column.
    y : pandas.Series
        Targets. It is filtered with the boolean mask built from ``X``, so it
        should share ``X``'s index.
    threshold : float, default 1.5
        Multiplier applied to the IQR when building the fences.

    Returns
    -------
    tuple
        ``(X_cleaned, y_cleaned)`` containing only the retained rows, each
        with its index reset to 0..n-1.
    """
    lower_quartile = X.quantile(0.25)
    upper_quartile = X.quantile(0.75)
    spread = upper_quartile - lower_quartile

    low_fence = lower_quartile - threshold * spread
    high_fence = upper_quartile + threshold * spread

    # Flag individual cells first, then collapse to one keep/drop flag per row.
    is_outlier = (X < low_fence) | (X > high_fence)
    keep = ~is_outlier.any(axis=1)

    return X[keep].reset_index(drop=True), y[keep].reset_index(drop=True)

In [None]:
# Filter out the outlier rows, then report how many rows were kept.
X_cleaned, y_cleaned = remove_outliers(X, y)
print(
    f"Size before outlier removal: {len(data)}",
    f"Size after outlier removal: {len(X_cleaned)}",
    sep="\n",
)

Size before outlier removal: 1200000
Size after outlier removal: 999473


# Reading the same scaler file used in creating the ONNX model

In [None]:
# NOTE: unpickling can execute arbitrary code, so scaler.pkl must come from a
# trusted source.
with open("scaler.pkl", mode="rb") as scaler_file:
    scaler = pickle.load(scaler_file)

# Independent and dependent variables

In [None]:
# Scale the outlier-free features with the scaler loaded from scaler.pkl.
X_scaled = scaler.transform(X_cleaned)

# Shift the labels down by one: xgboost takes 0 as the first class,
# so [1, 2, 3] -> [0, 1, 2].
y_mapped = y_cleaned.sub(1)

# Inference Section

In [None]:
# Cast the scaled features to float32 before feeding them to the session.
X_scaled = np.array(X_scaled, dtype=np.float32)

# Open the exported model and look up the names of its first input and
# first output, which are needed to call run().
onnx_session = rt.InferenceSession("xgboost_model.onnx")
input_name, output_name = (
    onnx_session.get_inputs()[0].name,
    onnx_session.get_outputs()[0].name,
)

# Predictions

In [None]:
# Request only the first model output; run() returns a list with one entry
# per requested output name.
requested_outputs = onnx_session.run([output_name], {input_name: X_scaled})
predictions = requested_outputs[0]

# Add 1 to the integer-cast output so it lines up with the labels in
# y_cleaned (presumably undoing the 0-based class shift used for xgboost).
y_pred = 1 + predictions.astype(int)

# Accuracy and saving the class prediction

In [None]:
# Score the predictions against the labels kept after outlier removal.
accuracy = accuracy_score(y_true=y_cleaned, y_pred=y_pred)
print(f"Accuracy of the ONNX model: {accuracy:.2%}")

# Write true and predicted classes side by side, one row per sample.
output_data = y_cleaned.to_frame(name="Class").assign(Predicted_Class=y_pred)
output_data.to_csv("class_predictions.csv", index=False)
print("Predictions saved to 'class_predictions.csv'")

Accuracy of the ONNX model: 77.00%
Predictions saved to 'class_predictions.csv'
