## Random Forest 모델
- Compare  scikit learn vs onnx runtime 

In [28]:
# Iris Data Load 
# Train a model.

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings('ignore')
import time

# load iris data
iris = load_iris()
X, y = iris.data, iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y)

print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

# KNN Model
from sklearn.neighbors import KNeighborsClassifier

# 모델 생성 및 학습
clr = KNeighborsClassifier(n_neighbors=3)
clr.fit(X_train, y_train)
clr

(112, 4) (38, 4) (112,) (38,)


KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=3, p=2,
                     weights='uniform')

In [29]:
# Convert into ONNX format
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType

import onnxruntime as rt
import numpy as np

# onnx 모델 변환
initial_type = [('float_input', FloatTensorType([None, 4]))]
onx = convert_sklearn(clr, initial_types=initial_type)

# onnx 모델 지정
with open("knn_iris.onnx", "wb") as f:
    f.write(onx.SerializeToString())

## Inference time 비교

In [30]:
# scikit learn score

start = time.time()

# 1000번 predict
for i in range(1000):
    pred_skl = clr.predict(X_test)
end = time.time()

print('skl 걸린시간:', np.double(end-start))
pred_skl

skl 걸린시간: 1.5183219909667969


array([0, 1, 0, 2, 1, 1, 1, 1, 2, 0, 2, 1, 1, 2, 1, 1, 2, 0, 0, 0, 0, 1,
       2, 1, 0, 0, 1, 2, 0, 1, 0, 0, 2, 1, 2, 2, 2, 0])

In [31]:
# Compute the prediction with ONNX Runtime
sess = rt.InferenceSession('knn_iris.onnx')
input_name = sess.get_inputs()[0].name
label_name = sess.get_outputs()[0].name

start = time.time()

# 1000번 predict
for i in range(1000):
    pred_onx = sess.run([label_name], {input_name: X_test.astype(np.float32)})[0]
end = time.time()

print('onnxruntime 걸린시간:', np.double(end-start))
pred_onx

onnxruntime 걸린시간: 1.7948105335235596


array([0, 1, 0, 2, 1, 1, 1, 1, 2, 0, 2, 1, 1, 2, 1, 1, 2, 0, 0, 0, 0, 1,
       2, 1, 0, 0, 1, 2, 0, 1, 0, 0, 2, 1, 2, 2, 2, 0], dtype=int64)

## Scoring - Accuracy 비교

In [32]:
# Accuracy 평가지표
from sklearn.metrics import accuracy_score

# Predict에 대한 Accuracy Score
print('accuracy_score - skl', float(accuracy_score(y_test, pred_skl))) 
print('accuracy_score - onnx', float(accuracy_score(y_test, pred_onx))) 


accuracy_score - skl 0.9736842105263158
accuracy_score - onnx 0.9736842105263158
