## Random Forest 모델
- Compare  scikit learn vs onnx runtime 

In [2]:
# Iris Data Load 
# Train a model.

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings('ignore')
import time

# load iris data
iris = load_iris()
X, y = iris.data, iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y)

print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

# Random Forest Classifier Fit
from sklearn.ensemble import RandomForestClassifier

# 모델 생성 및 학습
clr = RandomForestClassifier()
clr.fit(X_train, y_train)
clr

(112, 4) (38, 4) (112,) (38,)


RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [3]:
# Convert into ONNX format
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType

import onnxruntime as rt
import numpy as np

# onnx 모델 변환
initial_type = [('float_input', FloatTensorType([None, 4]))]
onx = convert_sklearn(clr, initial_types=initial_type)

# onnx 모델 지정
with open("rf_iris.onnx", "wb") as f:
    f.write(onx.SerializeToString())

## Inference time 비교

In [7]:
# scikit learn score

start = time.time()

# 1000번 predict
for i in range(1000):
    pred_skl = clr.predict(X_test)
    
end = time.time()

print('skl 걸린시간:', np.double(end-start))
pred_skl

skl 걸린시간: 7.474905729293823


array([2, 2, 2, 0, 1, 2, 1, 1, 0, 2, 1, 1, 2, 1, 1, 2, 2, 2, 1, 0, 1, 1,
       0, 2, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 2, 2, 1])

In [8]:
# Compute the prediction with ONNX Runtime
sess = rt.InferenceSession("rf_iris.onnx")
input_name = sess.get_inputs()[0].name
label_name = sess.get_outputs()[0].name

start = time.time()

# 1000번 predict
for i in range(1000):
    pred_onx = sess.run([label_name], {input_name: X_test.astype(np.float32)})[0]
end = time.time()

print('onnxruntime 걸린시간:', np.double(end-start))
pred_onx

onnxruntime 걸린시간: 0.2982659339904785


array([2, 2, 2, 0, 1, 2, 1, 1, 0, 2, 1, 1, 2, 1, 1, 2, 2, 2, 1, 0, 1, 1,
       0, 2, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 2, 2, 1], dtype=int64)

## Scoring - Accuracy 비교

In [6]:
# Accuracy 평가지표
from sklearn.metrics import accuracy_score

# Predict에 대한 Accuracy Score
print('accuracy_score - skl', float(accuracy_score(y_test, pred_skl))) 
print('accuracy_score - onnx', float(accuracy_score(y_test, pred_onx))) 


accuracy_score - skl 0.9210526315789473
accuracy_score - onnx 0.9210526315789473
