## 회귀 to ONNX

In [3]:
from sklearn.datasets import load_boston #scikit-learn의 datasets에서 sample data import
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

from sklearn.model_selection import train_test_split

from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Load the Boston housing dataset that ships with scikit-learn.
boston = load_boston()
print(boston.keys())  # show which fields the returned bunch exposes
# print(boston.DESCR)  # uncomment to read the full dataset description

# Wrap the feature matrix in a DataFrame and attach the target as "price".
df = pd.DataFrame(data=boston.data, columns=boston.feature_names)
df['price'] = boston.target

# Model inputs: only the LSTAT and RM columns; model target: price.
x = df[["LSTAT", "RM"]].copy()
y = df["price"]

dict_keys(['data', 'target', 'feature_names', 'DESCR', 'filename'])


In [4]:
# Split into train and test sets (80% / 20%, fixed seed for reproducibility).
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=5
)

# Show the shape of every split: x_train, x_test, y_train, y_test.
print(*(part.shape for part in (x_train, x_test, y_train, y_test)))

# Fit an ordinary least-squares regression on the training split.
# The fit call is left as the cell's last expression so the estimator repr is displayed.
model = LinearRegression()
model.fit(x_train, y_train)

(404, 2) (102, 2) (404,) (102,)


LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [5]:
# Evaluate the model on the training set.
y_train_predict = model.predict(x_train)
rmse = np.sqrt(mean_squared_error(y_train, y_train_predict))
r2 = r2_score(y_train, y_train_predict)
print("--train set")
# NOTE: the value printed after "price is" is the RMSE of the predicted price.
print("price is {}".format(rmse))
print("R2 Score is {}".format(r2))

# Evaluate the model on the test set.
# Test predictions get their own name so the train predictions above are not
# overwritten by values computed from a different split.
y_test_predict = model.predict(x_test)
rmse = np.sqrt(mean_squared_error(y_test, y_test_predict))
r2 = r2_score(y_test, y_test_predict)
print("--test set")
# NOTE: as above, this prints the RMSE, here for the test split.
print("price is {}".format(rmse))
print("R2 Score is {}".format(r2))

--train set
price is 5.6371293350711955
R2 Score is 0.6300745149331701
--test set
price is 5.137400784702911
R2 Score is 0.6628996975186952


In [6]:
# Convert into ONNX format
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType

# The declared ONNX input width must equal the number of features the model was
# fit on. The model is trained on two columns (LSTAT, RM), so a hard-coded 4
# would produce a graph that does not match the data fed to it at inference.
# Reading the width from the fitted coefficients keeps the two in sync.
n_features = model.coef_.shape[-1]
initial_type = [('float_input', FloatTensorType([None, n_features]))]
onx = convert_sklearn(model, initial_types=initial_type)

# Serialize the converted model to an .onnx file.
with open("regression_boston.onnx", "wb") as f:
    f.write(onx.SerializeToString())

print('회귀 onnx 모델 변형 완료')

회귀 onnx 모델 변형 완료


## Inference time 비교

In [7]:
# scikit learn score

import time

# Wall-clock benchmark of scikit-learn inference on the test set.
start = time.time()

# Call predict 1000 times in a row.
for _ in range(1000):
    pred_skl = model.predict(x_test)

end = time.time()
elapsed = end - start

print('skl 걸린시간:', np.double(elapsed))

skl 걸린시간: 0.42650699615478516


In [None]:
# Compute the prediction with ONNX Runtime

import onnxruntime as rt
import numpy as np

# Load the exported model and look up the names of its first input and output.
sess = rt.InferenceSession("regression_boston.onnx")
input_name = sess.get_inputs()[0].name
label_name = sess.get_outputs()[0].name

print(input_name, label_name)

# ONNX Runtime is fed numpy arrays, so convert the DataFrame to a float32
# ndarray. The conversion is loop-invariant and is done once, outside the
# timed region.
x_test_onx = x_test.values.astype(np.float32)

# Use the same number of runs as the scikit-learn benchmark so the two
# timings are directly comparable.
N_RUNS = 1000

start = time.time()

# Run inference N_RUNS times.
for _ in range(N_RUNS):
    pred_onx = sess.run([label_name], {input_name: x_test_onx})[0]

end = time.time()

print('onnxruntime 걸린시간:', np.double(end-start))