## Linear Regerssion 모델
- Compare  scikit learn vs onnx runtime 

In [5]:
# Boston Data Load 
from sklearn.datasets import load_boston 
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')
import time

# load boston data
boston = load_boston() # boston dataset load

# 데이터 프레임으로 변환
df = pd.DataFrame(data=boston.data, columns=boston.feature_names)
df['price'] = boston.target

x = pd.DataFrame(np.c_[df["LSTAT"], df["RM"]], columns=["LSTAT", "RM"])
y = df["price"]

x_train, x_test, y_train, y_test = train_test_split(x, y)
print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)

# Linear regression Model
from sklearn.linear_model import LinearRegression

# 모델 생성 및 학습
model = LinearRegression()
model.fit(x_train, y_train)
model

(379, 2) (127, 2) (379,) (127,)


LinearRegression()

In [9]:
# Convert into ONNX format
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType

import onnxruntime as rt
import numpy as np

# onnx 모델 변환
initial_type = [('float_input', FloatTensorType([None, 2]))]
onx = convert_sklearn(model, initial_types=initial_type)

# onnx 모델 지정
with open("linear_regression.onnx", "wb") as f:
    f.write(onx.SerializeToString())

## Inference time 비교

In [40]:
# scikit learn score
start = time.time()

# 1000번 predict
for i in range(1000):
    pred_skl = model.predict(x_test.values)
end = time.time()

print('skl 걸린시간:', np.double(end-start))

# Compute the prediction with ONNX Runtime
sess = rt.InferenceSession('linear_regression.onnx')
input_name = sess.get_inputs()[0].name
label_name = sess.get_outputs()[0].name

start = time.time()

# 1000번 predict
for i in range(1000):
    pred_onx = sess.run([label_name], {input_name: x_test.values.astype(np.float32)})[0]
end = time.time()

print('onnxruntime 걸린시간:', np.double(end-start))

skl 걸린시간: 0.074798583984375
onnxruntime 걸린시간: 0.0359036922454834


## Scoring - Accuracy 비교

In [39]:

from sklearn.metrics import mean_squared_error, r2_score

# skl rmse, r2
skl_rmse = (np.sqrt(mean_squared_error(y_test, pred_skl)))
skl_r2 = r2_score(y_test, y_train_predict)

# onnx rmse, r2
onnx_rmse = (np.sqrt(mean_squared_error(y_test, pred_onx)))
onnx_r2 = r2_score(y_test, pred_onx)

print("skl - RMSE : {}, R2 Score : {}".format(rmse, r2))
print("onnx - RMSE : {}, R2 Score : {}".format(rmse, r2))

skl - RMSE : 4.7971590313883885, R2 Score : 0.6778666277133942
onnx - RMSE : 4.7971590313883885, R2 Score : 0.6778666277133942
