In [1]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import random
import pickle


In [2]:
from sklearn import preprocessing
# Allows us to split our data into training and testing data
from sklearn.model_selection import train_test_split
# Allows us to test parameters of classification algorithms and find the best one
from sklearn.model_selection import GridSearchCV
# Logistic Regression classification algorithm
from sklearn.linear_model import LogisticRegression , LinearRegression 
# Support Vector Machine classification algorithm
from sklearn.svm import SVC , LinearSVC , NuSVC, OneClassSVM, SVR
# Decision Tree classification algorithm
from sklearn.tree import DecisionTreeClassifier , ExtraTreeClassifier , DecisionTreeRegressor , ExtraTreeRegressor 
# K Nearest Neighbors classification algorithm
from sklearn.neighbors import KNeighborsClassifier , RadiusNeighborsClassifier , KNeighborsRegressor , RadiusNeighborsRegressor

In [3]:
#imports for onnx
import onnxruntime as rt 
from skl2onnx import convert_sklearn ,to_onnx
from skl2onnx.common.data_types import FloatTensorType

In [4]:
from sklearn import config_context
from tqdm import tqdm
from skl2onnx.tutorial import measure_time
from pandas import DataFrame
from onnx.reference import ReferenceEvaluator

In [5]:
obesity = pd.read_csv('estimation_of_obesity_levels_based_on_eating_habits_and_physical_condition.csv')
obesity.head()

Unnamed: 0,Height,Weight,family_history_with_overweight,SCC,MTRANS_Walking,FAVC_z,FCVC_minmax,NCP_z,CAEC_minmax,CH2O_minmax,FAF_minmax,TUE_z,CALC_z,Age_bin_minmax,NObeyesdad
0,1.62,64.0,1,0,0,2.766876,0.5,0.404704,0.333333,0.5,0.0,0.550985,1.439033,0.25,1
1,1.52,56.0,1,1,0,2.766876,1.0,0.404704,0.333333,1.0,1.0,1.092724,0.516552,0.25,1
2,1.8,77.0,1,0,0,2.766876,0.5,0.404704,0.333333,0.5,0.666667,0.550985,2.472136,0.5,1
3,1.8,87.0,0,0,1,2.766876,1.0,0.404704,0.333333,0.5,0.666667,1.092724,2.472136,0.75,2
4,1.78,89.8,0,0,0,2.766876,0.5,2.164116,0.333333,0.5,0.0,1.092724,0.516552,0.5,3


In [6]:
#lets build a multi logistic model and all the features
Y = obesity['NObeyesdad']
X = obesity.drop(['NObeyesdad'], axis=1)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0)

In [21]:
#lets build a multi support vector machine model and all the features with grid search
parameters ={'C':[0.1,1,10],'kernel':['linear','rbf']}
svm = SVC()
clf = GridSearchCV(svm, parameters, cv=10)
clf.fit(X_train, Y_train)
print('Best parameters:', clf.best_params_)
Y_pred = clf.predict(X_test)
print('Accuracy of SVM classifier on test set: {:.2f}'.format(clf.score(X_test, Y_test)))
#make the model with pipeline
#save the model


Best parameters: {'C': 10, 'kernel': 'linear'}
Accuracy of SVM classifier on test set: 0.96


In [24]:
import pickle
filename = 'svm_model.pkl'
pickle.dump(clf, open(filename, 'wb'))

loaded_model = pickle.load(open(filename , 'rb'))
result = loaded_model.score(X_test, Y_test)
print(result)


0.9569377990430622


# 1st trial of onnx issue(collum name not being used)

In [8]:
#save the model
initial_type = [('float_input', FloatTensorType([None, 14]))]
onx = convert_sklearn(clf, initial_types=initial_type)
with open("obesity_1st_trial.onnx", "wb") as f:
    f.write(onx.SerializeToString())

In [9]:
sess = rt.InferenceSession("obesity.onnx", providers=["CPUExecutionProvider"])
input_name = sess.get_inputs()[0].name
label_name = sess.get_outputs()[0].name

pred_onx = sess.run([label_name], {input_name: [X_test.iloc[2].values.astype(np.float32)]})[0]

In [10]:
pred_onx[0]

3

In [11]:
Y_test.iloc[2]

3

In [12]:
input_name

'float_input'

In [13]:
label_name

'label'

In [32]:
import time
#compare onx with sklearn time and accuracy
X_TEST = X_test.values.astype(np.float32)
Y_TEST = Y_test.values.astype(np.float32)

def to_onnx_model(X_TEST):
    sess = rt.InferenceSession("obesity.onnx", providers=["CPUExecutionProvider"])
    input_name = sess.get_inputs()[0].name
    label_name = sess.get_outputs()[0].name
    pred_onx = sess.run([label_name], {input_name: X_TEST})[0]
    pred = pred_onx[0]
    return pred

def to_sklearn_model(X_TEST):
    loaded_model = pickle.load(open(filename , 'rb'))
    Y_pred = loaded_model.score(X_TEST, Y_TEST)
    return Y_pred


def Compare_time(X_TEST):
    start = time.time()
    to_onnx_model(X_TEST)
    end = time.time()
    onnx_time = end - start
    start = time.time()
    to_sklearn_model(X_TEST)
    end = time.time()
    sklearn_time = end - start
    return onnx_time, sklearn_time

time_onnx, time_sklearn = Compare_time(X_TEST)
print("ONNX full set  time: ", time_onnx)
print("SKlearn full set time: ", time_sklearn)

#now compare for single prediction per time whole test set
def compare_single_prediction_onnx():
    start = time.time()
    onnx_preds= to_onnx_model([X_TEST[3]])
    end = time.time()
    onnx_time = end - start
    return onnx_time

def compare_single_prediction_sklearn():
    start = time.time()
    loaded_model = pickle.load(open(filename , 'rb'))
    Y_pred = loaded_model.predict([X_TEST[3]])

    end = time.time()
    sklearn_time = end - start
    return sklearn_time

onnx_time = compare_single_prediction_onnx()
sklearn_time = compare_single_prediction_sklearn()
print("ONNX single time: ", onnx_time)
print("SKlearn single time: ", sklearn_time)
    

ONNX full set  time:  0.04570412635803223
SKlearn full set time:  0.00673675537109375
ONNX single time:  0.033486127853393555
SKlearn single time:  0.0


