In [None]:
pip install ucimlrepo

In [None]:
from ucimlrepo import fetch_ucirepo 
  
# Download dataset id 19 from the UCI ML Repository.
car_evaluation = fetch_ucirepo(id=19)

# Feature frame and target frame (pandas DataFrames).
X, y = car_evaluation.data.features, car_evaluation.data.targets

# Show the dataset metadata, then the variable information.
print(car_evaluation.metadata)
print(car_evaluation.variables)


In [None]:
import pandas as pd

In [None]:
# Preview the first rows of the feature frame.
X.head()

In [None]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import make_column_transformer
# Dense one-hot encoder. The `sparse` keyword was renamed to `sparse_output` in
# scikit-learn 1.2 and removed in 1.4, so try the new name first and fall back to
# the old one only on releases that do not know `sparse_output`.
try:
    ohe = OneHotEncoder(sparse_output=False)
except TypeError:
    ohe = OneHotEncoder(sparse=False)

In [None]:
# One-hot encode the six listed feature columns; sparse_threshold=0 asks the
# transformer to always return a dense result.
categorical_columns = ['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety']
column_trans = make_column_transformer(
    (OneHotEncoder(), categorical_columns),
    sparse_threshold=0,
)

In [None]:
# Fit the column transformer on X and keep the encoded feature matrix.
X1 = column_trans.fit_transform(X)

In [None]:
# Wrap the encoded output in a DataFrame to preview its first rows.
pd.DataFrame(X1).head()

In [None]:
# Map the four class labels to the integers 1-4.
# Rebind `y` to a new frame instead of using inplace=True, so the frame owned by
# `car_evaluation.data.targets` is left untouched and the cell can be re-run safely.
# The explicit cast makes the integer dtype independent of pandas' implicit
# downcasting in `replace`; it fails loudly if any other label is present
# (assumes every target value is one of the four labels listed here).
y = y.replace(
    to_replace=['unacc', 'acc', 'good', 'vgood'],
    value=[1, 2, 3, 4],
).astype('int64')

In [None]:
# Preview the first entries of the 'class' target column.
y['class'].head()

In [None]:
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score

In [None]:
# Logistic-regression classifier created with its default constructor arguments.
logreg = LogisticRegression()

In [None]:
# Chain the column transformer and the classifier into one pipeline estimator.
logistic_regression_pipeline = make_pipeline(column_trans, logreg)

In [None]:
# Preview the un-encoded features that are passed to the pipeline.
X.head()

In [None]:
# Preview the first rows of the target frame.
y.head()

In [None]:
# Mean 10-fold cross-validated accuracy of the logistic-regression pipeline.
# The target is passed as the 1-D 'class' column rather than the one-column frame,
# which scikit-learn would otherwise have to flatten (emitting DataConversionWarning).
cross_val_score(logistic_regression_pipeline, X, y['class'], cv=10, scoring='accuracy').mean()

In [None]:
# Try 4 through 24 cross-validation folds and record the mean accuracy for each.
cv_range = list(range(4, 25))
# The target is passed as the 1-D 'class' column so scikit-learn does not have to
# flatten a one-column frame (and warn about it) on every fit.
scores = [
    cross_val_score(
        logistic_regression_pipeline, X, y['class'], cv=n_folds, scoring='accuracy'
    ).mean()
    for n_folds in cv_range
]

In [None]:
# import Matplotlib (scientific plotting library)
import matplotlib.pyplot as plt

# allow plots to appear within the notebook
%matplotlib inline

# Plot the mean cross-validated accuracy against the number of folds.
fig, ax = plt.subplots()
ax.plot(cv_range, scores)
ax.set_xlabel('cv_range')
ax.set_ylabel('scores')

In [None]:
# Try K=1 through K=25 neighbours and record the mean 10-fold cross-validated accuracy.
k_range = list(range(1, 26))
scores = []
from sklearn.neighbors import KNeighborsClassifier
for k in k_range:
    knn = KNeighborsClassifier(n_neighbors=k)
    knn_pipeline = make_pipeline(column_trans, knn)
    # Pass the 1-D 'class' column: a one-column frame would be flattened by
    # scikit-learn with a DataConversionWarning on each of the fits.
    scores.append(cross_val_score(knn_pipeline, X, y['class'], cv=10, scoring='accuracy').mean())

In [None]:
# Plot the mean cross-validated accuracy against the number of neighbours K.
fig, ax = plt.subplots()
ax.plot(k_range, scores)
ax.set_xlabel('k_range')
ax.set_ylabel('scores')

In [None]:

pip install pydot onnx onnxruntime skl2onnx onnx-tool

In [None]:
import pprint 
from numpy.testing import assert_almost_equal
import onnx
from onnx.tools.net_drawer import GetPydotGraph, GetOpNodeProducer 
import onnxruntime as rt
import skl2onnx
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType, StringTensorType
from skl2onnx.common.data_types import Int64TensorType

In [None]:
def convert_dataframe_schema(df, drop=None):
    """Build the ONNX input declaration for the columns of ``df``.

    Every kept column yields one ``(column_name, tensor_type)`` pair whose
    tensor type has shape ``[None, 1]``: ``int64`` columns map to
    ``Int64TensorType``, ``float64`` columns to ``FloatTensorType`` and any
    other dtype to ``StringTensorType``.

    Parameters
    ----------
    df : DataFrame-like
        Object whose ``columns`` and ``dtypes`` are inspected.
    drop : container of column names, optional
        Columns to leave out of the result. ``None`` keeps every column.

    Returns
    -------
    list of tuple
        ``(name, tensor_type)`` pairs, in column order.
    """
    excluded = () if drop is None else drop

    def _tensor_type_for(dtype):
        # Only these two exact dtypes get numeric tensors; the rest are strings.
        if dtype == "int64":
            return Int64TensorType([None, 1])
        if dtype == "float64":
            return FloatTensorType([None, 1])
        return StringTensorType([None, 1])

    return [
        (name, _tensor_type_for(dtype))
        for name, dtype in zip(df.columns, df.dtypes)
        if name not in excluded
    ]


# Derive the per-column ONNX input declaration from the feature frame.
initial_inputs = convert_dataframe_schema(X)

# Pretty-print the (column name, tensor type) pairs for inspection.
pprint.pprint(initial_inputs)

In [None]:
# Convert the pipeline to ONNX.
# cross_val_score only fits clones of the estimator, so the pipeline object itself
# has never been trained at this point. Fit it on the full data first; otherwise
# the conversion below fails and no file is written.
logistic_regression_pipeline.fit(X, y['class'])
try:
    model_onnx = convert_sklearn(
        logistic_regression_pipeline, "logistic_regression_pipeline", initial_inputs
    )
    # And save the serialized model next to the notebook.
    with open("pipeline_car_eval.onnx", "wb") as f:
        f.write(model_onnx.SerializeToString())
except Exception as e:
    # Best-effort export: report the conversion/save problem instead of stopping the notebook.
    print(e)