#### Data: Car Evaluation

In [1]:
import pandas as pd
import numpy as np

from ucimlrepo import fetch_ucirepo 
  
# fetch dataset 
car_evaluation = fetch_ucirepo(id=19) 
  
# data (as pandas dataframes) 
X = car_evaluation.data.features 
y = car_evaluation.data.targets 

In [2]:
X.shape, y.shape

((1728, 6), (1728, 1))

In [3]:
# concating the two data extracted from the source
data = pd.concat([X,y], axis = 1)

In [4]:
data.shape

(1728, 7)

In [5]:
data.head(3)

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,class
0,vhigh,vhigh,2,2,small,low,unacc
1,vhigh,vhigh,2,2,small,med,unacc
2,vhigh,vhigh,2,2,small,high,unacc


In [6]:
data.columns

Index(['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety', 'class'], dtype='object')

In [7]:
data.isnull().sum()

buying      0
maint       0
doors       0
persons     0
lug_boot    0
safety      0
class       0
dtype: int64

In [8]:
data['class'].value_counts()

class
unacc    1210
acc       384
good       69
vgood      65
Name: count, dtype: int64

In [9]:
unseen_data = data.sample(frac=0.15, random_state=42)
df = data.drop(unseen_data.index)

In [10]:
unseen_data.to_csv('./Car_Eva/Car_Eva_unseen.csv', index=False)
df.to_csv('./Car_Eva/CarEvaluation.csv', index=False)

In [11]:
data = pd.read_csv('./Car_Eva/CarEvaluation.csv')

In [12]:
data.head(1)

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,class
0,vhigh,vhigh,2,2,small,low,unacc


In [13]:
from pycaret.classification import ClassificationExperiment
s = ClassificationExperiment()
s.setup(data, target = 'class', session_id = 42)

Unnamed: 0,Description,Value
0,Session id,42
1,Target,class
2,Target type,Multiclass
3,Target mapping,"acc: 0, good: 1, unacc: 2, vgood: 3"
4,Original data shape,"(1469, 7)"
5,Transformed data shape,"(1469, 22)"
6,Transformed train set shape,"(1028, 22)"
7,Transformed test set shape,"(441, 22)"
8,Categorical features,6
9,Preprocess,True


<pycaret.classification.oop.ClassificationExperiment at 0x1b1d44f9690>

In [14]:
best = s.compare_models()

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lightgbm,Light Gradient Boosting Machine,0.9844,0.0,0.9844,0.9865,0.9844,0.9657,0.9662,0.462
catboost,CatBoost Classifier,0.9825,0.0,0.9825,0.9843,0.9827,0.9615,0.962,1.535
gbc,Gradient Boosting Classifier,0.9747,0.0,0.9747,0.9772,0.9749,0.9446,0.9453,0.207
rf,Random Forest Classifier,0.9689,0.0,0.9689,0.9709,0.9675,0.931,0.932,0.091
et,Extra Trees Classifier,0.9679,0.0,0.9679,0.9692,0.9667,0.9288,0.9294,0.093
dt,Decision Tree Classifier,0.9621,0.0,0.9621,0.9638,0.9616,0.916,0.9168,0.031
lr,Logistic Regression,0.9115,0.0,0.9115,0.9194,0.909,0.8056,0.8092,0.87
knn,K Neighbors Classifier,0.9076,0.0,0.9076,0.899,0.896,0.788,0.7914,0.052
lda,Linear Discriminant Analysis,0.8959,0.0,0.8959,0.9092,0.8938,0.7767,0.7817,0.035
svm,SVM - Linear Kernel,0.89,0.0,0.89,0.8963,0.8861,0.7538,0.7585,0.038


In [15]:
s.models()

Unnamed: 0_level_0,Name,Reference,Turbo
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
lr,Logistic Regression,sklearn.linear_model._logistic.LogisticRegression,True
knn,K Neighbors Classifier,sklearn.neighbors._classification.KNeighborsCl...,True
nb,Naive Bayes,sklearn.naive_bayes.GaussianNB,True
dt,Decision Tree Classifier,sklearn.tree._classes.DecisionTreeClassifier,True
svm,SVM - Linear Kernel,sklearn.linear_model._stochastic_gradient.SGDC...,True
rbfsvm,SVM - Radial Kernel,sklearn.svm._classes.SVC,False
gpc,Gaussian Process Classifier,sklearn.gaussian_process._gpc.GaussianProcessC...,False
mlp,MLP Classifier,sklearn.neural_network._multilayer_perceptron....,False
ridge,Ridge Classifier,sklearn.linear_model._ridge.RidgeClassifier,True
rf,Random Forest Classifier,sklearn.ensemble._forest.RandomForestClassifier,True


In [16]:
lightgbm = s.create_model('lightgbm')

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.9515,0.0,0.9515,0.9645,0.9513,0.89,0.8926
1,0.9903,0.0,0.9903,0.9922,0.9908,0.9787,0.979
2,1.0,0.0,1.0,1.0,1.0,1.0,1.0
3,0.9903,0.0,0.9903,0.9907,0.9904,0.979,0.9793
4,0.9806,0.0,0.9806,0.9829,0.9811,0.9586,0.9595
5,0.9806,0.0,0.9806,0.9825,0.9811,0.9582,0.9585
6,0.9903,0.0,0.9903,0.9904,0.9902,0.9786,0.9789
7,0.9903,0.0,0.9903,0.9907,0.9899,0.9787,0.9789
8,0.9902,0.0,0.9902,0.9906,0.9893,0.978,0.9783
9,0.9804,0.0,0.9804,0.9804,0.9804,0.9566,0.9566


In [17]:
lightgbm

In [18]:
tune_lightgbm = s.tune_model(lightgbm)

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.9417,0.0,0.9417,0.9542,0.9445,0.8704,0.8724
1,0.9515,0.0,0.9515,0.9588,0.9534,0.896,0.8979
2,0.9612,0.0,0.9612,0.9671,0.9627,0.9168,0.9175
3,0.9806,0.0,0.9806,0.9807,0.9799,0.957,0.9573
4,0.9612,0.0,0.9612,0.9645,0.9612,0.9166,0.918
5,0.9417,0.0,0.9417,0.9437,0.9416,0.8729,0.8731
6,0.9709,0.0,0.9709,0.971,0.97,0.9349,0.9358
7,0.9515,0.0,0.9515,0.9532,0.9459,0.8907,0.8925
8,0.951,0.0,0.951,0.9555,0.9492,0.891,0.8923
9,0.951,0.0,0.951,0.969,0.9557,0.8943,0.897


Fitting 10 folds for each of 10 candidates, totalling 100 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).


In [19]:
final_lightgbm = s.finalize_model(tune_lightgbm)

In [20]:
final_lightgbm

In [21]:
s.evaluate_model(final_lightgbm)

interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Pipeline Plot', 'pipelin…

In [22]:
data = s.predict_model(final_lightgbm)

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,Light Gradient Boosting Machine,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [23]:
data[data['class'] !=data['prediction_label']]

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,class,prediction_label,prediction_score


In [24]:
data = pd.read_csv('./Car_Eva/CarEvaluation.csv')

In [25]:
data.head(1)

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,class
0,vhigh,vhigh,2,2,small,low,unacc


In [26]:
from pycaret.classification import setup, models, create_model, tune_model, finalize_model, save_model, evaluate_model
cls = setup(data, target='class', session_id = 42)

Unnamed: 0,Description,Value
0,Session id,42
1,Target,class
2,Target type,Multiclass
3,Target mapping,"acc: 0, good: 1, unacc: 2, vgood: 3"
4,Original data shape,"(1469, 7)"
5,Transformed data shape,"(1469, 22)"
6,Transformed train set shape,"(1028, 22)"
7,Transformed test set shape,"(441, 22)"
8,Categorical features,6
9,Preprocess,True


In [27]:
lightgbm = create_model('lightgbm')
lightgbm

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.9515,0.0,0.9515,0.9645,0.9513,0.89,0.8926
1,0.9903,0.0,0.9903,0.9922,0.9908,0.9787,0.979
2,1.0,0.0,1.0,1.0,1.0,1.0,1.0
3,0.9903,0.0,0.9903,0.9907,0.9904,0.979,0.9793
4,0.9806,0.0,0.9806,0.9829,0.9811,0.9586,0.9595
5,0.9806,0.0,0.9806,0.9825,0.9811,0.9582,0.9585
6,0.9903,0.0,0.9903,0.9904,0.9902,0.9786,0.9789
7,0.9903,0.0,0.9903,0.9907,0.9899,0.9787,0.9789
8,0.9902,0.0,0.9902,0.9906,0.9893,0.978,0.9783
9,0.9804,0.0,0.9804,0.9804,0.9804,0.9566,0.9566


In [28]:
tuned_lightgbm = tune_model(lightgbm)

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.9417,0.0,0.9417,0.9542,0.9445,0.8704,0.8724
1,0.9515,0.0,0.9515,0.9588,0.9534,0.896,0.8979
2,0.9612,0.0,0.9612,0.9671,0.9627,0.9168,0.9175
3,0.9806,0.0,0.9806,0.9807,0.9799,0.957,0.9573
4,0.9612,0.0,0.9612,0.9645,0.9612,0.9166,0.918
5,0.9417,0.0,0.9417,0.9437,0.9416,0.8729,0.8731
6,0.9709,0.0,0.9709,0.971,0.97,0.9349,0.9358
7,0.9515,0.0,0.9515,0.9532,0.9459,0.8907,0.8925
8,0.951,0.0,0.951,0.9555,0.9492,0.891,0.8923
9,0.951,0.0,0.951,0.969,0.9557,0.8943,0.897


Fitting 10 folds for each of 10 candidates, totalling 100 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).


In [29]:
final_lightgbm = finalize_model(tuned_lightgbm)
final_lightgbm

In [30]:
save_model(final_lightgbm, './Car_Eva/Car_Eva_Unseen')

Transformation Pipeline and Model Successfully Saved


(Pipeline(memory=Memory(location=None),
          steps=[('label_encoding',
                  TransformerWrapperWithInverse(exclude=None, include=None,
                                                transformer=LabelEncoder())),
                 ('numerical_imputer',
                  TransformerWrapper(exclude=None, include=[],
                                     transformer=SimpleImputer(add_indicator=False,
                                                               copy=True,
                                                               fill_value=None,
                                                               keep_empty_features=False,
                                                               missing_values=nan,
                                                               strategy='mean'))),
                 ('...
                  LGBMClassifier(boosting_type='gbdt', class_weight=None,
                                 colsample_bytree=1.0, importance_type='split

In [31]:
import joblib

from sklearn.metrics import classification_report
test = pd.read_csv('./Car_Eva/Car_Eva_unseen.csv')

In [32]:
test.shape

(259, 7)

In [33]:
cls = joblib.load('./Car_Eva/Car_Eva_Unseen.pkl')
cls

In [34]:
unseen_X = test.drop('class', axis=1)
y = test['class']

In [35]:
y_pred = cls.predict(unseen_X)
y_pred

0      unacc
1      unacc
2      unacc
3        acc
4      unacc
       ...  
254    unacc
255    unacc
256    unacc
257    unacc
258      acc
Name: class, Length: 259, dtype: object

In [36]:
print(classification_report(y, y_pred))

              precision    recall  f1-score   support

         acc       1.00      0.93      0.96        59
        good       0.83      1.00      0.91        10
       unacc       0.99      1.00      0.99       179
       vgood       1.00      1.00      1.00        11

    accuracy                           0.98       259
   macro avg       0.96      0.98      0.97       259
weighted avg       0.99      0.98      0.98       259



This classification result shows how well this model performed in predicting different classes in Car Evaluation dataset. Here's the explanation of each metric:

Precision: It measures the accuracy of the positive predictions made by the modeli.ele, for class 'acc', the precision is 1.00, which means that all instances predicted as 'acc' were correct.

Recall: It measures the ability of the model to correctly identify all positive instanci.emple, for class 'good', the recall is 1.00, which means that all actual instances of 'good' were correctly predicted by the model.

F1-score: It is the harmonic mean of precision and recall, providing a single score that balances both metrics. It's usefulween you want to consider both precision and recall together.

Support: It indicates the number of actual instances of each cCar Evaluations in the dataset.

Accuracy: It measures the overall correctness of the model across allthereforesses. Here, the accuracy is 0.98, which means that the model correctly predicted 98% of all instances in the dataset.

The "macro avg" and "weighted avg" are the averages of precision, recall, and F1-score across all classes. "Macro avg" gives equal weight to each class, while "weighted avg" takes into account the class imbalance by weighting each class's score by its support.

Overall, this classification result suggests that the model has excellent performance, with high precision, recall, and F1-score for most classes, and a high overall accuracy of 98%.