<a href="https://colab.research.google.com/github/Alexeanred/Car-Evaluation-Web-App/blob/main/Car_Evaluation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import recall_score, precision_score, accuracy_score, plot_confusion_matrix, classification_report, f1_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OrdinalEncoder
from sklearn.tree import DecisionTreeClassifier

In [None]:
# Location of the car-evaluation CSV; despite the name `url` this is a local file path.
# NOTE(review): '/content/...' looks like a Colab runtime location — confirm the file
# has been uploaded there before running this cell.
url = '/content/car_evaluation.csv'
df = pd.read_csv(url)
# Preview the first rows of the loaded frame.
df.head()

Unnamed: 0,buying,maintance,doors,persons,lug_boot,safety,class
0,vhigh,vhigh,2,2,small,low,unacc
1,vhigh,vhigh,2,2,small,med,unacc
2,vhigh,vhigh,2,2,small,high,unacc
3,vhigh,vhigh,2,2,med,low,unacc
4,vhigh,vhigh,2,2,med,med,unacc


In [None]:
# Disabled: manual assignment of the column names, kept for reference only.
#cols = ['buying','maintance','doors','persons','lug_boot','safety','class']
#df.columns = cols
#df.head(5)

In [None]:
# Summarise column dtypes and non-null counts.
df.info()
# No missing values: every column reports as many non-null entries as there are rows.

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1728 entries, 0 to 1727
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   buying     1728 non-null   object
 1   maintance  1728 non-null   object
 2   doors      1728 non-null   object
 3   persons    1728 non-null   object
 4   lug_boot   1728 non-null   object
 5   safety     1728 non-null   object
 6   class      1728 non-null   object
dtypes: object(7)
memory usage: 94.6+ KB


In [None]:
def show(df):
  """Print each column name of ``df`` alongside the distinct values it holds.

  One line is written per column, in column order. Nothing is returned.
  """
  template = "Feature: {} with {} Levels"
  for column in df.columns:
    distinct_values = df[column].unique()
    print(template.format(column, distinct_values))
# List the distinct values found in every column of the loaded frame.
show(df)

Feature: buying with ['vhigh' 'high' 'med' 'low'] Levels
Feature: maintance with ['vhigh' 'high' 'med' 'low'] Levels
Feature: doors with ['2' '3' '4' '5more'] Levels
Feature: persons with ['2' '4' 'more'] Levels
Feature: lug_boot with ['small' 'med' 'big'] Levels
Feature: safety with ['low' 'med' 'high'] Levels
Feature: class with ['unacc' 'acc' 'vgood' 'good'] Levels


In [None]:
# Show the dtype pandas assigned to each column.
df.dtypes

buying       object
maintance    object
doors        object
persons      object
lug_boot     object
safety       object
class        object
dtype: object

In [None]:
# Ordinal level orderings (lowest -> highest), written out explicitly per column.
# Deriving them from df[col].unique() and reversing slices only yields the right order
# while the CSV rows happen to appear in a convenient sequence, and it stops working
# altogether once df has been overwritten with encoded numeric values.
ordered_levels = {
    'buying':    ['low', 'med', 'high', 'vhigh'],
    'maintance': ['low', 'med', 'high', 'vhigh'],
    'doors':     ['2', '3', '4', '5more'],
    'persons':   ['2', '4', 'more'],
    'lug_boot':  ['small', 'med', 'big'],
    'safety':    ['low', 'med', 'high'],
    'class':     ['unacc', 'acc', 'good', 'vgood'],
}

# One object array per column, in df's column order; a column without a declared
# ordering raises KeyError here instead of being encoded in an arbitrary order.
categories = [np.array(ordered_levels[col], dtype=object) for col in df.columns]

In [None]:
# Inspect the per-column level orderings that are handed to the encoder.
categories

[array(['low', 'med', 'high', 'vhigh'], dtype=object),
 array(['low', 'med', 'high', 'vhigh'], dtype=object),
 array(['2', '3', '4', '5more'], dtype=object),
 array(['2', '4', 'more'], dtype=object),
 array(['small', 'med', 'big'], dtype=object),
 array(['low', 'med', 'high'], dtype=object),
 array(['unacc', 'acc', 'good', 'vgood'], dtype=object)]

In [None]:
# Replace every column (features and target alike) with ordinal codes: code k stands for
# the k-th entry of that column's list in `categories`.
# OrdinalEncoder is imported with the other dependencies in the notebook's import cell.
# NOTE: df is overwritten in place, so re-running this cell on the already-encoded frame
# is expected to fail because the values no longer match the string categories; reload
# the CSV first.
oe = OrdinalEncoder(categories= categories)
df[df.columns] = oe.fit_transform(df[df.columns])

In [None]:
# Preview the first five rows of the frame after encoding.
df.head(5)

Unnamed: 0,buying,maintance,doors,persons,lug_boot,safety,class
0,3.0,3.0,0.0,0.0,0.0,0.0,0.0
1,3.0,3.0,0.0,0.0,0.0,1.0,0.0
2,3.0,3.0,0.0,0.0,0.0,2.0,0.0
3,3.0,3.0,0.0,0.0,1.0,0.0,0.0
4,3.0,3.0,0.0,0.0,1.0,1.0,0.0


In [None]:
# Separate the target column from the predictors.
y = df['class']
x = df.drop(columns = ['class'])

# Hold out 30% of the rows for testing; the fixed random_state keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.3, random_state = 42)

# Report the shape of each piece of the split.
for template, part in (
    ("X_train: {}", x_train),
    ("X_test: {}", x_test),
    ("Y_train: {}", y_train),
    ("Y_test: {}", y_test),
):
  print(template.format(part.shape))

X_train: (1209, 6)
X_test: (519, 6)
Y_train: (1209,)
Y_test: (519,)


In [None]:
def evaluation_parametrics(y_train,yp_train,y_test,yp_test):
  """Print a train-versus-test evaluation summary for one set of predictions.

  Writes the classification report of each split, then accuracy followed by
  weighted-average precision, recall and F1 score, each rounded to two
  decimals. Sections are separated by a dashed rule. Nothing is returned.

  Args:
    y_train: true labels of the training split.
    yp_train: predicted labels for the training split.
    y_test: true labels of the test split.
    yp_test: predicted labels for the test split.
  """
  rule = "--------------------------------------------------------------------------"
  weighted = {"average": "weighted"}

  print(rule)
  print("Classification Report for Train Data")
  print(classification_report(y_train, yp_train))
  print("Classification Report for Test Data")
  print(classification_report(y_test, yp_test))
  print(rule)

  # (train template, test template, scoring function, extra keyword arguments)
  sections = (
    ("Accuracy on Train Data is: {}", "Accuracy on Test Data is: {}", accuracy_score, {}),
    ("Precision on Train Data is: {}", "Precision on Test Data is: {}", precision_score, weighted),
    ("Recall on Train Data is: {}", "Recall on Test Data is: {}", recall_score, weighted),
    ("F1 Score on Train Data is: {}", "F1 Score on Test Data is: {}", f1_score, weighted),
  )
  for train_line, test_line, scorer, options in sections:
    print(train_line.format(round(scorer(y_train, yp_train, **options), 2)))
    print(test_line.format(round(scorer(y_test, yp_test, **options), 2)))
    print(rule)

In [None]:
# Logistic regression: fit on the training split (fit() returns the estimator itself).
lr = LogisticRegression(max_iter = 1000,random_state = 48).fit(x_train,y_train)

# Predict both splits so train and test scores can be compared side by side.
yp_train, yp_test = lr.predict(x_train), lr.predict(x_test)

evaluation_parametrics(y_train,yp_train,y_test,yp_test)

--------------------------------------------------------------------------
Classification Report for Train Data
              precision    recall  f1-score   support

         0.0       0.88      0.93      0.90       852
         1.0       0.66      0.58      0.62       266
         2.0       0.53      0.38      0.44        50
         3.0       0.79      0.63      0.70        41

    accuracy                           0.82      1209
   macro avg       0.71      0.63      0.67      1209
weighted avg       0.81      0.82      0.82      1209

Classification Report for Test Data
              precision    recall  f1-score   support

         0.0       0.87      0.93      0.90       358
         1.0       0.66      0.58      0.62       118
         2.0       0.55      0.32      0.40        19
         3.0       0.75      0.75      0.75        24

    accuracy                           0.82       519
   macro avg       0.71      0.64      0.67       519
weighted avg       0.81      0.82    

In [None]:
# Decision tree; depth is capped at 7 to avoid overfitting.
dt = DecisionTreeClassifier(random_state = 48, max_depth = 7)
dt.fit(x_train, y_train)

# Predictions for the training split first, then the test split.
yp_train, yp_test = (dt.predict(split) for split in (x_train, x_test))

evaluation_parametrics(y_train, yp_train, y_test, yp_test)

--------------------------------------------------------------------------
Classification Report for Train Data
              precision    recall  f1-score   support

         0.0       0.98      0.97      0.98       852
         1.0       0.86      0.94      0.90       266
         2.0       0.81      0.76      0.78        50
         3.0       0.93      0.68      0.79        41

    accuracy                           0.95      1209
   macro avg       0.90      0.84      0.86      1209
weighted avg       0.95      0.95      0.95      1209

Classification Report for Test Data
              precision    recall  f1-score   support

         0.0       0.97      0.98      0.97       358
         1.0       0.86      0.81      0.83       118
         2.0       0.52      0.58      0.55        19
         3.0       0.77      0.83      0.80        24

    accuracy                           0.92       519
   macro avg       0.78      0.80      0.79       519
weighted avg       0.92      0.92    

In [None]:
# Random forest using the same depth cap (7) as the decision tree.
rf = RandomForestClassifier(max_depth = 7, random_state = 48).fit(x_train, y_train)

# Predictions for the training split first, then the test split.
yp_train, yp_test = map(rf.predict, (x_train, x_test))

evaluation_parametrics(y_train, yp_train, y_test, yp_test)

--------------------------------------------------------------------------
Classification Report for Train Data
              precision    recall  f1-score   support

         0.0       1.00      0.98      0.99       852
         1.0       0.89      0.99      0.94       266
         2.0       0.95      0.78      0.86        50
         3.0       0.97      0.76      0.85        41

    accuracy                           0.97      1209
   macro avg       0.95      0.88      0.91      1209
weighted avg       0.97      0.97      0.97      1209

Classification Report for Test Data
              precision    recall  f1-score   support

         0.0       0.98      0.98      0.98       358
         1.0       0.88      0.89      0.89       118
         2.0       0.71      0.63      0.67        19
         3.0       0.88      0.88      0.88        24

    accuracy                           0.94       519
   macro avg       0.86      0.84      0.85       519
weighted avg       0.94      0.94    