In [33]:
import numpy as np

import pandas as pd

import seaborn as sns

import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings('ignore')

import os

# Column names for the car evaluation data file. The file is read with
# header=None, so the names are supplied explicitly: six attribute columns
# followed by the target column 'class'.
columns_name = ['buying', 'maint', 'doors', 'persons', 'lug_boot',
                'safety', 'class']

# Location of the raw data file. Set the CAR_DATA_PATH environment variable to
# run the notebook on another machine; when it is unset the original
# hard-coded location is used, so existing behaviour is unchanged.
file_path = os.environ.get(
    'CAR_DATA_PATH',
    '/Users/minjeong/Downloads/car+evaluation/car.data',
)
data = pd.read_csv(file_path, header=None, names=columns_name)
data

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,class
0,vhigh,vhigh,2,2,small,low,unacc
1,vhigh,vhigh,2,2,small,med,unacc
2,vhigh,vhigh,2,2,small,high,unacc
3,vhigh,vhigh,2,2,med,low,unacc
4,vhigh,vhigh,2,2,med,med,unacc
...,...,...,...,...,...,...,...
1723,low,low,5more,more,med,med,good
1724,low,low,5more,more,med,high,vgood
1725,low,low,5more,more,big,low,unacc
1726,low,low,5more,more,big,med,good


In [34]:
# Convert every column of the DataFrame to numbers.
# OrdinalEncoder is used to encode the categorical (string) columns; each
# encoder is given an explicit category list so the codes follow that order
# (the first listed category becomes 0.0, the next 1.0, and so on).
# NOTE: this cell overwrites the columns of `data` in place, so it must be run
# exactly once after the load cell; re-running it on already-encoded values
# will fail because the string categories are no longer present.
from sklearn.preprocessing import OrdinalEncoder

encoder_buying = OrdinalEncoder(categories=[['vhigh', 'high', 'med', 'low']])
data['buying'] = encoder_buying.fit_transform(data[['buying']])

encoder_maint = OrdinalEncoder(categories=[['vhigh', 'high', 'med', 'low']])
data['maint'] = encoder_maint.fit_transform(data[['maint']])

# Replace the '5more' value in the 'doors' column with 6.
# The remaining entries are still strings at this point, so cast the column to
# int; without the cast it stays object dtype with mixed str/int values.
data['doors'] = data['doors'].replace('5more', 6).astype(int)

# Replace the 'more' value in the 'persons' column with 5, then cast to int
# for the same reason as 'doors'.
data['persons'] = data['persons'].replace('more', 5).astype(int)

encoder_lug_boot = OrdinalEncoder(categories=[['small', 'med', 'big']])
data['lug_boot'] = encoder_lug_boot.fit_transform(data[['lug_boot']])

encoder_safety = OrdinalEncoder(categories=[['low', 'med', 'high']])
data['safety'] = encoder_safety.fit_transform(data[['safety']])

# The target column is encoded the same way: unacc=0, acc=1, good=2, vgood=3.
encoder_class = OrdinalEncoder(categories=[['unacc', 'acc', 'good', 'vgood']])
data['class'] = encoder_class.fit_transform(data[['class']])

data

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,class
0,0.0,0.0,2,2,0.0,0.0,0.0
1,0.0,0.0,2,2,0.0,1.0,0.0
2,0.0,0.0,2,2,0.0,2.0,0.0
3,0.0,0.0,2,2,1.0,0.0,0.0
4,0.0,0.0,2,2,1.0,1.0,0.0
...,...,...,...,...,...,...,...
1723,3.0,3.0,6,5,1.0,1.0,2.0
1724,3.0,3.0,6,5,1.0,2.0,3.0
1725,3.0,3.0,6,5,2.0,0.0,0.0
1726,3.0,3.0,6,5,2.0,1.0,2.0


In [37]:
# Feature matrix: every column except the target 'class'.
# drop() returns a new DataFrame, so `data` is left untouched and no
# preliminary copy is needed.
x = data.drop(columns=['class'])
x

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety
0,0.0,0.0,2,2,0.0,0.0
1,0.0,0.0,2,2,0.0,1.0
2,0.0,0.0,2,2,0.0,2.0
3,0.0,0.0,2,2,1.0,0.0
4,0.0,0.0,2,2,1.0,1.0
...,...,...,...,...,...,...
1723,3.0,3.0,6,5,1.0,1.0
1724,3.0,3.0,6,5,1.0,2.0
1725,3.0,3.0,6,5,2.0,0.0
1726,3.0,3.0,6,5,2.0,1.0


In [38]:
# Target vector: the 'class' column selected from `data`.
y = data.loc[:, 'class']
y

0       0.0
1       0.0
2       0.0
3       0.0
4       0.0
       ... 
1723    2.0
1724    3.0
1725    0.0
1726    2.0
1727    3.0
Name: class, Length: 1728, dtype: float64

In [39]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the rows for testing; random_state is fixed so the split is
# the same on every run.
# NOTE(review): no `stratify` argument is passed, so the class proportions of
# the train and test parts are not forced to match - confirm this is intended.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.25,
    random_state=0,
)

# Show the shapes of the four parts in the order (x_train, x_test, y_train, y_test).
tuple(part.shape for part in (x_train, x_test, y_train, y_test))

((1296, 6), (432, 6), (1296,), (432,))

## Support Vector Machine

In [40]:
# SVM: fit a support vector classifier on the training split and score it on
# the held-out test split.

from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.svm import SVC

# fit() returns the estimator itself, so construction and training are chained.
clf_svm = SVC(random_state=0).fit(x_train, y_train)
pred_svm = clf_svm.predict(x_test)

# Header, accuracy and confusion matrix, one per line.
print(
    "\n--- SVM Classifier ---",
    accuracy_score(y_test, pred_svm),
    confusion_matrix(y_test, pred_svm),
    sep="\n",
)


--- SVM Classifier ---
0.9143518518518519
[[286  10   0   0]
 [ 10  85   4   0]
 [  0   9  12   0]
 [  0   3   1  12]]


In [43]:
# LR: fit a logistic regression classifier on the training split and score it
# on the held-out test split.
# accuracy_score and confusion_matrix are the names imported in the SVM cell.

from sklearn.linear_model import LogisticRegression

clf_lr = LogisticRegression(random_state=0)
clf_lr.fit(X=x_train, y=y_train)

pred_lr = clf_lr.predict(X=x_test)

print("\n--- Logistic Regression Classifier ---")
print(accuracy_score(y_true=y_test, y_pred=pred_lr))
print(confusion_matrix(y_true=y_test, y_pred=pred_lr))


--- Logistic Regression Classifier ---
0.8379629629629629
[[280  15   1   0]
 [ 31  64   3   1]
 [  0  11   8   2]
 [  0   5   1  10]]


In [44]:
# DT: fit a decision tree classifier on the training split and score it on the
# held-out test split.
# accuracy_score and confusion_matrix are the names imported in the SVM cell.

from sklearn.tree import DecisionTreeClassifier

# fit() returns the estimator itself, so construction and training are chained.
clf_dt = DecisionTreeClassifier(random_state=0).fit(x_train, y_train)
pred_dt = clf_dt.predict(x_test)

# Build the three report lines (header, accuracy, confusion matrix) and print
# them in one call.
report_lines = (
    "\n--- Decision Tree Classifier ---",
    accuracy_score(y_test, pred_dt),
    confusion_matrix(y_test, pred_dt),
)
print("\n".join(map(str, report_lines)))


--- Decision Tree Classifier ---
0.9699074074074074
[[295   1   0   0]
 [  7  88   4   0]
 [  0   0  21   0]
 [  0   1   0  15]]


In [45]:
# RF: fit a random forest classifier on the training split and score it on the
# held-out test split.

from sklearn.ensemble import RandomForestClassifier
# Import both metrics used below so this cell does not rely on the SVM cell
# having been run for confusion_matrix.
from sklearn.metrics import accuracy_score, confusion_matrix

print("\n--- Random Forest ---")
rf_clf = RandomForestClassifier(random_state=0)
rf_clf.fit(x_train, y_train)
pred_rf = rf_clf.predict(x_test)
print(accuracy_score(y_test, pred_rf))
print(confusion_matrix(y_test, pred_rf))


--- Random Forest ---
0.9814814814814815
[[295   1   0   0]
 [  0  96   2   1]
 [  0   0  20   1]
 [  0   3   0  13]]
