In [1]:
import pandas as pd
import seaborn as sns
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn import metrics

#### Data: https://archive.ics.uci.edu/ml/machine-learning-databases/car/car.data

In [2]:
# Column names for car.data, in file order; the last one is the target label.
columns = [
    'buying',
    'maint',
    'doors',
    'persons',
    'lug_boot',
    'safety',
    'class',
]
# Names are supplied explicitly, so the first line of the file is read as data
# (header=None is what pandas already assumes when `names` is given).
df = pd.read_csv("car.data", header=None, names=columns)

In [3]:
# Preview the first rows to confirm the columns were parsed as expected.
df.head()

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,class
0,vhigh,vhigh,2,2,small,low,unacc
1,vhigh,vhigh,2,2,small,med,unacc
2,vhigh,vhigh,2,2,small,high,unacc
3,vhigh,vhigh,2,2,med,low,unacc
4,vhigh,vhigh,2,2,med,med,unacc


In [4]:
# Number of rows loaded from the data file.
df.shape[0]

1728

In [5]:
# Frequency of each target label, most common first.
df.loc[:, 'class'].value_counts()

unacc    1210
acc       384
good       69
vgood      65
Name: class, dtype: int64

In [6]:
# Light-to-green colormap; `cm` is reused by the later crosstab cells.
cm = sns.light_palette("green", as_cmap=True)

# Class counts for every (buying, maint) combination, shaded by magnitude.
(
    pd.crosstab(index=[df['buying'], df['maint']], columns=df['class'])
    .style
    .background_gradient(cmap=cm)
)

Unnamed: 0_level_0,class,acc,good,unacc,vgood
buying,maint,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
high,high,36,0,72,0
high,low,36,0,72,0
high,med,36,0,72,0
high,vhigh,0,0,108,0
low,high,33,0,62,13
low,low,10,23,62,13
low,med,10,23,62,13
low,vhigh,36,0,72,0
med,high,36,0,72,0
med,low,10,23,62,13


In [7]:
# Class counts for every (doors, persons) combination, shaded by magnitude.
(
    pd.crosstab(index=[df['doors'], df['persons']], columns=df['class'])
    .style
    .background_gradient(cmap=cm)
)

Unnamed: 0_level_0,class,acc,good,unacc,vgood
doors,persons,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2,2,0,0,144,0
2,4,48,9,82,5
2,more,33,6,100,5
3,2,0,0,144,0
3,4,48,9,82,5
3,more,51,9,74,10
4,2,0,0,144,0
4,4,51,9,74,10
4,more,51,9,74,10
5more,2,0,0,144,0


In [8]:
# Class counts for every (lug_boot, safety) combination, shaded by magnitude.
(
    pd.crosstab(index=[df['lug_boot'], df['safety']], columns=df['class'])
    .style
    .background_gradient(cmap=cm)
)

Unnamed: 0_level_0,class,acc,good,unacc,vgood
lug_boot,safety,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
big,high,64,0,88,40
big,low,0,0,192,0
big,med,80,24,88,0
med,high,70,9,88,25
med,low,0,0,192,0
med,med,65,15,112,0
small,high,70,21,101,0
small,low,0,0,192,0
small,med,35,0,157,0


In [9]:
# Build the model inputs: one-hot encode the categorical features, turn the
# target label into an integer code, and hold out 30% of the rows for testing.
feature_columns = ['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety']
class_codes = {'unacc': 0, 'acc': 1, 'good': 2, 'vgood': 3}

# get_dummies returns a new frame, so `df` itself is left untouched
# (no defensive copy is needed beforehand).
df_sub = pd.get_dummies(df, columns=feature_columns)
df_sub['class'] = df_sub['class'].map(class_codes)

# .map() silently produces NaN for any label missing from class_codes;
# fail here with a clear message instead of later inside the classifier.
assert df_sub['class'].notna().all(), "unmapped label in 'class' column"

# Select features by name rather than by position, so the target can never
# leak into X (or a feature be dropped) if the column order changes.
X = df_sub.drop(columns='class')
y = df_sub['class']

# Fixed random_state keeps the split reproducible.
# NOTE(review): the classes are imbalanced; stratify=y may be worth considering,
# but it is not applied here because it would change the split and every
# metric computed from it.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)

In [10]:
# Support vector classifier with a linear kernel, fitted on the training split.
# fit() returns the estimator itself, so construction and fitting can be chained.
clf = svm.SVC(kernel='linear').fit(X_train, y_train)
clf

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='linear', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)

In [11]:
# Predicted class for each row of the held-out test features.
y_pred = clf.predict(X_test)

In [12]:
# Fraction of test rows whose predicted class equals the true class.
print(
    "Accuracy:",
    metrics.accuracy_score(y_true=y_test, y_pred=y_pred),
)

Accuracy: 0.928709055876686


In [13]:
# average=None reports precision separately for each class instead of one
# aggregated score.
print(
    "Precision:",
    metrics.precision_score(y_true=y_test, y_pred=y_pred, average=None),
)

Precision: [0.96994536 0.8220339  0.77777778 0.94117647]


In [14]:
# average=None reports recall separately for each class instead of one
# aggregated score.
print(
    "Recall:",
    metrics.recall_score(y_true=y_test, y_pred=y_pred, average=None),
)

Recall: [0.96467391 0.87387387 0.63636364 0.88888889]


In [15]:
# Sanity check: compare how often each class occurs in the true test labels
# with how often the classifier predicted it.
print(y_test.value_counts())
print(pd.Series(y_pred).value_counts())

0    368
1    111
2     22
3     18
Name: class, dtype: int64
0    366
1    118
2     18
3     17
dtype: int64
