In [1]:
#import necessary packages
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score,confusion_matrix
from collections import Counter
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC

In [2]:
data=pd.read_csv("car_evaluation.csv")
data.head()

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,outcome
0,vhigh,vhigh,2,2,small,low,unacc
1,vhigh,vhigh,2,2,small,med,unacc
2,vhigh,vhigh,2,2,small,high,unacc
3,vhigh,vhigh,2,2,med,low,unacc
4,vhigh,vhigh,2,2,med,med,unacc


In [3]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1728 entries, 0 to 1727
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   buying    1728 non-null   object
 1   maint     1728 non-null   object
 2   doors     1728 non-null   int64 
 3   persons   1728 non-null   int64 
 4   lug_boot  1728 non-null   object
 5   safety    1728 non-null   object
 6   outcome   1728 non-null   object
dtypes: int64(2), object(5)
memory usage: 94.6+ KB


In [4]:
data.isnull().sum()

buying      0
maint       0
doors       0
persons     0
lug_boot    0
safety      0
outcome     0
dtype: int64

In [5]:
data.shape

(1728, 7)

In [6]:
data.describe()

Unnamed: 0,doors,persons
count,1728.0,1728.0
mean,3.5,3.666667
std,1.118358,1.24758
min,2.0,2.0
25%,2.75,2.0
50%,3.5,4.0
75%,4.25,5.0
max,5.0,5.0


In [7]:
X=data.iloc[:,:-1]
y =data.outcome


In [8]:
# encoded the values
enc=LabelEncoder()
X.buying=enc.fit_transform(X.buying)
X.maint=enc.fit_transform(X.maint)
X.lug_boot=enc.fit_transform(X.lug_boot)
X.safety=enc.fit_transform(X.safety)
X.head(3)

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety
0,3,3,2,2,2,1
1,3,3,2,2,2,2
2,3,3,2,2,2,0


In [9]:
# train the data 
X_test,X_train,y_test,y_train=train_test_split(X,y,random_state=10)

In [10]:
model=SVC(C=100,gamma=0.1,kernel='rbf')#value ranges C= 0.1 to 1000,gamma= 0.01 to 10,kernel= rbf for radial bases function 
model.fit(X_train,y_train)#training the model 

SVC(C=100, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=0.1, kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

In [11]:
y_predict=model.predict(X_test)
accuracy_score(y_test,y_predict)

0.9452160493827161

In [12]:
print(Counter(y_test))
pd.crosstab(y_test,y_predict)

Counter({'unacc': 903, 'acc': 300, 'good': 50, 'vgood': 43})


col_0,acc,good,unacc,vgood
outcome,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
acc,268,15,17,0
good,4,46,0,0
unacc,29,4,870,0
vgood,2,0,0,41


In [13]:
#grid search cross validation
from sklearn.model_selection import GridSearchCV
parameters ={'kernel':['rbf','linear'],
            'C':[1,10,100,500],
            'gamma':[0.01,0.1,0.5,1.2]}
grid_model=GridSearchCV(SVC(random_state=10),parameters,verbose=3)
grid_model.fit(X_train,y_train)

Fitting 5 folds for each of 32 candidates, totalling 160 fits
[CV] C=1, gamma=0.01, kernel=rbf .....................................
[CV] ......... C=1, gamma=0.01, kernel=rbf, score=0.713, total=   0.0s
[CV] C=1, gamma=0.01, kernel=rbf .....................................
[CV] ......... C=1, gamma=0.01, kernel=rbf, score=0.713, total=   0.0s
[CV] C=1, gamma=0.01, kernel=rbf .....................................
[CV] ......... C=1, gamma=0.01, kernel=rbf, score=0.709, total=   0.0s
[CV] C=1, gamma=0.01, kernel=rbf .....................................
[CV] ......... C=1, gamma=0.01, kernel=rbf, score=0.709, total=   0.0s
[CV] C=1, gamma=0.01, kernel=rbf .....................................
[CV] ......... C=1, gamma=0.01, kernel=rbf, score=0.709, total=   0.0s
[CV] C=1, gamma=0.01, kernel=linear ..................................
[CV] ...... C=1, gamma=0.01, kernel=linear, score=0.713, total=   0.0s
[CV] C=1, gamma=0.01, kernel=linear ..................................
[CV] ...... C=1

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s



[CV] C=1, gamma=0.1, kernel=rbf ......................................
[CV] .......... C=1, gamma=0.1, kernel=rbf, score=0.756, total=   0.0s
[CV] C=1, gamma=0.1, kernel=linear ...................................
[CV] ....... C=1, gamma=0.1, kernel=linear, score=0.713, total=   0.0s
[CV] C=1, gamma=0.1, kernel=linear ...................................
[CV] ....... C=1, gamma=0.1, kernel=linear, score=0.747, total=   0.0s
[CV] C=1, gamma=0.1, kernel=linear ...................................
[CV] ....... C=1, gamma=0.1, kernel=linear, score=0.709, total=   0.0s
[CV] C=1, gamma=0.1, kernel=linear ...................................
[CV] ....... C=1, gamma=0.1, kernel=linear, score=0.733, total=   0.0s
[CV] C=1, gamma=0.1, kernel=linear ...................................
[CV] ....... C=1, gamma=0.1, kernel=linear, score=0.733, total=   0.0s
[CV] C=1, gamma=0.5, kernel=rbf ......................................
[CV] .......... C=1, gamma=0.5, kernel=rbf, score=0.805, total=   0.0s
[CV] 

[CV] ....... C=100, gamma=0.01, kernel=rbf, score=0.860, total=   0.0s
[CV] C=100, gamma=0.01, kernel=rbf ...................................
[CV] ....... C=100, gamma=0.01, kernel=rbf, score=0.849, total=   0.0s
[CV] C=100, gamma=0.01, kernel=linear ................................
[CV] .... C=100, gamma=0.01, kernel=linear, score=0.713, total=   0.0s
[CV] C=100, gamma=0.01, kernel=linear ................................
[CV] .... C=100, gamma=0.01, kernel=linear, score=0.747, total=   0.0s
[CV] C=100, gamma=0.01, kernel=linear ................................
[CV] .... C=100, gamma=0.01, kernel=linear, score=0.721, total=   0.0s
[CV] C=100, gamma=0.01, kernel=linear ................................
[CV] .... C=100, gamma=0.01, kernel=linear, score=0.709, total=   0.0s
[CV] C=100, gamma=0.01, kernel=linear ................................
[CV] .... C=100, gamma=0.01, kernel=linear, score=0.767, total=   0.0s
[CV] C=100, gamma=0.1, kernel=rbf ....................................
[CV] .

[CV] ..... C=500, gamma=0.5, kernel=linear, score=0.713, total=   0.1s
[CV] C=500, gamma=0.5, kernel=linear .................................
[CV] ..... C=500, gamma=0.5, kernel=linear, score=0.747, total=   0.1s
[CV] C=500, gamma=0.5, kernel=linear .................................
[CV] ..... C=500, gamma=0.5, kernel=linear, score=0.721, total=   0.1s
[CV] C=500, gamma=0.5, kernel=linear .................................
[CV] ..... C=500, gamma=0.5, kernel=linear, score=0.709, total=   0.1s
[CV] C=500, gamma=0.5, kernel=linear .................................
[CV] ..... C=500, gamma=0.5, kernel=linear, score=0.767, total=   0.1s
[CV] C=500, gamma=1.2, kernel=rbf ....................................
[CV] ........ C=500, gamma=1.2, kernel=rbf, score=0.782, total=   0.0s
[CV] C=500, gamma=1.2, kernel=rbf ....................................
[CV] ........ C=500, gamma=1.2, kernel=rbf, score=0.782, total=   0.0s
[CV] C=500, gamma=1.2, kernel=rbf ....................................
[CV] .

[Parallel(n_jobs=1)]: Done 160 out of 160 | elapsed:    4.2s finished


GridSearchCV(cv=None, error_score=nan,
             estimator=SVC(C=1.0, break_ties=False, cache_size=200,
                           class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='scale', kernel='rbf', max_iter=-1,
                           probability=False, random_state=10, shrinking=True,
                           tol=0.001, verbose=False),
             iid='deprecated', n_jobs=None,
             param_grid={'C': [1, 10, 100, 500], 'gamma': [0.01, 0.1, 0.5, 1.2],
                         'kernel': ['rbf', 'linear']},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=3)

In [14]:
grid_model.best_score_

0.9490777866880513

In [15]:
grid_model.best_params_

{'C': 100, 'gamma': 0.1, 'kernel': 'rbf'}

In [16]:
conda update--all



Note: you may need to restart the kernel to use updated packages.



CommandNotFoundError: No command 'conda update--all'.
Did you mean 'conda update'?



In [17]:
grid_model.best_index_

18