In [98]:
import numpy as np
import pandas as pd
from time import time
from sklearn.metrics import accuracy_score
from sklearn.metrics import fbeta_score
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import fbeta_score,make_scorer
from IPython.display import display # Allows the use of display() for DataFrames

In [99]:
data=pd.read_csv('car_evaluation.csv')
data.head()

Unnamed: 0,vhigh,vhigh.1,2,2.1,small,low,unacc
0,vhigh,vhigh,2,2,small,med,unacc
1,vhigh,vhigh,2,2,small,high,unacc
2,vhigh,vhigh,2,2,med,low,unacc
3,vhigh,vhigh,2,2,med,med,unacc
4,vhigh,vhigh,2,2,med,high,unacc


In [100]:
data.columns=['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety','car_class']
data.sample(15)

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,car_class
134,vhigh,high,3,2,small,low,unacc
958,med,vhigh,5more,4,med,high,acc
124,vhigh,high,2,4,big,high,unacc
217,vhigh,med,2,2,small,high,unacc
1577,low,med,4,4,med,low,unacc
1222,med,low,3,2,big,high,unacc
741,high,med,5more,4,med,med,acc
1570,low,med,4,2,med,high,unacc
660,high,med,2,4,med,med,unacc
1256,med,low,4,4,big,low,unacc


In [101]:
#checking missing values in the dataset
data.isnull().sum()

buying       0
maint        0
doors        0
persons      0
lug_boot     0
safety       0
car_class    0
dtype: int64

In [102]:
#separating target variables
target=data['car_class']
features=data.drop(['car_class'],axis=1)

In [103]:
#encoding categorical variables
features=pd.get_dummies(features)

In [104]:
features.head()

Unnamed: 0,buying_high,buying_low,buying_med,buying_vhigh,maint_high,maint_low,maint_med,maint_vhigh,doors_2,doors_3,...,doors_5more,persons_2,persons_4,persons_more,lug_boot_big,lug_boot_med,lug_boot_small,safety_high,safety_low,safety_med
0,0,0,0,1,0,0,0,1,1,0,...,0,1,0,0,0,0,1,0,0,1
1,0,0,0,1,0,0,0,1,1,0,...,0,1,0,0,0,0,1,1,0,0
2,0,0,0,1,0,0,0,1,1,0,...,0,1,0,0,0,1,0,0,1,0
3,0,0,0,1,0,0,0,1,1,0,...,0,1,0,0,0,1,0,0,0,1
4,0,0,0,1,0,0,0,1,1,0,...,0,1,0,0,0,1,0,1,0,0


In [105]:
target.unique()

array(['unacc', 'acc', 'vgood', 'good'], dtype=object)

In [106]:
target= target.map( {'unacc':1,'acc':2,'vgood':3,'good':4} ).astype(int)

In [107]:
#splitting the data
from sklearn import cross_validation

# Split the 'features' and 'income' data into training and testing sets
X_train, X_test, y_train, y_test = cross_validation.train_test_split(features, 
                                                    target, 
                                                    test_size = 0.2, 
                                                    random_state = 0)


In [108]:
from sklearn.ensemble import RandomForestClassifier
Model1 = RandomForestClassifier(random_state=0)

Model1.fit(X_train,y_train)
Predicted = Model1.predict(X_test)

accuracy = accuracy_score(y_test,Predicted)
print('Accuracy of Random Forest',accuracy)
f_score=fbeta_score(y_test,Predicted,0.5,average=None)
print('Fbeta score of Random Forest',f_score)

Accuracy of Random Forest 0.9421965317919075
Fbeta score of Random Forest [0.98177299 0.88161209 0.76388889 0.74074074]


In [109]:
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC

In [110]:
Model2 = SVC(random_state=0)

Model2.fit(X_train,y_train)
Predicted = Model2.predict(X_test)

accuracy = accuracy_score(y_test,Predicted)
print('Accuracy of SVM',accuracy)
f_score=fbeta_score(y_test,Predicted,0.5,average=None)
print('Fbeta score of SVM',f_score)

Accuracy of SVM 0.8901734104046243
Fbeta score of SVM [0.98770852 0.73896353 0.53571429 0.        ]


  'precision', 'predicted', average, warn_for)


In [111]:
Model3 = GaussianNB()

Model3.fit(X_train,y_train)
Predicted = Model3.predict(X_test)

accuracy = accuracy_score(y_test,Predicted)
print('Accuracy of GaussianNB',accuracy)
f_score=fbeta_score(y_test,Predicted,0.5,average=None)
print('Fbeta score of GaussianNB',f_score)

Accuracy of GaussianNB 0.8005780346820809
Fbeta score of GaussianNB [0.95566502 0.59304703 0.68965517 0.40909091]


In [112]:
Model4 =  DecisionTreeClassifier()

Model4.fit(X_train,y_train)
Predicted = Model4.predict(X_test)

accuracy = accuracy_score(y_test,Predicted)
print('Accuracy of  Decision Tree',accuracy)
f_score=fbeta_score(y_test,Predicted,0.5,average=None)
print('Fbeta score of  Decision Tree',f_score)

Accuracy of  Decision Tree 0.9682080924855492
Fbeta score of  Decision Tree [0.97853014 0.94164456 0.9375     0.96774194]


**Choosing the best model for model tuning which is decision trees in this case due to high accuracy and f score too**

**Thus,our best model is the Model4 which is the decision tree classifier which gives an accuracy score of 0.95
and is able to precisely  predict cars of each class with a good Fbeta score.**
