In [None]:
#Import the necessary libraries.
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn import metrics , model_selection
from mlxtend.plotting import plot_confusion_matrix
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix 
from sklearn.tree import export_graphviz
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Load the car-evaluation CSV, keep the six attribute columns plus the
# `class` target, and drop every row with a missing value in any of them.
selected_columns = ["buying", "maint", "doors", "persons", "lug_boot", "safety", "class"]
raw_cars = pd.read_csv("../input/car-evaluation-datacset/car.csv")
cars_data = raw_cars[selected_columns].dropna(axis=0, how='any')
cars_data.head()

In [None]:
# Replace the nominal values of every column with integer codes via
# pd.factorize (codes are assigned in order of first appearance).
# For the target column the original labels are kept in `class_names`,
# where position i holds the label that was mapped to code i.
class_codes, class_names = pd.factorize(cars_data['class'])
cars_data['class'] = class_codes

# The feature columns only need the codes; their label arrays are discarded.
for feature in ('buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety'):
    cars_data[feature] = pd.factorize(cars_data[feature])[0]

cars_data.head()

In [None]:
# Select the features (every column except the last one) and the target.
X = cars_data[cars_data.columns[:-1]]
y = cars_data['class']

# Random hold-out split: 8% of the rows become the test set.
# NOTE: train_test_split shuffles the rows, so this is NOT an "in order" split.
# - random_state pins the shuffle so Restart & Run All reproduces the same
#   train/test rows and therefore the same metrics.
# - stratify=y keeps the class proportions equal in both folds, so every
#   class is present in the small test set (the confusion-matrix cells
#   further down assume all classes appear).
X_train, X_test200, y_train, y_test200 = model_selection.train_test_split(
    X, y, test_size=0.08, random_state=42, stratify=y
)

# Alternative kept for reference: ordered (unshuffled) split that trains on
# the first N rows. Fractions tried previously: 0.73 (first 1261 records),
# 0.44 (first 760 records), 0.29 (first 501 records).
# If enabled, note that it defines X_test / y_test, whereas the cells below
# read X_test200 / y_test200.
#train_pct_index = int(0.73 * len(X))
#X_train, X_test = X[:train_pct_index], X[train_pct_index:]
#y_train, y_test = y[:train_pct_index], y[train_pct_index:]

In [None]:
# Training / model fitting.
# random_state is fixed so that ties between equally good splits are always
# broken the same way; without it the fitted tree (and the reported metrics)
# can differ from one run to the next.
tree = DecisionTreeClassifier(random_state=42)
tree.fit(X_train, y_train)

# Predictions for the held-out rows; used by all evaluation cells below.
y_pred = tree.predict(X_test200)

In [None]:
# Overall accuracy on the held-out rows.
test_accuracy = metrics.accuracy_score(y_test200, y_pred)
print("Accuracy:", test_accuracy)

# Number of held-out rows whose predicted class differs from the true class.
count_misclassified = (y_test200 != y_pred).sum()
print(f'Misclassified samples: {count_misclassified}')

In [None]:
# Per-class precision / recall / F1 and the confusion matrix, labelled with
# the original class names.
# `class_names` is the label array returned by pd.factorize for the target
# column: position i is the label encoded as integer i. Using it (instead of
# a hand-typed list) guarantees the names match the encoding.
class_codes = np.arange(len(class_names))
class_labels = [str(name) for name in class_names]

print(metrics.classification_report(
    y_test200, y_pred, labels=class_codes, target_names=class_labels
))

# Passing `labels` forces one row/column per known class even if a class is
# absent from the test fold, so the matrix shape always matches the labels.
confusion_mc = pd.DataFrame(
    confusion_matrix(y_test200, y_pred, labels=class_codes),
    columns=class_labels,
    index=class_labels
)
print(confusion_mc)

In [None]:
# To avoid the zero-division warning
#from sklearn.metrics import f1_score
#metrics.f1_score(y_test, y_pred,average='weighted', labels=np.unique(y_pred))

In [None]:
# Row-normalised confusion-matrix heat map for the decision tree.
# `class_names` is the label array returned by pd.factorize for the target
# column (position i is the label encoded as integer i).
class_codes = np.arange(len(class_names))

# `labels` keeps one row/column per known class so the tick labels line up.
confusion_mc = confusion_matrix(y_test200, y_pred, labels=class_codes)

# The figure size has to be given to the plotting call itself: calling
# plt.figure(figsize=...) afterwards only opens a second, empty figure.
fig, ax = plot_confusion_matrix(conf_mat=confusion_mc,
                                figsize=(10, 8),
                                colorbar=True,
                                show_absolute=False,
                                show_normed=True,
                                class_names=[str(name) for name in class_names])

# Set the title on the returned axes rather than on pyplot's "current" axes.
ax.set_title('Decision Tree \nAccuracy:{0:.3f}'.format(metrics.accuracy_score(y_test200, y_pred)))
plt.show()


In [None]:
#Visualization of the decision graph 
feat_names = ['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety']
targ_names = ['unacc', 'acc', 'vgood',  'good']
export_graphviz(tree, out_file='tree_limited.dot', feature_names = feat_names,
                class_names = targ_names,
                rounded = True, proportion = False, precision = 2, filled = True)
!dot -Tpng tree_limited.dot -o tree_limited.png -Gdpi=600
from IPython.display import Image
Image(filename = 'tree_limited.png')

In [None]:
# Divide every row of the confusion matrix by that row's total, so each row
# holds fractions of the true class; the diagonal is then the fraction of
# each class that was predicted correctly.
row_totals = confusion_mc.sum(axis=1, keepdims=True)
confusion_mc = confusion_mc.astype('float') / row_totals
print("Per Class Classification Matrix:", confusion_mc.diagonal())

In [None]:
# Bar chart of the diagonal of the row-normalised confusion matrix
# (one bar per class).
# The style must be selected BEFORE the figure is created; applying it after
# plotting leaves the current chart unstyled.
plt.style.use('ggplot')

per_class_scores = confusion_mc.diagonal()
# Derive the bar positions from the data instead of hard-coding 4 classes.
class_positions = range(len(per_class_scores))

fig, ax = plt.subplots()
ax.bar(class_positions, per_class_scores, color = 'black', align = 'center')
ax.set_title("Per Class Classification ")
ax.set_xlabel("Class")
ax.set_ylabel("Fraction correctly classified")
ax.set_xticks(list(class_positions))
# Show the original class labels only when they line up one-to-one with the
# bars; otherwise keep the numeric positions.
if len(class_names) == len(per_class_scores):
    ax.set_xticklabels([str(name) for name in class_names])
plt.show()