# In [None]:
## SET 1 - 2nd Ques
### Step 1: Importing required libraries
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import tree, metrics
from sklearn.model_selection import train_test_split
import pylab

### Step 2: Load the dataset
# Column names are supplied via names=, so pandas treats the first line of the
# file as data. NOTE(review): assumes the CSV has no header row - confirm.
data = pd.read_csv(
    '/content/cardata.csv',
    names=['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety', 'class'],
)
print(data)

### Step 3: Basic info about the data
# A bare `data.head()` expression is discarded when this runs as a script (and in
# a notebook unless it is the last line of a cell), so print it explicitly.
print(data.head())
data.info()

### Step 4: Encode target variable (class)
# pd.factorize returns integer codes plus the unique labels; class_names holds
# the original label for each code.
class_codes, class_names = pd.factorize(data['class'])
data['class'] = class_codes
print(class_names)
print(data['class'].unique())

### Step 5: Encode other categorical columns
# Every feature column gets the same integer encoding; the label lookup that
# factorize also returns is not needed for the features.
for column in ('buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety'):
    data[column], _ = pd.factorize(data[column])

### Step 6: Display the encoded data
print(data)
# A bare `data.head()` expression is discarded when this runs as a script, so
# print it explicitly to actually show the first rows.
print(data.head())
data.info()

### Step 7: Separate features and target
# Features are every column except the last; the target is the last column.
X, y = data.iloc[:, :-1], data.iloc[:, -1]

### Step 8: Split the dataset into training and testing data
# 30% of the rows are held out for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)

### Step 9: Train the Decision Tree model
# fit() returns the estimator itself, so dtree is the fitted classifier.
dtree = tree.DecisionTreeClassifier(
    criterion='entropy',
    max_depth=3,
    random_state=0,
).fit(X_train, y_train)

### Step 10: Make predictions
y_pred = dtree.predict(X_test)

### Steps 11-13: Evaluate performance
# Compute every metric first, then report them in the same order as before.
cnf_matrix = metrics.confusion_matrix(y_test, y_pred)
count_misclassified = (y_test != y_pred).sum()  # element-wise mismatch count
accuracy = metrics.accuracy_score(y_test, y_pred)

print(cnf_matrix)
print('Misclassified samples: {}'.format(count_misclassified))
print('Accuracy: {:.2f}'.format(accuracy))

### Step 14: Visualise the fitted tree
plt.figure(figsize=(12, 8))
# X.columns and class_names (from pd.factorize) are pandas Index objects. Some
# scikit-learn releases validate feature_names/class_names as plain lists and
# reject an Index, so convert both explicitly; a list works on every version.
tree.plot_tree(
    dtree,
    feature_names=list(X.columns),
    class_names=list(class_names),
    filled=True,
)
plt.show()