In [1]:
# Import the pandas library to work with tabular data
import pandas as pd

In [2]:
# Pull the car evaluation CSV straight from the workshop's GitHub repository
# and parse it into the working DataFrame `df`.
file_url = 'https://raw.githubusercontent.com/PacktWorkshops/The-Applied-Artificial-Intelligence-Workshop/master/Datasets/car.csv'
df = pd.read_csv(filepath_or_buffer=file_url)


In [3]:
# Preview the first five rows to see the column names and the raw
# (still textual) category values.
df.head(n=5)

Unnamed: 0,buying,maintenance,doors,persons,luggage_boot,safety,class
0,vhigh,vhigh,2,2,small,low,unacc
1,vhigh,vhigh,2,2,small,med,unacc
2,vhigh,vhigh,2,2,small,high,unacc
3,vhigh,vhigh,2,2,med,low,unacc
4,vhigh,vhigh,2,2,med,med,unacc


In [4]:
# -----------------------------
# STEP 1: ENCODING CATEGORICAL DATA
# -----------------------------

from sklearn import preprocessing

In [5]:
def encode(data_frame, column):
    """Label-encode a single column of a DataFrame.

    Every distinct value found in ``data_frame[column]`` is mapped to an
    integer code using scikit-learn's ``LabelEncoder``.

    Note: the codes follow the sorted order of the distinct values, not any
    domain ordering (in this dataset 'high'/'low'/'med' become 0/1/2), so
    the numbers should not be read as an ordinal scale.

    Parameters
    ----------
    data_frame : pandas.DataFrame
        Frame that holds the column. It is only read, never modified here.
    column : str
        Label of the column to encode.

    Returns
    -------
    numpy.ndarray
        One integer code per row, in the same row order as the input column.

    Raises
    ------
    KeyError
        If ``column`` is not a column of ``data_frame``.
    """
    # fit_transform learns the distinct classes and encodes the values in a
    # single call; the encoder de-duplicates internally, so calling
    # .unique() first and then a separate transform() is unnecessary.
    return preprocessing.LabelEncoder().fit_transform(data_frame[column])

In [6]:
# Replace each column of `df` with its integer-coded version, one column at
# a time. The target column 'class' is still part of `df` at this point, so
# it is encoded as well.
for column in df.columns:
    df[column] = encode(data_frame=df, column=column)

In [7]:
# Preview the first five rows again to confirm every column now holds
# integer codes instead of text.
df.head(n=5)

Unnamed: 0,buying,maintenance,doors,persons,luggage_boot,safety,class
0,3,3,0,0,2,1,2
1,3,3,0,0,2,2,2
2,3,3,0,0,2,0,2
3,3,3,0,0,1,1,2
4,3,3,0,0,1,2,2


In [8]:
# -----------------------------
# STEP 2: SPLITTING FEATURES AND LABEL
# -----------------------------

# Separate the target variable ('class') from the features.
# DataFrame.pop removes the column from `df` in place and returns it, so
# after this cell `df` holds only the feature columns.
# Re-running this cell without rebuilding `df` fails with a KeyError,
# because 'class' is no longer present.
label = df.pop('class')

In [9]:
from sklearn import model_selection

In [10]:
# Hold out 10% of the rows for testing and train on the remaining 90%.
# The fixed random_state makes the shuffle, and therefore the split,
# repeatable from run to run.
(features_train, features_test,
 label_train, label_test) = model_selection.train_test_split(
    df,
    label,
    random_state=88,
    test_size=0.1,
)

In [11]:
# -----------------------------
# STEP 3: BUILDING THE DECISION TREE MODEL
# -----------------------------

from sklearn.tree import DecisionTreeClassifier

In [12]:
# Create a Decision Tree model with default hyperparameters.
# random_state is fixed because scikit-learn randomly permutes the candidate
# features at each split; without a seed the fitted tree (and the metrics
# reported below) can differ between runs even though the data split is
# already seeded.
decision_tree = DecisionTreeClassifier(random_state=88)

In [15]:
# Fit the tree on the training features and their matching labels
decision_tree.fit(X=features_train, y=label_train)

In [16]:
# Mean accuracy of the fitted tree on the held-out test rows
decision_tree.score(X=features_test, y=label_test)

0.953757225433526

In [17]:
# Generate classification performance metrics
from sklearn.metrics import classification_report


In [18]:
# Show per-class precision, recall and F1-score, comparing the tree's
# predictions on the test features with the true test labels.
print(
    classification_report(
        y_true=label_test,
        y_pred=decision_tree.predict(features_test),
    )
)

              precision    recall  f1-score   support

           0       0.87      0.98      0.92        42
           1       0.89      0.89      0.89         9
           2       0.99      0.96      0.98       114
           3       1.00      0.75      0.86         8

    accuracy                           0.95       173
   macro avg       0.94      0.89      0.91       173
weighted avg       0.96      0.95      0.95       173

