Importing Libraries

In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder
import seaborn as sns
import matplotlib.pyplot as plt

Load the dataset

In [3]:
df = pd.read_csv("dataset/car_evaluation.csv")

# Display the first few rows of the dataset
df.head()

Unnamed: 0,vhigh,vhigh.1,2,2.1,small,low,unacc
0,vhigh,vhigh,2,2,small,med,unacc
1,vhigh,vhigh,2,2,small,high,unacc
2,vhigh,vhigh,2,2,med,low,unacc
3,vhigh,vhigh,2,2,med,med,unacc
4,vhigh,vhigh,2,2,med,high,unacc


Rename columns

In [4]:
df.columns = ['buying_price', 'maintenance_cost', 'num_doors', 'num_persons', 'lug_boot', 'safety', 'decision']

In [5]:
df

Unnamed: 0,buying_price,maintenance_cost,num_doors,num_persons,lug_boot,safety,decision
0,vhigh,vhigh,2,2,small,med,unacc
1,vhigh,vhigh,2,2,small,high,unacc
2,vhigh,vhigh,2,2,med,low,unacc
3,vhigh,vhigh,2,2,med,med,unacc
4,vhigh,vhigh,2,2,med,high,unacc
...,...,...,...,...,...,...,...
1722,low,low,5more,more,med,med,good
1723,low,low,5more,more,med,high,vgood
1724,low,low,5more,more,big,low,unacc
1725,low,low,5more,more,big,med,good


In [6]:
df.isnull()

Unnamed: 0,buying_price,maintenance_cost,num_doors,num_persons,lug_boot,safety,decision
0,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...
1722,False,False,False,False,False,False,False
1723,False,False,False,False,False,False,False
1724,False,False,False,False,False,False,False
1725,False,False,False,False,False,False,False


In [7]:
df.isnull().sum()

buying_price        0
maintenance_cost    0
num_doors           0
num_persons         0
lug_boot            0
safety              0
decision            0
dtype: int64

Label encode categorical variables

In [8]:
le = LabelEncoder()

# Dictionary to store label encoding for each column
label_mappings = {}

# Apply label encoding and store the mappings
for column in df.columns:
    df[column] = le.fit_transform(df[column])
    label_mappings[column] = dict(zip(le.classes_, le.transform(le.classes_)))

# Display the encoded dataset
print(df.head())

   buying_price  maintenance_cost  num_doors  num_persons  lug_boot  safety  \
0             3                 3          0            0         2       2   
1             3                 3          0            0         2       0   
2             3                 3          0            0         1       1   
3             3                 3          0            0         1       2   
4             3                 3          0            0         1       0   

   decision  
0         2  
1         2  
2         2  
3         2  
4         2  


In [9]:
df

Unnamed: 0,buying_price,maintenance_cost,num_doors,num_persons,lug_boot,safety,decision
0,3,3,0,0,2,2,2
1,3,3,0,0,2,0,2
2,3,3,0,0,1,1,2
3,3,3,0,0,1,2,2
4,3,3,0,0,1,0,2
...,...,...,...,...,...,...,...
1722,1,1,3,2,1,2,1
1723,1,1,3,2,1,0,3
1724,1,1,3,2,0,1,2
1725,1,1,3,2,0,2,1


Display the label mappings

In [10]:
for column, mapping in label_mappings.items():
    print(f"Column: {column}")
    for original, encoded in mapping.items():
        print(f"  {original}: {encoded}")
    print()

Column: buying_price
  high: 0
  low: 1
  med: 2
  vhigh: 3

Column: maintenance_cost
  high: 0
  low: 1
  med: 2
  vhigh: 3

Column: num_doors
  2: 0
  3: 1
  4: 2
  5more: 3

Column: num_persons
  2: 0
  4: 1
  more: 2

Column: lug_boot
  big: 0
  med: 1
  small: 2

Column: safety
  high: 0
  low: 1
  med: 2

Column: decision
  acc: 0
  good: 1
  unacc: 2
  vgood: 3



Split the data into features and target

In [11]:
# Features and Target
X = df.drop('decision', axis=1)  # Features (all columns except 'decision')
y = df['decision']  # Target variable ('decision')

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


Train the Random Forest Classifier

In [12]:
# Initialize the Random Forest Classifier
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)

In [13]:
# Train the model
rf_model.fit(X_train, y_train)

Make predictions and evaluate the model

In [14]:
y_pred = rf_model.predict(X_test)

# Evaluate model performance
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

# Classification report for detailed metrics
print("Classification Report:\n", classification_report(y_test, y_pred))

Accuracy: 96.24%
Classification Report:
               precision    recall  f1-score   support

           0       0.94      0.94      0.94        77
           1       0.91      0.67      0.77        15
           2       0.99      1.00      0.99       237
           3       0.79      0.88      0.83        17

    accuracy                           0.96       346
   macro avg       0.91      0.87      0.88       346
weighted avg       0.96      0.96      0.96       346



In [15]:
# Create a DataFrame for the actual and predicted values
results_df = pd.DataFrame({
    'Actual': y_test,
    'Predicted': y_pred
})

# Display the first few rows of the DataFrame
print(results_df.head())

      Actual  Predicted
599        2          2
932        2          2
628        2          2
1497       0          0
1262       2          2


In [16]:
label_mappings = {
    0: 'good',
    1: 'unacc',
    2: 'vgood'
}

# Map the encoded values back to the original labels
results_df['Actual'] = results_df['Actual'].map(label_mappings)
results_df['Predicted'] = results_df['Predicted'].map(label_mappings)

# Display the first few rows of the DataFrame
print(results_df.sample(20))


     Actual Predicted
1157  vgood     vgood
413   vgood     vgood
210   vgood     vgood
1693  unacc     unacc
1281  unacc     unacc
522   vgood     vgood
1083  vgood     vgood
1483    NaN       NaN
69    vgood     vgood
1480    NaN      good
1160  vgood     vgood
497   vgood     vgood
938   vgood     vgood
682   vgood     vgood
845   vgood     vgood
1118  vgood     vgood
1591    NaN       NaN
842   vgood     vgood
297   vgood     vgood
1228  unacc      good
