In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import OneHotEncoder


# Experiment: average test accuracy of an entropy-based decision tree over
# 20 random stratified 60/40 train/test splits of the car data.

# Load the data; `names=` supplies the column names, so every row of the
# file is read as data
columns = ['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety', 'class']
data = pd.read_csv("car.data", names=columns)

# Map class labels to numerical values
class_mapping = {'unacc': 0, 'acc': 1, 'good': 2, 'vgood': 3}
data['class'] = data['class'].map(class_mapping)

# Convert categorical features into one-hot encoded representation
categorical_columns = ['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety']
data_encoded = pd.get_dummies(data, columns=categorical_columns)

# Split data into features and target once: this does not depend on the
# iteration, so it is done outside the loop
X = data_encoded.drop('class', axis=1)
y = data_encoded['class']

# Initialize list to store accuracy scores
accuracy_scores = []

# Repeat the exercise 20 times. No random_state is passed to
# train_test_split, so each iteration draws a different split.
for i in range(20):
    # Split data into train and test sets (40% test, class proportions kept)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.4, stratify=y)

    # Initialize decision tree classifier
    clf_ent_20 = DecisionTreeClassifier(criterion='entropy')

    # Train the classifier
    clf_ent_20.fit(X_train, y_train)

    # Test the classifier
    y_pred = clf_ent_20.predict(X_test)

    # Calculate accuracy and store it
    accuracy = accuracy_score(y_test, y_pred)
    accuracy_scores.append(accuracy)
    print(f"Iteration {i+1}: Accuracy = {accuracy}")

# Calculate the average accuracy
average_accuracy = sum(accuracy_scores) / len(accuracy_scores)

print("Average Accuracy over 20 iterations:", average_accuracy)

Iteration 1: Accuracy = 0.976878612716763
Iteration 2: Accuracy = 0.9552023121387283
Iteration 3: Accuracy = 0.976878612716763
Iteration 4: Accuracy = 0.9725433526011561
Iteration 5: Accuracy = 0.9566473988439307
Iteration 6: Accuracy = 0.9523121387283237
Iteration 7: Accuracy = 0.9725433526011561
Iteration 8: Accuracy = 0.9653179190751445
Iteration 9: Accuracy = 0.9667630057803468
Iteration 10: Accuracy = 0.976878612716763
Iteration 11: Accuracy = 0.9739884393063584
Iteration 12: Accuracy = 0.9638728323699421
Iteration 13: Accuracy = 0.9421965317919075
Iteration 14: Accuracy = 0.9696531791907514
Iteration 15: Accuracy = 0.9725433526011561
Iteration 16: Accuracy = 0.9494219653179191
Iteration 17: Accuracy = 0.9609826589595376
Iteration 18: Accuracy = 0.9552023121387283
Iteration 19: Accuracy = 0.9725433526011561
Iteration 20: Accuracy = 0.9653179190751445
Average Accuracy over 20 iterations: 0.9648843930635838


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score
from sklearn.preprocessing import OneHotEncoder

# Experiment: fit one entropy-based decision tree on a stratified 60/40
# split and report confusion matrix, weighted F1 and accuracy for both the
# training and the test set.

# Load the data; `names=` supplies the column names, so every row of the
# file is read as data
columns = ['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety', 'class']
data = pd.read_csv("car.data", names=columns)

# Map class labels to numerical values
class_mapping = {'unacc': 0, 'acc': 1, 'good': 2, 'vgood': 3}
data['class'] = data['class'].map(class_mapping)

# Get unique class labels in ascending order. A plain .unique() returns them
# in order of first appearance, which does not match the sorted order that
# confusion_matrix uses for its rows/columns and would mislabel the table.
class_labels = data['class'].sort_values().unique()

# Convert categorical features into one-hot encoded representation
categorical_columns = ['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety']
data_encoded = pd.get_dummies(data, columns=categorical_columns)

# Split data into features and target
X = data_encoded.drop('class', axis=1)
y = data_encoded['class']

# Split data into train and test sets (40% test, class proportions kept)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.4, stratify=y)

# Initialize decision tree classifier
clf_ent = DecisionTreeClassifier(criterion='entropy')

# Train the classifier
clf_ent.fit(X_train, y_train)

# Test the classifier
y_pred_train = clf_ent.predict(X_train)
y_pred_test = clf_ent.predict(X_test)

# Evaluate the classifier; `labels=` pins the row/column order of each
# matrix to class_labels so the DataFrame headers below are guaranteed to
# line up with the counts
conf_matrix_train = confusion_matrix(y_train, y_pred_train, labels=class_labels)
conf_matrix_test = confusion_matrix(y_test, y_pred_test, labels=class_labels)
accuracy_train = accuracy_score(y_train, y_pred_train)
accuracy_test = accuracy_score(y_test, y_pred_test)
f1_train = f1_score(y_train, y_pred_train, average="weighted")
f1_test = f1_score(y_test, y_pred_test, average="weighted")

# Print confusion matrix with numeric class labels for both training and
# test sets (rows: true class, columns: predicted class)
print("Training Confusion Matrix:")
print(pd.DataFrame(conf_matrix_train, index=class_labels, columns=class_labels))
print("\nTraining F1 Score:", f1_train)
print("\nTraining Accuracy:", accuracy_train)

print("\nTest Confusion Matrix:")
print(pd.DataFrame(conf_matrix_test, index=class_labels, columns=class_labels))
print("\nTest F1 Score:", f1_test)
print("\nTest Accuracy:", accuracy_test)


Training Confusion Matrix:
     0    1   3   2
0  726    0   0   0
1    0  230   0   0
3    0    0  41   0
2    0    0   0  39

Training F1 Score: 1.0

Training Accuracy: 1.0

Test Confusion Matrix:
     0    1   3   2
0  475    6   3   0
1    4  147   2   1
3    0    0  27   1
2    0    3   0  23

Test F1 Score: 0.9713422609731355

Test Accuracy: 0.9710982658959537


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score
from sklearn.preprocessing import OneHotEncoder

# Experiment: fit one Gini-based decision tree on a stratified 60/40 split
# and report confusion matrix, weighted F1 and accuracy for both the
# training and the test set.

# Load the data; `names=` supplies the column names, so every row of the
# file is read as data
columns = ['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety', 'class']
data = pd.read_csv("car.data", names=columns)

# Map class labels to numerical values
class_mapping = {'unacc': 0, 'acc': 1, 'good': 2, 'vgood': 3}
data['class'] = data['class'].map(class_mapping)

# Get unique class labels in ascending order. A plain .unique() returns them
# in order of first appearance, which does not match the sorted order that
# confusion_matrix uses for its rows/columns and would mislabel the table.
class_labels = data['class'].sort_values().unique()

# Convert categorical features into one-hot encoded representation
categorical_columns = ['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety']
data_encoded = pd.get_dummies(data, columns=categorical_columns)

# Split data into features and target
X = data_encoded.drop('class', axis=1)
y = data_encoded['class']

# Split data into train and test sets (40% test, class proportions kept)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.4, stratify=y)

# Initialize decision tree classifier
# NOTE(review): the variable keeps the name clf_ent although the criterion
# used in this cell is 'gini'.
clf_ent = DecisionTreeClassifier(criterion='gini')

# Train the classifier
clf_ent.fit(X_train, y_train)

# Test the classifier
y_pred_train = clf_ent.predict(X_train)
y_pred_test = clf_ent.predict(X_test)

# Evaluate the classifier; `labels=` pins the row/column order of each
# matrix to class_labels so the DataFrame headers below are guaranteed to
# line up with the counts
conf_matrix_train = confusion_matrix(y_train, y_pred_train, labels=class_labels)
conf_matrix_test = confusion_matrix(y_test, y_pred_test, labels=class_labels)
accuracy_train = accuracy_score(y_train, y_pred_train)
accuracy_test = accuracy_score(y_test, y_pred_test)
f1_train = f1_score(y_train, y_pred_train, average="weighted")
f1_test = f1_score(y_test, y_pred_test, average="weighted")

# Print confusion matrix with numeric class labels for both training and
# test sets (rows: true class, columns: predicted class)
print("Training Confusion Matrix:")
print(pd.DataFrame(conf_matrix_train, index=class_labels, columns=class_labels))
print("\nTraining F1 Score:", f1_train)
print("\nTraining Accuracy:", accuracy_train)

print("\nTest Confusion Matrix:")
print(pd.DataFrame(conf_matrix_test, index=class_labels, columns=class_labels))
print("\nTest F1 Score:", f1_test)
print("\nTest Accuracy:", accuracy_test)


Training Confusion Matrix:
     0    1   3   2
0  726    0   0   0
1    0  230   0   0
3    0    0  41   0
2    0    0   0  39

Training F1 Score: 1.0

Training Accuracy: 1.0

Test Confusion Matrix:
     0    1   3   2
0  473    9   2   0
1    4  145   2   3
3    0    1  27   0
2    0    1   1  24

Test F1 Score: 0.9671344730451169

Test Accuracy: 0.9667630057803468


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import OneHotEncoder

# Experiment: average test accuracy of a Gini-based decision tree over
# 20 random stratified 60/40 train/test splits of the car data.

# Load the data; `names=` supplies the column names, so every row of the
# file is read as data
columns = ['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety', 'class']
data = pd.read_csv("car.data", names=columns)

# Map class labels to numerical values
class_mapping = {'unacc': 0, 'acc': 1, 'good': 2, 'vgood': 3}
data['class'] = data['class'].map(class_mapping)

# Convert categorical features into one-hot encoded representation
categorical_columns = ['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety']
data_encoded = pd.get_dummies(data, columns=categorical_columns)

# Split data into features and target once: this does not depend on the
# iteration, so it is done outside the loop
X = data_encoded.drop('class', axis=1)
y = data_encoded['class']

# Initialize list to store accuracy scores
accuracy_scores = []

# Repeat the exercise 20 times. No random_state is passed to
# train_test_split, so each iteration draws a different split.
for i in range(20):
    # Split data into train and test sets (40% test, class proportions kept)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.4, stratify=y)

    # Initialize decision tree classifier
    # NOTE(review): the variable keeps the name clf_ent_20 although the
    # criterion used in this cell is 'gini'.
    clf_ent_20 = DecisionTreeClassifier(criterion='gini')

    # Train the classifier
    clf_ent_20.fit(X_train, y_train)

    # Test the classifier
    y_pred = clf_ent_20.predict(X_test)

    # Calculate accuracy and store it
    accuracy = accuracy_score(y_test, y_pred)
    accuracy_scores.append(accuracy)
    print(f"Iteration {i+1}: Accuracy = {accuracy}")

# Calculate the average accuracy
average_accuracy = sum(accuracy_scores) / len(accuracy_scores)

print("Average Accuracy over 20 iterations:", average_accuracy)

Iteration 1: Accuracy = 0.9624277456647399
Iteration 2: Accuracy = 0.9552023121387283
Iteration 3: Accuracy = 0.9754335260115607
Iteration 4: Accuracy = 0.9725433526011561
Iteration 5: Accuracy = 0.9653179190751445
Iteration 6: Accuracy = 0.958092485549133
Iteration 7: Accuracy = 0.9667630057803468
Iteration 8: Accuracy = 0.9609826589595376
Iteration 9: Accuracy = 0.9494219653179191
Iteration 10: Accuracy = 0.9523121387283237
Iteration 11: Accuracy = 0.9638728323699421
Iteration 12: Accuracy = 0.9739884393063584
Iteration 13: Accuracy = 0.9667630057803468
Iteration 14: Accuracy = 0.9667630057803468
Iteration 15: Accuracy = 0.953757225433526
Iteration 16: Accuracy = 0.9638728323699421
Iteration 17: Accuracy = 0.9508670520231214
Iteration 18: Accuracy = 0.9552023121387283
Iteration 19: Accuracy = 0.9566473988439307
Iteration 20: Accuracy = 0.958092485549133
Average Accuracy over 20 iterations: 0.9614161849710984


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score
from sklearn.preprocessing import OneHotEncoder

# Experiment: fit a Gini-based decision tree on one stratified split per
# test size (30% and 20%) and report the test-set confusion matrix,
# weighted F1 and accuracy for each.

# Load the data; `names=` supplies the column names, so every row of the
# file is read as data
columns = ['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety', 'class']
data = pd.read_csv("car.data", names=columns)

# Map class labels to numerical values
class_mapping = {'unacc': 0, 'acc': 1, 'good': 2, 'vgood': 3}
data['class'] = data['class'].map(class_mapping)

# Get unique class labels in ascending order. A plain .unique() returns them
# in order of first appearance, which does not match the sorted order that
# confusion_matrix uses for its rows/columns and would mislabel the table.
class_labels = data['class'].sort_values().unique()

# Convert categorical features into one-hot encoded representation
categorical_columns = ['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety']
data_encoded = pd.get_dummies(data, columns=categorical_columns)

# Split data into features and target
X = data_encoded.drop('class', axis=1)
y = data_encoded['class']

# Test sizes to iterate over
test_sizes = [0.3, 0.2]

for test_size in test_sizes:
    print(f"\nTest Size: {test_size}")

    # Split data into train and test sets (class proportions kept)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, stratify=y)

    # Initialize decision tree classifier
    # NOTE(review): the variable keeps the name clf_ent although the
    # criterion used in this cell is 'gini'.
    clf_ent = DecisionTreeClassifier(criterion='gini')

    # Train the classifier
    clf_ent.fit(X_train, y_train)

    # Test the classifier
    y_pred_train = clf_ent.predict(X_train)
    y_pred_test = clf_ent.predict(X_test)

    # Evaluate the classifier; `labels=` pins the row/column order of each
    # matrix to class_labels so the DataFrame headers below are guaranteed
    # to line up with the counts. The training metrics are computed but
    # only the test metrics are printed.
    conf_matrix_train = confusion_matrix(y_train, y_pred_train, labels=class_labels)
    conf_matrix_test = confusion_matrix(y_test, y_pred_test, labels=class_labels)
    accuracy_train = accuracy_score(y_train, y_pred_train)
    accuracy_test = accuracy_score(y_test, y_pred_test)
    f1_train = f1_score(y_train, y_pred_train, average="weighted")
    f1_test = f1_score(y_test, y_pred_test, average="weighted")

    # Print results (rows: true class, columns: predicted class)
    print("\n Confusion Matrix:")
    print(pd.DataFrame(conf_matrix_test, index=class_labels, columns=class_labels))
    print("\n F1 Score:", f1_test)
    print("\n Accuracy:", accuracy_test)



Test Size: 0.3

 Confusion Matrix:
     0    1   3   2
0  360    3   0   0
1    6  102   7   0
3    0    5  16   0
2    0    0   0  20

 F1 Score: 0.9596491569516618

 Accuracy: 0.9595375722543352

Test Size: 0.2

 Confusion Matrix:
     0   1   3   2
0  242   0   0   0
1    4  73   0   0
3    0   0  14   0
2    0   0   2  11

 F1 Score: 0.9825040272908178

 Accuracy: 0.9826589595375722


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Experiment: for each test size (30% and 20%), average the test accuracy
# of an entropy-based decision tree over 20 random stratified splits.

# Load the data; `names=` supplies the column names, so every row of the
# file is read as data
columns = ['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety', 'class']
data = pd.read_csv("car.data", names=columns)

# Map class labels to numerical values
class_mapping = {'unacc': 0, 'acc': 1, 'good': 2, 'vgood': 3}
data['class'] = data['class'].map(class_mapping)

# Convert categorical features into one-hot encoded representation
categorical_columns = ['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety']
data_encoded = pd.get_dummies(data, columns=categorical_columns)

# Split data into features and target once: this does not depend on the
# test size or the iteration, so it is done outside both loops
X = data_encoded.drop('class', axis=1)
y = data_encoded['class']

# Test sizes to iterate over
test_sizes = [0.3, 0.2]

for test_size in test_sizes:
    print(f"\nTest Size: {test_size}")

    # Start a fresh list of scores for this test size. Sharing one list
    # across test sizes would make every average after the first include
    # the runs of the earlier test sizes as well.
    accuracy_scores = []

    # Repeat the exercise 20 times. No random_state is passed to
    # train_test_split, so each iteration draws a different split.
    for i in range(20):
        # Split data into train and test sets (class proportions kept)
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size, stratify=y)

        # Initialize decision tree classifier
        # NOTE(review): the variable is named clf_gini_20 but the criterion
        # is 'entropy' - confirm which split criterion is intended.
        clf_gini_20 = DecisionTreeClassifier(criterion='entropy')

        # Train the classifier
        clf_gini_20.fit(X_train, y_train)

        # Test the classifier
        y_pred = clf_gini_20.predict(X_test)

        # Calculate accuracy and store it
        accuracy = accuracy_score(y_test, y_pred)
        accuracy_scores.append(accuracy)

    # Calculate the average accuracy over this test size's 20 runs only
    average_accuracy = sum(accuracy_scores) / len(accuracy_scores)

    print("Average Accuracy over 20 iterations:", average_accuracy)



Test Size: 0.3
Average Accuracy over 20 iterations: 0.9694605009633909

Test Size: 0.2
Average Accuracy over 20 iterations: 0.9712186897880539
