In [9]:
# Install seaborn for the plotting cell below. The %pip magic (unlike !pip)
# installs into the environment of the running kernel.
# NOTE(review): the version is not pinned -- consider `seaborn==<version>` so
# that a fresh "Restart & Run All" reproduces the same figures.
%pip install seaborn

In [None]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import cross_val_score
import numpy as np 

# Column labels applied to the loaded table, in file order (23 columns).
# The target comes first, followed by the engineered per-side features.
# NOTE(review): both "Centre" and "Center" spellings occur below; they are
# kept exactly as they are because later code may refer to these names.
COLUMN_NAMES = [
    "Winner",
    "White_Weighted_Value", "Black_Weighted_Value",
    "White_Centre_Weighted_Value", "Black_Centre_Weighted_Value",
    "White_King_Safety", "Black_King_Safety",
    "White_Piece_Count", "Black_Piece_Count",
    "White_Center_Piece_Count", "Black_Center_Piece_Count",
    "White_Opponent_Moves", "Black_Opponent_Moves",
    "White_Rook_Positions", "White_Queen_Positions",
    "White_Knight_Positions", "White_King_Positions", "White_Bishop_Positions",
    "Black_Rook_Positions", "Black_Queen_Positions",
    "Black_Knight_Positions", "Black_King_Positions", "Black_Bishop_Positions",
]

# Read the data set and replace whatever column labels read_csv produced with
# the names above. set_axis raises if the file does not have exactly
# len(COLUMN_NAMES) columns.
game = pd.read_csv("subset.csv").set_axis(COLUMN_NAMES, axis=1)

# Turn the categorical columns into integer codes.
# (The original note describes the king-safety flags as Y/N -> 0/1; the actual
# values are not visible here -- LabelEncoder simply numbers the sorted
# distinct values of each column.)
# The same encoder object is refit for every column, so once the loop is done
# it only remembers the mapping of the last column, "Winner".
encoder = LabelEncoder()
for categorical_column in ("White_King_Safety", "Black_King_Safety", "Winner"):
    game[categorical_column] = encoder.fit_transform(game[categorical_column])

# Target vector (y) and predictor matrix (X = every column except the target).
y = game["Winner"]
X = game.drop(columns=["Winner"])

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Train an unpruned decision tree, then pick a cost-complexity pruning
# strength (ccp_alpha) by 5-fold cross-validation on the training data and fit
# a pruned tree with it.
#
# NOTE: the variable names say "J48" (Weka's C4.5 implementation), but
# scikit-learn's DecisionTreeClassifier is a CART-style tree, so these models
# should not be expected to reproduce Weka's J48 output exactly.

# Baseline: fully grown tree, no pruning.
original_j48_classifier = DecisionTreeClassifier(random_state=42)
original_j48_classifier.fit(X_train, y_train)

# Candidate pruning strengths: the effective alphas along the pruning path of
# the tree grown on the training data.
path = original_j48_classifier.cost_complexity_pruning_path(X_train, y_train)

# Floating-point round-off can make the smallest effective alphas come back as
# tiny negative numbers, and DecisionTreeClassifier rejects ccp_alpha < 0.
# Clamp at zero so every candidate is a valid parameter. Clipping (rather than
# filtering) keeps ccp_alphas aligned element-for-element with impurities.
ccp_alphas = np.clip(path.ccp_alphas, 0.0, None)
impurities = path.impurities

# Cross-validated error (1 - mean accuracy) for each candidate alpha.
errors = []
for alpha in ccp_alphas:
    pruned_tree = DecisionTreeClassifier(random_state=42, ccp_alpha=alpha)
    scores = cross_val_score(pruned_tree, X_train, y_train, cv=5, scoring='accuracy')
    errors.append(1 - scores.mean())

# Alpha with the minimum cross-validated error. np.argmin returns the first
# minimum, so ties resolve to the smallest alpha (the least-pruned tree).
best_alpha = ccp_alphas[np.argmin(errors)]

# Final pruned tree, built directly with the selected alpha and refit on the
# full training set.
pruned_j48_classifier = DecisionTreeClassifier(random_state=42, ccp_alpha=best_alpha)
pruned_j48_classifier.fit(X_train, y_train)

# One 2x3 figure comparing the original and the pruned decision tree:
#   row 0: original tree | pruned tree | confusion matrix (original)
#   row 1: confusion matrix (pruned) | importances (original) | importances (pruned)

# Labels shared by every panel.
# - feature_names is a plain list because some scikit-learn releases only
#   accept a list (not a pandas Index) for plot_tree's feature_names.
# - Class labels are taken from the fitted classifier instead of a hard-coded
#   ["0", "1"], so the figure keeps working when the target has more than two
#   classes. Both classifiers were fit on y_train, so they share classes_.
feature_names = list(X.columns)
class_labels = original_j48_classifier.classes_
class_names = [str(label) for label in class_labels]

fig, axes = plt.subplots(nrows=2, ncols=3, figsize=(18, 12))

# --- Tree diagrams ----------------------------------------------------------
# plot_tree clears the target axes before drawing, so the title must be set
# AFTER the call; a title set beforehand is wiped. With the title in place the
# extra text-box overlay that duplicated it is no longer needed.
plot_tree(original_j48_classifier, filled=True, feature_names=feature_names,
          class_names=class_names, ax=axes[0, 0])
axes[0, 0].set_title('Original Decision Tree')

plot_tree(pruned_j48_classifier, filled=True, feature_names=feature_names,
          class_names=class_names, ax=axes[0, 1])
axes[0, 1].set_title('Pruned Decision Tree')

# --- Confusion matrices on the held-out test set ----------------------------
# labels=class_labels pins the matrix to one row/column per training class, so
# its shape always matches the tick labels even if a class is absent from the
# test predictions. Rows are true labels, columns are predicted labels.
y_pred_original = original_j48_classifier.predict(X_test)
conf_matrix_original = confusion_matrix(y_test, y_pred_original, labels=class_labels)
sns.heatmap(conf_matrix_original, annot=True, fmt="d", cmap="Blues",
            xticklabels=class_names, yticklabels=class_names, ax=axes[0, 2])
axes[0, 2].set_xlabel('Predicted label')
axes[0, 2].set_ylabel('True label')
axes[0, 2].set_title('Confusion Matrix (Original Decision Tree)')

y_pred_pruned = pruned_j48_classifier.predict(X_test)
conf_matrix_pruned = confusion_matrix(y_test, y_pred_pruned, labels=class_labels)
sns.heatmap(conf_matrix_pruned, annot=True, fmt="d", cmap="Blues",
            xticklabels=class_names, yticklabels=class_names, ax=axes[1, 0])
axes[1, 0].set_xlabel('Predicted label')
axes[1, 0].set_ylabel('True label')
axes[1, 0].set_title('Confusion Matrix (Pruned Decision Tree)')

# --- Feature importances (one horizontal bar per predictor) -----------------
feature_importances_original = original_j48_classifier.feature_importances_
sns.barplot(x=feature_importances_original, y=feature_names, ax=axes[1, 1])
axes[1, 1].set_xlabel('Importance')
axes[1, 1].set_title('Feature Importances (Original Decision Tree)')

feature_importances_pruned = pruned_j48_classifier.feature_importances_
sns.barplot(x=feature_importances_pruned, y=feature_names, ax=axes[1, 2])
axes[1, 2].set_xlabel('Importance')
axes[1, 2].set_title('Feature Importances (Pruned Decision Tree)')

plt.tight_layout()
plt.show()

# Report how deep each fitted tree is, to show the effect of pruning.
tree_depth_original, tree_depth_pruned = (
    fitted_tree.get_depth()
    for fitted_tree in (original_j48_classifier, pruned_j48_classifier)
)

for depth_label, depth_value in (
    ("Original Decision Tree Depth:", tree_depth_original),
    ("Pruned Decision Tree Depth:", tree_depth_pruned),
):
    print(depth_label, depth_value)
