In [1]:
import pandas as pd
import numpy as np

import warnings
warnings.filterwarnings('ignore')

In [2]:
data = pd.read_csv("king_rook_vs_king.csv")
df = pd.DataFrame(data)
df.head()

Unnamed: 0,white_king_file,white_king_rank,white_rook_file,white_rook_rank,black_king_file,black_king_rank,white_depth_of_win
0,a,1,b,3,c,2,draw
1,a,1,c,1,c,2,draw
2,a,1,c,1,d,1,draw
3,a,1,c,1,d,2,draw
4,a,1,c,2,c,1,draw


In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 28056 entries, 0 to 28055
Data columns (total 7 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   white_king_file     28056 non-null  object
 1   white_king_rank     28056 non-null  int64 
 2   white_rook_file     28056 non-null  object
 3   white_rook_rank     28056 non-null  int64 
 4   black_king_file     28056 non-null  object
 5   black_king_rank     28056 non-null  int64 
 6   white_depth_of_win  28056 non-null  object
dtypes: int64(3), object(4)
memory usage: 1.5+ MB


In [4]:
df.isnull().sum()

white_king_file       0
white_king_rank       0
white_rook_file       0
white_rook_rank       0
black_king_file       0
black_king_rank       0
white_depth_of_win    0
dtype: int64

In [5]:
df_encoded = pd.get_dummies(df, columns=['white_king_file', 'white_rook_file','black_king_file'], drop_first=True)

In [6]:
df_encoded.head()

Unnamed: 0,white_king_rank,white_rook_rank,black_king_rank,white_depth_of_win,white_king_file_b,white_king_file_c,white_king_file_d,white_rook_file_b,white_rook_file_c,white_rook_file_d,...,white_rook_file_f,white_rook_file_g,white_rook_file_h,black_king_file_b,black_king_file_c,black_king_file_d,black_king_file_e,black_king_file_f,black_king_file_g,black_king_file_h
0,1,3,2,draw,False,False,False,True,False,False,...,False,False,False,False,True,False,False,False,False,False
1,1,1,2,draw,False,False,False,False,True,False,...,False,False,False,False,True,False,False,False,False,False
2,1,1,1,draw,False,False,False,False,True,False,...,False,False,False,False,False,True,False,False,False,False
3,1,1,2,draw,False,False,False,False,True,False,...,False,False,False,False,False,True,False,False,False,False
4,1,2,1,draw,False,False,False,False,True,False,...,False,False,False,False,True,False,False,False,False,False


In [7]:
depth_mapping = {
    'draw': 0,
    'zero': 1, 'one': 1, 'two': 1, 'three': 1, 'four': 1,
    'five': 2, 'six': 2, 'seven': 2, 'eight': 2,
    'nine': 3, 'ten': 3, 'eleven': 3, 'twelve': 3,
    'thirteen': 4, 'fourteen': 4, 'fifteen': 4, 'sixteen': 4
}

# Apply the transformation
df_encoded['win_depth_level'] = df_encoded['white_depth_of_win'].map(depth_mapping)

# Display the transformed DataFrame
df_encoded

Unnamed: 0,white_king_rank,white_rook_rank,black_king_rank,white_depth_of_win,white_king_file_b,white_king_file_c,white_king_file_d,white_rook_file_b,white_rook_file_c,white_rook_file_d,...,white_rook_file_g,white_rook_file_h,black_king_file_b,black_king_file_c,black_king_file_d,black_king_file_e,black_king_file_f,black_king_file_g,black_king_file_h,win_depth_level
0,1,3,2,draw,False,False,False,True,False,False,...,False,False,False,True,False,False,False,False,False,0
1,1,1,2,draw,False,False,False,False,True,False,...,False,False,False,True,False,False,False,False,False,0
2,1,1,1,draw,False,False,False,False,True,False,...,False,False,False,False,True,False,False,False,False,0
3,1,1,2,draw,False,False,False,False,True,False,...,False,False,False,False,True,False,False,False,False,0
4,1,2,1,draw,False,False,False,False,True,False,...,False,False,False,True,False,False,False,False,False,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28051,1,7,5,sixteen,True,False,False,False,False,False,...,True,False,False,False,False,True,False,False,False,4
28052,1,7,6,sixteen,True,False,False,False,False,False,...,True,False,False,False,False,True,False,False,False,4
28053,1,7,7,sixteen,True,False,False,False,False,False,...,True,False,False,False,False,True,False,False,False,4
28054,1,7,5,sixteen,True,False,False,False,False,False,...,True,False,False,False,False,False,True,False,False,4


In [8]:
df_encoded = df_encoded.drop(columns=['white_depth_of_win'])
df_encoded.shape

(28056, 21)

In [9]:
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.metrics import accuracy_score, confusion_matrix,classification_report

In [10]:
# Splitting data into features (X) and target (y)
X = df_encoded.drop(columns=['win_depth_level'])
y = df_encoded['win_depth_level']

In [11]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [12]:
from sklearn.preprocessing import StandardScaler
ss = StandardScaler()

X_train = ss.fit_transform(X_train)
X_test = ss.transform(X_test)

In [13]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

In [14]:
models = [
    ('Logistic Regression', LogisticRegression()),
    ('Decision Tree', DecisionTreeClassifier()),
    ('Random Forest', RandomForestClassifier())
]

for model_name, model in models:
    
    model.fit(X_train,y_train)
    
    y_train_pred = model.predict(X_train)
    y_test_pred = model.predict(X_test)
    
    train_accuracy = accuracy_score(y_train,y_train_pred)
    test_accuracy = accuracy_score(y_test,y_test_pred)

    print(model_name)
    print('Train_accuracy :',train_accuracy)
    print('Test_accuracy :\n',test_accuracy)    

Logistic Regression
Train_accuracy : 0.6545624665834967
Test_accuracy :
 0.6683891660727014
Decision Tree
Train_accuracy : 1.0
Test_accuracy :
 0.8991446899501069
Random Forest
Train_accuracy : 1.0
Test_accuracy :
 0.8611903064861012


In [15]:
dt_classifier = DecisionTreeClassifier(random_state=42)

In [16]:
param_grid = {
    'criterion': ['gini', 'entropy','log_loss'],            # Measure for splitting nodes
    'max_depth': [None, 10, 20, 30],              # Maximum depth of the tree
    'min_samples_split': [2, 5, 10],              # Minimum number of samples required to split an internal node
    'min_samples_leaf': [1, 2, 4],                # Minimum number of samples required at a leaf node
    'max_features': [None, 'sqrt', 'log2'],      # Number of features to consider for the best split
    'splitter': ['best', 'random'],              # Strategy used to split at each node
    'class_weight': [None, 'balanced'],           # Weights associated with classes
    'max_leaf_nodes': [None, 10, 20, 30],         # Maximum number of leaf nodes in the tree
}

In [17]:
grid_search = GridSearchCV(estimator=dt_classifier, param_grid=param_grid, cv=4, n_jobs=-1, verbose=2, scoring='accuracy')

In [18]:
grid_search.fit(X_train, y_train)

Fitting 4 folds for each of 5184 candidates, totalling 20736 fits


In [19]:
best_params = grid_search.best_params_

print(f"Best Hyperparameters: {best_params}")

best_dt_classifier = grid_search.best_estimator_

Best Hyperparameters: {'class_weight': None, 'criterion': 'entropy', 'max_depth': None, 'max_features': None, 'max_leaf_nodes': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'splitter': 'best'}


In [20]:
cv_scores = cross_val_score(best_dt_classifier, X_train, y_train, cv=4)

In [21]:
print(f"Cross-validation scores: {cv_scores}")
print(f"Mean cross-validation score: {cv_scores.mean()}")

Cross-validation scores: [0.86276956 0.85296739 0.84922474 0.85047229]
Mean cross-validation score: 0.8538584922473712


In [22]:
best_dt_classifier.fit(X_train, y_train)

In [23]:
y_pred = best_dt_classifier.predict(X_test)
y_train_pred = best_dt_classifier.predict(X_train)

In [24]:
train_accuracy = accuracy_score(y_train,y_train_pred)
test_accuracy = accuracy_score(y_test,y_pred)
conf_matrix = confusion_matrix(y_test,y_pred)
classfication_report = classification_report(y_test, y_pred)

In [25]:
print("Train Accuracy:\n", train_accuracy)
print("Test Accuracy:\n", test_accuracy)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classfication_report)

Train Accuracy:
 1.0
Test Accuracy:
 0.8888096935138988
Confusion Matrix:
 [[ 389    6   45   61   48]
 [  11  106   11    2    0]
 [  38    6  533   55    4]
 [  59    2   52 1874   85]
 [  48    0    4   87 2086]]
Classification Report:
               precision    recall  f1-score   support

           0       0.71      0.71      0.71       549
           1       0.88      0.82      0.85       130
           2       0.83      0.84      0.83       636
           3       0.90      0.90      0.90      2072
           4       0.94      0.94      0.94      2225

    accuracy                           0.89      5612
   macro avg       0.85      0.84      0.85      5612
weighted avg       0.89      0.89      0.89      5612

