# Pycaret Install

In [7]:
!pip install pycaret

Collecting pycaret
  Downloading pycaret-2.3.5-py3-none-any.whl (288 kB)
[?25l[K     |█▏                              | 10 kB 22.7 MB/s eta 0:00:01[K     |██▎                             | 20 kB 26.1 MB/s eta 0:00:01[K     |███▍                            | 30 kB 27.5 MB/s eta 0:00:01[K     |████▌                           | 40 kB 28.1 MB/s eta 0:00:01[K     |█████▊                          | 51 kB 30.1 MB/s eta 0:00:01[K     |██████▉                         | 61 kB 32.1 MB/s eta 0:00:01[K     |████████                        | 71 kB 32.2 MB/s eta 0:00:01[K     |█████████                       | 81 kB 32.7 MB/s eta 0:00:01[K     |██████████▏                     | 92 kB 34.3 MB/s eta 0:00:01[K     |███████████▍                    | 102 kB 35.9 MB/s eta 0:00:01[K     |████████████▌                   | 112 kB 35.9 MB/s eta 0:00:01[K     |█████████████▋                  | 122 kB 35.9 MB/s eta 0:00:01[K     |██████████████▊                 | 133 kB 35.9 MB/s eta 0

# Data Load

Import King-Rook vs. King-Pawn Dataset

10% removed as unseen data to later test the model

In [22]:
# Jacob Hazzard - 991324091
# December 12th 2021
# Sheridan College

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
from pycaret.utils import enable_colab
from pycaret.classification import *
from pycaret.datasets import get_data

enable_colab()
df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/chess/king-rook-vs-king-pawn/kr-vs-kp.data')

data = df.sample(frac=0.9)
data_unseen = df.drop(data.index)

data.reset_index(drop=True, inplace=True)
data_unseen.reset_index(drop=True, inplace=True)

print('Data for Modeling: ' + str(data.shape))
print('Unseen Data For Predictions: ' + str(data_unseen.shape))

Colab mode enabled.
Data for Modeling: (2876, 37)
Unseen Data For Predictions: (319, 37)


In [23]:
class_setup = setup(data=data, target='won')

Unnamed: 0,Description,Value
0,session_id,5672
1,Target,won
2,Target Type,Binary
3,Label Encoded,"nowin: 0, won: 1"
4,Original Data,"(2876, 37)"
5,Missing Values,False
6,Numeric Features,0
7,Categorical Features,36
8,Ordinal Features,False
9,High Cardinality Features,False


# All models

Comparing all models

In [9]:
best = compare_models()

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lightgbm,Light Gradient Boosting Machine,0.9911,0.9995,0.9934,0.9898,0.9915,0.982,0.9821,0.106
dt,Decision Tree Classifier,0.9886,0.9884,0.9915,0.987,0.9892,0.9771,0.9772,0.018
et,Extra Trees Classifier,0.9856,0.9983,0.9896,0.9832,0.9864,0.9711,0.9712,0.514
rf,Random Forest Classifier,0.9836,0.9986,0.9859,0.9831,0.9845,0.9671,0.9671,0.553
gbc,Gradient Boosting Classifier,0.9732,0.9972,0.967,0.9819,0.9743,0.9462,0.9465,0.211
ada,Ada Boost Classifier,0.9613,0.9952,0.9671,0.9599,0.9634,0.9222,0.9224,0.138
lr,Logistic Regression,0.9612,0.9933,0.9642,0.9626,0.9633,0.9222,0.9224,0.339
svm,SVM - Linear Kernel,0.9523,0.0,0.9595,0.9538,0.9554,0.9041,0.9069,0.021
ridge,Ridge Classifier,0.9374,0.0,0.9482,0.9345,0.9411,0.8743,0.8749,0.016
lda,Linear Discriminant Analysis,0.9374,0.985,0.9482,0.9345,0.9411,0.8743,0.8749,0.026


# K-Nearest Neighbour

In [10]:
# K-Nearest Neighbour Algorithm Model

knn = create_model('knn')
print(knn)

Unnamed: 0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,0.9307,0.9815,0.972,0.9043,0.9369,0.8602,0.863
1,0.9356,0.9856,0.9533,0.9273,0.9401,0.8706,0.871
2,0.9257,0.9753,0.934,0.9252,0.9296,0.851,0.8511
3,0.9254,0.9802,0.9151,0.9417,0.9282,0.8505,0.8509
4,0.9104,0.953,0.9528,0.886,0.9182,0.8196,0.8222
5,0.9303,0.9803,0.9528,0.9182,0.9352,0.86,0.8607
6,0.9552,0.9833,0.9434,0.9709,0.9569,0.9103,0.9107
7,0.9254,0.9656,0.934,0.9252,0.9296,0.8502,0.8503
8,0.9552,0.9899,0.9717,0.945,0.9581,0.91,0.9104
9,0.9652,0.9926,0.9811,0.9541,0.9674,0.93,0.9304


KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=-1, n_neighbors=5, p=2,
                     weights='uniform')


In [30]:
# Tuned KNN Model

tuned_knn = tune_model(knn)

Unnamed: 0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,0.9158,0.9735,0.9126,0.9216,0.9171,0.8317,0.8317
1,0.9307,0.9825,0.9126,0.9495,0.9307,0.8614,0.8621
2,0.9455,0.9901,0.9615,0.9346,0.9479,0.8909,0.8913
3,0.9204,0.9823,0.9223,0.9223,0.9223,0.8407,0.8407
4,0.9303,0.9853,0.9417,0.9238,0.9327,0.8605,0.8607
5,0.9303,0.9854,0.9515,0.9159,0.9333,0.8605,0.8612
6,0.9204,0.9836,0.9029,0.9394,0.9208,0.8409,0.8415
7,0.9403,0.9801,0.9515,0.9333,0.9423,0.8805,0.8806
8,0.9453,0.9797,0.9612,0.934,0.9474,0.8904,0.8908
9,0.9552,0.9932,0.9417,0.97,0.9557,0.9105,0.9109


In [31]:
evaluate_model(tuned_knn)

interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Hyperparameters', 'param…

# Decision Tree

In [14]:
# Decision Tree Algorithm Model

dt = create_model('dt')
print(dt)

Unnamed: 0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,0.9901,0.9895,1.0,0.9817,0.9907,0.9801,0.9803
1,0.9901,0.9901,0.9907,0.9907,0.9907,0.9801,0.9801
2,0.9901,0.9896,1.0,0.9815,0.9907,0.9801,0.9803
3,1.0,1.0,1.0,1.0,1.0,1.0,1.0
4,0.9652,0.9653,0.9623,0.9714,0.9668,0.9302,0.9302
5,1.0,1.0,1.0,1.0,1.0,1.0,1.0
6,0.9801,0.9789,1.0,0.9636,0.9815,0.96,0.9608
7,0.99,0.9895,1.0,0.9815,0.9907,0.98,0.9802
8,0.99,0.9906,0.9811,1.0,0.9905,0.9801,0.9803
9,0.99,0.9906,0.9811,1.0,0.9905,0.9801,0.9803


DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=None, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=5293, splitter='best')


In [28]:
# Tuned DT

tuned_dt = tune_model(dt)

Unnamed: 0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,0.9257,0.9836,0.8641,0.9889,0.9223,0.8518,0.8589
1,0.9356,0.9789,0.9029,0.9688,0.9347,0.8714,0.8735
2,0.9505,0.9918,0.9904,0.9196,0.9537,0.9007,0.9035
3,0.9751,0.9885,0.9612,0.99,0.9754,0.9503,0.9507
4,0.9303,0.9837,0.9709,0.9009,0.9346,0.8603,0.8631
5,0.9104,0.9669,0.9612,0.8761,0.9167,0.8203,0.8245
6,0.9502,0.984,0.9709,0.9346,0.9524,0.9003,0.9011
7,0.9453,0.9891,1.0,0.9035,0.9493,0.8902,0.8956
8,0.9701,0.9841,0.9612,0.9802,0.9706,0.9403,0.9405
9,0.9652,0.9888,0.9612,0.9706,0.9659,0.9303,0.9304


In [43]:
evaluate_model(tuned_dt)

interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Hyperparameters', 'param…

# Logistic Regression

In [16]:
# Logistic Regression Algorithm Model

lr = create_model('lr')
print(lr)

Unnamed: 0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,0.9653,0.9949,0.9813,0.9545,0.9677,0.9303,0.9307
1,0.9653,0.9967,0.9626,0.9717,0.9671,0.9305,0.9305
2,0.9703,0.9964,0.9811,0.963,0.972,0.9404,0.9406
3,0.9602,0.9933,0.9528,0.9712,0.9619,0.9202,0.9204
4,0.9453,0.986,0.9623,0.9358,0.9488,0.89,0.8904
5,0.9453,0.9905,0.9434,0.9524,0.9479,0.8903,0.8903
6,0.9602,0.9921,0.9717,0.9537,0.9626,0.9201,0.9203
7,0.9851,0.9965,0.9811,0.9905,0.9858,0.9701,0.9701
8,0.9403,0.992,0.934,0.9519,0.9429,0.8804,0.8805
9,0.9751,0.9951,0.9717,0.981,0.9763,0.9501,0.9502


LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=1000,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=5293, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)


In [17]:
# Tuned LR

tuned_lr = tune_model(lr)

Unnamed: 0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,0.9653,0.9963,0.9626,0.9717,0.9671,0.9305,0.9305
1,0.9653,0.9968,0.9626,0.9717,0.9671,0.9305,0.9305
2,0.9752,0.9965,0.9811,0.972,0.9765,0.9503,0.9504
3,0.9652,0.9968,0.9434,0.9901,0.9662,0.9303,0.9315
4,0.9602,0.9868,0.9623,0.9623,0.9623,0.9202,0.9202
5,0.9602,0.9937,0.9528,0.9712,0.9619,0.9202,0.9204
6,0.9552,0.993,0.9717,0.945,0.9581,0.91,0.9104
7,0.9851,0.9977,0.9811,0.9905,0.9858,0.9701,0.9701
8,0.9552,0.9945,0.9434,0.9709,0.9569,0.9103,0.9107
9,0.9751,0.9973,0.9717,0.981,0.9763,0.9501,0.9502


In [19]:
evaluate_model(tuned_lr)

interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Hyperparameters', 'param…

# Naive Bayes

In [33]:
# Naive Bayes Algorithm

nb = create_model('nb')

Unnamed: 0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,0.5842,0.8991,0.9806,0.5519,0.7063,0.1547,0.2608
1,0.797,0.9155,0.9612,0.7279,0.8285,0.5913,0.6261
2,0.7772,0.8927,0.9423,0.7153,0.8133,0.5497,0.5824
3,0.7662,0.8929,0.8932,0.7188,0.7965,0.5291,0.5466
4,0.7512,0.8975,0.9029,0.6992,0.7881,0.4985,0.5227
5,0.7264,0.8491,0.8932,0.6765,0.7699,0.4479,0.4747
6,0.8209,0.9398,0.9417,0.7638,0.8435,0.6394,0.6587
7,0.7413,0.9009,0.932,0.6809,0.7869,0.4773,0.5165
8,0.791,0.919,0.9417,0.7293,0.822,0.5787,0.6068
9,0.8259,0.953,0.9515,0.7656,0.8485,0.6494,0.6707


In [34]:
# Tuned NB

tuned_nb = tune_model(nb)

Unnamed: 0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,0.8267,0.9304,0.8447,0.8208,0.8325,0.6531,0.6534
1,0.8465,0.9364,0.9223,0.8051,0.8597,0.692,0.6999
2,0.8465,0.9257,0.9038,0.8174,0.8584,0.6918,0.696
3,0.8109,0.9217,0.8447,0.7982,0.8208,0.6211,0.6222
4,0.8408,0.9227,0.8932,0.8142,0.8519,0.6806,0.684
5,0.7811,0.8907,0.8738,0.7438,0.8036,0.56,0.5693
6,0.8358,0.9452,0.8641,0.8241,0.8436,0.671,0.6719
7,0.796,0.9267,0.8835,0.7583,0.8161,0.5901,0.5988
8,0.8209,0.9332,0.8738,0.7965,0.8333,0.6407,0.6439
9,0.8657,0.9619,0.932,0.8276,0.8767,0.7303,0.7365


In [35]:
evaluate_model(tuned_nb)

interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Hyperparameters', 'param…

# Finalized Models

In [36]:
final_knn = finalize_model(tuned_knn)
final_dt = finalize_model(tuned_dt)
final_lr = finalize_model(tuned_lr)
final_nb = finalize_model(tuned_nb)

# Unseen Data Prediction

KNN

In [37]:
unseen_predictions_knn = predict_model(final_knn, data = data_unseen)
unseen_predictions_knn.head()

Unnamed: 0,f,f.1,f.2,f.3,f.4,f.5,f.6,f.7,f.8,f.9,f.10,f.11,l,f.12,n,f.13,f.14,t,f.15,f.16,f.17,f.18,f.19,f.20,f.21,t.1,f.22,f.23,f.24,f.25,f.26,f.27,f.28,t.2,t.3,n.1,won,Label,Score
0,f,f,f,f,t,f,f,f,f,f,t,f,l,f,n,f,f,t,f,f,f,f,f,f,f,t,f,f,f,f,f,f,f,t,t,n,won,won,0.7143
1,f,f,f,f,t,f,t,f,f,f,t,f,l,f,n,f,f,t,f,f,f,f,f,f,f,t,f,f,f,f,f,f,f,t,t,n,won,won,0.7857
2,f,f,f,f,f,f,f,f,t,f,t,f,l,f,n,f,f,t,f,t,f,t,f,f,f,t,f,f,f,f,t,f,f,t,t,n,won,won,0.6429
3,f,f,f,f,t,f,t,f,f,f,t,f,l,f,w,f,f,t,f,f,f,f,f,f,f,t,f,f,f,f,f,f,f,t,t,n,won,won,0.8571
4,f,f,f,f,f,f,f,f,f,f,t,f,l,f,w,f,f,t,f,f,f,f,f,t,f,t,f,f,f,f,t,f,f,t,t,n,won,won,1.0


Decision Tree

In [38]:
unseen_predictions_dt = predict_model(final_dt, data = data_unseen)
unseen_predictions_dt.head()

Unnamed: 0,f,f.1,f.2,f.3,f.4,f.5,f.6,f.7,f.8,f.9,f.10,f.11,l,f.12,n,f.13,f.14,t,f.15,f.16,f.17,f.18,f.19,f.20,f.21,t.1,f.22,f.23,f.24,f.25,f.26,f.27,f.28,t.2,t.3,n.1,won,Label,Score
0,f,f,f,f,t,f,f,f,f,f,t,f,l,f,n,f,f,t,f,f,f,f,f,f,f,t,f,f,f,f,f,f,f,t,t,n,won,won,0.8994
1,f,f,f,f,t,f,t,f,f,f,t,f,l,f,n,f,f,t,f,f,f,f,f,f,f,t,f,f,f,f,f,f,f,t,t,n,won,won,0.8994
2,f,f,f,f,f,f,f,f,t,f,t,f,l,f,n,f,f,t,f,t,f,t,f,f,f,t,f,f,f,f,t,f,f,t,t,n,won,won,0.8994
3,f,f,f,f,t,f,t,f,f,f,t,f,l,f,w,f,f,t,f,f,f,f,f,f,f,t,f,f,f,f,f,f,f,t,t,n,won,won,0.8994
4,f,f,f,f,f,f,f,f,f,f,t,f,l,f,w,f,f,t,f,f,f,f,f,t,f,t,f,f,f,f,t,f,f,t,t,n,won,won,0.8994


Logistic Regression

In [42]:
unseen_predictions_lr = predict_model(final_lr, data = data_unseen)
unseen_predictions_lr.head()

Unnamed: 0,f,f.1,f.2,f.3,f.4,f.5,f.6,f.7,f.8,f.9,f.10,f.11,l,f.12,n,f.13,f.14,t,f.15,f.16,f.17,f.18,f.19,f.20,f.21,t.1,f.22,f.23,f.24,f.25,f.26,f.27,f.28,t.2,t.3,n.1,won,Label,Score
0,f,f,f,f,t,f,f,f,f,f,t,f,l,f,n,f,f,t,f,f,f,f,f,f,f,t,f,f,f,f,f,f,f,t,t,n,won,won,0.904
1,f,f,f,f,t,f,t,f,f,f,t,f,l,f,n,f,f,t,f,f,f,f,f,f,f,t,f,f,f,f,f,f,f,t,t,n,won,won,0.8431
2,f,f,f,f,f,f,f,f,t,f,t,f,l,f,n,f,f,t,f,t,f,t,f,f,f,t,f,f,f,f,t,f,f,t,t,n,won,won,0.8907
3,f,f,f,f,t,f,t,f,f,f,t,f,l,f,w,f,f,t,f,f,f,f,f,f,f,t,f,f,f,f,f,f,f,t,t,n,won,won,0.9858
4,f,f,f,f,f,f,f,f,f,f,t,f,l,f,w,f,f,t,f,f,f,f,f,t,f,t,f,f,f,f,t,f,f,t,t,n,won,won,0.9962


Naive Bayes

In [40]:
unseen_predictions_nb = predict_model(final_nb, data = data_unseen)
unseen_predictions_nb.head()

Unnamed: 0,f,f.1,f.2,f.3,f.4,f.5,f.6,f.7,f.8,f.9,f.10,f.11,l,f.12,n,f.13,f.14,t,f.15,f.16,f.17,f.18,f.19,f.20,f.21,t.1,f.22,f.23,f.24,f.25,f.26,f.27,f.28,t.2,t.3,n.1,won,Label,Score
0,f,f,f,f,t,f,f,f,f,f,t,f,l,f,n,f,f,t,f,f,f,f,f,f,f,t,f,f,f,f,f,f,f,t,t,n,won,won,0.8708
1,f,f,f,f,t,f,t,f,f,f,t,f,l,f,n,f,f,t,f,f,f,f,f,f,f,t,f,f,f,f,f,f,f,t,t,n,won,won,0.7651
2,f,f,f,f,f,f,f,f,t,f,t,f,l,f,n,f,f,t,f,t,f,t,f,f,f,t,f,f,f,f,t,f,f,t,t,n,won,won,0.986
3,f,f,f,f,t,f,t,f,f,f,t,f,l,f,w,f,f,t,f,f,f,f,f,f,f,t,f,f,f,f,f,f,f,t,t,n,won,won,0.9966
4,f,f,f,f,f,f,f,f,f,f,t,f,l,f,w,f,f,t,f,f,f,f,f,t,f,t,f,f,f,f,t,f,f,t,t,n,won,won,0.9997
