# 03 - Baseline Model

## Setup

In [75]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from IPython.display import display, Markdown

sns.set(style="darkgrid")
pd.set_option('display.max_columns', None)  

import sys, os, yaml

DATASET = "Tic-Tac-Toe"
COLAB = 'google.colab' in sys.modules

DEBUG = False
SEED = 666

In [76]:
COLAB = 'google.colab' in sys.modules

if COLAB:
  from google.colab import drive
  if not os.path.isdir("/content/gdrive"):
    drive.mount("/content/gdrive")
    d = "/content/gdrive/MyDrive/datasets"
    if not os.path.isdir(ROOT+d): os.makedirs(ROOT+d)
  ROOT = f"/content/gdrive/MyDrive/datasets/{DATASET.replace(' ','_')}/"
  if not os.path.isdir(ROOT): os.makedirs(ROOT)
else:
  ROOT = "./"

def makedirs(d):
  if COLAB:
    if not os.path.isdir(ROOT+d): os.makedirs(ROOT+d)
  else:
    if not os.path.isdir(ROOT+d): os.makedirs(ROOT+d, mode=0o777, exist_ok=True)

for d in ['orig','data','output']: makedirs(d)

## Load Dataset

In [77]:
df = pd.read_pickle(f"{ROOT}/data/data.pkl")
print(df.shape)
df.head()

(958, 10)


Unnamed: 0,Top-left-square,Top-middle-square,Top-right-square,Middle-left-square,Middle-middle-square,Middle-right-square,Bottom-left-square,Bottom-middle-square,Bottom-right-square,Score
0,1,1,1,1,-1,-1,1,-1,-1,1
1,1,1,1,1,-1,-1,-1,1,-1,1
2,1,1,1,1,-1,-1,-1,-1,1,1
3,1,1,1,1,-1,-1,-1,0,0,1
4,1,1,1,1,-1,-1,0,-1,0,1


## Preprocessing Data

In [78]:
target = "Score"
print(f"target = {target}")

cat_features = [c for c in df.select_dtypes("category").columns if c!= target]
print(f"\nCategorical features ({len(cat_features)}): {cat_features}")


target = Score

Categorical features (9): ['Top-left-square', 'Top-middle-square', 'Top-right-square', 'Middle-left-square', 'Middle-middle-square', 'Middle-right-square', 'Bottom-left-square', 'Bottom-middle-square', 'Bottom-right-square']


In [79]:
X = df[cat_features]
y = df[target]

## Train/Test Split

In [80]:
y.value_counts(normalize=True)

Score
1     0.653445
-1    0.346555
Name: proportion, dtype: float64

In [81]:
from sklearn.model_selection import train_test_split

X_train,X_test,y_train,y_test = train_test_split(X,y, train_size=0.80, stratify=y, random_state=SEED)

print(X_train.shape,X_test.shape)
y.value_counts(normalize=True)

(766, 9) (192, 9)


Score
1     0.653445
-1    0.346555
Name: proportion, dtype: float64

In [102]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import Perceptron
from sklearn.preprocessing import OneHotEncoder

ohe = OneHotEncoder()

classifiers = {
    "KNN" : KNeighborsClassifier(),
    "KNN(3)" : KNeighborsClassifier(3),
    "DT" : DecisionTreeClassifier(),
    "DT(max_depth=5)" : DecisionTreeClassifier(max_depth=5),
    "MPL" : MLPClassifier(max_iter=1000),
    "GaussianNB" : GaussianNB(),
    "Perceptron" : Perceptron(),
}

In [110]:
from sklearn.metrics import accuracy_score

for name, model in classifiers.items():
    
    if name == 'MPL':
        X_mpl_train, X_mpl_test= X_train.copy(),X_test.copy()

        ohe.fit(X_mpl_train)

        X_mpl_train = ohe.transform(X_mpl_train)
        X_mpl_test = ohe.transform(X_mpl_test)
        
        model.fit(X_mpl_train, y_train)
        # Scoring on UNSEEN data - important
        y_pred = model.predict(X_mpl_test)
        test_accuracy = accuracy_score(y_test, y_pred)

    else:    
        model.fit(X_train, y_train)
        # Scoring on UNSEEN data - important
        y_pred = model.predict(X_test)
        test_accuracy = accuracy_score(y_test, y_pred)
    
    print(f"{name:20s} accuracy\ttest = {test_accuracy:.2%}")

KNN                  accuracy	test = 99.48%
KNN(3)               accuracy	test = 100.00%
DT                   accuracy	test = 95.83%
DT(max_depth=5)      accuracy	test = 86.98%
MPL                  accuracy	test = 99.48%
GaussianNB           accuracy	test = 66.15%
Perceptron           accuracy	test = 98.96%
