# Predicting outcome of chess games based on my performance this year

In [10]:
import numpy as np
import pandas as pd

from keras.models import Sequential
from keras.layers import Dense

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import cross_val_score

from time import time

from keras.wrappers.scikit_learn import KerasClassifier

from dummies_bins_test_train_cv import get_Xy_train_test
from different_xy import diff_bins, xy_custom

## If new data is added, uncomment and run the next line

In [11]:
from clean_chess_game_log import main_cleanup
_,_,_ = main_cleanup('../data/dest.pgn')

In [12]:
df = pd.read_csv('../data/use_for_predictions.csv')

In [14]:
df = df.loc[df['result'] != 0.5].copy()
df.reset_index(inplace=True)
df.drop(columns=['index'], inplace=True)

In [15]:
df, y = clean_df_y(df)

NameError: name 'clean_df_y' is not defined

## Creating the classifier:
__The _Fun_ Part!!__

### Ran the classifier with new data every time I played on Chess.com.

Which is how I ended up with the following steps:

- __Step 1__
    - Add the sequential layer
- __Step 2__
    - Add the __Dense__ layer with following parameters:
        - Units = 128
        - Initializer = Uniform
        - Activation = Softmax (Since the outcome is either 1 or 0, Softmax works wonders)
        - Input Dim = the second part of the X Shape (# of columns)
- __Step 3__
    - Add the second __Dense__ layer with following parameters:
        - Units = 256 (Double it up!!)
        - Initializer = Normal (Throw a curve ball!)
        - Activation = ReLu (Prevents Vanashing Gradiant problem)
- __Step 4__
    - Add the third __Dense__ layer with following parameters:
        - Units = 64 (Make it easier for the compiling)
        - Initializer = Uniform
        - Activation = Softmax
- __Step 5__
    - Add the final __Dense__ layer with following parameters:
        - Units = 1 (Just need 1 column for output)
        - Initializer = Normal (#MuscleConfusion. I kid!)
        - Activation = Sigmoid
- __Step 6__
    - Only think left is to __COMPILE__ it. From all the parameters I've tested. These work the best:
        - Optimizer = Nadam (Nesterov Adam optimizer - Nadam is Adam RMSprop with Nesterov momentum)
        - loss = Mean Absolute Error (Didn't get great results with binary_crossentropy, even though it was my first choice. Mean Squared Error, made error very small, since the loss is between 0 and 1.)
        - metrics = Binary Accuracy (Only have 0 or 1 to predict)

In [16]:
def _classifier():

    classifier = Sequential()
    
    classifier.add(Dense(units=64, activation='softmax', input_dim=X.shape[1]))
    
    classifier.add(Dense(units=128, activation='relu'))
    
    classifier.add(Dense(units=32, activation='softmax'))
    
    classifier.add(Dense(units=1, activation='sigmoid'))
    
    classifier.compile(
        optimizer='nadam', loss='binary_crossentropy', metrics=['accuracy'])

    return classifier

## Load the data to run predictions

## Create Train and Test sets, along with clean the data
### Also, see the shape to know the data is in right shape

In [7]:
all_cols_try = [
    ['diff_bin'], ['color'], ['game_time'], ['time_bin'], ['weekday'],
    ['diff_bin', 'color'], ['diff_bin', 'time_bin'], ['diff_bin', 'game_time'],
    ['diff_bin', 'weekday'], ['diff_bin', 'color', 'time_bin'],
    ['diff_bin', 'color', 'game_time'], ['diff_bin', 'color', 'weekday'],
    ['diff_bin', 'time_bin', 'game_time'], ['diff_bin', 'time_bin', 'weekday'],
    ['diff_bin', 'game_time', 'weekday'],
    ['diff_bin', 'color', 'time_bin', 'game_time'],
    ['diff_bin', 'color', 'time_bin', 'weekday'],
    ['diff_bin', 'color', 'game_time', 'weekday'],
    ['diff_bin', 'time_bin', 'game_time', 'weekday'],
    ['diff_bin', 'color', 'game_time', 'time_bin', 'weekday']]

In [8]:
results = {}

In [7]:
for clm in all_cols_try:
    st = time()
    X_train, X_test, y_train, y_test, X = xy_custom(df, y, 100, clm)
    std_sclr = StandardScaler()
    X_train = std_sclr.fit_transform(X_train)
    X_test = std_sclr.fit_transform(X_test)
    for bs in [6, 12, 24, 36, 48, 96]:
        for ep in [48, 96, 192]:
            classifier = _classifier()
            classifier.fit(X_train, y_train, batch_size=bs, epochs=ep,
                          class_weight='balanced', shuffle=False, verbose=2)
            y_pred = classifier.predict(X_test)
            y_pred = (y_pred > 0.5)
            cm = confusion_matrix(y_test, y_pred)
            results[(f'cols:{clm}, bs:{bs}, ep:{ep} (cm)')] = list(cm)
            results[(f'cols:{clm}, bs:{bs}, ep:{ep} (accuracy)')
                    ] = (f'{((cm[0][0]+cm[1][1])/cm.sum()*100).round(1)}%')
        print(clm)
    print(results)
    print(time() - st)

y Shape: (2117,)
X Shape: (2117, 1)
X_train Shape: (2011, 1)
X_test Shape: (106, 1)
y_train Shape: (2011,)
y_test Shape: (106,)
Epoch 1/96
Epoch 2/96
Epoch 3/96
Epoch 4/96
Epoch 5/96
Epoch 6/96
Epoch 7/96
Epoch 8/96
Epoch 9/96
Epoch 10/96
Epoch 11/96
Epoch 12/96
Epoch 13/96
Epoch 14/96
Epoch 15/96
Epoch 16/96
Epoch 17/96
Epoch 18/96
Epoch 19/96
Epoch 20/96
Epoch 21/96
Epoch 22/96
Epoch 23/96
Epoch 24/96
Epoch 25/96
Epoch 26/96
Epoch 27/96
Epoch 28/96
Epoch 29/96
Epoch 30/96
Epoch 31/96
Epoch 32/96
Epoch 33/96
Epoch 34/96
Epoch 35/96
Epoch 36/96
Epoch 37/96
Epoch 38/96
Epoch 39/96
Epoch 40/96
Epoch 41/96
Epoch 42/96
Epoch 43/96
Epoch 44/96
Epoch 45/96
Epoch 46/96
Epoch 47/96
Epoch 48/96
Epoch 49/96
Epoch 50/96
Epoch 51/96
Epoch 52/96
Epoch 53/96
Epoch 54/96
Epoch 55/96
Epoch 56/96
Epoch 57/96
Epoch 58/96
Epoch 59/96
Epoch 60/96
Epoch 61/96
Epoch 62/96
Epoch 63/96
Epoch 64/96
Epoch 65/96
Epoch 66/96
Epoch 67/96
Epoch 68/96
Epoch 69/96
Epoch 70/96
Epoch 71/96
Epoch 72/96
Epoch 73/96
Epoch

Epoch 65/196
Epoch 66/196
Epoch 67/196
Epoch 68/196
Epoch 69/196
Epoch 70/196
Epoch 71/196
Epoch 72/196
Epoch 73/196
Epoch 74/196
Epoch 75/196
Epoch 76/196
Epoch 77/196
Epoch 78/196
Epoch 79/196
Epoch 80/196
Epoch 81/196
Epoch 82/196
Epoch 83/196
Epoch 84/196
Epoch 85/196
Epoch 86/196
Epoch 87/196
Epoch 88/196
Epoch 89/196
Epoch 90/196
Epoch 91/196
Epoch 92/196
Epoch 93/196
Epoch 94/196
Epoch 95/196
Epoch 96/196
Epoch 97/196
Epoch 98/196
Epoch 99/196
Epoch 100/196
Epoch 101/196
Epoch 102/196
Epoch 103/196
Epoch 104/196
Epoch 105/196
Epoch 106/196
Epoch 107/196
Epoch 108/196
Epoch 109/196
Epoch 110/196
Epoch 111/196
Epoch 112/196
Epoch 113/196
Epoch 114/196
Epoch 115/196
Epoch 116/196
Epoch 117/196
Epoch 118/196
Epoch 119/196
Epoch 120/196
Epoch 121/196
Epoch 122/196
Epoch 123/196
Epoch 124/196
Epoch 125/196
Epoch 126/196
Epoch 127/196
Epoch 128/196
Epoch 129/196
Epoch 130/196
Epoch 131/196
Epoch 132/196
Epoch 133/196
Epoch 134/196
Epoch 135/196
Epoch 136/196
Epoch 137/196
Epoch 138/196

Epoch 28/96
Epoch 29/96
Epoch 30/96
Epoch 31/96
Epoch 32/96
Epoch 33/96
Epoch 34/96
Epoch 35/96
Epoch 36/96
Epoch 37/96
Epoch 38/96
Epoch 39/96
Epoch 40/96
Epoch 41/96
Epoch 42/96
Epoch 43/96
Epoch 44/96
Epoch 45/96
Epoch 46/96
Epoch 47/96
Epoch 48/96
Epoch 49/96
Epoch 50/96
Epoch 51/96
Epoch 52/96
Epoch 53/96
Epoch 54/96
Epoch 55/96
Epoch 56/96
Epoch 57/96
Epoch 58/96
Epoch 59/96
Epoch 60/96
Epoch 61/96
Epoch 62/96
Epoch 63/96
Epoch 64/96
Epoch 65/96
Epoch 66/96
Epoch 67/96
Epoch 68/96
Epoch 69/96
Epoch 70/96
Epoch 71/96
Epoch 72/96
Epoch 73/96
Epoch 74/96
Epoch 75/96
Epoch 76/96
Epoch 77/96
Epoch 78/96
Epoch 79/96
Epoch 80/96
Epoch 81/96
Epoch 82/96
Epoch 83/96
Epoch 84/96
Epoch 85/96
Epoch 86/96
Epoch 87/96
Epoch 88/96
Epoch 89/96
Epoch 90/96
Epoch 91/96
Epoch 92/96
Epoch 93/96
Epoch 94/96
Epoch 95/96
Epoch 96/96
120.88299632072449
{"cols:['diff_bin'], bs:12, ep:96 (cm)": array([[24, 22],
       [13, 47]]), "cols:['diff_bin'], bs:12, ep:96 (accuracy)": '67.0%', "cols:['diff_bin'], b

KeyboardInterrupt: 

In [None]:
# X_train, X_test, y_train, y_test, X, y = 

In [4]:
# X_train, X_test, y_train, y_test = get_Xy_train_test(
#     df, .98, .99)

y Shape: (2117,)
X Shape: (2117, 1)
X_train Shape: (2093, 1)
X_test Shape: (24, 1)
y_train Shape: (2093,)
y_test Shape: (24,)


## Scale the data for easier computations

In [6]:
# std_sclr = StandardScaler()
# X_train = std_sclr.fit_transform(X_train)
# X_test = std_sclr.fit_transform(X_test)

### See the scale of each value if interested

In [7]:
# std_sclr.scale_

array([10.59603896])

## Fitting the training data

### I wanted to test different batch sizes to get the results on the real games. 

I started testing fitting classifier __For Loop__ with the following parameters:

1. Batch Size:
    1. 20
    2. 16
    3. 12
    4. 8
    5. 6
2. Epochs:
    1. 50
    2. 100
    3. 200

In [9]:
results = {}

In [10]:
for bs in [20, 16, 12, 8, 6]:
    for ep in [50, 100, 200]:
        classifier = _classifier()
        classifier.fit(X_train, y_train, batch_size=bs, epochs=ep,
                       class_weight='balanced', shuffle=False)
        y_pred = classifier.predict(X_test)
        y_pred = (y_pred > 0.5)
        cm = confusion_matrix(y_test, y_pred)
        results[(f'bs:{bs}, ep:{ep} (cm)')] = cm
        results[(f'bs:{bs}, ep:{ep} (accuracy)')
                ] = f'{((cm[0][0]+cm[1][1])/cm.sum()*100).round(1)}%'
        print(results)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
{'bs:20, ep:50 (cm)': array([[ 8,  2],
       [ 4, 10]]), 'bs:20, ep:50 (accuracy)': '75.0%'}
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100

Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78/200
Epoch 79/200
Epoch 80/200
Epoch 81/200
Epoch 82/200
Epoch 83/200
Epoch 84/200
Epoch 85/200
Epoch 86/200
Epoch 87/200

Epoch 172/200
Epoch 173/200
Epoch 174/200
Epoch 175/200
Epoch 176/200
Epoch 177/200
Epoch 178/200
Epoch 179/200
Epoch 180/200
Epoch 181/200
Epoch 182/200
Epoch 183/200
Epoch 184/200
Epoch 185/200
Epoch 186/200
Epoch 187/200
Epoch 188/200
Epoch 189/200
Epoch 190/200
Epoch 191/200
Epoch 192/200
Epoch 193/200
Epoch 194/200
Epoch 195/200
Epoch 196/200
Epoch 197/200
Epoch 198/200
Epoch 199/200
Epoch 200/200
{'bs:20, ep:50 (cm)': array([[ 8,  2],
       [ 4, 10]]), 'bs:20, ep:50 (accuracy)': '75.0%', 'bs:20, ep:100 (cm)': array([[ 8,  2],
       [ 4, 10]]), 'bs:20, ep:100 (accuracy)': '75.0%', 'bs:20, ep:200 (cm)': array([[ 8,  2],
       [ 4, 10]]), 'bs:20, ep:200 (accuracy)': '75.0%'}
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 2

KeyboardInterrupt: 

### Here are the best & the worst combinations:

### The best:
1. batch Size = 12, Epochs = 50
    - CM = [__11__ 2][4 __13__]
    - Accuracy = 80%
2. batch Size = 12, Epochs = 100
    - CM = [__11__ 2][4 __13__]
    - Accuracy = 80%
3. batch Size = 20, Epochs = 100
    - CM = [__11__ 2][4 __13__]
    - Accuracy = 80%
4. batch Size = 8, Epochs = 50
    - CM = [10 3][4 __13__]
    - Accuracy = 76.67%

### The worst:
1. batch Size = 8, Epochs = 100
    - CM = [11 2] [__7__ 10]
    - Accuracy = 70%
2. batch Size = 16, Epochs = 50
    - CM = [10 3] [__6__ 11]
    - Accuracy = 70%

#### The rest were at 73.33%

NOTE:
- All batches of 6 were at 73%
- All Epochs of 200 were at 73%


In [16]:
classifier = _classifier()
classifier.fit(X_train, y_train, batch_size=20, epochs=100,
               class_weight='balanced', shuffle=False)
y_pred = classifier.predict(X_test)
y_pred = (y_pred > 0.5)
cm = confusion_matrix(y_test, y_pred)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [17]:
print(cm)
print(f'accuracy: {((cm[0][0]+cm[1][1])/cm.sum()*100).round(1)}%')

[[10  6]
 [ 6 15]]
accuracy: 67.6%
