In [1]:
import warnings
warnings.filterwarnings('ignore')

# Import dependencies
import pandas as pd
import numpy as np
from config import db_password
from sqlalchemy import create_engine
import psycopg2
import pandas.io.sql as sqlio
import sklearn as skl
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import keras_tuner as kt

In [2]:
#create database connection variable 
conn = psycopg2.connect(user="postgres", password=db_password, host="localhost", database="lichess_data")

In [3]:
#execute query and save it to a variable
query="select * from chess_data"
chess_df = sqlio.read_sql_query(query,conn)

In [4]:
# drop extraneous id column
chess_df.drop(['id'],axis=1, inplace=True)

In [5]:
# Split moves column into moves df
moves_df = chess_df['moves'].str.split(' ', n=10, expand=True)

# Drop column 10 and rename columns
moves_df=moves_df.drop(10,axis=1)
moves_df.columns= ["Wm1","Bm1","Wm2","Bm2","Wm3","Bm3","Wm4","Bm4","Wm5","Bm5"]

moves_df["outcome"] = chess_df["winner"]

# drop na
moves_df = moves_df.dropna()

In [6]:
# Changing moves to numbers
for col in moves_df.columns:
    print(col,end=' ')
    
    # Get list of unique values
    values = list(set(moves_df[col].values))
    
    # Create numerical dictionary
    values_with_indexes = {}
    for i, v in enumerate(values):
        values_with_indexes[v] = i
    
    # Replace column
    moves_df.replace({col: values_with_indexes},inplace=True)

moves_df.head()

Wm1 Bm1 Wm2 Bm2 Wm3 Bm3 Wm4 Bm4 Wm5 Bm5 outcome 

Unnamed: 0,Wm1,Bm1,Wm2,Bm2,Wm3,Bm3,Wm4,Bm4,Wm5,Bm5,outcome
0,17,14,60,34,162,242,267,426,133,634,1
1,17,11,77,150,184,30,411,282,330,410,0
2,3,7,36,144,99,283,392,370,105,489,1
3,17,14,84,114,229,51,381,374,565,15,1
4,3,7,84,144,299,11,16,253,267,639,1


In [7]:
# Export csv containing sample data to be imported into ML
# moves_df.to_csv("ML_sample_data.csv")

In [8]:
# Split our preprocessed data into our features and target arrays
y = moves_df["outcome"].values
X = moves_df.drop("outcome",1).values

# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X,y,random_state = 1)
number_input_features = len(X_train[0])

# Stage 1 - Creating the model

### Model 1: sigmoid input, sigmoid output

In [81]:
# Create the keras sequential model
nn_model = tf.keras.models.Sequential()

# Add the first layer including input layer
nn_model.add(tf.keras.layers.Dense(units=5, activation="sigmoid", input_dim = number_input_features))

# Add the ouput layer that uses a probability activation function
nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Create a summary to check the structure of the sequential model
nn_model.summary()

Model: "sequential_11"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_19 (Dense)             (None, 5)                 55        
_________________________________________________________________
dense_20 (Dense)             (None, 1)                 6         
Total params: 61
Trainable params: 61
Non-trainable params: 0
_________________________________________________________________


In [82]:
# Compile the model and customize metrics
nn_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [83]:
# Train the model
fit_model = nn_model.fit(X_train, y_train, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [84]:
# now that our deep learning model is properly trained, we can evaluate the model's performance by testing its
# predictive capabilities on our testing dataset
# Evaluate the model using the test data
model_loss, model_accuracy = nn_model.evaluate(X_test,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

251535/251535 - 2s - loss: 0.6819 - acc: 0.4983
Loss: 0.6818634756792873, Accuracy: 0.4983481466770172


### Model 2: relu input, sigmoid output

In [85]:
# Create the keras sequential model
nn_model = tf.keras.models.Sequential()

# Add the first layer including input layer
nn_model.add(tf.keras.layers.Dense(units=5, activation="relu", input_dim = number_input_features))

# Add the ouput layer that uses a probability activation function
nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Create a summary to check the structure of the sequential model
nn_model.summary()

Model: "sequential_12"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_21 (Dense)             (None, 5)                 55        
_________________________________________________________________
dense_22 (Dense)             (None, 1)                 6         
Total params: 61
Trainable params: 61
Non-trainable params: 0
_________________________________________________________________


In [86]:
# Compile the model and customize metrics
nn_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [87]:
# Train the model
fit_model = nn_model.fit(X_train, y_train, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [88]:
# now that our deep learning model is properly trained, we can evaluate the model's performance by testing its
# predictive capabilities on our testing dataset
# Evaluate the model using the test data
model_loss, model_accuracy = nn_model.evaluate(X_test,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

251535/251535 - 2s - loss: 0.6922 - acc: 0.4983
Loss: 0.6922125508361225, Accuracy: 0.49832427501678467


### Model 3: tanh input, sigmoid output

In [89]:
# Create the keras sequential model
nn_model = tf.keras.models.Sequential()

# Add the first layer including input layer
nn_model.add(tf.keras.layers.Dense(units=5, activation="tanh", input_dim = number_input_features))

# Add the ouput layer that uses a probability activation function
nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Create a summary to check the structure of the sequential model
nn_model.summary()

Model: "sequential_13"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_23 (Dense)             (None, 5)                 55        
_________________________________________________________________
dense_24 (Dense)             (None, 1)                 6         
Total params: 61
Trainable params: 61
Non-trainable params: 0
_________________________________________________________________


In [90]:
# Compile the model and customize metrics
nn_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [91]:
# Train the model
fit_model = nn_model.fit(X_train, y_train, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [92]:
# now that our deep learning model is properly trained, we can evaluate the model's performance by testing its
# predictive capabilities on our testing dataset
# Evaluate the model using the test data
model_loss, model_accuracy = nn_model.evaluate(X_test,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

251535/251535 - 2s - loss: 0.6819 - acc: 0.4983
Loss: 0.6818692683180706, Accuracy: 0.4983481466770172


### Model 4: sigmoid input, linear output

In [93]:
# Create the keras sequential model
nn_model = tf.keras.models.Sequential()

# Add the first layer including input layer
nn_model.add(tf.keras.layers.Dense(units=5, activation="sigmoid", input_dim = number_input_features))

# Add the ouput layer that uses a probability activation function
nn_model.add(tf.keras.layers.Dense(units=1, activation="linear"))

# Create a summary to check the structure of the sequential model
nn_model.summary()

Model: "sequential_14"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_25 (Dense)             (None, 5)                 55        
_________________________________________________________________
dense_26 (Dense)             (None, 1)                 6         
Total params: 61
Trainable params: 61
Non-trainable params: 0
_________________________________________________________________


In [94]:
# Compile the model and customize metrics
nn_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [95]:
# Train the model
fit_model = nn_model.fit(X_train, y_train, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [96]:
# now that our deep learning model is properly trained, we can evaluate the model's performance by testing its
# predictive capabilities on our testing dataset
# Evaluate the model using the test data
model_loss, model_accuracy = nn_model.evaluate(X_test,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

251535/251535 - 2s - loss: 0.6826 - acc: 0.4983
Loss: 0.6825688758876884, Accuracy: 0.4983282685279846


### Model 5: relu input, linear output

In [97]:
# Create the keras sequential model
nn_model = tf.keras.models.Sequential()

# Add the first layer including input layer
nn_model.add(tf.keras.layers.Dense(units=5, activation="relu", input_dim = number_input_features))

# Add the ouput layer that uses a probability activation function
nn_model.add(tf.keras.layers.Dense(units=1, activation="linear"))

# Create a summary to check the structure of the sequential model
nn_model.summary()

Model: "sequential_15"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_27 (Dense)             (None, 5)                 55        
_________________________________________________________________
dense_28 (Dense)             (None, 1)                 6         
Total params: 61
Trainable params: 61
Non-trainable params: 0
_________________________________________________________________


In [98]:
# Compile the model and customize metrics
nn_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [99]:
# Train the model
fit_model = nn_model.fit(X_train, y_train, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [100]:
# now that our deep learning model is properly trained, we can evaluate the model's performance by testing its
# predictive capabilities on our testing dataset
# Evaluate the model using the test data
model_loss, model_accuracy = nn_model.evaluate(X_test,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

251535/251535 - 2s - loss: 6.5152 - acc: 0.4983
Loss: 6.51520274151468, Accuracy: 0.49834415316581726


### Model 6: tanh input, linear output

In [101]:
# Create the keras sequential model
nn_model = tf.keras.models.Sequential()

# Add the first layer including input layer
nn_model.add(tf.keras.layers.Dense(units=5, activation="tanh", input_dim = number_input_features))

# Add the ouput layer that uses a probability activation function
nn_model.add(tf.keras.layers.Dense(units=1, activation="linear"))

# Create a summary to check the structure of the sequential model
nn_model.summary()

Model: "sequential_16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_29 (Dense)             (None, 5)                 55        
_________________________________________________________________
dense_30 (Dense)             (None, 1)                 6         
Total params: 61
Trainable params: 61
Non-trainable params: 0
_________________________________________________________________


In [102]:
# Compile the model and customize metrics
nn_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [103]:
# Train the model
fit_model = nn_model.fit(X_train, y_train, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [104]:
# now that our deep learning model is properly trained, we can evaluate the model's performance by testing its
# predictive capabilities on our testing dataset
# Evaluate the model using the test data
model_loss, model_accuracy = nn_model.evaluate(X_test,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

251535/251535 - 2s - loss: 0.6820 - acc: 0.4983
Loss: 0.6819516978605651, Accuracy: 0.4983481466770172


# Stage 1 Analysis

For the first stage of this analysis, we tried 6 models of varying parameters. The loss, accuracy scores, and parameters of our 6 models are shown below:

| Model | Loss | Accuracy | Parameters |
| :---- | :--- | :------- | :--------- |
| 1 | 0.6819 | 0.4983 | Sigmoid input with 5 nodes, sigmoid output, 5 epochs |
| 2 | 0.6922 | 0.4983 | Relu input with 5 nodes, sigmoid output, 5 epochs |
| 3 | 0.6819 | 0.4983 | Tanh input with 5 nodes, sigmoid output, 5 epochs |
| 4 | 0.6826 | 0.4983 | Sigmoid input with 5 nodes, linear output, 5 epochs |
| 5 | 6.5152 | 0.4983 | Relu input wiht 5 nodes, linear output, 5 epochs |
| 6 | 0.6820 | 0.4983 | Tanh input with 5 nodes, linear output, 5 epochs |

The accuracy score for all 6 models was identical at 0.4983. An accuracy score of exactly 0.5 is the probability of randomly guessing the correct winner of a given game of chess, so the accuracy score of our models may reflect the difficulty of predicting a winner from only the first 10 moves of a game. Since games typically last far longer than 10 turns and the possible combinations of moves grow increasingly complex, this is not a surprising conclusion.

Loss function, on the other hand, was not constant. The two models with relu inputs (Models 2 and 5) had the greatest loss score, indicating that the relu activation function is likely not the best choice for our model. Additionally, the models with sigmoid outputs (Models 1, 2, and 3) had lower loss functions than their counterpart models with linear outputs (Models 4, 5, and 6 respectively). This is expected, as output from a sigmoid function tends to be very close to either 0 or 1. We are posing a question with a binary answer, so a sigmoid function is the best choice for an activation function for our output layer.

Given this information, our course of action for the final steps of completing this model will be to explore more varied input layers. This will be accomplished by varying the activation function (using sigmoid and tanh, leaving relu out), number of layers, number of nodes within each layer, and number of epochs used to train the model. We hypothesize that the sigmoid activation function will be the more appropriate choice for the input layer(s) for the same reasons it is the best choice for the output layer.

# Stage 2 - Refining the model by adding an additional input layer

### Model 7 - sigmoid + sigmoid input layers, 5 nodes each

In [9]:
# Create the keras sequential model
nn_model = tf.keras.models.Sequential()

# Add the first layer including input layer
nn_model.add(tf.keras.layers.Dense(units=5, activation="sigmoid", input_dim = number_input_features))

# Add the second input layer
nn_model.add(tf.keras.layers.Dense(units=5, activation="sigmoid", input_dim = number_input_features))

# Add the ouput layer that uses a probability activation function
nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Create a summary to check the structure of the sequential model
nn_model.summary()

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 5)                 55        
_________________________________________________________________
dense_1 (Dense)              (None, 5)                 30        
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 6         
Total params: 91
Trainable params: 91
Non-trainable params: 0
_________________________________________________________________


In [10]:
# Compile the model and customize metrics
nn_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


In [11]:
# Train the model
fit_model = nn_model.fit(X_train, y_train, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [12]:
# now that our deep learning model is properly trained, we can evaluate the model's performance by testing its
# predictive capabilities on our testing dataset
# Evaluate the model using the test data
model_loss, model_accuracy = nn_model.evaluate(X_test,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

251535/251535 - 2s - loss: 0.6817 - acc: 0.4983
Loss: 0.681683825464082, Accuracy: 0.4983481466770172


### Model 8 - sigmoid + tanh input layers, 5 nodes each

In [13]:
# Create the keras sequential model
nn_model = tf.keras.models.Sequential()

# Add the first layer including input layer
nn_model.add(tf.keras.layers.Dense(units=5, activation="sigmoid", input_dim = number_input_features))

# Add the second input layer
nn_model.add(tf.keras.layers.Dense(units=5, activation="tanh", input_dim = number_input_features))

# Add the ouput layer that uses a probability activation function
nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Create a summary to check the structure of the sequential model
nn_model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_3 (Dense)              (None, 5)                 55        
_________________________________________________________________
dense_4 (Dense)              (None, 5)                 30        
_________________________________________________________________
dense_5 (Dense)              (None, 1)                 6         
Total params: 91
Trainable params: 91
Non-trainable params: 0
_________________________________________________________________


In [14]:
# Compile the model and customize metrics
nn_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [15]:
# Train the model
fit_model = nn_model.fit(X_train, y_train, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [16]:
# now that our deep learning model is properly trained, we can evaluate the model's performance by testing its
# predictive capabilities on our testing dataset
# Evaluate the model using the test data
model_loss, model_accuracy = nn_model.evaluate(X_test,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

251535/251535 - 2s - loss: 0.6819 - acc: 0.4983
Loss: 0.6818749250321505, Accuracy: 0.4983481466770172


### Model 9 - tanh + sigmoid input layers, 5 nodes each

In [17]:
# Create the keras sequential model
nn_model = tf.keras.models.Sequential()

# Add the first layer including input layer
nn_model.add(tf.keras.layers.Dense(units=5, activation="tanh", input_dim = number_input_features))

# Add the second input layer
nn_model.add(tf.keras.layers.Dense(units=5, activation="sigmoid", input_dim = number_input_features))

# Add the ouput layer that uses a probability activation function
nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Create a summary to check the structure of the sequential model
nn_model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_6 (Dense)              (None, 5)                 55        
_________________________________________________________________
dense_7 (Dense)              (None, 5)                 30        
_________________________________________________________________
dense_8 (Dense)              (None, 1)                 6         
Total params: 91
Trainable params: 91
Non-trainable params: 0
_________________________________________________________________


In [18]:
# Compile the model and customize metrics
nn_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [19]:
# Train the model
fit_model = nn_model.fit(X_train, y_train, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [20]:
# now that our deep learning model is properly trained, we can evaluate the model's performance by testing its
# predictive capabilities on our testing dataset
# Evaluate the model using the test data
model_loss, model_accuracy = nn_model.evaluate(X_test,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

251535/251535 - 2s - loss: 0.6816 - acc: 0.4983
Loss: 0.6815962515790198, Accuracy: 0.4983481466770172


### Model 10 - tanh + tanh input layers, 5 nodes each

In [21]:
# Create the keras sequential model
nn_model = tf.keras.models.Sequential()

# Add the first layer including input layer
nn_model.add(tf.keras.layers.Dense(units=5, activation="tanh", input_dim = number_input_features))

# Add the second input layer
nn_model.add(tf.keras.layers.Dense(units=5, activation="tanh", input_dim = number_input_features))

# Add the ouput layer that uses a probability activation function
nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Create a summary to check the structure of the sequential model
nn_model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_9 (Dense)              (None, 5)                 55        
_________________________________________________________________
dense_10 (Dense)             (None, 5)                 30        
_________________________________________________________________
dense_11 (Dense)             (None, 1)                 6         
Total params: 91
Trainable params: 91
Non-trainable params: 0
_________________________________________________________________


In [22]:
# Compile the model and customize metrics
nn_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [23]:
# Train the model
fit_model = nn_model.fit(X_train, y_train, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [24]:
# now that our deep learning model is properly trained, we can evaluate the model's performance by testing its
# predictive capabilities on our testing dataset
# Evaluate the model using the test data
model_loss, model_accuracy = nn_model.evaluate(X_test,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

251535/251535 - 2s - loss: 0.6817 - acc: 0.4983
Loss: 0.6816705080182166, Accuracy: 0.4983481466770172


# Stage 2 Analysis

The next step of our analysis was to determine whether adding an additional input layer would assist in lowering loss or raising accuracy in our model. The loss, accuracy, and parameters for Models 7-10 are shown below:

| Model | Loss | Accuracy | Parameters |
| :---- | :--- | :------- | :--------- |
| 7 | 0.6817 | 0.4983 | Two sigmoid input layers with 5 nodes each, sigmoid output, 5 epochs |
| 8 | 0.6919 | 0.4983 | Sigmoid first input layer and tanh second input layer with 5 nodes each, sigmoid output, 5 epochs |
| 9 | 0.6816 | 0.4983 | Tanh first input layer and sigmoid second input layer with 5 nodes each, sigmoid output, 5 epochs |
| 10 | 0.6817 | 0.4983 | Two tanh input layers with 5 nodes each, sigmoid output, 5 epochs |

Accuracy continued to remain constant across all models, even when an additional input layer was added. Loss varied slightly, but not to a significant extent; the greatest difference in loss between the best models from Stage 1 and Stage 2 was 0.0003. As such, we can conclude that adding a second input layer is not likely to significantly improve the ability of our machine learning model to predict the winner of a chess game from the first 10 moves. Additionally, we cannot draw any further conclusions regarding the efficacy of the sigmoid and tanh activation functions in input layers given the very similar results between the two.

# Stage 3 - Refining the model by varying the number of nodes

### Model 11 - sigmoid input layer, 2 nodes

In [25]:
# Create the keras sequential model
nn_model = tf.keras.models.Sequential()

# Add the first layer including input layer
nn_model.add(tf.keras.layers.Dense(units=2, activation="sigmoid", input_dim = number_input_features))

# Add the ouput layer that uses a probability activation function
nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Create a summary to check the structure of the sequential model
nn_model.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_12 (Dense)             (None, 2)                 22        
_________________________________________________________________
dense_13 (Dense)             (None, 1)                 3         
Total params: 25
Trainable params: 25
Non-trainable params: 0
_________________________________________________________________


In [26]:
# Compile the model and customize metrics
nn_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [27]:
# Train the model
fit_model = nn_model.fit(X_train, y_train, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [28]:
# now that our deep learning model is properly trained, we can evaluate the model's performance by testing its
# predictive capabilities on our testing dataset
# Evaluate the model using the test data
model_loss, model_accuracy = nn_model.evaluate(X_test,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

251535/251535 - 2s - loss: 0.6817 - acc: 0.4983
Loss: 0.6817401380583206, Accuracy: 0.4983481466770172


### Model 12 - sigmoid input layer, 8 nodes

In [29]:
# Create the keras sequential model
nn_model = tf.keras.models.Sequential()

# Add the first layer including input layer
nn_model.add(tf.keras.layers.Dense(units=8, activation="sigmoid", input_dim = number_input_features))

# Add the ouput layer that uses a probability activation function
nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Create a summary to check the structure of the sequential model
nn_model.summary()

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_14 (Dense)             (None, 8)                 88        
_________________________________________________________________
dense_15 (Dense)             (None, 1)                 9         
Total params: 97
Trainable params: 97
Non-trainable params: 0
_________________________________________________________________


In [30]:
# Compile the model and customize metrics
nn_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [31]:
# Train the model
fit_model = nn_model.fit(X_train, y_train, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [32]:
# now that our deep learning model is properly trained, we can evaluate the model's performance by testing its
# predictive capabilities on our testing dataset
# Evaluate the model using the test data
model_loss, model_accuracy = nn_model.evaluate(X_test,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

251535/251535 - 2s - loss: 0.6817 - acc: 0.4984
Loss: 0.6816825410157995, Accuracy: 0.4983521103858948


### Model 13 - tanh input layer, 2 nodes

In [33]:
# Create the keras sequential model
nn_model = tf.keras.models.Sequential()

# Add the first layer including input layer
nn_model.add(tf.keras.layers.Dense(units=2, activation="tanh", input_dim = number_input_features))

# Add the ouput layer that uses a probability activation function
nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Create a summary to check the structure of the sequential model
nn_model.summary()

Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_16 (Dense)             (None, 2)                 22        
_________________________________________________________________
dense_17 (Dense)             (None, 1)                 3         
Total params: 25
Trainable params: 25
Non-trainable params: 0
_________________________________________________________________


In [34]:
# Compile the model and customize metrics
nn_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [35]:
# Train the model
fit_model = nn_model.fit(X_train, y_train, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [36]:
# now that our deep learning model is properly trained, we can evaluate the model's performance by testing its
# predictive capabilities on our testing dataset
# Evaluate the model using the test data
model_loss, model_accuracy = nn_model.evaluate(X_test,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

251535/251535 - 2s - loss: 0.6818 - acc: 0.4983
Loss: 0.6818061287186948, Accuracy: 0.4983481466770172


### Model 14 - tanh input layer, 8 nodes

In [37]:
# Create the keras sequential model
nn_model = tf.keras.models.Sequential()

# Add the first layer including input layer
nn_model.add(tf.keras.layers.Dense(units=8, activation="tanh", input_dim = number_input_features))

# Add the ouput layer that uses a probability activation function
nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Create a summary to check the structure of the sequential model
nn_model.summary()

Model: "sequential_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_18 (Dense)             (None, 8)                 88        
_________________________________________________________________
dense_19 (Dense)             (None, 1)                 9         
Total params: 97
Trainable params: 97
Non-trainable params: 0
_________________________________________________________________


In [38]:
# Compile the model and customize metrics
nn_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [39]:
# Train the model
fit_model = nn_model.fit(X_train, y_train, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [40]:
# now that our deep learning model is properly trained, we can evaluate the model's performance by testing its
# predictive capabilities on our testing dataset
# Evaluate the model using the test data
model_loss, model_accuracy = nn_model.evaluate(X_test,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

251535/251535 - 2s - loss: 0.6818 - acc: 0.4983
Loss: 0.6818064010055367, Accuracy: 0.4983481466770172


# Stage 3 Analysis

To determine whether the number of nodes affected the performance of our models, we ran single-layer models with varied numbers of nodes. The loss, accuracy, and parameters for Models 11-14 (as well as Models 1 and 3 for comparison) are shown below:

| Model | Loss | Accuracy | Parameters |
| :---- | :--- | :------- | :--------- |
| 1 | 0.6819 | 0.4983 | Sigmoid input with 5 nodes, sigmoid output, 5 epochs |
| 11 | 0.6817 | 0.4983 | Sigmoid input with 2 nodes, sigmoid output, 5 epochs |
| 12 | 0.6817 | 0.4984 | Sigmoid input with 8 nodes, sigmoid output, 5 epochs |
| 3 | 0.6818 | 0.4983 | Tanh input with 5 nodes, sigmoid output, 5 epochs |
| 13 | 0.6818 | 0.4983 | Tanh input with 2 nodes, sigmoid output, 5 epochs |
| 14 | 0.6818 | 0.4983 | Tanh input with 8 nodes, sigmoid output, 5 epochs |

Once again, loss and accuracy are more or less uniform. Model 12 is the first to show an accuracy score different from any other model, but a difference of 0.0001 is not significant. Given the results of Stage 3, we are unable to attribute number of nodes to model performance. Sigmoid and tanh input layers also continue to show no appreciable difference.

# Stage 4 - Refining the model by adjusting the number of epochs

### Model 15 - sigmoid input layer, 2 epochs

In [41]:
# Create the keras sequential model
nn_model = tf.keras.models.Sequential()

# Add the first layer including input layer
nn_model.add(tf.keras.layers.Dense(units=5, activation="sigmoid", input_dim = number_input_features))

# Add the ouput layer that uses a probability activation function
nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Create a summary to check the structure of the sequential model
nn_model.summary()

Model: "sequential_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_20 (Dense)             (None, 5)                 55        
_________________________________________________________________
dense_21 (Dense)             (None, 1)                 6         
Total params: 61
Trainable params: 61
Non-trainable params: 0
_________________________________________________________________


In [42]:
# Compile the model and customize metrics
nn_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [43]:
# Train the model
fit_model = nn_model.fit(X_train, y_train, epochs=2)

Epoch 1/2
Epoch 2/2


In [44]:
# now that our deep learning model is properly trained, we can evaluate the model's performance by testing its
# predictive capabilities on our testing dataset
# Evaluate the model using the test data
model_loss, model_accuracy = nn_model.evaluate(X_test,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

251535/251535 - 2s - loss: 0.6817 - acc: 0.4983
Loss: 0.6817277191875162, Accuracy: 0.4983481466770172


### Model 16 - sigmoid input layer, 10 epochs

In [45]:
# Create the keras sequential model
nn_model = tf.keras.models.Sequential()

# Add the first layer including input layer
nn_model.add(tf.keras.layers.Dense(units=5, activation="sigmoid", input_dim = number_input_features))

# Add the ouput layer that uses a probability activation function
nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Create a summary to check the structure of the sequential model
nn_model.summary()

Model: "sequential_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_22 (Dense)             (None, 5)                 55        
_________________________________________________________________
dense_23 (Dense)             (None, 1)                 6         
Total params: 61
Trainable params: 61
Non-trainable params: 0
_________________________________________________________________


In [46]:
# Compile the model and customize metrics
nn_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [47]:
# Train the model
fit_model = nn_model.fit(X_train, y_train, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [48]:
# now that our deep learning model is properly trained, we can evaluate the model's performance by testing its
# predictive capabilities on our testing dataset
# Evaluate the model using the test data
model_loss, model_accuracy = nn_model.evaluate(X_test,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

251535/251535 - 2s - loss: 0.6816 - acc: 0.4983
Loss: 0.6815956441493135, Accuracy: 0.4983481466770172


### Model 17 - tanh input layer, 2 epochs

In [49]:
# Create the keras sequential model
nn_model = tf.keras.models.Sequential()

# Add the first layer including input layer
nn_model.add(tf.keras.layers.Dense(units=5, activation="tanh", input_dim = number_input_features))

# Add the ouput layer that uses a probability activation function
nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Create a summary to check the structure of the sequential model
nn_model.summary()

Model: "sequential_10"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_24 (Dense)             (None, 5)                 55        
_________________________________________________________________
dense_25 (Dense)             (None, 1)                 6         
Total params: 61
Trainable params: 61
Non-trainable params: 0
_________________________________________________________________


In [50]:
# Compile the model and customize metrics
nn_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [51]:
# Train the model
fit_model = nn_model.fit(X_train, y_train, epochs=2)

Epoch 1/2
Epoch 2/2


In [52]:
# now that our deep learning model is properly trained, we can evaluate the model's performance by testing its
# predictive capabilities on our testing dataset
# Evaluate the model using the test data
model_loss, model_accuracy = nn_model.evaluate(X_test,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

251535/251535 - 2s - loss: 0.6819 - acc: 0.4983
Loss: 0.6818530754724361, Accuracy: 0.49834415316581726


### Model 18 - tanh input layer, 10 epochs

In [53]:
# Create the keras sequential model
nn_model = tf.keras.models.Sequential()

# Add the first layer including input layer
nn_model.add(tf.keras.layers.Dense(units=5, activation="tanh", input_dim = number_input_features))

# Add the ouput layer that uses a probability activation function
nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Create a summary to check the structure of the sequential model
nn_model.summary()

Model: "sequential_11"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_26 (Dense)             (None, 5)                 55        
_________________________________________________________________
dense_27 (Dense)             (None, 1)                 6         
Total params: 61
Trainable params: 61
Non-trainable params: 0
_________________________________________________________________


In [54]:
# Compile the model and customize metrics
nn_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [55]:
# Train the model
fit_model = nn_model.fit(X_train, y_train, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [56]:
# now that our deep learning model is properly trained, we can evaluate the model's performance by testing its
# predictive capabilities on our testing dataset
# Evaluate the model using the test data
model_loss, model_accuracy = nn_model.evaluate(X_test,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

251535/251535 - 2s - loss: 0.6818 - acc: 0.4983
Loss: 0.6818354549266634, Accuracy: 0.4983481466770172


# Stage 4 Analysis

Finally, we tested the effect of number of epochs on model performance. The loss, accuracy, and parameters for Models 15-18 (as well as Models 1 and 3 for comparison) are shown below:

| Model | Loss | Accuracy | Parameters |
| :---- | :--- | :------- | :--------- |
| 1 | 0.6819 | 0.4983 | Sigmoid input with 5 nodes, sigmoid output, 5 epochs |
| 15 | 0.6817 | 0.4983 | Sigmoid input with 5 nodes, sigmoid output, 2 epochs |
| 16 | 0.6816 | 0.4983 | Sigmoid input with 5 nodes, sigmoid output, 10 epochs |
| 3 | 0.6818 | 0.4983 | Tanh input with 5 nodes, sigmoid output, 5 epochs |
| 17 | 0.6819 | 0.4983 | Tanh input with 5 nodes, sigmoid output, 2 epochs |
| 18 | 0.6818 | 0.4983 | Tanh input with 5 nodes, sigmoid output, 10 epochs |

As with all other parameters, adjusting the number of epochs had little to no impact on the performance of our machine learning models. Input function continued to show no impact as well.

# Conclusions

For reference, the loss, accuracy, and parameters for all models are shown again below:

| Model | Loss | Accuracy | Parameters |
| :---- | :--- | :------- | :--------- |
| 1 | 0.6819 | 0.4983 | Sigmoid input with 5 nodes, sigmoid output, 5 epochs |
| 2 | 0.6922 | 0.4983 | Relu input with 5 nodes, sigmoid output, 5 epochs |
| 3 | 0.6819 | 0.4983 | Tanh input with 5 nodes, sigmoid output, 5 epochs |
| 4 | 0.6826 | 0.4983 | Sigmoid input with 5 nodes, linear output, 5 epochs |
| 5 | 6.5152 | 0.4983 | Relu input wiht 5 nodes, linear output, 5 epochs |
| 6 | 0.6820 | 0.4983 | Tanh input with 5 nodes, linear output, 5 epochs |
| 7 | 0.6817 | 0.4983 | Two sigmoid input layers with 5 nodes each, sigmoid output, 5 epochs |
| 8 | 0.6919 | 0.4983 | Sigmoid first input layer and tanh second input layer with 5 nodes each, sigmoid output, 5 epochs |
| 9 | 0.6816 | 0.4983 | Tanh first input layer and sigmoid second input layer with 5 nodes each, sigmoid output, 5 epochs |
| 10 | 0.6817 | 0.4983 | Two tanh input layers with 5 nodes each, sigmoid output, 5 epochs |
| 11 | 0.6817 | 0.4983 | Sigmoid input with 2 nodes, sigmoid output, 5 epochs |
| 12 | 0.6817 | 0.4984 | Sigmoid input with 8 nodes, sigmoid output, 5 epochs |
| 13 | 0.6818 | 0.4983 | Tanh input with 2 nodes, sigmoid output, 5 epochs |
| 14 | 0.6818 | 0.4983 | Tanh input with 8 nodes, sigmoid output, 5 epochs |
| 15 | 0.6817 | 0.4983 | Sigmoid input with 5 nodes, sigmoid output, 2 epochs |
| 16 | 0.6816 | 0.4983 | Sigmoid input with 5 nodes, sigmoid output, 10 epochs |
| 17 | 0.6819 | 0.4983 | Tanh input with 5 nodes, sigmoid output, 2 epochs |
| 18 | 0.6818 | 0.4983 | Tanh input with 5 nodes, sigmoid output, 10 epochs |

All four stages of this analysis showed that varying the parameters of our machine learning model had virtually no impact on both loss and accuracy. We were only able to draw conclusions regarding two of the five parameters examined: ruling out relu as an activation function and determining that sigmoid is the most appropriate output function. No conclusive evidence was found for the other three parameters. A summary of results can be found in the table below:

| Parameter | Findings |
| :---- | :--- | 
| Activation function | Relu ruled out, no evidence that either sigmoid or tanh is better than the other |
| Output function | Sigmoid deemed better than linear |
| Number of layers | No evidence that either 1 layer or 2 layers is better than the other |
| Number of nodes | No evidence that any of 2, 5, or 8 nodes is better than the others |
| Number of epochs | No evidence that any of 2, 5, or 10 epochs is better than the others |

All in all, the results of this study point to neural networks not being an effective means of predicting the winner of a game of chess given the first 10 moves. We supposed this was the case following the Stage 1 analysis, and Stages 2-4 supported the hypothesis. We cannot conclusively say that a neural network is entirely ineffective when trying to answer the question posed in this project since there are still many more combinations of parameters that could be tested, but the 18 models we examined gave us little reason to believe this is a path worth following.

As previously stated, chess is a complex game. The possible outcomes are difficult to map as each individual move begets an exponentially branching tree of possibilities. Since games of chess tend to last far longer than 10 turns, our analysis is only able to scratch the surface. We were limited by a number of factors, most notably time and computing power. In theory, we could have extended our analysis far past the first 10 turns. However, adding an extra turn creates another branch on the tree and multiplies the number of computations necessary to perform this analysis. As such, we decided to examine whether determining a winner given 10 turns is possible with resources accessible to a wider range of people. Since accuracy scores remained very close to the probability of guessing a winner entirely at random throughout the entire project, the ultimate answer to our question is negative.

# Future Analysis

The only type of machine learning used in this project was a neural network. While this method proved ineffective, other types of machine learning may be more useful in answering our question. For example, the flow of a game of chess closely mimics a quickly branching set of choices ultimately culminating in a winner. A decision tree may be an interesting way to map out chess games, though said tree would have to be almost impossibly large to accurately capture every possible combination of moves in a game of chess.

Principal Component Analysis could possibly be used to incorporate more than 10 turns into the study without the number of input features reaching a problematically large number. This may be difficult given the qualitative nature of our input data, but it is an avenue to be explored.

Finally, a repetition of our analysis with more input features could be performed given more time and access to faster computing. It is not realistic for a student using a personal computer to perform this analysis with the first 50 turns of a game of chess using a sufficiently large dataset since it would likely take too long and use up too many resources, but given enough time and computing power, this could either corroborate our conclusions or show that neural networks may be viable solution after all.