In [1]:
# Import dependencies
import pandas as pd
import numpy as np
from config import db_password
from sqlalchemy import create_engine
import psycopg2
import pandas.io.sql as sqlio
import sklearn as skl
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import keras_tuner as kt

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
#create database connection variable 
conn = psycopg2.connect(user="postgres", password=db_password, host="localhost", database="lichess_data")

In [3]:
#execute query and save it to a variable
query="select * from chess_data"
chess_df = sqlio.read_sql_query(query,conn)

In [4]:
chess_df.drop(['id'],axis=1, inplace=True)

In [5]:
# Split moves column into moves df
moves_df = chess_df['moves'].str.split(' ', n=10, expand=True)

# Drop column 10 and rename columns
moves_df=moves_df.drop(10,axis=1)
moves_df.columns= ["Wm1","Bm1","Wm2","Bm2","Wm3","Bm3","Wm4","Bm4","Wm5","Bm5"]

moves_df["outcome"] = chess_df["winner"]

# drop na
moves_df = moves_df.dropna()

In [6]:
# Changing moves to numbers
for col in moves_df.columns:
    print(col,end=' ')
    
    # Get list of unique values
    values = list(set(moves_df[col].values))
    
    # Create numerical dictionary
    values_with_indexes = {}
    for i, v in enumerate(values):
        values_with_indexes[v] = i
    
    # Replace column
    moves_df.replace({col: values_with_indexes},inplace=True)

moves_df.head()

Wm1 Bm1 Wm2 Bm2 Wm3 Bm3 Wm4 Bm4 Wm5 Bm5 outcome 

Unnamed: 0,Wm1,Bm1,Wm2,Bm2,Wm3,Bm3,Wm4,Bm4,Wm5,Bm5,outcome
0,10,9,17,88,86,17,430,428,314,64,0
1,10,15,69,116,214,57,229,261,304,145,2
2,9,14,1,46,310,36,30,285,102,320,0
3,10,9,6,81,223,117,404,260,236,557,0
4,9,14,6,46,255,290,252,394,251,392,0


In [7]:
# Export csv containing sample data to be imported into ML
# moves_df.to_csv("ML_sample_data.csv")

In [8]:
# Split our preprocessed data into our features and target arrays
y = moves_df["outcome"].values
X = moves_df.drop("outcome",1).values

# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X,y,random_state = 1)

  This is separate from the ipykernel package so we can avoid doing imports until


In [9]:
# Create a StandardScaler instances
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [10]:
# import tensorflow as tf
# from tensorflow import keras
# from tensorflow.keras import layers

## First model - one feature layer (sigmoid)

In [46]:
# Create the keras sequential model
number_input_features = len(X_train_scaled[0])
nn_model = tf.keras.models.Sequential()

In [47]:
# Add the first layer including input layer
nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid", input_dim = number_input_features))

In [48]:
# Add the ouput layer that uses a probability activation function
nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

In [49]:
# Create a summary to check the structure of the sequential model
nn_model.summary()

Model: "sequential_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_20 (Dense)             (None, 1)                 11        
_________________________________________________________________
dense_21 (Dense)             (None, 1)                 2         
Total params: 13
Trainable params: 13
Non-trainable params: 0
_________________________________________________________________


In [50]:
# Compile the model and customize metrics
nn_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [51]:
# Train the model
fit_model = nn_model.fit(X_train_scaled, y_train, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [52]:
# now that our deep learning model is properly trained, we can evaluate the model's performance by testing its
# predictive capabilities on our testing dataset
# Evaluate the model using the test data
model_loss, model_accuracy = nn_model.evaluate(X_test,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

251535/251535 - 2s - loss: -9.7934e+01 - acc: 0.4983
Loss: -97.93367988120384, Accuracy: 0.4983481466770172


In [53]:
# Train the model with more epochs
fit_model_b = nn_model.fit(X_train_scaled, y_train, epochs=15)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [19]:
# now that our deep learning model is properly trained, we can evaluate the model's performance by testing its
# predictive capabilities on our testing dataset
# Evaluate the model using the test data
model_loss, model_accuracy = nn_model.evaluate(X_test,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

251535/251535 - 1s - loss: -3.9078e+02 - acc: 0.4983
Loss: -390.7823153511997, Accuracy: 0.4983481466770172


## Second model - one feature layer (relu)

In [21]:
# Create the keras sequential model
nn_model_2 = tf.keras.models.Sequential()

# Add the first layer including input layer
nn_model_2.add(tf.keras.layers.Dense(units=1, activation="relu", input_dim = number_input_features))

# Add the ouput layer that uses a probability activation function
nn_model_2.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Create a summary to check the structure of the sequential model
nn_model_2.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_4 (Dense)              (None, 1)                 11        
_________________________________________________________________
dense_5 (Dense)              (None, 1)                 2         
Total params: 13
Trainable params: 13
Non-trainable params: 0
_________________________________________________________________


In [22]:
# Compile the model and customize metrics
nn_model_2.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [23]:
# Train the model
fit_model_2 = nn_model_2.fit(X_train_scaled, y_train, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [24]:
# now that our deep learning model is properly trained, we can evaluate the model's performance by testing its
#predictive capabilities on our testing dataset
# Evaluate the model using the test data
model_loss_2, model_accuracy_2 = nn_model_2.evaluate(X_test,y_test,verbose=2)
print(f"Loss: {model_loss_2}, Accuracy: {model_accuracy_2}")

251535/251535 - 2s - loss: -3.5649e+05 - acc: 0.4983
Loss: -356490.4129010376, Accuracy: 0.4983481466770172


## Third model - two feature layers (relu + relu) and more nodes

In [25]:
# Create the keras sequential model
nn_model_3 = tf.keras.models.Sequential()

# Add the first layer including input layer
nn_model_3.add(tf.keras.layers.Dense(units=5, activation="relu", input_dim = number_input_features))

# Add the second input layer
nn_model_3.add(tf.keras.layers.Dense(units=5, activation="relu"))

# Add the ouput layer that uses a probability activation function
nn_model_3.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Create a summary to check the structure of the sequential model
nn_model_3.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_6 (Dense)              (None, 5)                 55        
_________________________________________________________________
dense_7 (Dense)              (None, 5)                 30        
_________________________________________________________________
dense_8 (Dense)              (None, 1)                 6         
Total params: 91
Trainable params: 91
Non-trainable params: 0
_________________________________________________________________


In [26]:
# Compile the model and customize metrics
nn_model_3.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [27]:
# Train the model
fit_model_3 = nn_model_3.fit(X_train_scaled, y_train, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [28]:
# now that our deep learning model is properly trained, we can evaluate the model's performance by testing its
#predictive capabilities on our testing dataset
# Evaluate the model using the test data
model_loss_3, model_accuracy_3 = nn_model_3.evaluate(X_test,y_test,verbose=2)
print(f"Loss: {model_loss_3}, Accuracy: {model_accuracy_3}")

251535/251535 - 2s - loss: -3.7928e+09 - acc: 0.4983
Loss: -3792791266.657221, Accuracy: 0.4983481466770172


## fourth model - same as above with new training/testing data

In [29]:
# Split the preprocessed data into a training and testing dataset
X_train_2, X_test_2, y_train_2, y_test_2 = train_test_split(X,y,random_state = 1)

# Create a StandardScaler instances
scaler_2 = StandardScaler()

# Fit the StandardScaler
X_scaler_2 = scaler_2.fit(X_train_2)

# Scale the data
X_train_scaled_2 = X_scaler_2.transform(X_train_2)
X_test_scaled_2 = X_scaler_2.transform(X_test_2)

In [30]:
# Create the keras sequential model
nn_model_4 = tf.keras.models.Sequential()

# Add the first layer including input layer
nn_model_4.add(tf.keras.layers.Dense(units=5, activation="relu", input_dim = number_input_features))

# Add the second input layer
nn_model_4.add(tf.keras.layers.Dense(units=5, activation="relu"))

# Add the ouput layer that uses a probability activation function
nn_model_4.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Create a summary to check the structure of the sequential model
nn_model_4.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_9 (Dense)              (None, 5)                 55        
_________________________________________________________________
dense_10 (Dense)             (None, 5)                 30        
_________________________________________________________________
dense_11 (Dense)             (None, 1)                 6         
Total params: 91
Trainable params: 91
Non-trainable params: 0
_________________________________________________________________


In [31]:
# Compile the model and customize metrics
nn_model_4.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [32]:
# Train the model
fit_model_4 = nn_model_4.fit(X_train_scaled_2, y_train_2, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [35]:
4# now that our deep learning model is properly trained, we can evaluate the model's performance by testing its
#predictive capabilities on our testing dataset
# Evaluate the model using the test data
model_loss_4, model_accuracy_4 = nn_model_4.evaluate(X_test_2,y_test_2,verbose=2)
print(f"Loss: {model_loss_4}, Accuracy: {model_accuracy_4}")

251535/251535 - 2s - loss: -3.8802e+09 - acc: 0.4983
Loss: -3880210284.250033, Accuracy: 0.4983481466770172


## fifth model - new training/testing data, third layer, softmax output layer

In [41]:
# Split the preprocessed data into a training and testing dataset
X_train_3, X_test_3, y_train_3, y_test_3 = train_test_split(X,y,random_state = 1)

# Create a StandardScaler instances
scaler_3 = StandardScaler()

# Fit the StandardScaler
X_scaler_3 = scaler_3.fit(X_train_3)

# Scale the data
X_train_scaled_3 = X_scaler_3.transform(X_train_3)
X_test_scaled_3 = X_scaler_3.transform(X_test_3)

In [42]:
# Create the keras sequential model
nn_model_5 = tf.keras.models.Sequential()

# Add the first layer including input layer
nn_model_5.add(tf.keras.layers.Dense(units=6, activation="relu", input_dim = number_input_features))

# Add the second input layer
nn_model_5.add(tf.keras.layers.Dense(units=8, activation="relu"))

# Add the third input layer
nn_model_5.add(tf.keras.layers.Dense(units=6, activation="relu"))

# Add the ouput layer that uses a probability activation function
nn_model_5.add(tf.keras.layers.Dense(units=1, activation="softmax"))

# Create a summary to check the structure of the sequential model
nn_model_5.summary()

Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_16 (Dense)             (None, 6)                 66        
_________________________________________________________________
dense_17 (Dense)             (None, 8)                 56        
_________________________________________________________________
dense_18 (Dense)             (None, 6)                 54        
_________________________________________________________________
dense_19 (Dense)             (None, 1)                 7         
Total params: 183
Trainable params: 183
Non-trainable params: 0
_________________________________________________________________


In [43]:
# Compile the model and customize metrics
nn_model_5.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [44]:
# Train the model
fit_model_5 = nn_model_5.fit(X_train_scaled_3, y_train_3, epochs=8)

Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


In [45]:
# now that our deep learning model is properly trained, we can evaluate the model's performance by testing its
#predictive capabilities on our testing dataset
# Evaluate the model using the test data
model_loss_5, model_accuracy_5 = nn_model_5.evaluate(X_test_3,y_test_3,verbose=2)
print(f"Loss: {model_loss_5}, Accuracy: {model_accuracy_5}")

251535/251535 - 2s - loss: -6.5150e+00 - acc: 0.4983
Loss: -6.514957813184685, Accuracy: 0.4983481466770172


## sixth model - trying kerastuner

In [73]:
# Create a method that creates a new Sequential model with hyperparameter options
def create_model(hp):
    nn_model_n = tf.keras.models.Sequential()

    # Allow kerastuner to decide which activation function to use in hidden layers
    activation = hp.Choice('activation',['relu','tanh'])
    
    # Allow kerastuner to decide number of neurons in first layer
    nn_model_n.add(tf.keras.layers.Dense(units=hp.Int('first_units',
        min_value=1,
        max_value=30,
        step=5), activation=activation, input_dim=10))

    # Allow kerastuner to decide number of hidden layers and neurons in hidden layers
    for i in range(hp.Int('num_layers', 1, 5)):
        nn_model_n.add(tf.keras.layers.Dense(units=hp.Int('units_' + str(i),
            min_value=1,
            max_value=30,
            step=5),
            activation=activation))
    
    nn_model_n.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

    # Compile the model
    nn_model_n.compile(loss="binary_crossentropy", optimizer='adam', metrics=["accuracy"])
    
    return nn_model_n

In [74]:
tuner = kt.Hyperband(
    create_model,
    objective=kt.Objective("val_acc", direction="max"),
    max_epochs=20,
    hyperband_iterations=2,
    overwrite=True)

In [75]:
tuner.search(X_train_scaled,y_train,epochs=20,validation_data=(X_test_scaled,y_test))

Trial 23 Complete [00h 01m 34s]
val_acc: 0.4983481466770172

Best val_acc So Far: 0.4983481466770172
Total elapsed time: 00h 26m 31s

Search: Running Trial #24

Value             |Best Value So Far |Hyperparameter
relu              |relu              |activation
21                |6                 |first_units
2                 |2                 |num_layers
26                |21                |units_0
16                |1                 |units_1
26                |None              |units_2
21                |None              |units_3
7                 |3                 |tuner/epochs
0                 |0                 |tuner/initial_epoch
1                 |2                 |tuner/bracket
0                 |0                 |tuner/round

Train on 754604 samples, validate on 251535 samples
Epoch 1/7
Epoch 2/7

KeyboardInterrupt: 

force stopped after roughly 25 minutes for time purposes

"best value so far" field had gone unchanged for a while beforehand

In [76]:
# Get top three model hyperparameters and print the values
top_hyper = tuner.get_best_hyperparameters(3)
for param in top_hyper:
    print(param.values)

{'activation': 'relu', 'first_units': 6, 'num_layers': 2, 'units_0': 21, 'tuner/epochs': 3, 'tuner/initial_epoch': 0, 'tuner/bracket': 2, 'tuner/round': 0, 'units_1': 1}
{'activation': 'tanh', 'first_units': 21, 'num_layers': 3, 'units_0': 26, 'units_1': 26, 'tuner/epochs': 3, 'tuner/initial_epoch': 0, 'tuner/bracket': 2, 'tuner/round': 0, 'units_2': 1}
{'activation': 'tanh', 'first_units': 6, 'num_layers': 4, 'units_0': 26, 'units_1': 6, 'units_2': 26, 'tuner/epochs': 3, 'tuner/initial_epoch': 0, 'tuner/bracket': 2, 'tuner/round': 0, 'units_3': 1}


## seventh model - no standardscaler

In [14]:
number_input_features = len(X_train[0])

In [24]:
# Create the keras sequential model
nn_model_7 = tf.keras.models.Sequential()

# Add the first layer including input layer
nn_model_7.add(tf.keras.layers.Dense(units=6, activation="tanh", input_dim = number_input_features))

# Add the second input layer
nn_model_7.add(tf.keras.layers.Dense(units=8, activation="tanh"))

# Add the third input layer
nn_model_7.add(tf.keras.layers.Dense(units=6, activation="tanh"))

# Add the ouput layer that uses a probability activation function
nn_model_7.add(tf.keras.layers.Dense(units=1, activation="tanh"))

# Create a summary to check the structure of the sequential model
nn_model_7.summary()

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_11 (Dense)             (None, 6)                 66        
_________________________________________________________________
dense_12 (Dense)             (None, 8)                 56        
_________________________________________________________________
dense_13 (Dense)             (None, 6)                 54        
_________________________________________________________________
dense_14 (Dense)             (None, 1)                 7         
Total params: 183
Trainable params: 183
Non-trainable params: 0
_________________________________________________________________


In [25]:
# Compile the model and customize metrics
nn_model_7.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [26]:
# Train the model
fit_model_7 = nn_model_7.fit(X_train, y_train, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [27]:
# now that our deep learning model is properly trained, we can evaluate the model's performance by testing its
#predictive capabilities on our testing dataset
# Evaluate the model using the test data
model_loss_7, model_accuracy_7 = nn_model_7.evaluate(X_test,y_test,verbose=2)
print(f"Loss: {model_loss_7}, Accuracy: {model_accuracy_7}")

251535/251535 - 2s - loss: 0.1534 - acc: 0.0384
Loss: 0.15337868017855263, Accuracy: 0.03838034346699715
