In [1]:
# Import dependencies
import pandas as pd
import numpy as np
from config import db_password
from sqlalchemy import create_engine
import psycopg2
import sklearn as skl
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import StandardScaler,OneHotEncoder

In [2]:
pip install psycopg2-binary

Note: you may need to restart the kernel to use updated packages.


In [3]:
file_dir = "/Users/joeyarney/Desktop/DataBootCamp/Group_1_Final_Project/Chess Datasets/games.csv"

In [4]:
# 
db_string = f"postgresql://postgres:{db_password}@localhost:5432/games"

In [5]:
# Create Engine database
engine = create_engine(db_string)

In [6]:
# Create a table in sql with the imported csv data
# games_df.to_sql(name='games_df', con=engine)

In [7]:
# Connecting to postgres server and print successful if successful
def connect(db_string):
    """ Connect to the PostgreSQL database server """
    conn = None
    try:
        # connect to the PostgreSQL server
        print('Connecting to the PostgreSQL database...')
        conn = psycopg2.connect(db_string)
    except (Exception, psycopg2.DatabaseError) as error:
        print(error)
#         sys.exit(1) 
    print("Connection successful")
    return conn

In [8]:
#Connect a database and put data in a python df
def postgresql_to_dataframe(conn, select_query, column_names):
    """
    Tranform a SELECT query into a pandas dataframe
    """
    cursor = conn.cursor()
    try:
        cursor.execute(select_query)
    except (Exception, psycopg2.DatabaseError) as error:
        print("Error: %s" % error)
        cursor.close()
        return 1
    
    # Naturally we get a list of tupples
    tupples = cursor.fetchall()
    cursor.close()
    
    # We just need to turn it into a pandas dataframe
    df = pd.DataFrame(tupples, columns=column_names)
    return df

In [9]:
# Connect to the database and print the df
conn = connect(db_string)
column_names = ["index","id","rated","created_at","last_move_at","turns","victory_status","winner","increment_code","white_id","white_rating","black_id","black_rating","moves","opening_eco","opening_name","opening_ply"]
# Execute the "SELECT *" query
df = postgresql_to_dataframe(conn, "select * from games_df", column_names)
df.head()

Connecting to the PostgreSQL database...
Connection successful


Unnamed: 0,index,id,rated,created_at,last_move_at,turns,victory_status,winner,increment_code,white_id,white_rating,black_id,black_rating,moves,opening_eco,opening_name,opening_ply
0,0,TZJHLljE,False,1500000000000.0,1500000000000.0,13,outoftime,white,15+2,bourgris,1500,a-00,1191,d4 d5 c4 c6 cxd5 e6 dxe6 fxe6 Nf3 Bb4+ Nc3 Ba5...,D10,Slav Defense: Exchange Variation,5
1,1,l1NXvwaE,True,1500000000000.0,1500000000000.0,16,resign,black,5+10,a-00,1322,skinnerua,1261,d4 Nc6 e4 e5 f4 f6 dxe5 fxe5 fxe5 Nxe5 Qd4 Nc6...,B00,Nimzowitsch Defense: Kennedy Variation,4
2,2,mIICvQHh,True,1500000000000.0,1500000000000.0,61,mate,white,5+10,ischia,1496,a-00,1500,e4 e5 d3 d6 Be3 c6 Be2 b5 Nd2 a5 a4 c5 axb5 Nc...,C20,King's Pawn Game: Leonardis Variation,3
3,3,MsoDV9wj,False,1500000000000.0,1500000000000.0,5,draw,draw,10+0,trelynn17,1250,franklin14532,1002,e4 c5 Nf3 Qa5 a3,B27,Sicilian Defense: Mongoose Variation,4
4,4,qwU9rasv,True,1500000000000.0,1500000000000.0,33,resign,white,10+0,capa_jr,1520,daniel_likes_chess,1423,d4 d5 e4 dxe4 Nc3 Nf6 f3 exf3 Nxf3 Nc6 Bb5 a6 ...,D00,Blackmar-Diemer Gambit: Pietrowsky Defense,10


In [12]:
chess_df=pd.read_csv("/Users/joeyarney/Desktop/DataBootCamp/Group_1_Final_Project/Chess Datasets/games.csv")

In [13]:
# Split moves column into moves df
moves_df = chess_df['moves'].str.split(' ', n=10, expand=True)

# Drop column 10 and rename columns
moves_df=moves_df.drop(10,axis=1)
moves_df.columns= ["Wm1","Bm1","Wm2","Bm2","Wm3","Bm3","Wm4","Bm4","Wm5","Bm5"]

moves_df["Outcome"] = chess_df["winner"]

# drop na
moves_df = moves_df.dropna()
moves_df

Unnamed: 0,Wm1,Bm1,Wm2,Bm2,Wm3,Bm3,Wm4,Bm4,Wm5,Bm5,Outcome
0,d4,d5,c4,c6,cxd5,e6,dxe6,fxe6,Nf3,Bb4+,white
1,d4,Nc6,e4,e5,f4,f6,dxe5,fxe5,fxe5,Nxe5,black
2,e4,e5,d3,d6,Be3,c6,Be2,b5,Nd2,a5,white
4,d4,d5,e4,dxe4,Nc3,Nf6,f3,exf3,Nxf3,Nc6,white
6,e4,e5,Bc4,Nc6,Nf3,Nd4,d3,Nxf3+,Qxf3,Nf6,black
...,...,...,...,...,...,...,...,...,...,...,...
18295,d4,f5,e3,e6,Nf3,Nf6,Nc3,b6,Be2,Bb7,white
18296,d4,d6,Bf4,e5,Bg3,Nf6,e3,exd4,exd4,d5,black
18297,d4,d5,Bf4,Nc6,e3,Nf6,c3,e6,Nf3,Be7,white
18298,e4,d6,d4,Nf6,e5,dxe5,dxe5,Qxd1+,Kxd1,Nd5,white


In [14]:
# Changing moves to numbers
for col in moves_df.columns:
    
    # Get list of unique values
    values = list(set(moves_df[col].values))
    
    # Create numerical dictionary
    values_with_indexes = {}
    for i, v in enumerate(values):
        values_with_indexes[v] = i
    
    # Replace column
    moves_df.replace({col: values_with_indexes},inplace=True)

moves_df

Unnamed: 0,Wm1,Bm1,Wm2,Bm2,Wm3,Bm3,Wm4,Bm4,Wm5,Bm5,Outcome
0,18,2,0,18,142,78,260,84,305,163,1
1,18,18,68,10,164,177,167,271,253,182,2
2,17,14,10,80,156,49,239,92,10,7,1
4,18,2,68,0,61,199,166,221,131,227,1
6,17,14,74,19,98,58,139,149,48,200,2
...,...,...,...,...,...,...,...,...,...,...,...
18295,18,4,48,31,98,199,92,77,255,269,1
18296,18,12,31,10,138,199,237,148,297,30,2
18297,18,2,31,19,157,199,182,103,305,83,1
18298,17,12,73,82,36,139,167,229,180,69,1


In [15]:
# Split our preprocessed data into our features and target arrays
y = moves_df["Outcome"].values
X = moves_df.drop("Outcome",1).values

# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X,y,random_state = 1)

  X = moves_df.drop("Outcome",1).values


In [16]:
# Create a StandardScaler instances
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [17]:
pip install --upgrade tensorflow

Note: you may need to restart the kernel to use updated packages.


In [18]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [19]:
# Create the keras sequential model
number_input_features = len(X_train_scaled[0])
nn_model = tf.keras.models.Sequential()

2022-05-07 13:05:33.627453: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [20]:
# Add the first layer including input layer
nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid", input_dim = number_input_features))

In [21]:
# Add the ouput layer that uses a probability activation function
nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid", input_dim = number_input_features))

In [22]:
# Create a summary to check the structure of the sequential model
nn_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 1)                 11        
                                                                 
 dense_1 (Dense)             (None, 1)                 2         
                                                                 
Total params: 13
Trainable params: 13
Non-trainable params: 0
_________________________________________________________________


In [23]:
# Compile the model and customize metrics
nn_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [24]:
# Train the model
fit_model = nn_model.fit(X_train_scaled, y_train, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
