# Connect 4 Dataset

## 1: Clean Data, Save Case Base and Models

In [1]:
import pickle

import numpy as np
import pandas as pd

from keras.layers import Dense, Activation
from keras.models import Sequential
from keras.utils import to_categorical

from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import MinMaxScaler

Using TensorFlow backend.


In [2]:
# Load the comma-separated Connect-4 positions into a DataFrame.
# NOTE(review): the first line of the file is consumed as the header row, and a
# column named "win" is expected later on — confirm the file really carries a
# header line, otherwise the first position is silently lost as column names.
df = pd.read_csv("connect-4.data")

In [3]:
# Separate the target column from the board features.
# pop() returns the "win" column and removes it from df in one step.
outcome_codes = {'win': 2, 'loss': 1, 'draw': 0}
y = df.pop("win").map(outcome_codes)

# Series.map turns any label missing from the mapping into NaN; fail loudly
# here rather than letting NaN targets flow into training.
assert y.notna().all(), "unexpected label in 'win' column"

# One-hot encode the remaining feature columns
df = pd.get_dummies(df)

In [4]:
# Rescale every feature column to the range [-1, 1] for the neural network.
# Note: the scaler is fitted on all rows, i.e. before the train/test split.
scaler = MinMaxScaler(feature_range=(-1, 1), copy=True)
X = scaler.fit_transform(df.values)

# Convert the target to a plain NumPy array. np.asarray (unlike `.values`)
# also accepts an ndarray, so re-running this cell does not raise.
y = np.asarray(y)



## Make Case Base

In [5]:
# Persist the one-hot encoded feature frame (keeps the column names and order);
# the row index is not written.
df.to_csv(path_or_buf="processed_df.csv", index=False)

In [6]:
# Make case base: split into 90% train / 10% test.
# The fixed random_state keeps the split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=42,
)

In [7]:
# One-hot encode the integer class labels for the network.
# num_classes is pinned to the three outcomes (draw=0, loss=1, win=2) so both
# splits get the same width even if one of them is missing the highest class;
# without it, to_categorical infers the width from the largest label present.
oh_y_train = to_categorical(y_train, num_classes=3)
oh_y_test = to_categorical(y_test, num_classes=3)

## Train a Keras MLP

In [25]:
# Build the MLP: one hidden ReLU layer as wide as the input, followed by a
# 3-unit softmax output layer.
n_features = X_train.shape[1]
model = Sequential([
    Dense(n_features, input_dim=n_features),
    Activation("relu"),
    Dense(3),
    Activation("softmax"),
])

# Categorical cross-entropy on the one-hot targets, optimised with Adam
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Train
model.fit(X_train, oh_y_train, epochs=10, batch_size=8)

# Report the second entry of model.metrics_names on the training split first,
# then on the held-out split
for split_label, split_X, split_y in (
    ("Training Set:", X_train, oh_y_train),
    ("Test Set:", X_test, oh_y_test),
):
    scores = model.evaluate(split_X, split_y)
    print(split_label, "\n%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Training Set: 
acc: 85.11%
Test Set: 
acc: 83.69%


In [26]:
# 1-nearest-neighbour classifier over the training split.
# algorithm="brute" is chosen for maximum reliability in the experiments.
knn_clf = KNeighborsClassifier(algorithm="brute", n_neighbors=1)
knn_clf.fit(X=X_train, y=y_train)

KNeighborsClassifier(algorithm='brute', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=None, n_neighbors=1, p=2,
           weights='uniform')

In [27]:
# Predict the held-out split with the k-NN model and report its accuracy
knn_predictions_test = knn_clf.predict(X_test)
knn_test_accuracy = accuracy_score(y_test, knn_predictions_test)
print("k-NN Accuracy Test:", knn_test_accuracy)

k-NN Accuracy Test: 0.6626702190645353


In [28]:
# Confusion matrix for k-NN on the held-out split (true labels first, then predictions)
confusion_matrix(y_test, knn_predictions_test)

array([[ 164,  165,  300],
       [ 218,  787,  621],
       [ 332,  643, 3526]])

In [29]:
# Confusion matrix for the neural network on the held-out split.
# The predicted class is the arg-max over the softmax outputs. This is what
# Sequential.predict_classes computed, but that helper is deprecated and absent
# from newer Keras / TensorFlow releases; predict + argmax works in both.
nn_predictions_test = np.argmax(model.predict(X_test), axis=-1)
confusion_matrix(y_test, nn_predictions_test)

array([[ 112,  219,  298],
       [  45, 1309,  272],
       [  56,  212, 4233]])

In [30]:
# Persist the fitted k-NN classifier. The context manager guarantees the file
# handle is flushed and closed, even if pickling raises.
with open('k-nn_model.sav', 'wb') as knn_file:
    pickle.dump(knn_clf, knn_file)

In [31]:
# Persist the trained Keras network to an HDF5 file
model.save(filepath="NN.h5")

In [32]:
# Persist the train/test splits (features, integer labels and one-hot labels).
# np.save appends ".npy" to each file name.
arrays_to_save = {
    "X_train": X_train,
    "X_test": X_test,
    "y_train": y_train,
    "y_test": y_test,
    "oh_y_train": oh_y_train,
    "oh_y_test": oh_y_test,
}
for file_stem, array in arrays_to_save.items():
    np.save(file_stem, array)