# Imports

In [1]:
import pandas as pd
import tensorflow as tf
from tensorflow.data import Dataset
import numpy as np
import matplotlib.pyplot as plt




# Preprocess Database
Source: https://www.kaggle.com/datasets/marcpaulo/connect4-2-step-lookahead-moves-100k?resource=download

In [4]:
df = pd.read_csv('./2stepLA_moves.csv')
df.head()

Unnamed: 0,rows,columns,inarow,flat_board,mark,action
0,6,7,4,#100000012000002100000220000012020102211011,2,4
1,6,7,4,#020022001002100220120211011112112221212121,1,3
2,6,7,4,#000002000200212011012101222122211212111211,2,4
3,6,7,4,#000000000000000000000000010000002002210100,1,2
4,6,7,4,#000000000000000001000000102020021202121211,1,4


In [19]:
# Reference: https://www.kaggle.com/code/marcpaulo/connect4-convnet-imitates-2-stepla-agent/notebook#Connect4:-ConvNet-imitates-2-Step-Lookahead-Agent
# First, remove the '#' initial character
flat_boards = df['flat_board'].map(lambda b: b[1:])
# Turn the values from strings into lists of integers
flat_boards = flat_boards.map(lambda b: [int(bb) for bb in b])
# Turn the lists into Numpy arrays and reshape them
rows = df['rows'].unique()[0]
columns = df['columns'].unique()[0]
numpy_2d_boards_ = flat_boards.map(lambda b: np.array(b).reshape(rows, columns))
# Stack the arrays to have the desired shape
numpy_2d_boards = np.stack(numpy_2d_boards_.values)

# New encoding: {0: empty, 1: active player, -1: opponent}
for board, mark in zip(numpy_2d_boards, df['mark']):
    if mark == 1:
        board[board == 2] = -1
    else:  # a.k.a mark == 2:
        board[board == 1] = -1
        board[board == 2] = 1

numpy_2d_boards = numpy_2d_boards.reshape((len(numpy_2d_boards), rows, columns, 1))

In [20]:
# Parameters
train_size = 90000
val_size = 5000
test_size = 5000

BATCH_SIZE = 64
SHUFFLE_BUFFER_SIZE = 100

train_data = Dataset.from_tensor_slices((numpy_2d_boards[0:train_size], df['action'].values[0:train_size]))
val_data = Dataset.from_tensor_slices((numpy_2d_boards[train_size:train_size+val_size], df['action'].values[train_size:train_size+val_size]))
test_data = Dataset.from_tensor_slices((numpy_2d_boards[train_size+val_size:train_size+val_size+test_size], df['action'].values[train_size+val_size:train_size+val_size+test_size]))

train_data = train_data.shuffle(SHUFFLE_BUFFER_SIZE).batch(BATCH_SIZE)
val_data = val_data.batch(BATCH_SIZE)
test_data = test_data.batch(BATCH_SIZE)

In [5]:
from tensorflow.keras import layers, models

input_shape = (6,7, 1)
model = models.Sequential([
    layers.Conv2D(64, (2, 2), activation='relu', input_shape=input_shape),
    layers.Conv2D(128, (2, 2), activation='relu'),
    layers.Conv2D(32, (2, 2), activation='relu'),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(64, activation='relu'),
    layers.Dense(7, activation='softmax')
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])


In [7]:
from keras.utils import plot_model
plot_model(model, to_file='model.png', show_shapes=True, dpi=300, show_layer_activations=True, rankdir='LR')

AttributeError: module 'pydot' has no attribute 'InvocationException'

In [None]:
model.summary()

# Training

In [33]:
# Parameters
EPOCHS = 512

model.fit(train_data, validation_data=val_data, epochs = EPOCHS, use_multiprocessing=True, workers=2)

Epoch 1/512




Epoch 2/512
Epoch 3/512
Epoch 4/512
Epoch 5/512
Epoch 6/512
Epoch 7/512
Epoch 8/512
Epoch 9/512
Epoch 10/512
Epoch 11/512
Epoch 12/512
Epoch 13/512
Epoch 14/512
Epoch 15/512
Epoch 16/512
Epoch 17/512
Epoch 18/512
Epoch 19/512
Epoch 20/512
Epoch 21/512
Epoch 22/512
Epoch 23/512
Epoch 24/512
Epoch 25/512
Epoch 26/512
Epoch 27/512
Epoch 28/512
Epoch 29/512
Epoch 30/512
Epoch 31/512
Epoch 32/512
Epoch 33/512
Epoch 34/512
Epoch 35/512
Epoch 36/512
Epoch 37/512
Epoch 38/512
Epoch 39/512
Epoch 40/512
Epoch 41/512
Epoch 42/512
Epoch 43/512
Epoch 44/512
Epoch 45/512
Epoch 46/512
Epoch 47/512
Epoch 48/512
Epoch 49/512
Epoch 50/512
Epoch 51/512
Epoch 52/512
Epoch 53/512
Epoch 54/512
Epoch 55/512
Epoch 56/512
Epoch 57/512
Epoch 58/512
Epoch 59/512
Epoch 60/512
Epoch 61/512
Epoch 62/512
Epoch 63/512
Epoch 64/512
Epoch 65/512
Epoch 66/512
Epoch 67/512
Epoch 68/512
Epoch 69/512
Epoch 70/512
Epoch 71/512
Epoch 72/512
Epoch 73/512
Epoch 74/512
Epoch 75/512
Epoch 76/512
Epoch 77/512
Epoch 78/512
Epoch

<keras.src.callbacks.History at 0x1b38c6ed580>

In [39]:
model.save('connect4_model.h5')

  saving_api.save_model(


In [36]:
model.evaluate(test_data)

 1/79 [..............................] - ETA: 1s - loss: 6.1705 - accuracy: 0.7031



[4.3769402503967285, 0.7979999780654907]

# Evaluation

In [50]:
import numpy as np
import pandas as pd
from IPython.display import display

class connect_x:

    def __init__(self):
        self.board_height = 6
        self.board_width = 7
        self.board_state = np.zeros([self.board_height, self.board_width], dtype=np.int8)
        self.players = {'p1': 1, 'p2': -1}
        self.isDone = False
        self.reward = {'win': 1, 'draw': 0.5, 'lose': -1}
    
    def render(self):
        rendered_board_state = self.board_state.copy().astype(str)
        rendered_board_state[self.board_state == 0] = ' '
        rendered_board_state[self.board_state == 1] = 'O'
        rendered_board_state[self.board_state == -1] = 'X'
        display(pd.DataFrame(rendered_board_state))
    
    def reset(self):
        self.__init__()
        
    def get_available_actions(self):
        available_cols = []
        for j in range(self.board_width):
            if np.sum([self.board_state[:, j] == 0]) != 0:
                available_cols.append(j)
        return available_cols
    
    def check_game_done(self, player):
        if player == 'p1':
            check = '1 1 1 1'
        else:
            check = '-1 -1 -1 -1'
        
        # check vertically then horizontally
        for j in range(self.board_width):
            if check in str(self.board_state[:, j]):
                self.isDone = True
        for i in range(self.board_height):
            if check in str(self.board_state[i, :]):
                self.isDone = True
        
        # check left diagonal and right diagonal
        for k in range(0, self.board_height - 4 + 1):
            left_diagonal = np.array([self.board_state[k + d, d] for d in \
                            range(min(self.board_height - k, min(self.board_height, self.board_width)))])
            right_diagonal = np.array([self.board_state[d + k, self.board_width - d - 1] for d in \
                            range(min(self.board_height - k, min(self.board_height, self.board_width)))])
            if check in str(left_diagonal) or check in str(right_diagonal):
                self.isDone = True
        for k in range(1, self.board_width - 4 + 1):
            left_diagonal = np.array([self.board_state[d, d + k] for d in \
                            range(min(self.board_width - k, min(self.board_height, self.board_width)))])
            right_diagonal = np.array([self.board_state[d, self.board_width - 1 - k - d] for d in \
                            range(min(self.board_width - k, min(self.board_height, self.board_width)))])
            if check in str(left_diagonal) or check in str(right_diagonal):
                self.isDone = True
        
        if self.isDone:
            return self.reward['win']
        # check for draw
        elif np.sum([self.board_state == 0]) == 0:
            self.isDone = True
            return self.reward['draw']
        else:
            return 0.
        
    def make_move(self, a, player):
        # check if move is valid
        if a in self.get_available_actions():
            i = np.sum([self.board_state[:, a] == 0]) - 1
            self.board_state[i, a] = self.players[player]
        else:
            print('Move is invalid')
            self.render()

        reward = self.check_game_done(player)
        
        # give feedback as new state and reward
        return self.board_state.copy(), reward

env = connect_x()

In [72]:
env.reset()

In [71]:
env.make_move(3,'p2')

(array([[ 0,  0,  0,  0,  0,  0,  0],
        [ 0,  0,  0,  0,  0,  0,  0],
        [ 0,  0,  0, -1,  0,  0,  0],
        [ 0,  0,  0, -1,  0,  0,  0],
        [ 0,  0,  0, -1,  0,  0,  0],
        [ 0,  0,  0, -1,  0,  0,  0]], dtype=int8),
 1)

In [80]:
env.reset()
state = env.board_state.copy().reshape(1, 6, 7, 1)
reward = 0
while reward != 1:
    action = np.argmax(model.predict(state))
    state, reward = env.make_move(action, 'p1')
    state = state.reshape(1, 6, 7, 1)
    print(env.board_state)
    env.make_move(int(input('Enter your move: ')), 'p2')

[[0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [0 0 0 1 0 0 0]]
[[ 0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0]
 [-1  0  0  1  1  0  0]]
[[ 0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0]
 [-1 -1  0  1  1  1  0]]
[[ 0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0]
 [-1 -1  1  1  1  1 -1]]


ValueError: invalid literal for int() with base 10: ''

In [76]:
model.predict(state)



array([[4.5384244e-17, 1.4368956e-36, 2.4640730e-26, 1.0000000e+00,
        2.0217451e-29, 6.0259550e-26, 3.1349790e-18]], dtype=float32)