In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf

from tictaczero.board import Board
from tictaczero.games import play_a_game
from tictaczero.player import BasePlayer, RandomPlayer

%load_ext autoreload
%autoreload 2

EMPTY = 0
CROSS = 1
CIRCLE = 2

In [2]:
from tictaczero.board import WIN_MASKS

class SmartPlayer(BasePlayer):
    """
    A tic-tac-toe player that completes three in a row whenever it can.

    On each turn the player scans every line in ``WIN_MASKS``. If a line holds
    two of its own marks and one empty square (value 0), it plays that empty
    square; otherwise it plays a random empty square.
    """
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def move(self, board):
        """
        Play one move for ``self.side`` on ``board``.

        Returns the three values produced by ``board.move`` as a tuple
        ``(state, res, finished)``.
        """
        winning_square = self._find_winning_square(board)

        if winning_square is not None:
            print(f"{self.side}: We've found an optimal square, we'll win")
            target = winning_square
        else:
            print("There was no optimal square.")
            target = board.random_empty_square()

        state, res, finished = board.move(target, self.side)
        return state, res, finished

    def _find_winning_square(self, board):
        """
        Return the board index that completes a line for ``self.side``.

        Returns ``None`` when no line has exactly two own marks and one empty
        square. The first matching line in ``WIN_MASKS`` is used; any match
        wins the game, so there is no need to keep scanning.
        """
        for connect_3 in WIN_MASKS:
            combination = np.asarray(board.state[connect_3])
            if combination.shape != (3,):
                continue

            own_marks = np.count_nonzero(combination == self.side)
            empty_positions = np.flatnonzero(combination == 0)
            if own_marks == 2 and empty_positions.size == 1:
                # Translate the position inside the 3-square line back to a
                # board index. Assumes each entry of WIN_MASKS is a boolean
                # (or 0/1) mask over the board squares -- TODO confirm.
                squares_on_line = np.flatnonzero(connect_3)
                return int(squares_on_line[empty_positions[0]])

        return None
        
        

In [10]:
play_a_game(SmartPlayer, SmartPlayer, print_result=True)

There was no optimal square.
There was no optimal square.
There was no optimal square.
There was no optimal square.
1: We've found an optimal square, we'll win
Game won by 1
-----
|  X|
| XO|
|X O|
-----


array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
       [0, 0, 0, 0, 0, 0, 1, 0, 0, 1],
       [0, 0, 0, 0, 0, 2, 1, 0, 0, 1],
       [0, 0, 0, 0, 1, 2, 1, 0, 0, 1],
       [0, 0, 0, 0, 1, 2, 1, 0, 2, 1],
       [0, 0, 1, 0, 1, 2, 1, 0, 2, 1]])

In [None]:
states = pd.DataFrame(columns=["00", "01", "02", "10", "11", "12", "20", "21", "22", "result"])

In [5]:
def play_x_games(Player1, Player2, x=1000):
    """
    Play ``x`` games between two players and collect every game's history.

    Parameters
    ----------
    Player1, Player2
        Passed straight through to ``play_a_game`` for every game.
    x : int, default 1000
        Number of games to play.

    Returns
    -------
    pandas.DataFrame
        The histories of all games stacked in play order, with the nine
        square columns ``"00"`` .. ``"22"`` plus ``"result"``. An empty frame
        with those columns is returned when ``x`` is 0.
    """
    columns = ["00", "01", "02", "10", "11", "12", "20", "21", "22", "result"]

    histories = []
    for _ in range(x):
        board_history = play_a_game(Player1, Player2)
        if not isinstance(board_history, pd.DataFrame):
            # pd.concat only accepts pandas objects; a raw 2-D history
            # (one row per recorded state, ten values per row) is labelled
            # with the expected columns first.
            board_history = pd.DataFrame(board_history, columns=columns)
        histories.append(board_history)

    if not histories:
        return pd.DataFrame(columns=columns)

    # Concatenate once at the end instead of re-copying the growing frame
    # on every iteration.
    return pd.concat(histories)

        
        
# df = play_x_games(RandomPlayer, RandomPlayer, x=1000)

In [None]:
df.shape

In [None]:
df.head()

In [None]:
df.shape

Now we need a state.

We need an agent (player)

Agent can make an action

Agent gets a reward.

Network architecture:

Input layer:
$$B + S = 9+1 = 10 \text{ Nodes}$$

Where $B$ is the board state and $S$ is the side. 

Output layer:
$$B + S = 9+1 = 10 \text{ Nodes}$$

In [None]:
from tensorflow import keras

In [None]:
class BrainPlayer(BasePlayer):
    """
    Agent that is meant to use a neural network as its brain.

    At present ``move`` does not consult any network: it simply plays a
    random empty square.
    """
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def move(self, board):
        """
        Play a random empty square for ``self.side``.

        Returns the three values produced by ``board.move`` as a tuple.
        """
        square = board.random_empty_square()
        state, res, finished = board.move(square, self.side)
        return state, res, finished

In [None]:
# Fully connected network for the "predict the move" idea:
# 10 inputs -> 56 (relu) -> 56 (relu) -> 10 outputs (sigmoid).
# The 10-wide input and output match the "B + S = 9 + 1" sizing given in the
# markdown above.
brain = keras.models.Sequential([
    keras.layers.Dense(56, input_shape=[10], activation='relu'),
    keras.layers.Dense(56, activation='relu'),
    keras.layers.Dense(10, activation='sigmoid')
])



In [None]:
# Run the (untrained) network on a single sample: the board state with a
# trailing 1 appended, wrapped in an outer array to form a batch of one.
# Presumably the appended 1 is the side to move (CROSS) -- confirm.
# NOTE(review): `board` is not created in any cell visible in this notebook;
# this cell relies on leftover kernel state.
brain.predict(np.array([np.concatenate((board.state, [1]))]))

# OPTION 1

Have the neural network predict the move.

# OPTION 2 

Have the neural network predict the outcome of the game, and let a Q-learning agent decide which move to play.

# OPTION 2 - Network predicts outcome.


## The Network
In this section we're going to build a neural network which predicts the outcome of a game of tic-tac-toe given a certain board state. First we'll have to generate enough board states with known outcomes.

In total there are three outcomes:

1 - CROSS WINS

2 - CIRCLE WINS

3 - DRAW

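So the output layer needs one node per outcome, i.e. three nodes. Note that the model below uses four output units rather than three: the outcome labels run from 1 to 3 and are used directly as class indices, so index 0 goes unused. The input layer consists of the board state. We don't give the turn because CROSS always starts.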

In [None]:
# Outcome classifier: 9 inputs (one per square) -> 28 (relu) -> 28 (relu)
# -> 4-way softmax.
# NOTE(review): the text above lists three outcomes, yet the output layer has
# four units. Presumably the labels 1..3 are used directly as class indices
# with the sparse categorical loss, leaving index 0 unused -- confirm.
model = keras.models.Sequential([
    keras.layers.Dense(28, input_shape = [9], activation = 'relu'),
    keras.layers.Dense(28, activation = 'relu'),
    keras.layers.Dense(4, activation = 'softmax')
])

In [None]:
model.compile(optimizer="adam",
              loss="sparse_categorical_crossentropy",
              metrics=["accuracy"])

## Generating the data.

In total there are at most $3^9=19683$ different tic-tac-toe positions, and even fewer once positions that are mirror images or rotations of each other are counted only once. Let's generate 10,000 games and save the result of each game in the following dataframe.


In [None]:
# Expand `result` into three 0/1 indicator columns (draw, circle, cross) and
# then remove the original column.
# NOTE(review): the outcome list in the markdown above is numbered 1-3, while
# the codes tested here are 0/1/2 -- confirm which encoding `result` uses.
df = df.assign(
    draw=lambda frame: np.where(frame["result"].eq(2), 1, 0),
    circle=lambda frame: np.where(frame["result"].eq(1), 1, 0),
    cross=lambda frame: np.where(frame["result"].eq(0), 1, 0),
).drop(columns="result")

In [57]:
df.head()

Unnamed: 0,00,01,02,10,11,12,20,21,22,draw,circle,cross
0,0,0,0,0,1,0,0,0,0,0,1,0
0,0,0,0,0,1,0,0,2,0,0,1,0
0,0,0,0,1,1,0,0,2,0,0,1,0
0,0,0,0,1,1,0,0,2,2,0,1,0
0,0,0,0,1,1,1,0,2,2,0,1,0


In [None]:
from sklearn.model_selection import train_test_split

In [None]:
y_train.values.astype('uint32')

In [58]:
tf.convert_to_tensor(X_train.values/2.0, np.float32)

<tf.Tensor: id=1443, shape=(5712, 9), dtype=float32, numpy=
array([[0. , 0.5, 0. , ..., 0. , 0. , 0. ],
       [0. , 0. , 0.5, ..., 1. , 1. , 0. ],
       [0.5, 0.5, 1. , ..., 0.5, 1. , 0.5],
       ...,
       [0.5, 0. , 1. , ..., 0. , 0. , 0.5],
       [0. , 0.5, 1. , ..., 0. , 0.5, 1. ],
       [0. , 1. , 0.5, ..., 0. , 0. , 0. ]], dtype=float32)>

In [59]:
import tensorflow as tf
# Convert the training split to tensors. Dividing the features by 2 maps the
# square codes 0/1/2 onto 0.0/0.5/1.0; the labels are cast to unsigned ints
# first and stored as int32.
xtrain_tf = tf.convert_to_tensor(
    X_train.to_numpy() / 2.0,
    dtype=np.float32,
)
ytrain_tf = tf.convert_to_tensor(
    y_train.to_numpy().astype("uint32"),
    dtype=np.int32,
)

In [60]:
model.fit(xtrain_tf, ytrain_tf, epochs=10)

Train on 5712 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x1423a0850>

In [None]:
# `predict` needs input data; calling it with no arguments raises a TypeError.
# Show the predicted class probabilities for the test features, scaled the
# same way as the training features (divided by 2).
model.predict(tf.convert_to_tensor(X_test.values / 2.0, np.float32))

In [69]:
# `Sequential.predict_classes` was removed from Keras in TensorFlow 2.6.
# Taking the argmax over the softmax outputs gives the same class indices
# and works on both older and newer versions.
y_pred = np.argmax(
    model.predict(tf.convert_to_tensor(X_test.values / 2.0, np.float32)),
    axis=-1,
)

In [70]:
y_pred[0]

1

In [71]:
y_pred

array([1, 1, 1, ..., 1, 1, 1])

In [73]:
from collections import Counter
counts = Counter(y_pred)

In [74]:
counts

Counter({1: 1849, 2: 50, 3: 6})

In [75]:
Counter(y_test)

Counter({3: 318, 1: 1061, 2: 526})

In [76]:
results = pd.DataFrame({"y_pred": y_pred, "y_test": y_test})

In [79]:
df.head()

Unnamed: 0,00,01,02,10,11,12,20,21,22,draw,circle,cross
0,0,0,0,0,1,0,0,0,0,0,1,0
0,0,0,0,0,1,0,0,2,0,0,1,0
0,0,0,0,1,1,0,0,2,0,0,1,0
0,0,0,0,1,1,0,0,2,2,0,1,0
0,0,0,0,1,1,1,0,2,2,0,1,0


KeyError: "None of [Int64Index([0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0], dtype='int64')] are in the [columns]"