In [1]:
import tensorflow

In [2]:
import random

In [3]:
class TicTacToe:
    """Immutable-style snapshot of a tic-tac-toe position.

    Attributes:
        board: flat sequence of 9 cells; the cell at coordinates (x, y) is
            stored at index ``x + 3 * y``. 0 means empty, 1 is the piece
            placed on even turns, -1 the piece placed on odd turns.
        turn: move counter. The class does not verify it against the board;
            callers are expected to pass the number of pieces already placed.
    """

    # Index triples of the eight winning lines, in the order they are checked:
    # both diagonals first, then (column r, row r) for r = 0, 1, 2.
    _LINES = (
        (0, 4, 8), (6, 4, 2),
        (0, 3, 6), (0, 1, 2),
        (1, 4, 7), (3, 4, 5),
        (2, 5, 8), (6, 7, 8),
    )

    def __init__(self, board, turn):
        self.board = board
        self.turn = turn

    def getNextBoards(self):
        """Return a list with one copied board per empty cell, each with the
        current mover's piece placed in that cell.

        The piece is 1 when ``turn`` is even and -1 otherwise. Cells are
        visited with x in the outer loop and y in the inner loop, i.e. in
        index order 0, 3, 6, 1, 4, 7, 2, 5, 8. ``self.board`` is not modified.
        """
        piece = 1 if self.turn % 2 == 0 else -1
        boards = []
        for x in range(3):
            for y in range(3):
                cell = x + 3 * y
                if self.board[cell] != 0:
                    continue
                successor = self.board.copy()
                successor[cell] = piece
                boards.append(successor)
        return boards

    def winner(self):
        """Return the outcome of this position.

        Returns:
            1 or -1 if that piece occupies a complete line,
            0 if no line is complete and ``turn == 9`` (tie),
            None if the game is still open.
        """
        # Lines must be checked BEFORE the full-board test: the ninth move can
        # itself complete a line, and that is a win, not a tie.
        for a, b, c in self._LINES:
            mark = self.board[a]
            if mark != 0 and mark == self.board[b] and mark == self.board[c]:
                return mark
        if self.turn == 9:
            return 0
        return None

    def __str__(self):
        """Render the board as three newline-terminated lines of 'X', 'O', '_'.

        Output line x contains the cells at indices x, x + 3 and x + 6
        (1 -> 'X', -1 -> 'O', anything else -> '_').
        """
        symbols = {1: 'X', -1: 'O'}
        lines = []
        for x in range(3):
            lines.append(''.join(symbols.get(self.board[x + 3 * y], '_') for y in range(3)))
        return ''.join(line + '\n' for line in lines)

In [4]:
def argmax(l):
    """Return the index of the largest element of ``l``.

    On ties the earliest index wins, because a later element only replaces
    the current best when it is strictly greater. Indexing ``l[0]`` means an
    empty sequence raises IndexError.
    """
    best_index = 0
    best_value = l[best_index]
    for position in range(1, len(l)):
        candidate = l[position]
        if not candidate > best_value:
            continue
        best_index = position
        best_value = candidate
    return best_index

In [5]:
def argmin(l):
    """Return the index of the smallest element of ``l``.

    Implemented by negating every element and delegating to ``argmax``, so
    tie-breaking and empty-input behaviour are whatever ``argmax`` provides.
    """
    negated = []
    for value in l:
        negated.append(-value)
    return argmax(negated)

In [6]:
def getModel():
    """Build and compile a fresh Keras value network.

    Architecture: 9 inputs -> three Dense(8, relu) layers -> Dense(1, sigmoid).
    Compiled with mean-squared-error loss and Adam at learning rate 1e-2.
    """
    keras = tensorflow.keras
    stack = [
        keras.layers.Dense(8, input_dim=9, activation='relu'),
        keras.layers.Dense(8, input_dim=8, activation='relu'),
        keras.layers.Dense(8, input_dim=8, activation='relu'),
        keras.layers.Dense(1, activation='sigmoid'),
    ]
    network = keras.models.Sequential()
    for layer in stack:
        network.add(layer)
    optimizer = keras.optimizers.Adam(learning_rate=1e-2)
    network.compile(loss='mse', optimizer=optimizer)
    return network

In [7]:
# Network for the side that moves on even turns (piece 1, rendered as 'X').
X = getModel()

In [8]:
# Network for the side that moves on odd turns (piece -1, rendered as 'O').
O = getModel()

In [47]:
# Self-play training: 100 rounds, each consisting of 20 games followed by one
# fit() call per network on the positions collected during that round.
for game in range(100):
    # Per-network training set for this round: [positions, target values].
    data = {
        X: [[], []],
        O: [[], []],
    }
    for actualgames in range(20):
        board = TicTacToe([0] * 9, 0)
        while board.winner() is None:
            Xplaying = board.turn % 2 == 0
            nextBoards = board.getNextBoards()
            player = X if Xplaying else O
            opponent = O if Xplaying else X
            # The opponent's network scores every successor position. The
            # single-unit output layer yields one value per board; flattening
            # gives plain scalars so argmin() and float() below do not rely
            # on one-element arrays.
            values = opponent.predict(nextBoards).flatten()
            for i, b in enumerate(nextBoards):
                b = TicTacToe(b, board.turn + 1)
                w = b.winner()
                if w is not None:
                    # Terminal successors get an exact value instead of a
                    # prediction: 0.5 for a tie, 0 when the opponent has lost.
                    values[i] = 0.5 if w == 0 else 0
            bestIndex = argmin(values) # Minimize value of opponent
            bestVal = values[bestIndex]
            data[player][0].append(board.board)
            data[player][1].append(1 - float(bestVal)) # your value will be 1 - opponent value
            # Exploration: about 20% of moves are uniformly random, the rest
            # follow the greedy choice. The recorded target above is always
            # the greedy value, regardless of which move is actually played.
            if random.random() > 0.8:
                board = TicTacToe(nextBoards[random.randint(0, len(values)-1)], board.turn + 1)
            else:
                board = TicTacToe(nextBoards[bestIndex], board.turn + 1)
    # One line per round: first-epoch loss of each network (X then O).
    print(*[f"{player.fit(datapoints[0], datapoints[1], verbose = False).history['loss'][0]:.3f}" for player, datapoints in data.items()])

0.025 0.028
0.017 0.017
0.011 0.014
0.013 0.008
0.023 0.017
0.028 0.025
0.024 0.026
0.019 0.027
0.019 0.020
0.013 0.023
0.017 0.019
0.021 0.023
0.015 0.018
0.015 0.022
0.011 0.013
0.011 0.008
0.015 0.024
0.021 0.016
0.011 0.005
0.010 0.012
0.013 0.011
0.012 0.020
0.014 0.021
0.012 0.024
0.009 0.010
0.019 0.010
0.010 0.013
0.009 0.013
0.014 0.017
0.015 0.017
0.005 0.004
0.009 0.014
0.005 0.012
0.012 0.011
0.007 0.022
0.014 0.019
0.010 0.017
0.010 0.020
0.007 0.011
0.017 0.012
0.006 0.018
0.010 0.010
0.010 0.010
0.011 0.014
0.009 0.009
0.009 0.013
0.009 0.014
0.009 0.005
0.006 0.011
0.007 0.004
0.004 0.009
0.007 0.010
0.011 0.012
0.007 0.015
0.012 0.016
0.005 0.009
0.002 0.005
0.009 0.020
0.004 0.005
0.005 0.009
0.008 0.009
0.016 0.009
0.004 0.010
0.007 0.018
0.012 0.011
0.014 0.009
0.008 0.016
0.008 0.017
0.009 0.008
0.002 0.009
0.014 0.008
0.008 0.011
0.011 0.007
0.019 0.032
0.006 0.006
0.007 0.008
0.008 0.029
0.019 0.031
0.010 0.009
0.016 0.009
0.009 0.007
0.008 0.008
0.019 0.031
0.01

In [48]:
class Player():
    """A tic-tac-toe participant.

    Exactly one strategy is used per move, checked in this order:
      * ``human=True``  -- the move is read from standard input as "x y";
      * ``random=True`` -- a uniformly random legal move is played;
      * otherwise       -- the move that minimises the opposing network's
        predicted value is played (uses the module-level models ``X``/``O``).
    """

    def __init__(self, random = False, human=False):
        self.random = random
        self.human = human

    def move(self, board):
        """Return a new ``TicTacToe`` one turn after ``board``.

        The board passed in is never modified.
        """
        if self.human:
            return self._humanMove(board)

        nextBoards = board.getNextBoards()
        if self.random:
            return TicTacToe(random.choice(nextBoards), board.turn + 1)

        # The side NOT moving now is the one whose network evaluates the
        # successor positions (it will be to move in each of them).
        opponent = O if board.turn % 2 == 0 else X
        # Score all successors in a single batched call, then flatten the
        # one-value-per-board output to plain scalars.
        values = opponent.predict(nextBoards).flatten()
        for i, b in enumerate(nextBoards):
            w = TicTacToe(b, board.turn + 1).winner()
            if w is not None:
                # Terminal successor: 0.5 for a tie, 0 when the opponent lost.
                values[i] = 0.5 if w == 0 else 0
        bestIndex = argmin(values)
        return TicTacToe(nextBoards[bestIndex], board.turn + 1)

    def _humanMove(self, board):
        """Prompt on stdin until a legal "x y" move is entered, then play it.

        The chosen cell is index ``x + 3 * y``. Malformed input, coordinates
        outside 0..2 and occupied cells are rejected and re-prompted.
        """
        print(board)
        while True:
            try:
                x, y = (int(token) for token in input().split())
            except ValueError:
                print('Enter two integers "x y", each between 0 and 2.')
                continue
            if 0 <= x < 3 and 0 <= y < 3 and board.board[x + 3 * y] == 0:
                break
            print('That cell is outside the board or already taken.')
        # Work on a copy so the caller's position is left untouched.
        cells = board.board.copy()
        cells[x + 3 * y] = 1 if board.turn % 2 == 0 else -1
        newboard = TicTacToe(cells, board.turn + 1)
        print(newboard)
        return newboard

In [49]:
def play(p1, p2, verbose = False):
    """Play one game from the empty board and return its score.

    Args:
        p1: object with ``move(board)``; asked to move on even turns.
        p2: object with ``move(board)``; asked to move on odd turns.
        verbose: when true, print 'X', 'O' or '-' for the final outcome.

    Returns:
        ``(w + 1) / 2`` where ``w`` is the final ``winner()`` value:
        1.0 if piece 1 won, 0.5 for a tie, 0.0 if piece -1 won.
    """
    players = [p1, p2]
    board = TicTacToe([0] * 9, 0)
    w = board.winner()
    while w is None:
        board = players[board.turn % 2].move(board)
        w = board.winner()
    if verbose:
        if w == 1:
            print('X')
        elif w == -1:
            print('O')
        else:
            print('-')
    return (w + 1) / 2

In [50]:
# Number of games played per match-up in the evaluation cells that use range(k).
k = 100

In [53]:
# One game with both sides using the default (network-guided) Player.
# play() returns (w + 1) / 2, so 1.0 means the first mover won.
play(Player(), Player())

1.0

In [51]:
# Mean score over k games of the network-guided player moving first against
# a random second player (1 = win, 0.5 = tie, 0 = loss for the first mover).
sum([play(Player(), Player(True)) for i in range(k)]) / k

0.935

In [52]:
# Random player moves first, network-guided player second. play() scores the
# first mover, so 1 - mean is the mean score of the network-guided side.
1 - sum([play(Player(True), Player()) for i in range(k)]) / k

0.69