In [272]:
import numpy as np

In [273]:
class TicTacToe():

    def __init__(self, agent1, agent2):

        self.state = np.zeros(9)
        self.agent1 = agent1
        self.agent1.tag = 1
        self.agent2 = agent2
        self.agent2.tag = -1
        
        self.winner = None


    def print_game(self):
        cur_state = self.state.copy().tolist()
        for i, pos in enumerate(cur_state):
            if pos == 1:
                cur_state[i] = "X"
            elif pos == -1:
                cur_state[i] = "O"
            else:
                cur_state[i] = " "
        game = f"""
        \r|{cur_state[0]}|{cur_state[1]}|{cur_state[2]}|
        \r|{cur_state[3]}|{cur_state[4]}|{cur_state[5]}|
        \r|{cur_state[6]}|{cur_state[7]}|{cur_state[8]}|"""

        print(game)
    
    def check_winner(self):

        win_conditions = np.array([[0, 1, 2], [3, 4, 5], \
                                   [6, 7, 8], [0, 3, 6], \
                                   [1, 4, 7], [2, 5, 8], \
                                   [0, 4, 8], [2, 4, 6]])
        cur_conditions = self.state[win_conditions]
        wins = [[-1, -1, -1], [1, 1, 1]]

        for win in wins:
            if win in cur_conditions.tolist():
                self.winner = win[0]
                break
        else:
            if 0 not in self.state:
                self.winner = 'draw'
    
    def play(self, silent=True):   
        while True:
            self.state = self.agent1.make_move(self.state)
            self.check_winner()
            if not silent:
                self.print_game()
            if self.winner:
                break

            self.state = self.agent2.make_move(self.state)
            self.check_winner()
            if not silent:
                self.print_game()
            if self.winner:
                break
        
        if not silent:
            if self.winner == 1:
                winner = "X"
            elif self.winner == -1:
                winner = 'O'
            else:
                winner = 'Draw'
                
            print('Game is over the winner is :', winner)




In [274]:
class Agent():

    def __init__(self):
        self.tag = None

    def get_mask(self, state):
        mask = np.zeros_like(state)
        mask[state == 0] = 1
        self.mask = mask

    def make_move(self, state):
        return state

In [275]:
class RandomAgent(Agent):

    def __init__(self):
        super().__init__()

    def make_move(self, state):
        self.get_mask(state)
        valid_moves = np.nonzero(self.mask)[0]
        move = np.random.choice(valid_moves)
        state[move] = self.tag
        return super().make_move(state)

In [276]:
from keras.models import load_model

class DeepAgent(Agent):

    def __init__(self):
        super().__init__()

    def load_model(self, path):
        self.model = load_model(path)

    def softmax(x):
        return np.exp(x) / np.sum(np.exp(x), axis=0)

    def make_move(self, state):
        self.get_mask(state)
        preds = self.model.predict(np.expand_dims(state, axis=0), verbose=0)
        preds = (preds + np.absolute(np.min(preds))) * self.mask
        print(self.mask)
        print(preds)
        move = np.argmax(preds)
        print(move)
        state[move] = self.tag
        return super().make_move(state) 

In [277]:
a1 = DeepAgent()
a1.load_model('tic_100eps.keras')
a2 = RandomAgent()
tic = TicTacToe(a1, a2)
tic.play(silent=False)


[1. 1. 1. 1. 1. 1. 1. 1. 1.]
[[0.07545769 0.2757411  0.17320129 0.18014845 0.28599623 0.
  0.01758504 0.33059669 0.18405341]]
7

| | | | 
| | | | 
| |X| | 

| | | | 
| | |O| 
| |X| | 
[1. 1. 1. 1. 1. 0. 1. 0. 1.]
[[0.41003373 0.92858195 0.62975556 0.5727706  0.90417373 0.
  0.         0.         0.6852321 ]]
1

| |X| | 
| | |O| 
| |X| | 

| |X| | 
| | |O| 
| |X|O| 
[1. 0. 1. 1. 1. 0. 1. 0. 0.]
[[0.         0.         0.76426333 0.50817579 0.15371346 0.
  0.97246677 0.         0.        ]]
6

| |X| | 
| | |O| 
|X|X|O| 

| |X|O| 
| | |O| 
|X|X|O| 
Game is over the winner is : O
