<a href="https://colab.research.google.com/github/RussAbbott/TicTacToe/blob/master/Reinforcement_learning_for_tic_tac_toe_v3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Reinforcement learning for tic-tac-toe

In which we solve the problem of teaching a neural network to play tic-tac-toe.

Based on Daniel Sauble's [article](https://medium.com/swlh/tic-tac-toe-and-deep-neural-networks-ea600bc53f51) and GitHub [repo](https://github.com/djsauble/tic-tac-toe-ai/blob/master/reinforcement_learning_for_tic-tac-toe.ipynb).

In [None]:
#@title Imports
import random
import numpy as np
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.backend import reshape
from keras.utils.np_utils import to_categorical

In [None]:
#@title Basic classes and functions

# ==============================================================================
# Two simple classes.

class Config:
    '''
    Game-wide settings that do not change while a game is played:
    the board size, the winning lines, the cell/outcome encodings,
    the printable symbols and the minimax search depth.
    '''
    def __init__(self, board_side=3):
        self.board_side = board_side
        # Every line (rows, columns, both diagonals) that can be completed.
        self.triples = collect_the_triples(board_side)

        # Cell encodings: X is +1 and O is -1, so negation switches players.
        self.x, self.o = 1, -1
        # A blank cell and a drawn game share the value 0.
        self.blank = self.draw = 0

        # Three-character strings used when printing a board.
        marks = (' O ', '   ', ' X ')
        self.player_symbol = dict(zip((self.o, self.draw, self.x), marks))

        # Look-ahead (in plies) used by the minimax player.
        self.depth = 5

class Obs:
    '''
    Obs (Observation) holds the state that changes during a game:
    the current board, a history list and whose turn it is.
    '''
    def __init__(self, config):
        side, blank = config.board_side, config.blank
        # Start from an all-blank board.
        self.board = new_board(side, blank)
        self.history = list()
        # X always opens the game.
        self.player_to_move = config.x


# ==============================================================================


def child_board(board, r, c, player):
    '''
    Return a copy of board in which cell (r, c) holds player.
    The board passed in is copied first (via its own copy() method).
    '''
    successor = board.copy()
    row = successor[r]
    row[c] = player
    return successor


def collect_the_triples(board_side):
    '''
    List every straight line of cells on a board_side x board_side board.

    The result holds the rows first, then the columns, then the main diagonal
    and finally the anti-diagonal. Each line is a list of (r, c) coordinates,
    e.g. [(0, 0), (0, 1), (0, 2)] for the top row of a 3x3 board.
    '''
    indices = range(board_side)
    lines = []

    # Rows: r is fixed, c varies.
    for r in indices:
        lines.append([(r, c) for c in indices])

    # Columns: c is fixed, r varies.
    for c in indices:
        lines.append([(r, c) for r in indices])

    # Main diagonal, e.g. [(0, 0), (1, 1), (2, 2)].
    lines.append([(i, i) for i in indices])
    # Anti-diagonal, e.g. [(0, 2), (1, 1), (2, 0)].
    lines.append([(i, board_side - 1 - i) for i in indices])

    return lines


def game_status(board, config, requested_counts=None):
    '''
    Report whether the game is over and how many lines each player controls.

    Parameters:
        board: 2-D board indexable as board[r][c].
        config: supplies config.triples (the lines to inspect) and the cell
            values config.x and config.o.
        requested_counts: dict mapping k -> starting value for every k that
            should be counted. Defaults to {3: 0, 2: 0, 1: 0}. It is copied,
            never modified.

    Returns: (game_over, counts)
        game_over is True when some line holds three marks of one player and
            none of the other, or when no line can still be won by anyone.
        counts is list(dict.items()) in the key order of requested_counts.
            For each k it is the number of lines holding exactly k X's and no
            O's, minus the number of lines holding exactly k O's and no X's.
            (O's lines are counted negatively so that X maximises and O
            minimises the same numbers.)
    '''
    # Build the default per call instead of sharing one dict across calls.
    if requested_counts is None:
        requested_counts = {3: 0, 2: 0, 1: 0}
    counts = dict(requested_counts)

    win_length = 3
    x, o = config.x, config.o
    someone_won = False
    any_triples_in_play = False

    for triple in config.triples:
        triple_with_values = [board[r][c] for (r, c) in triple]
        xs_in_triple = triple_with_values.count(x)
        os_in_triple = triple_with_values.count(o)

        # Detect a win directly, so the answer does not depend on 3 being one
        # of the requested counts and cannot be cancelled out by the other
        # player's (negative) count.
        if ((xs_in_triple == win_length and os_in_triple == 0) or
                (os_in_triple == win_length and xs_in_triple == 0)):
            someone_won = True

        # A line is still in play if at most one player occupies it and that
        # player has not yet completed it.
        if (xs_in_triple == 0 or os_in_triple == 0) and xs_in_triple + os_in_triple < win_length:
            any_triples_in_play = True

        for count in counts:
            if xs_in_triple == count and os_in_triple == 0:
                counts[count] += 1

            # Count the opponent's lines negatively.
            if os_in_triple == count and xs_in_triple == 0:
                counts[count] -= 1

    game_over = someone_won or not any_triples_in_play

    return game_over, list(counts.items())


def get_winner(counts, config):
    '''
    Turn the net three-in-a-row count (counts[3]) into a winner:
    positive -> config.x, negative -> config.o, zero -> config.draw.
    '''
    completed_lines = counts[3]
    if completed_lines > 0:
        return config.x
    if completed_lines < 0:
        return config.o
    return config.draw
               
               
def moves_to_board(moves, config, verbose=False):
    '''
    Replay a list of (player, (row, col)) moves onto a fresh blank board
    and return the resulting board. A later move to an occupied cell simply
    overwrites it. With verbose=True the board is printed after every move.
    '''
    replayed = new_board(config.board_side, config.blank)
    for mover, (r, c) in moves:
        replayed[r, c] = mover
        if not verbose:
            continue
        print_board(replayed, config)
        print('\n')
    return replayed


def new_board(board_side, blank):
    '''
    Create a board_side x board_side integer NumPy array filled with blank.
    '''
    shape = (board_side, board_side)
    return np.full(shape=shape, fill_value=blank, dtype=int)


def new_game():
    '''
    Create a default Config and a fresh Obs built from it.
    Returns them as (obs, config).
    '''
    config = Config()
    return Obs(config), config


def other_player(player):
    '''
    Return the opponent of player. Players are encoded as +1 and -1,
    so the opponent is simply the negation.
    '''
    return -player


def print_board(board, config, title=''):
    '''
    Print the board with row labels A-C and column labels 1-3, preceded by
    title. If the game is over, also print a banner naming the winner
    (or announcing a draw).
    '''
    symbols = config.player_symbol
    row_labels = 'ABC'
    last_row = len(board) - 1

    print(f'\n{title}')
    print('    1   2   3')
    for r, row in enumerate(board):
        label = f' {row_labels[r]} '
        marks = [symbols[cell] for cell in row]
        # Cells of a row are separated by '|'; the row ends with a newline.
        print(label + '|'.join(marks), end='\n' if marks else '')
        # Horizontal rule between rows, but not after the last one.
        if r < last_row:
            print("   -----------")

    game_over, counts = game_status(board, config)
    winner = get_winner(dict(counts), config)
    if not game_over:
        return

    mark = symbols[winner]
    if winner in [config.x, config.o]:
        caption = '    ' + mark + " wins"
    else:
        caption = "      Draw"
    dots = "   ..........."
    for line in (dots, caption, dots):
        print(line)

def unzip(zipped):
    '''
    Split an iterable of pairs into two lists: the first components and
    the second components. Raises ValueError if zipped is empty.
    '''
    firsts, seconds = zip(*zipped)
    return list(firsts), list(seconds)
    

def valid_moves(board):
    '''
    Return the (r, c) coordinates of every cell whose value is 0,
    scanning row by row and, within a row, column by column.
    '''
    side = len(board)
    open_cells = []
    for r in range(side):
        for c in range(side):
            if board[r][c] == 0:
                open_cells.append((r, c))
    return open_cells


In [None]:
# @title Test the basic methods

# Build a default configuration. new_game() already returns a default Config,
# so the second assignment just replaces it with another default Config.
_obs, config = new_game()
config = Config()

# Shorthand for the cell encodings (blank, X, O) used in the move list below.
b = 0
x = 1
o = -1

# Not a legal game: moves_to_board() simply writes each move into its cell,
# so later moves overwrite earlier ones. The sequence is chosen so that the
# printed boards pass through an X win, an O win and finally a draw.
history = [(x, (0, 0)), (x, (1, 1)), (x, (2, 2)), # X wins
           (o, (2, 2)), (o, (1, 2)), (o, (0, 2)), # O wins
           (x, (1, 2)), (o, (1, 0)), (x, (2, 1)), (o, (0, 1)), # Draw
           ]

# Make it an assignment statement so that no value is displayed at the end.
_ = moves_to_board(history, config, verbose=True)


In [None]:
# @title A game simulator and move strategies

def explain_move_input():
    '''
    Print a legend showing how a human enters a move: a row letter (A-C)
    followed by a column number (1-3), e.g. B1 for the left cell of the
    middle row.
    '''
    print('Indicate a move as follows.\n')
    print('   1     2    3')
    print('A  A1 |  A2 | A3')
    print('  ---------------')
    # The first cell of rows B and C is B1 / C1 (previously mislabelled).
    print('B  B1 |  B2 | B3')
    print('  ---------------')
    print('C  C1 |  C2 | C3')
    print('   Lower case a, b, c are also ok.\n')


def get_input(board, mark, blank, config=None):
    '''
    Prompt the human for a move until a valid, unoccupied cell is entered.

    Parameters:
        board: the current board, indexable as board[r][c].
        mark: the player's three-character symbol (e.g. ' X '); its middle
            character is shown in the prompt.
        blank: the value of an empty cell.
        config: optional Config used to name the occupant of a taken cell.
            A default Config() is used when omitted.

    Returns: (r, c), the zero-based row and column of the chosen cell.

    A move must contain exactly one row letter (A-C, either case) and exactly
    one column digit (1-3). The letter always selects the row and the digit
    the column, whatever order they are typed in; other characters are ignored.
    '''
    row_of = {'A': 0, 'B': 1, 'C': 2}
    col_of = {'1': 0, '2': 1, '3': 2}
    # Avoid relying on a notebook-global config for the occupied-cell message.
    symbols = (Config() if config is None else config).player_symbol

    while True:
        strng = input(f'\n{mark[1]}\'s move > ')
        rows = [row_of[ch] for ch in strng.upper() if ch in row_of]
        cols = [col_of[ch] for ch in strng if ch in col_of]

        # Reject input such as '11', 'ab' or 'a12' instead of guessing.
        if len(rows) != 1 or len(cols) != 1:
            print('Invalid input')
            continue

        r, c = rows[0], cols[0]
        cell = board[r][c]
        if cell == blank:
            return r, c
        print(f'Cell {strng} already contains {symbols[cell]}\n')
            

def human_player(board, player, config):
    '''
    Show the current board and ask the human for player's move.
    Returns the chosen cell as (row, col).
    '''
    print()
    print_board(board, config)
    mark = config.player_symbol[player]
    row, col = get_input(board, mark, config.blank)
    return row, col


def minimax_moves_and_score(depth, board, player, config, verbose=False):
    '''
    Score every valid move for player by minimax and return
    (best_moves, best_score).

    Each move is scored by minimax_score() on the resulting board with one
    less ply of depth and the opponent to move. X (config.x) picks the
    largest score, O the smallest. best_moves lists every move that achieves
    best_score. With verbose=True, details are printed for the top-level
    call only (depth == config.depth).
    '''
    opponent = other_player(player)
    move_scores = []
    for r, c in valid_moves(board):
        outcome = minimax_score(depth - 1, child_board(board, r, c, player), opponent, config)
        move_scores.append(((r, c), outcome))

    # X maximises, O minimises.
    choose = max if player == config.x else min
    best_score = choose(outcome for _move, outcome in move_scores)
    best_moves = [move for move, outcome in move_scores if outcome == best_score]

    show_details = verbose and depth == config.depth
    if show_details:
        print_board(board, config, title=f"\n\nplayer = {config.player_symbol[player][1]}.")
        print()
        for move, score in move_scores:
            print(f"{move} -> {score} {' (ok)' if score == best_score else ''}")
        print(f"{best_moves = }")

    return best_moves, best_score


def minimax_player(board, player, config, verbose=False):
    '''
    Choose a move for player by minimax search to depth config.depth.
    On a completely empty board a random corner is returned without searching.
    When several moves share the best score, one is picked at random.
    '''
    blank_cells = np.count_nonzero(board == 0)
    if blank_cells == 9:
        corners = [(0, 0), (0, 2), (2, 0), (2, 2)]
        return random.choice(corners)

    candidates, _best = minimax_moves_and_score(config.depth, board, player, config, verbose=verbose)
    return random.choice(candidates)


def minimax_score(depth, board, player, config):
    '''
    Return the minimax score of board with player to move.

    When the depth budget is used up or the game is over, the score is the
    counts list from game_status(). Otherwise it is the best score that
    player can reach, as computed by minimax_moves_and_score().
    '''
    game_over, static_score = game_status(board, config)
    keep_searching = depth != 0 and not game_over
    if not keep_searching:
        return static_score

    return minimax_moves_and_score(depth, board, player, config)[1]


def model_player(board, player, model, config, verbose=False, rnd=0):
    '''
    Use the model to choose the next move for player on the given board.

    Parameters:
        board: current board (NumPy array or nested sequence).
        player: config.x or config.o.
        model: object whose predict(batch, verbose=...) returns, per input row,
            three values in the order [draw, X wins, O wins].
        config: supplies x, o and player_symbol.
        verbose: print each candidate board with its predicted outcomes.
        rnd: randomness factor. Candidates are visited from best to worst and
            each is accepted when random.random() * rnd < 0.5, so rnd <= 0.5
            always plays the top-ranked move and larger values explore more.

    Returns: the chosen move as (row, col).

    Raises: Exception if player is neither config.x nor config.o.

    NOTE(review): the training cells feed the network boards normalised by
    normalize_board_state(), whereas this function predicts on the raw board.
    Confirm whether the candidate boards should be normalised here as well.
    '''
    # Positions of "this player wins" / "this player loses" in a prediction.
    if player == config.x:
        win_index, loss_index = 1, 2
    elif player == config.o:
        win_index, loss_index = 2, 1
    else:
        raise Exception(f'{player = } is neither {config.x = } nor {config.o = }.\n{player == config.x = }  {player == config.o = }')

    v_moves = valid_moves(board)
    scores = []

    if verbose:
        print('\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')

    # Predict the outcome of each possible move.
    for row, col in v_moves:
        next_board = np.array(board)
        next_board[row][col] = player
        # One flat row per board, as expected by the network's input layer.
        prediction = model.predict(next_board.reshape((1, -1)), verbose=False)[0]

        if verbose:
            print_board(next_board, config, title=f'If {config.player_symbol[player]} moves to {(row, col)}')
            for label, prob in zip(["Draw", "X wins", "O wins"], prediction):
                # Format explicitly: round() on a float32 still shows noise in f-strings.
                print(f'{label} = {prob:.2f}')

        draw_prediction = prediction[0]
        win_prediction = prediction[win_index]
        loss_prediction = prediction[loss_index]

        # Prefer winning; when a loss looks more likely than a win, aim for a draw.
        if win_prediction > loss_prediction:
            scores.append(win_prediction - loss_prediction)
        else:
            scores.append(draw_prediction - loss_prediction)

    # Walk the moves from best to worst and stop at the first accepted one.
    # (Without the break the last, i.e. worst, accepted move would win.)
    move = None
    for index in np.flip(np.argsort(scores)):
        if random.random() * rnd < 0.5:
            move = v_moves[index]
            break

    if move is None:
        # Every candidate was skipped: choose a move at random.
        move = random.choice(v_moves)

    if verbose:
        print(f'\n==> {config.player_symbol[player]} moves to {move}.')
        print('<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<')

    return move


def random_player(board, _player, _config):
    '''
    Pick one of the currently valid moves uniformly at random.
    The player and config arguments are accepted only so that all
    strategies share the same signature.
    '''
    return random.choice(valid_moves(board))


def select_a_move(board, player, move_strategy, config, verbose=False, rnd=0):
    '''
    Ask move_strategy for player's next move and return it as (row, col).

    Parameters:
        board: the current board.
        player: 1 for X or -1 for O.
        move_strategy: a trained Sequential model, or one of the strategy
            functions minimax_player, win_block_player, human_player or
            random_player. Anything else falls back to random_player.
        config: the game's Config.
        verbose: passed to the strategies that support it.
        rnd: randomness factor, used by the model strategy only.
    '''
    if isinstance(move_strategy, Sequential):
        return model_player(board, player, move_strategy, config, verbose=verbose, rnd=rnd)

    if move_strategy is win_block_player:
        # Dispatch to the depth-limited player itself; previously it was
        # routed to minimax_player and searched at the full config.depth.
        # win_block_player overrides config.depth, so put the configured
        # depth back afterwards to leave the other player's search unchanged.
        configured_depth = config.depth
        try:
            return win_block_player(board, player, config, verbose=verbose)
        finally:
            config.depth = configured_depth

    if move_strategy is minimax_player:
        return minimax_player(board, player, config, verbose=verbose)

    if move_strategy is human_player:
        return human_player(board, player, config)

    return random_player(board, player, config)


def simulate_game(x_strategy=random_player, o_strategy=random_player, verbose=False, rnd=0):
    '''
    Play one complete game between x_strategy and o_strategy.

    Returns (move_history, board_history):
        move_history is a list of (player, (row, col)) in the order played.
        board_history is a list of board copies, starting with the empty
        board and followed by the board after each move.

    If either strategy is human_player, the input instructions are printed
    first and the final board is printed when the game ends.
    '''
    obs, config = new_game()
    human_involved = human_player in [x_strategy, o_strategy]

    moves_played = []
    boards_seen = [obs.board.copy()]

    if human_involved:
        explain_move_input()

    while True:
        # game_status returns (game_over, counts); only the flag matters here.
        game_over, _counts = game_status(obs.board, config)
        if game_over:
            break

        # mover is either 1 (X) or -1 (O).
        mover = obs.player_to_move
        strategy = x_strategy if mover == config.x else o_strategy
        row, col = select_a_move(obs.board, mover, strategy, config, verbose=verbose, rnd=rnd)

        # Make the move and record it.
        obs.board[row][col] = mover
        moves_played.append((mover, (row, col)))
        boards_seen.append(obs.board.copy())

        # Hand the turn to the other player.
        obs.player_to_move = other_player(mover)

    if human_involved:
        print_board(obs.board, config, title='\n    Game over\n   ...........')

    return moves_played, boards_seen
    

def win_block_player(board, player, config, depth=1, verbose=False):
    '''
    The win_block_player is a minimax player limited to a search depth of
    depth (1 by default). In other words, it looks ahead one move: what will
    its current move yield?

    This lets it determine whether it can win on its current move.

    It also allows it to determine whether it is leaving the other player
    2 in a row, which will let the other player win on its next move.

    config.depth is changed only for the duration of the search and is then
    restored, so other players sharing the same config keep their own depth.

    Returns the chosen move as (row, col).
    '''
    configured_depth = config.depth
    config.depth = depth
    try:
        return minimax_player(board, player, config, verbose=verbose)
    finally:
        # Restore even if the search raises.
        config.depth = configured_depth


In [None]:
#@title Test the game simulator

# Re-seed the random number generator (no argument: seeded from system entropy/time).
random.seed()

# Strategy for X and strategy for O.
x_strategy, o_strategy =  random_player, win_block_player

move_history, board_history = simulate_game(x_strategy=x_strategy, o_strategy=o_strategy, verbose=False)

# When a human plays, simulate_game has already shown the boards interactively;
# otherwise replay every recorded position here.
if human_player not in [x_strategy, o_strategy]:
    config = Config()
    for board in board_history:
        print_board(board, config)




    1   2   3
 A    |   |   
   -----------
 B    |   |   
   -----------
 C    |   |   


    1   2   3
 A    |   |   
   -----------
 B    |   |   
   -----------
 C    | X |   


    1   2   3
 A    |   |   
   -----------
 B    | O |   
   -----------
 C    | X |   


    1   2   3
 A  X |   |   
   -----------
 B    | O |   
   -----------
 C    | X |   


    1   2   3
 A  X |   |   
   -----------
 B    | O |   
   -----------
 C  O | X |   


    1   2   3
 A  X |   |   
   -----------
 B    | O | X 
   -----------
 C  O | X |   


    1   2   3
 A  X |   | O 
   -----------
 B    | O | X 
   -----------
 C  O | X |   
   ...........
     O  wins
   ...........


In [None]:
# @title  Generate a collection of simulated games to calculate win/lose statistics for first and second players--and later to train our neural network. 
# 'X' (player 1) should have an edge due to first mover advantage.

def game_stats(x_strategy, o_strategy, nbr_games, games, config=None):
    '''
    Print win/loss/draw statistics for a collection of games.

    Parameters:
        x_strategy, o_strategy: the strategies that played X and O; used only
            for the heading. A Sequential model is reported as model_player.
        nbr_games: the number of games, shown in the heading.
        games: a list of (move_history, board_history) pairs. The
            move_history is ignored; only the final board of each
            board_history is examined.
        config: optional Config used to evaluate the final boards. A default
            Config() is used when omitted, so the function does not depend on
            a notebook-global config.

    Prints a table and returns nothing.
    '''
    if config is None:
        config = Config()

    if isinstance(x_strategy, Sequential):
        x_strategy = model_player

    if isinstance(o_strategy, Sequential):
        o_strategy = model_player

    print(f'X ({x_strategy.__name__}) vs O ({o_strategy.__name__}). {nbr_games} games.')

    # Nothing to aggregate; also avoids dividing by zero below.
    if not games:
        print('No games to summarise.')
        print()
        return

    # Tally outcomes by the sign of the net three-in-a-row count.
    stats = {"x": 0, "o": 0, "draw": 0}
    for _move_history, board_history in games:
        _status, counts = game_status(board_history[-1], config)
        three_in_a_row = dict(counts)[3]
        if three_in_a_row > 0:
            stats["x"] += 1
        elif three_in_a_row < 0:
            stats["o"] += 1
        else:
            stats["draw"] += 1

    total = len(games)
    player_x_pct = stats["x"] / total * 100
    player_o_pct = stats["o"] / total * 100
    draw_pct = stats["draw"] / total * 100

    print("Player\twins\tpct wins")
    print("------\t----\t--------")
    print(f"   x\t {stats['x']}\t  {round(player_x_pct)}%")
    print(f"   o\t {stats['o']}\t  {round(player_o_pct)}%")
    print(f" Draws\t {stats['draw']}\t  {round(draw_pct)}%")
    print()


# Random vs random: 1000 games.
x_strategy, o_strategy, nbr_games = random_player, random_player, 1000
games_rr = [simulate_game(x_strategy=x_strategy, o_strategy=o_strategy) for _ in range(nbr_games)] 
game_stats(x_strategy, o_strategy, nbr_games, games_rr)

# win_block_player as X against a random O: 100 games.
x_strategy, o_strategy, nbr_games = win_block_player, random_player, 100
games_wr = [simulate_game(x_strategy=x_strategy, o_strategy=o_strategy) for _ in range(nbr_games)] 
game_stats(x_strategy, o_strategy, nbr_games, games_wr)

# Random X against win_block_player as O: 100 games.
x_strategy, o_strategy, nbr_games = random_player, win_block_player, 100
games_rw = [simulate_game(x_strategy=x_strategy, o_strategy=o_strategy) for _ in range(nbr_games)] 
game_stats(x_strategy, o_strategy, nbr_games, games_rw)

# Training set: the random games plus ten copies of each win_block list
# (list repetition), so the win_block games are repeated tenfold.
games = games_rr + games_wr*10 + games_rw*10

X (random_player) vs O (random_player). 1000 games.
Player	wins	pct wins
------	----	--------
   x	 582	  58%
   o	 286	  29%
 Draws	 132	  13%

X (win_block_player) vs O (random_player). 100 games.
Player	wins	pct wins
------	----	--------
   x	 100	  100%
   o	 0	  0%
 Draws	 0	  0%

X (random_player) vs O (win_block_player). 100 games.
Player	wins	pct wins
------	----	--------
   x	 0	  0%
   o	 86	  86%
 Draws	 14	  14%



In [None]:
# @title Functions to normalize and label game states.

def tuple_board(board):
    '''
    Turn a board whose rows are NumPy arrays into a tuple of row tuples
    of plain Python values, so it is hashable and usable as a dictionary key.
    '''
    return tuple(tuple(row.tolist()) for row in board)


def normalize_board_state(board, board_state_mapping, board_states_seen):
    '''
    Input: a 3x3 board state.
    Output: the distinguished board state that represents it.

    The tuple form of the board is always added to board_states_seen.
    If the board already has a distinguished board state in
    board_state_mapping, that one is returned. Otherwise the 8 equivalent
    board states (4 rotations, each with its transpose) are generated, the
    smallest under < is chosen as the distinguished board state, and every one
    of the 8 that is not yet in board_state_mapping is pointed at it.
    Tuples are used throughout because dictionary keys must be immutable.
    '''
    key = tuple_board(board)
    board_states_seen.add(key)

    known = board_state_mapping.get(key)
    if known is not None:
        return known

    # Collect each rotation together with its transpose (a reflection).
    variants = []
    current = key
    for _ in range(4):
        variants.append(current)
        variants.append(tuple_board(np.transpose(current)))
        current = tuple_board(np.rot90(current))
    representative = min(variants)

    # Keep any mapping that already exists; only fill in the missing ones.
    for variant in variants:
        board_state_mapping.setdefault(variant, representative)

    return representative


def board_history_to_labelled_board_states(board_history, board_state_mapping, board_states_seen, config=None):
    '''
    Label every board position of one game with that game's outcome.

    Parameters:
        board_history: the list of board positions of a game; the last one
            determines the winner.
        board_state_mapping: cache mapping a board tuple to its distinguished
            (normalized) board state; updated in place.
        board_states_seen: set of board tuples encountered; updated in place.
        config: optional Config used to evaluate the final board. A default
            Config() is used when omitted, so the function does not depend on
            a notebook-global config.

    Returns: a list of (normalized_board_state, category) pairs, one per
    position in board_history.

    Elsewhere a win by o is -1, a draw is 0 and a win by x is 1. The neural net
    maps board states to categories, which must be non-negative integers, so
    (-1, 0, 1) is mapped to (2, 0, 1).
    '''
    if config is None:
        config = Config()

    # The winner of the game is read off the final board.
    _status, counts_as_tuples = game_status(board_history[-1], config)
    winner = get_winner(dict(counts_as_tuples), config)

    # winner:   -1: o won; 0: draw; 1: x won
    # category:  2: o won; 0: draw; 1: x won
    winner_to_category = {0: 0, 1: 1, -1: 2}
    winner_category = winner_to_category[winner]

    # Return a real list (not a one-shot zip iterator) so the result can be
    # inspected or iterated more than once.
    return [(normalize_board_state(board, board_state_mapping, board_states_seen), winner_category)
            for board in board_history]


def labeled_and_shuffled_game_histories(games):
    '''
    Input: a list of (move_history, board_history) pairs, one for each game.

    Output: a shuffled list of (board_state, label) pairs, where every board
    position of every game is labelled with the category of that game's
    eventual outcome.

    1. The move_history of each game is ignored.
    2. The last board in board_history determines the outcome of the game.
    3. Every (normalized) position in board_history is paired with that outcome.
    4. The pairs of all games are collected in one list.
    5. The list is shuffled so that positions of one game are spread around.

    A short summary of the state space and of the boards seen is printed.
    '''
    # Each board state has 7 symmetric equivalents. One of the 8 is chosen to
    # represent them all; this cache maps a board state to that representative.
    canonical_of = {}
    # Every distinct (un-normalized) board state encountered.
    seen = set()

    # Accumulates the (board_position, label) pairs of all games.
    labelled = []
    for _moves, boards in games:
        labelled.extend(board_history_to_labelled_board_states(boards, canonical_of, seen))

    # random.shuffle works in place; it mixes the outcomes through the list.
    random.shuffle(labelled)

    print(f'A simple upper bound for the size of the tic-tac-toe state space is {3**9 = }. ')
    print('(Three states for each cell and nine cells.) ')
    print('This count includes many illegal positions, such as a position with five Xs and no Os')
    print('or a position in which both players have a row of three.\n')
    print('A more careful count, removing these illegal positions, gives 5,478.')
    print('When rotations and reflections are considered, there are 765 different positions.')
    print('Wikipedia: https://en.wikipedia.org/w/index.php?title=Game_complexity&oldid=950763371#Example:_tic-tac-toe_(noughts_and_crosses)\n')

    print(f'Summary. Valid board states: 5478.  After considering symmetries: 765.\n')
    print(f'Board states seen: {len(seen)}. Number of distinguished boards: {len(set(canonical_of.values()))} ')

    return labelled



In [None]:
# @title Label the board positions in the games and shuffle the labelled pairs.

# A shuffled list of (normalized board state, outcome category) pairs.
shuffled_Xs_ys = labeled_and_shuffled_game_histories(games)

# Separate the board states (X) from their labels (y).
X, y = unzip(shuffled_Xs_ys)

# One flat row of 9 cell values per board; one-hot labels over the 3 outcome categories.
X = np.array(X).reshape((-1, 9))
y = to_categorical(y, num_classes=3)

# Split out the train and test data
# The first 80% of the shuffled pairs train the model; the rest validate it.
trainNum = int(len(X) * 0.8)
X_train, X_test, y_train, y_test = X[:trainNum], X[trainNum:], y[:trainNum], y[trainNum:]


A simple upper bound for the size of the tic-tac-toe state space is 3**9 = 19683. 
(Three states for each cell and nine cells.) 
This count includes many illegal positions, such as a position with five Xs and no Os
or a position in which both players have a row of three.

A more careful count, removing these illegal positions, gives 5,478.
When rotations and reflections are considered, there are 765 different positions.
Wikipedia: https://en.wikipedia.org/w/index.php?title=Game_complexity&oldid=950763371#Example:_tic-tac-toe_(noughts_and_crosses)

Summary. Valid board states: 5478.  After considering symmetries: 765.

Board states seen: 3433. Number of distinguished boards: 759 


* We choose a DNN architecture since we effectively want to predict the outcome of a game based on the given board state.
* The input to the network is the board state, which we reshape into a flat array of 9 elements, each of which is 0 (empty cell), 1 (an X, player 1's move), or -1 (an O, player 2's move).
* The output is the result of the game (win, loss, or draw). We use a one-hot encoded array for this.

In [None]:
#@title Prepare the neural net.

def build_model(board_side):
    '''
    Create and compile the neural network.

    Parameters:
        board_side: the side length of the board; the input layer takes
            board_side * board_side values (one per cell).

    Returns: a compiled Sequential model with a 3-way softmax output
    (draw, X-wins, O-wins). A summary of the model is printed.
    '''
    num_cells = board_side * board_side
    outcomes = 3 # The number of possible outcomes in a game. (draw, X-wins, O-wins)
    model = Sequential([
                        Dense(200, input_shape=(num_cells, ), activation='relu'),
                        Dropout(0.2),
                        Dense(125, activation='relu'),
                        Dense(75, activation='relu'),
                        Dropout(0.1),
                        Dense(25, activation='relu'),
                        Dense(outcomes, activation='softmax'),
                        ])
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['acc'])
    # summary() prints the table itself and returns None, so it must not be
    # wrapped in print() (which would add a stray "None" line).
    model.summary()

    return model




In [None]:
#@title Train the model on the tic-tac-toe games  generated earlier.

# A fresh default config supplies the board size for the input layer.
_obs, config = new_game()
model = build_model(config.board_side)


# Train the model
n_epochs = 500
batch_size = 100

# history output is not used
# The held-out split (X_test, y_test) is passed as validation data.
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=n_epochs, batch_size=batch_size)
print('\n\tThe model is trained\n')


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 200)               2000      
                                                                 
 dropout (Dropout)           (None, 200)               0         
                                                                 
 dense_1 (Dense)             (None, 125)               25125     
                                                                 
 dense_2 (Dense)             (None, 75)                9450      
                                                                 
 dropout_1 (Dropout)         (None, 75)                0         
                                                                 
 dense_3 (Dense)             (None, 25)                1900      
                                                                 
 dense_4 (Dense)             (None, 3)                 7

In [None]:
# @title First play a random player against itself and then the model against a random player.

# Each matchup uses one nbr_games value for both the simulation and the
# report, so the heading printed by game_stats always matches the games played.

# Baseline: random vs random.
x_strategy, o_strategy, nbr_games = random_player, random_player, 1000
games_rr = [simulate_game(x_strategy=x_strategy, o_strategy=o_strategy) for _ in range(nbr_games)]
game_stats(x_strategy, o_strategy, nbr_games, games_rr)

# The model as X against a random O.
x_strategy, o_strategy, nbr_games = model, random_player, 100
games_mr = [simulate_game(x_strategy=x_strategy, o_strategy=o_strategy) for _ in range(nbr_games)]
game_stats(x_strategy, o_strategy, nbr_games, games_mr)

# A random X against the model as O.
x_strategy, o_strategy, nbr_games = random_player, model, 100
games_rm = [simulate_game(x_strategy=x_strategy, o_strategy=o_strategy) for _ in range(nbr_games)]
game_stats(x_strategy, o_strategy, nbr_games, games_rm)

# The model against itself.
x_strategy, o_strategy, nbr_games = model, model, 100
games_mm = [simulate_game(x_strategy=x_strategy, o_strategy=o_strategy) for _ in range(nbr_games)]
game_stats(x_strategy, o_strategy, nbr_games, games_mm)

X (random_player) vs O (random_player). 100 games.
Player	wins	pct wins
------	----	--------
   x	 57	  57%
   o	 29	  29%
 Draws	 14	  14%

X (model_player) vs O (random_player). 100 games.
Player	wins	pct wins
------	----	--------
   x	 51	  51%
   o	 41	  41%
 Draws	 8	  8%

X (random_player) vs O (model_player). 100 games.
Player	wins	pct wins
------	----	--------
   x	 77	  77%
   o	 14	  14%
 Draws	 9	  9%

X (model_player) vs O (model_player). 100 games.
Player	wins	pct wins
------	----	--------
   x	 100	  100%
   o	 0	  0%
 Draws	 0	  0%



* In the original the random player won 76% of games as x; the model won 98%

* In the original the random player won 48% of games as o; the model won 96%.




In [None]:
# Interactive game: the human plays X (and so moves first), the model plays O.
# verbose=True makes the model print its prediction for every candidate move.
x_strategy = human_player
o_strategy = model 
_ = simulate_game(x_strategy=x_strategy, o_strategy=o_strategy, verbose=True)


Indicate a move as follows.

   1     2    3
A  A1 |  A2 | A3
  ---------------
B  B2 |  B2 | B3
  ---------------
C  C3 |  C2 | C3
   Lower case a, b, c are also ok.




    1   2   3
 A    |   |   
   -----------
 B    |   |   
   -----------
 C    |   |   

X's move > a2

>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>

If  O  moves to (0, 0)
    1   2   3
 A  O | X |   
   -----------
 B    |   |   
   -----------
 C    |   |   
Draw = 0.39
Draw = 0.38999998569488525
X wins = 0.32
X wins = 0.3199999928474426
O wins = 0.29
O wins = 0.28999999165534973

If  O  moves to (0, 2)
    1   2   3
 A    | X | O 
   -----------
 B    |   |   
   -----------
 C    |   |   
Draw = 1.0
Draw = 1.0
X wins = 0.0
X wins = 0.0
O wins = 0.0
O wins = 0.0

If  O  moves to (1, 0)
    1   2   3
 A    | X |   
   -----------
 B  O |   |   
   -----------
 C    |   |   
Draw = 1.0
Draw = 1.0
X wins = 0.0
X wins = 0.0
O wins = 0.0
O wins = 0.0

If  O  moves to (1, 1)
    1   2   

KeyboardInterrupt: ignored

* We see that the game tends toward a draw, which is what we would expect from a couple of human players. Still, neither player is particularly good.

* As a final measure of skill, let's see how the length of the average game has changed. We would expect this to go down with an increased imbalance in skill levels.

In [None]:
# @title Compare the lengths of the games

def avg_game_length(games, prec=1):
    '''
    Return the average number of moves per game, rounded to prec decimals.

    Parameters:
        games: a list of (move_history, board_history) pairs, as returned by
            simulate_game. The length of a game is the length of its
            move_history.
        prec: number of decimal places to keep.
    '''
    # Measure the move list, not the (move_history, board_history) pair itself,
    # whose length is always 2.
    lengths_as_floats = [float(len(move_history)) for move_history, _board_history in games]
    avg = np.mean(lengths_as_floats)
    return round(avg, prec)

# Use the game lists produced by the evaluation cell: games_rr (random vs
# random), games_mr (model as X), games_rm (model as O), games_mm (model vs model).
print(f"Average length of fully random game is {avg_game_length(games_rr)} moves")
print(f"Average length of game where x uses NN is {avg_game_length(games_mr)} moves")
print(f"Average length of game where o uses NN is {avg_game_length(games_rm)} moves")
print(f"Average length of game where both use NN is {avg_game_length(games_mm)} moves")


* As shown above, the games are a move shorter for player 1 and a bit longer for player 2.

* Now we play against our model to see how well it does. In the next cell the human moves first (as X); in the final cell the model moves first.

In [None]:
# @title A human against the model

# The human plays X (and so moves first); the model plays O.
# rnd=0.6 lets the model occasionally pass over its top-ranked move.
x_strategy = human_player
o_strategy = model
simulate_game(x_strategy=x_strategy, o_strategy=o_strategy, rnd=0.6)
print()

Indicate a move as follows.

   1     2    3
A  A1 |  A2 | A3
  ---------------
B  B2 |  B2 | B3
  ---------------
C  C3 |  C2 | C3
   Lower case a, b, c are also ok.




    1   2   3
 A    |   |   
   -----------
 B    |   |   
   -----------
 C    |   |   

X's move > a2



    1   2   3
 A    | X | O 
   -----------
 B    |   |   
   -----------
 C    |   |   


KeyboardInterrupt: ignored

In [None]:
from numpy.core.memmap import ndarray
# Scratch cell: compare how a rounded NumPy value is shown by print() with
# several arguments versus when it is interpolated into an f-string.
arr = np.full((1, ), 0.38999998569488525)
zp = zip(['a[0]'], arr)
for label, value in zp:
    print(label, '=', round(value, 2))
    print(f'{label} = {round(value, 2)}')
    # print(arr[0], np.around(arr[0], 1), round(arr[0], 1), f'{np.around(arr[0], 1)}', f'{round(arr[0], 1)}')

a[0] = 0.39
a[0] = 0.39


In [None]:
# @title A human against the model

# Roles swapped relative to the earlier cell: the model plays X (and so moves
# first) and the human plays O.
x_strategy = model
o_strategy = human_player 
simulate_game(x_strategy=x_strategy, o_strategy=o_strategy, rnd=0.6)
print()