### Playtime

The minimax agent plays a random agent 100 times on a $5 \times 5$ board, and the minimax agent wins __4 out of every 5 games__. The 20 games that minimax fails to win are evenly split between losses and draws. This is plausible because the minimax agent makes its first 7 moves at random, so during that opening phase both players are moving randomly and the random agent can accidentally gain an advantage. (Scroll down to see the output under the code.)

In [1]:
"""sources"""
# winning_move() and its kernels -> https://stackoverflow.com/a/63991845
# in get_actions_ordered(): removing array elements from another array -> https://stackoverflow.com/a/66695755
# show_board() is modified code that M. Hahsler found here -> https://stackoverflow.com/questions/43971138/python-plotting-colored-grid-based-on-values

"""imports"""
import sys
import math
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import colors
from scipy.signal import convolve2d

"""rng seeding"""
# Fixed seed so the np.random.choice() calls made by the players repeat from run to run.
np.random.seed(0)

"""globals"""
# NOTE(review): DEBUG is not read by any code visible in this cell -- confirm it is still needed.
DEBUG = 0 # 0 = off ... 1 = useful output ... 2 = OTHER useful output
HEIGHT = 5 # number of board rows; play() overwrites this from its board_size argument
WIDTH = 5 # number of board columns; play() overwrites this from its board_size argument
PRIORITY = None # column ordering used by get_actions_ordered(); assigned by play()
TURNS = 0 # number of minimax_ab_player() calls in the current game; play() resets it per game

"""kernels for winning_move()"""
# Each kernel marks one way four cells can line up: a row, a column, and the
# two diagonals. winning_move() convolves a player's cell mask with each of them.
horizontal_kernel = np.ones((1, 4), dtype=int)
vertical_kernel = horizontal_kernel.T
diag1_kernel = np.eye(4, dtype=np.uint8)
diag2_kernel = diag1_kernel[:, ::-1]
detection_kernels = [
    horizontal_kernel,
    vertical_kernel,
    diag1_kernel,
    diag2_kernel,
]

def show_board(board, fontsize = 10):
    """Display a board (numpy array) as a coloured grid.

    Cells holding -1 are drawn yellow, 0 (empty) white and 1 red.

    Parameters:
        board: 2-D array of cell values in {-1, 0, 1}; it is not modified.
        fontsize: unused; kept so existing callers that pass it keep working.
    """
    # Shift the values from {-1, 0, 1} to {0, 1, 2} so they fall into the three
    # colour bins below. Adding a scalar works for any board shape (no reliance
    # on the HEIGHT/WIDTH globals), and np.copy() keeps the caller's board intact.
    cells = np.copy(board) + 1

    fig, ax = plt.subplots()
    cmap = colors.ListedColormap(['yellow', 'white', 'red'])
    # One bin per colour: boundaries [0, 1, 2, 3].
    norm = colors.BoundaryNorm(list(range(cmap.N + 1)), cmap.N)
    ax.imshow(cells, cmap = cmap, norm = norm)
    plt.show()

def empty_board(shape=None):
    """Return an all-zero (empty) int8 board.

    Parameters:
        shape: (rows, columns) of the board. When omitted, the *current*
            values of the HEIGHT and WIDTH globals are used.

    Returns:
        A numpy array of zeros with dtype int8.
    """
    # A default of (HEIGHT, WIDTH) in the signature would be evaluated only once,
    # when the function is defined, and would ignore later changes to the globals
    # (play() reassigns them). Resolve the default at call time instead.
    if shape is None:
        shape = (HEIGHT, WIDTH)
    return np.zeros(shape, np.int8)

def winning_move(board, player):
    """Return 1 if `player` has four connected pieces on `board`, else 0.

    A positive `player` owns the cells > 0, a negative `player` the cells < 0.
    A `player` of 0 never wins.
    """
    if player > 0:
        owned = board > 0
    elif player < 0:
        owned = board < 0
    else:
        return 0

    # A window sum of 4 means all four cells under the kernel belong to the player.
    for kernel in detection_kernels:
        window_sums = convolve2d(owned, kernel, mode="valid")
        if (window_sums == 4).any():
            return 1
    return 0

def check_win(board):
    """Classify the board: 1 or -1 (that side has won), 'd' (draw) or 'n' (game goes on)."""

    # a win for either side takes precedence; side 1 is checked first
    for side in (1, -1):
        if winning_move(board, side):
            return side

    # no winner and no empty cell left -> draw
    empty_cells = np.sum(board == 0)
    if empty_cells < 1:
        return 'd'

    return 'n'

def get_actions(board):
    """Return the indices of the columns whose top cell is still empty."""
    # Only the first WIDTH entries of the top row are considered.
    top_is_empty = board[0][:WIDTH] == 0
    return np.where(top_is_empty)[0]

def other(player):
    """Return the opposing side: -1 for a positive player, otherwise 1."""
    return -1 if player > 0 else 1

def result(state, player, action):
    """Return a copy of the board with `player`'s piece dropped into column `action`.

    Parameters:
        state: the current board; it is left unchanged.
        player: value written into the board cell (1 or -1 in this file).
        action: index of the column to play.

    Returns:
        A new board in which the lowest empty cell of the column holds `player`.

    Exits via sys.exit() (SystemExit) if the column has no empty cell.
    """
    new_state = state.copy()
    # Walk from the bottom row upwards. The row count comes from the board
    # itself rather than the HEIGHT global, so boards of any size work.
    for row in range(len(new_state) - 1, -1, -1):
        if new_state[row][action] == 0:
            new_state[row][action] = player
            return new_state
    sys.exit('error -> column full')

def utility(state, player = 1):
    """Score a state from `player`'s point of view.

    Returns +1 if `player` has won, 0 for a draw, -1 if the opponent has won,
    and None if the state is not terminal.
    """
    outcome = check_win(state)
    if outcome == player:
        return +1
    if outcome == 'd':
        return 0
    if outcome == other(player):
        return -1  # the opponent won
    return None  # game is still running

def is_terminal(state):
    """Return True when the game is over in `state` (a win for either side or a draw)."""
    outcome = check_win(state)
    return outcome != 'n'

def random_player(board, player = None):
    """Pick one of the non-full columns uniformly at random.

    `player` is accepted so the signature matches the other agents; it is not used.
    """
    open_columns = get_actions(board)
    return np.random.choice(open_columns, size=None, replace=False)

def switch_player(player, r, y):
    """Hand the turn over: return the next side together with its agent function.

    After a positive player the turn goes to -1 with agent `y`; otherwise to 1 with agent `r`.
    """
    return (-1, y) if player > 0 else (1, r)

def play(r, y, priority, N = 1, board_size=(6,7)):
    """Let two agents play N games against each other and tally the outcomes.

    Parameters:
        r: agent function for side 1; it makes the first move of every game.
        y: agent function for side -1.
        priority: column ordering, stored in the PRIORITY global for get_actions_ordered().
        N: number of games to play.
        board_size: (rows, columns) of the board.

    Each agent is called as fun(board, player) and must return a column index.

    Returns:
        dict with the number of wins for 'r', wins for 'y' and draws 'd'.

    Side effects: overwrites the HEIGHT, WIDTH and PRIORITY globals and resets
    TURNS at the start of every game.
    """
    global HEIGHT, WIDTH, PRIORITY, TURNS
    HEIGHT, WIDTH, PRIORITY = board_size[0], board_size[1], priority

    # check_win() reports 1 / -1 / 'd'; translate that into the tally keys.
    tally_key = {1: 'r', -1: 'y', 'd': 'd'}

    results = {'r': 0, 'y': 0, 'd': 0}
    for _ in range(N):
        TURNS = 0

        # Pass the size explicitly so the board really has the requested
        # dimensions instead of whatever empty_board() defaults to.
        board = empty_board((HEIGHT, WIDTH))
        player, fun = 1, r

        while True:
            a = fun(board, player)
            board = result(board, player, a)

            win = check_win(board)
            if win != 'n':
                results[tally_key[win]] += 1
                break

            player, fun = switch_player(player, r, y)
    return results

"""globals"""
# Verbosity of the alpha-beta search functions (read by alpha_beta_search, max_value_ab, min_value_ab).
DEBUG_ = 0 # 1 ... count nodes, 2 ... debug each node
# Nodes visited by the current search: zeroed in alpha_beta_search() and
# incremented on every max_value_ab() / min_value_ab() call.
COUNT = 0

def get_actions_ordered(board):
    """Return the playable columns of `board`, ordered by the PRIORITY global.

    The result contains exactly those entries of PRIORITY that are currently
    valid actions, in PRIORITY order. Entries that are not playable columns
    (full columns, or indices that do not exist on this board) are dropped.
    If PRIORITY has not been set (None), the valid actions are returned in
    their natural left-to-right order.
    """
    valid_actions = get_actions(board)

    # No ordering configured yet -> fall back to the plain action list.
    if PRIORITY is None:
        return valid_actions

    # Keep the priority entries that are valid; boolean-mask indexing preserves
    # their order and returns a new array, leaving PRIORITY itself untouched.
    priority = np.asarray(PRIORITY)
    return priority[np.isin(priority, valid_actions)]

# Optional smoke test for get_actions_ordered(); skipped while DEBUG_ is 2 or lower.
# NOTE(review): PRIORITY is still None at this point unless play() ran earlier in the session -- confirm before enabling.
if(DEBUG_>2):
    board = empty_board()
    print(board)
    get_actions_ordered(board)

def alpha_beta_search(board, player = 1):
    """Run an alpha-beta search from `board` for `player`.

    Returns the pair (value, move) found by max_value_ab() at the root.
    Resets the COUNT global before searching.
    """
    global DEBUG_,COUNT
    COUNT = 0

    # the root is a max node with a fully open (alpha, beta) window
    root_value, root_move = max_value_ab(board, player, -math.inf, +math.inf)

    if DEBUG_ >= 1:
        print(f"Number of nodes searched: {COUNT}")

    return root_value, root_move

def max_value_ab(state, player, alpha, beta):
    """Max node: `player` is to move and picks the action with the largest value.

    Returns (value, move); move is None when `state` is terminal or no action exists.
    """
    global DEBUG_,COUNT
    COUNT+=1

    # a terminal state is scored directly
    terminal_value = utility(state, player)
    if DEBUG_ >= 2:
        print("max: ")
        print( str(state) + str([alpha, beta, terminal_value]) )
    if terminal_value is not None:
        return terminal_value, None

    best_value, best_move = -math.inf, None

    # try the actions in priority order, raising alpha whenever a better one is found
    for action in get_actions_ordered(state):
        child = result(state, player, action)
        child_value, _ = min_value_ab(child, player, alpha, beta)
        if child_value > best_value:
            best_value, best_move = child_value, action
            alpha = max(alpha, best_value)
        # the min node above will never allow this value -> stop searching here
        if best_value >= beta:
            break

    return best_value, best_move

def min_value_ab(state, player, alpha, beta):
    """Min node: the opponent of `player` is to move and picks the action with the smallest value.

    Values are still measured from `player`'s point of view.
    Returns (value, move); move is None when `state` is terminal or no action exists.
    """
    global DEBUG_,COUNT
    COUNT+=1

    # a terminal state is scored directly
    terminal_value = utility(state, player)
    if DEBUG_ >= 2:
        print("min: ")
        print( str(state) + str([alpha, beta, terminal_value]) )
    if terminal_value is not None:
        return terminal_value, None

    best_value, best_move = +math.inf, None
    opponent = other(player)

    # try the opponent's actions in priority order, lowering beta whenever a smaller value is found
    for action in get_actions_ordered(state):
        child = result(state, opponent, action)
        child_value, _ = max_value_ab(child, player, alpha, beta)
        if child_value < best_value:
            best_value, best_move = child_value, action
            beta = min(beta, best_value)
        # the max node above already has something at least this good -> stop searching here
        if best_value <= alpha:
            break

    return best_value, best_move

DEBUG__ = 0 # Useful output.

def minimax_ab_player(board, player = 1):
    """Agent that plays its first seven moves at random and searches with alpha-beta afterwards.

    Every call increments the TURNS global, which decides which phase the agent is in.
    Returns the chosen column index.
    """
    global TURNS
    TURNS += 1

    in_random_phase = TURNS <= 7
    if in_random_phase:
        if DEBUG__: print("random")
        action = np.random.choice(get_actions(board), size=None, replace=False)
    else:
        # named `value` so the module-level utility() function is not shadowed
        value, action = alpha_beta_search(board, player)
        if DEBUG__: print("util: ", value)

    if DEBUG__: print("chosen: ", action)
    return action

# Column order in which get_actions_ordered() hands the moves to the search.
priority = np.array([3,4,2,1,0])
# One timed run (-n 1 -r 1) of 100 games on a 5x5 board: the minimax agent is 'r'
# and moves first, the random agent is 'y'. display() shows the tally returned by play().
%timeit -n 1 -r 1 display(play( minimax_ab_player, random_player, priority, 100, (5,5) ))

{'r': 80, 'y': 10, 'd': 10}

29.9 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
