In [136]:
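# minimax algorithm
# player: tries to maximize the final score
# computer: tries to minimize the player's final score
# (the code below scores from the computer's side instead: computer win = 1, player win = -1)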
# step 1
#              :                             0  
# player(max)  :           /                 |                 \
#              :          0                  0                 0
# computer(min):      /   |   \           /   |   \          /   |   \
#              :     1    1    0         1    1    0        1    1    0
# player(max)  :   / | \  / |   | \   / | \ / | \ / | \  / | \ / | \ / | \
#              :  0 -1 1  0 1  -1 0  0 -1 1  0 1  -1 0  0 -1 1  0 1  -1 0

# reference: https://en.wikipedia.org/wiki/Minimax#Maximin

from tictactoe import *


import math
def minimax(game, depth, maximizing_player, memo=None):
    """Score the current position for the computer with a memoized minimax search.

    Args:
        game: Game object exposing ``board`` (a flat, mutable sequence indexed
            as ``row * size + col``) and ``size``. The board is modified while
            searching and restored to its original contents before returning.
        depth: Remaining plies to search. The search stops with a score of 0
            when this reaches exactly 0; a negative value never reaches 0, so
            the search then runs to the end of the game.
        maximizing_player: True when the computer is to move, False when the
            player is to move.
        memo: Optional dict used as a cache and shared with recursive calls.
            When omitted, a fresh cache is created for this call.

    Returns:
        1 if the computer wins, -1 if the player wins, and 0 for a terminal
        position without a winner or when the depth limit is reached.
    """
    # A mutable default would be shared by every call that omits `memo`,
    # leaking cached scores between unrelated searches.
    if memo is None:
        memo = {}

    if is_winner(game, COMPUTER):  # computer win
        return 1
    if is_winner(game, PLAYER):  # player win
        return -1
    if is_terminal(game) or depth == 0:
        return 0

    # The score of a position depends on whose turn it is and, for a bounded
    # search, on how many plies remain, so both belong in the cache key.
    # Any unbounded search (depth <= 0 here means negative) shares one entry.
    maximizing = bool(maximizing_player)
    key = (tuple(game.board), maximizing, depth if depth > 0 else None)
    if key in memo:
        return memo[key]

    if maximizing:
        mark, choose, best = COMPUTER, max, -math.inf
    else:
        mark, choose, best = PLAYER, min, math.inf

    for cell in range(game.size * game.size):
        if game.board[cell] != EMPTY:
            continue
        game.board[cell] = mark
        try:
            score = minimax(game, depth - 1, not maximizing, memo)
        finally:
            # Always undo the trial move, even if the search is interrupted.
            game.board[cell] = EMPTY
        best = choose(best, score)

    memo[key] = best
    return best
    

# Cache shared by every policy_minimax call that does not supply its own memo.
minmax_memo = {}
def policy_minimax(game, memo=minmax_memo):
    """Pick the computer's move by trying every empty cell and scoring it with minimax.

    Each empty cell is temporarily marked for the computer, scored with an
    unbounded search (depth -1) in which the player moves next, and then
    cleared again.

    Args:
        game: Game object exposing ``board`` (flat, indexed ``row * size + col``)
            and ``size``.
        memo: Cache dict handed through to ``minimax``.

    Returns:
        The flat board index of the first cell with the highest score, or
        None when the board has no empty cell.
    """
    side = game.size
    chosen = None
    top_score = -math.inf
    for cell in range(side * side):
        if game.board[cell] != EMPTY:
            continue
        game.board[cell] = COMPUTER
        score = minimax(game, -1, False, memo)
        game.board[cell] = EMPTY
        # Strict comparison keeps the earliest cell among equally good moves.
        if score > top_score:
            top_score, chosen = score, cell
    return chosen



In [138]:
# Play an interactive game in which the computer's moves are chosen by policy_minimax.
# NOTE(review): game_loop comes from the `tictactoe` star-import; judging by the
# output below it prompts the player for 'i j' coordinates each turn — confirm.
game_loop(policy_minimax)

start, input 'i j' to select the place. from 0 to 2:
— — — 
— — — 
— — — 
player 1 move:
X — — 
— — — 
— — — 
computer move:
X — — 
— O — 
— — — 
player 1 move:
X X — 
— O — 
— — — 
computer move:
X X O 
— O — 
— — — 
player 1 move:
X X O 
— O — 
X — — 
computer move:
X X O 
O O — 
X — — 
player 1 move:
X X O 
O O — 
X — X 
computer move:
X X O 
O O O 
X — X 
computer win
