In [31]:
# tic-tac-toe game
class TicTacToe():
  """Tic-tac-toe position on a square board of configurable size.

  The board is a flat, row-major list of ``size * size`` integers: cell
  (i, j) lives at index ``i * size + j``. Cell values are 0 (empty),
  1 (player 1) and 2 (player 2); ``label_map`` maps them to the strings
  used when printing. A player wins with three marks in a straight line,
  whatever the board size.
  """

  def __init__(self, size = 3):
    """Create an empty ``size`` x ``size`` board."""
    self.size = size
    self.board = [0 for _ in range(size*size)]
    # NOTE: the attribute names below are misspelled ("palyer"); they are kept
    # as-is so existing code that reads them keeps working.
    self.palyer_1_label = 'X '
    self.palyer_2_label = 'O '
    self.empty_label = '— '
    self.label_map = {0: self.empty_label, 1: self.palyer_1_label, 2: self.palyer_2_label}

  def set_move(self, i, j, player):
    """Place ``player``'s mark at row ``i``, column ``j``.

    Returns True when the mark was placed, False when the coordinates are
    off the board or the cell is already occupied.
    """
    # Reject off-board coordinates explicitly: without this, j >= size would
    # land on a cell of another row and negative values would wrap around.
    if not (0 <= i < self.size and 0 <= j < self.size):
      return False
    index = i * self.size + j
    if self.board[index] != 0:
      return False
    self.board[index] = player
    return True

  def is_winner(self, board, player):
    """Return True if ``player`` has three marks in a line on ``board``.

    ``board`` is a flat row-major sequence with ``self.size`` cells per row.
    Rows, columns, diagonals (down-right) and anti-diagonals (down-left) are
    all checked; boards smaller than 3x3 can never contain a win.
    """
    n = self.size
    run = 3  # number of aligned marks needed to win
    # (row step, column step): row, column, diagonal, anti-diagonal
    directions = ((0, 1), (1, 0), (1, 1), (1, -1))
    for row in range(n):
      for col in range(n):
        for d_row, d_col in directions:
          last_row = row + (run - 1) * d_row
          last_col = col + (run - 1) * d_col
          # Skip runs that would leave the board, so a line never wraps
          # from the end of one row onto the start of the next.
          if not (0 <= last_row < n and 0 <= last_col < n):
            continue
          if all(board[(row + k * d_row) * n + (col + k * d_col)] == player
                 for k in range(run)):
            return True
    return False

  def is_terminal(self):
    """Return True when no empty cell is left.

    This only reports a full board; it does not check whether anyone has won.
    """
    return all(cell != 0 for cell in self.board)

  def copy(self):
    """Return a new game of the same size with an independent copy of the board."""
    new_game = TicTacToe(self.size)
    new_game.board = self.board.copy()
    return new_game
  
def print_board(game):
  """Print the board to stdout, one text line per board row.

  Each cell value is translated through ``game.label_map``; the labels of a
  row are written back to back, followed by a newline.
  """
  width = game.size
  for row in range(width):
    offset = row * width
    line = ''.join(game.label_map[game.board[offset + col]] for col in range(width))
    print(line, flush=True)

def _read_player_move(game):
  """Prompt until the user enters the row and column of an empty on-board cell."""
  while True:
    raw = input("Enter your move (row and column): ")
    try:
      # ValueError covers both non-integer tokens and a wrong number of tokens.
      i, j = map(int, raw.split())
    except ValueError:
      print("Invalid move. Please try again.")
      continue
    # Bounds are checked before indexing so that out-of-range or negative
    # coordinates are rejected instead of crashing or wrapping to another cell.
    on_board = 0 <= i < game.size and 0 <= j < game.size
    if on_board and game.board[i * game.size + j] == 0:
      return i, j
    print("Invalid move. Please try again.")


def game_loop(computer_policy, board_size = 3):
  """Run an interactive console game: human (player 1) against a computer policy.

  Args:
    computer_policy: callable taking the game object and returning the flat
      board index (``row * size + col``) of the computer's move.
    board_size: side length of the square board.

  The human moves first. After every move the board is printed, and the loop
  ends as soon as either side wins or the board is full (draw).

  Raises:
    ValueError: if ``computer_policy`` returns None or a move that cannot be
      placed on the board.
  """
  game = TicTacToe(board_size)
  print("start, input 'i j' to select the place. from 0 to %d:"% (board_size-1), flush=True)
  print_board(game)

  while True:
    # human move
    i, j = _read_player_move(game)
    game.set_move(i, j, 1)
    print("player 1 move:", flush=True)
    print_board(game)

    if game.is_winner(game.board, 1):
      print("player 1 win")
      break
    if game.is_terminal():
      print("draw")
      break

    # computer move
    best_move = computer_policy(game)
    if best_move is None or not game.set_move(best_move // game.size, best_move % game.size, 2):
      # Fail loudly rather than silently skipping the computer's turn.
      raise ValueError("computer policy returned an invalid move: %r" % (best_move,))
    print("computer move:", flush=True)
    print_board(game)

    if game.is_winner(game.board, 2):
      print("computer win")
      break
    if game.is_terminal():
      print("draw")
      break

# test code 
# game = TicTacToe()
# print("start:")
# print_board(game)
# print("player 1 move:") 
# game.set_move(0,0,  1)
# print_board(game)

In [32]:
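# minimax algorithm
# player: tries to maximize their own final score
# computer: tries to minimize the player's final score
# (note: the code below scores positions from the computer's side instead,
#  +1 = computer win, -1 = player win, so there the computer is the maximizing side)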
# step 1
#              :                             0  
# player(max)  :           /                 |                 \
#              :          0                  0                 0
# computer(min):      /   |   \           /   |   \          /   |   \
#              :     1    1    0         1    1    0        1    1    0
# player(max)  :   / | \  / |   | \   / | \ / | \ / | \  / | \ / | \ / | \
#              :  0 -1 1  0 1  -1 0  0 -1 1  0 1  -1 0  0 -1 1  0 1  -1 0

# reference: https://en.wikipedia.org/wiki/Minimax#Maximin


import math
# Cell values stored in the game board.
COMPUTER = 2  # mark placed by the computer
PLAYER = 1    # mark placed by the human player
EMPTY = 0     # unoccupied cell

def minimax(game, depth, maximizing_player, memo=None):
    """Evaluate the current position with memoized minimax.

    Scores are from the computer's point of view: 1 if the computer has won,
    -1 if the player has won, 0 for a full board or when the depth budget
    runs out.

    Args:
        game: object exposing ``board`` (mutable flat sequence of cell
            values), ``is_winner(board, player)`` and ``is_terminal()``.
        depth: remaining plies to search. The search stops when this reaches
            exactly 0, so a negative value means "no depth limit".
        maximizing_player: True when it is the computer's turn (maximize the
            score), False when it is the player's turn (minimize it).
        memo: optional dict used as a cache across calls. A fresh dict is
            created when omitted.

    Returns:
        The minimax score of the position: 1, 0 or -1.

    ``game.board`` is modified while searching and restored before returning.
    """
    # A fresh cache per top-level call; a mutable default would be shared
    # silently between unrelated calls.
    if memo is None:
        memo = {}

    board = game.board
    if game.is_winner(board, COMPUTER): # computer win
        return 1
    if game.is_winner(board, PLAYER): # player win
        return -1
    if game.is_terminal() or depth == 0:
        return 0

    # The value of a position depends on whose turn it is and, for a
    # depth-limited search, on how many plies remain, so both are part of the
    # cache key. Every negative depth is an unlimited search and shares one key.
    key = (tuple(board), bool(maximizing_player), depth if depth > 0 else -1)
    if key in memo:
        return memo[key]

    if maximizing_player:
        mark, best, choose = COMPUTER, -math.inf, max
    else:
        mark, best, choose = PLAYER, math.inf, min

    for index in range(len(board)):
        if board[index] != EMPTY:
            continue
        board[index] = mark
        try:
            score = minimax(game, depth - 1, not maximizing_player, memo)
        finally:
            # Always undo the trial move so the caller's board is left intact.
            board[index] = EMPTY
        best = choose(best, score)

    memo[key] = best
    return best
    

# Cache shared by every policy_minimax call that does not pass its own memo.
minmax_memo = {}
def policy_minimax(game, memo= minmax_memo):
    """Choose the computer's move by scoring every empty cell with minimax.

    Each empty cell is temporarily marked as the computer's, the resulting
    position is evaluated with the player to move and no depth limit, and the
    mark is removed again. The first cell with the highest score wins.

    Returns:
        The flat index (``row * size + col``) of the chosen cell, or None
        when the board has no empty cell.
    """
    chosen = None
    top_score = -math.inf
    for cell in range(game.size * game.size):
        if game.board[cell] != EMPTY:
            continue
        game.board[cell] = COMPUTER
        score = minimax(game, -1, False, memo)
        game.board[cell] = EMPTY
        # Strict comparison keeps the earliest cell among equally good moves.
        if score > top_score:
            top_score, chosen = score, cell
    return chosen



In [33]:
# play the game with minimax policy
game_loop(policy_minimax)

start, input 'i j' to select the place. from 0 to 2:
— — — 
— — — 
— — — 


player 1 move:
X — — 
— — — 
— — — 
computer move:
X — — 
— O — 
— — — 
player 1 move:
X — — 
— O — 
— — X 
computer move:
X O — 
— O — 
— — X 
player 1 move:
X O — 
— O — 
— X X 
computer move:
X O — 
— O — 
O X X 
player 1 move:
X O X 
— O — 
O X X 
computer move:
X O X 
— O O 
O X X 
player 1 move:
X O X 
X O O 
O X X 
draw


In [34]:
# MCTS (Monte Carlo tree search) algorithm

# reference: https://int8.io/monte-carlo-tree-search-beginners-guide/