In [None]:
# tic-tac-toe game
class TicTacToe:
  """Square tic-tac-toe board stored as a flat, row-major list.

  Cell values: 0 = empty, 1 = player 1, 2 = player 2.
  Cell (i, j) lives at index ``i * size + j`` of ``board``.
  """

  def __init__(self, size = 3):
    """Create an empty ``size`` x ``size`` board."""
    self.size = size
    self.board = [0] * (size * size)
    # NOTE: the 'palyer' spelling is kept on purpose; these attribute names
    # are reachable from outside the class and renaming them could break users.
    self.palyer_1_label = 'X '
    self.palyer_2_label = 'O '
    self.empty_label = '— '
    # maps a cell value to the text used when the board is rendered
    self.label_map = {0: self.empty_label, 1: self.palyer_1_label, 2: self.palyer_2_label}

  def set_move(self, i, j, player):
    """Place ``player`` on cell (row ``i``, column ``j``).

    Returns True when the mark was placed. Returns False, leaving the board
    untouched, when the cell is already occupied or (i, j) is off the board.
    """
    # Without this check a column >= size spills into the next row and a
    # negative coordinate wraps around to the end of the flat list.
    if not (0 <= i < self.size and 0 <= j < self.size):
      return False
    index = i * self.size + j
    if self.board[index] != 0:
      return False
    self.board[index] = player
    return True

  def is_winner(self, board, player):
    """Return True if ``player`` fills a whole row, column or diagonal of ``board``.

    ``board`` is a flat row-major list with ``size * size`` entries; it does
    not have to be ``self.board``.
    """
    n = self.size
    for i in range(n):
      # row i
      if all(board[i*n + j] == player for j in range(n)):
        return True
      # column i
      if all(board[j*n + i] == player for j in range(n)):
        return True
    # main diagonal (top-left to bottom-right)
    if all(board[i*n + i] == player for i in range(n)):
      return True
    # anti-diagonal (top-right to bottom-left)
    if all(board[i*n + n - 1 - i] == player for i in range(n)):
      return True
    return False

  def is_terminal(self):
    """Return True when no empty cell is left.

    This only checks for a full board; wins are detected separately with
    ``is_winner``.
    """
    return all(x != 0 for x in self.board)

  def copy(self):
    """Return a new game of the same size with an independent copy of the board."""
    new_game = TicTacToe(self.size)
    new_game.board = self.board.copy()
    return new_game
  
def print_board(game):
  """Print the board as text, one line per row.

  Each cell value is translated through ``game.label_map`` and the labels of
  a row are written back to back, followed by a newline.
  """
  n = game.size
  for row in range(n):
    labels = (game.label_map[game.board[row * n + col]] for col in range(n))
    print(''.join(labels))

def game_loop(computer_policy):
  """Run an interactive console game: the human is player 1, the computer player 2.

  computer_policy: callable that receives the current game and returns the
    flat board index (row * size + column) of the computer's move.

  The loop alternates human and computer moves and stops as soon as either
  side wins or the board is full (draw). Malformed input, off-board
  coordinates and occupied cells are rejected with a retry prompt instead of
  crashing or being silently mapped onto a different cell.
  """
  game = TicTacToe()
  print("start, input 'i j' to select the place:", flush=True)
  print_board(game)

  while True:
    # human move (already validated by the helper)
    i, j = _read_player_move(game)
    game.set_move(i, j, 1)
    print("player 1 move:")
    print_board(game)

    if game.is_winner(game.board, 1):
      print("player 1 win")
      break
    if game.is_terminal():
      print("draw")
      break

    # computer move: the policy returns a flat index, split it into row/column
    best_move = computer_policy(game)
    game.set_move(best_move//game.size, best_move%game.size, 2)
    print("computer move:")
    print_board(game)

    if game.is_winner(game.board, 2):
      print("computer win")
      break
    if game.is_terminal():
      print("draw")
      break


def _read_player_move(game):
  """Prompt until the user enters a legal 'row column' pair and return it as (i, j)."""
  while True:
    try:
      i, j = map(int, input("Enter your move (row and column): ").split())
    except ValueError:
      # raised for non-integer tokens and for too few / too many tokens
      print("Invalid move. Please try again.")
      continue
    # Check each coordinate separately: the flat index alone cannot tell
    # (0, size) from (1, 0), and negative values would wrap around the list.
    on_board = 0 <= i < game.size and 0 <= j < game.size
    if on_board and game.board[i * game.size + j] == 0:
      return i, j
    print("Invalid move. Please try again.")

# test code 
# Smoke test: render the empty board, then render it again after
# player 1 claims the top-left cell.
game = TicTacToe()
print("start:")
print_board(game)

print("player 1 move:")
game.set_move(i=0, j=0, player=1)
print_board(game)

In [None]:
# DFS that accumulates the opponent's win score from a given game state
# game: the game state to search from
# palyer: the player who moves next (the opponent)
def dfs(game, palyer, depth = 1, show = False):
  """Accumulate a win score for ``palyer`` by exhaustively searching from ``game``.

  game: game state to search from; it is copied, never modified.
  palyer: id of the player who moves next (the side whose wins are scored).
  depth: ply counter; a win found at this call adds ``1/depth``, so wins
    that take longer to reach weigh less. Recursive calls use ``depth + 2``.
  show: not used here; only forwarded to the recursive calls.

  Returns the summed score (0 when no winning line for ``palyer`` is found).
  Positions where the other player wins, or where the board fills up, add
  nothing.
  """
  other = 2 if palyer == 1 else 1
  snapshot = game.copy()
  n = snapshot.size
  cell_count = n * n
  total = 0

  for first in range(cell_count):
    if snapshot.board[first] != 0:
      continue
    # try every free cell as palyer's move
    after_first = snapshot.copy()
    after_first.set_move(*divmod(first, n), palyer)

    if after_first.is_winner(after_first.board, palyer):
      # immediate win for palyer, weighted by how deep in the search it is
      total += 1/(depth)
      continue
    # the other side already won, or the board is full: nothing to score
    if after_first.is_winner(after_first.board, other) or after_first.is_terminal():
      continue

    # otherwise try every reply of the other player and search deeper
    for reply in range(cell_count):
      if after_first.board[reply] != 0:
        continue
      after_reply = after_first.copy()
      after_reply.set_move(*divmod(reply, n), other)
      # a reply that wins for the other side or fills the board ends this line
      if after_reply.is_winner(after_reply.board, other) or after_reply.is_terminal():
        continue
      total += dfs(after_reply, palyer, depth + 2, show)

  return total

def policy_dfs(game):
  """Pick the computer's (player 2) move that minimises player 1's DFS win score.

  Every empty cell is tried on a copy of ``game``; the resulting position is
  scored with ``dfs(position, 1)``. Returns the flat index of the cell with
  the lowest score (the first such cell on ties), or -1 when the board has no
  empty cell. ``game`` itself is not modified.
  """
  n = game.size

  def opponent_score(cell):
    # play the candidate move for player 2 on a scratch copy and score it
    trial = game.copy()
    trial.set_move(cell // n, cell % n, 2)
    return dfs(trial, 1)

  free_cells = [cell for cell in range(n * n) if game.board[cell] == 0]
  # min() keeps the first of several equal minima, matching a strict '<' scan
  return min(free_cells, key=opponent_score, default=-1)

# Start an interactive game against the DFS policy; this blocks on input() until the game ends.
game_loop(policy_dfs)

In [None]:
# minmax algorithm