# **Step 0**. Set default values for global variables

In [1]:
board = ["-", "-", "-",
         "-", "-", "-",
         "-", "-", "-"]

VALID_INPUTS = ["1", "2", "3", "4", "5", "6", "7", "8", "9"]
game_still_going = True
winner = None
current_player = "X"

# **Step 1**. Display the Game Board

In [2]:
def display_board():
  print("\n")
  print(f"{board[0]} | {board[1]} | {board[2]}     1 | 2 | 3 ")
  print(f"{board[3]} | {board[4]} | {board[5]}     4 | 5 | 6 ")
  print(f"{board[6]} | {board[7]} | {board[8]}     7 | 8 | 9 ")
  print("\n")

In [3]:
display_board()



- | - | -     1 | 2 | 3 
- | - | -     4 | 5 | 6 
- | - | -     7 | 8 | 9 




# **Step 2**. Allow user to interact with the board

In [4]:
def process_a_turn(player):
  print(f"{player}'s turn.")
  position = input("Choose a position from 1-9: ")

  real_position = int(position) - 1   # In Python, index starts at 0, not 1
  board[real_position] = player       # Replace "-" with the player's piece
  display_board()                     # Show the updated game board

In [5]:
def check_if_game_over():
  global game_still_going
  # If all - has been replaced with X or O, the game is over
  if "-" not in board:
    game_still_going = False
    print("The game is over.")

In [6]:
def play_game():
  display_board()

  while game_still_going:
    process_a_turn(current_player)
    check_if_game_over()

In [7]:
play_game()



- | - | -     1 | 2 | 3 
- | - | -     4 | 5 | 6 
- | - | -     7 | 8 | 9 


X's turn.
Choose a position from 1-9: 1


X | - | -     1 | 2 | 3 
- | - | -     4 | 5 | 6 
- | - | -     7 | 8 | 9 


X's turn.
Choose a position from 1-9: 2


X | X | -     1 | 2 | 3 
- | - | -     4 | 5 | 6 
- | - | -     7 | 8 | 9 


X's turn.
Choose a position from 1-9: 3


X | X | X     1 | 2 | 3 
- | - | -     4 | 5 | 6 
- | - | -     7 | 8 | 9 


X's turn.
Choose a position from 1-9: 4


X | X | X     1 | 2 | 3 
X | - | -     4 | 5 | 6 
- | - | -     7 | 8 | 9 


X's turn.
Choose a position from 1-9: 5


X | X | X     1 | 2 | 3 
X | X | -     4 | 5 | 6 
- | - | -     7 | 8 | 9 


X's turn.
Choose a position from 1-9: 6


X | X | X     1 | 2 | 3 
X | X | X     4 | 5 | 6 
- | - | -     7 | 8 | 9 


X's turn.
Choose a position from 1-9: 7


X | X | X     1 | 2 | 3 
X | X | X     4 | 5 | 6 
X | - | -     7 | 8 | 9 


X's turn.
Choose a position from 1-9: 8


X | X | X     1 | 2 | 3 
X | X | X     4 | 5 |

# **Step 3**. Enable 2 players to both interact with the board

In [8]:
def switch_player():
  global current_player
  current_player = "O" if current_player == "X" else "X"

In [9]:
def reset_board():
    global board, game_still_going
    board = ["-", "-", "-",
            "-", "-", "-",
            "-", "-", "-"]
    game_still_going = True

In [10]:
def play_game():
  reset_board()
  display_board()

  while game_still_going:
    process_a_turn(current_player)
    check_if_game_over()
    switch_player()

In [11]:
play_game()



- | - | -     1 | 2 | 3 
- | - | -     4 | 5 | 6 
- | - | -     7 | 8 | 9 


X's turn.
Choose a position from 1-9: 1


X | - | -     1 | 2 | 3 
- | - | -     4 | 5 | 6 
- | - | -     7 | 8 | 9 


O's turn.
Choose a position from 1-9: 2


X | O | -     1 | 2 | 3 
- | - | -     4 | 5 | 6 
- | - | -     7 | 8 | 9 


X's turn.
Choose a position from 1-9: 3


X | O | X     1 | 2 | 3 
- | - | -     4 | 5 | 6 
- | - | -     7 | 8 | 9 


O's turn.
Choose a position from 1-9: 4


X | O | X     1 | 2 | 3 
O | - | -     4 | 5 | 6 
- | - | -     7 | 8 | 9 


X's turn.
Choose a position from 1-9: 5


X | O | X     1 | 2 | 3 
O | X | -     4 | 5 | 6 
- | - | -     7 | 8 | 9 


O's turn.
Choose a position from 1-9: 6


X | O | X     1 | 2 | 3 
O | X | O     4 | 5 | 6 
- | - | -     7 | 8 | 9 


X's turn.
Choose a position from 1-9: 7


X | O | X     1 | 2 | 3 
O | X | O     4 | 5 | 6 
X | - | -     7 | 8 | 9 


O's turn.
Choose a position from 1-9: 8


X | O | X     1 | 2 | 3 
O | X | O     4 | 5 |

# **Step 4**. Check for win/tie and declare the winner

In [12]:
# Check the rows for a win
def row_winner():
  if board[0] == board[1] == board[2] != "-": return board[0]
  if board[3] == board[4] == board[5] != "-": return board[3] 
  if board[6] == board[7] == board[8] != "-": return board[6]
  return None

In [13]:
# Check the columns for a win
def column_winner():
  if board[0] == board[3] == board[6] != "-": return board[0] 
  if board[1] == board[4] == board[7] != "-": return board[1] 
  if board[2] == board[5] == board[8] != "-": return board[2]
  return None

In [14]:
# Check the diagonals for a win
def diagonal_winner():
  if board[0] == board[4] == board[8] != "-": return board[0]
  if board[2] == board[4] == board[6] != "-": return board[2]
  return None

In [15]:
def find_winner():
  global winner
  winners = [row_winner(), column_winner(), diagonal_winner()]
  if "X" in winners: return "X"
  if "O" in winners: return "O"
  return None

In [16]:
# Update the check_if_game_over function
def check_if_game_over():
  global game_still_going, winner
  winner = find_winner()

  # If somebody wins, stop the game
  if winner != None:
    game_still_going = False
    print(f"{winner} won")
  # If nobody wins and all positions are used up, it's a tie
  elif "-" not in board:
    game_still_going = False
    print("Tie.")

In [17]:
play_game()



- | - | -     1 | 2 | 3 
- | - | -     4 | 5 | 6 
- | - | -     7 | 8 | 9 


O's turn.
Choose a position from 1-9: 1


O | - | -     1 | 2 | 3 
- | - | -     4 | 5 | 6 
- | - | -     7 | 8 | 9 


X's turn.
Choose a position from 1-9: 2


O | X | -     1 | 2 | 3 
- | - | -     4 | 5 | 6 
- | - | -     7 | 8 | 9 


O's turn.
Choose a position from 1-9: 3


O | X | O     1 | 2 | 3 
- | - | -     4 | 5 | 6 
- | - | -     7 | 8 | 9 


X's turn.
Choose a position from 1-9: 4


O | X | O     1 | 2 | 3 
X | - | -     4 | 5 | 6 
- | - | -     7 | 8 | 9 


O's turn.
Choose a position from 1-9: 5


O | X | O     1 | 2 | 3 
X | O | -     4 | 5 | 6 
- | - | -     7 | 8 | 9 


X's turn.
Choose a position from 1-9: 6


O | X | O     1 | 2 | 3 
X | O | X     4 | 5 | 6 
- | - | -     7 | 8 | 9 


O's turn.
Choose a position from 1-9: 7


O | X | O     1 | 2 | 3 
X | O | X     4 | 5 | 6 
O | - | -     7 | 8 | 9 


O won


# **Step 5**. Handle our input errors

In [18]:
def get_valid_position(position):
  # If the input is not valid, or the position is already occupied, ask the user again for valid input
  while ( position not in VALID_INPUTS ) or ( board[int(position) - 1] != "-" ):
    position = input("Select a valid unoccupied position (1-9): ")

  # If the input is valid, return the real position
  real_position = int(position) - 1 
  return real_position

In [19]:
def process_a_turn(player):
  print("{somebody}'s turn.".format(somebody = player))
  position = input("Choose a position from 1-9: ")
  real_position = get_valid_position(position)

  board[real_position] = player     # Replace "-" with the player's piece
  display_board()                   # Show the updated game board

In [20]:
play_game()



- | - | -     1 | 2 | 3 
- | - | -     4 | 5 | 6 
- | - | -     7 | 8 | 9 


X's turn.
Choose a position from 1-9: -1
Select a valid unoccupied position (1-9): 9876
Select a valid unoccupied position (1-9): hello
Select a valid unoccupied position (1-9): 1


X | - | -     1 | 2 | 3 
- | - | -     4 | 5 | 6 
- | - | -     7 | 8 | 9 


O's turn.
Choose a position from 1-9: -1
Select a valid unoccupied position (1-9): 9876
Select a valid unoccupied position (1-9): hello
Select a valid unoccupied position (1-9): 1
Select a valid unoccupied position (1-9): 2


X | O | -     1 | 2 | 3 
- | - | -     4 | 5 | 6 
- | - | -     7 | 8 | 9 


X's turn.
Choose a position from 1-9: -1
Select a valid unoccupied position (1-9): 9876
Select a valid unoccupied position (1-9): hello
Select a valid unoccupied position (1-9): 1
Select a valid unoccupied position (1-9): 2
Select a valid unoccupied position (1-9): 3


X | O | X     1 | 2 | 3 
- | - | -     4 | 5 | 6 
- | - | -     7 | 8 | 9 


O's turn.
Ch

# **Step 6**. Re-build the game in OOP (Object Oriented Programming)

In [21]:
class Human:
    def __init__(self, p1_or_p2):
        self.name = p1_or_p2

    def process_next_move(self, current_board, current_player):
        next_move = input("Choose a valid position from 1-9: ")
        valid_next_move = Board.get_valid_position(current_board, next_move)
        current_board[valid_next_move] = current_player

In [22]:
class Board:
    def __init__(self):
        self.board = ["-", "-", "-",
                      "-", "-", "-",
                      "-", "-", "-"]
        self.game_still_going = True
        self.winner = None
    
    def getBoard(self):
        return self.board
        
    def reset_board(self):
        self.board = ["-", "-", "-",
                      "-", "-", "-",
                      "-", "-", "-"]
        self.game_still_going = True
        self.winner = None

    def display_board(self):
        print("\n")
        print(f"{self.board[0]} | {self.board[1]} | {self.board[2]}     1 | 2 | 3 ")
        print(f"{self.board[3]} | {self.board[4]} | {self.board[5]}     4 | 5 | 6 ")
        print(f"{self.board[6]} | {self.board[7]} | {self.board[8]}     7 | 8 | 9 ")
        print("\n")
        
    @staticmethod
    def get_valid_position(current_board, position):
        VALID_INPUTS = ["1", "2", "3", "4", "5", "6", "7", "8", "9"]
    
        # If the input is not valid, or the position is already occupied, ask the user again for valid input
        while ( position not in VALID_INPUTS ) or ( current_board[int(position) - 1] != "-" ):
            position = input("Select a valid unoccupied position (1-9): ")

        # If the input is valid, return the real position
        real_position = int(position) - 1 
        return real_position
        
    # Check the rows for a win
    def row_winner(self):
      if self.board[0] == self.board[1] == self.board[2] != "-": return self.board[0]
      if self.board[3] == self.board[4] == self.board[5] != "-": return self.board[3] 
      if self.board[6] == self.board[7] == self.board[8] != "-": return self.board[6]
      return None
    
    # Check the columns for a win
    def column_winner(self):
      if self.board[0] == self.board[3] == self.board[6] != "-": return self.board[0] 
      if self.board[1] == self.board[4] == self.board[7] != "-": return self.board[1] 
      if self.board[2] == self.board[5] == self.board[8] != "-": return self.board[2]
      return None
    
    # Check the diagonals for a win
    def diagonal_winner(self):
      if self.board[0] == self.board[4] == self.board[8] != "-": return self.board[0]
      if self.board[2] == self.board[4] == self.board[6] != "-": return self.board[2]
      return None

    def find_winner(self):
      winners = [self.row_winner(), self.column_winner(), self.diagonal_winner()]
      if "X" in winners: return "X"
      if "O" in winners: return "O"
      return None

    def check_if_game_over(self):
        self.winner = self.find_winner()

        # If somebody wins, stop the game
        if self.winner is not None:
            self.game_still_going = False
            print(f"{self.winner} won")

        # If nobody wins and all positions are used up, it's a tie
        elif "-" not in self.board:
            self.game_still_going = False
            print("Tie.")

In [23]:
class Game:
    def __init__(self, p1, p2, board):
        self.board = board
        self.p1 = p1
        self.p2 = p2
        self.current_player = "O"
        
    def switch_player(self):
        self.current_player = "O" if self.current_player == "X" else "X"
        
    def reset_game(self):
        self.board.reset_board()
        self.current_player = "O"
    
    def play(self):
        self.reset_game()
        self.board.display_board()
        
        while self.board.game_still_going:
            self.switch_player()
            
            # X is p1
            if self.current_player == "X":
                self.p1.process_next_move(self.board.getBoard(), self.current_player)
            # O is p2
            else:
                self.p2.process_next_move(self.board.getBoard(), self.current_player)
                
            self.board.display_board()
            self.board.check_if_game_over()

In [24]:
p1 = Human('p1')
p2 = Human('p2')
board = Board()
game = Game(p1, p2, board)

game.play()



- | - | -     1 | 2 | 3 
- | - | -     4 | 5 | 6 
- | - | -     7 | 8 | 9 


Choose a valid position from 1-9: 1


X | - | -     1 | 2 | 3 
- | - | -     4 | 5 | 6 
- | - | -     7 | 8 | 9 


Choose a valid position from 1-9: 5


X | - | -     1 | 2 | 3 
- | O | -     4 | 5 | 6 
- | - | -     7 | 8 | 9 


Choose a valid position from 1-9: 9


X | - | -     1 | 2 | 3 
- | O | -     4 | 5 | 6 
- | - | X     7 | 8 | 9 


Choose a valid position from 1-9: 4


X | - | -     1 | 2 | 3 
O | O | -     4 | 5 | 6 
- | - | X     7 | 8 | 9 


Choose a valid position from 1-9: 6


X | - | -     1 | 2 | 3 
O | O | X     4 | 5 | 6 
- | - | X     7 | 8 | 9 


Choose a valid position from 1-9: 3


X | - | O     1 | 2 | 3 
O | O | X     4 | 5 | 6 
- | - | X     7 | 8 | 9 


Choose a valid position from 1-9: 7


X | - | O     1 | 2 | 3 
O | O | X     4 | 5 | 6 
X | - | X     7 | 8 | 9 


Choose a valid position from 1-9: 2


X | O | O     1 | 2 | 3 
O | O | X     4 | 5 | 6 
X | - | X     7 | 8 | 9 


C

# Step 7: Add a Reinforcement Learning bot

In [25]:
import pickle
from random import random, choice, uniform

In [26]:
# Add a new method to calculate the hash of the board
class Board:
    def __init__(self):
        self.board = ["-", "-", "-",
                      "-", "-", "-",
                      "-", "-", "-"]
        self.game_still_going = True
        self.winner = None
    
    def getBoard(self):
        return self.board
        
    def reset_board(self):
        self.board = ["-", "-", "-",
                      "-", "-", "-",
                      "-", "-", "-"]
        self.game_still_going = True
        self.winner = None

    def display_board(self):
        print("\n")
        print(f"{self.board[0]} | {self.board[1]} | {self.board[2]}     1 | 2 | 3 ")
        print(f"{self.board[3]} | {self.board[4]} | {self.board[5]}     4 | 5 | 6 ")
        print(f"{self.board[6]} | {self.board[7]} | {self.board[8]}     7 | 8 | 9 ")
        print("\n")
        
    @staticmethod
    def get_valid_position(current_board, position):
        VALID_INPUTS = ["1", "2", "3", "4", "5", "6", "7", "8", "9"]
    
        # If the input is not valid, or the position is already occupied, ask the user again for valid input
        while ( position not in VALID_INPUTS ) or ( current_board[int(position) - 1] != "-" ):
            position = input("Select a valid unoccupied position (1-9): ")

        # If the input is valid, return the real position
        real_position = int(position) - 1 
        return real_position
        
    # Check the rows for a win
    def row_winner(self):
      if self.board[0] == self.board[1] == self.board[2] != "-": return self.board[0]
      if self.board[3] == self.board[4] == self.board[5] != "-": return self.board[3] 
      if self.board[6] == self.board[7] == self.board[8] != "-": return self.board[6]
      return None
    
    # Check the columns for a win
    def column_winner(self):
      if self.board[0] == self.board[3] == self.board[6] != "-": return self.board[0] 
      if self.board[1] == self.board[4] == self.board[7] != "-": return self.board[1] 
      if self.board[2] == self.board[5] == self.board[8] != "-": return self.board[2]
      return None
    
    # Check the diagonals for a win
    def diagonal_winner(self):
      if self.board[0] == self.board[4] == self.board[8] != "-": return self.board[0]
      if self.board[2] == self.board[4] == self.board[6] != "-": return self.board[2]
      return None

    def find_winner(self):
      winners = [self.row_winner(), self.column_winner(), self.diagonal_winner()]
      if "X" in winners: return "X"
      if "O" in winners: return "O"
      return None

    def check_if_game_over(self):
        self.winner = self.find_winner()

        # If somebody wins, stop the game
        if self.winner is not None:
            self.game_still_going = False
            print(f"{self.winner} won")

        # If nobody wins and all positions are used up, it's a tie
        elif "-" not in self.board:
            self.game_still_going = False
            print("Tie.")
            
#----------------------------------------New methods----------------------------------------#
    @staticmethod
    def get_board_hash(board):
        return str(board)

In [27]:
class Computer:
    def __init__(self, p1_or_p2):
        self.name = p1_or_p2
        self.best_moves = []          # Save optimal moves in a list
        self.move_reward = {}         # Save the reward for each move in a dictionary
        self.learning_rate = 0.2

    def process_next_move(self, current_board, current_player):
        # Calculate all available positions
        available_positions = []
        for i in range(9):
            if current_board[i] == "-": available_positions.append(i)
                
        # For each remaining position, pick the one with the highest move_reward
        max_reward = -999
        next_move = choice(available_positions)
        
        for position in available_positions:
            next_board = current_board.copy()
            next_board[position] = current_player
            next_boardHash = Board.get_board_hash(next_board)

            if self.move_reward.get(next_boardHash) is None: current_reward = 0
            else: current_reward = self.move_reward.get(next_boardHash)

            if current_reward >= max_reward:
                max_reward = current_reward
                next_move = position
        
        # Perform the move
        current_board[next_move] = current_player
        
        # Memorize the best move
        current_move = Board.get_board_hash(current_board)
        self.best_moves.append(current_move)
        
    def train_next_move(self, current_board, current_player):
        # Calculate all available positions
        available_positions = []
        for i in range(9):
            if current_board[i] == "-": available_positions.append(i)
                
        # For each remaining position, pick the one with the highest move_reward
        max_reward = -999
        next_move = choice(available_positions)
        
        # Keep 30% random, 70% logical
        if uniform(0, 1) > 0.3:        
            for position in available_positions:
                next_board = current_board.copy()
                next_board[position] = current_player
                next_boardHash = Board.get_board_hash(next_board)

                if self.move_reward.get(next_boardHash) is None: current_reward = 0
                else: current_reward = self.move_reward.get(next_boardHash)

                if current_reward >= max_reward:
                    max_reward = current_reward
                    next_move = position
        
        # Perform the move
        current_board[next_move] = current_player
        
        # Memorize the best move
        current_move = Board.get_board_hash(current_board)
        self.best_moves.append(current_move)

    def feed_reward(self, reward):
        for move in self.best_moves:
            # If the reward for the current state doesn't exist, set it to 0
            if self.move_reward.get(move) is None:
                self.move_reward[move] = 0
            # Then, add reward to each movement
            self.move_reward[move] += self.learning_rate * (reward - self.move_reward[move])
        
    def reset_memory(self):
        self.best_moves = []

    def save_policy(self):
        policy = open(f'policy_{self.name}', 'wb')
        pickle.dump(self.move_reward, policy)
        policy.close()

    def load_policy(self, file):
        policy = open(file, 'rb')
        self.move_reward = pickle.load(policy)
        policy.close()

In [28]:
# Add new methods to train the bot and let human play against the bot
class Game:
    def __init__(self, p1, p2, board):
        self.board = board
        self.p1 = p1
        self.p2 = p2
        self.current_player = "O"
        
    def switch_player(self):
        self.current_player = "O" if self.current_player == "X" else "X"
        
    def reset_game(self):
        self.board.reset_board()
        self.current_player = "O"
    
    def play(self):
        self.reset_game()
        self.board.display_board()
        
        while self.board.game_still_going:
            self.switch_player()
            
            # X is p1
            if self.current_player == "X":
                self.p1.process_next_move(self.board.getBoard(), self.current_player)
            # O is p2
            else:
                self.p2.process_next_move(self.board.getBoard(), self.current_player)
                
            self.board.display_board()
            self.board.check_if_game_over()
#----------------------------------------New methods----------------------------------------#
    def train_computer(self, rounds=10000):
        for i in range(rounds):
            if i%100 == 0: print(f'Round {i}')
            self.reset_game()
            while self.board.game_still_going:
                self.switch_player()

                if self.current_player == "X":
                    self.p1.train_next_move(self.board.getBoard(), self.current_player)
                else:
                    self.p2.train_next_move(self.board.getBoard(), self.current_player)
                
                self.board.check_if_game_over()
            
            # If game is over, give rewards to each bot based on merit
            if self.board.game_still_going == False:
                if self.board.winner == "X":
                    self.p1.feed_reward(10)
                    self.p2.feed_reward(-10)

                elif self.board.winner == "O":
                    self.p1.feed_reward(-10)
                    self.p2.feed_reward(10)

                else:
                    self.p1.feed_reward(0)
                    self.p2.feed_reward(0)
                
                # Reset the bots' memory to prevent it from making old moves and not learning anything new
                self.p1.reset_memory()
                self.p2.reset_memory()

In [29]:
p1 = Computer('p1')
p2 = Computer('p2')
board = Board()
game = Game(p1, p2, board)

game.train_computer()

Round 0
X won
Tie.
Tie.
X won
X won
O won
X won
X won
X won
X won
X won
X won
O won
X won
X won
X won
Tie.
O won
X won
X won
X won
X won
X won
O won
O won
X won
X won
X won
X won
X won
X won
Tie.
O won
X won
X won
X won
Tie.
O won
X won
Tie.
Tie.
X won
X won
X won
X won
O won
X won
O won
X won
X won
X won
O won
X won
X won
X won
X won
Tie.
X won
X won
X won
Tie.
Tie.
O won
X won
X won
O won
X won
Tie.
X won
X won
Tie.
X won
X won
X won
X won
X won
X won
O won
O won
O won
X won
X won
X won
O won
Tie.
X won
O won
Tie.
X won
O won
X won
X won
X won
Tie.
X won
X won
Tie.
X won
O won
O won
Round 100
X won
X won
O won
X won
X won
X won
O won
X won
X won
O won
X won
X won
O won
X won
X won
Tie.
X won
X won
Tie.
O won
O won
X won
X won
X won
X won
X won
X won
X won
X won
O won
X won
X won
O won
O won
X won
X won
Tie.
Tie.
O won
O won
X won
O won
O won
O won
Tie.
Tie.
X won
Tie.
Tie.
O won
X won
X won
O won
O won
X won
X won
X won
O won
O won
X won
X won
O won
X won
O won
X won
X won
X won
X wo

X won
X won
X won
O won
Tie.
O won
O won
O won
X won
Tie.
X won
X won
Tie.
Tie.
O won
X won
X won
Tie.
O won
Tie.
X won
X won
O won
X won
X won
Tie.
X won
O won
X won
X won
X won
X won
Tie.
Tie.
Tie.
Tie.
Tie.
X won
Tie.
X won
Tie.
Tie.
Tie.
O won
Tie.
Tie.
O won
O won
O won
X won
X won
X won
X won
O won
X won
X won
Tie.
X won
O won
X won
O won
X won
Tie.
O won
Tie.
X won
X won
O won
X won
Tie.
Tie.
Tie.
Tie.
Tie.
Tie.
Tie.
O won
O won
X won
X won
X won
Tie.
X won
X won
X won
O won
Tie.
Tie.
O won
X won
Tie.
X won
Round 1500
X won
O won
X won
Tie.
Tie.
Tie.
Tie.
O won
X won
O won
Tie.
O won
X won
Tie.
O won
O won
X won
O won
X won
O won
X won
X won
X won
X won
O won
Tie.
O won
Tie.
X won
X won
X won
O won
X won
X won
O won
X won
X won
Tie.
O won
X won
X won
Tie.
Tie.
O won
Tie.
Tie.
X won
X won
O won
Tie.
Tie.
X won
O won
O won
Tie.
X won
X won
X won
X won
X won
X won
Tie.
X won
X won
Tie.
O won
X won
Tie.
X won
Tie.
O won
Tie.
X won
O won
X won
O won
O won
Tie.
O won
O won
Tie.
O won


Tie.
X won
X won
X won
X won
X won
X won
Round 2900
X won
X won
Tie.
X won
X won
X won
O won
X won
X won
X won
X won
Tie.
X won
X won
O won
X won
Tie.
X won
O won
O won
X won
X won
Tie.
Tie.
Tie.
X won
X won
Tie.
X won
Tie.
X won
Tie.
Tie.
O won
X won
X won
X won
Tie.
Tie.
X won
O won
X won
X won
X won
X won
X won
X won
X won
Tie.
X won
X won
Tie.
Tie.
X won
Tie.
X won
X won
X won
X won
O won
X won
O won
O won
X won
Tie.
Tie.
Tie.
X won
Tie.
O won
X won
Tie.
X won
Tie.
X won
Tie.
X won
X won
X won
O won
O won
O won
O won
Tie.
X won
O won
X won
O won
X won
X won
X won
Tie.
O won
Tie.
X won
Tie.
X won
Tie.
O won
Tie.
Round 3000
Tie.
X won
O won
X won
O won
O won
X won
X won
O won
X won
Tie.
O won
Tie.
Tie.
O won
O won
X won
X won
O won
X won
X won
X won
O won
X won
Tie.
X won
X won
X won
X won
X won
Tie.
O won
X won
Tie.
X won
Tie.
Tie.
X won
X won
X won
Tie.
O won
X won
X won
X won
Tie.
Tie.
X won
X won
X won
X won
X won
X won
O won
Tie.
X won
O won
O won
X won
X won
Tie.
X won
O won
X 

X won
O won
Tie.
X won
Tie.
Tie.
X won
O won
Tie.
Tie.
X won
X won
X won
Tie.
X won
X won
X won
Tie.
X won
O won
O won
O won
Tie.
Tie.
X won
O won
X won
X won
Round 4900
Tie.
O won
X won
Tie.
O won
Tie.
Tie.
Tie.
X won
Tie.
Tie.
X won
O won
Tie.
Tie.
X won
X won
X won
Tie.
O won
X won
O won
Tie.
O won
X won
O won
Tie.
O won
O won
Tie.
X won
Tie.
X won
O won
O won
X won
Tie.
Tie.
X won
X won
O won
O won
O won
O won
Tie.
X won
X won
X won
X won
X won
O won
O won
X won
Tie.
O won
X won
O won
Tie.
O won
Tie.
X won
X won
X won
Tie.
Tie.
O won
Tie.
O won
Tie.
O won
Tie.
X won
O won
X won
X won
X won
Tie.
Tie.
Tie.
Tie.
Tie.
X won
Tie.
Tie.
Tie.
O won
Tie.
X won
Tie.
X won
X won
X won
X won
O won
X won
Tie.
X won
X won
X won
Tie.
Round 5000
Tie.
X won
O won
O won
Tie.
X won
X won
Tie.
X won
Tie.
Tie.
O won
Tie.
Tie.
X won
X won
X won
Tie.
X won
X won
X won
X won
X won
X won
X won
O won
X won
X won
X won
X won
X won
Tie.
X won
X won
X won
O won
X won
O won
X won
Tie.
X won
X won
Tie.
X won
Tie

Tie.
X won
O won
Tie.
X won
X won
Tie.
X won
X won
Tie.
X won
O won
O won
X won
Tie.
X won
Tie.
X won
X won
X won
O won
Tie.
O won
Tie.
X won
Tie.
X won
X won
Tie.
O won
Tie.
X won
X won
X won
Tie.
Tie.
X won
O won
X won
X won
Tie.
O won
Tie.
Tie.
Round 6400
O won
Tie.
Tie.
X won
Tie.
X won
X won
Tie.
Tie.
X won
Tie.
X won
X won
X won
X won
X won
Tie.
X won
Tie.
X won
X won
X won
O won
X won
X won
X won
Tie.
X won
X won
X won
Tie.
Tie.
X won
X won
Tie.
X won
X won
Tie.
Tie.
X won
X won
X won
X won
X won
X won
Tie.
X won
Tie.
X won
Tie.
X won
X won
X won
X won
X won
O won
X won
X won
X won
O won
X won
X won
O won
X won
X won
O won
O won
Tie.
X won
Tie.
Tie.
X won
Tie.
X won
O won
O won
Tie.
X won
Tie.
O won
O won
X won
O won
X won
X won
X won
Tie.
O won
X won
Tie.
O won
X won
Tie.
X won
Tie.
X won
X won
X won
Tie.
X won
Round 6500
X won
X won
X won
X won
Tie.
Tie.
Tie.
O won
Tie.
Tie.
X won
X won
X won
Tie.
Tie.
X won
Tie.
Tie.
O won
X won
Tie.
Tie.
X won
Tie.
Tie.
X won
O won
Tie.
O wo

Tie.
Tie.
X won
O won
X won
O won
Tie.
X won
X won
O won
Tie.
O won
Tie.
O won
X won
X won
X won
X won
X won
X won
X won
O won
Tie.
X won
O won
Tie.
Tie.
Tie.
O won
O won
Tie.
X won
Tie.
X won
Tie.
X won
Tie.
Tie.
X won
O won
Tie.
Tie.
X won
O won
Tie.
X won
X won
Tie.
X won
X won
X won
O won
X won
X won
X won
X won
X won
X won
X won
X won
Round 7900
X won
Tie.
X won
X won
Tie.
X won
X won
Tie.
Tie.
X won
Tie.
O won
Tie.
X won
X won
O won
O won
Tie.
X won
X won
Tie.
O won
X won
X won
O won
Tie.
X won
X won
O won
X won
Tie.
O won
X won
X won
Tie.
X won
X won
X won
Tie.
X won
Tie.
Tie.
O won
X won
Tie.
O won
Tie.
Tie.
X won
O won
Tie.
Tie.
X won
Tie.
Tie.
Tie.
X won
X won
X won
Tie.
O won
Tie.
O won
Tie.
Tie.
X won
X won
X won
Tie.
X won
X won
X won
Tie.
Tie.
O won
X won
O won
Tie.
Tie.
O won
Tie.
Tie.
X won
O won
O won
O won
X won
X won
Tie.
O won
X won
O won
O won
Tie.
Tie.
X won
X won
Tie.
X won
X won
Round 8000
Tie.
Tie.
O won
O won
X won
O won
O won
O won
X won
O won
X won
O won
X w

Tie.
Tie.
X won
X won
X won
X won
Tie.
X won
X won
X won
O won
X won
X won
O won
X won
X won
O won
Tie.
O won
X won
X won
X won
X won
Tie.
X won
X won
Tie.
Tie.
Tie.
X won
X won
X won
Tie.
X won
X won
X won
X won
Tie.
X won
Tie.
X won
X won
Tie.
X won
X won
Tie.
Tie.
X won
Tie.
X won
X won
Tie.
Tie.
X won
X won
X won
X won
X won
X won
O won
X won
O won
O won
Tie.
Tie.
X won
O won
X won
X won
Tie.
X won
O won
Tie.
O won
Tie.
X won
Round 9400
O won
Tie.
X won
X won
O won
O won
X won
X won
X won
X won
Tie.
X won
Tie.
Tie.
Tie.
O won
O won
Tie.
X won
O won
X won
X won
X won
O won
Tie.
X won
Tie.
Tie.
X won
Tie.
O won
X won
X won
O won
X won
Tie.
X won
X won
Tie.
O won
Tie.
X won
X won
X won
Tie.
X won
X won
O won
Tie.
X won
X won
O won
Tie.
O won
Tie.
O won
X won
X won
X won
X won
X won
O won
X won
Tie.
O won
X won
X won
O won
Tie.
X won
X won
X won
X won
X won
X won
O won
X won
O won
O won
X won
Tie.
X won
X won
X won
O won
X won
X won
O won
X won
X won
X won
Tie.
Tie.
X won
O won
O won
X

In [30]:
p1.save_policy()
p2.save_policy()

In [31]:
p1 = Computer("p1")
p1.load_policy('policy_p1')

p2 = Human("p2")

board = Board()
game = Game(p1, p2, board)

game.play()



- | - | -     1 | 2 | 3 
- | - | -     4 | 5 | 6 
- | - | -     7 | 8 | 9 




- | - | -     1 | 2 | 3 
X | - | -     4 | 5 | 6 
- | - | -     7 | 8 | 9 


Choose a valid position from 1-9: 5


- | - | -     1 | 2 | 3 
X | O | -     4 | 5 | 6 
- | - | -     7 | 8 | 9 




X | - | -     1 | 2 | 3 
X | O | -     4 | 5 | 6 
- | - | -     7 | 8 | 9 


Choose a valid position from 1-9: 7


X | - | -     1 | 2 | 3 
X | O | -     4 | 5 | 6 
O | - | -     7 | 8 | 9 




X | - | X     1 | 2 | 3 
X | O | -     4 | 5 | 6 
O | - | -     7 | 8 | 9 


Choose a valid position from 1-9: 2


X | O | X     1 | 2 | 3 
X | O | -     4 | 5 | 6 
O | - | -     7 | 8 | 9 




X | O | X     1 | 2 | 3 
X | O | -     4 | 5 | 6 
O | X | -     7 | 8 | 9 


Choose a valid position from 1-9: 9


X | O | X     1 | 2 | 3 
X | O | -     4 | 5 | 6 
O | X | O     7 | 8 | 9 




X | O | X     1 | 2 | 3 
X | O | X     4 | 5 | 6 
O | X | O     7 | 8 | 9 


Tie.


In [32]:
p1 = Human("p1")

p2 = Computer("p2")
p2.load_policy('policy_p2')

board = Board()
game = Game(p1, p2, board)

game.play()



- | - | -     1 | 2 | 3 
- | - | -     4 | 5 | 6 
- | - | -     7 | 8 | 9 


Choose a valid position from 1-9: 1


X | - | -     1 | 2 | 3 
- | - | -     4 | 5 | 6 
- | - | -     7 | 8 | 9 




X | - | -     1 | 2 | 3 
- | O | -     4 | 5 | 6 
- | - | -     7 | 8 | 9 


Choose a valid position from 1-9: 9


X | - | -     1 | 2 | 3 
- | O | -     4 | 5 | 6 
- | - | X     7 | 8 | 9 




X | - | -     1 | 2 | 3 
- | O | -     4 | 5 | 6 
O | - | X     7 | 8 | 9 


Choose a valid position from 1-9: 3


X | - | X     1 | 2 | 3 
- | O | -     4 | 5 | 6 
O | - | X     7 | 8 | 9 




X | O | X     1 | 2 | 3 
- | O | -     4 | 5 | 6 
O | - | X     7 | 8 | 9 


Choose a valid position from 1-9: 6


X | O | X     1 | 2 | 3 
- | O | X     4 | 5 | 6 
O | - | X     7 | 8 | 9 


X won
