Farama-Foundation · elliottower · May 3, 2024 · Mar 9, 2024 · Mar 9, 2024 · Mar 9, 2024
diff --git a/docs/api/wrappers/pz_wrappers.md b/docs/api/wrappers/pz_wrappers.md
@@ -65,8 +65,8 @@ You can apply these wrappers to your environment in a similar manner to the belo
 To wrap an AEC environment:
 ```python
 from pettingzoo.utils import TerminateIllegalWrapper
-from pettingzoo.classic import tictactoe_v3
-env = tictactoe_v3.env()
+from pettingzoo.classic import tictactoe_v4
+env = tictactoe_v4.env()
 env = TerminateIllegalWrapper(env, illegal_reward=-1)
 
 env.reset()

diff --git a/pettingzoo/classic/all_modules.py b/pettingzoo/classic/all_modules.py
@@ -8,14 +8,14 @@
     rps_v2,
     texas_holdem_no_limit_v6,
     texas_holdem_v4,
-    tictactoe_v3,
+    tictactoe_v4,
 )
 
 classic_environments = {
     "classic/chess_v6": chess_v6,
     "classic/rps_v2": rps_v2,
     "classic/connect_four_v3": connect_four_v3,
-    "classic/tictactoe_v3": tictactoe_v3,
+    "classic/tictactoe_v4": tictactoe_v4,
     "classic/leduc_holdem_v4": leduc_holdem_v4,
     "classic/texas_holdem_v4": texas_holdem_v4,
     "classic/texas_holdem_no_limit_v6": texas_holdem_no_limit_v6,

diff --git a/pettingzoo/classic/tictactoe/board.py b/pettingzoo/classic/tictactoe/board.py
@@ -1,79 +1,113 @@
+class BadTicTacToeMoveException(Exception):
+    """Exception raised when a bad move is made on TicTacToe board."""
+
+    def __init__(self, message="Bad TicTacToe move"):
+        super().__init__(message)
+
+
+TTT_PLAYER1_WIN = 0
+TTT_PLAYER2_WIN = 1
+TTT_TIE = -1
+TTT_GAME_NOT_OVER = -2
+
+
 class Board:
+    """Board for a TicTacToe Game.
+
+    This tracks the position and identity of marks on the game board
+    and allows checking for a winner.
+
+    Example of usage:
+
+    import random
+    board = Board()
+
+    # random legal moves - for example purposes
+    def choose_move(board_obj: Board) -> int:
+        legal_moves = [i for i, mark in enumerate(board_obj.squares) if mark == 0]
+        return random.choice(legal_moves)
+
+    player = 0
+    while True:
+        move = choose_move(board)
+        board.play_turn(player, move)
+        status = board.game_status()
+        if status != TTT_GAME_NOT_OVER:
+            if status in [TTT_PLAYER1_WIN, TTT_PLAYER2_WIN]:
+                print(f"player {status} won")
+            else:  # status == TTT_TIE
+                print("Tie Game")
+            break
+        player = player ^ 1  # swaps between players 0 and 1
+    """
+
+    # indices of the winning lines: vertical(x3), horizontal(x3), diagonal(x2)
+    winning_combinations = [
+        (0, 1, 2),
+        (3, 4, 5),
+        (6, 7, 8),
+        (0, 3, 6),
+        (1, 4, 7),
+        (2, 5, 8),
+        (0, 4, 8),
+        (2, 4, 6),
+    ]
+
     def __init__(self):
-        # internally self.board.squares holds a flat representation of tic tac toe board
-        # where an empty board is [0, 0, 0, 0, 0, 0, 0, 0, 0]
-        # where indexes are column wise order
+        # self.squares holds a flat representation of the tic tac toe board.
+        # an empty board is [0, 0, 0, 0, 0, 0, 0, 0, 0].
+        # player 1's squares are marked 1, while player 2's are marked 2.
+        # mapping of the flat indices to the 3x3 grid is as follows:
         # 0 3 6
         # 1 4 7
         # 2 5 8
-
-        # empty -- 0
-        # player 0 -- 1
-        # player 1 -- 2
         self.squares = [0] * 9
 
-        # precommute possible winning combinations
-        self.calculate_winners()
+    @property
+    def _n_empty_squares(self):
+        """The current number of empty squares on the board."""
+        return self.squares.count(0)
 
-    def setup(self):
-        self.calculate_winners()
+    def reset(self):
+        """Remove all marks from the board."""
+        self.squares = [0] * 9
 
     def play_turn(self, agent, pos):
-        # if spot is empty
+        """Place a mark by the agent in the spot given.
+
+        The following are required for a move to be valid:
+        * The agent must be a known agent ID (either 0 or 1).
+        * The spot must be empty.
+        * The spot must be in the board (integer: 0 <= spot <= 8).
+
+        If any of those are not true, a BadTicTacToeMoveException
+        will be raised.
+        """
+        if pos < 0 or pos > 8:
+            raise BadTicTacToeMoveException("Invalid move location")
+        if agent != 0 and agent != 1:
+            raise BadTicTacToeMoveException("Invalid agent")
         if self.squares[pos] != 0:
-            return
-        if agent == 0:
-            self.squares[pos] = 1
-        elif agent == 1:
-            self.squares[pos] = 2
-        return
-
-    def calculate_winners(self):
-        winning_combinations = []
-        indices = [x for x in range(0, 9)]
-
-        # Vertical combinations
-        winning_combinations += [
-            tuple(indices[i : (i + 3)]) for i in range(0, len(indices), 3)
-        ]
-
-        # Horizontal combinations
-        winning_combinations += [
-            tuple(indices[x] for x in range(y, len(indices), 3)) for y in range(0, 3)
-        ]
-
-        # Diagonal combinations
-        winning_combinations.append(tuple(x for x in range(0, len(indices), 4)))
-        winning_combinations.append(tuple(x for x in range(2, len(indices) - 1, 2)))
-
-        self.winning_combinations = winning_combinations
-
-    # returns:
-    # -1 for no winner
-    # 1 -- agent 0 wins
-    # 2 -- agent 1 wins
-    def check_for_winner(self):
-        winner = -1
-        for combination in self.winning_combinations:
-            states = []
-            for index in combination:
-                states.append(self.squares[index])
-            if all(x == 1 for x in states):
-                winner = 1
-            if all(x == 2 for x in states):
-                winner = 2
-        return winner
-
-    def check_game_over(self):
-        winner = self.check_for_winner()
-
-        if winner == -1 and all(square in [1, 2] for square in self.squares):
-            # tie
-            return True
-        elif winner in [1, 2]:
-            return True
-        else:
-            return False
+            raise BadTicTacToeMoveException("Location is not empty")
+
+        # agent is [0, 1]. board values are stored as [1, 2].
+        self.squares[pos] = agent + 1
+
+    def game_status(self):
+        """Return status (winner, TTT_TIE if no winner, or TTT_GAME_NOT_OVER)."""
+        for indices in self.winning_combinations:
+            states = [self.squares[idx] for idx in indices]
+            if states == [1, 1, 1]:
+                return TTT_PLAYER1_WIN
+            if states == [2, 2, 2]:
+                return TTT_PLAYER2_WIN
+        if self._n_empty_squares == 0:
+            return TTT_TIE
+        return TTT_GAME_NOT_OVER
 
     def __str__(self):
         return str(self.squares)
+
+    def legal_moves(self):
+        """Return list of legal moves (as flat indices for spaces on the board)."""
+        return [i for i, mark in enumerate(self.squares) if mark == 0]
diff --git a/pettingzoo/classic/tictactoe/test_board.py b/pettingzoo/classic/tictactoe/test_board.py
@@ -0,0 +1,128 @@
+"""Test cases for TicTacToe board."""
+
+from __future__ import annotations
+
+from typing import Any
+
+import pytest
+
+from pettingzoo.classic.tictactoe.board import (  # type: ignore
+    TTT_GAME_NOT_OVER,
+    TTT_PLAYER1_WIN,
+    TTT_PLAYER2_WIN,
+    TTT_TIE,
+    BadTicTacToeMoveException,
+    Board,
+)
+
+# Note: the mapping of moves to board positions is:
+# 0 3 6
+# 1 4 7
+# 2 5 8
+
+agent2_win = {
+    "moves": [
+        # agent_id, position, board after move
+        (0, 4, [0, 0, 0, 0, 1, 0, 0, 0, 0]),
+        (1, 0, [2, 0, 0, 0, 1, 0, 0, 0, 0]),
+        (0, 2, [2, 0, 1, 0, 1, 0, 0, 0, 0]),
+        (1, 6, [2, 0, 1, 0, 1, 0, 2, 0, 0]),
+        (0, 3, [2, 0, 1, 1, 1, 0, 2, 0, 0]),
+        (1, 7, [2, 0, 1, 1, 1, 0, 2, 2, 0]),
+        (0, 1, [2, 1, 1, 1, 1, 0, 2, 2, 0]),
+        (1, 8, [2, 1, 1, 1, 1, 0, 2, 2, 2]),  # agent 2 wins here
+        (0, 5, [2, 1, 1, 1, 1, 1, 2, 2, 2]),
+    ],
+    "max_step": 7,  # should not get past here
+    "winner": TTT_PLAYER2_WIN,
+}
+
+tie = {
+    "moves": [  # should be tie
+        (0, 0, [1, 0, 0, 0, 0, 0, 0, 0, 0]),
+        (1, 3, [1, 0, 0, 2, 0, 0, 0, 0, 0]),
+        (0, 1, [1, 1, 0, 2, 0, 0, 0, 0, 0]),
+        (1, 4, [1, 1, 0, 2, 2, 0, 0, 0, 0]),
+        (0, 5, [1, 1, 0, 2, 2, 1, 0, 0, 0]),
+        (1, 2, [1, 1, 2, 2, 2, 1, 0, 0, 0]),
+        (0, 6, [1, 1, 2, 2, 2, 1, 1, 0, 0]),
+        (1, 7, [1, 1, 2, 2, 2, 1, 1, 2, 0]),
+        (0, 8, [1, 1, 2, 2, 2, 1, 1, 2, 1]),
+    ],
+    "max_step": 8,
+    "winner": TTT_TIE,
+}
+
+agent1_win = {
+    "moves": [
+        (0, 0, [1, 0, 0, 0, 0, 0, 0, 0, 0]),
+        (1, 3, [1, 0, 0, 2, 0, 0, 0, 0, 0]),
+        (0, 1, [1, 1, 0, 2, 0, 0, 0, 0, 0]),
+        (1, 4, [1, 1, 0, 2, 2, 0, 0, 0, 0]),
+        (0, 2, [1, 1, 1, 2, 2, 0, 0, 0, 0]),  # agent 1 should win here
+        (1, 5, [1, 1, 1, 2, 2, 2, 0, 0, 0]),
+        (0, 6, [1, 1, 1, 2, 2, 2, 1, 0, 0]),
+        (1, 7, [1, 1, 1, 2, 2, 2, 1, 2, 0]),
+        (0, 8, [1, 1, 1, 2, 2, 2, 1, 2, 1]),
+    ],
+    "max_step": 4,
+    "winner": TTT_PLAYER1_WIN,
+}
+
+
+@pytest.mark.parametrize("values", [agent1_win, agent2_win, tie])
+def test_tictactoe_board_games(values: dict[str, Any]) -> None:
+    """Test that TicTacToe games go as expected."""
+    expected_winner = values["winner"]
+    max_step = values["max_step"]
+
+    board = Board()
+    for i, (agent, pos, board_layout) in enumerate(values["moves"]):
+        assert i <= max_step, "max step exceed in tictactoe game"
+        board.play_turn(agent, pos)
+        assert board_layout == board.squares, "wrong tictactoe layout after move"
+        status = board.game_status()
+        if status != TTT_GAME_NOT_OVER:
+            assert i == max_step, "tictactoe game ended on wrong step"
+            assert status == expected_winner, "wrong winner in tictactoe board test"
+            break
+
+
+def test_tictactoe_winning_boards() -> None:
+    """Test that winning board configurations actually win."""
+    # these are the winning lines for player 1. Note that no moves for
+    # player 2 are placed, so each board holds only the winning line itself.
+    winning_lines = [  # vertical(x3), horizontal(x3), diagonal(x2)
+        [1, 1, 1, 0, 0, 0, 0, 0, 0],
+        [0, 0, 0, 1, 1, 1, 0, 0, 0],
+        [0, 0, 0, 0, 0, 0, 1, 1, 1],
+        [1, 0, 0, 1, 0, 0, 1, 0, 0],
+        [0, 1, 0, 0, 1, 0, 0, 1, 0],
+        [0, 0, 1, 0, 0, 1, 0, 0, 1],
+        [1, 0, 0, 0, 1, 0, 0, 0, 1],
+        [0, 0, 1, 0, 1, 0, 1, 0, 0],
+    ]
+    for line in winning_lines:
+        board = Board()
+        board.squares = line
+        assert board.game_status() == TTT_PLAYER1_WIN, "Bad win check in TicTacToe"
+
+
+def test_tictactoe_bad_move() -> None:
+    """Test that illegal TicTacToe moves are rejected."""
+    board = Board()
+    # 1) move out of bounds should be rejected
+    for outside_space in [-1, 9]:
+        with pytest.raises(BadTicTacToeMoveException):
+            board.play_turn(0, outside_space)
+
+    # 2) move by unknown agent should be rejected
+    for unknown_agent in [-1, 2]:
+        with pytest.raises(BadTicTacToeMoveException):
+            board.play_turn(unknown_agent, 0)
+
+    # 3) move in occupied space by either agent should be rejected
+    board.play_turn(0, 4)  # this is fine
+    for agent in [0, 1]:
+        with pytest.raises(BadTicTacToeMoveException):
+            board.play_turn(agent, 4)  # repeating move is not valid