Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update the TicTacToe environment #1192

Merged
merged 27 commits into from
May 3, 2024
Merged
Show file tree
Hide file tree
Changes from 20 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
b12f6ba
Simplify TicTacToe reward accumulation
dm-ackerman Mar 9, 2024
24f7569
Don't update TicTacToe agent on winning step
dm-ackerman Mar 9, 2024
e3f9cc9
Move TicTacToe test for valid moves to Board
dm-ackerman Mar 9, 2024
6708fdb
Hard code winning lines in TicTacToe board
dm-ackerman Mar 9, 2024
ea15ddf
Simplify win check in TicTacToe
dm-ackerman Mar 9, 2024
967caae
Clean up tictactoe board functions
dm-ackerman Mar 9, 2024
e6851a1
Add reset to TicTacToe board
dm-ackerman Mar 11, 2024
42794bc
Update TicTacToe masking and observe
dm-ackerman Mar 11, 2024
2847c17
Update TicTacToe winning code
dm-ackerman Mar 11, 2024
eda5033
Minor cleanups of TicTacToe code
dm-ackerman Mar 11, 2024
364c307
Don't create screen if not rending in TicTacToe
dm-ackerman Mar 11, 2024
758e6a2
Add legal_moves() to TicTacToe board
dm-ackerman Mar 13, 2024
8b7ce5e
Remove win detection short-cut in TicTacToe
dm-ackerman Mar 20, 2024
5bebafa
Remove unneeded variable in TicTacToe
dm-ackerman Mar 20, 2024
8e33bf7
Add test cases for TicTacToe board
dm-ackerman Mar 20, 2024
df4c950
Update TicTacToe code comments
dm-ackerman Mar 20, 2024
f750754
Merge branch 'master' into ttt_update
dm-ackerman Mar 20, 2024
72ef62f
Bump TicTacToe environment to version 4
dm-ackerman Mar 20, 2024
e4bd228
Add __future__ annotations to TicTacToe tests
dm-ackerman Mar 20, 2024
4f35a22
Merge branch 'master' into ttt_update
dm-ackerman Mar 20, 2024
1f95299
Change TicTacToe from medium to easy in SB3 test.
dm-ackerman Mar 22, 2024
39de495
Replace TicTacToe exceptions with asserts
dm-ackerman Mar 22, 2024
6336393
Check messages of assert errors in tictactoe test
dm-ackerman Mar 22, 2024
71ca217
Fix agent swap in TicTacToe
dm-ackerman May 3, 2024
4170f2b
revert TicTacToe version to 3
dm-ackerman May 3, 2024
da8373e
Merge branch 'master' into ttt_update
dm-ackerman May 3, 2024
44716c3
Merge branch 'master' into ttt_update
elliottower May 3, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 2 additions & 2 deletions docs/api/wrappers/pz_wrappers.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,8 @@ You can apply these wrappers to your environment in a similar manner to the belo
To wrap an AEC environment:
```python
from pettingzoo.utils import TerminateIllegalWrapper
from pettingzoo.classic import tictactoe_v3
env = tictactoe_v3.env()
from pettingzoo.classic import tictactoe_v4
env = tictactoe_v4.env()
env = TerminateIllegalWrapper(env, illegal_reward=-1)

env.reset()
Expand Down
4 changes: 2 additions & 2 deletions pettingzoo/classic/all_modules.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,14 @@
rps_v2,
texas_holdem_no_limit_v6,
texas_holdem_v4,
tictactoe_v3,
tictactoe_v4,
)

classic_environments = {
"classic/chess_v6": chess_v6,
"classic/rps_v2": rps_v2,
"classic/connect_four_v3": connect_four_v3,
"classic/tictactoe_v3": tictactoe_v3,
"classic/tictactoe_v4": tictactoe_v4,
"classic/leduc_holdem_v4": leduc_holdem_v4,
"classic/texas_holdem_v4": texas_holdem_v4,
"classic/texas_holdem_no_limit_v6": texas_holdem_no_limit_v6,
Expand Down
164 changes: 99 additions & 65 deletions pettingzoo/classic/tictactoe/board.py
Original file line number Diff line number Diff line change
@@ -1,79 +1,113 @@
class BadTicTacToeMoveException(Exception):
elliottower marked this conversation as resolved.
Show resolved Hide resolved
"""Exception raised when a bad move is made on TicTacToe board."""

def __init__(self, message="Bad TicTacToe move"):
super().__init__(message)


# Status codes returned by Board.game_status().
TTT_PLAYER1_WIN = 0  # some winning line is entirely marked 1
TTT_PLAYER2_WIN = 1  # some winning line is entirely marked 2
TTT_TIE = -1  # no winning line and no empty square left
TTT_GAME_NOT_OVER = -2  # no winning line and at least one empty square


class Board:
"""Board for a TicTacToe Game.

This tracks the position and identity of marks on the game board
and allows checking for a winner.

Example of usage:

import random
board = Board()

# random legal moves - for example purposes
def choose_move(board_obj: Board) -> int:
legal_moves = [i for i, mark in enumerate(board_obj.squares) if mark == 0]
return random.choice(legal_moves)

player = 0
while True:
move = choose_move(board)
board.play_turn(player, move)
status = board.game_status()
if status != TTT_GAME_NOT_OVER:
if status in [TTT_PLAYER1_WIN, TTT_PLAYER2_WIN]:
print(f"player {status} won")
else: # status == TTT_TIE
print("Tie Game")
break
player = player ^ 1 # swaps between players 0 and 1
"""

# indices of the winning lines: vertical(x3), horizontal(x3), diagonal(x2)
winning_combinations = [
(0, 1, 2),
(3, 4, 5),
(6, 7, 8),
(0, 3, 6),
(1, 4, 7),
(2, 5, 8),
(0, 4, 8),
(2, 4, 6),
]

def __init__(self):
# internally self.board.squares holds a flat representation of tic tac toe board
# where an empty board is [0, 0, 0, 0, 0, 0, 0, 0, 0]
# where indexes are column wise order
# self.squares holds a flat representation of the tic tac toe board.
# an empty board is [0, 0, 0, 0, 0, 0, 0, 0, 0].
# player 1's squares are marked 1, while player 2's are marked 2.
# mapping of the flat indices to the 3x3 grid is as follows:
# 0 3 6
# 1 4 7
# 2 5 8

# empty -- 0
# player 0 -- 1
# player 1 -- 2
self.squares = [0] * 9

# precommute possible winning combinations
self.calculate_winners()
@property
def _n_empty_squares(self):
"""The current number of empty squares on the board."""
return self.squares.count(0)

def setup(self):
self.calculate_winners()
def reset(self):
    """Clear the board: all 9 squares go back to the empty mark (0)."""
    self.squares = [0 for _ in range(9)]

def play_turn(self, agent, pos):
# if spot is empty
"""Place a mark by the agent in the spot given.

The following are required for a move to be valid:
* The agent must be a known agent ID (either 0 or 1).
* The spot must be empty.
* The spot must be in the board (integer: 0 <= spot <= 8)

If any of those are not true, a BadTicTacToeMoveException
will be raised.
"""
if pos < 0 or pos > 8:
raise BadTicTacToeMoveException("Invalid move location")
if agent != 0 and agent != 1:
raise BadTicTacToeMoveException("Invalid agent")
if self.squares[pos] != 0:
return
if agent == 0:
self.squares[pos] = 1
elif agent == 1:
self.squares[pos] = 2
return

def calculate_winners(self):
winning_combinations = []
indices = [x for x in range(0, 9)]

# Vertical combinations
winning_combinations += [
tuple(indices[i : (i + 3)]) for i in range(0, len(indices), 3)
]

# Horizontal combinations
winning_combinations += [
tuple(indices[x] for x in range(y, len(indices), 3)) for y in range(0, 3)
]

# Diagonal combinations
winning_combinations.append(tuple(x for x in range(0, len(indices), 4)))
winning_combinations.append(tuple(x for x in range(2, len(indices) - 1, 2)))

self.winning_combinations = winning_combinations

# returns:
# -1 for no winner
# 1 -- agent 0 wins
# 2 -- agent 1 wins
def check_for_winner(self):
winner = -1
for combination in self.winning_combinations:
states = []
for index in combination:
states.append(self.squares[index])
if all(x == 1 for x in states):
winner = 1
if all(x == 2 for x in states):
winner = 2
return winner

def check_game_over(self):
winner = self.check_for_winner()

if winner == -1 and all(square in [1, 2] for square in self.squares):
# tie
return True
elif winner in [1, 2]:
return True
else:
return False
raise BadTicTacToeMoveException("Location is not empty")

# agent is [0, 1]. board values are stored as [1, 2].
self.squares[pos] = agent + 1

def game_status(self):
    """Report the current state of the game.

    The winning lines are scanned in the order they are listed in
    ``winning_combinations``; the first line found to be entirely marked
    by one player decides the result.

    Returns:
        TTT_PLAYER1_WIN or TTT_PLAYER2_WIN if such a line exists,
        TTT_TIE if there is none and no empty square remains,
        TTT_GAME_NOT_OVER otherwise.
    """
    for line in self.winning_combinations:
        marks = tuple(self.squares[square] for square in line)
        if marks == (1, 1, 1):
            return TTT_PLAYER1_WIN
        if marks == (2, 2, 2):
            return TTT_PLAYER2_WIN
    # no winner: the game continues for as long as a square is still free
    if self._n_empty_squares != 0:
        return TTT_GAME_NOT_OVER
    return TTT_TIE

def __str__(self):
return str(self.squares)

def legal_moves(self):
    """List the flat indices of all squares that are still empty.

    A square is empty when its stored mark equals 0. Indices are
    returned in ascending order.
    """
    empty_mark = 0
    return [
        position
        for position, owner in enumerate(self.squares)
        if owner == empty_mark
    ]
128 changes: 128 additions & 0 deletions pettingzoo/classic/tictactoe/test_board.py
elliottower marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
"""Test cases for TicTacToe board."""

from __future__ import annotations

from typing import Any

import pytest

from pettingzoo.classic.tictactoe.board import ( # type: ignore
TTT_GAME_NOT_OVER,
TTT_PLAYER1_WIN,
TTT_PLAYER2_WIN,
TTT_TIE,
BadTicTacToeMoveException,
Board,
)

# Note: mapping of moves to board positions are:
# 0 3 6
# 1 4 7
# 2 5 8

# Scripted game in which agent_id 1 (board mark 2) completes squares 6-7-8.
agent2_win = {
    "moves": [
        # agent_id, position, board after move
        (0, 4, [0, 0, 0, 0, 1, 0, 0, 0, 0]),
        (1, 0, [2, 0, 0, 0, 1, 0, 0, 0, 0]),
        (0, 2, [2, 0, 1, 0, 1, 0, 0, 0, 0]),
        (1, 6, [2, 0, 1, 0, 1, 0, 2, 0, 0]),
        (0, 3, [2, 0, 1, 1, 1, 0, 2, 0, 0]),
        (1, 7, [2, 0, 1, 1, 1, 0, 2, 2, 0]),
        (0, 1, [2, 1, 1, 1, 1, 0, 2, 2, 0]),
        (1, 8, [2, 1, 1, 1, 1, 0, 2, 2, 2]),  # agent 2 wins here
        # extra move after the win; the game test should stop before it
        (0, 5, [2, 1, 1, 1, 1, 1, 2, 2, 2]),
    ],
    # zero-based index into "moves" of the step that must end the game
    "max_step": 7,  # should not get past here
    "winner": TTT_PLAYER2_WIN,
}

# Scripted game that fills all nine squares without a completed line.
tie = {
    "moves": [  # should be tie
        # agent_id, position, board after move
        (0, 0, [1, 0, 0, 0, 0, 0, 0, 0, 0]),
        (1, 3, [1, 0, 0, 2, 0, 0, 0, 0, 0]),
        (0, 1, [1, 1, 0, 2, 0, 0, 0, 0, 0]),
        (1, 4, [1, 1, 0, 2, 2, 0, 0, 0, 0]),
        (0, 5, [1, 1, 0, 2, 2, 1, 0, 0, 0]),
        (1, 2, [1, 1, 2, 2, 2, 1, 0, 0, 0]),
        (0, 6, [1, 1, 2, 2, 2, 1, 1, 0, 0]),
        (1, 7, [1, 1, 2, 2, 2, 1, 1, 2, 0]),
        (0, 8, [1, 1, 2, 2, 2, 1, 1, 2, 1]),
    ],
    # zero-based index of the last move, which fills the final empty square
    "max_step": 8,
    "winner": TTT_TIE,
}

# Scripted game in which agent_id 0 (board mark 1) completes squares 0-1-2.
agent1_win = {
    "moves": [
        # agent_id, position, board after move
        (0, 0, [1, 0, 0, 0, 0, 0, 0, 0, 0]),
        (1, 3, [1, 0, 0, 2, 0, 0, 0, 0, 0]),
        (0, 1, [1, 1, 0, 2, 0, 0, 0, 0, 0]),
        (1, 4, [1, 1, 0, 2, 2, 0, 0, 0, 0]),
        (0, 2, [1, 1, 1, 2, 2, 0, 0, 0, 0]),  # agent 1 should win here
        # extra moves after the win; the game test should stop before them
        (1, 5, [1, 1, 1, 2, 2, 2, 0, 0, 0]),
        (0, 6, [1, 1, 1, 2, 2, 2, 1, 0, 0]),
        (1, 7, [1, 1, 1, 2, 2, 2, 1, 2, 0]),
        (0, 8, [1, 1, 1, 2, 2, 2, 1, 2, 1]),
    ],
    # zero-based index into "moves" of the step that must end the game
    "max_step": 4,
    "winner": TTT_PLAYER1_WIN,
}


@pytest.mark.parametrize("values", [agent1_win, agent2_win, tie])
def test_tictactoe_board_games(values: dict[str, Any]) -> None:
    """Test that TicTacToe games go as expected.

    Each scripted game is replayed move by move. After every move the
    board layout must match the scripted layout, and the game must report
    a finished status exactly on step ``max_step`` with the expected
    winner. A game that never reports a finished status is a failure.

    Args:
        values: scripted game with keys ``"moves"`` (list of
            ``(agent_id, position, board_after_move)``), ``"max_step"``
            (zero-based index of the move that ends the game) and
            ``"winner"`` (expected ``game_status()`` result).
    """
    expected_winner = values["winner"]
    max_step = values["max_step"]

    board = Board()
    for i, (agent, pos, board_layout) in enumerate(values["moves"]):
        assert i <= max_step, "max step exceed in tictactoe game"
        board.play_turn(agent, pos)
        assert board_layout == board.squares, "wrong tictactoe layout after move"
        status = board.game_status()
        if status != TTT_GAME_NOT_OVER:
            assert i == max_step, "tictactoe game ended on wrong step"
            assert status == expected_winner, "wrong winner in tictactoe board test"
            break
    else:
        # Only reached when the loop ran out of moves without a break, i.e.
        # game_status() never reported a win or a tie. Without this check a
        # script whose last move is at max_step would pass silently.
        pytest.fail("tictactoe game never reported a finished status")


def test_tictactoe_winning_boards() -> None:
    """Test that winning board configurations actually win.

    Every winning line is checked for both players, so both win branches
    of ``Board.game_status()`` are exercised.
    """
    # Each row marks only the three squares of one winning line; all other
    # squares are left empty. The layouts are assigned directly to
    # ``board.squares`` rather than being reached through play_turn().
    winning_lines = [  # vertical(x3), horizontal(x3), diagonal(x2)
        [1, 1, 1, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 1, 1, 1, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 1, 1, 1],
        [1, 0, 0, 1, 0, 0, 1, 0, 0],
        [0, 1, 0, 0, 1, 0, 0, 1, 0],
        [0, 0, 1, 0, 0, 1, 0, 0, 1],
        [1, 0, 0, 0, 1, 0, 0, 0, 1],
        [0, 0, 1, 0, 1, 0, 1, 0, 0],
    ]
    # board mark -> status expected when that mark fills a winning line
    expected_status = {1: TTT_PLAYER1_WIN, 2: TTT_PLAYER2_WIN}
    for line in winning_lines:
        for mark, expected in expected_status.items():
            board = Board()
            # re-mark the filled squares for the player under test
            board.squares = [mark if square else 0 for square in line]
            assert board.game_status() == expected, "Bad win check in TicTacToe"


def test_tictactoe_bad_move() -> None:
    """Test that illegal TicTacToe moves are rejected.

    Three kinds of illegal move are tried on one board: a position off the
    board, an unknown agent id, and a position that is already occupied.
    Each must raise BadTicTacToeMoveException.
    """
    board = Board()

    def assert_rejected(agent: int, pos: int) -> None:
        """Check that the given move raises BadTicTacToeMoveException."""
        with pytest.raises(BadTicTacToeMoveException):
            board.play_turn(agent, pos)

    # 1) positions just outside both ends of the valid 0..8 range
    assert_rejected(0, -1)
    assert_rejected(0, 9)

    # 2) agent ids just outside the valid pair (0, 1)
    assert_rejected(-1, 0)
    assert_rejected(2, 0)

    # 3) once a square is taken, neither agent may play there again
    board.play_turn(0, 4)  # legal first move into the centre square
    assert_rejected(0, 4)
    assert_rejected(1, 4)