In [10]:
import numpy as np
from tabulate import tabulate

class TicTacToeEnv:
    def __init__(self):
        self.board = None
        self.current_player = None
        self.reset()

    def reset(self):
        self.board = np.zeros((3, 3), dtype=int)
        self.current_player = 1  # Player 1 starts
        return self.board

    def step(self, action):
        row, col = action
        if self.board[row, col] == 0:
            self.board[row, col] = self.current_player
            if self.is_winner(self.current_player):
                return self.board, self.current_player, True, {}
            elif self.is_draw():
                return self.board, 0, True, {}
            else:
                self.current_player = 3 - self.current_player  # Switch player
                return self.board, None, False, {}
        else:
            print("Invalid action!")
            return self.board, None, False, {'message': 'Invalid action'}

    def render(self):
        board = np.zeros((3, 3), dtype=str)
        for ii in range(3):
            for jj in range(3):
                if self.board[ii][jj] == 0:
                    board[ii, jj] = "-"
                elif self.board[ii][jj] == 1:
                    board[ii, jj] = "X"
                elif self.board[ii][jj] == 2:
                    board[ii, jj] = "O"
        board = tabulate(board, tablefmt="fancy_grid")
        print(board)

    def is_winner(self, player):
        # Check rows, columns, and diagonals
        for i in range(3):
            if all(self.board[i, :] == player) or all(self.board[:, i] == player):
                return True
        if self.board[0, 0] == self.board[1, 1] == self.board[2, 2] == player or \
           self.board[0, 2] == self.board[1, 1] == self.board[2, 0] == player:
            return True
        return False

    def is_draw(self):
        return np.all(self.board != 0)

    def available_actions(self):
        return [(row, col) for row in range(3) for col in range(3) if self.board[row, col] == 0]

    def check_move_result(self, action):
        row, col = action
        # Temporarily make the move on a copy of the board
        temp_board = np.copy(self.board)
        temp_board[row, col] = self.current_player
        if self.is_winner(self.current_player, temp_board):
            return 1.0
        elif self.is_draw(temp_board):
            return 0.0
        else:
            return 0.0


In [11]:
# Create an instance of the TicTacToe environment
env = TicTacToeEnv()

# Reset the game to its initial state
env.reset()

# Making a move; for example, player 1 places an 'X' at position (0, 0)
state, reward, done, info = env.step((0, 0))

# Render the board after the first move
env.render()



╒═══╤═══╤═══╕
│ X │ - │ - │
├───┼───┼───┤
│ - │ - │ - │
├───┼───┼───┤
│ - │ - │ - │
╘═══╧═══╧═══╛


In [12]:
env.check_move_result((1, 0))

TypeError: is_winner() takes 2 positional arguments but 3 were given

In [5]:
step(board, 1, (0,0))
print_board(board)

[1. 0. 0.]
[0. 0. 0.]
[0. 0. 0.]



In [6]:
step(board, -1, (0,1))
print_board(board)

[ 1. -1.  0.]
[0. 0. 0.]
[0. 0. 0.]



In [7]:
check_draw(board)


False