# Random Agent

In [10]:
from pettingzoo.classic import tictactoe_v3
import random
import numpy as np

# Create the environment (a "human" render mode opens a window, so it must be
# closed again even when the game loop fails).
env = tictactoe_v3.env(render_mode="human")  # Specify render mode
try:
    env.reset()
    print("Starting Tic-Tac-Toe with Random Agent!")

    # Render initial state
    env.render()

    # Play the game: both seats are driven by the same random policy.
    for agent in env.agent_iter():
        observation, reward, termination, truncation, info = env.last()

        # Game finished: step with None and stop iterating.
        if termination or truncation:
            env.step(None)
            break

        # Indices whose action_mask entry is 1 are the playable squares.
        valid_moves = np.flatnonzero(observation['action_mask'] == 1).tolist()

        # Pick one uniformly at random; .tolist() makes it a plain Python int.
        move = random.choice(valid_moves)

        # Apply the move, then redraw the board.
        env.step(move)
        env.render()

    print("Game Over!")
finally:
    # Close the environment whether the game completed or raised.
    env.close()

Starting Tic-Tac-Toe with Random Agent!
Game Over!


# Minimax agent against random agent

In [2]:
import numpy as np
import random
from pettingzoo.classic import tictactoe_v3



class MiniMaxAgent:
    """Exhaustive minimax player for 3x3 tic-tac-toe boards.

    Boards are arrays indexed ``board[row, col, plane]``. Plane 0 holds the
    marks of the side this agent maximizes for (a completed line scores +1)
    and plane 1 holds the opponent's marks (a completed line scores -1).
    ``choose_move`` reads such an array from ``observation['observation']``.

    NOTE(review): PettingZoo documents plane 0 as the observing agent's own
    marks, which would make this agent usable from either seat -- confirm
    against the installed version.
    """

    # The eight winning lines, each given as three (row, col) cells.
    _LINES = tuple(
        [((r, 0), (r, 1), (r, 2)) for r in range(3)]
        + [((0, c), (1, c), (2, c)) for c in range(3)]
        + [((0, 0), (1, 1), (2, 2)), ((0, 2), (1, 1), (2, 0))]
    )

    def __init__(self, player_symbol):
        """Remember which symbol this agent plays.

        Args:
            player_symbol: 'X' or 'O'. Only stored (together with the derived
                opponent symbol); move selection never reads it.
        """
        self.player_symbol = player_symbol
        self.opponent_symbol = 'O' if player_symbol == 'X' else 'X'

    def minimax(self, board, depth, is_maximizing,
                alpha=float('-inf'), beta=float('inf')):
        """Return the game value of ``board`` under optimal play.

        Args:
            board: (3, 3, 2) array. It is modified during the search, but every
                trial move is undone before this method returns.
            depth: Ply count from the root. Passed down the recursion; it does
                not influence the score.
            is_maximizing: True when the plane-0 side is to move, False when
                the plane-1 side is to move.
            alpha: Best score the maximizer is already guaranteed further up
                the tree. Leave at the default for an exact result.
            beta: Best score the minimizer is already guaranteed further up
                the tree. Leave at the default for an exact result.

        Returns:
            1 if the plane-0 side wins, -1 if the plane-1 side wins, 0 for a
            draw. With the default ``alpha``/``beta`` the value is exact;
            pruning only skips branches that cannot change it.
        """
        # Check for terminal states
        result = self.check_winner(board)
        if result is not None:
            return result

        plane = 0 if is_maximizing else 1
        best_score = float('-inf') if is_maximizing else float('inf')
        for i in range(3):
            for j in range(3):
                if board[i, j, 0] == 0 and board[i, j, 1] == 0:
                    # Try the move, evaluate the reply, then undo it.
                    board[i, j, plane] = 1
                    score = self.minimax(board, depth + 1, not is_maximizing,
                                         alpha, beta)
                    board[i, j, plane] = 0

                    if is_maximizing:
                        best_score = max(score, best_score)
                        alpha = max(alpha, best_score)
                    else:
                        best_score = min(score, best_score)
                        beta = min(beta, best_score)

                    # The side to move further up already has an option at
                    # least this good, so the remaining squares are irrelevant.
                    if alpha >= beta:
                        return best_score
        return best_score

    def check_winner(self, board):
        """Classify ``board`` as won, lost, drawn or still in progress.

        Returns:
            1 if plane 0 contains a complete line, otherwise -1 if plane 1
            does, otherwise 0 if every cell is occupied, otherwise None.
        """
        # Plane 0 is checked first, so it takes precedence if both planes
        # somehow contain a line.
        for plane, score in ((0, 1), (1, -1)):
            for (r0, c0), (r1, c1), (r2, c2) in self._LINES:
                if (board[r0, c0, plane] and board[r1, c1, plane]
                        and board[r2, c2, plane]):
                    return score

        # Draw: no line and no empty cell. Comparing against zero (instead of
        # a bitwise OR of the planes) also works for non-integer dtypes.
        if np.all((board[:, :, 0] != 0) | (board[:, :, 1] != 0)):
            return 0

        # Game still in progress
        return None

    def choose_move(self, observation):
        """Pick the best move for the plane-0 side.

        Args:
            observation: Mapping whose ``'observation'`` entry is a (3, 3, 2)
                board. It is never modified: the search runs on a private
                copy, so read-only arrays are accepted too.

        Returns:
            The chosen cell as a plain ``int`` equal to ``row * 3 + col``.
            Among equally scored moves the first in row-major order wins.
            Returns None when the board has no empty cell.
        """
        # Private, writable copy so trial moves never touch the caller's data.
        board = np.array(observation['observation'])

        # Empty cells in row-major order.
        empty_mask = (board[:, :, 0] == 0) & (board[:, :, 1] == 0)
        rows, cols = np.nonzero(empty_mask)
        empty_cells = list(zip(rows.tolist(), cols.tolist()))

        # If only one space is left, take it without searching.
        if len(empty_cells) == 1:
            i, j = empty_cells[0]
            return i * 3 + j

        # Try all possible moves and keep the first one with the top score.
        best_score = float('-inf')
        best_move = None
        for i, j in empty_cells:
            board[i, j, 0] = 1
            # Full (default) search window: the score is the exact game value.
            score = self.minimax(board, 0, False)
            board[i, j, 0] = 0

            if score > best_score:
                best_score = score
                best_move = i * 3 + j

        return best_move

# Create the environment (a "human" render mode opens a window, so it must be
# closed again even when the game loop fails).
env = tictactoe_v3.env(render_mode="human")
try:
    env.reset()
    print("Starting Tic-Tac-Toe: MiniMax vs Random!")

    # Create agents
    minimax_agent = MiniMaxAgent('X')  # MiniMax is X

    # Render initial state
    env.render()

    # Play the game
    for agent in env.agent_iter():
        observation, reward, termination, truncation, info = env.last()

        # Game finished: step with None and stop iterating.
        if termination or truncation:
            env.step(None)
            break

        # Indices whose action_mask entry is 1 are the playable squares.
        action_mask = observation['action_mask']
        valid_moves = np.flatnonzero(action_mask == 1).tolist()

        # The first listed agent is driven by minimax; the other seat has no
        # proposal and therefore always takes the random branch below.
        move = None
        if agent == env.possible_agents[0]:  # MiniMax agent (X)
            move = minimax_agent.choose_move(observation)

        # Random agent (O), and fallback when minimax returned nothing or an
        # illegal square.
        if move is None or action_mask[move] == 0:
            move = random.choice(valid_moves)

        # Apply the move, then redraw the board.
        env.step(move)
        env.render()

    print("Game Over!")
finally:
    # Close the environment whether the game completed or raised.
    env.close()
  

Starting Tic-Tac-Toe: MiniMax vs Random!
Game Over!


# Render and save video

In [6]:
import numpy as np
import random
import cv2
from pettingzoo.classic import tictactoe_v3

# Toggle for the video export at the end of this cell: the file is written only
# when this is True and at least one frame was captured.
record_video = True

class MiniMaxAgent:
    """Exhaustive minimax player for 3x3 tic-tac-toe boards.

    Boards are arrays indexed ``board[row, col, plane]``. Plane 0 holds the
    marks of the side this agent maximizes for (a completed line scores +1)
    and plane 1 holds the opponent's marks (a completed line scores -1).
    ``choose_move`` reads such an array from ``observation['observation']``.

    NOTE(review): PettingZoo documents plane 0 as the observing agent's own
    marks, which would make this agent usable from either seat -- confirm
    against the installed version.
    """

    # The eight winning lines, each given as three (row, col) cells.
    _LINES = tuple(
        [((r, 0), (r, 1), (r, 2)) for r in range(3)]
        + [((0, c), (1, c), (2, c)) for c in range(3)]
        + [((0, 0), (1, 1), (2, 2)), ((0, 2), (1, 1), (2, 0))]
    )

    def __init__(self, player_symbol):
        """Remember which symbol this agent plays.

        Args:
            player_symbol: 'X' or 'O'. Only stored (together with the derived
                opponent symbol); move selection never reads it.
        """
        self.player_symbol = player_symbol
        self.opponent_symbol = 'O' if player_symbol == 'X' else 'X'

    def minimax(self, board, depth, is_maximizing,
                alpha=float('-inf'), beta=float('inf')):
        """Return the game value of ``board`` under optimal play.

        Args:
            board: (3, 3, 2) array. It is modified during the search, but every
                trial move is undone before this method returns.
            depth: Ply count from the root. Passed down the recursion; it does
                not influence the score.
            is_maximizing: True when the plane-0 side is to move, False when
                the plane-1 side is to move.
            alpha: Best score the maximizer is already guaranteed further up
                the tree. Leave at the default for an exact result.
            beta: Best score the minimizer is already guaranteed further up
                the tree. Leave at the default for an exact result.

        Returns:
            1 if the plane-0 side wins, -1 if the plane-1 side wins, 0 for a
            draw. With the default ``alpha``/``beta`` the value is exact;
            pruning only skips branches that cannot change it.
        """
        # Check for terminal states
        result = self.check_winner(board)
        if result is not None:
            return result

        plane = 0 if is_maximizing else 1
        best_score = float('-inf') if is_maximizing else float('inf')
        for i in range(3):
            for j in range(3):
                if board[i, j, 0] == 0 and board[i, j, 1] == 0:
                    # Try the move, evaluate the reply, then undo it.
                    board[i, j, plane] = 1
                    score = self.minimax(board, depth + 1, not is_maximizing,
                                         alpha, beta)
                    board[i, j, plane] = 0

                    if is_maximizing:
                        best_score = max(score, best_score)
                        alpha = max(alpha, best_score)
                    else:
                        best_score = min(score, best_score)
                        beta = min(beta, best_score)

                    # The side to move further up already has an option at
                    # least this good, so the remaining squares are irrelevant.
                    if alpha >= beta:
                        return best_score
        return best_score

    def check_winner(self, board):
        """Classify ``board`` as won, lost, drawn or still in progress.

        Returns:
            1 if plane 0 contains a complete line, otherwise -1 if plane 1
            does, otherwise 0 if every cell is occupied, otherwise None.
        """
        # Plane 0 is checked first, so it takes precedence if both planes
        # somehow contain a line.
        for plane, score in ((0, 1), (1, -1)):
            for (r0, c0), (r1, c1), (r2, c2) in self._LINES:
                if (board[r0, c0, plane] and board[r1, c1, plane]
                        and board[r2, c2, plane]):
                    return score

        # Draw: no line and no empty cell. Comparing against zero (instead of
        # a bitwise OR of the planes) also works for non-integer dtypes.
        if np.all((board[:, :, 0] != 0) | (board[:, :, 1] != 0)):
            return 0

        # Game still in progress
        return None

    def choose_move(self, observation):
        """Pick the best move for the plane-0 side.

        Args:
            observation: Mapping whose ``'observation'`` entry is a (3, 3, 2)
                board. It is never modified: the search runs on a private
                copy, so read-only arrays are accepted too.

        Returns:
            The chosen cell as a plain ``int`` equal to ``row * 3 + col``.
            Among equally scored moves the first in row-major order wins.
            Returns None when the board has no empty cell.
        """
        # Private, writable copy so trial moves never touch the caller's data.
        board = np.array(observation['observation'])

        # Empty cells in row-major order.
        empty_mask = (board[:, :, 0] == 0) & (board[:, :, 1] == 0)
        rows, cols = np.nonzero(empty_mask)
        empty_cells = list(zip(rows.tolist(), cols.tolist()))

        # If only one space is left, take it without searching.
        if len(empty_cells) == 1:
            i, j = empty_cells[0]
            return i * 3 + j

        # Try all possible moves and keep the first one with the top score.
        best_score = float('-inf')
        best_move = None
        for i, j in empty_cells:
            board[i, j, 0] = 1
            # Full (default) search window: the score is the exact game value.
            score = self.minimax(board, 0, False)
            board[i, j, 0] = 0

            if score > best_score:
                best_score = score
                best_move = i * 3 + j

        return best_move

# Create the environment; "rgb_array" makes env.render() return image frames.
env = tictactoe_v3.env(render_mode="rgb_array")

# Video recording setup: one frame per loop iteration is collected here.
frames = []

try:
    env.reset()
    print("Starting Tic-Tac-Toe: MiniMax vs Random!")

    # Create agents
    minimax_agent = MiniMaxAgent('X')  # MiniMax is X

    # Play the game
    for agent in env.agent_iter():
        observation, reward, termination, truncation, info = env.last()

        # Capture the board before acting. Doing this ahead of the
        # termination check means the final position is recorded too.
        frame = env.render()
        if frame is not None:  # skip anything the writer could not encode
            frames.append(frame)

        # Game finished: step with None and stop iterating.
        if termination or truncation:
            env.step(None)
            break

        # Indices whose action_mask entry is 1 are the playable squares.
        action_mask = observation['action_mask']
        valid_moves = np.flatnonzero(action_mask == 1).tolist()

        # The first listed agent is driven by minimax; the other seat has no
        # proposal and therefore always takes the random branch below.
        move = None
        if agent == env.possible_agents[0]:  # MiniMax agent (X)
            move = minimax_agent.choose_move(observation)

        # Random agent (O), and fallback when minimax returned nothing or an
        # illegal square.
        if move is None or action_mask[move] == 0:
            move = random.choice(valid_moves)

        # Step in the environment with the chosen move
        env.step(move)

    print("Game Over!")
finally:
    # Close the environment whether the game completed or raised.
    env.close()

if record_video and frames:
    # Define codec and initialize VideoWriter
    fourcc = cv2.VideoWriter_fourcc(*'avc1') # mp4v doesn't work with Mac
    height, width = frames[0].shape[:2]  # Assume all frames have the same shape
    # VideoWriter takes the frame size as (width, height); 2.0 is the FPS.
    out = cv2.VideoWriter('./tictactoe_game.mp4', fourcc, 2.0, (width, height))

    try:
        opened = out.isOpened()
        if opened:
            # Frames are converted RGB -> BGR before being handed to OpenCV.
            for frame in frames:
                out.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
    finally:
        # Release the writer on every path, including a failed open or write.
        out.release()

    if opened:
        print("Video saved as tictactoe_game.mp4")
    else:
        print("VideoWriter failed to open. Check codec or output path.")
else:
    print("No video saved. Check `record_video` and ensure frames are collected.")




Starting Tic-Tac-Toe: MiniMax vs Random!
Game Over!
Video saved as tictactoe_game.mp4
