In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import numpy as np

In [2]:
# implementing a simple policy network for tic tac toe that outputs a probability distribution over all moves

class SimplePolicyNetwork(nn.Module):
    """
    Small convolutional policy network for grid-based board games.

    Two 3x3 convolutions (padding 1, so height and width are preserved) feed a
    single linear layer; a softmax turns its output into one probability per move.
    """

    def __init__(self, board_size, num_moves):
        """
        Build the layers.
        :param board_size: Board dimensions as (height, width), e.g. (3, 3) for Tic Tac Toe.
        :param num_moves: Number of entries in the output distribution.
        """
        super().__init__()
        flat_features = 128 * board_size[0] * board_size[1]
        self.conv1 = nn.Conv2d(1, 64, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.fc = nn.Linear(flat_features, num_moves)

    def forward(self, x):
        """
        Run the network on a batch of game states.
        :param x: Input tensor laid out as [batch_size, 1, height, width].
        :return: Tensor of shape [batch_size, num_moves]; each row sums to 1.
        """
        # Convolutional feature extractor: conv -> ReLU, twice
        features = torch.relu(self.conv1(x))
        features = torch.relu(self.conv2(features))

        # Collapse channel and spatial axes, keeping the batch axis
        flattened = torch.flatten(features, start_dim=1)

        # Linear scores per move, normalised into probabilities
        logits = self.fc(flattened)
        return torch.softmax(logits, dim=1)

# Demo: build a Tic Tac Toe sized network and run a single forward pass
board_size = (3, 3)
num_moves = board_size[0] * board_size[1]  # one candidate move per cell
model = SimplePolicyNetwork(board_size, num_moves)

# Random stand-in for a game state, laid out as [batch_size, channels, height, width].
# Here that is [1, 1, 3, 3]: one sample, one channel, on the 3x3 board.
game_state = torch.randn(1, 1, *board_size)

# Forward pass: the network returns one probability per move
probabilities = model(game_state)
print(probabilities)


tensor([[0.1119, 0.1021, 0.1026, 0.1123, 0.1156, 0.1169, 0.1072, 0.1223, 0.1089]],
       grad_fn=<SoftmaxBackward0>)


In [6]:
# Solving tic-tac-toe using tree search
# This is a simple implementation of the minimax algorithm

# The game is represented as a 3x3 matrix
# 0 represents an empty cell
# 1 represents a cell with a cross
# 2 represents a cell with a circle

class TicTacToe:
    """
    Mutable Tic Tac Toe game state on a 3x3 integer board.

    Cell values: 0 = empty, 1 = cross, 2 = circle. ``turn`` holds the player
    (1 or 2) whose move is next; ``winner`` holds the winning player as last
    determined by ``is_game_over()`` (0 when there is none).
    """

    def __init__(self):
        """Start with an empty board, player 1 to move and no winner."""
        self.board = np.zeros((3, 3), dtype=int)
        self.turn = 1
        self.winner = 0

    def is_full(self):
        """Return True when no cell is empty (every entry is non-zero)."""
        return bool(np.all(self.board))

    def is_winner(self, player):
        """
        Return True if ``player`` fills a complete row, column or diagonal.
        :param player: Player id to check (1 or 2).
        """
        for i in range(3):
            if np.all(self.board[i] == player) or np.all(self.board[:, i] == player):
                return True
        if np.all(self.board.diagonal() == player) or np.all(np.fliplr(self.board).diagonal() == player):
            return True
        return False

    def is_game_over(self):
        """
        Return True if either player has won or the board is full.

        Side effect: ``self.winner`` is recomputed on every call, so it is the
        winning player id when there is one and 0 otherwise. Recomputing (rather
        than only ever setting it) keeps it correct after moves are undone.
        """
        self.winner = 0
        for player in (1, 2):
            if self.is_winner(player):
                self.winner = player
                return True
        return self.is_full()

    def get_valid_moves(self):
        """Return the empty cells as an array of (row, col) pairs."""
        return np.argwhere(self.board == 0)

    def get_valid_moves_indices(self):
        """Return the empty cells as flat (row-major) indices into the board."""
        return np.flatnonzero(self.board == 0)

    def make_move(self, move):
        """
        Place the current player's mark at ``move`` and pass the turn.
        :param move: (row, col) of the target cell. No legality check is done here.
        """
        self.board[tuple(move)] = self.turn
        self.turn = 3 - self.turn  # toggles between 1 and 2

    def make_move_from_index(self, index):
        """
        Same as ``make_move`` but takes a flat (row-major) cell index.
        :param index: Flat index into the 3x3 board.
        """
        move = np.unravel_index(index, (3, 3))
        self.make_move(move)

    def undo_move(self, move):
        """
        Clear the cell at ``move`` and hand the turn back to the previous player.
        :param move: (row, col) of the cell to clear.
        """
        self.board[tuple(move)] = 0
        self.turn = 3 - self.turn
        # A winner recorded for the position being undone no longer applies.
        self.winner = 0

    def __str__(self):
        """Render the board using numpy's array formatting."""
        return str(self.board)


In [3]:
# game = TicTacToe()
# game.make_move((1,1))
# print(game)

# game.make_move((2,1))
# print(game)

# game.make_move((0,1))
# print(game)

In [40]:
def convert_board_to_input(board):
    """
    Turn a game board into the network's input layout.

    The values are copied into a float tensor and singleton batch and channel
    axes are prepended, giving [batch_size, channels, height, width]; a 3x3
    board becomes a tensor of shape (1, 1, 3, 3).
    """
    as_float = torch.tensor(board, dtype=torch.float)
    # Indexing with None inserts a new leading axis: once for batch, once for channel
    return as_float[None, None]

def select_move(probabilities, valid_moves_indices):
    """
    Select the valid move with the highest probability.

    :param probabilities: Tensor of shape (1, num_moves) as returned by the
        policy network; only the first row is considered.
    :param valid_moves_indices: Flat indices of the moves that are currently
        legal (array-like of ints). Indices outside [0, num_moves) are ignored.
    :return: Flat index (int) of the chosen move; ties go to the lowest index.
    :raises ValueError: If no valid move is supplied.
    """
    scores = probabilities.detach()[0]
    # Take the move count from the tensor itself, not from a notebook global.
    move_count = scores.shape[0]

    legal = np.asarray(valid_moves_indices, dtype=np.int64).reshape(-1)
    legal = legal[(legal >= 0) & (legal < move_count)]
    if legal.size == 0:
        raise ValueError("select_move requires at least one valid move")

    legal_mask = torch.zeros(move_count, dtype=torch.bool, device=scores.device)
    legal_mask[torch.as_tensor(legal, device=scores.device)] = True

    # Mask illegal moves with -inf instead of 0: a legal move whose probability
    # underflowed to 0 must still beat every illegal move.
    masked_scores = scores.masked_fill(~legal_mask, float("-inf"))
    return int(torch.argmax(masked_scores).item())

# Fresh game state and a freshly initialised (untrained) policy network
game = TicTacToe()
model = SimplePolicyNetwork(board_size, num_moves)

# The network chooses every move, for both players, until the game ends
while True:
    if game.is_game_over():
        break
    # Encode the board, score all moves, then restrict the choice to empty cells
    current_state_tensor = convert_board_to_input(game.board)
    probabilities = model(current_state_tensor)
    valid_moves_indices = game.get_valid_moves_indices()
    selected_move_index = select_move(probabilities, valid_moves_indices)
    # Apply the chosen move and show the resulting position
    game.make_move_from_index(selected_move_index)
    print(game)
    print("------")

# Report the outcome; game.winner is filled in by is_game_over()
print(f"Player {game.winner} wins!" if game.winner else "It's a draw!")

[[0 1 0]
 [0 0 0]
 [0 0 0]]
------
[[0 1 0]
 [0 2 0]
 [0 0 0]]
------
[[0 1 0]
 [0 2 0]
 [0 1 0]]
------
[[2 1 0]
 [0 2 0]
 [0 1 0]]
------
[[2 1 0]
 [0 2 0]
 [0 1 1]]
------
[[2 1 0]
 [0 2 2]
 [0 1 1]]
------
[[2 1 0]
 [0 2 2]
 [1 1 1]]
------
Player 1 wins!


IndentationError: expected an indented block (973205156.py, line 2)