In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [3]:
class BoardGameNet(nn.Module):
    def __init__(self, board_size, hidden_size=128):
        super(BoardGameNet, self).__init__()
    
        input_layer_size = board_size * board_size + 1 # 1 neuron for every square, and another
                                                     #   neuron to indicate which player is moving 


        self.fc1 = nn.Linear(input_layer_size, hidden_size) # input layer
        self.fc2 = nn.Linear(hidden_size, hidden_size) # hidden layer
        self.fc3 = nn.Linear(hidden_size, board_size * board_size) # output layer

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)

        x = F.softmax(x, dim=1)

In [4]:
board_size = 3
model = BoardGameNet(board_size)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

building the dataloader:

In [2]:
import copy

PLAYER1, BLANK, PLAYER2 = -1, 0, 1 #enum encoding representation of square values


board = [[0] * 3] # each "board" is 9 squares encoded with x or o as -1 / 1
                    # and an element dictating who's turn it is (player1 or player2) as -1 / 1
                    # am thinking about adding the turn element after populating the board


In [4]:
# Function to determine whose turn it is

def get_current_player(board):
    player1_moves = sum(cell == PLAYER1 for row in board for cell in row)
    player2_moves = sum(cell == PLAYER2 for row in board for cell in row)
    
    if player1_moves <= player2_moves:
        return PLAYER1  # Player 1's turn
    else:
        return PLAYER2  # Player 2's turn

1


In [5]:

def check_winner(board):
    # Check rows, columns, and diagonals for a winner
    for row in board:
        if abs(sum(row)) == 3:  # All squares in row are the same
            return True
    
    for col in range(3):
        if abs(sum([board[row][col] for row in range(3)])) == 3:
            return True
    
    # Check diagonals
    if abs(board[0][0] + board[1][1] + board[2][2]) == 3:
        return True
    if abs(board[0][2] + board[1][1] + board[2][0]) == 3:
        return True
    
    return False


In [8]:
def buildBoards(board):
    new_boards = []
    new_boards.append(board)

    print(new_boards)

    for board in new_boards:

        current_player = get_current_player(board)  # Determine the current player

        for row in range(3):
            for col in range(3):
                
                if board[row][col] == BLANK:
                    # Create a deep copy of the current board
                    board_copy = [copy.deepcopy(row) for row in board]

                    # Place the current player's mark in the empty square
                    board_copy[row][col] = current_player
                    
                    #print("it is player",current_player,"'s turn, evaluating position (",row,",",col,")\n")
                    #print("old board:   \n", board[0], "\n", board[1], "\n", board[2], "\n")
                    #print("board with move:  \n", board_copy[0],"\n",board_copy[1],"\n",board_copy[2], "\n\n\n")
                    
                    
                    # Check if the board has a winner
                    if not check_winner(board_copy):
                        #print("this board has been added to the dataset")
                        new_boards.append(board_copy)
                        


    return new_boards



In [9]:
board = [[0] * 3] * 3

dataset = buildBoards(board)

print(len(dataset))

[[[0, 0, 0], [0, 0, 0], [0, 0, 0]]]
340858


In [None]:
def train(model, data_loader, epochs=10):
    for epoch in range(epochs):
        for board_state, move_label in data_loader:

            #zero gradients
            optimizer.zero_grad()


            #forward pass (propagate forward)
            output = model(board_state)
            loss = criterion(output, move_label)

            
            # Backpropagation and Optimization
            loss.backward()
            optimizer.step()

        print(f'Epoch {epoch+1}/{epochs}, Loss: {loss.item()}')