In [26]:
from hnefatal.game import *
from tests.test_game import run_all_tests

# Run the project's test suite (tests.test_game) up front, so the cells below
# do not build on a broken game engine.
run_all_tests()

. . . . O O O O O . . . .
. . . . . . O . . . . . .
. . . . . . . . . . . . .
. . . . . . . . . . . . .
O . . . . . X . . . . . O
O . . . . X X X . . . . O
O O . . X X K X X . . O O
O . . . . X X X . . . . O
O . . . . . X . . . . . O
. . . . . . . . . . . . .
. . . . . . . . . . . . .
. . . . . . O . . . . . .
. . . . O O O O O . . . .

### All tests passed! ###


In [27]:
import torch
from torch import nn

# Use an accelerator when this torch build exposes the torch.accelerator API and
# one is available; otherwise fall back to the CPU.
if hasattr(torch, "accelerator") and torch.accelerator.is_available():
    device = torch.accelerator.current_accelerator().type
else:
    device = "cpu"
print(f"Using {device} device")

BOARD_WIDTH = 13
# Longest straight-line move: from one edge of the board to the opposite edge.
DISTANCE_SIZE = BOARD_WIDTH - 1
# Every square has DISTANCE_SIZE on-board destinations along its row and
# DISTANCE_SIZE along its column, so the move space holds
# BOARD_WIDTH**2 * 2 * DISTANCE_SIZE entries (4056 on a 13x13 board).
# Equal to valid_mask.sum(), which is asserted below.
LABEL_SIZE = BOARD_WIDTH * BOARD_WIDTH * 2 * DISTANCE_SIZE

# Enumerate every (from_row, from_col, direction, distance) combination at once.
from_rows, from_cols = torch.meshgrid(torch.arange(BOARD_WIDTH), torch.arange(BOARD_WIDTH), indexing='ij')
directions = torch.arange(4)
distances = torch.arange(1, DISTANCE_SIZE + 1)

# Broadcast everything to the common shape (BOARD_WIDTH, BOARD_WIDTH, 4, DISTANCE_SIZE)
grid_shape = (BOARD_WIDTH, BOARD_WIDTH, 4, DISTANCE_SIZE)
from_rows = from_rows.unsqueeze(-1).unsqueeze(-1).expand(-1, -1, 4, DISTANCE_SIZE)
from_cols = from_cols.unsqueeze(-1).unsqueeze(-1).expand(-1, -1, 4, DISTANCE_SIZE)
directions = directions.view(1, 1, 4, 1).expand(*grid_shape)
distances = distances.view(1, 1, 1, DISTANCE_SIZE).expand(*grid_shape)

# Destination coordinates. Direction codes: 0 = up (row decreases),
# 1 = down (row increases), 2 = left (column decreases), 3 = right (column increases).
no_shift = torch.zeros_like(distances)
to_rows = from_rows + torch.where(directions == 0, -distances, torch.where(directions == 1, distances, no_shift))
to_cols = from_cols + torch.where(directions == 2, -distances, torch.where(directions == 3, distances, no_shift))

# A move is kept when its destination is on the board. The source squares come
# from arange(BOARD_WIDTH), so they are on the board by construction.
valid_mask = (
    (to_rows >= 0) & (to_rows < BOARD_WIDTH) &
    (to_cols >= 0) & (to_cols < BOARD_WIDTH)
)

# Compare plain ints (not a 0-dim tensor) so the failure message is readable.
num_valid_moves = int(valid_mask.sum())
assert num_valid_moves == LABEL_SIZE, f"Expected {LABEL_SIZE} valid moves, but got {num_valid_moves}"

# TODO: Remove positions moving from corner
# Lookup table: entry k is the Move that label / model-output index k stands for
# (despite the name, it is indexed by label index and yields a Move).
# It starts as LABEL_SIZE references to one placeholder Move; the fill code
# further down overwrites every entry.
label_index_by_move = [Move(Coord(0, 0), Coord(0, 0))] * LABEL_SIZE

def apply_direction(row, col, direction, distance):
    """Return the (row, col) reached by moving `distance` squares from (row, col).

    Direction codes: 0 = up (row decreases), 1 = down (row increases),
    2 = left (column decreases), 3 = right (column increases).

    The result is not bounds-checked and may lie outside the board.

    Raises:
        AssertionError: if `distance` is not positive.
        ValueError: if `direction` is not one of 0, 1, 2, 3.
    """
    assert distance > 0

    if direction == 0:   # Up
        return row - distance, col
    if direction == 1:   # Down
        return row + distance, col
    if direction == 2:   # Left
        return row, col - distance
    if direction == 3:   # Right
        return row, col + distance

    # An unknown direction used to fall through and return None, which only
    # surfaced later as an unpacking error at the call site.
    raise ValueError(f"direction must be 0, 1, 2 or 3, got {direction!r}")

# Build the label -> Move table in the same (row, col, direction, distance) order
# in which valid_mask is laid out, so label index k corresponds to the k-th True
# entry of valid_mask.flatten().
valid_lookup = valid_mask.tolist()  # nested Python lists: avoids one tensor lookup per candidate
label_index_by_move = [
    # Coords are only constructed for moves that stay on the board.
    Move(Coord(row, col), Coord(*apply_direction(row, col, direction, distance)))
    for row in range(BOARD_WIDTH)
    for col in range(BOARD_WIDTH)
    for direction in range(4)
    for distance in range(1, DISTANCE_SIZE + 1)
    if valid_lookup[row][col][direction][distance - 1]
]

# Every model output must map to exactly one real move; a mismatch here would
# otherwise go unnoticed until a wrong move is looked up.
assert len(label_index_by_move) == LABEL_SIZE, (
    f"Expected {LABEL_SIZE} encoded moves, but built {len(label_index_by_move)}"
)
                    

def encode_move_to_index(move: Move):
    """Return the flat index of `move` in the (row, col, direction, distance) grid.

    The grid is the one `valid_mask` is laid out on, of shape
    (BOARD_WIDTH, BOARD_WIDTH, 4, DISTANCE_SIZE). Directions are encoded as in
    `apply_direction` (0 up, 1 down, 2 left, 3 right) and a distance `d` is stored
    at offset `d - 1`. The result therefore indexes `valid_mask.flatten()`; it is
    NOT a position in the compacted `label_index_by_move` list.

    `Coord.x` is treated as the row and `Coord.y` as the column, matching how the
    move table is built from `Coord(row, col)`.

    Raises:
        ValueError: if the move does not travel along exactly one axis
            (zero-length or diagonal).
    """
    row, col = move.from_pos.x, move.from_pos.y
    diff = move.to_pos - move.from_pos

    # Exactly one of the two components must be non-zero. Checking this first also
    # avoids the division by zero the old sign computation hit on a null move.
    if (diff.x == 0) == (diff.y == 0):
        raise ValueError(f"Move must be along exactly one axis, got row delta {diff.x}, column delta {diff.y}")

    if diff.x != 0:
        direction = 0 if diff.x < 0 else 1   # up / down
        distance = abs(diff.x)
    else:
        direction = 2 if diff.y < 0 else 3   # left / right
        distance = abs(diff.y)

    # Row-major flattening; distances are 1-based, offsets 0-based.
    return int(((row * BOARD_WIDTH + col) * 4 + direction) * DISTANCE_SIZE + (distance - 1))


class SimpleDefendersModel(nn.Module):
    """Small fully connected network scoring every one of the LABEL_SIZE encoded moves.

    The board is one-hot encoded with 4 classes per square, flattened into a single
    row of BOARD_WIDTH * BOARD_WIDTH * 4 features and passed through one hidden
    layer of 1024 units. Softmax is the last layer, so the network returns
    probabilities, not raw logits; a loss that expects logits (such as
    nn.CrossEntropyLoss) must not be fed this output unchanged.
    """

    def __init__(self):
        super().__init__()
        self.encode = nn.functional.one_hot
        self.nn = nn.Sequential(
            nn.Linear(BOARD_WIDTH * BOARD_WIDTH * 4, 1024),
            nn.ReLU(),
            nn.Linear(1024, LABEL_SIZE),
            nn.Softmax(dim=-1),  # Softmax to get probabilities for each move
        )

    def forward(self, x):
        """Return move probabilities of shape (1, LABEL_SIZE) for a single board.

        Args:
            x: integer tensor holding one code per square, with
                BOARD_WIDTH * BOARD_WIDTH elements in total (any shape; it is
                flattened). Assumes an int64 tensor with values in 0..3, which is
                what one_hot with num_classes=4 accepts.
        """
        # TODO: Replace with 3 classes, where all digits being off represents None
        encoded = self.encode(x, num_classes=4)
        # Callers build the board tensor on the CPU while the model may live on an
        # accelerator, so move the features to wherever the weights are.
        features = encoded.to(dtype=torch.float32, device=self.nn[0].weight.device).reshape(1, -1)
        return self.nn(features)
    
# Single module-level model instance on the selected device; find_ai_move, the
# optimizer and train() all use this object.
model = SimpleDefendersModel().to(device)

def find_ai_move(game, player):
    """Return the model's highest-probability move that the game accepts as valid.

    Args:
        game: game whose `board` is fed to the model and whose `is_valid_move`
            filters the candidates.
        player: kept for signature parity with the other move finders; not used.

    Returns:
        The Move from `label_index_by_move` with the highest model probability
        for which `game.is_valid_move` returns true.

    Raises:
        RuntimeError: if none of the encoded moves is valid.
    """
    # Flatten the board and get the value of each piece
    x = torch.tensor(game.board).reshape(-1, 1)

    # Inference only: no autograd graph is needed to rank moves.
    with torch.no_grad():
        probs = model(x).flatten()

    # Rank all moves once instead of repeatedly taking argmax and zeroing the
    # winner. Zero was previously the "already tried" marker, so a valid move
    # whose probability underflowed to exactly 0 aborted the search.
    ranked = torch.argsort(probs, descending=True, stable=True).tolist()
    for idx in ranked:
        move = label_index_by_move[idx]
        if game.is_valid_move(move.from_pos, move.to_pos):
            return move

    raise RuntimeError("No valid moves found")

def test_ai_move():
    """Smoke test: on a freshly set-up 13x13 board the AI must return a move the game accepts."""
    game = Game()
    game.fill_board_13_by_13()
    move = find_ai_move(game, Player.DEFENDER)

    # The result used to be discarded, so the test could only fail by raising.
    assert isinstance(move, Move), f"Expected Move, got {type(move)}"
    assert game.is_valid_move(move.from_pos, move.to_pos), "AI proposed a move the game rejects"

test_ai_move()

Using cpu device


In [28]:
# Reward magnitude attached to the last move of a game; assign_rewards makes it
# positive when the defender won and negative otherwise.
INITIAL_REWARD = 10.
# Factor the reward is multiplied by for each step further back from the end of the game.
DECAY_FACTOR = 0.9

# Step size passed to torch.optim.SGD.
# NOTE(review): 5 is unusually large for SGD — confirm this is intentional.
LEARNING_RATE = 5

def assign_rewards(winner, game_history):
    """Convert a finished game's (board, move) history into (board, target) pairs, in place.

    The last move gets +INITIAL_REWARD when the defender won and -INITIAL_REWARD
    otherwise. Walking backwards through the game, each earlier move gets the
    following move's reward multiplied by DECAY_FACTOR, so moves far from the end
    are rewarded or punished less. Each target is a zero vector of length
    LABEL_SIZE with that reward at the move's position in label_index_by_move.
    """
    if winner == Player.DEFENDER:
        current_reward = INITIAL_REWARD
    else:
        current_reward = -INITIAL_REWARD

    # Newest entry first, so the undecayed reward lands on the final move.
    for step in reversed(range(len(game_history))):
        board, played = game_history[step]
        assert isinstance(played, Move), f"Expected Move, got {type(played)}"

        target = torch.zeros(LABEL_SIZE)
        target[label_index_by_move.index(played)] = current_reward
        game_history[step] = (board, target)

        current_reward *= DECAY_FACTOR

# NOTE(review): nn.CrossEntropyLoss expects unnormalised logits as its input, while
# SimpleDefendersModel ends in nn.Softmax — whatever is passed to loss_fn must
# account for that.
loss_fn = nn.CrossEntropyLoss()
# Plain SGD over all parameters of the module-level model; train() steps it once
# per recorded position.
optimizer = torch.optim.SGD(model.parameters(), lr=LEARNING_RATE)

def train(winner, game_history):
    """Run one SGD step per recorded position of a finished game.

    `game_history` is converted in place by `assign_rewards` from (board, move)
    pairs to (board, target) pairs, where each target carries the signed, decayed
    reward at the index of the move that was played.

    Args:
        winner: value compared against Player.DEFENDER to choose the reward sign.
        game_history: list of (board, move) pairs; mutated in place.
    """
    assign_rewards(winner, game_history)

    for board_state, target in game_history:
        board_state = torch.tensor(board_state)

        # Compute prediction and loss
        probs = model(board_state)  # TODO: replace with already predicted values

        # The model already ends in Softmax, and nn.CrossEntropyLoss applies
        # log-softmax to its input again. Passing log-probabilities makes that
        # internal log-softmax an identity (they already normalise to one), so the
        # loss is -sum(target * log p) instead of a doubly-softmaxed quantity.
        log_probs = torch.log(probs.clamp_min(1e-12))  # clamp keeps log(0) out of the graph

        # Targets are created on the CPU; the prediction may be on an accelerator.
        target = target.reshape(1, -1).to(log_probs.device)
        loss = loss_fn(log_probs, target)

        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

In [29]:
import random

def find_random_move(game: Game, player: Player):
    """Return a random valid move for `player`.

    A piece is chosen uniformly among the player's pieces that have at least one
    valid move, then a destination is chosen uniformly among that piece's valid
    destinations.

    Raises:
        AssertionError: if no square of the board holds a piece of `player`.
        RuntimeError: if none of the player's pieces has a valid move.
    """
    # Take the board dimensions from the board itself instead of a literal 13.
    board_coords = [(x, y) for x, row in enumerate(game.board) for y, _ in enumerate(row)]

    valid_from_positions = [Coord(x, y) for x, row in enumerate(game.board) for y, piece in enumerate(row) if piece in player]
    assert len(valid_from_positions) > 0, "No valid from positions found"

    # Visiting the pieces in a random order picks among the movable pieces exactly
    # as retrying random.choice does, but terminates when no piece can move (the
    # old retry loop would spin forever in that case).
    for from_pos in random.sample(valid_from_positions, len(valid_from_positions)):
        valid_to_positions = [Coord(x, y) for x, y in board_coords if game.is_valid_move(from_pos, Coord(x, y))]
        if valid_to_positions:
            return Move(from_pos, random.choice(valid_to_positions))

    raise RuntimeError("No valid moves found")

def find_human_move(game: Game, player: Player):
    """Prompt on stdin until the user enters a move that `game.is_valid_move` accepts.

    Four integers are requested per attempt (source x, source y, destination x,
    destination y). A non-integer entry or a rejected move prints a message and
    restarts the prompts. Returns the accepted move as a Move.
    """
    def read_int(prompt):
        # int() raises ValueError on non-numeric input; the loop below handles it.
        return int(input(prompt))

    chosen = None
    while chosen is None:
        try:
            src_x = read_int(f"Player {player}, enter the x-coordinate of the piece to move (0-12): ")
            src_y = read_int(f"Player {player}, enter the y-coordinate of the piece to move (0-12): ")
            dst_x = read_int(f"Player {player}, enter the x-coordinate to move to (0-12): ")
            dst_y = read_int(f"Player {player}, enter the y-coordinate to move to (0-12): ")

            src = Coord(src_x, src_y)
            dst = Coord(dst_x, dst_y)

            if not game.is_valid_move(src, dst):
                print("Invalid move. Please try again.")
                continue

            chosen = Move(src, dst)
        except ValueError:
            print("Invalid input. Please enter integers between 0 and 12.")

    return chosen

In [30]:
import time

def single_game():
    """Play one full game: the model moves the defenders, random play moves the attackers.

    Returns:
        (winner, game_history) where `winner` is the first non-None value returned
        by `game.is_game_over()` and `game_history` is a list of (board, move)
        pairs, one per defender move, with `board` being a snapshot taken before
        that move was played.
    """
    import copy  # local import so this cell does not depend on another cell's imports

    game = Game()
    game.fill_board_13_by_13()

    # NOTE(review): the original comment said the attacker starts, but the code
    # starts with the defender — confirm which is intended.
    player = Player.DEFENDER

    game_history = []
    winner = None
    while winner is None:
        board_before = None
        try:
            if player == Player.DEFENDER:
                # Snapshot before moving. Storing `game.board` itself would keep a
                # reference that later moves may mutate, so every history entry
                # could end up showing the final position, and the training pair
                # must be (position the model saw, move it chose).
                board_before = copy.deepcopy(game.board)
                move = find_ai_move(game, player)
            else:
                move = find_random_move(game, player)
        except Exception:
            print()
            print()
            print("### ERROR ENCOUNTERED ###")
            game.print_board()
            print()
            print(f"Current player: {player}")
            # Re-raise: continuing would apply an undefined or stale `move`.
            raise

        game.move_piece_and_attack(move.from_pos, move.to_pos)

        # Only the defender's decisions are training data for the defender model;
        # the attacker's random moves were previously recorded (and reinforced) too.
        if board_before is not None:
            game_history.append((board_before, move))

        # Switch players
        player = Player.ATTACKER if player == Player.DEFENDER else Player.DEFENDER

        winner = game.is_game_over()

    return winner, game_history


In [None]:
import time

ITERATIONS = 10
# Report roughly every 10% of the run. Integer arithmetic keeps the interval at
# least 1; the previous float modulus `i % (ITERATIONS/10)` misbehaved for ITERATIONS < 10.
REPORT_EVERY = max(1, ITERATIONS // 10)

time_start = time.time()
time_epoch_start = time_start

GAMES_TRACK_SIZE = 10
# Ring buffer with the winners of the most recent GAMES_TRACK_SIZE games.
last_games = [None for _ in range(GAMES_TRACK_SIZE)]

for i in range(1, ITERATIONS + 1):
    winner, game_history = single_game()
    train(winner, game_history)

    last_games[i % GAMES_TRACK_SIZE] = winner

    if i % REPORT_EVERY == 0 or i == ITERATIONS:
        wins_NN = last_games.count(Player.DEFENDER)
        wins_Random = last_games.count(Player.ATTACKER)
        decided_games = wins_NN + wins_Random
        # Avoid a ZeroDivisionError if no tracked game ended with either side winning.
        win_rate = wins_NN / decided_games * 100 if decided_games else 0.0

        now = time.time()
        minutes, seconds = divmod(now - time_start, 60)
        hours, minutes = divmod(minutes, 60)
        print(f"Game {i}\tProgress: {i / ITERATIONS * 100:.0f}%\tWin rate: {win_rate:.2f}%\tTime: {hours:.0f}h {minutes:.0f}m {seconds:.0f}s (+{now - time_epoch_start:.0f}s)")
        time_epoch_start = now

Game 1	Progress: 100%	Win rate: 0.00%	Time: 0h 0m 11s (+11s)
