Creating the Neural Network Class

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

from abalone import Abalone


class GameNetwork(nn.Module):
    """Two-headed fully connected network producing a policy and a value.

    A single shared hidden layer (``Linear`` + ``ReLU``) feeds two heads:

    * the policy head, a ``Linear`` layer followed by a softmax over the
      last dimension, yielding ``action_size`` probabilities;
    * the value head, a ``Linear`` layer followed by ``tanh``, yielding a
      single number in ``[-1, 1]``.
    """

    def __init__(self, input_size, action_size, hidden_size=128):
        """Build the shared layer and both heads.

        Args:
            input_size: Number of features in each input vector.
            action_size: Number of outputs of the policy head.
            hidden_size: Width of the shared hidden layer (default 128).
        """
        super().__init__()

        # Shared trunk
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()

        # Policy head
        self.policy_head = nn.Linear(hidden_size, action_size)
        self.softmax = nn.Softmax(dim=-1)

        # Value head
        self.value_head = nn.Linear(hidden_size, 1)
        self.tanh = nn.Tanh()

    def forward(self, x):
        """Run the network on ``x``.

        Args:
            x: Tensor whose last dimension has size ``input_size``.

        Returns:
            A ``(policy, value)`` tuple. ``policy`` holds softmax
            probabilities over the last dimension (size ``action_size``);
            ``value`` has a last dimension of size 1 and is squashed by
            ``tanh``.
        """
        x = self.relu(self.fc1(x))

        policy_logits = self.policy_head(x)
        policy = self.softmax(policy_logits)

        value = self.tanh(self.value_head(x))

        return policy, value

    def save_model(self, path="game_network.pth"):
        """Write this module's ``state_dict`` to ``path`` with ``torch.save``."""
        torch.save(self.state_dict(), path)

    def load_model(self, path="game_network.pth", map_location=None):
        """Load a ``state_dict`` from ``path`` and switch to eval mode.

        Args:
            path: Checkpoint file previously written by ``save_model``.
            map_location: Forwarded to ``torch.load``. When ``None``
                (the default) the tensors are mapped onto the device of
                this model's current parameters, so a checkpoint saved on
                a GPU can still be loaded on a CPU-only machine.
        """
        if map_location is None:
            # Without an explicit target, torch.load tries to restore each
            # tensor on the device it was saved from, which fails when that
            # device does not exist here.
            map_location = next(self.parameters()).device

        # NOTE(review): torch.load unpickles the file, so only load
        # checkpoints from trusted sources. Recent PyTorch releases accept
        # weights_only=True to restrict what may be unpickled.
        state_dict = torch.load(path, map_location=map_location)
        self.load_state_dict(state_dict)
        self.eval()


# Basic sanity check
if __name__ == "__main__":
    # Example sizes: an encoded 9x9 Abalone board as input and
    # 60 potential legal moves as the action space.
    input_size, action_size = 81, 60
    model = GameNetwork(input_size, action_size)

    # Feed one random input vector (batch of one) through the network.
    test_input = torch.rand(1, input_size)
    policy, value = model(test_input)

    print(f"Policy Output Shape: {policy.shape}")
    print("Value Output:", value)


Policy Output Shape: torch.Size([1, 60])
Value Output: tensor([[-0.1017]], grad_fn=<TanhBackward0>)
