### Necessary Imports and Required Jupyter Notebooks

In [1]:
import os
import numpy as np
import random
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt
from torch.utils.tensorboard import SummaryWriter
from tqdm import trange


In [None]:
# Execute the companion notebooks so that their definitions are available in
# this kernel. The agent defined below uses AttaxGame, AlphaZero and ResNet,
# none of which are imported elsewhere in this notebook -- presumably they are
# defined by these notebooks (TODO confirm which notebook provides which name).
%run AttaxGame.ipynb
%run AlphaZero_Attax.ipynb
%run NeuralNetwork_Attax.ipynb



## AlphaZero Agent for Attax Game

This notebook implements an intelligent agent that utilizes the AlphaZero algorithm to play the Attax game. The `AlphaZeroAgent` class encapsulates the logic required for the agent to interact with the game environment, make strategic decisions, and learn from its experiences.

Key components of the `AlphaZeroAgent` class include:

- **Initialization**: Set up the agent with the specific game it will play and load the pre-trained AlphaZero model.
- **Model Loading**: Depending on the game dimensions (4x4, 5x5, or 6x6), load the corresponding neural network model trained to evaluate game states and generate probable moves.
- **Move Making**: Use a Monte Carlo Tree Search (MCTS) guided by the neural network to simulate games and decide on the most promising move.
- **Response Handling**: Process responses from the game server, which includes updating the game state based on opponent moves and handling end-game scenarios.

The following code block defines the `AlphaZeroAgent` class and its critical methods. It is designed to integrate seamlessly with the Attax game server, receiving game updates and sending moves back using a predefined communication protocol.


In [None]:
class AlphaZeroAgent:
    """
    Agent class that uses the AlphaZero algorithm to play games.

    The agent loads a pre-trained network for one of the supported board
    sizes, asks the wrapped ``AlphaZero`` instance for moves and forwards the
    moves reported by the game server to it.

    Attributes:
        Game: Game identifier, one of the keys of ``BOARD_SIZES``.
        model_dir: Optional directory that overrides where checkpoints are
            looked up (see ``__init__``).
        alpha_zero: The ``AlphaZero`` instance built by ``load_alpha_zero``.
        turn: 1 or -1; updated by ``make_move`` and ``receive_response``.
    """

    # Default checkpoint file for every supported game identifier.
    MODEL_FILES = {
        "A4x4": '/Users/larasousa/Desktop/uni/labiacd/Attax/model8Attax4.pt',
        "A5x5": '/Users/larasousa/Desktop/uni/labiacd/Attax/model14Attax5.pt',
        "A6x6": '/Users/larasousa/Desktop/uni/labiacd/Attax/model7Attax6.pt',
    }

    # Board side length for every supported game identifier.
    BOARD_SIZES = {"A4x4": 4, "A5x5": 5, "A6x6": 6}

    def __init__(self, Game, model_dir=None):
        """
        Initializes the AlphaZero agent with the game it will play.

        Args:
            Game: Game identifier ("A4x4", "A5x5" or "A6x6").
            model_dir: Optional directory containing the checkpoint files.
                When given, the default checkpoint file name is looked up in
                this directory instead of the hard-coded default location.

        Raises:
            ValueError: If ``Game`` is not a supported game identifier.
        """
        self.Game = Game
        self.model_dir = model_dir
        self.alpha_zero = self.load_alpha_zero()
        self.turn = -1  # Keeps track of whose turn it is (-1 for opponent, 1 for agent)

    def _model_path(self):
        """Return the checkpoint path for ``self.Game``, honouring ``model_dir``."""
        if self.Game not in self.MODEL_FILES:
            supported = ", ".join(sorted(self.MODEL_FILES))
            raise ValueError(
                f"Unknown game type {self.Game!r}; expected one of: {supported}"
            )
        default_path = self.MODEL_FILES[self.Game]
        if self.model_dir is None:
            return default_path
        # Keep the default file name, only relocate it.
        return os.path.join(self.model_dir, os.path.basename(default_path))

    def load_alpha_zero(self):
        """
        Loads the appropriate AlphaZero model based on the game being played.

        Returns:
            An ``AlphaZero`` instance wrapping the loaded network, an Adam
            optimizer, the configured game and the argument dictionary.

        Raises:
            ValueError: If ``self.Game`` is not a supported game identifier.
        """
        # Validate first so an unsupported game fails with a clear message
        # instead of an unrelated error from torch.load or `None * None`.
        model_file = self._model_path()

        game = AttaxGame()  # Initializes the game

        # Check if GPU is available and set the device accordingly
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        # Set the game board size based on the game type
        game.N = self.BOARD_SIZES[self.Game]

        # Initialize the game state and model
        game.action_size = game.N * game.N
        state = game.get_initial_state()
        model = ResNet(game, state, 1, 4, 64, device)
        # Map the checkpoint onto the same device the model was built for.
        model.load_state_dict(torch.load(model_file, map_location=device))
        model.eval()

        # AlphaZero's constructor takes an optimizer alongside the model.
        optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=0.0001)

        # NOTE(review): several of these keys look training-only; they are
        # kept because AlphaZero may read them -- confirm before pruning.
        args = {
            'C': 2, # Exploration constant for the UCT formula
            'num_searches': 100, # Number of searches to run for each move
            'num_iterations': 500, # Number of iterations to run the training
            'num_selfPlay_iterations': 1000, # Number of self play iterations
            'num_epochs': 50, # Number of epochs to run the training
            'batch_size': 64, # Batch size
            'epsilon': 0.25, # Epsilon for the Dirichlet noise
            'alpha': 0.3 # Alpha for the Dirichlet noise
        }

        return AlphaZero(model, optimizer, game, args)

    def make_move(self, agent):
        """
        Determines the move to be made by the AlphaZero agent.

        Args:
            agent: Agent number; ``self.turn`` becomes 1 when it equals 1 and
                -1 otherwise.

        Returns:
            One key of the action-probability mapping found in the last entry
            returned by ``self.alpha_zero.selfPlay()``, sampled according to
            its probability.

        Raises:
            IndexError: If ``selfPlay()`` returns an empty sequence.
            ValueError: If the mapping is empty or its probabilities do not
                add up to a positive number.
        """
        # Update the turn based on the agent number
        self.turn = 1 if agent == 1 else -1

        # NOTE(review): presumably selfPlay() searches from the state kept by
        # the AlphaZero instance -- confirm against AlphaZero_Attax.ipynb.
        memory = self.alpha_zero.selfPlay()

        # Only the action probabilities of the last entry are needed.
        _, action_probs, _ = memory[-1]

        actions = list(action_probs.keys())
        weights = np.asarray(list(action_probs.values()), dtype=float)
        total = weights.sum()
        if not actions or not total > 0:
            raise ValueError("No action with positive probability to choose from")

        # Renormalise so tiny floating-point drift does not make numpy reject
        # the distribution, and sample an index rather than the keys
        # themselves so non-scalar keys (e.g. tuples) are supported and the
        # key is returned with its original type.
        index = np.random.choice(len(actions), p=weights / total)
        return actions[index]

    @staticmethod
    def _parse_move(response):
        """Return the move as "<c0><c1>_<c2><c3>", or None if it is malformed."""
        tokens = response.split()
        # The coordinates are the token right after the one carrying "MOVE".
        for position, token in enumerate(tokens[:-1]):
            if 'MOVE' in token:
                coords = [part.strip() for part in tokens[position + 1].split(',')]
                if len(coords) == 4 and all(coords):
                    return f"{coords[0]}{coords[1]}_{coords[2]}{coords[3]}"
                return None
        return None

    def receive_response(self, response, ag):
        """
        Processes the server's response and updates the game state accordingly.

        Args:
            response: Text received from the server. A response containing
                "MOVE" is expected to look like "MOVE a,b,c,d".
            ag: Agent number; ``self.turn`` becomes -1 when it equals 1 and
                1 otherwise.

        Returns:
            False when the response contains "END GAME", True otherwise.
        """
        # Update the turn based on the agent number
        self.turn = -1 if ag == 1 else 1

        if 'MOVE' in response:
            # Tolerant parsing: surrounding whitespace or extra tokens must
            # not crash the agent or leak into the formatted move.
            move_formatted = self._parse_move(response)
            if move_formatted is not None:
                # Update the game state with the received move
                self.alpha_zero.update_state(move_formatted)
            else:
                print("Unexpected response format:", response)

        # The game continues unless the server announced its end.
        return 'END GAME' not in response
