**The following notebook is written as a guide to testing the agent in your own notebook**
1.   Copy and paste all three of the following cells into your code.

2.   We have already trained a model, which you should load into the agent.
3.   First, copy the agent from the following cell.

In [None]:
import random
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from collections import deque

# Q-network: a five-layer fully connected model
class ComplexDQN(nn.Module):
    """Fully connected network mapping an input vector to one value per output.

    The hidden widths are fixed at 256 -> 512 -> 512 -> 256. The layers are
    stored as ``fc1`` .. ``fc5``; these attribute names are the keys of the
    module's ``state_dict``.
    """

    def __init__(self, input_size, output_size):
        """Create the five linear layers.

        Args:
            input_size: Number of features in each input vector.
            output_size: Number of values produced per input vector.
        """
        super().__init__()
        self.fc1 = nn.Linear(input_size, 256)
        self.fc2 = nn.Linear(256, 512)
        self.fc3 = nn.Linear(512, 512)
        self.fc4 = nn.Linear(512, 256)
        self.fc5 = nn.Linear(256, output_size)

    def forward(self, x):
        """Apply ReLU after each of the first four layers; the last layer is linear."""
        hidden = x
        for layer in (self.fc1, self.fc2, self.fc3, self.fc4):
            hidden = F.relu(layer(hidden))
        return self.fc5(hidden)


# Problem dimensions and training hyperparameters
input_size = 25       # Flattened 5 x 5 game board
output_size = 44      # One network output per (from_pos, move) pair
hidden_size = 64      # Hidden-layer width (ComplexDQN hard-codes its own widths and does not read this)
learning_rate = 1e-3  # Learning rate handed to the Adam optimizer
epsilon = 0.01        # Probability of a random move under the epsilon-greedy policy
batch_size = 32       # Number of transitions sampled per training step
memory_size = 10_000  # Maximum number of transitions kept in the replay buffer

# Deep Q-learning player
class DeepRLPlayer(Player):
    """Player that chooses moves with a DQN and learns from replayed transitions.

    Holds an online network (``model``), a target network (``target_model``)
    that is periodically synchronised with it, an Adam optimizer and a
    bounded replay buffer (``memory``).

    Relies on the module-level names ``input_size``, ``output_size``,
    ``learning_rate``, ``epsilon``, ``batch_size``, ``memory_size`` and
    ``target_update``.
    NOTE(review): ``make_move`` also reads a module-level ``classes`` lookup
    that is not defined in the cells shown here -- confirm it is provided
    before a greedy move is requested.
    """

    def __init__(self) -> None:
        super().__init__()
        use_cuda = torch.cuda.is_available()
        self.device = torch.device("cuda" if use_cuda else "cpu")

        # Online network, plus a target network that starts as an exact copy
        # and is kept in eval mode.
        self.model = ComplexDQN(input_size, output_size).to(self.device)
        self.target_model = ComplexDQN(input_size, output_size).to(self.device)
        self.target_model.load_state_dict(self.model.state_dict())
        self.target_model.eval()

        self.optimizer = optim.Adam(self.model.parameters(), lr=learning_rate)
        self.memory = deque(maxlen=memory_size)  # Replay buffer; oldest entries drop out first
        self.steps_done = 0                      # Number of completed train() updates
        self.gamma = 0.99                        # Discount factor for future rewards

    def make_move(self, game: 'Game') -> tuple[tuple[int, int], Move]:
        """Return ``(from_pos, move)`` chosen by an epsilon-greedy policy.

        With probability ``epsilon`` a random move and a random board cell
        are returned; otherwise the entry of ``classes`` at the index of the
        largest network output is used.
        """
        flat_board = game.get_board().flatten()
        state = torch.tensor(flat_board, dtype=torch.float32).unsqueeze(0).to(self.device)

        if random.random() >= epsilon:
            # Greedy branch: evaluate the network once and look up the best entry.
            with torch.no_grad():
                best_index = torch.argmax(self.model(state)).item()
            chosen = classes[best_index]
            return chosen[0], chosen[1]

        # Exploration branch: the move is drawn before the position.
        action = random.choice([Move.TOP, Move.BOTTOM, Move.LEFT, Move.RIGHT])
        row_or_col_a = random.randint(0, 4)
        row_or_col_b = random.randint(0, 4)
        return (row_or_col_a, row_or_col_b), action

    def train(self):
        """Run one optimisation step on a random minibatch from the replay buffer.

        Does nothing until the buffer holds at least ``batch_size``
        transitions. Every ``target_update`` steps (including the very first
        one) the target network is overwritten with the online weights.
        """
        if len(self.memory) < batch_size:
            return

        def to_device(values, dtype):
            # Build a tensor of the requested dtype on the agent's device.
            return torch.tensor(values, dtype=dtype).to(self.device)

        transitions = random.sample(self.memory, batch_size)
        states, actions, rewards, next_states, dones = zip(*transitions)
        state_batch = to_device(states, torch.float32)
        action_batch = to_device(actions, torch.int64)
        reward_batch = to_device(rewards, torch.float32)
        next_state_batch = to_device(next_states, torch.float32)
        done_batch = to_device(dones, torch.float32)

        # Q(s, a) from the online network for the actions that were taken.
        predicted_q = self.model(state_batch).gather(1, action_batch.unsqueeze(1))
        # Best next-state value from the target network; no gradient flows through it.
        best_next_q = self.target_model(next_state_batch).max(dim=1).values.detach()
        # Transitions flagged as done contribute only their immediate reward.
        target_q = reward_batch + (1 - done_batch) * self.gamma * best_next_q

        loss = F.smooth_l1_loss(predicted_q, target_q.unsqueeze(1))
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        # Periodically synchronise the target network with the online one.
        if self.steps_done % target_update == 0:
            self.target_model.load_state_dict(self.model.state_dict())

        self.steps_done += 1

    def store_experience(self, state, action, reward, next_state, done):
        """Append one ``(state, action, reward, next_state, done)`` transition to the buffer."""
        transition = (state, action, reward, next_state, done)
        self.memory.append(transition)

# Number of training steps between target-network synchronisations
# (read by DeepRLPlayer.train)
target_update = 1_000

Second, create a directory and move the trained model file into it.

In [None]:
import os
# Directory that holds the trained model files; load_model() resolves
# file names relative to it.
MODEL_DIR = 'trained_models'
# Create the directory if it is missing; exist_ok=True makes re-running this cell harmless.
os.makedirs(MODEL_DIR, exist_ok=True)

Third, run the following cell. It creates the Deep RL agent as player 2 and loads the trained model into it. Player 1 is a Random Player, which you may replace with a different player.

In [None]:
from tqdm import tqdm
# Name of the checkpoint file, looked up inside MODEL_DIR by load_model().
# The whole string (including commas and spaces) is used as the file name.
model_filename = "deep_rl_player_10.pth, TrainWinRate_82.53,roundNum_ 10000, lr_, 0.001, epsilon_0.01, structure_ 256_512"

# Function to load model parameters
def load_model(model, filename):
    """Load a saved ``state_dict`` from ``MODEL_DIR/filename`` into ``model``.

    The checkpoint is mapped onto the device the model's parameters live on,
    so a file saved on a CUDA machine can still be loaded when only a CPU is
    available.

    Args:
        model: ``torch.nn.Module`` whose parameters are overwritten in place.
        filename: Name of the checkpoint file inside ``MODEL_DIR``.

    Raises:
        FileNotFoundError: If the checkpoint file does not exist.
        RuntimeError: If the checkpoint does not match the model's layers.
    """
    filepath = os.path.join(MODEL_DIR, filename)

    # Follow the device of the receiving model; fall back to CPU for a model
    # that has no parameters at all.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    # NOTE: torch.load unpickles the file, so only load checkpoints from a
    # trusted source.
    state_dict = torch.load(filepath, map_location=target_device)
    model.load_state_dict(state_dict)
    print(f"Model loaded from '{filepath}'")

# Create the game, a random opponent (player 1) and the Deep RL agent (player 2)
g = Game()
player1 = RandomPlayer()
player2 = DeepRLPlayer()

# Load the trained weights into the agent's online network (player2.model)
load_model(player2.model, model_filename)
print("Model loaded.")

Enjoy testing!