In [3]:
import torch
import torch.nn as nn
import numpy as np
from single_agent_env import SinglePlayerFootball, ACTION_SPACE_SIZE, STATE_SPACE_SIZE
from RL import DeepQNetworkAgent
from RL.utils import ReplayBuffer
# Seed the PyTorch and NumPy global RNGs with the same value so that
# re-running the notebook from a fresh kernel starts from the same RNG state.
SEED = 3407
torch.manual_seed(SEED)
np.random.seed(SEED)

In [4]:
class DQN(nn.Module):
    """Fully connected network: three LeakyReLU hidden layers, then a linear head.

    The hidden widths are 512, 256 and 128. The layers live in
    ``self.model`` (an ``nn.Sequential``), in the order
    Linear, LeakyReLU, Linear, LeakyReLU, Linear, LeakyReLU, Linear.

    Args:
        input_shape: number of input features (``in_features`` of the first
            linear layer).
        output_shape: number of outputs (``out_features`` of the last linear
            layer) -- presumably one value per action; confirm against the
            agent that instantiates this class.
    """

    def __init__(self, input_shape, output_shape) -> None:
        super().__init__()
        widths = (input_shape, 512, 256, 128)
        stack = []
        # Each hidden stage is a Linear followed by its LeakyReLU activation.
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stack.append(nn.Linear(fan_in, fan_out))
            stack.append(nn.LeakyReLU())
        # Output head has no activation.
        stack.append(nn.Linear(widths[-1], output_shape))
        self.model = nn.Sequential(*stack)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the raw outputs."""
        outputs = self.model(x)
        return outputs


In [None]:
# --- Configuration ----------------------------------------------------------
MAX_REPLAY_BUFFER = 1_000_000   # first argument passed to ReplayBuffer below
BATCH_SIZE = 64                 # forwarded to agent.create_model as `batchs`
TARGET_NET_UPDATE_FREQ = 5      # forwarded as `target_update_freq`
SAVE_INTERVAL = 10000           # not referenced in this cell
CURRENT_TRAIN_ID = '2023-02-28' # not referenced in this cell
# Use the first GPU when one is present; otherwise fall back to the CPU so the
# cell still runs on machines without CUDA.
DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"

# Re-seed here so that re-running only this cell starts from the same RNG state.
torch.manual_seed(3407)
np.random.seed(3407)

# --- Environment and agent --------------------------------------------------
env = SinglePlayerFootball(title="DQN train")
agent = DeepQNetworkAgent(STATE_SPACE_SIZE, ACTION_SPACE_SIZE, device=DEVICE)
agent.create_model(DQN, lr=0.00025, y=0.99, e_decay=0.9999999, batchs=BATCH_SIZE, target_update_freq=TARGET_NET_UPDATE_FREQ)
# NOTE(review): the meaning of the 50_000 argument is defined by
# RL.utils.ReplayBuffer and is not visible here -- confirm before tuning.
agent.create_buffer(ReplayBuffer(MAX_REPLAY_BUFFER, 50_000, STATE_SPACE_SIZE))

# --- Training ---------------------------------------------------------------
scores = []   # one entry per episode: the sum of that episode's step rewards
action = None

while env.running:
    rewards = []
    # NOTE(review): training resets through env.env.reset() while the
    # evaluation loop below uses env.reset(); confirm both are intended.
    state = env.env.reset()
    while not env.loop_once():
        action = agent.policy(state)
        n_state, reward, done = env.step(action)
        agent.learn(state, action, n_state, reward, done)
        # Record the step reward; without this the episode score is always 0.
        rewards.append(reward)
        state = n_state
    scores.append(sum(rewards))

# --- Evaluation -------------------------------------------------------------
print(agent.step_count, agent.train_count)
env.rendering = True
# Named throwaways instead of `_`, which IPython uses for the last cell output.
for _episode in range(10):
    s = env.reset()
    while not env.loop_once():
        s, _reward, _done = env.step(agent.policy(s))
del env