In [1]:
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
from single_agent_env import SinglePlayerFootball, ACTION_SPACE_SIZE, STATE_SPACE_SIZE
from RL import DeepQNetworkAgent
from RL.utils import ReplayBuffer
# Apply one fixed seed through torch.manual_seed, torch.cuda.manual_seed and
# np.random.seed, in that order, so reruns start from the same RNG state.
SEED = 3407
for seed_rng in (torch.manual_seed, torch.cuda.manual_seed, np.random.seed):
    seed_rng(SEED)

In [None]:
class DQN(nn.Module):
    """Fully connected network mapping an input vector to ``output_shape`` values.

    The stack is Linear -> LeakyReLU repeated over hidden widths
    1024, 512, 256, 128 and 64, followed by a final Linear layer with no
    activation.

    Args:
        input_shape: Number of input features (size of the last input dim).
        output_shape: Number of output features produced by the last layer.
    """

    def __init__(self, input_shape, output_shape) -> None:
        super().__init__()
        # Consecutive pairs of this tuple are the (in, out) sizes of the hidden layers.
        widths = (input_shape, 1024, 512, 256, 128, 64)
        layers = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            layers.append(nn.Linear(fan_in, fan_out))
            layers.append(nn.LeakyReLU())
        # Output head: plain linear projection, no activation afterwards.
        layers.append(nn.Linear(widths[-1], output_shape))
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the sequential stack and return its output."""
        out = self.model(x)
        return out


In [None]:
# Train a DQN agent on SinglePlayerFootball and record the summed reward of each episode.
#
# Use the first CUDA device when one is available; otherwise fall back to the CPU so
# this cell also runs on machines without a GPU instead of failing at agent creation.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

env = SinglePlayerFootball(title="DQN train, reward -0.01")
agent = DeepQNetworkAgent(STATE_SPACE_SIZE, ACTION_SPACE_SIZE, device=device)
# NOTE(review): `y` is presumably the discount factor, `e_decay` the epsilon decay
# rate and `batchs` the minibatch size — confirm against RL.DeepQNetworkAgent.
agent.create_model(DQN, lr=0.0001, y=0.99, e_decay=0.999995, batchs=64)
# NOTE(review): the two integers are presumably buffer capacity and a warm-up /
# minimum-fill threshold — confirm against RL.utils.ReplayBuffer.
agent.create_buffer(ReplayBuffer(1_000_000, 100_000, STATE_SPACE_SIZE))

# One entry per finished episode: the sum of all rewards seen in that episode.
scores = []
# Keep starting new episodes for as long as the environment reports `running`.
while env.running:
    rewards = []
    # NOTE(review): the evaluation cell resets through env.reset() while this cell
    # uses env.football.reset() — confirm both yield the same initial state.
    state = env.football.reset()
    # An episode lasts until env.loop_once() returns a truthy value; the `done`
    # flag from env.step() is only forwarded to agent.learn(), it does not end the loop.
    while not env.loop_once():
        action = agent.policy(state)
        n_state, reward, done = env.step(action)
        agent.learn(state, action, n_state, reward, done, update="soft")
        state = n_state
        rewards.append(reward)
    scores.append(sum(rewards))
# Drop the notebook's reference to the environment once training stops.
del env

In [None]:
# Roll out the agent for 10 episodes in a newly constructed environment.
# agent.learn is never called in this cell, so no learning step is triggered from here.
# NOTE(review): the title says "DQN train, reward -1" although this cell performs no
# training — confirm the label is intended.
# NOTE(review): agent.policy may still explore (e.g. epsilon-greedy) — confirm
# against RL.DeepQNetworkAgent before treating these rollouts as greedy evaluation.
env = SinglePlayerFootball(title="DQN train, reward -1")
for _ in range(10):
    # Start of an episode: env.reset() supplies the first state.
    s = env.reset()
    # Keep stepping until env.loop_once() returns a truthy value.
    while not env.loop_once():
        # Only the next state is kept; the other two returned values are discarded
        # into `_`, which also rebinds the loop variable (harmless for range iteration).
        s, _, _ = env.step(agent.policy(s))
# Drop the notebook's reference to the environment.
del env