In [None]:
import torch
import torch.nn as nn
import numpy as np
from RL import ReinforceAgent
from single_agent_env import SinglePlayerFootball, ACTION_SPACE_SIZE, STATE_SPACE_SIZE
# Seed every random number generator this notebook touches with one shared
# value: NumPy's global RNG, torch's default generator, and torch's CUDA RNG.
SEED = 3407
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)

In [None]:
class PG(nn.Module):
    """Small MLP policy network that maps observations to action probabilities.

    Architecture: Linear(observation_size, 64) -> LeakyReLU -> Linear(64, 32)
    -> LeakyReLU -> Linear(32, action_size) -> Softmax over the last axis.

    Args:
        observation_size: Number of features in a single observation vector.
        action_size: Number of outputs; the length of the returned
            probability vector.
    """

    def __init__(self, observation_size, action_size):
        super().__init__()
        # Layer order (and therefore the Sequential indices used in
        # state_dict keys) is kept exactly as before.
        self.model = nn.Sequential(
            nn.Linear(observation_size, 64),
            nn.LeakyReLU(),
            nn.Linear(64, 32),
            nn.LeakyReLU(),
            nn.Linear(32, action_size),
            # Normalise over the last (action) axis rather than a hard-coded
            # dim=1: identical for (batch, features) input, and additionally
            # works for a single unbatched 1-D observation, where dim=1 would
            # raise an IndexError.
            nn.Softmax(dim=-1)
        )

    def forward(self, x):
        """Compute action probabilities for ``x``.

        Args:
            x: Tensor whose last dimension has ``observation_size`` entries,
                e.g. shape ``(observation_size,)`` or
                ``(batch, observation_size)``.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of ``action_size`` whose entries sum to 1.
        """
        return self.model(x)

In [None]:
# Train a ReinforceAgent (using the PG network) on SinglePlayerFootball.
# `train_id` doubles as the environment title and as the prefix of the
# output file written by the reward-saving cell.
train_id = "RI_fixed_ball_normalized_scratch"
env = SinglePlayerFootball(title=train_id)
try:
    # NOTE(review): device is pinned to the second GPU ("cuda:1"); adjust on
    # machines with a different GPU layout.
    agent = ReinforceAgent(STATE_SPACE_SIZE, ACTION_SPACE_SIZE, device="cuda:1")
    # NOTE(review): `y` is presumably the discount factor and
    # `reward_norm_factor` a reward scaling constant -- confirm in RL.py.
    agent.create_model(PG, lr=0.0003, y=0.99, reward_norm_factor=300)

    # Outer loop: keep starting new runs for as long as the environment
    # reports that it is still running.
    while env.running:
        state = env.reset(random_ball=False)
        # Inner loop: step until env.loop_once() returns a truthy value.
        while not env.loop_once():
            action = agent.policy(state)
            next_state, reward, done = env.step(action)
            agent.learn(state, action, next_state, reward, done)
            state = next_state
finally:
    # Always drop the environment reference, even if training raises or the
    # kernel is interrupted, so it is not left alive after a failed run.
    del env

In [None]:
# Dump agent.reward_history to "<train_id>_rewards.txt", one value per line,
# each rounded to 6 decimal places.
with open(f'{train_id}_rewards.txt', 'w') as f:
    rounded_rewards = [round(item, 6) for item in agent.reward_history]
    f.write("".join(f"{value}\n" for value in rounded_rewards))