In [30]:
%load_ext autoreload 
%autoreload 2
import sys
sys.tracebacklimit = 0

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [31]:
from pettingzoo.test import parallel_api_test
from solution.custom_gym import CustomGymEnviornment
from solution.trainer import *


In [32]:
from core.agent import *
from core.world import * 
from core.map import *
from core.render import * 
from core.env_params import * 
from core.skill import * 
from core.models import *
from core.env_params import *

In [33]:
def initialize_swarm(world : World, num_agents : int = 20):
    """Create a swarm of agents, register each with ``world`` and place them.

    Every agent is passed through a ``DefaultSkillInitializer`` (configured
    with ``PRODUCT_TYPES`` skills) and a ``UtilitySampler`` before being
    added to the world. The complete swarm is then handed to
    ``initialize_positions_randomly``.

    Args:
        world: World that receives the agents through ``world.add_agent``.
        num_agents: Number of agents to create. Defaults to 20, the value
            that used to be hard-coded.

    Returns:
        None.
    """
    swarm = [Agent() for _ in range(num_agents)]
    skill_initializer = DefaultSkillInitializer(num_skills = PRODUCT_TYPES)
    utility_initializer = UtilitySampler()

    for agent in swarm:
        skill_initializer.forward(agent)
        utility_initializer.forward(agent)
        world.add_agent(agent)

    # The result is not bound to a name: nothing reads it afterwards and
    # this function does not return the swarm.
    initialize_positions_randomly(world, swarm)
    

In [34]:
# Assemble the simulation world from the configured dimensions, the swarm
# initializer defined in this notebook, and the generator/model objects.
world = World(
    dims=WORLD_DIMS,
    # NOTE(review): keyword is spelled `swarm_initialzier`; presumably it
    # matches World's signature — confirm before renaming.
    swarm_initialzier=initialize_swarm,
    resource_generator=RandomMapGenerator(RESOURCE_TYPES),
    energy_model=EnergyModel(),
    chemistry_model=ChemistryModel(),
)
# Reset once so the world is reset before it is used by later cells.
world.reset()

In [35]:
# Wrap the world in the custom gym environment used by the following cells.
custom_gym : CustomGymEnviornment = CustomGymEnviornment(world)

In [36]:
# Reset the environment; the returned value is echoed as the cell output.
# NOTE(review): 42 is presumably the RNG seed — confirm against
# CustomGymEnviornment.reset.
custom_gym.reset(42)

({'1': {'vision': array([[0, 0, 0, 0, 0, 0, 0],
          [3, 3, 0, 0, 0, 1, 1],
          [3, 3, 0, 0, 0, 1, 1],
          [3, 3, 0, 0, 0, 1, 1],
          [0, 0, 0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 0, 0]])},
  '2': {'vision': array([[0, 0, 0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 0, 0]])},
  '3': {'vision': array([[0, 0, 3, 3, 3, 3, 3],
          [3, 3, 3, 3, 3, 3, 3],
          [3, 3, 3, 0, 0, 0, 0],
          [3, 3, 3, 0, 0, 0, 0],
          [3, 3, 3, 0, 0, 0, 0],
          [3, 3, 3, 3, 3, 3, 3],
          [4, 4, 3, 3, 3, 3, 3]])},
  '4': {'vision': array([[0, 0, 0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 0, 0]])},
  '5

# Testing

In [37]:
# Run PettingZoo's parallel-API test against the environment.
# NOTE(review): num_cycles=1_000_000 presumably makes this cell slow on a
# full re-run — consider a smaller count if that many cycles are not needed.
parallel_api_test(custom_gym, num_cycles=1_000_000)
# Reset again after the test (no argument this time); the returned value is
# echoed as the cell output.
custom_gym.reset()

Passed Parallel API test


({'1': {'vision': array([[0, 0, 0, 0, 0, 0, 0],
          [3, 3, 0, 0, 0, 1, 1],
          [3, 3, 0, 0, 0, 1, 1],
          [3, 3, 0, 0, 0, 1, 1],
          [0, 0, 0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 0, 0]])},
  '2': {'vision': array([[0, 0, 0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 0, 0]])},
  '3': {'vision': array([[0, 0, 3, 3, 3, 3, 3],
          [3, 3, 3, 3, 3, 3, 3],
          [3, 3, 3, 0, 0, 0, 0],
          [3, 3, 3, 0, 0, 0, 0],
          [3, 3, 3, 0, 0, 0, 0],
          [3, 3, 3, 3, 3, 3, 3],
          [4, 4, 3, 3, 3, 3, 3]])},
  '4': {'vision': array([[0, 0, 0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 0, 0]])},
  '5

# Training

In [38]:

from models.base import * 
from models.idqn import * 
from solution.policy_net import PolicyNet

In [39]:
# Build the IDQN model around the environment with two PolicyNet instances
# constructed from the same arguments.
# NOTE(review): the meaning of the PolicyNet arguments (1, 8, 12) and the
# role of each of the two networks are not visible here — confirm against
# solution.policy_net / models.idqn and consider naming these constants.
model = IDQN(custom_gym, PolicyNet(1, 8, 12), PolicyNet(1, 8, 12))

In [27]:
# Train `model` on the environment with games=10 and seed=42.
# NOTE(review): the saved output of this cell ends in KeyboardInterrupt and
# its execution count (27) is lower than that of the cells above, so the
# recorded output comes from an interrupted, out-of-order run — re-run after
# Restart Kernel -> Run All before relying on it.
train_loop(custom_gym, model, games=10, seed=42)

Training on thesis.
Using cuda device
-------------------------------
| time/              |        |
|    fps             | 13085  |
|    iterations      | 1      |
|    time_elapsed    | 62     |
|    total_timesteps | 819200 |
-------------------------------
Model has been saved.
-------------------------------
| time/              |        |
|    fps             | 13149  |
|    iterations      | 1      |
|    time_elapsed    | 62     |
|    total_timesteps | 819200 |
-------------------------------


KeyboardInterrupt: 