In [56]:
%load_ext autoreload 
%autoreload 2
import sys
sys.tracebacklimit = 0

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [57]:
from pettingzoo.test import parallel_api_test
from solution.custom_gym import CustomGymEnviornment
from solution.trainer import *
from solution.policy_net import *

In [58]:
from core.agent import *
from core.world import * 
from core.map import *
from core.render import * 
from core.env_params import * 
from core.skill import * 
from core.models import *
from core.env_params import *

In [59]:
def initialize_swarm(world : World):
    swarm = [Agent() for i in range(20)]
    skill_initializer = DefaultSkillInitializer(num_skills = PRODUCT_TYPES)
    utility_initializer = UtilitySampler()

    for agent in swarm:
        skill_initializer.forward(agent)
        utility_initializer.forward(agent)
        world.add_agent(agent)
    swarm = initialize_positions_randomly(world, swarm)
    

In [60]:
world = World(dims = WORLD_DIMS,
              swarm_initialzier= initialize_swarm,
              resource_generator= RandomMapGenerator(RESOURCE_TYPES),
              energy_model=EnergyModel(),
              chemistry_model= ChemistryModel()
              )
world.reset()

In [61]:
custom_gym : CustomGymEnviornment = CustomGymEnviornment(world)

In [62]:
custom_gym.reset(42)

({'1': {'vision': array([[0, 0, 0, 0, 0, 0, 0],
          [3, 3, 0, 0, 0, 1, 1],
          [3, 3, 0, 0, 0, 1, 1],
          [3, 3, 0, 0, 0, 1, 1],
          [0, 0, 0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 0, 0]])},
  '2': {'vision': array([[0, 0, 0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 0, 0]])},
  '3': {'vision': array([[0, 0, 3, 3, 3, 3, 3],
          [3, 3, 3, 3, 3, 3, 3],
          [3, 3, 3, 0, 0, 0, 0],
          [3, 3, 3, 0, 0, 0, 0],
          [3, 3, 3, 0, 0, 0, 0],
          [3, 3, 3, 3, 3, 3, 3],
          [4, 4, 3, 3, 3, 3, 3]])},
  '4': {'vision': array([[0, 0, 0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 0, 0]])},
  '5

### Testing

In [63]:
parallel_api_test(custom_gym, num_cycles=1_000_000)
custom_gym.reset()

Passed Parallel API test


({'1': {'vision': array([[0, 0, 0, 0, 0, 0, 0],
          [3, 3, 0, 0, 0, 1, 1],
          [3, 3, 0, 0, 0, 1, 1],
          [3, 3, 0, 0, 0, 1, 1],
          [0, 0, 0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 0, 0]])},
  '2': {'vision': array([[0, 0, 0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 0, 0]])},
  '3': {'vision': array([[0, 0, 3, 3, 3, 3, 3],
          [3, 3, 3, 3, 3, 3, 3],
          [3, 3, 3, 0, 0, 0, 0],
          [3, 3, 3, 0, 0, 0, 0],
          [3, 3, 3, 0, 0, 0, 0],
          [3, 3, 3, 3, 3, 3, 3],
          [4, 4, 3, 3, 3, 3, 3]])},
  '4': {'vision': array([[0, 0, 0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 0, 0]])},
  '5

# Training

In [64]:

from models.base import * 
from models.idqn import * 
from solution.policy_net import PolicyNet

In [65]:
model = IDQN(env = custom_gym,
             feature_extractor= feature_extractor,
             policy_net= PolicyNet(1, 8, 12), 
             target_net=  PolicyNet(1, 8, 12))

In [66]:
train_loop(custom_gym, model, games=10, seed=42)

Training on thesis.
{'1': tensor([3]), '2': tensor([3]), '3': tensor([4]), '4': tensor([10]), '5': tensor([5]), '6': tensor([0]), '7': tensor([5]), '8': tensor([9]), '9': tensor([0]), '10': tensor([7]), '11': tensor([2]), '12': tensor([11]), '13': tensor([5]), '14': tensor([10]), '15': tensor([0]), '16': tensor([4]), '17': tensor([7]), '18': tensor([0]), '19': tensor([9]), '20': tensor([10])}
{'1': tensor([3]), '2': tensor([4]), '3': tensor([10]), '4': tensor([6]), '5': tensor([10]), '6': tensor([11]), '7': tensor([11]), '8': tensor([0]), '9': tensor([9]), '10': tensor([6]), '11': tensor([0]), '12': tensor([5]), '13': tensor([5]), '14': tensor([8]), '15': tensor([1]), '16': tensor([11]), '17': tensor([5]), '18': tensor([6]), '19': tensor([6]), '20': tensor([4])}
{'1': tensor([8]), '2': tensor([9]), '3': tensor([7]), '4': tensor([1]), '5': tensor([4]), '6': tensor([6]), '7': tensor([2]), '8': tensor([0]), '9': tensor([4]), '10': tensor([11]), '11': tensor([10]), '12': tensor([11]), '13'

TypeError: conv2d() received an invalid combination of arguments - got (list, Parameter, Parameter, tuple, tuple, tuple, int), but expected one of:
 * (Tensor input, Tensor weight, Tensor bias = None, tuple of ints stride = 1, tuple of ints padding = 0, tuple of ints dilation = 1, int groups = 1)
      didn't match because some of the arguments have invalid types: (!list of [dict, dict, dict, dict, dict, dict, dict, dict, dict, dict, dict, dict, dict, dict, dict, dict, dict, dict, dict, dict, dict, dict, dict, dict, dict, dict, dict, dict, dict, dict, dict, dict, dict, dict, dict, dict, dict, dict, dict, dict, dict, dict, dict, dict, dict, dict, dict, dict, dict, dict, dict, dict, dict, dict, dict, dict, dict, dict, dict, dict, dict, dict, dict, dict]!, !Parameter!, !Parameter!, !tuple of (int, int)!, !tuple of (int, int)!, !tuple of (int, int)!, !int!)
 * (Tensor input, Tensor weight, Tensor bias = None, tuple of ints stride = 1, str padding = "valid", tuple of ints dilation = 1, int groups = 1)
      didn't match bec