In [1]:
%load_ext autoreload 
%autoreload 2

In [2]:
import sys
# Suppress traceback frames: an uncaught exception is reported with only its
# type and message (see the bare "KeyboardInterrupt:" in the training output).
sys.tracebacklimit = 0

import numpy as np
import networkx as nx 
import matplotlib.pyplot as plt

In [3]:
from pettingzoo.test import parallel_api_test
from solution.custom_gym import CustomGymEnviornment
from solution.trainer import *
from solution.policy_net import *

In [4]:
from core.agent import *
from core.world import * 
from core.render import * 
from core.skill import * 
from core.models import *
from core.message import *

In [5]:
import torch

# Device handed to the trait sampler, the model and the gym wrapper.
# Prefer the GPU, but fall back to the CPU so the notebook still runs on
# machines without a usable CUDA device instead of failing at the first `.to()`.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Size of the belief vector; passed to SARBeliefInitializer, to
# ComplexModel(belief_dims=...) and to SARActionInterpreter.
BELIEF_DIMS = 5


In [6]:
from sar.sar_agent import *
from sar.sar_world import *
from sar.sar_env_params import *
from sar.sar_traits_sampler import *

# Module-level helpers consumed by initialize_swarm: the belief initializer is
# built with BELIEF_DIMS, and the trait sampler produces the agents of the swarm.
belief_initializer = SARBeliefInitializer(BELIEF_DIMS)
trait_sampler = SARTraitSampler()

def initialize_swarm(world : BaseWorld):
    """Create a new swarm and register it with ``world``.

    Samples ``SWARM_SIZE`` agents on ``DEVICE``, gives each one a fresh
    ``SARUtilityFunction`` and adds it to ``world``, then places the agents at
    random positions and initializes their beliefs.

    Args:
        world: The world that receives the agents.

    Returns:
        None. The world and the agents are updated through the calls below.
    """
    agents = trait_sampler.generate(SWARM_SIZE, DEVICE)

    for member in agents:
        member.set_utility(SARUtilityFunction())
        world.add_agent(member)

    # Positions first, beliefs second: the belief initializer receives whatever
    # the placement helper returns.
    placed_agents = initialize_positions_randomly(world, agents)
    belief_initializer.initialize_beliefs(placed_agents)

In [7]:

from sar.urban_gen import * 
from sar.victims import * 
from sar.sar_comm import *
from utils.geo import * 

# Generators used by initialize_terrain. Both are padded by MAX_VISIBILITY
# (presumably so an agent's view window never runs past the map edge — TODO confirm).
terrain_generator = UrbanTerrainMapGenerator(padding = MAX_VISIBILITY)
victim_generator = VictimGenerator(padding = MAX_VISIBILITY)

import os

# GeoTIFF raster read by initialize_terrain (an SRTM elevation tile, judging by
# the file name). Set the SAR_GEO_TIFF_PATH environment variable to point at the
# file on another machine; when it is unset the original location is used.
GEO_TIFF_PATH = os.environ.get(
    "SAR_GEO_TIFF_PATH",
    "C:\\Users\\Joaquin\\Desktop\\Thesis-Playground\\datasets\\output_SRTMGL1.tif",
)

def initialize_terrain(world : BaseWorld):
    """Build the map collection used to generate ``world``.

    Generates the terrain and population maps. When the GeoTIFF at
    ``GEO_TIFF_PATH`` is at least as large as the world on both axes, the
    generated terrain is replaced by a randomly positioned, world-sized crop of
    that raster. Finally a victim map with 300-399 victims is generated, using
    the population map as the victim generator's density map.

    Args:
        world: World whose ``_dims[0]`` and ``_dims[1]`` give the map size.

    Returns:
        A ``BaseMapCollection`` holding the "Terrain", "Population" and
        "Victims" maps.
    """
    dims = world._dims
    world_w, world_h = dims[0], dims[1]

    terrain_map, population_map = terrain_generator.generate(dims)
    geo_map = read_tiff(GEO_TIFF_PATH)[0]
    tx, ty = geo_map.shape

    # A world-sized crop only exists when the raster covers the world on BOTH
    # axes. Testing either axis alone would let a raster that is larger on one
    # axis but smaller on the other through, and np.random.randint would then
    # raise ValueError because its upper bound is <= 0. A raster that does not
    # cover the world keeps the generated terrain.
    if tx >= world_w and ty >= world_h:
        start_x = np.random.randint(0, tx - world_w + 1)
        start_y = np.random.randint(0, ty - world_h + 1)
        height_map = geo_map[start_x:start_x + world_w, start_y:start_y + world_h]
        # Reuse the generated map's padding; min/max come from the crop itself.
        terrain_map = TerrainMap(
            height_map,
            terrain_map._padding,
            height_map.min(),
            height_map.max(),
        )

    map_collection : BaseMapCollection = BaseMapCollection()
    map_collection.add_map("Terrain", terrain_map)
    map_collection.add_map("Population", population_map)

    # randint's upper bound is exclusive, so this yields 300..399 victims.
    total_victims = np.random.randint(300, 400)
    victim_generator.set_density_map(population_map)
    victim_map = victim_generator.generate(dims, total_victims)

    map_collection.add_map("Victims", victim_map)
    return map_collection


In [8]:
from sar.energy import EnergyModel
from sar.victims import VictimModel
from solution.sar_action_interpreter import *
from solution.encoder_net import *
from solution.decoder_net import *
from solution.complex_model import * 

# Assemble the SAR world from the two callbacks defined in the cells above:
# initialize_swarm populates the agents and initialize_terrain builds the maps.
world = SARWorld(dims = WORLD_DIMS,
              swarm_initializer= initialize_swarm,
              generation_pipeline=initialize_terrain
              )
# Register the energy and victim models under the names "energy_model" / "victim_model".
world.add_model("energy_model", EnergyModel())
world.add_model("victim_model", VictimModel())
# Presumably this is what invokes the two callbacks for the first time — TODO confirm.
world.reset()

In [9]:
# Evaluates to 7. Presumably a square window of side 2 * radius + 1 with
# radius 3 — TODO confirm and name the radius constant.
grid_size = 2 * 3 + 1

# Network dimensions. belief_dims and state_dims agree with the 5-element
# 'Belief' and 'State' tensors shown in the reset output below.
complex_model = ComplexModel(
    action_dims= 4,
    belief_dims= BELIEF_DIMS,
    grid_size=grid_size, 
    latent_dims= 16, 
    state_dims= 5,
    trait_dims= 3,
    packet_dims= 2, 
)

# The communication protocol shares the model's encoder and decoder networks.
comms_protocol = SARCommunicationProtocol(complex_model._encoder_net, complex_model._decoder_net)
action_interpreter = SARActionInterpreter(BELIEF_DIMS)

# Wrap the world, the action interpreter and the protocol in the gym environment.
custom_gym : CustomGymEnviornment = CustomGymEnviornment(world, action_interpreter, comms_protocol)

# Move both the model and the environment to the configured device.
complex_model.to(DEVICE)
custom_gym.to(DEVICE)

In [10]:
# Reset the environment and display what it returns (per-agent dicts with
# 'Belief', 'Vision' and 'State' entries, as shown in the output).
# 42 is presumably the seed, following PettingZoo's reset(seed, options) — TODO confirm.
custom_gym.reset(42)

({1: {'Belief': tensor([0., 0., 0., 0., 0.], device='cuda:0'),
   'Vision': array([[0., 0., 0.],
          [0., 0., 0.],
          [0., 0., 0.]]),
   'State': tensor([71.3182,  0.0000,  0.0000,  0.3500,  0.2000], device='cuda:0')},
  2: {'Belief': tensor([0., 0., 0., 0., 0.], device='cuda:0'),
   'Vision': array([[0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0.]]),
   'State': tensor([139.2945,   0.0000,   0.0000,   0.2500,   0.1600], device='cuda:0')},
  3: {'Belief': tensor([0., 0., 0., 0., 0.], device='cuda:0'),
   'Vision': array([[0., 0., 0.],
          [0., 0., 0.],
          [0., 0., 0.]]),
   'State': tensor([110.2918,   0.0000,   0.0000,   0.3800,   0.8500], device='cuda:0')},
  4: {'Belief': tensor([0., 0., 0., 0., 0.], device='cuda:0'),
   'Vision': array([[0., 0., 0.],
          [0., 0., 0.],
          [0., 0., 0.]]),
   'State': tensor([97.7534,  0.0000,  0.0000,  0.5600,  0.

### Testing

In [11]:
# Run PettingZoo's parallel API test against the environment for 1,000 cycles.
parallel_api_test(custom_gym, num_cycles=1_000)
# Reset afterwards, presumably to discard the state left behind by the test — confirm.
custom_gym.reset()

Passed Parallel API test


({1: {'Belief': tensor([0., 0., 0., 0., 0.], device='cuda:0'),
   'Vision': array([[0., 0., 0.],
          [0., 0., 0.],
          [0., 0., 0.]]),
   'State': tensor([71.3182,  0.0000,  0.0000,  0.3500,  0.2000], device='cuda:0')},
  2: {'Belief': tensor([0., 0., 0., 0., 0.], device='cuda:0'),
   'Vision': array([[0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0.]]),
   'State': tensor([139.2945,   0.0000,   0.0000,   0.2500,   0.1600], device='cuda:0')},
  3: {'Belief': tensor([0., 0., 0., 0., 0.], device='cuda:0'),
   'Vision': array([[0., 0., 0.],
          [0., 0., 0.],
          [0., 0., 0.]]),
   'State': tensor([110.2918,   0.0000,   0.0000,   0.3800,   0.8500], device='cuda:0')},
  4: {'Belief': tensor([0., 0., 0., 0., 0.], device='cuda:0'),
   'Vision': array([[0., 0., 0.],
          [0., 0., 0.],
          [0., 0., 0.]]),
   'State': tensor([97.7534,  0.0000,  0.0000,  0.5600,  0.

# Training

In [12]:

from models.base import * 
from models.idqn import * 
import matplotlib.pyplot as plt

In [None]:
# Build the IDQN learner around the environment and the ComplexModel.
# NOTE(review): `feature_extractor` is not assigned in any visible cell; it has
# to come from one of the star imports, otherwise this raises NameError on a
# fresh kernel — confirm where it is defined.
model = IDQN(env = custom_gym,
             feature_extractor= feature_extractor,
             target_net= complex_model.get_target_net(),
             model= complex_model,
             batch_size=512,
             device = DEVICE,
             # Ten times the environment's _max_time_steps.
             buffer_size= custom_gym._max_time_steps * 10
             )

In [None]:
# Train for 100 games with 2 optimization passes each, seeded with 42.
# The return value is kept in `rewards` for the plot cell below.
rewards = train_loop(custom_gym, model, games=100, optimization_passes = 2, seed=42)

Training on thesis.


  return F.mse_loss(input, target, reduction=self.reduction)


Average loss 25.352660527825357
Model has been saved.

Starting evaluation on thesis (num_games=1)


Training Progress:   1%|          | 1/100 [00:11<19:27, 11.80s/it]

Avg reward: 1.22  std: 1.1097747519204066  coeff : 0.9096514360003334
Avg reward per agent, per game:  {1: 2.0, 2: 3.0, 3: 2.0, 4: 0.0, 5: 0.0, 6: 1.0, 7: 2.0, 8: 2.0, 9: 1.0, 10: 1.0, 11: 0.0, 12: 1.0, 13: 1.0, 14: 2.0, 15: 0.0, 16: 0.0, 17: 1.0, 18: 1.0, 19: 0.0, 20: 1.0, 21: 0.0, 22: 3.0, 23: 1.0, 24: 0.0, 25: 1.0, 26: 0.0, 27: 2.0, 28: 1.0, 29: 1.0, 30: 1.0, 31: 1.0, 32: 0.0, 33: 2.0, 34: 0.0, 35: 1.0, 36: 1.0, 37: 3.0, 38: 0.0, 39: 1.0, 40: 2.0, 41: 3.0, 42: 3.0, 43: 0.0, 44: 1.0, 45: 3.0, 46: 1.0, 47: 3.0, 48: 4.0, 49: 0.0, 50: 0.0, 51: 1.0, 52: 1.0, 53: 1.0, 54: 1.0, 55: 0.0, 56: 0.0, 57: 0.0, 58: 1.0, 59: 2.0, 60: 5.0, 61: 2.0, 62: 1.0, 63: 3.0, 64: 0.0, 65: 1.0, 66: 0.0, 67: 2.0, 68: 2.0, 69: 1.0, 70: 1.0, 71: 1.0, 72: 0.0, 73: 1.0, 74: 1.0, 75: 1.0, 76: 1.0, 77: 1.0, 78: 1.0, 79: 0.0, 80: 3.0, 81: 3.0, 82: 3.0, 83: 0.0, 84: 4.0, 85: 1.0, 86: 0.0, 87: 1.0, 88: 3.0, 89: 1.0, 90: 1.0, 91: 1.0, 92: 2.0, 93: 2.0, 94: 0.0, 95: 0.0, 96: 1.0, 97: 0.0, 98: 3.0, 99: 0.0, 100: 1.0}
Aver

Training Progress:   2%|▏         | 2/100 [00:33<29:04, 17.80s/it]

Avg reward: 1.15  std: 1.1169153951844337  coeff : 0.9712307784212468
Avg reward per agent, per game:  {1: 2.0, 2: 0.0, 3: 1.0, 4: 1.0, 5: 0.0, 6: 2.0, 7: 2.0, 8: 0.0, 9: 0.0, 10: 2.0, 11: 2.0, 12: 1.0, 13: 0.0, 14: 2.0, 15: 0.0, 16: 1.0, 17: 2.0, 18: 0.0, 19: 0.0, 20: 1.0, 21: 2.0, 22: 1.0, 23: 1.0, 24: 1.0, 25: 2.0, 26: 0.0, 27: 2.0, 28: 0.0, 29: 0.0, 30: 1.0, 31: 1.0, 32: 0.0, 33: 0.0, 34: 0.0, 35: 1.0, 36: 4.0, 37: 2.0, 38: 0.0, 39: 1.0, 40: 0.0, 41: 0.0, 42: 1.0, 43: 0.0, 44: 3.0, 45: 5.0, 46: 2.0, 47: 0.0, 48: 1.0, 49: 0.0, 50: 1.0, 51: 0.0, 52: 3.0, 53: 1.0, 54: 1.0, 55: 1.0, 56: 3.0, 57: 2.0, 58: 1.0, 59: 0.0, 60: 2.0, 61: 0.0, 62: 0.0, 63: 1.0, 64: 2.0, 65: 1.0, 66: 4.0, 67: 1.0, 68: 2.0, 69: 1.0, 70: 0.0, 71: 0.0, 72: 0.0, 73: 3.0, 74: 1.0, 75: 1.0, 76: 1.0, 77: 2.0, 78: 4.0, 79: 0.0, 80: 0.0, 81: 1.0, 82: 1.0, 83: 1.0, 84: 3.0, 85: 2.0, 86: 0.0, 87: 1.0, 88: 1.0, 89: 2.0, 90: 1.0, 91: 4.0, 92: 1.0, 93: 1.0, 94: 0.0, 95: 2.0, 96: 0.0, 97: 1.0, 98: 2.0, 99: 2.0, 100: 0.0}


Training Progress:   2%|▏         | 2/100 [00:47<38:24, 23.51s/it]


KeyboardInterrupt: 

In [None]:
# Plot the value returned by train_loop.
# NOTE(review): `rewards` is only assigned when the training cell runs to
# completion; after an interrupted run (as in the saved output) this cell fails
# unless `rewards` survives from an earlier run.
plt.plot(rewards)

# Optimizations

In [None]:
import cProfile

In [None]:
def stress_test():
    """Run one short training game; this is the call profiled with cProfile."""
    single_game_settings = dict(games=1, optimization_passes=1, seed=42)
    train_loop(custom_gym, model, **single_game_settings)

In [None]:

# Profile a single stress_test() call and print the stats sorted by 'time'
# (time spent inside each function itself, excluding sub-calls).
cProfile.run('stress_test()', sort = 'time')

# Visualization

In [None]:
from models.base import RenderWrapper

# Wrap the IDQN `model` built in the training section for rendering.
renderer = RenderWrapper(model=model)

In [None]:
# Render using the wrapped model (what is drawn is defined by RenderWrapper).
renderer.render()