In [1]:
%load_ext autoreload 
%autoreload 2

In [2]:
import sys
sys.tracebacklimit = 0

import numpy as np
import networkx as nx 
import matplotlib.pyplot as plt

In [3]:
from pettingzoo.test import parallel_api_test
from solution.custom_gym import CustomGymEnviornment
from solution.trainer import *
from solution.policy_net import *

In [4]:
from core.agent import *
from core.world import * 
from core.render import * 
from core.skill import * 
from core.models import *
from core.message import *

In [5]:
# Device string handed to the trait sampler, the model, the environment and the IDQN trainer below.
DEVICE = "cuda"
# Belief size shared by the belief initializer, ComplexModel and the action interpreter.
BELIEF_DIMS = 5


In [None]:
from sar.sar_agent import *
from sar.sar_world import *
from sar.sar_env_params import MAX_TIME_STEPS
from sar.sar_traits_sampler import *

# Notebook-level helpers consumed by initialize_swarm (defined in this cell).
belief_initializer = SARBeliefInitializer(BELIEF_DIMS)
trait_sampler = SARTraitSampler()

def initialize_swarm(world : BaseWorld):
    """Create the agent swarm and register every agent with ``world``.

    Agents are produced by the notebook-level ``trait_sampler`` (``SWARM_SIZE``
    agents on ``DEVICE``). Each one receives a fresh ``SARUtilityFunction`` and
    is added to the world; afterwards the swarm is passed through
    ``initialize_positions_randomly`` and ``belief_initializer.initialize_beliefs``.

    Nothing is returned: the function works purely through ``world.add_agent``
    and whatever the two initialisation helpers do to the agents.
    """
    agents = trait_sampler.generate(SWARM_SIZE, DEVICE)

    for member in agents:
        member.set_utility(SARUtilityFunction())
        world.add_agent(member)

    # The belief initializer receives whatever the position helper hands back.
    positioned = initialize_positions_randomly(world, agents)
    belief_initializer.initialize_beliefs(positioned)

In [7]:

from sar.urban_gen import * 
from sar.victims import * 
from sar.sar_comm import *
from utils.geo import * 

import os

# Map generators used by initialize_terrain; both are padded by MAX_VISIBILITY.
terrain_generator = UrbanTerrainMapGenerator(padding = MAX_VISIBILITY)
victim_generator = VictimGenerator(padding = MAX_VISIBILITY)

# Location of the elevation GeoTIFF. The default is the original machine-specific
# path; set the GEO_TIFF_PATH environment variable to point at the dataset on
# any other machine instead of editing the notebook.
GEO_TIFF_PATH = os.environ.get(
    "GEO_TIFF_PATH",
    "C:\\Users\\Joaquin\\Desktop\\Thesis-Playground\\datasets\\output_SRTMGL1.tif",
)

def initialize_terrain(world : BaseWorld):
    """Build the map collection (terrain, population, victims) for ``world``.

    The terrain layer is currently a flat, all-zero placeholder that reuses the
    shape and padding of the generated terrain map. The population layer comes
    straight from ``terrain_generator`` and is also used as the density map for
    victim placement.

    Args:
        world: World whose ``_dims`` determine the size of the generated maps.

    Returns:
        A ``BaseMapCollection`` holding the maps "Terrain", "Population" and
        "Victims".
    """
    generated_terrain, population_map = terrain_generator.generate(world._dims)

    # Flat placeholder terrain. The trailing 0 and 1 take the positions that the
    # disabled elevation variant below fills with the height minimum/maximum.
    terrain_map = TerrainMap(
        np.zeros_like(generated_terrain._map), generated_terrain._padding, 0, 1
    )

    # Real-elevation variant (disabled). The GeoTIFF used to be read here on
    # every call even though its contents were never used, which made the
    # function fail on any machine without that file. The read now lives inside
    # the disabled snippet; re-enable the whole snippet to use real heights.
    #
    # geo_map = read_tiff(GEO_TIFF_PATH)[0]
    # tx, ty = geo_map.shape
    # if tx > world._dims[0] or ty > world._dims[1]:
    #     start_x = np.random.randint(0, tx - world._dims[0] + 1)
    #     start_y = np.random.randint(0, ty - world._dims[1] + 1)
    #     height_map = geo_map[start_x:start_x + world._dims[0], start_y:start_y + world._dims[1]]
    #     minimum = height_map.min()
    #     maximum = height_map.max()
    #     terrain_map = TerrainMap(height_map, generated_terrain._padding, minimum, maximum)

    map_collection : BaseMapCollection = BaseMapCollection()
    map_collection.add_map("Terrain", terrain_map)
    map_collection.add_map("Population", population_map)

    # Victim count is drawn from NumPy's global RNG: an integer in [300, 400).
    total_victims = np.random.randint(300, 400)
    victim_generator.set_density_map(population_map)
    victim_map = victim_generator.generate(world._dims, total_victims)

    map_collection.add_map("Victims", victim_map)
    return map_collection


In [8]:
from sar.energy import EnergyModel
from sar.victims import VictimModel
from solution.sar_action_interpreter import *
from solution.encoder_net import *
from solution.decoder_net import *
from solution.complex_model import * 

# Build the SAR world from the two callbacks defined in earlier cells.
# WORLD_DIMS is not defined in this notebook; presumably it arrives through one
# of the star imports — TODO confirm which module provides it.
world = SARWorld(dims = WORLD_DIMS,
              swarm_initializer= initialize_swarm,
              generation_pipeline=initialize_terrain
              )
# Register the energy and victim models under the names "energy_model" and
# "victim_model", then reset the world.
world.add_model("energy_model", EnergyModel())
world.add_model("victim_model", VictimModel())
world.reset()

In [None]:
# Evaluates to 7. NOTE(review): looks like the side length of a square window of
# radius 3 (the largest 'Vision' grid in the reset output is 7x7) — confirm, and
# consider deriving it from the visibility constant instead of a literal.
grid_size = 2 * 3 + 1

# Network bundle; its encoder/decoder sub-networks are reused by the
# communication protocol below.
complex_model = ComplexModel(
    action_dims= 4,
    belief_dims= BELIEF_DIMS,
    grid_size=grid_size, 
    latent_dims= 16, 
    state_dims= 3,
    trait_dims= 3,
    packet_dims= 2, 
)

# The protocol is built from the model's private encoder/decoder attributes.
comms_protocol = SARCommunicationProtocol(complex_model._encoder_net, complex_model._decoder_net)
action_interpreter = SARActionInterpreter(BELIEF_DIMS)

# Environment wrapper around the world; MAX_TIME_STEPS is passed as time_step_upper.
custom_gym : CustomGymEnviornment = CustomGymEnviornment(world, action_interpreter, comms_protocol, time_step_upper= MAX_TIME_STEPS)

# Move both the model and the environment to DEVICE.
complex_model.to(DEVICE)
custom_gym.to(DEVICE)

In [10]:
# Reset the environment with 42 (presumably the seed — confirm against
# CustomGymEnviornment.reset); the returned value is shown as the cell output.
custom_gym.reset(42)

({1: {'Belief': tensor([0., 0., 0., 0., 0.], device='cuda:0'),
   'Vision': array([[0., 0., 0.],
          [1., 0., 0.],
          [0., 0., 0.]]),
   'State': tensor([0.1036, 0.0000, 0.3712], device='cuda:0')},
  2: {'Belief': tensor([0., 0., 0., 0., 0.], device='cuda:0'),
   'Vision': array([[0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0.]]),
   'State': tensor([0.0993, 0.0000, 0.5749], device='cuda:0')},
  3: {'Belief': tensor([0., 0., 0., 0., 0.], device='cuda:0'),
   'Vision': array([[0., 0., 0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0., 0., 0.]]),
   'State': tensor([0.1356, 1.0000, 0.6721], device='cuda:0')},
  4: {'Belief': tensor([0., 0., 0., 0., 0.], device='cuda:0'),
   'Visi

### Testing

In [11]:
# Run PettingZoo's parallel API test against the environment for 1,000 cycles,
# then reset the environment again before training.
parallel_api_test(custom_gym, num_cycles=1_000)
custom_gym.reset()

Passed Parallel API test


({1: {'Belief': tensor([0., 0., 0., 0., 0.], device='cuda:0'),
   'Vision': array([[0., 0., 0.],
          [1., 0., 0.],
          [0., 0., 0.]]),
   'State': tensor([0.1036, 0.0000, 0.3712], device='cuda:0')},
  2: {'Belief': tensor([0., 0., 0., 0., 0.], device='cuda:0'),
   'Vision': array([[0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0.]]),
   'State': tensor([0.0993, 0.0000, 0.5749], device='cuda:0')},
  3: {'Belief': tensor([0., 0., 0., 0., 0.], device='cuda:0'),
   'Vision': array([[0., 0., 0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0., 0., 0.]]),
   'State': tensor([0.1356, 1.0000, 0.6721], device='cuda:0')},
  4: {'Belief': tensor([0., 0., 0., 0., 0.], device='cuda:0'),
   'Visi

# Training

In [12]:

from models.base import * 
from models.idqn import * 
import matplotlib.pyplot as plt

In [13]:
# IDQN trainer wired to the environment and the ComplexModel.
# NOTE(review): `feature_extractor` is not defined in any visible cell;
# presumably it is provided by one of the star imports — confirm its origin.
# The buffer size is ten times the environment's private `_max_time_steps`.
model = IDQN(env = custom_gym,
             feature_extractor= feature_extractor,
             target_net= complex_model.get_target_net(),
             model= complex_model,
             batch_size=1024,
             device = DEVICE,
             buffer_size= custom_gym._max_time_steps * 10
             )

In [None]:
# Run the training loop for 100 games with seed 42 and one optimization pass;
# the returned value is kept in `rewards` and plotted in a later cell.
rewards = train_loop(custom_gym, model, games=100, optimization_passes = 1, seed=42)

Training on thesis.


  return F.mse_loss(input, target, reduction=self.reduction)


Average loss 51.2285943326354
Model has been saved.

Starting evaluation on thesis (num_games=1)


Training Progress:   1%|          | 1/100 [00:50<1:23:37, 50.69s/it]

Avg reward: 39.79889553744083  std: 14.165383541106515  coeff : 0.35592403632860675
Avg reward per agent, per game:  {1: 61.882548268429304, 2: 51.508904996825706, 3: 50.58010237085925, 4: 54.10351748071799, 5: 28.750457898944926, 6: 40.97518940799185, 7: 43.441927482803436, 8: 36.35102845163457, 9: 44.3214517132501, 10: 61.64659141609993, 11: 37.84150076725515, 12: 7.964171159665513, 13: 41.60687701972212, 14: 49.71631422890412, 15: 36.004239121678324, 16: 32.106550203209345, 17: 35.44363452838081, 18: 33.79796364191898, 19: 46.0205109440262, 20: 56.220040334365585, 21: 35.331682789418835, 22: 73.48589531194646, 23: 30.280594933034813, 24: 11.714975898647216, 25: 43.55301427555089, 26: 29.378922625600982, 27: 38.44726094216531, 28: 46.8073546387347, 29: 34.96590229759247, 30: 22.06138530200444, 31: 25.388271268112174, 32: 52.80399727865071, 33: 42.71470304363648, 34: 44.0654387938991, 35: 35.88085382998589, 36: 33.100590534004596, 37: 44.95827040746468, 38: 20.329072059931285, 39: 29.

Training Progress:   2%|▏         | 2/100 [01:37<1:18:48, 48.26s/it]

Avg reward: 40.19796133382961  std: 13.83226911458538  coeff : 0.34410374694660156
Avg reward per agent, per game:  {1: 65.45742334427952, 2: 52.176241246983324, 3: 44.36336126353222, 4: 55.58070067627587, 5: 30.073894966301783, 6: 39.18912123326694, 7: 44.829418831672356, 8: 39.90972925150813, 9: 46.66191780310948, 10: 67.06796891532989, 11: 33.224823597925166, 12: 9.477392189115138, 13: 50.932494931771764, 14: 45.631800332890926, 15: 30.84912629780107, 16: 30.704712026476383, 17: 43.81459120138688, 18: 31.458598702166526, 19: 39.05611292883611, 20: 52.30065125009231, 21: 36.21569113818486, 22: 72.0181108889331, 23: 27.65148932198976, 24: 14.434296429240138, 25: 40.028736871958586, 26: 29.26373631585808, 27: 40.64185341362618, 28: 48.72066008692117, 29: 28.706835152871083, 30: 25.772118372002623, 31: 25.665327900638275, 32: 49.30609182413069, 33: 45.49019656463518, 34: 47.76251821256099, 35: 37.61347625913322, 36: 35.559312319152575, 37: 39.39221926439616, 38: 31.297521312728602, 39: 

Training Progress:   3%|▎         | 3/100 [02:28<1:19:51, 49.40s/it]

Avg reward: 39.98566315639489  std: 14.763034743510092  coeff : 0.3692082005934931
Avg reward per agent, per game:  {1: 66.90050325548543, 2: 55.11699977450654, 3: 47.499979181220866, 4: 61.29796729712099, 5: 30.409876181015978, 6: 40.647370972327536, 7: 33.44475746556791, 8: 33.063960024969674, 9: 51.40539305043247, 10: 63.18486680960352, 11: 40.30710200416873, 12: 13.379663596847477, 13: 46.76242883516321, 14: 44.143141750124705, 15: 37.93453448515114, 16: 42.557476738052976, 17: 41.38653382237483, 18: 37.23526627561905, 19: 46.73368143498587, 20: 54.99561017524929, 21: 37.30392846787278, 22: 72.91037241362649, 23: 30.195003205143543, 24: 17.613064745203562, 25: 42.0986789682495, 26: 32.46896655644253, 27: 44.54517617991799, 28: 46.60905275176081, 29: 40.29085963423773, 30: 24.073109812038563, 31: 28.6183842586102, 32: 50.00388024105728, 33: 52.69200782679832, 34: 45.93515603728163, 35: 30.69795180934558, 36: 37.886982733977426, 37: 52.14212336856441, 38: 21.694145541372173, 39: 26.7

Training Progress:   4%|▍         | 4/100 [03:16<1:18:22, 48.98s/it]

Avg reward: 40.860569521754954  std: 13.953547066623077  coeff : 0.3414917420373678
Avg reward per agent, per game:  {1: 63.54541862416227, 2: 50.40441425255806, 3: 49.84052720659927, 4: 49.550344210801995, 5: 30.472659633753924, 6: 38.54233336813792, 7: 34.83132130809141, 8: 36.13861107649932, 9: 44.16628294819599, 10: 69.58016011815316, 11: 36.80113794459133, 12: 9.644051137818419, 13: 51.10762102844874, 14: 38.943119772334455, 15: 36.33560482725596, 16: 31.76957729145872, 17: 31.799322601539952, 18: 51.73024115282381, 19: 43.21530851204836, 20: 54.996697993194395, 21: 33.52795708100257, 22: 72.14523335085616, 23: 23.39967301805327, 24: 12.020341441312784, 25: 43.61906831153793, 26: 30.035544651202194, 27: 37.09371950342792, 28: 55.210646978711395, 29: 33.6456749939658, 30: 24.036794666770284, 31: 26.17662586845501, 32: 47.59201634960824, 33: 52.153396199818815, 34: 51.867513073231265, 35: 31.188269333615622, 36: 34.18466905734433, 37: 48.771955534906446, 38: 25.931771953727036, 39: 

Training Progress:   5%|▌         | 5/100 [04:05<1:17:47, 49.13s/it]

Avg reward: 39.82841813694346  std: 13.608492216878037  coeff : 0.34167794889788183
Avg reward per agent, per game:  {1: 63.764133342324975, 2: 56.66717720310278, 3: 52.18050629988549, 4: 54.54723952248228, 5: 29.640716063247226, 6: 45.34889656326486, 7: 35.92203948252291, 8: 34.891170084865486, 9: 44.060073711375814, 10: 62.85651340923256, 11: 39.90974047732696, 12: 10.178256711922158, 13: 45.545503807759076, 14: 38.505014468783536, 15: 29.87842270897648, 16: 30.436889110425017, 17: 33.92377432145851, 18: 47.386001122801346, 19: 40.804567112378365, 20: 55.3202354346887, 21: 34.62983764475295, 22: 58.70907141682577, 23: 29.34392257573941, 24: 14.051054777251059, 25: 38.74799535645035, 26: 28.894840660423277, 27: 36.20820841454445, 28: 46.51024267113462, 29: 33.06081231078071, 30: 21.601733416649477, 31: 29.946294653150833, 32: 51.27299200136392, 33: 49.47273893857828, 34: 45.642736377185315, 35: 26.37786208348478, 36: 43.309416245455985, 37: 48.912697429102245, 38: 29.14547532522685, 3

In [None]:
# Plot the value returned by train_loop (presumably one reward entry per game — TODO confirm).
plt.plot(rewards)

# Optimizations

In [None]:
import cProfile

In [None]:
def stress_test():
    """Run a single seeded training game; used as the profiling target below."""
    single_game = dict(games=1, optimization_passes=1, seed=42)
    train_loop(custom_gym, model, **single_game)

In [None]:

# Profile one call of stress_test() and print the statistics sorted by 'time'.
cProfile.run('stress_test()', sort = 'time')

# Visualization

In [None]:
from models.base import RenderWrapper

# Wrap the IDQN instance built in the training section for rendering.
renderer = RenderWrapper(model=model)

In [None]:
# Invoke the wrapper's render method.
renderer.render()