In [1]:
# Load IPython's autoreload extension and select mode 2, so edits to the imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload 
%autoreload 2

In [2]:
import sys
# Per the Python docs, a tracebacklimit of 0 suppresses traceback entries so only the
# exception type and value are printed. This is a process-wide setting and makes
# failures in later cells harder to debug.
# NOTE(review): confirm this has the intended effect under IPython's own traceback formatting.
sys.tracebacklimit = 0

import numpy as np
import networkx as nx 
import matplotlib.pyplot as plt

In [3]:
from pettingzoo.test import parallel_api_test
from solution.custom_gym import CustomGymEnviornment
from solution.trainer import *
from solution.policy_net import *

In [4]:
from core.agent import *
from core.world import * 
from core.render import * 
from core.skill import * 
from core.models import *
from core.message import *

In [5]:
# Device string shared by every later cell (agent sampling, `.to(DEVICE)`, IDQN).
# Prefer the GPU, but fall back to the CPU so the notebook still runs end-to-end on
# machines without CUDA instead of failing at the first use of DEVICE.
import torch

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

In [6]:
from sar.sar_agent import *
from sar.sar_world import *
from sar.sar_env_params import *
from sar.sar_traits_sampler import *

# Module-level helpers used by initialize_swarm below.
# BELIEF_DIMS is not defined in this cell; presumably it comes from one of the
# star imports above (e.g. sar.sar_env_params) — verify.
belief_initializer = SARBeliefInitializer(BELIEF_DIMS)
trait_sampler = SARTraitSampler()

def initialize_swarm(world : BaseWorld):
    """Populate ``world`` with a freshly sampled swarm.

    Samples ``SWARM_SIZE`` agents for ``DEVICE`` from the module-level
    ``trait_sampler``, gives each one a new ``SARUtilityFunction`` and registers
    it with ``world``, then passes the swarm through
    ``initialize_positions_randomly`` and ``belief_initializer.initialize_beliefs``.

    Args:
        world: World object that receives every agent via ``add_agent``.

    Returns:
        None. The initialized swarm itself is not returned to the caller.
    """
    agents = trait_sampler.generate(SWARM_SIZE, DEVICE)
    for member in agents:
        # A separate utility-function instance is created for every agent.
        member.set_utility(SARUtilityFunction())
        world.add_agent(member)
    placed_agents = initialize_positions_randomly(world, agents)
    # The result of belief initialization is not used further in this function.
    belief_initializer.initialize_beliefs(placed_agents)

In [7]:

from sar.urban_gen import * 
from sar.victims import * 
from sar.sar_comm import * 

# Module-level generators used by initialize_terrain below; both are constructed
# with the same padding value, MAX_VISIBILITY.
terrain_generator = UrbanTerrainMapGenerator(padding = MAX_VISIBILITY)
victim_generator = VictimGenerator(padding = MAX_VISIBILITY)
def initialize_terrain(world : BaseWorld):
    """Generate the map layers for ``world`` and return them as one collection.

    The terrain generator produces a terrain map and a population map for
    ``world._dims``. The population map is then installed as the victim
    generator's density map before the victim map is generated for the same
    dimensions.

    Args:
        world: World whose ``_dims`` attribute sizes every generated map.

    Returns:
        A ``BaseMapCollection`` holding the maps registered under the names
        "Terrain", "Population" and "Victims".
    """
    dims = world._dims
    terrain_layer, population_layer = terrain_generator.generate(dims)

    layers : BaseMapCollection = BaseMapCollection()
    layers.add_map("Terrain", terrain_layer)
    layers.add_map("Population", population_layer)

    # The density map must be set before the victim map is generated.
    victim_generator.set_density_map(population_layer)
    layers.add_map("Victims", victim_generator.generate(dims))
    return layers


In [8]:
from sar.energy import EnergyModel
from sar.victims import VictimModel
from solution.sar_action_interpreter import *
from solution.encoder_net import *
from solution.decoder_net import *
from models.complex_model import * 

# Build the world from the two callbacks defined in the earlier cells.
world = SARWorld(
    dims=WORLD_DIMS,
    swarm_initializer=initialize_swarm,
    generation_pipeline=initialize_terrain,
)

# Register the simulation models under the names "energy_model" and "victim_model".
world.add_model("energy_model", EnergyModel())
world.add_model("victim_model", VictimModel())

# Reset only after both models have been added.
world.reset()

In [None]:
# Grid size handed to both PolicyNet constructors; 2 * 3 + 1 evaluates to 7.
# NOTE(review): the 3 looks like a visibility radius (window = 2 * radius + 1) —
# confirm, and consider deriving it from a named constant instead of a literal.
grid_size = 2 * 3 + 1
# Two separately constructed PolicyNet instances with identical arguments:
# policy_net goes into complex_model below, target_net is passed to IDQN in a later cell.
policy_net= PolicyNet(1, grid_size, 4)
target_net=  PolicyNet(1, grid_size, 4)
encoder_model = Encoder()
decoder_model = Decoder()

# Bundle the policy, encoder and decoder networks into a single model object.
complex_model = ComplexModel(
    policy_net= policy_net, 
    encoder_net = encoder_model, 
    decoder_net = decoder_model
)

# The communication protocol receives the same encoder/decoder instances as complex_model.
comms_protocol = SARCommunicationProtocol(encoder_model, decoder_model)
action_interpreter = SARActionInterpreter(BELIEF_DIMS)

# Wrap the world, action interpreter and communication protocol in the gym environment.
custom_gym : CustomGymEnviornment = CustomGymEnviornment(world, action_interpreter, comms_protocol)

# Move the bundled model and the environment to DEVICE.
# NOTE(review): target_net is not part of complex_model and is not moved to DEVICE
# in this cell — confirm that IDQN (which receives device=DEVICE) takes care of it.
complex_model.to(DEVICE)
custom_gym.to(DEVICE)

In [10]:
# Reset the environment, passing 42 as the first positional argument
# (presumably the seed — confirm against CustomGymEnviornment.reset).
# As the cell's last expression, the return value is displayed below.
custom_gym.reset(42)

({1: {'Belief': tensor([0., 0., 0., 0., 0.], device='cuda:0'),
   'Vision': array([[0., 0., 0., 1., 1.],
          [0., 0., 1., 0., 0.],
          [1., 0., 0., 0., 1.],
          [0., 0., 1., 0., 0.],
          [1., 0., 0., 0., 0.]]),
   'State': tensor([90.3885,  0.0000,  0.0000], device='cuda:0')},
  2: {'Belief': tensor([0., 0., 0., 0., 0.], device='cuda:0'),
   'Vision': array([[1., 1., 1., 1., 0.],
          [1., 1., 1., 0., 1.],
          [0., 1., 0., 0., 1.],
          [1., 1., 1., 0., 1.],
          [0., 0., 0., 0., 0.]]),
   'State': tensor([107.1394,   1.0000,   1.0000], device='cuda:0')},
  3: {'Belief': tensor([0., 0., 0., 0., 0.], device='cuda:0'),
   'Vision': array([[1., 0., 1.],
          [0., 0., 0.],
          [0., 1., 1.]]),
   'State': tensor([99.2335,  1.0000,  1.0000], device='cuda:0')},
  4: {'Belief': tensor([0., 0., 0., 0., 0.], device='cuda:0'),
   'Vision': array([[1., 1., 0., 1., 0.],
          [1., 1., 1., 0., 1.],
          [1., 0., 0., 1., 1.],
          

# Testing

In [11]:
# Run PettingZoo's parallel_api_test against the environment for 1_000 cycles.
parallel_api_test(custom_gym, num_cycles=1_000)
# Reset once more after the test, this time without arguments; as the cell's last
# expression, the return value is displayed below.
custom_gym.reset()

Passed Parallel API test


({1: {'Belief': tensor([0., 0., 0., 0., 0.], device='cuda:0'),
   'Vision': array([[0., 0., 0., 1., 1.],
          [0., 0., 1., 0., 0.],
          [1., 0., 0., 0., 1.],
          [0., 0., 1., 0., 0.],
          [1., 0., 0., 0., 0.]]),
   'State': tensor([90.3885,  0.0000,  0.0000], device='cuda:0')},
  2: {'Belief': tensor([0., 0., 0., 0., 0.], device='cuda:0'),
   'Vision': array([[1., 1., 1., 1., 0.],
          [1., 1., 1., 0., 1.],
          [0., 1., 0., 0., 1.],
          [1., 1., 1., 0., 1.],
          [0., 0., 0., 0., 0.]]),
   'State': tensor([107.1394,   1.0000,   1.0000], device='cuda:0')},
  3: {'Belief': tensor([0., 0., 0., 0., 0.], device='cuda:0'),
   'Vision': array([[1., 0., 1.],
          [0., 0., 0.],
          [0., 1., 1.]]),
   'State': tensor([99.2335,  1.0000,  1.0000], device='cuda:0')},
  4: {'Belief': tensor([0., 0., 0., 0., 0.], device='cuda:0'),
   'Vision': array([[1., 1., 0., 1., 0.],
          [1., 1., 1., 0., 1.],
          [1., 0., 0., 1., 1.],
          

# Training

In [12]:

from models.base import * 
from models.idqn import * 
from solution.policy_net import PolicyNet
import matplotlib.pyplot as plt

In [13]:
# Build the IDQN learner around the environment, the bundled complex_model and the
# standalone target_net created earlier, with a batch size of 1024 on DEVICE.
# NOTE(review): `feature_extractor` is not assigned in any visible cell of this
# notebook; presumably it is provided by one of the star imports — verify that this
# cell still runs after Restart Kernel -> Run All.
model = IDQN(env = custom_gym,
             feature_extractor= feature_extractor,
             target_net= target_net,
             model= complex_model,
             batch_size=1024,
             device = DEVICE
             )

In [None]:
# Train `model` on `custom_gym` for 100 games with seed 42 and keep the result in `rewards`.
# How `optimization_passes` is applied is defined inside train_loop (not shown here).
rewards = train_loop(custom_gym, model, games=100, optimization_passes = 1, seed=42)

Training on thesis.


  return F.mse_loss(input, target, reduction=self.reduction)


Average loss 0.12871457014232873
Model has been saved.

Starting evaluation on thesis (num_games=1)


Training Progress:   1%|          | 1/100 [00:09<15:29,  9.38s/it]

Avg reward: 15.26  std: 6.71210846157897  coeff : 0.43984983365524055
Avg reward per agent, per game:  {1: 31.0, 2: 10.0, 3: 19.0, 4: 23.0, 5: 26.0, 6: 28.0, 7: 19.0, 8: 12.0, 9: 7.0, 10: 10.0, 11: 11.0, 12: 20.0, 13: 9.0, 14: 15.0, 15: 18.0, 16: 17.0, 17: 15.0, 18: 14.0, 19: 33.0, 20: 25.0, 21: 9.0, 22: 22.0, 23: 23.0, 24: 14.0, 25: 15.0, 26: 9.0, 27: 16.0, 28: 12.0, 29: 20.0, 30: 5.0, 31: 9.0, 32: 11.0, 33: 8.0, 34: 10.0, 35: 8.0, 36: 8.0, 37: 11.0, 38: 10.0, 39: 17.0, 40: 16.0, 41: 15.0, 42: 26.0, 43: 18.0, 44: 8.0, 45: 12.0, 46: 18.0, 47: 10.0, 48: 27.0, 49: 9.0, 50: 11.0, 51: 21.0, 52: 9.0, 53: 23.0, 54: 15.0, 55: 11.0, 56: 14.0, 57: 2.0, 58: 9.0, 59: 28.0, 60: 11.0, 61: 10.0, 62: 10.0, 63: 33.0, 64: 10.0, 65: 13.0, 66: 21.0, 67: 17.0, 68: 9.0, 69: 9.0, 70: 15.0, 71: 8.0, 72: 12.0, 73: 16.0, 74: 14.0, 75: 10.0, 76: 28.0, 77: 23.0, 78: 21.0, 79: 10.0, 80: 15.0, 81: 15.0, 82: 5.0, 83: 13.0, 84: 5.0, 85: 14.0, 86: 32.0, 87: 22.0, 88: 10.0, 89: 12.0, 90: 15.0, 91: 22.0, 92: 18.0, 93: 

Training Progress:   2%|▏         | 2/100 [00:18<15:17,  9.36s/it]

Avg reward: 16.03  std: 6.879614814798863  coeff : 0.4291712298689247
Avg reward per agent, per game:  {1: 20.0, 2: 8.0, 3: 23.0, 4: 12.0, 5: 16.0, 6: 25.0, 7: 18.0, 8: 21.0, 9: 13.0, 10: 21.0, 11: 8.0, 12: 23.0, 13: 12.0, 14: 21.0, 15: 17.0, 16: 17.0, 17: 23.0, 18: 13.0, 19: 22.0, 20: 25.0, 21: 15.0, 22: 24.0, 23: 24.0, 24: 10.0, 25: 15.0, 26: 10.0, 27: 18.0, 28: 18.0, 29: 15.0, 30: 8.0, 31: 13.0, 32: 14.0, 33: 11.0, 34: 13.0, 35: 11.0, 36: 19.0, 37: 9.0, 38: 11.0, 39: 34.0, 40: 18.0, 41: 7.0, 42: 34.0, 43: 12.0, 44: 16.0, 45: 16.0, 46: 25.0, 47: 26.0, 48: 24.0, 49: 5.0, 50: 9.0, 51: 31.0, 52: 8.0, 53: 27.0, 54: 10.0, 55: 2.0, 56: 7.0, 57: 1.0, 58: 10.0, 59: 21.0, 60: 15.0, 61: 12.0, 62: 17.0, 63: 19.0, 64: 25.0, 65: 20.0, 66: 23.0, 67: 17.0, 68: 7.0, 69: 14.0, 70: 10.0, 71: 23.0, 72: 6.0, 73: 11.0, 74: 7.0, 75: 12.0, 76: 30.0, 77: 13.0, 78: 18.0, 79: 16.0, 80: 9.0, 81: 8.0, 82: 9.0, 83: 17.0, 84: 20.0, 85: 15.0, 86: 18.0, 87: 13.0, 88: 21.0, 89: 21.0, 90: 14.0, 91: 11.0, 92: 24.0, 93

Training Progress:   3%|▎         | 3/100 [00:30<16:50, 10.42s/it]

Avg reward: 16.52  std: 7.078813459895663  coeff : 0.428499604109907
Avg reward per agent, per game:  {1: 27.0, 2: 8.0, 3: 23.0, 4: 20.0, 5: 32.0, 6: 34.0, 7: 9.0, 8: 35.0, 9: 15.0, 10: 16.0, 11: 13.0, 12: 19.0, 13: 20.0, 14: 23.0, 15: 16.0, 16: 13.0, 17: 12.0, 18: 16.0, 19: 27.0, 20: 29.0, 21: 20.0, 22: 16.0, 23: 17.0, 24: 15.0, 25: 21.0, 26: 8.0, 27: 20.0, 28: 10.0, 29: 16.0, 30: 5.0, 31: 10.0, 32: 20.0, 33: 11.0, 34: 14.0, 35: 11.0, 36: 13.0, 37: 8.0, 38: 19.0, 39: 26.0, 40: 13.0, 41: 7.0, 42: 20.0, 43: 15.0, 44: 16.0, 45: 18.0, 46: 11.0, 47: 20.0, 48: 18.0, 49: 21.0, 50: 11.0, 51: 25.0, 52: 7.0, 53: 17.0, 54: 15.0, 55: 5.0, 56: 14.0, 57: 13.0, 58: 30.0, 59: 8.0, 60: 14.0, 61: 16.0, 62: 11.0, 63: 23.0, 64: 8.0, 65: 16.0, 66: 14.0, 67: 18.0, 68: 7.0, 69: 9.0, 70: 2.0, 71: 22.0, 72: 9.0, 73: 25.0, 74: 11.0, 75: 9.0, 76: 27.0, 77: 27.0, 78: 24.0, 79: 9.0, 80: 15.0, 81: 11.0, 82: 9.0, 83: 18.0, 84: 17.0, 85: 17.0, 86: 23.0, 87: 10.0, 88: 13.0, 89: 27.0, 90: 11.0, 91: 29.0, 92: 27.0, 93:

Training Progress:   4%|▍         | 4/100 [00:40<16:45, 10.47s/it]

Avg reward: 15.59  std: 6.9326690386892125  coeff : 0.4446869171705717
Avg reward per agent, per game:  {1: 18.0, 2: 11.0, 3: 20.0, 4: 19.0, 5: 31.0, 6: 25.0, 7: 23.0, 8: 10.0, 9: 16.0, 10: 12.0, 11: 13.0, 12: 17.0, 13: 14.0, 14: 21.0, 15: 13.0, 16: 22.0, 17: 23.0, 18: 16.0, 19: 38.0, 20: 30.0, 21: 10.0, 22: 20.0, 23: 25.0, 24: 9.0, 25: 11.0, 26: 9.0, 27: 20.0, 28: 20.0, 29: 16.0, 30: 11.0, 31: 15.0, 32: 10.0, 33: 18.0, 34: 8.0, 35: 22.0, 36: 8.0, 37: 6.0, 38: 14.0, 39: 20.0, 40: 19.0, 41: 22.0, 42: 12.0, 43: 6.0, 44: 11.0, 45: 18.0, 46: 31.0, 47: 22.0, 48: 24.0, 49: 3.0, 50: 19.0, 51: 29.0, 52: 15.0, 53: 23.0, 54: 21.0, 55: 6.0, 56: 6.0, 57: 3.0, 58: 11.0, 59: 12.0, 60: 9.0, 61: 10.0, 62: 10.0, 63: 25.0, 64: 12.0, 65: 4.0, 66: 14.0, 67: 19.0, 68: 10.0, 69: 20.0, 70: 17.0, 71: 13.0, 72: 6.0, 73: 14.0, 74: 16.0, 75: 20.0, 76: 26.0, 77: 11.0, 78: 29.0, 79: 5.0, 80: 11.0, 81: 15.0, 82: 5.0, 83: 10.0, 84: 16.0, 85: 15.0, 86: 14.0, 87: 19.0, 88: 14.0, 89: 20.0, 90: 9.0, 91: 10.0, 92: 21.0, 

Training Progress:   5%|▌         | 5/100 [00:50<16:08, 10.20s/it]

Avg reward: 16.38  std: 7.673043724624539  coeff : 0.46843978782811596
Avg reward per agent, per game:  {1: 30.0, 2: 13.0, 3: 22.0, 4: 19.0, 5: 25.0, 6: 25.0, 7: 12.0, 8: 9.0, 9: 15.0, 10: 11.0, 11: 8.0, 12: 14.0, 13: 11.0, 14: 22.0, 15: 18.0, 16: 22.0, 17: 20.0, 18: 20.0, 19: 20.0, 20: 14.0, 21: 7.0, 22: 20.0, 23: 22.0, 24: 12.0, 25: 15.0, 26: 8.0, 27: 25.0, 28: 17.0, 29: 28.0, 30: 5.0, 31: 13.0, 32: 15.0, 33: 12.0, 34: 9.0, 35: 19.0, 36: 16.0, 37: 7.0, 38: 15.0, 39: 45.0, 40: 23.0, 41: 20.0, 42: 25.0, 43: 16.0, 44: 20.0, 45: 15.0, 46: 7.0, 47: 30.0, 48: 11.0, 49: 3.0, 50: 23.0, 51: 40.0, 52: 3.0, 53: 19.0, 54: 13.0, 55: 14.0, 56: 3.0, 57: 8.0, 58: 23.0, 59: 11.0, 60: 22.0, 61: 6.0, 62: 15.0, 63: 13.0, 64: 7.0, 65: 11.0, 66: 13.0, 67: 19.0, 68: 12.0, 69: 12.0, 70: 33.0, 71: 31.0, 72: 9.0, 73: 20.0, 74: 12.0, 75: 13.0, 76: 23.0, 77: 10.0, 78: 13.0, 79: 7.0, 80: 16.0, 81: 9.0, 82: 7.0, 83: 26.0, 84: 25.0, 85: 17.0, 86: 18.0, 87: 21.0, 88: 11.0, 89: 27.0, 90: 18.0, 91: 12.0, 92: 21.0, 93

Training Progress:   6%|▌         | 6/100 [01:01<16:07, 10.29s/it]

Avg reward: 16.41  std: 7.931071806508878  coeff : 0.48330723988475793
Avg reward per agent, per game:  {1: 20.0, 2: 17.0, 3: 28.0, 4: 16.0, 5: 24.0, 6: 26.0, 7: 15.0, 8: 39.0, 9: 7.0, 10: 12.0, 11: 8.0, 12: 19.0, 13: 21.0, 14: 19.0, 15: 22.0, 16: 12.0, 17: 17.0, 18: 23.0, 19: 32.0, 20: 17.0, 21: 14.0, 22: 16.0, 23: 14.0, 24: 11.0, 25: 12.0, 26: 16.0, 27: 29.0, 28: 17.0, 29: 24.0, 30: 9.0, 31: 13.0, 32: 20.0, 33: 7.0, 34: 19.0, 35: 9.0, 36: 10.0, 37: 8.0, 38: 18.0, 39: 30.0, 40: 36.0, 41: 11.0, 42: 21.0, 43: 8.0, 44: 23.0, 45: 7.0, 46: 14.0, 47: 17.0, 48: 28.0, 49: 5.0, 50: 21.0, 51: 31.0, 52: 13.0, 53: 21.0, 54: 21.0, 55: 2.0, 56: 7.0, 57: 2.0, 58: 26.0, 59: 31.0, 60: 6.0, 61: 11.0, 62: 9.0, 63: 31.0, 64: 9.0, 65: 13.0, 66: 13.0, 67: 22.0, 68: 8.0, 69: 26.0, 70: 4.0, 71: 18.0, 72: 8.0, 73: 18.0, 74: 11.0, 75: 10.0, 76: 33.0, 77: 6.0, 78: 16.0, 79: 16.0, 80: 15.0, 81: 9.0, 82: 8.0, 83: 21.0, 84: 9.0, 85: 16.0, 86: 19.0, 87: 9.0, 88: 17.0, 89: 27.0, 90: 15.0, 91: 12.0, 92: 26.0, 93: 16.

Training Progress:   7%|▋         | 7/100 [01:11<15:52, 10.24s/it]

Avg reward: 15.95  std: 7.206073826987898  coeff : 0.45179146250707825
Avg reward per agent, per game:  {1: 19.0, 2: 4.0, 3: 27.0, 4: 30.0, 5: 20.0, 6: 13.0, 7: 19.0, 8: 29.0, 9: 12.0, 10: 19.0, 11: 4.0, 12: 13.0, 13: 14.0, 14: 12.0, 15: 30.0, 16: 15.0, 17: 17.0, 18: 24.0, 19: 31.0, 20: 17.0, 21: 15.0, 22: 14.0, 23: 30.0, 24: 8.0, 25: 22.0, 26: 17.0, 27: 16.0, 28: 24.0, 29: 25.0, 30: 10.0, 31: 14.0, 32: 11.0, 33: 9.0, 34: 11.0, 35: 11.0, 36: 8.0, 37: 25.0, 38: 17.0, 39: 33.0, 40: 13.0, 41: 9.0, 42: 14.0, 43: 15.0, 44: 11.0, 45: 13.0, 46: 16.0, 47: 14.0, 48: 22.0, 49: 11.0, 50: 12.0, 51: 34.0, 52: 12.0, 53: 21.0, 54: 9.0, 55: 5.0, 56: 12.0, 57: 1.0, 58: 23.0, 59: 11.0, 60: 24.0, 61: 15.0, 62: 17.0, 63: 17.0, 64: 6.0, 65: 19.0, 66: 27.0, 67: 10.0, 68: 15.0, 69: 12.0, 70: 18.0, 71: 28.0, 72: 14.0, 73: 13.0, 74: 8.0, 75: 15.0, 76: 23.0, 77: 12.0, 78: 17.0, 79: 12.0, 80: 17.0, 81: 20.0, 82: 4.0, 83: 14.0, 84: 9.0, 85: 11.0, 86: 14.0, 87: 7.0, 88: 17.0, 89: 23.0, 90: 9.0, 91: 12.0, 92: 17.0,

Training Progress:   8%|▊         | 8/100 [01:21<15:45, 10.27s/it]

Avg reward: 16.49  std: 6.476874246115946  coeff : 0.39277587908526057
Avg reward per agent, per game:  {1: 20.0, 2: 17.0, 3: 20.0, 4: 35.0, 5: 25.0, 6: 20.0, 7: 6.0, 8: 27.0, 9: 18.0, 10: 11.0, 11: 14.0, 12: 13.0, 13: 14.0, 14: 24.0, 15: 17.0, 16: 17.0, 17: 15.0, 18: 15.0, 19: 18.0, 20: 14.0, 21: 13.0, 22: 20.0, 23: 20.0, 24: 11.0, 25: 13.0, 26: 18.0, 27: 28.0, 28: 18.0, 29: 21.0, 30: 7.0, 31: 18.0, 32: 23.0, 33: 14.0, 34: 8.0, 35: 20.0, 36: 13.0, 37: 6.0, 38: 17.0, 39: 17.0, 40: 33.0, 41: 12.0, 42: 18.0, 43: 17.0, 44: 13.0, 45: 20.0, 46: 29.0, 47: 20.0, 48: 12.0, 49: 8.0, 50: 14.0, 51: 31.0, 52: 7.0, 53: 15.0, 54: 9.0, 55: 10.0, 56: 12.0, 57: 14.0, 58: 16.0, 59: 22.0, 60: 4.0, 61: 20.0, 62: 14.0, 63: 20.0, 64: 17.0, 65: 7.0, 66: 16.0, 67: 10.0, 68: 7.0, 69: 19.0, 70: 6.0, 71: 20.0, 72: 9.0, 73: 15.0, 74: 17.0, 75: 20.0, 76: 24.0, 77: 17.0, 78: 21.0, 79: 15.0, 80: 8.0, 81: 9.0, 82: 6.0, 83: 12.0, 84: 20.0, 85: 17.0, 86: 21.0, 87: 28.0, 88: 11.0, 89: 23.0, 90: 16.0, 91: 21.0, 92: 12.0,

Training Progress:   9%|▉         | 9/100 [01:32<15:51, 10.46s/it]

Avg reward: 15.6  std: 7.260853944268539  coeff : 0.4654393554018294
Avg reward per agent, per game:  {1: 20.0, 2: 7.0, 3: 21.0, 4: 34.0, 5: 26.0, 6: 15.0, 7: 18.0, 8: 12.0, 9: 16.0, 10: 11.0, 11: 7.0, 12: 12.0, 13: 16.0, 14: 13.0, 15: 28.0, 16: 12.0, 17: 21.0, 18: 23.0, 19: 15.0, 20: 22.0, 21: 11.0, 22: 11.0, 23: 28.0, 24: 9.0, 25: 11.0, 26: 15.0, 27: 14.0, 28: 15.0, 29: 17.0, 30: 7.0, 31: 3.0, 32: 11.0, 33: 13.0, 34: 4.0, 35: 12.0, 36: 19.0, 37: 11.0, 38: 17.0, 39: 31.0, 40: 20.0, 41: 15.0, 42: 36.0, 43: 16.0, 44: 12.0, 45: 13.0, 46: 18.0, 47: 18.0, 48: 11.0, 49: 6.0, 50: 26.0, 51: 32.0, 52: 13.0, 53: 34.0, 54: 22.0, 55: 10.0, 56: 9.0, 57: 2.0, 58: 12.0, 59: 24.0, 60: 8.0, 61: 13.0, 62: 16.0, 63: 23.0, 64: 10.0, 65: 14.0, 66: 29.0, 67: 19.0, 68: 9.0, 69: 16.0, 70: 5.0, 71: 13.0, 72: 14.0, 73: 21.0, 74: 6.0, 75: 17.0, 76: 27.0, 77: 6.0, 78: 16.0, 79: 17.0, 80: 14.0, 81: 17.0, 82: 7.0, 83: 11.0, 84: 7.0, 85: 5.0, 86: 13.0, 87: 19.0, 88: 14.0, 89: 27.0, 90: 11.0, 91: 14.0, 92: 25.0, 93:

Training Progress:  10%|█         | 10/100 [01:43<15:56, 10.63s/it]

Avg reward: 15.69  std: 7.902777992579572  coeff : 0.5036824724397433
Avg reward per agent, per game:  {1: 22.0, 2: 6.0, 3: 28.0, 4: 33.0, 5: 15.0, 6: 16.0, 7: 13.0, 8: 27.0, 9: 14.0, 10: 20.0, 11: 12.0, 12: 8.0, 13: 5.0, 14: 34.0, 15: 23.0, 16: 23.0, 17: 8.0, 18: 11.0, 19: 21.0, 20: 16.0, 21: 14.0, 22: 22.0, 23: 12.0, 24: 14.0, 25: 8.0, 26: 12.0, 27: 32.0, 28: 15.0, 29: 23.0, 30: 4.0, 31: 9.0, 32: 8.0, 33: 8.0, 34: 10.0, 35: 28.0, 36: 12.0, 37: 6.0, 38: 12.0, 39: 19.0, 40: 49.0, 41: 38.0, 42: 18.0, 43: 19.0, 44: 16.0, 45: 19.0, 46: 19.0, 47: 17.0, 48: 15.0, 49: 4.0, 50: 20.0, 51: 25.0, 52: 12.0, 53: 17.0, 54: 14.0, 55: 5.0, 56: 11.0, 57: 2.0, 58: 21.0, 59: 14.0, 60: 6.0, 61: 8.0, 62: 22.0, 63: 17.0, 64: 16.0, 65: 11.0, 66: 16.0, 67: 9.0, 68: 9.0, 69: 12.0, 70: 4.0, 71: 11.0, 72: 6.0, 73: 24.0, 74: 14.0, 75: 16.0, 76: 14.0, 77: 10.0, 78: 20.0, 79: 9.0, 80: 20.0, 81: 18.0, 82: 6.0, 83: 18.0, 84: 11.0, 85: 16.0, 86: 20.0, 87: 11.0, 88: 24.0, 89: 13.0, 90: 14.0, 91: 12.0, 92: 19.0, 93: 25

Training Progress:  11%|█         | 11/100 [01:53<15:34, 10.50s/it]

Avg reward: 15.9  std: 7.762087348130012  coeff : 0.48818159422201335
Avg reward per agent, per game:  {1: 13.0, 2: 6.0, 3: 21.0, 4: 26.0, 5: 17.0, 6: 28.0, 7: 21.0, 8: 23.0, 9: 21.0, 10: 26.0, 11: 12.0, 12: 8.0, 13: 13.0, 14: 18.0, 15: 17.0, 16: 13.0, 17: 19.0, 18: 26.0, 19: 25.0, 20: 12.0, 21: 8.0, 22: 13.0, 23: 10.0, 24: 11.0, 25: 22.0, 26: 8.0, 27: 26.0, 28: 20.0, 29: 23.0, 30: 10.0, 31: 14.0, 32: 13.0, 33: 7.0, 34: 13.0, 35: 18.0, 36: 14.0, 37: 8.0, 38: 8.0, 39: 28.0, 40: 37.0, 41: 14.0, 42: 18.0, 43: 13.0, 44: 14.0, 45: 10.0, 46: 21.0, 47: 19.0, 48: 11.0, 49: 3.0, 50: 12.0, 51: 34.0, 52: 11.0, 53: 19.0, 54: 15.0, 55: 3.0, 56: 3.0, 57: 2.0, 58: 19.0, 59: 23.0, 60: 17.0, 61: 6.0, 62: 12.0, 63: 22.0, 64: 5.0, 65: 6.0, 66: 33.0, 67: 18.0, 68: 5.0, 69: 7.0, 70: 9.0, 71: 14.0, 72: 8.0, 73: 13.0, 74: 22.0, 75: 18.0, 76: 29.0, 77: 14.0, 78: 27.0, 79: 14.0, 80: 11.0, 81: 13.0, 82: 12.0, 83: 12.0, 84: 13.0, 85: 11.0, 86: 35.0, 87: 6.0, 88: 18.0, 89: 35.0, 90: 22.0, 91: 10.0, 92: 26.0, 93: 

Training Progress:  12%|█▏        | 12/100 [02:04<15:25, 10.51s/it]

Avg reward: 16.47  std: 7.714214153107237  coeff : 0.4683797300004395
Avg reward per agent, per game:  {1: 21.0, 2: 6.0, 3: 27.0, 4: 23.0, 5: 21.0, 6: 29.0, 7: 11.0, 8: 15.0, 9: 14.0, 10: 9.0, 11: 7.0, 12: 19.0, 13: 15.0, 14: 19.0, 15: 12.0, 16: 16.0, 17: 11.0, 18: 19.0, 19: 34.0, 20: 16.0, 21: 11.0, 22: 20.0, 23: 12.0, 24: 9.0, 25: 14.0, 26: 7.0, 27: 12.0, 28: 10.0, 29: 30.0, 30: 4.0, 31: 32.0, 32: 21.0, 33: 14.0, 34: 9.0, 35: 22.0, 36: 12.0, 37: 14.0, 38: 14.0, 39: 23.0, 40: 26.0, 41: 12.0, 42: 15.0, 43: 19.0, 44: 5.0, 45: 15.0, 46: 22.0, 47: 38.0, 48: 14.0, 49: 2.0, 50: 14.0, 51: 30.0, 52: 17.0, 53: 39.0, 54: 17.0, 55: 11.0, 56: 15.0, 57: 2.0, 58: 24.0, 59: 17.0, 60: 9.0, 61: 8.0, 62: 14.0, 63: 19.0, 64: 28.0, 65: 10.0, 66: 20.0, 67: 7.0, 68: 10.0, 69: 24.0, 70: 25.0, 71: 21.0, 72: 8.0, 73: 11.0, 74: 10.0, 75: 14.0, 76: 25.0, 77: 8.0, 78: 25.0, 79: 10.0, 80: 12.0, 81: 18.0, 82: 11.0, 83: 25.0, 84: 13.0, 85: 20.0, 86: 27.0, 87: 9.0, 88: 12.0, 89: 31.0, 90: 17.0, 91: 12.0, 92: 26.0, 9

Training Progress:  13%|█▎        | 13/100 [02:15<15:36, 10.77s/it]

Avg reward: 16.53  std: 7.189513196315867  coeff : 0.43493727745407545
Avg reward per agent, per game:  {1: 26.0, 2: 7.0, 3: 17.0, 4: 15.0, 5: 37.0, 6: 16.0, 7: 21.0, 8: 30.0, 9: 11.0, 10: 8.0, 11: 23.0, 12: 18.0, 13: 11.0, 14: 23.0, 15: 32.0, 16: 16.0, 17: 19.0, 18: 25.0, 19: 28.0, 20: 35.0, 21: 4.0, 22: 21.0, 23: 26.0, 24: 9.0, 25: 13.0, 26: 7.0, 27: 30.0, 28: 14.0, 29: 24.0, 30: 17.0, 31: 16.0, 32: 20.0, 33: 18.0, 34: 11.0, 35: 17.0, 36: 8.0, 37: 20.0, 38: 11.0, 39: 19.0, 40: 22.0, 41: 27.0, 42: 19.0, 43: 17.0, 44: 11.0, 45: 11.0, 46: 18.0, 47: 20.0, 48: 19.0, 49: 11.0, 50: 10.0, 51: 21.0, 52: 9.0, 53: 19.0, 54: 16.0, 55: 12.0, 56: 9.0, 57: 9.0, 58: 9.0, 59: 21.0, 60: 14.0, 61: 4.0, 62: 20.0, 63: 21.0, 64: 6.0, 65: 27.0, 66: 20.0, 67: 14.0, 68: 6.0, 69: 12.0, 70: 7.0, 71: 29.0, 72: 17.0, 73: 16.0, 74: 23.0, 75: 7.0, 76: 29.0, 77: 12.0, 78: 23.0, 79: 9.0, 80: 13.0, 81: 11.0, 82: 2.0, 83: 20.0, 84: 17.0, 85: 9.0, 86: 21.0, 87: 21.0, 88: 13.0, 89: 22.0, 90: 12.0, 91: 12.0, 92: 16.0, 93

Training Progress:  14%|█▍        | 14/100 [02:26<15:36, 10.89s/it]

Avg reward: 16.2  std: 7.835815209663893  coeff : 0.4836922968928329
Avg reward per agent, per game:  {1: 22.0, 2: 11.0, 3: 23.0, 4: 30.0, 5: 25.0, 6: 31.0, 7: 22.0, 8: 21.0, 9: 9.0, 10: 14.0, 11: 8.0, 12: 14.0, 13: 19.0, 14: 29.0, 15: 31.0, 16: 19.0, 17: 14.0, 18: 35.0, 19: 22.0, 20: 12.0, 21: 13.0, 22: 25.0, 23: 19.0, 24: 12.0, 25: 15.0, 26: 11.0, 27: 10.0, 28: 22.0, 29: 19.0, 30: 10.0, 31: 10.0, 32: 18.0, 33: 13.0, 34: 12.0, 35: 12.0, 36: 14.0, 37: 5.0, 38: 29.0, 39: 32.0, 40: 33.0, 41: 16.0, 42: 23.0, 43: 6.0, 44: 13.0, 45: 16.0, 46: 10.0, 47: 14.0, 48: 18.0, 49: 4.0, 50: 12.0, 51: 27.0, 52: 8.0, 53: 15.0, 54: 13.0, 55: 7.0, 56: 6.0, 57: 5.0, 58: 11.0, 59: 7.0, 60: 6.0, 61: 12.0, 62: 18.0, 63: 21.0, 64: 14.0, 65: 16.0, 66: 33.0, 67: 15.0, 68: 5.0, 69: 2.0, 70: 15.0, 71: 18.0, 72: 14.0, 73: 19.0, 74: 9.0, 75: 8.0, 76: 26.0, 77: 35.0, 78: 20.0, 79: 20.0, 80: 9.0, 81: 21.0, 82: 3.0, 83: 15.0, 84: 20.0, 85: 15.0, 86: 9.0, 87: 13.0, 88: 8.0, 89: 27.0, 90: 11.0, 91: 23.0, 92: 17.0, 93: 3

Training Progress:  15%|█▌        | 15/100 [02:37<15:21, 10.85s/it]

Avg reward: 16.65  std: 7.72188448502048  coeff : 0.463776845947176
Avg reward per agent, per game:  {1: 13.0, 2: 7.0, 3: 17.0, 4: 24.0, 5: 25.0, 6: 25.0, 7: 14.0, 8: 23.0, 9: 18.0, 10: 13.0, 11: 12.0, 12: 31.0, 13: 10.0, 14: 30.0, 15: 21.0, 16: 39.0, 17: 18.0, 18: 10.0, 19: 37.0, 20: 11.0, 21: 7.0, 22: 19.0, 23: 19.0, 24: 17.0, 25: 20.0, 26: 11.0, 27: 28.0, 28: 17.0, 29: 33.0, 30: 4.0, 31: 14.0, 32: 14.0, 33: 5.0, 34: 6.0, 35: 15.0, 36: 14.0, 37: 11.0, 38: 20.0, 39: 14.0, 40: 28.0, 41: 16.0, 42: 8.0, 43: 10.0, 44: 15.0, 45: 13.0, 46: 22.0, 47: 29.0, 48: 14.0, 49: 9.0, 50: 21.0, 51: 28.0, 52: 16.0, 53: 29.0, 54: 7.0, 55: 9.0, 56: 10.0, 57: 16.0, 58: 20.0, 59: 22.0, 60: 9.0, 61: 12.0, 62: 14.0, 63: 26.0, 64: 10.0, 65: 10.0, 66: 26.0, 67: 10.0, 68: 2.0, 69: 10.0, 70: 13.0, 71: 18.0, 72: 17.0, 73: 30.0, 74: 23.0, 75: 17.0, 76: 28.0, 77: 16.0, 78: 17.0, 79: 9.0, 80: 14.0, 81: 15.0, 82: 6.0, 83: 21.0, 84: 11.0, 85: 17.0, 86: 29.0, 87: 11.0, 88: 15.0, 89: 31.0, 90: 11.0, 91: 11.0, 92: 15.0, 

Training Progress:  16%|█▌        | 16/100 [02:48<15:11, 10.85s/it]

Avg reward: 16.07  std: 8.636266554478272  coeff : 0.5374154669868246
Avg reward per agent, per game:  {1: 26.0, 2: 12.0, 3: 25.0, 4: 25.0, 5: 26.0, 6: 17.0, 7: 12.0, 8: 14.0, 9: 14.0, 10: 27.0, 11: 5.0, 12: 10.0, 13: 14.0, 14: 30.0, 15: 19.0, 16: 22.0, 17: 16.0, 18: 18.0, 19: 27.0, 20: 6.0, 21: 24.0, 22: 26.0, 23: 14.0, 24: 7.0, 25: 19.0, 26: 4.0, 27: 24.0, 28: 17.0, 29: 31.0, 30: 12.0, 31: 7.0, 32: 24.0, 33: 6.0, 34: 13.0, 35: 33.0, 36: 12.0, 37: 16.0, 38: 30.0, 39: 11.0, 40: 26.0, 41: 14.0, 42: 8.0, 43: 16.0, 44: 18.0, 45: 18.0, 46: 29.0, 47: 14.0, 48: 14.0, 49: 4.0, 50: 18.0, 51: 30.0, 52: 9.0, 53: 34.0, 54: 11.0, 55: 7.0, 56: 14.0, 57: 2.0, 58: 10.0, 59: 6.0, 60: 8.0, 61: 8.0, 62: 11.0, 63: 28.0, 64: 11.0, 65: 20.0, 66: 17.0, 67: 18.0, 68: 7.0, 69: 2.0, 70: 5.0, 71: 20.0, 72: 7.0, 73: 13.0, 74: 14.0, 75: 9.0, 76: 32.0, 77: 13.0, 78: 25.0, 79: 9.0, 80: 10.0, 81: 13.0, 82: 5.0, 83: 42.0, 84: 11.0, 85: 9.0, 86: 36.0, 87: 15.0, 88: 13.0, 89: 19.0, 90: 9.0, 91: 13.0, 92: 16.0, 93: 18.0

Training Progress:  17%|█▋        | 17/100 [02:59<15:13, 11.00s/it]

Avg reward: 17.05  std: 7.989211475483673  coeff : 0.4685754531075468
Avg reward per agent, per game:  {1: 21.0, 2: 14.0, 3: 23.0, 4: 21.0, 5: 40.0, 6: 30.0, 7: 12.0, 8: 29.0, 9: 17.0, 10: 9.0, 11: 14.0, 12: 15.0, 13: 11.0, 14: 14.0, 15: 32.0, 16: 18.0, 17: 17.0, 18: 39.0, 19: 29.0, 20: 14.0, 21: 13.0, 22: 27.0, 23: 16.0, 24: 8.0, 25: 22.0, 26: 7.0, 27: 6.0, 28: 12.0, 29: 16.0, 30: 4.0, 31: 20.0, 32: 13.0, 33: 25.0, 34: 5.0, 35: 14.0, 36: 14.0, 37: 16.0, 38: 10.0, 39: 29.0, 40: 31.0, 41: 18.0, 42: 22.0, 43: 16.0, 44: 18.0, 45: 12.0, 46: 20.0, 47: 17.0, 48: 18.0, 49: 10.0, 50: 10.0, 51: 36.0, 52: 6.0, 53: 23.0, 54: 17.0, 55: 11.0, 56: 19.0, 57: 17.0, 58: 14.0, 59: 19.0, 60: 12.0, 61: 9.0, 62: 18.0, 63: 34.0, 64: 16.0, 65: 12.0, 66: 11.0, 67: 10.0, 68: 3.0, 69: 11.0, 70: 3.0, 71: 20.0, 72: 12.0, 73: 20.0, 74: 12.0, 75: 25.0, 76: 21.0, 77: 14.0, 78: 19.0, 79: 21.0, 80: 15.0, 81: 13.0, 82: 19.0, 83: 15.0, 84: 24.0, 85: 6.0, 86: 34.0, 87: 5.0, 88: 23.0, 89: 27.0, 90: 11.0, 91: 12.0, 92: 28.

Training Progress:  18%|█▊        | 18/100 [03:11<15:10, 11.11s/it]

Avg reward: 16.53  std: 6.608259982779128  coeff : 0.399773743664799
Avg reward per agent, per game:  {1: 11.0, 2: 21.0, 3: 31.0, 4: 18.0, 5: 23.0, 6: 23.0, 7: 14.0, 8: 24.0, 9: 16.0, 10: 16.0, 11: 13.0, 12: 17.0, 13: 24.0, 14: 22.0, 15: 22.0, 16: 18.0, 17: 7.0, 18: 18.0, 19: 19.0, 20: 16.0, 21: 10.0, 22: 25.0, 23: 21.0, 24: 16.0, 25: 12.0, 26: 13.0, 27: 26.0, 28: 15.0, 29: 26.0, 30: 5.0, 31: 24.0, 32: 21.0, 33: 16.0, 34: 7.0, 35: 14.0, 36: 22.0, 37: 7.0, 38: 13.0, 39: 25.0, 40: 25.0, 41: 23.0, 42: 24.0, 43: 6.0, 44: 16.0, 45: 17.0, 46: 21.0, 47: 17.0, 48: 22.0, 49: 10.0, 50: 30.0, 51: 23.0, 52: 19.0, 53: 17.0, 54: 18.0, 55: 15.0, 56: 8.0, 57: 10.0, 58: 25.0, 59: 17.0, 60: 8.0, 61: 10.0, 62: 14.0, 63: 15.0, 64: 6.0, 65: 15.0, 66: 23.0, 67: 17.0, 68: 13.0, 69: 12.0, 70: 2.0, 71: 25.0, 72: 11.0, 73: 14.0, 74: 8.0, 75: 11.0, 76: 29.0, 77: 12.0, 78: 26.0, 79: 14.0, 80: 9.0, 81: 16.0, 82: 3.0, 83: 30.0, 84: 11.0, 85: 7.0, 86: 19.0, 87: 14.0, 88: 16.0, 89: 29.0, 90: 22.0, 91: 16.0, 92: 14.0,

Training Progress:  19%|█▉        | 19/100 [03:20<14:24, 10.67s/it]

Avg reward: 16.41  std: 7.652574730115349  coeff : 0.4663360591173278
Avg reward per agent, per game:  {1: 31.0, 2: 15.0, 3: 22.0, 4: 28.0, 5: 38.0, 6: 30.0, 7: 21.0, 8: 30.0, 9: 10.0, 10: 13.0, 11: 32.0, 12: 19.0, 13: 14.0, 14: 20.0, 15: 22.0, 16: 17.0, 17: 21.0, 18: 19.0, 19: 26.0, 20: 7.0, 21: 18.0, 22: 17.0, 23: 20.0, 24: 14.0, 25: 23.0, 26: 11.0, 27: 9.0, 28: 7.0, 29: 33.0, 30: 8.0, 31: 20.0, 32: 10.0, 33: 9.0, 34: 19.0, 35: 9.0, 36: 11.0, 37: 4.0, 38: 13.0, 39: 23.0, 40: 28.0, 41: 15.0, 42: 26.0, 43: 10.0, 44: 21.0, 45: 11.0, 46: 18.0, 47: 19.0, 48: 8.0, 49: 3.0, 50: 18.0, 51: 26.0, 52: 12.0, 53: 36.0, 54: 11.0, 55: 7.0, 56: 15.0, 57: 4.0, 58: 29.0, 59: 12.0, 60: 11.0, 61: 10.0, 62: 10.0, 63: 15.0, 64: 9.0, 65: 14.0, 66: 32.0, 67: 11.0, 68: 7.0, 69: 12.0, 70: 22.0, 71: 27.0, 72: 12.0, 73: 18.0, 74: 10.0, 75: 14.0, 76: 27.0, 77: 17.0, 78: 9.0, 79: 15.0, 80: 11.0, 81: 15.0, 82: 4.0, 83: 10.0, 84: 17.0, 85: 12.0, 86: 15.0, 87: 24.0, 88: 11.0, 89: 21.0, 90: 14.0, 91: 13.0, 92: 16.0, 

Training Progress:  20%|██        | 20/100 [03:30<13:42, 10.28s/it]

Avg reward: 15.75  std: 7.222707248670681  coeff : 0.4585845872171861
Avg reward per agent, per game:  {1: 28.0, 2: 4.0, 3: 25.0, 4: 22.0, 5: 28.0, 6: 27.0, 7: 10.0, 8: 16.0, 9: 21.0, 10: 17.0, 11: 30.0, 12: 17.0, 13: 13.0, 14: 17.0, 15: 21.0, 16: 17.0, 17: 16.0, 18: 11.0, 19: 34.0, 20: 13.0, 21: 11.0, 22: 21.0, 23: 11.0, 24: 14.0, 25: 19.0, 26: 9.0, 27: 13.0, 28: 11.0, 29: 13.0, 30: 13.0, 31: 16.0, 32: 7.0, 33: 8.0, 34: 7.0, 35: 15.0, 36: 18.0, 37: 21.0, 38: 18.0, 39: 23.0, 40: 24.0, 41: 18.0, 42: 21.0, 43: 9.0, 44: 33.0, 45: 14.0, 46: 17.0, 47: 12.0, 48: 11.0, 49: 5.0, 50: 22.0, 51: 32.0, 52: 17.0, 53: 22.0, 54: 10.0, 55: 10.0, 56: 7.0, 57: 7.0, 58: 31.0, 59: 10.0, 60: 5.0, 61: 10.0, 62: 12.0, 63: 22.0, 64: 6.0, 65: 6.0, 66: 28.0, 67: 9.0, 68: 4.0, 69: 6.0, 70: 11.0, 71: 23.0, 72: 7.0, 73: 17.0, 74: 13.0, 75: 14.0, 76: 24.0, 77: 9.0, 78: 10.0, 79: 12.0, 80: 12.0, 81: 14.0, 82: 9.0, 83: 15.0, 84: 19.0, 85: 17.0, 86: 21.0, 87: 18.0, 88: 12.0, 89: 22.0, 90: 14.0, 91: 30.0, 92: 13.0, 93:

Training Progress:  21%|██        | 21/100 [03:39<13:20, 10.13s/it]

Avg reward: 17.16  std: 7.767522127422619  coeff : 0.4526528046283577
Avg reward per agent, per game:  {1: 24.0, 2: 7.0, 3: 31.0, 4: 13.0, 5: 22.0, 6: 22.0, 7: 15.0, 8: 25.0, 9: 17.0, 10: 24.0, 11: 11.0, 12: 27.0, 13: 39.0, 14: 15.0, 15: 13.0, 16: 15.0, 17: 14.0, 18: 18.0, 19: 38.0, 20: 10.0, 21: 9.0, 22: 20.0, 23: 14.0, 24: 11.0, 25: 12.0, 26: 13.0, 27: 24.0, 28: 11.0, 29: 25.0, 30: 8.0, 31: 25.0, 32: 17.0, 33: 16.0, 34: 20.0, 35: 12.0, 36: 9.0, 37: 9.0, 38: 11.0, 39: 25.0, 40: 27.0, 41: 12.0, 42: 26.0, 43: 16.0, 44: 20.0, 45: 10.0, 46: 21.0, 47: 10.0, 48: 11.0, 49: 8.0, 50: 16.0, 51: 33.0, 52: 5.0, 53: 35.0, 54: 18.0, 55: 12.0, 56: 7.0, 57: 4.0, 58: 21.0, 59: 25.0, 60: 14.0, 61: 11.0, 62: 14.0, 63: 31.0, 64: 12.0, 65: 11.0, 66: 21.0, 67: 10.0, 68: 3.0, 69: 24.0, 70: 9.0, 71: 31.0, 72: 8.0, 73: 21.0, 74: 16.0, 75: 11.0, 76: 24.0, 77: 12.0, 78: 32.0, 79: 13.0, 80: 12.0, 81: 13.0, 82: 15.0, 83: 29.0, 84: 17.0, 85: 5.0, 86: 19.0, 87: 16.0, 88: 22.0, 89: 20.0, 90: 14.0, 91: 13.0, 92: 25.0

Training Progress:  22%|██▏       | 22/100 [03:49<12:48,  9.85s/it]

Avg reward: 16.25  std: 7.875753932164209  coeff : 0.48466178044087443
Avg reward per agent, per game:  {1: 21.0, 2: 9.0, 3: 38.0, 4: 17.0, 5: 35.0, 6: 19.0, 7: 15.0, 8: 20.0, 9: 18.0, 10: 19.0, 11: 9.0, 12: 30.0, 13: 8.0, 14: 17.0, 15: 20.0, 16: 19.0, 17: 18.0, 18: 11.0, 19: 24.0, 20: 25.0, 21: 5.0, 22: 25.0, 23: 25.0, 24: 17.0, 25: 14.0, 26: 9.0, 27: 20.0, 28: 18.0, 29: 32.0, 30: 7.0, 31: 12.0, 32: 17.0, 33: 18.0, 34: 12.0, 35: 14.0, 36: 17.0, 37: 27.0, 38: 17.0, 39: 33.0, 40: 21.0, 41: 14.0, 42: 25.0, 43: 16.0, 44: 13.0, 45: 15.0, 46: 12.0, 47: 12.0, 48: 14.0, 49: 6.0, 50: 8.0, 51: 21.0, 52: 12.0, 53: 17.0, 54: 9.0, 55: 11.0, 56: 6.0, 57: 7.0, 58: 17.0, 59: 11.0, 60: 10.0, 61: 10.0, 62: 5.0, 63: 36.0, 64: 8.0, 65: 23.0, 66: 16.0, 67: 14.0, 68: 13.0, 69: 3.0, 70: 10.0, 71: 30.0, 72: 7.0, 73: 21.0, 74: 10.0, 75: 16.0, 76: 35.0, 77: 15.0, 78: 21.0, 79: 12.0, 80: 10.0, 81: 16.0, 82: 9.0, 83: 15.0, 84: 13.0, 85: 4.0, 86: 24.0, 87: 35.0, 88: 13.0, 89: 16.0, 90: 12.0, 91: 17.0, 92: 12.0, 9

Training Progress:  23%|██▎       | 23/100 [03:58<12:28,  9.71s/it]

Avg reward: 16.16  std: 8.040795980498448  coeff : 0.49757400869421087
Avg reward per agent, per game:  {1: 27.0, 2: 9.0, 3: 32.0, 4: 15.0, 5: 13.0, 6: 13.0, 7: 13.0, 8: 17.0, 9: 15.0, 10: 14.0, 11: 9.0, 12: 17.0, 13: 13.0, 14: 15.0, 15: 16.0, 16: 12.0, 17: 17.0, 18: 17.0, 19: 34.0, 20: 26.0, 21: 6.0, 22: 26.0, 23: 22.0, 24: 11.0, 25: 14.0, 26: 9.0, 27: 28.0, 28: 17.0, 29: 20.0, 30: 7.0, 31: 12.0, 32: 16.0, 33: 7.0, 34: 21.0, 35: 10.0, 36: 15.0, 37: 21.0, 38: 19.0, 39: 19.0, 40: 20.0, 41: 8.0, 42: 16.0, 43: 13.0, 44: 32.0, 45: 13.0, 46: 34.0, 47: 31.0, 48: 15.0, 49: 13.0, 50: 10.0, 51: 15.0, 52: 6.0, 53: 21.0, 54: 11.0, 55: 7.0, 56: 12.0, 57: 9.0, 58: 14.0, 59: 13.0, 60: 15.0, 61: 7.0, 62: 12.0, 63: 36.0, 64: 7.0, 65: 15.0, 66: 19.0, 67: 10.0, 68: 8.0, 69: 10.0, 70: 2.0, 71: 16.0, 72: 10.0, 73: 12.0, 74: 16.0, 75: 18.0, 76: 34.0, 77: 6.0, 78: 10.0, 79: 15.0, 80: 7.0, 81: 14.0, 82: 11.0, 83: 18.0, 84: 16.0, 85: 10.0, 86: 36.0, 87: 25.0, 88: 21.0, 89: 22.0, 90: 8.0, 91: 20.0, 92: 44.0, 9

Training Progress:  24%|██▍       | 24/100 [04:08<12:19,  9.74s/it]

Avg reward: 16.59  std: 8.156095879769929  coeff : 0.49162723808137004
Avg reward per agent, per game:  {1: 12.0, 2: 8.0, 3: 20.0, 4: 32.0, 5: 42.0, 6: 20.0, 7: 18.0, 8: 28.0, 9: 16.0, 10: 23.0, 11: 13.0, 12: 32.0, 13: 16.0, 14: 18.0, 15: 20.0, 16: 19.0, 17: 16.0, 18: 16.0, 19: 26.0, 20: 36.0, 21: 11.0, 22: 25.0, 23: 31.0, 24: 12.0, 25: 19.0, 26: 12.0, 27: 25.0, 28: 14.0, 29: 18.0, 30: 11.0, 31: 7.0, 32: 23.0, 33: 14.0, 34: 13.0, 35: 17.0, 36: 6.0, 37: 5.0, 38: 15.0, 39: 33.0, 40: 17.0, 41: 17.0, 42: 25.0, 43: 13.0, 44: 22.0, 45: 20.0, 46: 30.0, 47: 11.0, 48: 20.0, 49: 7.0, 50: 14.0, 51: 33.0, 52: 4.0, 53: 9.0, 54: 6.0, 55: 13.0, 56: 14.0, 57: 8.0, 58: 22.0, 59: 24.0, 60: 20.0, 61: 13.0, 62: 16.0, 63: 37.0, 64: 9.0, 65: 13.0, 66: 13.0, 67: 12.0, 68: 4.0, 69: 11.0, 70: 5.0, 71: 25.0, 72: 13.0, 73: 14.0, 74: 6.0, 75: 15.0, 76: 19.0, 77: 24.0, 78: 20.0, 79: 11.0, 80: 5.0, 81: 10.0, 82: 5.0, 83: 20.0, 84: 16.0, 85: 22.0, 86: 14.0, 87: 7.0, 88: 13.0, 89: 17.0, 90: 10.0, 91: 7.0, 92: 26.0, 9

Training Progress:  25%|██▌       | 25/100 [04:17<12:07,  9.70s/it]

Avg reward: 16.07  std: 7.454200158300017  coeff : 0.46385813057249636
Avg reward per agent, per game:  {1: 14.0, 2: 16.0, 3: 22.0, 4: 22.0, 5: 21.0, 6: 19.0, 7: 15.0, 8: 22.0, 9: 14.0, 10: 24.0, 11: 11.0, 12: 21.0, 13: 12.0, 14: 19.0, 15: 11.0, 16: 23.0, 17: 17.0, 18: 15.0, 19: 32.0, 20: 11.0, 21: 7.0, 22: 21.0, 23: 19.0, 24: 14.0, 25: 12.0, 26: 8.0, 27: 19.0, 28: 17.0, 29: 14.0, 30: 6.0, 31: 16.0, 32: 19.0, 33: 12.0, 34: 15.0, 35: 12.0, 36: 15.0, 37: 12.0, 38: 17.0, 39: 18.0, 40: 26.0, 41: 7.0, 42: 10.0, 43: 17.0, 44: 16.0, 45: 13.0, 46: 32.0, 47: 26.0, 48: 35.0, 49: 7.0, 50: 20.0, 51: 41.0, 52: 10.0, 53: 20.0, 54: 6.0, 55: 7.0, 56: 13.0, 57: 12.0, 58: 17.0, 59: 12.0, 60: 17.0, 61: 7.0, 62: 10.0, 63: 22.0, 64: 5.0, 65: 17.0, 66: 22.0, 67: 10.0, 68: 9.0, 69: 6.0, 70: 4.0, 71: 22.0, 72: 13.0, 73: 22.0, 74: 12.0, 75: 21.0, 76: 40.0, 77: 17.0, 78: 20.0, 79: 7.0, 80: 14.0, 81: 21.0, 82: 16.0, 83: 20.0, 84: 11.0, 85: 10.0, 86: 13.0, 87: 12.0, 88: 14.0, 89: 27.0, 90: 18.0, 91: 8.0, 92: 15.0

Training Progress:  26%|██▌       | 26/100 [04:27<12:06,  9.82s/it]

Avg reward: 16.48  std: 8.159019548940915  coeff : 0.49508613767845355
Avg reward per agent, per game:  {1: 5.0, 2: 6.0, 3: 18.0, 4: 35.0, 5: 35.0, 6: 27.0, 7: 14.0, 8: 26.0, 9: 18.0, 10: 8.0, 11: 7.0, 12: 20.0, 13: 15.0, 14: 20.0, 15: 14.0, 16: 22.0, 17: 18.0, 18: 15.0, 19: 39.0, 20: 28.0, 21: 11.0, 22: 35.0, 23: 16.0, 24: 16.0, 25: 25.0, 26: 15.0, 27: 35.0, 28: 24.0, 29: 24.0, 30: 12.0, 31: 15.0, 32: 20.0, 33: 10.0, 34: 15.0, 35: 9.0, 36: 12.0, 37: 11.0, 38: 13.0, 39: 18.0, 40: 31.0, 41: 28.0, 42: 16.0, 43: 19.0, 44: 16.0, 45: 15.0, 46: 26.0, 47: 21.0, 48: 10.0, 49: 7.0, 50: 8.0, 51: 21.0, 52: 14.0, 53: 23.0, 54: 10.0, 55: 7.0, 56: 5.0, 57: 2.0, 58: 34.0, 59: 12.0, 60: 9.0, 61: 6.0, 62: 12.0, 63: 14.0, 64: 10.0, 65: 6.0, 66: 22.0, 67: 12.0, 68: 6.0, 69: 6.0, 70: 6.0, 71: 25.0, 72: 12.0, 73: 20.0, 74: 21.0, 75: 12.0, 76: 28.0, 77: 8.0, 78: 25.0, 79: 10.0, 80: 17.0, 81: 19.0, 82: 11.0, 83: 10.0, 84: 9.0, 85: 11.0, 86: 17.0, 87: 29.0, 88: 23.0, 89: 24.0, 90: 25.0, 91: 9.0, 92: 18.0, 93:

Training Progress:  27%|██▋       | 27/100 [04:37<11:55,  9.80s/it]

Avg reward: 16.63  std: 7.297472165071958  coeff : 0.4388137200885122
Avg reward per agent, per game:  {1: 17.0, 2: 11.0, 3: 22.0, 4: 16.0, 5: 34.0, 6: 6.0, 7: 21.0, 8: 18.0, 9: 23.0, 10: 20.0, 11: 6.0, 12: 22.0, 13: 17.0, 14: 20.0, 15: 23.0, 16: 27.0, 17: 18.0, 18: 13.0, 19: 31.0, 20: 29.0, 21: 10.0, 22: 20.0, 23: 18.0, 24: 9.0, 25: 13.0, 26: 14.0, 27: 18.0, 28: 20.0, 29: 34.0, 30: 12.0, 31: 17.0, 32: 16.0, 33: 8.0, 34: 15.0, 35: 9.0, 36: 26.0, 37: 10.0, 38: 6.0, 39: 15.0, 40: 11.0, 41: 30.0, 42: 9.0, 43: 20.0, 44: 22.0, 45: 18.0, 46: 22.0, 47: 17.0, 48: 8.0, 49: 12.0, 50: 9.0, 51: 42.0, 52: 19.0, 53: 22.0, 54: 7.0, 55: 8.0, 56: 7.0, 57: 9.0, 58: 28.0, 59: 28.0, 60: 14.0, 61: 8.0, 62: 8.0, 63: 24.0, 64: 13.0, 65: 14.0, 66: 28.0, 67: 16.0, 68: 12.0, 69: 19.0, 70: 3.0, 71: 20.0, 72: 18.0, 73: 14.0, 74: 19.0, 75: 16.0, 76: 25.0, 77: 8.0, 78: 21.0, 79: 9.0, 80: 9.0, 81: 18.0, 82: 6.0, 83: 9.0, 84: 19.0, 85: 11.0, 86: 13.0, 87: 25.0, 88: 18.0, 89: 23.0, 90: 25.0, 91: 13.0, 92: 18.0, 93: 10

Training Progress:  28%|██▊       | 28/100 [04:47<11:47,  9.82s/it]

Avg reward: 16.12  std: 7.725645604090314  coeff : 0.4792584121644115
Avg reward per agent, per game:  {1: 14.0, 2: 8.0, 3: 19.0, 4: 18.0, 5: 21.0, 6: 21.0, 7: 21.0, 8: 18.0, 9: 16.0, 10: 21.0, 11: 14.0, 12: 18.0, 13: 13.0, 14: 12.0, 15: 11.0, 16: 13.0, 17: 16.0, 18: 10.0, 19: 46.0, 20: 15.0, 21: 15.0, 22: 22.0, 23: 16.0, 24: 14.0, 25: 10.0, 26: 15.0, 27: 16.0, 28: 17.0, 29: 33.0, 30: 7.0, 31: 32.0, 32: 12.0, 33: 8.0, 34: 17.0, 35: 12.0, 36: 9.0, 37: 18.0, 38: 11.0, 39: 28.0, 40: 25.0, 41: 29.0, 42: 14.0, 43: 10.0, 44: 21.0, 45: 13.0, 46: 17.0, 47: 14.0, 48: 17.0, 49: 5.0, 50: 12.0, 51: 31.0, 52: 8.0, 53: 17.0, 54: 6.0, 55: 6.0, 56: 6.0, 57: 10.0, 58: 42.0, 59: 11.0, 60: 15.0, 61: 8.0, 62: 17.0, 63: 26.0, 64: 5.0, 65: 14.0, 66: 18.0, 67: 5.0, 68: 13.0, 69: 7.0, 70: 9.0, 71: 26.0, 72: 7.0, 73: 17.0, 74: 16.0, 75: 15.0, 76: 17.0, 77: 12.0, 78: 35.0, 79: 7.0, 80: 13.0, 81: 16.0, 82: 5.0, 83: 18.0, 84: 14.0, 85: 20.0, 86: 16.0, 87: 33.0, 88: 21.0, 89: 27.0, 90: 12.0, 91: 19.0, 92: 15.0, 93

Training Progress:  29%|██▉       | 29/100 [04:57<11:37,  9.83s/it]

Avg reward: 15.52  std: 7.13089054747021  coeff : 0.4594645971308125
Avg reward per agent, per game:  {1: 17.0, 2: 10.0, 3: 22.0, 4: 15.0, 5: 28.0, 6: 22.0, 7: 24.0, 8: 24.0, 9: 12.0, 10: 12.0, 11: 4.0, 12: 10.0, 13: 15.0, 14: 17.0, 15: 16.0, 16: 12.0, 17: 19.0, 18: 15.0, 19: 24.0, 20: 25.0, 21: 16.0, 22: 14.0, 23: 23.0, 24: 19.0, 25: 6.0, 26: 24.0, 27: 13.0, 28: 19.0, 29: 28.0, 30: 7.0, 31: 12.0, 32: 20.0, 33: 7.0, 34: 11.0, 35: 19.0, 36: 5.0, 37: 19.0, 38: 18.0, 39: 6.0, 40: 24.0, 41: 25.0, 42: 11.0, 43: 21.0, 44: 13.0, 45: 16.0, 46: 19.0, 47: 16.0, 48: 16.0, 49: 7.0, 50: 17.0, 51: 29.0, 52: 5.0, 53: 19.0, 54: 7.0, 55: 4.0, 56: 7.0, 57: 2.0, 58: 17.0, 59: 17.0, 60: 10.0, 61: 12.0, 62: 11.0, 63: 24.0, 64: 14.0, 65: 17.0, 66: 21.0, 67: 17.0, 68: 5.0, 69: 11.0, 70: 2.0, 71: 24.0, 72: 7.0, 73: 18.0, 74: 21.0, 75: 13.0, 76: 30.0, 77: 16.0, 78: 22.0, 79: 8.0, 80: 8.0, 81: 11.0, 82: 5.0, 83: 14.0, 84: 9.0, 85: 18.0, 86: 28.0, 87: 31.0, 88: 7.0, 89: 29.0, 90: 8.0, 91: 11.0, 92: 24.0, 93: 11.

Training Progress:  30%|███       | 30/100 [05:07<11:30,  9.87s/it]

Avg reward: 16.58  std: 6.911121471946505  coeff : 0.4168348294298254
Avg reward per agent, per game:  {1: 17.0, 2: 15.0, 3: 25.0, 4: 18.0, 5: 32.0, 6: 16.0, 7: 9.0, 8: 34.0, 9: 19.0, 10: 26.0, 11: 20.0, 12: 6.0, 13: 25.0, 14: 18.0, 15: 21.0, 16: 24.0, 17: 16.0, 18: 18.0, 19: 36.0, 20: 27.0, 21: 8.0, 22: 21.0, 23: 17.0, 24: 16.0, 25: 10.0, 26: 10.0, 27: 13.0, 28: 9.0, 29: 20.0, 30: 13.0, 31: 14.0, 32: 19.0, 33: 5.0, 34: 12.0, 35: 17.0, 36: 14.0, 37: 13.0, 38: 17.0, 39: 15.0, 40: 37.0, 41: 21.0, 42: 12.0, 43: 12.0, 44: 19.0, 45: 13.0, 46: 23.0, 47: 16.0, 48: 15.0, 49: 10.0, 50: 9.0, 51: 16.0, 52: 12.0, 53: 23.0, 54: 7.0, 55: 6.0, 56: 8.0, 57: 10.0, 58: 27.0, 59: 21.0, 60: 15.0, 61: 13.0, 62: 9.0, 63: 24.0, 64: 15.0, 65: 20.0, 66: 32.0, 67: 13.0, 68: 5.0, 69: 14.0, 70: 7.0, 71: 19.0, 72: 8.0, 73: 16.0, 74: 19.0, 75: 20.0, 76: 23.0, 77: 18.0, 78: 24.0, 79: 12.0, 80: 16.0, 81: 12.0, 82: 7.0, 83: 8.0, 84: 30.0, 85: 15.0, 86: 16.0, 87: 15.0, 88: 9.0, 89: 25.0, 90: 12.0, 91: 11.0, 92: 23.0, 9

Training Progress:  31%|███       | 31/100 [05:17<11:29,  9.99s/it]

Avg reward: 16.09  std: 7.575084157948346  coeff : 0.4707945405810035
Avg reward per agent, per game:  {1: 5.0, 2: 14.0, 3: 38.0, 4: 23.0, 5: 41.0, 6: 15.0, 7: 16.0, 8: 14.0, 9: 16.0, 10: 19.0, 11: 10.0, 12: 13.0, 13: 10.0, 14: 22.0, 15: 13.0, 16: 16.0, 17: 17.0, 18: 19.0, 19: 32.0, 20: 11.0, 21: 6.0, 22: 26.0, 23: 21.0, 24: 14.0, 25: 23.0, 26: 11.0, 27: 18.0, 28: 14.0, 29: 21.0, 30: 10.0, 31: 15.0, 32: 25.0, 33: 15.0, 34: 17.0, 35: 10.0, 36: 9.0, 37: 7.0, 38: 10.0, 39: 19.0, 40: 34.0, 41: 15.0, 42: 21.0, 43: 19.0, 44: 22.0, 45: 16.0, 46: 14.0, 47: 21.0, 48: 8.0, 49: 7.0, 50: 13.0, 51: 32.0, 52: 11.0, 53: 24.0, 54: 10.0, 55: 4.0, 56: 8.0, 57: 2.0, 58: 17.0, 59: 21.0, 60: 9.0, 61: 9.0, 62: 13.0, 63: 20.0, 64: 10.0, 65: 10.0, 66: 21.0, 67: 7.0, 68: 14.0, 69: 21.0, 70: 2.0, 71: 12.0, 72: 9.0, 73: 13.0, 74: 14.0, 75: 20.0, 76: 29.0, 77: 6.0, 78: 17.0, 79: 14.0, 80: 10.0, 81: 17.0, 82: 11.0, 83: 11.0, 84: 19.0, 85: 15.0, 86: 15.0, 87: 10.0, 88: 14.0, 89: 28.0, 90: 15.0, 91: 18.0, 92: 32.0, 

Training Progress:  32%|███▏      | 32/100 [05:28<11:28, 10.13s/it]

Avg reward: 15.67  std: 7.134500683299428  coeff : 0.45529678897890413
Avg reward per agent, per game:  {1: 27.0, 2: 10.0, 3: 19.0, 4: 11.0, 5: 30.0, 6: 12.0, 7: 16.0, 8: 22.0, 9: 16.0, 10: 4.0, 11: 21.0, 12: 15.0, 13: 12.0, 14: 23.0, 15: 14.0, 16: 10.0, 17: 18.0, 18: 18.0, 19: 23.0, 20: 28.0, 21: 9.0, 22: 16.0, 23: 23.0, 24: 9.0, 25: 11.0, 26: 13.0, 27: 25.0, 28: 9.0, 29: 18.0, 30: 14.0, 31: 18.0, 32: 22.0, 33: 16.0, 34: 14.0, 35: 10.0, 36: 9.0, 37: 21.0, 38: 15.0, 39: 30.0, 40: 26.0, 41: 12.0, 42: 14.0, 43: 11.0, 44: 23.0, 45: 17.0, 46: 17.0, 47: 14.0, 48: 10.0, 49: 21.0, 50: 9.0, 51: 22.0, 52: 10.0, 53: 14.0, 54: 5.0, 55: 4.0, 56: 7.0, 57: 4.0, 58: 26.0, 59: 13.0, 60: 7.0, 61: 9.0, 62: 20.0, 63: 30.0, 64: 9.0, 65: 11.0, 66: 24.0, 67: 9.0, 68: 3.0, 69: 25.0, 70: 32.0, 71: 19.0, 72: 13.0, 73: 14.0, 74: 17.0, 75: 13.0, 76: 8.0, 77: 15.0, 78: 23.0, 79: 10.0, 80: 11.0, 81: 19.0, 82: 9.0, 83: 16.0, 84: 19.0, 85: 14.0, 86: 24.0, 87: 10.0, 88: 14.0, 89: 25.0, 90: 18.0, 91: 21.0, 92: 10.0, 9

Training Progress:  33%|███▎      | 33/100 [05:38<11:30, 10.30s/it]

Avg reward: 16.95  std: 7.502499583472165  coeff : 0.44262534415764987
Avg reward per agent, per game:  {1: 20.0, 2: 12.0, 3: 27.0, 4: 24.0, 5: 32.0, 6: 19.0, 7: 12.0, 8: 24.0, 9: 9.0, 10: 18.0, 11: 14.0, 12: 20.0, 13: 7.0, 14: 10.0, 15: 30.0, 16: 16.0, 17: 15.0, 18: 11.0, 19: 27.0, 20: 24.0, 21: 9.0, 22: 25.0, 23: 24.0, 24: 15.0, 25: 13.0, 26: 13.0, 27: 12.0, 28: 18.0, 29: 21.0, 30: 12.0, 31: 10.0, 32: 20.0, 33: 16.0, 34: 3.0, 35: 6.0, 36: 17.0, 37: 20.0, 38: 17.0, 39: 18.0, 40: 31.0, 41: 32.0, 42: 16.0, 43: 9.0, 44: 14.0, 45: 14.0, 46: 21.0, 47: 16.0, 48: 13.0, 49: 13.0, 50: 16.0, 51: 34.0, 52: 10.0, 53: 21.0, 54: 6.0, 55: 10.0, 56: 5.0, 57: 7.0, 58: 28.0, 59: 15.0, 60: 6.0, 61: 6.0, 62: 13.0, 63: 29.0, 64: 13.0, 65: 15.0, 66: 30.0, 67: 26.0, 68: 8.0, 69: 35.0, 70: 18.0, 71: 17.0, 72: 7.0, 73: 24.0, 74: 12.0, 75: 15.0, 76: 23.0, 77: 13.0, 78: 25.0, 79: 13.0, 80: 14.0, 81: 3.0, 82: 27.0, 83: 20.0, 84: 20.0, 85: 16.0, 86: 32.0, 87: 15.0, 88: 23.0, 89: 22.0, 90: 14.0, 91: 20.0, 92: 21.0

Training Progress:  34%|███▍      | 34/100 [05:48<11:13, 10.21s/it]

Avg reward: 16.13  std: 8.354226475263884  coeff : 0.5179309656084243
Avg reward per agent, per game:  {1: 24.0, 2: 7.0, 3: 20.0, 4: 23.0, 5: 39.0, 6: 24.0, 7: 16.0, 8: 27.0, 9: 9.0, 10: 14.0, 11: 18.0, 12: 18.0, 13: 10.0, 14: 18.0, 15: 19.0, 16: 17.0, 17: 10.0, 18: 11.0, 19: 33.0, 20: 37.0, 21: 8.0, 22: 20.0, 23: 24.0, 24: 14.0, 25: 29.0, 26: 6.0, 27: 18.0, 28: 10.0, 29: 33.0, 30: 10.0, 31: 24.0, 32: 21.0, 33: 16.0, 34: 8.0, 35: 11.0, 36: 6.0, 37: 3.0, 38: 15.0, 39: 20.0, 40: 15.0, 41: 21.0, 42: 20.0, 43: 11.0, 44: 13.0, 45: 12.0, 46: 35.0, 47: 19.0, 48: 10.0, 49: 4.0, 50: 9.0, 51: 34.0, 52: 8.0, 53: 27.0, 54: 10.0, 55: 7.0, 56: 11.0, 57: 2.0, 58: 19.0, 59: 11.0, 60: 6.0, 61: 12.0, 62: 10.0, 63: 32.0, 64: 6.0, 65: 10.0, 66: 20.0, 67: 13.0, 68: 4.0, 69: 30.0, 70: 18.0, 71: 22.0, 72: 5.0, 73: 19.0, 74: 9.0, 75: 14.0, 76: 25.0, 77: 17.0, 78: 25.0, 79: 6.0, 80: 20.0, 81: 13.0, 82: 7.0, 83: 17.0, 84: 13.0, 85: 21.0, 86: 17.0, 87: 32.0, 88: 17.0, 89: 14.0, 90: 13.0, 91: 8.0, 92: 12.0, 93: 1

Training Progress:  35%|███▌      | 35/100 [05:59<11:03, 10.21s/it]

Avg reward: 16.45  std: 8.584142356694697  coeff : 0.5218323621091001
Avg reward per agent, per game:  {1: 15.0, 2: 6.0, 3: 30.0, 4: 30.0, 5: 36.0, 6: 24.0, 7: 22.0, 8: 30.0, 9: 22.0, 10: 13.0, 11: 11.0, 12: 17.0, 13: 22.0, 14: 19.0, 15: 16.0, 16: 19.0, 17: 10.0, 18: 19.0, 19: 21.0, 20: 21.0, 21: 7.0, 22: 24.0, 23: 14.0, 24: 13.0, 25: 18.0, 26: 16.0, 27: 13.0, 28: 11.0, 29: 27.0, 30: 3.0, 31: 24.0, 32: 25.0, 33: 9.0, 34: 6.0, 35: 6.0, 36: 26.0, 37: 29.0, 38: 3.0, 39: 28.0, 40: 11.0, 41: 19.0, 42: 24.0, 43: 10.0, 44: 22.0, 45: 15.0, 46: 26.0, 47: 15.0, 48: 6.0, 49: 3.0, 50: 20.0, 51: 25.0, 52: 13.0, 53: 18.0, 54: 10.0, 55: 6.0, 56: 9.0, 57: 5.0, 58: 24.0, 59: 12.0, 60: 7.0, 61: 25.0, 62: 21.0, 63: 40.0, 64: 7.0, 65: 6.0, 66: 14.0, 67: 18.0, 68: 4.0, 69: 7.0, 70: 6.0, 71: 23.0, 72: 8.0, 73: 13.0, 74: 18.0, 75: 12.0, 76: 30.0, 77: 19.0, 78: 17.0, 79: 6.0, 80: 17.0, 81: 14.0, 82: 5.0, 83: 16.0, 84: 19.0, 85: 2.0, 86: 19.0, 87: 40.0, 88: 10.0, 89: 28.0, 90: 15.0, 91: 7.0, 92: 19.0, 93: 22.0

Training Progress:  36%|███▌      | 36/100 [06:09<10:56, 10.26s/it]

Avg reward: 15.85  std: 8.229671925417197  coeff : 0.5192222034963532
Avg reward per agent, per game:  {1: 26.0, 2: 13.0, 3: 22.0, 4: 10.0, 5: 40.0, 6: 19.0, 7: 9.0, 8: 20.0, 9: 6.0, 10: 15.0, 11: 13.0, 12: 16.0, 13: 27.0, 14: 20.0, 15: 34.0, 16: 17.0, 17: 14.0, 18: 14.0, 19: 42.0, 20: 13.0, 21: 6.0, 22: 26.0, 23: 22.0, 24: 16.0, 25: 8.0, 26: 4.0, 27: 14.0, 28: 8.0, 29: 12.0, 30: 11.0, 31: 14.0, 32: 24.0, 33: 12.0, 34: 17.0, 35: 9.0, 36: 5.0, 37: 4.0, 38: 15.0, 39: 17.0, 40: 22.0, 41: 9.0, 42: 10.0, 43: 14.0, 44: 14.0, 45: 12.0, 46: 19.0, 47: 11.0, 48: 16.0, 49: 5.0, 50: 14.0, 51: 29.0, 52: 10.0, 53: 23.0, 54: 9.0, 55: 10.0, 56: 19.0, 57: 7.0, 58: 22.0, 59: 20.0, 60: 12.0, 61: 17.0, 62: 17.0, 63: 28.0, 64: 14.0, 65: 4.0, 66: 24.0, 67: 19.0, 68: 9.0, 69: 31.0, 70: 29.0, 71: 19.0, 72: 8.0, 73: 11.0, 74: 9.0, 75: 20.0, 76: 30.0, 77: 3.0, 78: 25.0, 79: 7.0, 80: 8.0, 81: 7.0, 82: 8.0, 83: 10.0, 84: 19.0, 85: 16.0, 86: 28.0, 87: 20.0, 88: 6.0, 89: 33.0, 90: 17.0, 91: 19.0, 92: 18.0, 93: 10.0

Training Progress:  37%|███▋      | 37/100 [06:19<10:52, 10.35s/it]

Avg reward: 15.97  std: 7.128050224289949  coeff : 0.4463400265679367
Avg reward per agent, per game:  {1: 21.0, 2: 10.0, 3: 23.0, 4: 18.0, 5: 39.0, 6: 16.0, 7: 23.0, 8: 25.0, 9: 14.0, 10: 19.0, 11: 15.0, 12: 12.0, 13: 32.0, 14: 26.0, 15: 16.0, 16: 16.0, 17: 7.0, 18: 19.0, 19: 35.0, 20: 24.0, 21: 9.0, 22: 18.0, 23: 16.0, 24: 16.0, 25: 23.0, 26: 11.0, 27: 25.0, 28: 19.0, 29: 25.0, 30: 11.0, 31: 16.0, 32: 12.0, 33: 15.0, 34: 14.0, 35: 14.0, 36: 15.0, 37: 19.0, 38: 12.0, 39: 12.0, 40: 21.0, 41: 15.0, 42: 27.0, 43: 7.0, 44: 23.0, 45: 20.0, 46: 20.0, 47: 13.0, 48: 8.0, 49: 9.0, 50: 8.0, 51: 20.0, 52: 17.0, 53: 29.0, 54: 8.0, 55: 9.0, 56: 2.0, 57: 2.0, 58: 18.0, 59: 22.0, 60: 7.0, 61: 14.0, 62: 14.0, 63: 35.0, 64: 9.0, 65: 11.0, 66: 16.0, 67: 12.0, 68: 10.0, 69: 14.0, 70: 4.0, 71: 14.0, 72: 11.0, 73: 10.0, 74: 17.0, 75: 10.0, 76: 26.0, 77: 10.0, 78: 19.0, 79: 7.0, 80: 14.0, 81: 17.0, 82: 6.0, 83: 19.0, 84: 9.0, 85: 16.0, 86: 29.0, 87: 18.0, 88: 11.0, 89: 18.0, 90: 13.0, 91: 18.0, 92: 14.0, 9

Training Progress:  38%|███▊      | 38/100 [06:30<10:44, 10.39s/it]

Avg reward: 16.22  std: 8.197048249217518  coeff : 0.505366723133016
Avg reward per agent, per game:  {1: 27.0, 2: 13.0, 3: 21.0, 4: 27.0, 5: 26.0, 6: 12.0, 7: 25.0, 8: 36.0, 9: 6.0, 10: 10.0, 11: 25.0, 12: 13.0, 13: 28.0, 14: 30.0, 15: 36.0, 16: 12.0, 17: 14.0, 18: 17.0, 19: 40.0, 20: 26.0, 21: 10.0, 22: 29.0, 23: 20.0, 24: 7.0, 25: 12.0, 26: 9.0, 27: 21.0, 28: 14.0, 29: 18.0, 30: 13.0, 31: 10.0, 32: 14.0, 33: 28.0, 34: 7.0, 35: 13.0, 36: 21.0, 37: 6.0, 38: 18.0, 39: 20.0, 40: 14.0, 41: 14.0, 42: 13.0, 43: 12.0, 44: 23.0, 45: 8.0, 46: 25.0, 47: 10.0, 48: 12.0, 49: 11.0, 50: 10.0, 51: 26.0, 52: 13.0, 53: 28.0, 54: 16.0, 55: 11.0, 56: 3.0, 57: 5.0, 58: 24.0, 59: 18.0, 60: 6.0, 61: 17.0, 62: 18.0, 63: 32.0, 64: 24.0, 65: 5.0, 66: 17.0, 67: 8.0, 68: 4.0, 69: 20.0, 70: 5.0, 71: 22.0, 72: 2.0, 73: 13.0, 74: 12.0, 75: 22.0, 76: 24.0, 77: 5.0, 78: 23.0, 79: 13.0, 80: 9.0, 81: 15.0, 82: 4.0, 83: 23.0, 84: 22.0, 85: 20.0, 86: 15.0, 87: 7.0, 88: 8.0, 89: 26.0, 90: 9.0, 91: 13.0, 92: 20.0, 93: 19

Training Progress:  39%|███▉      | 39/100 [06:41<10:38, 10.47s/it]

Avg reward: 16.76  std: 8.61988398993861  coeff : 0.5143128872278407
Avg reward per agent, per game:  {1: 25.0, 2: 7.0, 3: 17.0, 4: 24.0, 5: 32.0, 6: 28.0, 7: 10.0, 8: 25.0, 9: 23.0, 10: 11.0, 11: 12.0, 12: 20.0, 13: 13.0, 14: 23.0, 15: 22.0, 16: 19.0, 17: 16.0, 18: 9.0, 19: 43.0, 20: 29.0, 21: 10.0, 22: 28.0, 23: 33.0, 24: 17.0, 25: 16.0, 26: 12.0, 27: 19.0, 28: 14.0, 29: 21.0, 30: 16.0, 31: 12.0, 32: 8.0, 33: 19.0, 34: 16.0, 35: 16.0, 36: 12.0, 37: 7.0, 38: 2.0, 39: 23.0, 40: 10.0, 41: 22.0, 42: 22.0, 43: 20.0, 44: 34.0, 45: 15.0, 46: 21.0, 47: 31.0, 48: 9.0, 49: 18.0, 50: 11.0, 51: 36.0, 52: 12.0, 53: 25.0, 54: 15.0, 55: 10.0, 56: 8.0, 57: 7.0, 58: 13.0, 59: 30.0, 60: 4.0, 61: 10.0, 62: 9.0, 63: 34.0, 64: 7.0, 65: 6.0, 66: 18.0, 67: 13.0, 68: 6.0, 69: 31.0, 70: 10.0, 71: 23.0, 72: 6.0, 73: 24.0, 74: 9.0, 75: 14.0, 76: 32.0, 77: 4.0, 78: 22.0, 79: 12.0, 80: 8.0, 81: 19.0, 82: 7.0, 83: 14.0, 84: 18.0, 85: 4.0, 86: 35.0, 87: 17.0, 88: 12.0, 89: 17.0, 90: 19.0, 91: 21.0, 92: 17.0, 93: 1

Training Progress:  40%|████      | 40/100 [06:52<10:44, 10.75s/it]

Avg reward: 14.82  std: 7.426143009665245  coeff : 0.5010892719072365
Avg reward per agent, per game:  {1: 15.0, 2: 11.0, 3: 28.0, 4: 34.0, 5: 30.0, 6: 19.0, 7: 22.0, 8: 22.0, 9: 8.0, 10: 25.0, 11: 25.0, 12: 20.0, 13: 11.0, 14: 26.0, 15: 21.0, 16: 20.0, 17: 19.0, 18: 15.0, 19: 38.0, 20: 17.0, 21: 7.0, 22: 17.0, 23: 16.0, 24: 8.0, 25: 9.0, 26: 8.0, 27: 21.0, 28: 18.0, 29: 23.0, 30: 11.0, 31: 20.0, 32: 14.0, 33: 32.0, 34: 9.0, 35: 9.0, 36: 3.0, 37: 13.0, 38: 16.0, 39: 12.0, 40: 15.0, 41: 16.0, 42: 22.0, 43: 7.0, 44: 14.0, 45: 8.0, 46: 11.0, 47: 18.0, 48: 13.0, 49: 15.0, 50: 12.0, 51: 19.0, 52: 9.0, 53: 20.0, 54: 4.0, 55: 10.0, 56: 4.0, 57: 2.0, 58: 10.0, 59: 13.0, 60: 10.0, 61: 13.0, 62: 10.0, 63: 22.0, 64: 6.0, 65: 7.0, 66: 12.0, 67: 19.0, 68: 5.0, 69: 22.0, 70: 4.0, 71: 10.0, 72: 8.0, 73: 19.0, 74: 4.0, 75: 11.0, 76: 22.0, 77: 9.0, 78: 19.0, 79: 11.0, 80: 19.0, 81: 9.0, 82: 7.0, 83: 11.0, 84: 14.0, 85: 9.0, 86: 27.0, 87: 21.0, 88: 4.0, 89: 26.0, 90: 14.0, 91: 12.0, 92: 13.0, 93: 20.0, 

Training Progress:  41%|████      | 41/100 [07:02<10:27, 10.63s/it]

Avg reward: 16.5  std: 9.07028114227999  coeff : 0.5497140086230297
Avg reward per agent, per game:  {1: 31.0, 2: 8.0, 3: 26.0, 4: 18.0, 5: 44.0, 6: 16.0, 7: 14.0, 8: 28.0, 9: 6.0, 10: 8.0, 11: 18.0, 12: 8.0, 13: 9.0, 14: 9.0, 15: 32.0, 16: 16.0, 17: 16.0, 18: 8.0, 19: 42.0, 20: 29.0, 21: 6.0, 22: 27.0, 23: 12.0, 24: 12.0, 25: 8.0, 26: 10.0, 27: 21.0, 28: 25.0, 29: 30.0, 30: 8.0, 31: 11.0, 32: 12.0, 33: 9.0, 34: 8.0, 35: 14.0, 36: 23.0, 37: 42.0, 38: 14.0, 39: 15.0, 40: 32.0, 41: 23.0, 42: 39.0, 43: 6.0, 44: 29.0, 45: 14.0, 46: 25.0, 47: 21.0, 48: 13.0, 49: 14.0, 50: 7.0, 51: 17.0, 52: 15.0, 53: 14.0, 54: 11.0, 55: 13.0, 56: 8.0, 57: 8.0, 58: 14.0, 59: 12.0, 60: 22.0, 61: 12.0, 62: 12.0, 63: 26.0, 64: 11.0, 65: 2.0, 66: 33.0, 67: 6.0, 68: 11.0, 69: 15.0, 70: 9.0, 71: 24.0, 72: 5.0, 73: 10.0, 74: 17.0, 75: 17.0, 76: 18.0, 77: 13.0, 78: 19.0, 79: 15.0, 80: 16.0, 81: 1.0, 82: 8.0, 83: 11.0, 84: 20.0, 85: 11.0, 86: 29.0, 87: 9.0, 88: 24.0, 89: 19.0, 90: 19.0, 91: 27.0, 92: 20.0, 93: 21.0, 

Training Progress:  42%|████▏     | 42/100 [07:13<10:13, 10.58s/it]

Avg reward: 16.85  std: 8.71249103299395  coeff : 0.5170617823735282
Avg reward per agent, per game:  {1: 25.0, 2: 6.0, 3: 32.0, 4: 34.0, 5: 31.0, 6: 24.0, 7: 22.0, 8: 23.0, 9: 20.0, 10: 12.0, 11: 13.0, 12: 19.0, 13: 19.0, 14: 27.0, 15: 8.0, 16: 14.0, 17: 26.0, 18: 6.0, 19: 47.0, 20: 26.0, 21: 10.0, 22: 21.0, 23: 11.0, 24: 12.0, 25: 16.0, 26: 10.0, 27: 10.0, 28: 14.0, 29: 18.0, 30: 14.0, 31: 12.0, 32: 12.0, 33: 5.0, 34: 17.0, 35: 14.0, 36: 10.0, 37: 36.0, 38: 31.0, 39: 17.0, 40: 32.0, 41: 20.0, 42: 22.0, 43: 14.0, 44: 10.0, 45: 20.0, 46: 34.0, 47: 16.0, 48: 12.0, 49: 20.0, 50: 11.0, 51: 32.0, 52: 7.0, 53: 20.0, 54: 12.0, 55: 10.0, 56: 8.0, 57: 11.0, 58: 17.0, 59: 28.0, 60: 9.0, 61: 8.0, 62: 12.0, 63: 37.0, 64: 18.0, 65: 14.0, 66: 36.0, 67: 12.0, 68: 3.0, 69: 18.0, 70: 21.0, 71: 21.0, 72: 7.0, 73: 10.0, 74: 18.0, 75: 7.0, 76: 24.0, 77: 9.0, 78: 24.0, 79: 8.0, 80: 11.0, 81: 24.0, 82: 15.0, 83: 18.0, 84: 16.0, 85: 7.0, 86: 17.0, 87: 15.0, 88: 6.0, 89: 24.0, 90: 4.0, 91: 22.0, 92: 15.0, 93

Training Progress:  43%|████▎     | 43/100 [07:24<10:08, 10.68s/it]

Avg reward: 16.47  std: 7.827458080373219  coeff : 0.4752554997190783
Avg reward per agent, per game:  {1: 23.0, 2: 5.0, 3: 25.0, 4: 33.0, 5: 30.0, 6: 15.0, 7: 17.0, 8: 42.0, 9: 14.0, 10: 24.0, 11: 10.0, 12: 9.0, 13: 22.0, 14: 19.0, 15: 15.0, 16: 15.0, 17: 20.0, 18: 20.0, 19: 33.0, 20: 14.0, 21: 6.0, 22: 22.0, 23: 28.0, 24: 13.0, 25: 9.0, 26: 9.0, 27: 10.0, 28: 19.0, 29: 20.0, 30: 6.0, 31: 15.0, 32: 25.0, 33: 14.0, 34: 10.0, 35: 7.0, 36: 20.0, 37: 20.0, 38: 16.0, 39: 21.0, 40: 26.0, 41: 23.0, 42: 12.0, 43: 16.0, 44: 28.0, 45: 18.0, 46: 17.0, 47: 23.0, 48: 12.0, 49: 16.0, 50: 5.0, 51: 34.0, 52: 11.0, 53: 22.0, 54: 15.0, 55: 8.0, 56: 6.0, 57: 2.0, 58: 9.0, 59: 14.0, 60: 15.0, 61: 12.0, 62: 26.0, 63: 25.0, 64: 4.0, 65: 1.0, 66: 31.0, 67: 9.0, 68: 14.0, 69: 14.0, 70: 5.0, 71: 18.0, 72: 11.0, 73: 14.0, 74: 19.0, 75: 21.0, 76: 18.0, 77: 17.0, 78: 20.0, 79: 10.0, 80: 6.0, 81: 9.0, 82: 11.0, 83: 13.0, 84: 23.0, 85: 15.0, 86: 23.0, 87: 21.0, 88: 17.0, 89: 35.0, 90: 11.0, 91: 12.0, 92: 13.0, 93:

Training Progress:  44%|████▍     | 44/100 [07:34<09:56, 10.65s/it]

Avg reward: 15.88  std: 7.906048317585721  coeff : 0.4978619847346171
Avg reward per agent, per game:  {1: 12.0, 2: 5.0, 3: 25.0, 4: 22.0, 5: 32.0, 6: 18.0, 7: 13.0, 8: 20.0, 9: 15.0, 10: 8.0, 11: 6.0, 12: 16.0, 13: 16.0, 14: 20.0, 15: 25.0, 16: 17.0, 17: 16.0, 18: 17.0, 19: 43.0, 20: 26.0, 21: 13.0, 22: 29.0, 23: 20.0, 24: 16.0, 25: 10.0, 26: 8.0, 27: 21.0, 28: 12.0, 29: 27.0, 30: 7.0, 31: 9.0, 32: 13.0, 33: 11.0, 34: 12.0, 35: 10.0, 36: 9.0, 37: 7.0, 38: 18.0, 39: 26.0, 40: 23.0, 41: 13.0, 42: 26.0, 43: 18.0, 44: 31.0, 45: 22.0, 46: 24.0, 47: 22.0, 48: 20.0, 49: 8.0, 50: 12.0, 51: 21.0, 52: 1.0, 53: 35.0, 54: 19.0, 55: 7.0, 56: 10.0, 57: 10.0, 58: 15.0, 59: 16.0, 60: 6.0, 61: 4.0, 62: 8.0, 63: 30.0, 64: 8.0, 65: 10.0, 66: 22.0, 67: 10.0, 68: 12.0, 69: 32.0, 70: 4.0, 71: 14.0, 72: 4.0, 73: 28.0, 74: 17.0, 75: 15.0, 76: 15.0, 77: 7.0, 78: 31.0, 79: 11.0, 80: 12.0, 81: 16.0, 82: 10.0, 83: 16.0, 84: 15.0, 85: 12.0, 86: 16.0, 87: 24.0, 88: 15.0, 89: 15.0, 90: 19.0, 91: 12.0, 92: 15.0, 93:

Training Progress:  45%|████▌     | 45/100 [07:45<09:48, 10.70s/it]

Avg reward: 16.26  std: 8.208069200488017  coeff : 0.5048013038430514
Avg reward per agent, per game:  {1: 19.0, 2: 6.0, 3: 25.0, 4: 17.0, 5: 41.0, 6: 34.0, 7: 22.0, 8: 22.0, 9: 13.0, 10: 10.0, 11: 15.0, 12: 10.0, 13: 9.0, 14: 26.0, 15: 18.0, 16: 20.0, 17: 12.0, 18: 21.0, 19: 42.0, 20: 23.0, 21: 7.0, 22: 24.0, 23: 11.0, 24: 8.0, 25: 21.0, 26: 9.0, 27: 34.0, 28: 26.0, 29: 11.0, 30: 5.0, 31: 28.0, 32: 13.0, 33: 22.0, 34: 19.0, 35: 17.0, 36: 11.0, 37: 28.0, 38: 8.0, 39: 18.0, 40: 31.0, 41: 16.0, 42: 26.0, 43: 18.0, 44: 24.0, 45: 12.0, 46: 29.0, 47: 21.0, 48: 18.0, 49: 10.0, 50: 13.0, 51: 13.0, 52: 5.0, 53: 23.0, 54: 11.0, 55: 1.0, 56: 10.0, 57: 2.0, 58: 25.0, 59: 18.0, 60: 19.0, 61: 7.0, 62: 24.0, 63: 23.0, 64: 13.0, 65: 3.0, 66: 17.0, 67: 11.0, 68: 10.0, 69: 3.0, 70: 12.0, 71: 14.0, 72: 10.0, 73: 12.0, 74: 13.0, 75: 23.0, 76: 26.0, 77: 17.0, 78: 19.0, 79: 8.0, 80: 6.0, 81: 11.0, 82: 18.0, 83: 11.0, 84: 9.0, 85: 14.0, 86: 27.0, 87: 24.0, 88: 12.0, 89: 22.0, 90: 15.0, 91: 15.0, 92: 12.0, 9

Training Progress:  46%|████▌     | 46/100 [07:56<09:46, 10.87s/it]

Avg reward: 16.3  std: 8.738993077008358  coeff : 0.5361345446017397
Avg reward per agent, per game:  {1: 8.0, 2: 6.0, 3: 28.0, 4: 27.0, 5: 25.0, 6: 13.0, 7: 12.0, 8: 43.0, 9: 4.0, 10: 14.0, 11: 27.0, 12: 15.0, 13: 7.0, 14: 14.0, 15: 24.0, 16: 9.0, 17: 13.0, 18: 9.0, 19: 43.0, 20: 36.0, 21: 5.0, 22: 14.0, 23: 30.0, 24: 16.0, 25: 14.0, 26: 10.0, 27: 29.0, 28: 14.0, 29: 17.0, 30: 12.0, 31: 12.0, 32: 17.0, 33: 14.0, 34: 7.0, 35: 22.0, 36: 5.0, 37: 15.0, 38: 15.0, 39: 30.0, 40: 16.0, 41: 24.0, 42: 21.0, 43: 12.0, 44: 11.0, 45: 10.0, 46: 19.0, 47: 21.0, 48: 9.0, 49: 9.0, 50: 11.0, 51: 37.0, 52: 25.0, 53: 25.0, 54: 11.0, 55: 9.0, 56: 6.0, 57: 2.0, 58: 15.0, 59: 27.0, 60: 6.0, 61: 6.0, 62: 13.0, 63: 30.0, 64: 7.0, 65: 23.0, 66: 28.0, 67: 9.0, 68: 6.0, 69: 22.0, 70: 10.0, 71: 26.0, 72: 7.0, 73: 12.0, 74: 14.0, 75: 19.0, 76: 37.0, 77: 17.0, 78: 16.0, 79: 4.0, 80: 15.0, 81: 25.0, 82: 10.0, 83: 13.0, 84: 12.0, 85: 21.0, 86: 29.0, 87: 14.0, 88: 12.0, 89: 19.0, 90: 17.0, 91: 15.0, 92: 18.0, 93: 22.

Training Progress:  47%|████▋     | 47/100 [08:07<09:39, 10.93s/it]

Avg reward: 15.97  std: 7.254591649431414  coeff : 0.45426372256928077
Avg reward per agent, per game:  {1: 21.0, 2: 9.0, 3: 20.0, 4: 16.0, 5: 25.0, 6: 15.0, 7: 20.0, 8: 35.0, 9: 18.0, 10: 6.0, 11: 16.0, 12: 10.0, 13: 35.0, 14: 22.0, 15: 25.0, 16: 19.0, 17: 11.0, 18: 9.0, 19: 28.0, 20: 27.0, 21: 9.0, 22: 21.0, 23: 18.0, 24: 11.0, 25: 27.0, 26: 12.0, 27: 26.0, 28: 18.0, 29: 14.0, 30: 10.0, 31: 16.0, 32: 9.0, 33: 13.0, 34: 22.0, 35: 8.0, 36: 11.0, 37: 29.0, 38: 30.0, 39: 20.0, 40: 13.0, 41: 20.0, 42: 33.0, 43: 11.0, 44: 22.0, 45: 21.0, 46: 23.0, 47: 16.0, 48: 8.0, 49: 21.0, 50: 16.0, 51: 23.0, 52: 13.0, 53: 16.0, 54: 8.0, 55: 8.0, 56: 8.0, 57: 3.0, 58: 17.0, 59: 7.0, 60: 8.0, 61: 12.0, 62: 10.0, 63: 22.0, 64: 9.0, 65: 18.0, 66: 29.0, 67: 3.0, 68: 16.0, 69: 13.0, 70: 26.0, 71: 9.0, 72: 6.0, 73: 21.0, 74: 16.0, 75: 8.0, 76: 22.0, 77: 6.0, 78: 13.0, 79: 10.0, 80: 22.0, 81: 6.0, 82: 4.0, 83: 11.0, 84: 19.0, 85: 13.0, 86: 20.0, 87: 19.0, 88: 19.0, 89: 21.0, 90: 17.0, 91: 13.0, 92: 6.0, 93: 22

Training Progress:  48%|████▊     | 48/100 [08:19<09:41, 11.18s/it]

Avg reward: 15.74  std: 8.61001742158516  coeff : 0.5470150839634791
Avg reward per agent, per game:  {1: 13.0, 2: 5.0, 3: 32.0, 4: 19.0, 5: 34.0, 6: 31.0, 7: 10.0, 8: 10.0, 9: 19.0, 10: 13.0, 11: 7.0, 12: 6.0, 13: 26.0, 14: 14.0, 15: 18.0, 16: 11.0, 17: 23.0, 18: 8.0, 19: 35.0, 20: 17.0, 21: 9.0, 22: 22.0, 23: 11.0, 24: 13.0, 25: 11.0, 26: 13.0, 27: 29.0, 28: 20.0, 29: 12.0, 30: 2.0, 31: 14.0, 32: 17.0, 33: 13.0, 34: 7.0, 35: 8.0, 36: 19.0, 37: 22.0, 38: 14.0, 39: 30.0, 40: 17.0, 41: 22.0, 42: 35.0, 43: 11.0, 44: 13.0, 45: 22.0, 46: 16.0, 47: 8.0, 48: 16.0, 49: 4.0, 50: 8.0, 51: 34.0, 52: 4.0, 53: 21.0, 54: 9.0, 55: 14.0, 56: 17.0, 57: 1.0, 58: 27.0, 59: 11.0, 60: 17.0, 61: 8.0, 62: 10.0, 63: 22.0, 64: 10.0, 65: 7.0, 66: 43.0, 67: 12.0, 68: 10.0, 69: 26.0, 70: 15.0, 71: 24.0, 72: 8.0, 73: 15.0, 74: 7.0, 75: 8.0, 76: 20.0, 77: 10.0, 78: 25.0, 79: 7.0, 80: 9.0, 81: 14.0, 82: 6.0, 83: 15.0, 84: 10.0, 85: 11.0, 86: 18.0, 87: 35.0, 88: 11.0, 89: 22.0, 90: 8.0, 91: 13.0, 92: 16.0, 93: 28.0,

Training Progress:  49%|████▉     | 49/100 [08:30<09:27, 11.12s/it]

Avg reward: 16.52  std: 8.192044921751833  coeff : 0.49588649647408195
Avg reward per agent, per game:  {1: 23.0, 2: 8.0, 3: 23.0, 4: 28.0, 5: 33.0, 6: 19.0, 7: 6.0, 8: 28.0, 9: 6.0, 10: 8.0, 11: 17.0, 12: 19.0, 13: 8.0, 14: 19.0, 15: 19.0, 16: 22.0, 17: 8.0, 18: 12.0, 19: 31.0, 20: 26.0, 21: 10.0, 22: 16.0, 23: 22.0, 24: 13.0, 25: 7.0, 26: 14.0, 27: 32.0, 28: 16.0, 29: 25.0, 30: 12.0, 31: 30.0, 32: 14.0, 33: 16.0, 34: 15.0, 35: 7.0, 36: 15.0, 37: 9.0, 38: 17.0, 39: 25.0, 40: 14.0, 41: 21.0, 42: 18.0, 43: 13.0, 44: 22.0, 45: 14.0, 46: 13.0, 47: 26.0, 48: 9.0, 49: 9.0, 50: 10.0, 51: 28.0, 52: 5.0, 53: 20.0, 54: 6.0, 55: 23.0, 56: 6.0, 57: 6.0, 58: 14.0, 59: 22.0, 60: 4.0, 61: 13.0, 62: 25.0, 63: 44.0, 64: 5.0, 65: 12.0, 66: 25.0, 67: 11.0, 68: 9.0, 69: 26.0, 70: 12.0, 71: 17.0, 72: 20.0, 73: 20.0, 74: 10.0, 75: 16.0, 76: 24.0, 77: 13.0, 78: 18.0, 79: 8.0, 80: 13.0, 81: 14.0, 82: 8.0, 83: 21.0, 84: 16.0, 85: 10.0, 86: 24.0, 87: 21.0, 88: 7.0, 89: 24.0, 90: 11.0, 91: 6.0, 92: 12.0, 93: 41

Training Progress:  50%|█████     | 50/100 [08:41<09:17, 11.15s/it]

Avg reward: 16.59  std: 8.637239142225946  coeff : 0.5206292430515941
Avg reward per agent, per game:  {1: 15.0, 2: 4.0, 3: 21.0, 4: 15.0, 5: 46.0, 6: 21.0, 7: 13.0, 8: 33.0, 9: 19.0, 10: 11.0, 11: 6.0, 12: 16.0, 13: 25.0, 14: 14.0, 15: 15.0, 16: 23.0, 17: 15.0, 18: 14.0, 19: 27.0, 20: 29.0, 21: 8.0, 22: 21.0, 23: 28.0, 24: 11.0, 25: 9.0, 26: 11.0, 27: 18.0, 28: 9.0, 29: 23.0, 30: 6.0, 31: 24.0, 32: 12.0, 33: 17.0, 34: 10.0, 35: 13.0, 36: 13.0, 37: 18.0, 38: 11.0, 39: 31.0, 40: 20.0, 41: 18.0, 42: 34.0, 43: 14.0, 44: 21.0, 45: 17.0, 46: 16.0, 47: 12.0, 48: 11.0, 49: 20.0, 50: 23.0, 51: 30.0, 52: 10.0, 53: 21.0, 54: 9.0, 55: 2.0, 56: 20.0, 57: 1.0, 58: 17.0, 59: 15.0, 60: 7.0, 61: 9.0, 62: 14.0, 63: 46.0, 64: 6.0, 65: 3.0, 66: 19.0, 67: 24.0, 68: 3.0, 69: 22.0, 70: 18.0, 71: 30.0, 72: 13.0, 73: 28.0, 74: 19.0, 75: 15.0, 76: 16.0, 77: 3.0, 78: 28.0, 79: 11.0, 80: 7.0, 81: 13.0, 82: 25.0, 83: 18.0, 84: 17.0, 85: 10.0, 86: 20.0, 87: 31.0, 88: 16.0, 89: 28.0, 90: 17.0, 91: 10.0, 92: 14.0, 9

Training Progress:  51%|█████     | 51/100 [08:52<08:55, 10.93s/it]

Avg reward: 17.55  std: 8.854800957672621  coeff : 0.5045470631152491
Avg reward per agent, per game:  {1: 26.0, 2: 15.0, 3: 22.0, 4: 23.0, 5: 40.0, 6: 17.0, 7: 10.0, 8: 24.0, 9: 10.0, 10: 17.0, 11: 17.0, 12: 21.0, 13: 12.0, 14: 17.0, 15: 38.0, 16: 25.0, 17: 10.0, 18: 14.0, 19: 36.0, 20: 28.0, 21: 20.0, 22: 32.0, 23: 25.0, 24: 18.0, 25: 3.0, 26: 6.0, 27: 29.0, 28: 21.0, 29: 11.0, 30: 8.0, 31: 24.0, 32: 11.0, 33: 8.0, 34: 15.0, 35: 9.0, 36: 11.0, 37: 20.0, 38: 16.0, 39: 19.0, 40: 18.0, 41: 20.0, 42: 36.0, 43: 8.0, 44: 37.0, 45: 12.0, 46: 30.0, 47: 16.0, 48: 15.0, 49: 13.0, 50: 13.0, 51: 27.0, 52: 7.0, 53: 24.0, 54: 31.0, 55: 9.0, 56: 7.0, 57: 5.0, 58: 27.0, 59: 12.0, 60: 7.0, 61: 13.0, 62: 22.0, 63: 47.0, 64: 10.0, 65: 7.0, 66: 16.0, 67: 5.0, 68: 13.0, 69: 18.0, 70: 13.0, 71: 22.0, 72: 7.0, 73: 17.0, 74: 22.0, 75: 20.0, 76: 21.0, 77: 15.0, 78: 21.0, 79: 7.0, 80: 13.0, 81: 13.0, 82: 6.0, 83: 14.0, 84: 9.0, 85: 15.0, 86: 23.0, 87: 26.0, 88: 21.0, 89: 27.0, 90: 9.0, 91: 10.0, 92: 25.0, 93:

Training Progress:  52%|█████▏    | 52/100 [09:02<08:39, 10.82s/it]

Avg reward: 17.64  std: 8.856093947107833  coeff : 0.5020461421262944
Avg reward per agent, per game:  {1: 23.0, 2: 10.0, 3: 24.0, 4: 23.0, 5: 35.0, 6: 16.0, 7: 16.0, 8: 23.0, 9: 10.0, 10: 18.0, 11: 20.0, 12: 11.0, 13: 13.0, 14: 17.0, 15: 34.0, 16: 12.0, 17: 15.0, 18: 11.0, 19: 51.0, 20: 22.0, 21: 11.0, 22: 23.0, 23: 10.0, 24: 17.0, 25: 11.0, 26: 10.0, 27: 28.0, 28: 27.0, 29: 30.0, 30: 4.0, 31: 24.0, 32: 20.0, 33: 11.0, 34: 9.0, 35: 10.0, 36: 11.0, 37: 19.0, 38: 15.0, 39: 28.0, 40: 29.0, 41: 16.0, 42: 17.0, 43: 24.0, 44: 18.0, 45: 11.0, 46: 28.0, 47: 17.0, 48: 8.0, 49: 28.0, 50: 9.0, 51: 30.0, 52: 5.0, 53: 16.0, 54: 6.0, 55: 15.0, 56: 7.0, 57: 7.0, 58: 21.0, 59: 20.0, 60: 7.0, 61: 14.0, 62: 18.0, 63: 35.0, 64: 9.0, 65: 6.0, 66: 21.0, 67: 12.0, 68: 9.0, 69: 21.0, 70: 17.0, 71: 25.0, 72: 8.0, 73: 19.0, 74: 17.0, 75: 28.0, 76: 18.0, 77: 7.0, 78: 31.0, 79: 3.0, 80: 15.0, 81: 25.0, 82: 5.0, 83: 30.0, 84: 14.0, 85: 20.0, 86: 27.0, 87: 30.0, 88: 16.0, 89: 26.0, 90: 15.0, 91: 4.0, 92: 34.0, 93

Training Progress:  53%|█████▎    | 53/100 [09:14<08:35, 10.97s/it]

Avg reward: 17.26  std: 9.118793779881196  coeff : 0.528319454222549
Avg reward per agent, per game:  {1: 18.0, 2: 3.0, 3: 19.0, 4: 24.0, 5: 47.0, 6: 11.0, 7: 19.0, 8: 27.0, 9: 20.0, 10: 8.0, 11: 14.0, 12: 14.0, 13: 26.0, 14: 15.0, 15: 25.0, 16: 11.0, 17: 20.0, 18: 9.0, 19: 51.0, 20: 25.0, 21: 13.0, 22: 19.0, 23: 36.0, 24: 21.0, 25: 12.0, 26: 10.0, 27: 21.0, 28: 10.0, 29: 24.0, 30: 2.0, 31: 10.0, 32: 15.0, 33: 22.0, 34: 10.0, 35: 7.0, 36: 9.0, 37: 20.0, 38: 18.0, 39: 43.0, 40: 33.0, 41: 21.0, 42: 34.0, 43: 10.0, 44: 14.0, 45: 14.0, 46: 10.0, 47: 20.0, 48: 20.0, 49: 20.0, 50: 15.0, 51: 27.0, 52: 6.0, 53: 16.0, 54: 5.0, 55: 10.0, 56: 6.0, 57: 8.0, 58: 20.0, 59: 9.0, 60: 12.0, 61: 7.0, 62: 22.0, 63: 27.0, 64: 13.0, 65: 14.0, 66: 17.0, 67: 18.0, 68: 8.0, 69: 19.0, 70: 18.0, 71: 29.0, 72: 13.0, 73: 36.0, 74: 17.0, 75: 9.0, 76: 24.0, 77: 11.0, 78: 21.0, 79: 13.0, 80: 16.0, 81: 25.0, 82: 8.0, 83: 15.0, 84: 9.0, 85: 11.0, 86: 19.0, 87: 16.0, 88: 14.0, 89: 29.0, 90: 29.0, 91: 7.0, 92: 14.0, 93:

In [None]:
# Plot the `rewards` sequence collected by an earlier cell.
# The explicit Figure/Axes interface is used so the figure carries a title and
# axis labels and can be understood when the notebook is skimmed.
# NOTE(review): `rewards` is defined outside this cell; presumably it holds one
# value per training iteration -- confirm, then make the x-label more specific.
fig, ax = plt.subplots()
ax.plot(rewards)
ax.set_title("Rewards")
ax.set_xlabel("Index in rewards")
ax.set_ylabel("Reward")
plt.show()  # render the figure without echoing the Line2D repr as cell output

# Optimizations

In [None]:
import cProfile

In [None]:
def stress_test(games: int = 1, optimization_passes: int = 1, seed: int = 42) -> None:
    """Run one small `train_loop` call so it can be profiled.

    The notebook-level globals `custom_gym` and `model` are passed positionally
    to `train_loop`; the keyword arguments below are forwarded unchanged.
    The defaults reproduce the previously hard-coded call, so a bare
    `stress_test()` (as used by the cProfile cell) behaves exactly as before.

    Args:
        games: forwarded as `train_loop(..., games=games)`.
        optimization_passes: forwarded as
            `train_loop(..., optimization_passes=optimization_passes)`.
        seed: forwarded as `train_loop(..., seed=seed)`.

    Returns:
        None. Whatever `train_loop` returns is discarded, as in the original.
    """
    # NOTE(review): the exact meaning of each keyword is defined by train_loop
    # (solution.trainer, not visible here) -- confirm before changing defaults.
    train_loop(
        custom_gym,
        model,
        games=games,
        optimization_passes=optimization_passes,
        seed=seed,
    )

In [None]:

# Profile a single stress_test() call and print the collected statistics,
# ordered by the 'time' sort key.
cProfile.run(
    "stress_test()",
    sort="time",
)

# Visualization

In [None]:
from models.base import RenderWrapper

# Build a RenderWrapper around the `model` object created in an earlier cell.
# NOTE(review): what the wrapper draws is defined in models.base, which is not
# visible from this notebook section -- confirm there.
renderer = RenderWrapper(model=model)

In [None]:
# Call the wrapper's render() method; the form of the output (figure, window,
# text) depends on RenderWrapper in models.base -- not shown here.
renderer.render()