In [1]:
import glob, os
import numpy as np
import math, random

from IPython.display import clear_output
import matplotlib.pyplot as plt
%matplotlib inline

import torch
import torch.nn as nn
import torch.optim as optim
import torch.autograd as autograd 
import torch.nn.functional as F

from geometry.model import Model, combine_observations, get_mesh
from geometry.utils.visualisation import illustrate_points, illustrate_mesh, illustrate_voxels
from geometry.voxel_grid import VoxelGrid

from rl.environment import Environment, CombiningObservationsWrapper
from rl.environment import StepPenaltyRewardWrapper, DepthMapWrapper
from rl.environment import VoxelGridWrapper, VoxelWrapper
from rl.environment import FrameStackWrapper, ActionMaskWrapper
from rl.environment import MeshReconstructionWrapper
from rl.validation import validate
from rl.utils import build_epsilon_func, plot


from rl.dqn import CnnDQN, CnnDQNA, VoxelDQN
from rl.agent import DQNAgent
from rl.replay_buffer import DiskReplayBuffer, ReplayBuffer


# !conda install -c conda-forge pyembree
# !conda install -c conda-forge igl
# !pip install Cython
# !pip install gym

In [2]:
device = torch.device('cuda:3' if torch.cuda.is_available() else 'cpu')

experiment_save_path = "./models/abc-vdqn-occl-sgd/"

train_dataset_path = "./data/1kabc/simple/train/"
val_dataset_path = "./data/1kabc/simple/val/"
number_of_view_points = 100

num_stack = 4
reconstruction_depth = 7
grid_size = 64
raycast_resolution = 512


learning_rate = 0.001
weight_decay = 0.01
buffer_capacity = 100000
epsilon_decay = 10000
batch_size = 256
start_frame = 0
num_frames = 150000

log_interval = 100
save_interval = 500
val_interval = 1000
train_interval = 10
max_novp = 50

In [4]:
env = Environment(models_path=train_dataset_path,
#                   similarity_threshold=similarity_threshold,
                  image_size=raycast_resolution,
                  number_of_view_points=number_of_view_points)
# env = CombiningObservationsWrapper(env)
# env = StepPenaltyRewardWrapper(env, weight=1.0)
# env = DepthMapWrapper(env)

env = MeshReconstructionWrapper(env, reconstruction_depth=reconstruction_depth)
env = VoxelGridWrapper(env, occlusion_reward=True, grid_size=grid_size)
env = CombiningObservationsWrapper(env)
env = VoxelWrapper(env)
env = StepPenaltyRewardWrapper(env)
env = FrameStackWrapper(env, num_stack=num_stack, lz4_compress=False)
env = ActionMaskWrapper(env)

In [4]:
agent = DDQNAgent(env.observation_space.shape, env.action_space.n,
                  device=device, learning_rate=learning_rate, weight_decay=weight_decay)

replay_buffer = DiskReplayBuffer(capacity=buffer_capacity,
                                 overwrite=True,
                                 location="buffer_voxels/",
                                 num_actions=env.action_space.n,
                                 observation_dtype=env.observation_space.dtype,
                                 observation_shape=env.observation_space.shape)

epsilon_by_frame = build_epsilon_func(epsilon_decay=epsilon_decay)

### Training

1. + Validation
2. + Saver
3. + Save loss plot
4. Config saver
5. Starter

In [None]:
if not os.path.exists(experiment_save_path):
    os.makedirs(experiment_save_path)
else:
#     model = torch.load(save_model_path.format(start_frame)).to(device)    
    pass


losses, all_rewards, all_nofs = [], [], []
episode_reward = 0
nof_vp = 0
best_metric = 0

state, _, mask = env.reset()
for frame_idx in range(start_frame + 1, num_frames + 1):
    epsilon = epsilon_by_frame(frame_idx)
    action = agent.act(state, mask, epsilon)

    next_state, reward, done, _, mask = env.step(action)
    replay_buffer.push(state, action, reward, next_state, done, mask)

    state = next_state
    episode_reward += reward
    nof_vp += 1

    if done or nof_vp > max_novp:
        # if done: final_reward = env.final_reward()
        print("Frame: ", frame_idx, "Number of View Points: ", nof_vp)
        print()

        state, _, mask = env.reset()
        all_rewards.append(episode_reward)
        all_nofs.append(nof_vp)
        episode_reward = 0
        nof_vp = 0
        
    if frame_idx % train_interval == 0 and frame_idx > batch_size:
        batch = replay_buffer.sample(batch_size)
        loss = agent.compute_td_loss(*batch)
        losses.append(loss)
        
    if frame_idx % log_interval == 0:
        save_path = os.path.join(experiment_save_path, 'loss.png')
        plot(save_path, frame_idx, all_rewards, all_nofs, losses)
        
    if frame_idx % save_interval ==  0:
        for f in glob.glob(os.path.join(experiment_save_path, "last-*.pt")): os.remove(f)
        save_path = os.path.join(experiment_save_path,
                                 'last-{}.pt'.format(frame_idx))
        torch.save(agent.model, save_path)
        
    if frame_idx % val_interval == 0:
        reward, hausdorff, novp = validate(agent, models_path=val_dataset_path)
        print ("Validation metrics: ", reward, hausdorff, novp)
        if novp > best_metric:
            best_metric = novp
            save_path = os.path.join(experiment_save_path,
                                 'best-{}-{:.2f}.pt'.format(frame_idx, best_metric))
            torch.save(agent.model, save_path)


  0%|          | 0/72 [00:00<?, ?it/s]

Action:  10
0.21993170290764827 0.26628065842357207 0.4862123613312203
0.26628065842357207
Action:  35
0.24371013910510722 0.38094913189330804 0.6246592709984152
0.38094913189330804
Action:  88
0.2324360970220459 0.5756780075738336 0.8081141045958795
0.5756780075738336
Action:  97
0.18954954050118014 0.6546342946810703 0.8441838351822504
0.6546342946810703
Action:  61
0.1999447597047155 0.6804355889482163 0.8803803486529318
0.6804355889482163
Action:  78
0.2682418520564455 0.6172732034110664 0.8855150554675119
0.6172732034110664
Action:  98
0.19459649475217194 0.7039138063571784 0.8985103011093503
0.7039138063571784
Action:  59
0.10111484959574148 0.7987266717988702 0.8998415213946117
0.7987266717988702
Action:  99
0.20825591322251796 0.6923463054779575 0.9006022187004754
0.6923463054779575
Action:  46
0.06352633957716065 0.8415766715163416 0.9051030110935023
0.8415766715163416
Action:  22 (random)
0.27625169487269624 0.6565533764426761 0.9328050713153724
0.6565533764426761
Action:  95

  1%|▏         | 1/72 [00:11<13:03, 11.04s/it]

Action:  10
0.1321186440677966 -0.0537886241870809 0.07833001988071571
-0.0537886241870809
Action:  84
0.15728813559322033 0.0192526198739765 0.17654075546719683
0.0192526198739765
Action:  46
0.07254237288135593 0.11632442632341544 0.18886679920477137
0.11632442632341544
Action:  44
0.021271186440677967 0.17554789904639956 0.19681908548707752
0.17554789904639956
Action:  42
0.08483050847457627 0.13027883546180544 0.2151093439363817
0.13027883546180544
Action:  53
0.04135593220338983 0.18965798429760422 0.23101391650099404
0.18965798429760422
Action:  34
0.10516949152542372 0.15506907706304546 0.26023856858846917
0.15506907706304546
Action:  27
0.07677966101694915 0.2083098022037268 0.28508946322067596
0.2083098022037268
Action:  23
0.09686440677966102 0.2259984162819692 0.3228628230616302
0.2259984162819692
Action:  61
0.04347457627118644 0.30404033426559285 0.3475149105367793
0.30404033426559285
Action:  49
0.04169491525423729 0.31635677460659767 0.35805168986083497
0.316356774606597

  3%|▎         | 2/72 [00:15<10:32,  9.04s/it]

Action:  46
0.26332384619677435 0.06465580496601636 0.3279796511627907
0.06465580496601636
Action:  42
0.27473631341034654 0.3105834540315139 0.5853197674418604
0.3105834540315139
Action:  43
0.29532898041185335 0.4050343916811699 0.7003633720930232
0.4050343916811699
Action:  47
0.3117919526759306 0.44380397755662754 0.7555959302325581
0.44380397755662754
Action:  99
0.11716613650315308 0.7039094448921958 0.8210755813953489
0.7039094448921958
Action:  27
0.1237234220659635 0.7449539035154318 0.8686773255813953
0.7449539035154318
Action:  44
0.12059824767007088 0.7739511709345803 0.8945494186046512
0.7739511709345803
Action:  25
0.30855516490875606 0.598276230440081 0.9068313953488372
0.598276230440081
Action:  61
0.13281991182543668 0.7862934602675866 0.9191133720930232
0.7862934602675866
Action:  40
0.17141023494614654 0.7548252301701326 0.9262354651162791
0.7548252301701326
Action:  48
0.1607232546459066 0.7689279081447911 0.9296511627906977
0.7689279081447911
Action:  82
0.13622411

  4%|▍         | 3/72 [00:24<10:34,  9.20s/it]

Action:  77 (random)
0.2564600507292327 0.6304017600263834 0.8868618107556161
0.6304017600263834
Action:  43
0.0802155992390615 0.8135897104954517 0.8938053097345132
0.8135897104954517
Action:  47
0.045616677235256815 0.8539068081289365 0.8995234853641934
0.8539068081289365
Action:  49
0.029486366518706404 0.877252911901988 0.9067392784206944
0.877252911901988
Action:  54
0.06452124286620164 0.844804829291729 0.9093260721579306
0.844804829291729
Action:  11
0.3375475586556753 0.5798111887915677 0.917358747447243
0.5798111887915677
Action:  84
0.31103360811667724 0.6098649623394153 0.9208985704560926
0.6098649623394153
Action:  36 (random)
0.13700856055802155 0.7909696559157702 0.9279782164737918
0.7909696559157702
Action:  99
0.3798747622067216 0.5494649246550891 0.9293396868618108
0.5494649246550891
Action:  20
0.2688253012048193 0.6665048553642753 0.9353301565690946
0.6665048553642753
Action:  50
0.029327837666455296 0.9083168185622718 0.9376446562287271
0.9083168185622718
Action:  9

  6%|▌         | 4/72 [00:30<09:03,  7.99s/it]

Action:  50
0.08979621228487074 0.25246017897115003 0.34225639125602075
0.25246017897115003
Action:  47
0.1049902786779002 0.28256066611646813 0.3875509447943683
0.28256066611646813
Action:  49
0.08518758551162958 0.34098877610378353 0.4261763616154131
0.34098877610378353
Action:  53
0.0870598401382588 0.3996018864271358 0.4866617265653946
0.3996018864271358
Action:  0
0.19255418736948224 0.3170790101110661 0.5096331974805484
0.3170790101110661
Action:  10
0.19485850075610284 0.33570837586486785 0.5305668766209707
0.33570837586486785
Action:  35
0.13285806869734285 0.4260526389721644 0.5589107076695072
0.4260526389721644
Action:  95
0.19946712752934398 0.5640563997029642 0.7635235272323082
0.5640563997029642
Action:  51
0.09822135810470224 0.6829012799093771 0.7811226380140793
0.6829012799093771
Action:  92
0.22312234463887087 0.5785634649202251 0.801685809559096
0.5785634649202251
Action:  59
0.1266292215741341 0.6866349503413902 0.8132641719155242
0.6866349503413902
Action:  34
0.160

  7%|▋         | 5/72 [00:43<10:43,  9.60s/it]

Action:  10
0.2244235046236685 0.3505110258664175 0.574934530490086
0.3505110258664175
Action:  34
0.15673650942291936 0.5039443734801857 0.6606808829031051
0.5039443734801857
Action:  84
0.23267587498536813 0.5091123779139959 0.741788252899364
0.5091123779139959
Action:  18 (random)
0.19515977993679035 0.5872569802577476 0.782416760194538
0.5872569802577476
Action:  40 (random)
0.11526981154161302 0.7207945356338453 0.8360643471754583
0.7207945356338453
Action:  49
0.10400327753716493 0.7819675417669878 0.8859708193041527
0.7819675417669878
Action:  47
0.11594287720941121 0.7774353495021489 0.8933782267115601
0.7774353495021489
Action:  61
0.11544539388973428 0.7907274456164386 0.9061728395061729
0.7907274456164386
Action:  46
0.10359358539154864 0.8346780195467229 0.9382716049382716
0.8346780195467229
Action:  54
0.11828397518436147 0.8285921939140298 0.9468761690983913
0.8285921939140298
Action:  50
0.09759452183073862 0.85436956346668 0.9519640852974186
0.85436956346668


  8%|▊         | 6/72 [00:49<09:27,  8.61s/it]

Action:  46
0.0864998353638459 0.07778882442996854 0.16428865979381443
0.07778882442996854
Action:  49
0.07662166611787949 0.14259482872748133 0.21921649484536082
0.14259482872748133
Action:  50
0.08116562397102403 0.21631891211145018 0.2974845360824742
0.21631891211145018
Action:  84
0.20095488969377676 0.3238286154608625 0.5247835051546392
0.3238286154608625
Action:  34
0.13042476127757655 0.4946474036708771 0.6250721649484536
0.4946474036708771
Action:  47
0.09025353967731314 0.5644268726938209 0.6546804123711341
0.5644268726938209
Action:  10
0.19384260783668095 0.5668996602045562 0.7607422680412371
0.5668996602045562
Action:  44
0.060981231478432665 0.7077610365628044 0.7687422680412371
0.7077610365628044
Action:  53
0.07826802765887389 0.7270927970833941 0.805360824742268
0.7270927970833941
Action:  35
0.10846229832071123 0.7406923408545465 0.8491546391752577
0.7406923408545465
Action:  48
0.05851168916694106 0.806148104647492 0.864659793814433
0.806148104647492
Action:  99
0.234

 10%|▉         | 7/72 [00:57<08:54,  8.22s/it]

Action:  46
0.11770361715593204 0.11202051260963745 0.2297241297655695
0.11202051260963745
Action:  53
0.129411907891534 0.27776308618850387 0.4071749940800379
0.27776308618850387
Action:  51
0.12688038557032277 0.291986533681394 0.41886691925171676
0.291986533681394
Action:  89 (random)
0.16016746993817244 0.3640451751008993 0.5242126450390717
0.3640451751008993
Action:  48
0.11229979066257728 0.4195792053118485 0.5318789959744258
0.4195792053118485
Action:  99
0.10956136507472859 0.4295695844871942 0.5391309495619228
0.4295695844871942
Action:  54
0.13019083783652208 0.4089993113465231 0.5391901491830452
0.4089993113465231
Action:  47
0.13503480843191665 0.419961639590816 0.5549964480227326
0.419961639590816
Action:  27
0.12101406942213135 0.4741493215321666 0.5951633909542979
0.4741493215321666
Action:  25
0.14646317121853852 0.48786076910824333 0.6343239403267819
0.48786076910824333
Action:  49
0.12863297794654593 0.5058389614330421 0.634471939379588
0.5058389614330421
Action:  82


 11%|█         | 8/72 [01:19<13:22, 12.53s/it]

Action:  84
0.04735990586851008 0.3981288159360012 0.44548872180451127
0.3981288159360012
Action:  46
0.027430504485953817 0.4725694955140462 0.5
0.4725694955140462
Action:  91
0.1710545668480659 0.7236822752571973 0.8947368421052632
0.7236822752571973
Action:  44
0.26856890719223414 0.6280476341611493 0.8966165413533834
0.6280476341611493
Action:  10
0.18818943962347404 0.7091789814291576 0.8973684210526316
0.7091789814291576
Action:  99
0.04883071039858803 0.8496655302029158 0.8984962406015038
0.8496655302029158
Action:  47
0.1863509339608766 0.7249272615278453 0.9112781954887218
0.7249272615278453
Action:  50
0.08155611119282248 0.8304739639951475 0.91203007518797
0.8304739639951475
Action:  34 (random)
0.12634210913369615 0.7879436051520181 0.9142857142857143
0.7879436051520181
Action:  49
0.2364318282100309 0.6789817056245555 0.9154135338345865
0.6789817056245555
Action:  13
0.12229739667598176 0.7934920770082288 0.9157894736842105
0.7934920770082288
Action:  54
0.1268568907192234

 12%|█▎        | 9/72 [01:28<12:06, 11.54s/it]

Action:  53
0.2012414433228913 0.1831183106189881 0.3843597539418794
0.1831183106189881
Action:  43 (random)
0.20347488107669104 0.31741884726948727 0.5208937283461783
0.31741884726948727
Action:  48
0.20245968209769113 0.3352621590958321 0.5377218411935233
0.3352621590958321
Action:  46
0.20063232393549135 0.4392602024026264 0.6398925263381178
0.4392602024026264
Action:  49
0.19578837452140618 0.5314972084525031 0.7272855829739093
0.5314972084525031
Action:  54
0.20231465367211973 0.5439909392006795 0.7463055928727993
0.5439909392006795
Action:  50
0.1988919828286344 0.5641710165350083 0.7630629993636427
0.5641710165350083
Action:  47
0.20164752291449123 0.5966979483433748 0.798345471257866
0.5966979483433748
Action:  51
0.1985729202923773 0.6042270514250802 0.8027999717174574
0.6042270514250802
Action:  0
0.1081041884209305 0.7779878712552344 0.8860920596761649
0.7779878712552344
Action:  99
0.10804617705070194 0.8503077789699443 0.9583539560206462
0.8503077789699443


 14%|█▍        | 10/72 [01:36<10:37, 10.28s/it]

Action:  84
0.23812363347247067 0.17375532148022654 0.4118789549526972
0.17375532148022654
Action:  99
0.1857483601669648 0.24611681118821346 0.43186517135517827
0.24611681118821346
Action:  44
0.10542138739813159 0.46634729877441405 0.5717686861725456
0.46634729877441405
Action:  48
0.09632776783939574 0.5990547269917551 0.6953824948311509
0.5990547269917551
Action:  54
0.15568475452196381 0.6239029906067874 0.7795877451287513
0.6239029906067874
Action:  61 (random)
0.13150964023057046 0.6664353506847858 0.7979449909153562
0.6664353506847858
Action:  47
0.1397088054064798 0.6796634143792479 0.8193722197857277
0.6796634143792479
Action:  27
0.16385907374279468 0.7810065361813956 0.9448656099241902
0.7810065361813956
Action:  92
0.23899324190021864 0.7071880750598716 0.9461813169600902
0.7071880750598716
Action:  82
0.19481713377062215 0.7533064173853204 0.9481235511559426
0.7533064173853204
Action:  25
0.21603557940767243 0.7589659869102274 0.9750015663178999
0.7589659869102274


 15%|█▌        | 11/72 [01:45<10:15, 10.09s/it]

Action:  93
0.2581990656878635 0.16578866437348622 0.4239877300613497
0.16578866437348622
Action:  35
0.21763275812756222 0.43518932776200836 0.6528220858895706
0.43518932776200836
Action:  12
0.27626561159309754 0.4417098485296019 0.7179754601226994
0.4417098485296019
Action:  91
0.26728000762703785 0.5145604831705082 0.781840490797546
0.5145604831705082
Action:  53
0.16448183811612166 0.7124506772213017 0.8769325153374233
0.7124506772213017
Action:  63
0.19382209934216799 0.7436625632345192 0.9374846625766872
0.7436625632345192
Action:  49
0.14829821717990274 0.815689512881447 0.9639877300613497
0.815689512881447


 17%|█▋        | 12/72 [01:55<10:04, 10.08s/it]

Action:  46
0.14124156381102937 0.2632215298376451 0.40446309364867444
0.2632215298376451
Action:  63
0.15808354107132 0.37657219038013906 0.5346557314514591
0.37657219038013906
Action:  50
0.15426825560892565 0.4458843287893196 0.6001525843982453
0.4458843287893196
Action:  51
0.17256946555602845 0.5107797619854554 0.6833492275414839
0.5107797619854554
Action:  48
0.14788411260412232 0.5620148002320402 0.7098989128361625
0.5620148002320402
Action:  54
0.17098863014531526 0.5977315682144024 0.7687201983597177
0.5977315682144024
Action:  25
0.17373989177357574 0.639344220375957 0.8130841121495327
0.639344220375957
Action:  44
0.14384082203441356 0.6902617909734063 0.83410261300782
0.6902617909734063
Action:  49
0.17132303763604304 0.6639239583586165 0.8352469959946596
0.6639239583586165
Action:  47
0.18026083784276767 0.6615854333760002 0.8418462712187679
0.6615854333760002
Action:  80
0.17697756429744027 0.7207145966790999 0.8976921609765401
0.7207145966790999
Action:  84
0.18416732534

 18%|█▊        | 13/72 [02:16<12:53, 13.11s/it]

Action:  45
0.22252891692954785 0.3514903280741531 0.5740192450037009
0.3514903280741531
Action:  46
0.1787779780681989 0.4682890093485294 0.6470669874167283
0.4682890093485294
Action:  51
0.20703770467177407 0.4869389792660498 0.6939766839378239
0.4869389792660498
Action:  48
0.18236442842121076 0.5222062599577677 0.7045706883789785
0.5222062599577677
Action:  50
0.18399804716839419 0.5566107611217612 0.7406088082901554
0.5566107611217612
Action:  44
0.17628060687997596 0.6522260548520744 0.8285066617320503
0.6522260548520744
Action:  54
0.20271894246657654 0.6270830560530386 0.8298019985196151
0.6270830560530386
Action:  55
0.19676656151419558 0.6572767397441316 0.8540433012583272
0.6572767397441316
Action:  47
0.2191677933002854 0.6433414591053401 0.8625092524056255
0.6433414591053401
Action:  99
0.17573606729758148 0.7230426151598575 0.898778682457439
0.7230426151598575
Action:  27
0.20546041760552802 0.7510532759547977 0.9565136935603257
0.7510532759547977


 19%|█▉        | 14/72 [02:28<12:23, 12.82s/it]

Action:  50
0.13961379480535505 0.3102324910314934 0.4498462858368485
0.3102324910314934
Action:  51 (random)
0.14757233635634523 0.3802034554503324 0.5277757918066777
0.3802034554503324
Action:  48
0.1473872539946943 0.411989309957547 0.5593765639522413
0.411989309957547
Action:  49
0.14346967733974952 0.45644452870872404 0.5999142060484736
0.45644452870872404
Action:  47
0.15139737183046456 0.48240544507094396 0.6338028169014085
0.48240544507094396
Action:  46
0.15469800727990624 0.4851818811879568 0.639879888467863
0.4851818811879568
Action:  99
0.09303473378986983 0.5853809379052757 0.6784156716951455
0.5853809379052757
Action:  40
0.1922388796347708 0.531862071248192 0.7241009508829628
0.531862071248192
Action:  0
0.09303473378986983 0.654516563843647 0.7475512976335168
0.654516563843647
Action:  74
0.2684002714541304 0.5634435835540914 0.8318438550082219
0.5634435835540914
Action:  42
0.1779566907273737 0.6697590453132354 0.8477157360406091
0.6697590453132354
Action:  44
0.172435

 21%|██        | 15/72 [02:43<12:47, 13.47s/it]

Action:  50
0.06978527607361963 0.26200652661045726 0.3317918026840769
0.26200652661045726
Action:  84
0.27223926380368096 0.09083654323295304 0.363075807036634
0.09083654323295304
Action:  34
0.15800880234729262 0.38007607251668996 0.5380848748639826
0.38007607251668996
Action:  35
0.15164043744998668 0.4937168349475469 0.6453572723975336
0.4937168349475469
Action:  54
0.0944251800480128 0.5822160967020779 0.6766412767500907
0.5822160967020779
Action:  99
0.2713723659642571 0.4121604595707447 0.6835328255350018
0.4121604595707447
Action:  49
0.07038543611629768 0.6301949048340106 0.7005803409503083
0.6301949048340106
Action:  47
0.08172179247799413 0.6332763939565361 0.7149981864345303
0.6332763939565361
Action:  48
0.0767204587890104 0.6558693127017403 0.7325897714907508
0.6558693127017403
Action:  61
0.13553614297145905 0.6102019781747143 0.7457381211461733
0.6102019781747143
Action:  13
0.2848759669245132 0.5098284219039235 0.7947043888284367
0.5098284219039235
Action:  46
0.087489

 22%|██▏       | 16/72 [03:00<13:41, 14.66s/it]

Action:  49
0.028310986784990836 0.041127442549497406 0.06943842933448824
0.041127442549497406
Action:  53
0.0568148934117874 0.06430536066645715 0.12112025407824455
0.06430536066645715
Action:  50
0.024983119513842 0.10479889290134495 0.12978201241518694
0.10479889290134495
Action:  84
0.26275682453940385 0.21566096093771464 0.4784177854771185
0.21566096093771464
Action:  44
0.0745152888974631 0.4610701016034752 0.5355853905009383
0.4610701016034752
Action:  47
0.0478923507282724 0.5180091939519644 0.5659015446802368
0.5180091939519644
Action:  10
0.29304523970290347 0.531698516612962 0.8247437563158655
0.531698516612962
Action:  54
0.0640976174399537 0.7648326554054339 0.8289302728453876
0.7648326554054339
Action:  25
0.19721230828590722 0.6389360965069324 0.8361484047928396
0.6389360965069324
Action:  48
0.03728175942895727 0.8004546343923218 0.8377363938212791
0.8004546343923218
Action:  24
0.2098485579241825 0.6339510667329562 0.8437996246571388
0.6339510667329562
Action:  42
0.08

 24%|██▎       | 17/72 [03:15<13:25, 14.64s/it]

Action:  54
0.03771195771856419 0.37488555745046215 0.4125975151690263
0.37488555745046215
Action:  44
0.03694120237833076 0.45294611342634994 0.4898873158046807
0.45294611342634994
Action:  46
0.03363796520590178 0.47907512349678527 0.5127130887026871
0.47907512349678527
Action:  50
0.012442193349482492 0.5014266306898125 0.513868824039295
0.5014266306898125
Action:  47
0.025765249944946046 0.5129518838314192 0.5387171337763652
0.5129518838314192
Action:  23
0.16593261396168246 0.6315247682977801 0.7974573822594626
0.6315247682977801
Action:  48
0.013873596124201719 0.7886401282329205 0.8025137243571222
0.7886401282329205
Action:  49
0.012882624972473023 0.7907868347212571 0.8036694596937302
0.7907868347212571
Action:  84
0.21773838361594364 0.6279709489468994 0.8457093325628431
0.6279709489468994
Action:  91
0.25478969390002204 0.6162013491511192 0.8709910430511413
0.6162013491511192
Action:  99
0.2931072451001982 0.580484202458311 0.8735914475585091
0.580484202458311
Action:  57
0.0

 25%|██▌       | 18/72 [03:21<10:48, 12.01s/it]

Action:  50
0.13045052160463355 0.2747813349718188 0.40523185657645233
0.2747813349718188
Action:  51
0.13773982295060389 0.29912451078495894 0.4368643337355628
0.29912451078495894
Action:  49
0.11119886793694672 0.37966477626232925 0.490863644199276
0.37966477626232925
Action:  47
0.13551847829664002 0.3849521302191331 0.5204706085157731
0.3849521302191331
Action:  99
0.07113239214137625 0.46347371025476236 0.5346061023961386
0.46347371025476236
Action:  44
0.04965939381972554 0.5092959587057011 0.5589553525254266
0.5092959587057011
Action:  27
0.07156020666732485 0.545143809881546 0.6167040165488709
0.545143809881546
Action:  40
0.08113667028663574 0.5641400061484617 0.6452766764350975
0.5641400061484617
Action:  61
0.06675552045282522 0.5917085374682229 0.6584640579210481
0.5917085374682229
Action:  82
0.08256820350807911 0.5822740650663046 0.6648422685743838
0.5822740650663046
Action:  20 (random)
0.1551485832757429 0.5374475208442364 0.6925961041199793
0.5374475208442364
Action:  

 26%|██▋       | 19/72 [03:40<12:26, 14.08s/it]

Action:  53
0.10221341885178897 0.28223451923020404 0.384447938081993
0.28223451923020404
Action:  46
0.12643840784757593 0.2672004844005798 0.39363889224815574
0.2672004844005798
Action:  49
0.11364208011067094 0.3169218736120686 0.43056395372273953
0.3169218736120686
Action:  84
0.15064767653901778 0.36956839150629206 0.5202160680453098
0.36956839150629206
Action:  44
0.055288310381688986 0.4821003299214513 0.5373886403031403
0.4821003299214513
Action:  99
0.07984342576872289 0.48612585709498496 0.5659692828637078
0.48612585709498496
Action:  27
0.08389926428975665 0.5446330048278311 0.6285322691175877
0.5446330048278311
Action:  61
0.06995535433565994 0.574257972547881 0.6442133268835409
0.574257972547881
Action:  25
0.15448343079922028 0.5093211404911413 0.6638045712903616
0.5093211404911413
Action:  10
0.0879865434194806 0.5932324673436105 0.6812190107630911
0.5932324673436105
Action:  80
0.15354021253851474 0.5363457067584578 0.6898859192969726
0.5363457067584578
Action:  78
0.09

 28%|██▊       | 20/72 [04:00<13:57, 16.11s/it]

Action:  46
0.12673857507805847 0.15492134395028162 0.2816599190283401
0.15492134395028162
Action:  45
0.13375595294414483 0.3489161118331831 0.48267206477732794
0.3489161118331831
Action:  48
0.0703472419339578 0.45216287952353207 0.5225101214574899
0.45216287952353207
Action:  44
0.05612325354022771 0.4799496209536994 0.5360728744939272
0.4799496209536994
Action:  54
0.14168795534109188 0.4113079960759122 0.5529959514170041
0.4113079960759122
Action:  27
0.08335698741602801 0.5295579923410569 0.612914979757085
0.5295579923410569
Action:  61
0.07036301132242091 0.5615398226856763 0.6319028340080972
0.5615398226856763
Action:  99
0.08014003216955247 0.5659328423243747 0.6460728744939271
0.5659328423243747
Action:  80
0.15405115589617435 0.5008476295289268 0.6548987854251012
0.5008476295289268
Action:  42
0.13542750812123505 0.52748747163585 0.662914979757085
0.52748747163585
Action:  82
0.08897088970889709 0.5787619038133701 0.6677327935222672
0.5787619038133701
Action:  65
0.085091620

 29%|██▉       | 21/72 [04:20<14:39, 17.24s/it]

Action:  0
0.21915644252540678 0.3954365116901889 0.6145929542155957
0.3954365116901889
Action:  99
0.17405613185192986 0.5140293675543242 0.6880854994062541
0.5140293675543242
Action:  61
0.18553525354115108 0.5576366688760543 0.7431719224172054
0.5576366688760543
Action:  41
0.2985098204412617 0.6504940059210553 0.949003826362317
0.6504940059210553
Action:  42
0.23603285766942236 0.7206236933267512 0.9566565509961736
0.7206236933267512


 31%|███       | 22/72 [04:27<11:42, 14.05s/it]

Action:  84
0.14219204655674103 0.19699902736236638 0.3391910739191074
0.19699902736236638
Action:  99
0.14384093113482055 0.29688431293770384 0.4407252440725244
0.29688431293770384
Action:  48
0.3046556741028128 0.14248519061127368 0.4471408647140865
0.14248519061127368
Action:  49
0.33210475266731326 0.3373513142782934 0.6694560669456067
0.3373513142782934
Action:  46
0.12259941804073715 0.5574563699648416 0.6800557880055788
0.5574563699648416
Action:  50
0.19000969932104753 0.5028773299676554 0.6928870292887029
0.5028773299676554
Action:  44
0.2949563530552861 0.4057409970144489 0.700697350069735
0.4057409970144489
Action:  10
0.29078564500484966 0.4219061262643275 0.7126917712691772
0.4219061262643275
Action:  34
0.24801163918525704 0.4733272729486342 0.7213389121338912
0.4733272729486342
Action:  54
0.24161008729388941 0.4922811260952319 0.7338912133891213
0.4922811260952319
Action:  24
0.2880698351115422 0.45586321928176327 0.7439330543933055
0.45586321928176327
Action:  16
0.191

 32%|███▏      | 23/72 [04:35<09:56, 12.17s/it]

Action:  53
0.14934044478308492 0.3660042903340117 0.5153447351170967
0.3660042903340117
Action:  46
0.14876898899947616 0.3916636431440194 0.5404326321434956
0.3916636431440194
Action:  63
0.19634268298490404 0.5076082138627211 0.7039508968476252
0.5076082138627211
Action:  51
0.1386018381827706 0.5737514184765464 0.712353256659317
0.5737514184765464
Action:  93
0.2729177579884756 0.5123155415765086 0.7852332995649842
0.5123155415765086
Action:  12
0.2817753226344112 0.5527399029182972 0.8345152255527084
0.5527399029182972
Action:  50
0.13145864088766132 0.7108034412290183 0.8422620821166796
0.7108034412290183
Action:  4
0.26472689175675035 0.5845073612675032 0.8492342530242536
0.5845073612675032
Action:  49
0.1343397304633554 0.7325335190450171 0.8668732495083725
0.7325335190450171
Action:  48
0.13936377922758228 0.7297739360456434 0.8691377152732257
0.7297739360456434
Action:  54
0.15384065907900377 0.7166080626896632 0.8704487217686669
0.7166080626896632
Action:  95
0.2644173532072

 33%|███▎      | 24/72 [04:59<12:40, 15.85s/it]

Action:  51
0.10344827586206896 0.13612905362880517 0.23957732949087415
0.13612905362880517
Action:  30 (random)
0.15371055120818644 0.19038166781198645 0.3440922190201729
0.19038166781198645
Action:  42
0.09265629030871098 0.31253102957601525 0.4051873198847262
0.31253102957601525
Action:  47
0.10981167770229598 0.3444630581286358 0.4542747358309318
0.3444630581286358
Action:  27
0.1715108779774701 0.3162893141454886 0.4878001921229587
0.3162893141454886
Action:  44
0.15229168458164932 0.4088994777622508 0.5611911623439001
0.4088994777622508
Action:  48
0.15001289878751398 0.4231859484747338 0.5731988472622478
0.4231859484747338
Action:  49
0.1263651216785622 0.5126358389362313 0.6390009606147935
0.5126358389362313
Action:  99
0.06457992948662826 0.5844114249802305 0.6489913544668587
0.5844114249802305
Action:  45
0.14223063032074984 0.5190565935024971 0.6612872238232469
0.5190565935024971
Action:  46
0.09037750451457563 0.5983833024018509 0.6887608069164265
0.5983833024018509
Action:

 35%|███▍      | 25/72 [05:11<11:27, 14.63s/it]

Action:  46
0.11962310528471938 -0.06676159370761148 0.05286151157710791
-0.06676159370761148
Action:  48
0.1216714461286358 -0.04827695071579176 0.07339449541284404
-0.04827695071579176
Action:  50
0.12228594838181073 -0.03185344510527076 0.09043250327653997
-0.03185344510527076
Action:  49
0.12412945514133551 -0.009668992057019224 0.11446046308431629
-0.009668992057019224
Action:  35
0.1220811142974191 0.0019905327100077364 0.12407164700742683
0.0019905327100077364
Action:  53
0.12228594838181073 0.04197792230407832 0.16426387068588905
0.04197792230407832
Action:  51
0.12433428922572716 0.0622100532819181 0.18654434250764526
0.0622100532819181
Action:  99
0.023555919705038918 0.16910463075367668 0.1926605504587156
0.16910463075367668
Action:  45
0.1220811142974191 0.08281185206343718 0.20489296636085627
0.08281185206343718
Action:  0
0.02376075378943056 0.18637467652948597 0.21013543031891654
0.18637467652948597
Action:  56
0.12412945514133551 0.09430654311117649 0.218435998252512
0.

 36%|███▌      | 26/72 [05:15<08:50, 11.54s/it]

Action:  10
0.25483091787439616 0.09419505615157786 0.349025974025974
0.09419505615157786
Action:  46
0.0821256038647343 0.2717705000313696 0.3538961038961039
0.2717705000313696
Action:  47
0.08001207729468598 0.28200090971830105 0.362012987012987
0.28200090971830105
Action:  50
0.06521739130434782 0.30166572557876903 0.36688311688311687
0.30166572557876903
Action:  49
0.07216183574879227 0.2955329694460129 0.3676948051948052
0.2955329694460129
Action:  84
0.21829710144927536 0.4407938076416337 0.6590909090909091
0.4407938076416337
Action:  44
0.11201690821256038 0.5511324424367903 0.6631493506493507
0.5511324424367903
Action:  34
0.1322463768115942 0.5373964803312629 0.6696428571428571
0.5373964803312629
Action:  99
0.24667874396135267 0.46030177551916673 0.7069805194805194
0.46030177551916673
Action:  54
0.08303140096618357 0.6280075600727775 0.711038961038961
0.6280075600727775
Action:  13
0.21588164251207728 0.5032742016437668 0.7191558441558441
0.5032742016437668
Action:  48
0.091

 38%|███▊      | 27/72 [05:20<07:10,  9.56s/it]

Action:  99
0.14649625125450144 0.29844399984215564 0.4449402510966571
0.29844399984215564
Action:  27
0.19378357636224097 0.4310084369489071 0.6247920133111481
0.4310084369489071
Action:  91
0.19446248302733338 0.52902866808445 0.7234911511117834
0.52902866808445
Action:  84
0.21108093748155146 0.5471250827876968 0.7582060202692482
0.5471250827876968
Action:  41
0.17377058858256095 0.6555290635124322 0.8292996520949932
0.6555290635124322
Action:  54
0.171497727138556 0.6644574989996985 0.8359552261382545
0.6644574989996985
Action:  45 (random)
0.1704646083003719 0.6705579298935473 0.8410225381939193
0.6705579298935473
Action:  3
0.18610897927858788 0.6797206985311233 0.8658296778097111
0.6797206985311233
Action:  46
0.1663616506287266 0.7024932881097397 0.8688549387384662
0.7024932881097397
Action:  49
0.1654761201959974 0.7052696066229407 0.8707457268189381
0.7052696066229407
Action:  10
0.20001180707243638 0.6753474426628533 0.8753592497352897
0.6753474426628533
Action:  44
0.171792

 39%|███▉      | 28/72 [05:37<08:34, 11.69s/it]

Action:  91
0.17120859965136548 0.31839069966985195 0.48959929932121743
0.31839069966985195
Action:  42
0.13268448576409064 0.4903357317382815 0.6230202175023721
0.4903357317382815
Action:  27
0.15990703079604882 0.4901915021577502 0.650098532953799
0.4901915021577502
Action:  44
0.15970366066240557 0.5644040687004146 0.7241077293628202
0.5644040687004146
Action:  93
0.18538640325392214 0.566310553172616 0.7516969564265382
0.566310553172616
Action:  54
0.17178965717606043 0.6085913369119623 0.7803809940880228
0.6085913369119623
Action:  35
0.1768158047646717 0.6121047119859304 0.7889205167506022
0.6121047119859304
Action:  49
0.18442765833817548 0.6104048356403662 0.7948324939785417
0.6104048356403662
Action:  46
0.1579895409645555 0.6402733595536548 0.7982629005182104
0.6402733595536548
Action:  34
0.17251597908192912 0.6609998226843654 0.8335158017662945
0.6609998226843654
Action:  92
0.20360255665310867 0.6495468484997999 0.8531494051529086
0.6495468484997999
Action:  84
0.195380592

 40%|████      | 29/72 [05:50<08:40, 12.10s/it]

Action:  99
0.13779625263680356 0.30296702585466967 0.44076327849147323
0.30296702585466967
Action:  49
0.15293460727137362 0.4715113396897863 0.62444594696116
0.4715113396897863
Action:  54
0.16183769698473757 0.5098624006788489 0.6717000976635865
0.5098624006788489
Action:  47
0.1546407742896141 0.5468091543408419 0.701449928630456
0.5468091543408419
Action:  46
0.1573396203002854 0.5684510791212457 0.7257906994215311
0.5684510791212457
Action:  84
0.19608512222360094 0.5568260039126773 0.7529111261362783
0.5568260039126773
Action:  53
0.15870455391487778 0.6076540968251116 0.7663586507399894
0.6076540968251116
Action:  34
0.17362576001985358 0.6036261369074998 0.7772518969273533
0.6036261369074998
Action:  61
0.17244695371634197 0.6114160167592045 0.7838629704755465
0.6114160167592045
Action:  27
0.1815361707407867 0.6150230659807219 0.7965592367215085
0.6150230659807219
Action:  50
0.1529656284898871 0.6444199924251457 0.7973856209150327
0.6444199924251457
Action:  44
0.15948008437

 42%|████▏     | 30/72 [06:07<09:27, 13.52s/it]

Action:  46
0.21933381607530775 0.02097101034298185 0.2403048264182896
0.02097101034298185
Action:  47
0.2984793627805938 0.13081784297723859 0.42929720575783237
0.13081784297723859
Action:  84
0.2968139029688631 0.2007305508838041 0.4975444538526672
0.2007305508838041
Action:  44
0.1326574945691528 0.38757959264507247 0.5202370872142252
0.38757959264507247
Action:  49
0.2951484431571325 0.26962717072940434 0.5647756138865369
0.26962717072940434
Action:  34
0.29007965242577843 0.30314642716778634 0.5932260795935648
0.30314642716778634
Action:  10
0.15177407675597393 0.46211246007721823 0.6138865368331922
0.46211246007721823
Action:  35
0.27038377986965967 0.4147982692412633 0.685182049110923
0.4147982692412633
Action:  50
0.24489500362056482 0.4624716348214335 0.7073666384419983
0.4624716348214335
Action:  54
0.27436640115858074 0.4487496022283795 0.7231160033869602
0.4487496022283795
Action:  48
0.2276611151339609 0.5279697062038884 0.7556308213378493
0.5279697062038884
Action:  42
0.

 43%|████▎     | 31/72 [06:13<07:46, 11.39s/it]

Action:  42
0.13304117079446767 0.012519510873925138 0.1455606816683928
0.012519510873925138
Action:  46
0.12210517851399164 0.10197918265727285 0.22408436117126448
0.10197918265727285
Action:  84
0.19318912833708587 0.17607930213085501 0.3692684304679409
0.17607930213085501
Action:  49
0.08234158893534899 0.3289191209789717 0.4112607099143207
0.3289191209789717
Action:  0
0.16496461884850433 0.4056972773947872 0.5706618962432916
0.4056972773947872
Action:  54
0.1191299453200386 0.4910762499534761 0.6102061952735147
0.4910762499534761
Action:  78
0.12841749758764875 0.5481666282008834 0.6765841257885321
0.5481666282008834
Action:  91
0.16926664522354454 0.5504395971265167 0.7197062423500612
0.5504395971265167
Action:  95
0.16367803152138952 0.5701511747680371 0.7338292062894266
0.5701511747680371
Action:  99
0.1539080090061113 0.5824633307924012 0.7363713397985124
0.5824633307924012
Action:  50
0.11185268575104536 0.642878507168642 0.7547311929196874
0.642878507168642
Action:  10
0.155

 44%|████▍     | 32/72 [06:23<07:19, 10.99s/it]

Action:  0
0.13142382731423827 0.25341355752314654 0.38483738483738483
0.25341355752314654
Action:  99
0.12273419122734192 0.2854132169200662 0.40814740814740813
0.2854132169200662
Action:  60 (random)
0.14744707347447072 0.29485136882397156 0.4422984422984423
0.29485136882397156
Action:  47
0.12959734329597344 0.3319411182424881 0.46153846153846156
0.3319411182424881
Action:  14 (random)
0.17632489276324892 0.5189798025414464 0.6953046953046953
0.5189798025414464
Action:  48
0.12954199529541996 0.5889987232452986 0.7185407185407185
0.5889987232452986
Action:  46
0.13294589732945897 0.6315488671653056 0.7644947644947645
0.6315488671653056
Action:  50
0.12934827729348278 0.6373294893842839 0.7666777666777667
0.6373294893842839
Action:  10
0.1798394907983949 0.668090357131453 0.8479298479298479
0.668090357131453
Action:  51
0.1317835893178359 0.716183258649012 0.847966847966848
0.716183258649012
Action:  49
0.13064895530648954 0.7183908937333595 0.8490398490398491
0.7183908937333595
Acti

 46%|████▌     | 33/72 [06:49<09:57, 15.32s/it]

Action:  48
0.1333887196529124 0.1573604390931765 0.2907491587460889
0.1573604390931765
Action:  49
0.13805040155081694 0.31427854348726086 0.4523289450380778
0.31427854348726086
Action:  55 (random)
0.14155820179082435 0.48734551153345695 0.6289037133242813
0.48734551153345695
Action:  50
0.11377273146866057 0.5434679557029551 0.6572406871716158
0.5434679557029551
Action:  25
0.18621342195144466 0.6035026179564602 0.7897160399079048
0.6035026179564602
Action:  47
0.14610449552293917 0.6668714770846528 0.8129759726075919
0.6668714770846528
Action:  44
0.1452044678297794 0.6732617934607337 0.818466261290513
0.6732617934607337
Action:  27
0.20686790362780394 0.6177970706918136 0.8246649743196175
0.6177970706918136
Action:  61
0.1668974429982461 0.7034018662880164 0.8702993092862624
0.7034018662880164
Action:  82
0.22484537985784178 0.7360732103777092 0.960918590235551
0.7360732103777092


 47%|████▋     | 34/72 [06:56<08:07, 12.83s/it]

Action:  84
0.10347432024169184 -0.08038514826716955 0.02308917197452229
-0.08038514826716955
Action:  0
0.0222809667673716 0.002400561895048784 0.024681528662420384
0.002400561895048784
Action:  34
0.13293051359516617 -0.09710248811745917 0.035828025477707005
-0.09710248811745917
Action:  35
0.13444108761329304 -0.08667038697635036 0.04777070063694268
-0.08667038697635036
Action:  99
0.0222809667673716 0.026285912213520123 0.04856687898089172
0.026285912213520123
Action:  49
0.13897280966767372 -0.08085179055939347 0.058121019108280256
-0.08085179055939347
Action:  10
0.08836858006042296 -0.027062847576346527 0.06130573248407643
-0.027062847576346527
Action:  93
0.07137462235649547 0.0034661419747147176 0.07484076433121019
0.0034661419747147176
Action:  46
0.138595166163142 -0.05738497508033945 0.08121019108280254
-0.05738497508033945
Action:  7
0.07666163141993958 0.01251034310235341 0.08917197452229299
0.01251034310235341
Action:  48
0.13821752265861026 -0.04187994304077586 0.096337

 49%|████▊     | 35/72 [07:00<06:19, 10.25s/it]

Action:  84
0.10308502633559068 -0.08544268471570295 0.01764234161988773
-0.08544268471570295
Action:  34
0.1313017306245297 -0.10163051971835489 0.02967121090617482
-0.10163051971835489
Action:  47
0.13732129420617006 -0.10043276172822299 0.03688853247794707
-0.10043276172822299
Action:  46
0.13581640331075998 -0.0844932276892684 0.05132317562149158
-0.0844932276892684
Action:  44
0.1313017306245297 -0.06955353495492266 0.061748195669607056
-0.06955353495492266
Action:  49
0.1361926260346125 -0.06562325955506157 0.07056936647955092
-0.06562325955506157
Action:  99
0.02219714070729872 0.049174150391338003 0.07137129109863673
0.049174150391338003
Action:  10
0.08916478555304741 -0.01618964521623907 0.07297514033680834
-0.01618964521623907
Action:  61
0.1343115124153499 -0.05091135203042607 0.08340016038492382
-0.05091135203042607
Action:  48
0.1361926260346125 -0.0423674456015732 0.09382518043303929
-0.0423674456015732
Action:  2 (random)
0.046275395033860044 0.05877673006638053 0.10505

 50%|█████     | 36/72 [07:03<04:55,  8.21s/it]

Action:  10
0.08475349338640793 0.2661703587972427 0.35092385218365063
0.2661703587972427
Action:  61
0.08759381349255711 0.32247057620509123 0.41006438969764836
0.32247057620509123
Action:  44
0.08046191483186134 0.3882530907672428 0.4687150055991041
0.3882530907672428
Action:  4 (random)
0.16320437865406145 0.42694119805366526 0.5901455767077267
0.42694119805366526
Action:  50
0.1769913339138367 0.5702091140144948 0.7472004479283315
0.5702091140144948
Action:  99
0.07925944354604636 0.6983161443598886 0.7775755879059351
0.6983161443598886
Action:  46
0.16067504250114029 0.6383171187530591 0.7989921612541994
0.6383171187530591
Action:  42
0.16975577393539826 0.6432341476771437 0.812989921612542
0.6432341476771437
Action:  84
0.20659700626114358 0.6448167675350491 0.8514137737961927
0.6448167675350491
Action:  27
0.07948749844508024 0.7810948083858269 0.8605823068309071
0.7810948083858269
Action:  0
0.10936269021851806 0.7636076345295222 0.8729703247480403
0.7636076345295222
Action:  4

 51%|█████▏    | 37/72 [07:13<04:57,  8.50s/it]

Action:  48
0.11053510236358843 0.07041889525990502 0.18095399762349346
0.07041889525990502
Action:  50
0.1126219137158422 0.16582543534770106 0.27844734906354324
0.16582543534770106
Action:  47
0.11451102715051402 0.23257209399473577 0.3470831211452498
0.23257209399473577
Action:  49
0.11336877251559617 0.32356892906308315 0.43693770157867934
0.32356892906308315
Action:  46
0.11409366488006327 0.3717321711409858 0.48582583602104906
0.3717321711409858
Action:  10
0.10133116597838503 0.4356461440425508 0.5369773100209358
0.4356461440425508
Action:  51
0.11273174589227661 0.4463131248144512 0.5590448707067278
0.4463131248144512
Action:  0
0.04485546085581232 0.5396519798729831 0.5845074407287953
0.5396519798729831
Action:  42
0.11558738247957122 0.4777470825235408 0.593334465003112
0.4777470825235408
Action:  40
0.11257798084526843 0.49829736572860134 0.6108753465738698
0.49829736572860134
Action:  2 (random)
0.07044635796502943 0.5673061458543561 0.6377525038193855
0.5673061458543561
Ac

 53%|█████▎    | 38/72 [07:29<06:07, 10.80s/it]

Action:  53
0.11829186922123724 0.07491690037441268 0.19320876959564992
0.07491690037441268
Action:  84
0.1120722524068249 0.15899849439713182 0.2710707468039567
0.15899849439713182
Action:  85
0.11085692498331903 0.2615616554528468 0.3724185804361659
0.2615616554528468
Action:  0
0.04918501572776666 0.3668501552099322 0.4160351709376989
0.3668501552099322
Action:  61
0.11972166619006767 0.314188208282079 0.4339098744721467
0.314188208282079
Action:  50
0.11547993518253741 0.34602292824084435 0.46150286342338176
0.34602292824084435
Action:  46
0.11664760270708226 0.410800768901641 0.5274483716087233
0.410800768901641
Action:  48
0.11624249356591364 0.42011430634153124 0.5363567999074449
0.42011430634153124
Action:  99
0.04930416547516919 0.527314102587537 0.5766182680627061
0.527314102587537
Action:  49
0.11519397578877133 0.5063135153895708 0.6215074911783421
0.5063135153895708
Action:  47
0.11626632351539415 0.5394865543697218 0.6557528778851159
0.5394865543697218
Action:  51
0.11545

 54%|█████▍    | 39/72 [07:46<07:00, 12.73s/it]

Action:  99
0.10096849604809084 0.14798171390991754 0.2489502099580084
0.14798171390991754
Action:  25
0.2972280975175331 0.1865751418345965 0.4838032393521296
0.1865751418345965
Action:  42
0.2669486808415897 0.3615756142993822 0.6285242951409719
0.3615756142993822
Action:  84
0.2880997439608149 0.3716183124279074 0.6597180563887223
0.3716183124279074
Action:  10
0.09968830012245353 0.5991719278319556 0.6988602279544092
0.5991719278319556
Action:  61
0.10291662028275632 0.6254876988534165 0.7284043191361728
0.6254876988534165
Action:  50
0.28047422909941 0.4611274505646572 0.7416016796640672
0.4611274505646572
Action:  24
0.272292107313815 0.49660411344203387 0.7688962207558488
0.49660411344203387
Action:  44
0.07970611154402761 0.7019375597217192 0.7816436712657469
0.7019375597217192
Action:  46
0.26516753868418125 0.5424209436193581 0.8075884823035393
0.5424209436193581
Action:  55
0.288656350885005 0.5271804817484682 0.8158368326334733
0.5271804817484682
Action:  47
0.2966714905933

 56%|█████▌    | 40/72 [08:00<06:57, 13.05s/it]

Action:  46
0.26307077269036167 0.18056171570120416 0.4436324883915658
0.18056171570120416
Action:  27
0.19226684989693296 0.3181236502172483 0.5103905001141813
0.3181236502172483
Action:  99
0.19360984446248986 0.4337023272662154 0.6273121717287052
0.4337023272662154
Action:  41
0.2667561996377038 0.413840588061162 0.6805967876988658
0.413840588061162
Action:  61
0.19692048222874634 0.5005751408206561 0.6974956230494025
0.5005751408206561
Action:  34
0.257886189018677 0.6480284033540108 0.9059145923726878
0.6480284033540108
Action:  42
0.22134424386282717 0.7041334146589053 0.9254776585217325
0.7041334146589053
Action:  10
0.19554625523143232 0.7316060626493623 0.9271523178807947
0.7316060626493623
Action:  44
0.19189206071584733 0.7361737077244359 0.9280657684402832
0.7361737077244359
Action:  35
0.22259354113311264 0.7112574141839307 0.9338509553170434
0.7112574141839307
Action:  93
0.2445811730901368 0.7067775846171023 0.9513587577072391
0.7067775846171023


 57%|█████▋    | 41/72 [08:10<06:21, 12.30s/it]

Action:  43
0.15699642200947683 0.15737307945679885 0.3143695014662757
0.15737307945679885
Action:  50
0.09994197853205686 0.2868028895031337 0.3867448680351906
0.2868028895031337
Action:  47
0.14616574799342424 0.41324774174264617 0.5594134897360704
0.41324774174264617
Action:  44
0.14311962092640942 0.49031146411757887 0.6334310850439883
0.49031146411757887
Action:  27
0.1450536698578474 0.5294331336612141 0.6744868035190615
0.5294331336612141
Action:  46
0.08587177255584566 0.6372953828693743 0.7231671554252199
0.6372953828693743
Action:  42
0.09191567546658931 0.6663247937416219 0.7582404692082112
0.6663247937416219
Action:  28
0.16086451987235276 0.6475812279282338 0.8084457478005865
0.6475812279282338
Action:  84
0.15138768010830675 0.7087882729708722 0.8601759530791789
0.7087882729708722
Action:  53
0.15506237307803888 0.7134420257489407 0.8685043988269795
0.7134420257489407
Action:  65
0.1451503723044193 0.732972794851006 0.8781231671554253
0.732972794851006
Action:  48
0.14988

 58%|█████▊    | 42/72 [08:19<05:38, 11.28s/it]

Action:  46
0.24669980443285527 0.06410669859551577 0.31080650302837104
0.06410669859551577
Action:  50
0.2635674706649283 0.302259752796978 0.5658272234619063
0.302259752796978
Action:  44
0.08568285528031291 0.5077716128952263 0.5934544681755393
0.5077716128952263
Action:  47
0.2849168839634941 0.3459406232089637 0.6308575071724578
0.3459406232089637
Action:  27
0.09688722294654498 0.5757299271969042 0.6726171501434491
0.5757299271969042
Action:  54
0.28190189048239894 0.4088854859919396 0.6907873764743385
0.4088854859919396
Action:  82
0.10858050847457627 0.6206724933318205 0.7292530018063967
0.6206724933318205
Action:  48
0.13282268578878748 0.6204447671917672 0.7532674529805546
0.6204447671917672
Action:  61
0.10540254237288135 0.664866291969909 0.7702688343427904
0.664866291969909
Action:  65
0.12601857887874837 0.6635893267635851 0.7896079056423334
0.6635893267635851
Action:  49
0.2501222294654498 0.5572308679737171 0.807353097439167
0.5572308679737171
Action:  10
0.098191003911

 60%|█████▉    | 43/72 [08:29<05:18, 10.97s/it]

Action:  46
0.019085886489201405 0.02007720363955396 0.039163090128755365
0.02007720363955396
Action:  84
0.09919638372677046 -0.03589166269672754 0.06330472103004292
-0.03589166269672754
Action:  50
0.03691612255148167 0.03389932809229516 0.07081545064377683
0.03389932809229516
Action:  44
0.15168257157207435 -0.05297012522014302 0.09871244635193133
-0.05297012522014302
Action:  47
0.08940231039678553 0.033988247543128625 0.12339055793991416
0.033988247543128625
Action:  34
0.0743345052737318 0.0662234346404313 0.1405579399141631
0.0662234346404313
Action:  24 (random)
0.1047212456052235 0.05675944109005546 0.16148068669527896
0.05675944109005546
Action:  49
0.12154696132596685 0.05549166528347521 0.17703862660944206
0.05549166528347521
Action:  54
0.06278252134605726 0.12659516105737623 0.18937768240343347
0.12659516105737623
Action:  53
0.12531391260673028 0.08605947795120963 0.2113733905579399
0.08605947795120963
Action:  82 (random)
0.13385233550979408 0.10488157006960505 0.238733

 61%|██████    | 44/72 [08:34<04:14,  9.10s/it]

Action:  49
0.1535415870241931 0.20555306839963242 0.3590946554238255
0.20555306839963242
Action:  47
0.16386325159740048 0.3156904160625247 0.4795536676599252
0.3156904160625247
Action:  46
0.1659658128993501 0.3683650055879383 0.5343308184872884
0.3683650055879383
Action:  35
0.2229807219704003 0.4038499380181878 0.6268306599885881
0.4038499380181878
Action:  37 (random)
0.21044727213150566 0.45721265305710784 0.6676599251886135
0.45721265305710784
Action:  50
0.15266779531429195 0.5222830700886117 0.6749508654029037
0.5222830700886117
Action:  54
0.17139970509529792 0.5328417201250153 0.7042414252203132
0.5328417201250153
Action:  63
0.21645459013707607 0.5568288689385594 0.7732834590756356
0.5568288689385594
Action:  51
0.157719403637158 0.6203824159279215 0.7781018195650795
0.6203824159279215
Action:  48
0.1577467096280924 0.620798906297857 0.7785456159259494
0.620798906297857
Action:  53
0.16757686636448035 0.6169917001732741 0.7845685665377544
0.6169917001732741
Action:  67
0.23

 62%|██████▎   | 45/72 [08:49<04:48, 10.68s/it]

Action:  11 (random)
0.22375768217734854 0.22434381639762618 0.4481014985749747
0.22434381639762618
Action:  0
0.10029850746268656 0.40241363743020675 0.5027121448928933
0.40241363743020675
Action:  42
0.16981562774363476 0.3477903297814181 0.5176059575250529
0.3477903297814181
Action:  41
0.17144863915715539 0.49431397921188025 0.6657626183690356
0.49431397921188025
Action:  46
0.16816505706760315 0.5237996390802317 0.6919646961478348
0.5237996390802317
Action:  49
0.16835820895522388 0.5265944434305442 0.6949526523857681
0.5265944434305442
Action:  54
0.169657594381036 0.542349392839705 0.712006987220741
0.542349392839705
Action:  50
0.16575943810359964 0.5563146632111011 0.7220741013147007
0.5563146632111011
Action:  48
0.1677260755048288 0.5622546177010185 0.7299806932058472
0.5622546177010185
Action:  51
0.166900790166813 0.567906601577234 0.7348073917440471
0.567906601577234
Action:  82
0.2091132572431958 0.5722143146975966 0.7813275719407925
0.5722143146975966
Action:  44
0.1714

 64%|██████▍   | 46/72 [09:10<06:02, 13.93s/it]

Action:  99
0.04410853673646388 0.17374804407453126 0.21785658081099513
0.17374804407453126
Action:  92
0.10077231239416842 0.2908764312453427 0.3916487436395111
0.2908764312453427
Action:  27
0.11946062032792301 0.3657200962434456 0.48518071657136863
0.3657200962434456
Action:  95
0.08883657539338372 0.44518220444189927 0.534018779835283
0.44518220444189927
Action:  48
0.11398835336389543 0.446783823103607 0.5607721764675024
0.446783823103607
Action:  50
0.11326560112336348 0.4752566671450098 0.5885222682683733
0.4752566671450098
Action:  47
0.11471110560442738 0.5000924405320675 0.6148035461364948
0.5000924405320675
Action:  10
0.10971379011274934 0.5398691716456308 0.6495829617583802
0.5398691716456308
Action:  42
0.11597075950935448 0.5391202544968354 0.65509101400619
0.5391202544968354
Action:  40
0.11646636104571924 0.5418245690282459 0.6582909300739652
0.5418245690282459
Action:  46
0.11456655515632098 0.5685368388530165 0.6831033940093375
0.5685368388530165
Action:  51
0.113141

 65%|██████▌   | 47/72 [09:30<06:32, 15.70s/it]

Action:  45
0.19266805472932777 0.21038855586933383 0.4030566105986616
0.21038855586933383
Action:  50
0.14109904818560381 0.3133230896331699 0.4544221378187737
0.3133230896331699
Action:  53
0.1731856038072576 0.2937161867516138 0.46690179055887143
0.2937161867516138
Action:  90 (random)
0.17675490779298036 0.3661099864012682 0.5428648941942485
0.3661099864012682
Action:  44
0.15452111838191554 0.5623354560438396 0.7168565744257551
0.5623354560438396
Action:  42
0.13254759071980965 0.634046730133871 0.7665943208536806
0.634046730133871
Action:  91
0.16946757882212968 0.6209104280507225 0.7903780068728522
0.6209104280507225
Action:  67 (random)
0.16586109458655562 0.6916176538308797 0.8574787484174353
0.6916176538308797
Action:  99
0.15362879238548482 0.7073768144150216 0.8610056068005064
0.7073768144150216
Action:  84
0.19166418798334325 0.671602225472978 0.8632664134563213
0.671602225472978
Action:  27
0.15745835812016656 0.7263361797709529 0.8837945378911195
0.7263361797709529
Actio

 67%|██████▋   | 48/72 [09:39<05:28, 13.70s/it]

Action:  47
0.35360185902401237 0.40908833404592754 0.7626901930699399
0.40908833404592754
Action:  49
0.2831138652207591 0.5002897916006193 0.7834036568213784
0.5002897916006193
Action:  38
0.36629658318271796 0.4251623095419975 0.7914588927247155
0.4251623095419975
Action:  44
0.054049401841810826 0.7481498054206875 0.8021992072624984
0.7481498054206875
Action:  99
0.048713314398829505 0.7639704856267426 0.8126838000255722
0.7639704856267426
Action:  82
0.07504948790773733 0.7478631831061995 0.8229126710139368
0.7478631831061995
Action:  27
0.05796540149754712 0.772107479208245 0.8300728807057921
0.772107479208245
Action:  61
0.07259660900249591 0.7654036467192789 0.8380002557217747
0.7654036467192789
Action:  40
0.13206816421378775 0.7187181802434428 0.8507863444572306
0.7187181802434428
Action:  42
0.36220845167398225 0.49254158029123957 0.8547500319652218
0.49254158029123957
Action:  54
0.36681297874171614 0.49509726291536094 0.8619102416570771
0.49509726291536094
Action:  41
0.35

 68%|██████▊   | 49/72 [09:53<05:14, 13.69s/it]

Action:  35
0.1656507554945055 0.23907674244108493 0.40472749793559043
0.23907674244108493
Action:  93
0.17522321428571427 0.28751419281585466 0.46273740710156896
0.28751419281585466
Action:  50
0.1262448489010989 0.4721242675316014 0.5983691164327003
0.4721242675316014
Action:  44
0.1486092032967033 0.5844626381566411 0.7330718414533444
0.5844626381566411
Action:  46
0.11847527472527472 0.662697309915518 0.7811725846407928
0.662697309915518
Action:  99
0.15380322802197802 0.642439546544496 0.796242774566474
0.642439546544496
Action:  10
0.14899553571428573 0.6919417062345169 0.8409372419488026
0.6919417062345169
Action:  3
0.18011675824175824 0.7070632582735185 0.8871800165152767
0.7070632582735185
Action:  42
0.1180889423076923 0.8028441708219526 0.9209331131296449
0.8028441708219526
Action:  84
0.1773695054945055 0.7551243012767579 0.9324938067712634
0.7551243012767579
Action:  27
0.14903846153846154 0.7874809439115797 0.9365194054500413
0.7874809439115797
Action:  78
0.152644230769

 69%|██████▉   | 50/72 [10:00<04:16, 11.67s/it]

Action:  46
0.09502746996927088 0.1750995316872725 0.27012700165654335
0.1750995316872725
Action:  10
0.1452649222460192 0.2703065852084259 0.41557150745444504
0.2703065852084259
Action:  84
0.1583946363721017 0.3876020505412059 0.5459966869133076
0.3876020505412059
Action:  35
0.15606667287456932 0.5063297931662921 0.6623964660408614
0.5063297931662921
Action:  55 (random)
0.1195176459633113 0.6170919619880968 0.7366096079514081
0.6170919619880968
Action:  42
0.09959027842443431 0.6735737193668412 0.7731639977912755
0.6735737193668412
Action:  44
0.14195921407952322 0.6516354849817689 0.7935946990612921
0.6516354849817689
Action:  3
0.16942918335040508 0.6743035609897385 0.8437327443401436
0.6743035609897385
Action:  28
0.1622590557780054 0.7094140530016744 0.8716731087796797
0.7094140530016744
Action:  18
0.1588136698016575 0.7208108470398664 0.879624516841524
0.7208108470398664
Action:  5
0.16775304963218177 0.7177245870326443 0.885477636664826
0.7177245870326443
Action:  27
0.14526

 71%|███████   | 51/72 [10:08<03:42, 10.60s/it]

Action:  49
0.1591673937429889 0.15996638599771487 0.3191337797407038
0.15996638599771487
Action:  99
0.14813660725414435 0.2237112343187293 0.37184784157287365
0.2237112343187293
Action:  46
0.07833728031908263 0.3396709829662856 0.4180082632853683
0.3396709829662856
Action:  44
0.14583073663218246 0.3126391308703108 0.45846986750249324
0.3126391308703108
Action:  10
0.1483858905646267 0.3845960156898255 0.5329819062544522
0.3845960156898255
Action:  3
0.17057210519755703 0.49319766257563 0.6637697677731871
0.49319766257563
Action:  42
0.08830861273837716 0.6100814713191809 0.6983900840575581
0.6100814713191809
Action:  48
0.15224978187710334 0.5982844822630875 0.7505342641401909
0.5982844822630875
Action:  5
0.16994889692135112 0.604805341574161 0.7747542384955122
0.604805341574161
Action:  41
0.14346254518260002 0.6559390789803862 0.7994016241629862
0.6559390789803862
Action:  27
0.14763804063317962 0.671994385638369 0.8196324262715486
0.671994385638369
Action:  97
0.156799202293406

 72%|███████▏  | 52/72 [10:16<03:21, 10.06s/it]

Action:  84
0.13217753120665743 0.10794353824012246 0.24012106944677988
0.10794353824012246
Action:  46
0.061234396671289874 0.22142913115786766 0.28266352782915755
0.22142913115786766
Action:  99
0.13744798890429957 0.18994397342965033 0.3273919623339499
0.18994397342965033
Action:  44
0.13460471567267684 0.2775358594071954 0.41214057507987223
0.2775358594071954
Action:  10
0.13661581137309292 0.3636364166410318 0.5002522280141247
0.3636364166410318
Action:  97
0.14084604715672677 0.38816017446762163 0.5290062216243484
0.38816017446762163
Action:  42
0.07066574202496533 0.4983606578405131 0.5690263998654784
0.4983606578405131
Action:  3
0.15388349514563107 0.524273558831164 0.678157053976795
0.524273558831164
Action:  18
0.14375866851595007 0.5707192194948116 0.7144778880107617
0.5707192194948116
Action:  81
0.14535367545076283 0.5945151659818922 0.7398688414326551
0.5945151659818922
Action:  5
0.1535367545076283 0.6078555441303405 0.7613922986379688
0.6078555441303405
Action:  83
0.1

 74%|███████▎  | 53/72 [10:25<03:00,  9.48s/it]

Action:  46
0.05414625612316305 0.07929724728522365 0.1334435034083867
0.07929724728522365
Action:  84
0.13068579426172147 0.1262859058002492 0.2569717000619707
0.1262859058002492
Action:  99
0.14004548635409378 0.18013629426974428 0.32018178062383806
0.18013629426974428
Action:  42
0.06473058082575227 0.2905679112213454 0.3552984920470977
0.2905679112213454
Action:  67 (random)
0.09464660601819454 0.30630361087914076 0.4009502168973353
0.30630361087914076
Action:  3
0.15439118264520643 0.3523223063033579 0.5067134889485644
0.3523223063033579
Action:  10
0.14004548635409378 0.4071555051765817 0.5472009915306755
0.4071555051765817
Action:  44
0.14127011896431071 0.4383621884101987 0.5796323073745094
0.4383621884101987
Action:  18
0.14573128061581525 0.46633234260252804 0.6120636232183433
0.46633234260252804
Action:  41
0.1201889433170049 0.5164563778976201 0.636645321214625
0.5164563778976201
Action:  97
0.14406927921623514 0.525627064514399 0.6696963437306341
0.525627064514399
Action: 

 75%|███████▌  | 54/72 [10:33<02:45,  9.19s/it]

Action:  42
0.06149325206791467 0.05981580305019557 0.12130905511811024
0.05981580305019557
Action:  84
0.13354375272094035 0.08668262523181555 0.2202263779527559
0.08668262523181555
Action:  50
0.06987374836743579 0.18135656659319413 0.2512303149606299
0.18135656659319413
Action:  44
0.14714845450587724 0.1801153250216818 0.32726377952755903
0.1801153250216818
Action:  46
0.050282977797126684 0.2993725340138969 0.3496555118110236
0.2993725340138969
Action:  10
0.14758380496299522 0.25768194306850084 0.40526574803149606
0.25768194306850084
Action:  3
0.15977361776229865 0.32497047672589036 0.484744094488189
0.32497047672589036
Action:  28
0.1437744884632129 0.39731803122182646 0.5410925196850394
0.39731803122182646
Action:  35
0.151937309534175 0.4322162337729116 0.5841535433070866
0.4322162337729116
Action:  34
0.11025250326512843 0.4938321424041629 0.6040846456692913
0.4938321424041629
Action:  5
0.16162385720505007 0.461161575865816 0.6227854330708661
0.461161575865816
Action:  27
0

 76%|███████▋  | 55/72 [10:40<02:23,  8.46s/it]

Action:  46
0.12832790653131584 0.2193133329421532 0.34764123947346903
0.2193133329421532
Action:  50
0.11875119708868033 0.3397558751049261 0.4585070721936064
0.3397558751049261
Action:  42
0.1509927855455532 0.36188432998056896 0.5128771155261221
0.36188432998056896
Action:  84
0.2772776607291068 0.46280900430237054 0.7400866650314774
0.46280900430237054
Action:  53
0.12874289727383004 0.6401230989652347 0.7688659962390647
0.6401230989652347
Action:  47
0.13190321138989977 0.6695030513850164 0.8014062627749162
0.6695030513850164
Action:  24
0.2533039647577093 0.5772086670794259 0.8305126318371352
0.5772086670794259
Action:  91
0.2834705995020111 0.5938902050111112 0.8773608045131224
0.5938902050111112
Action:  10
0.2866628359828896 0.6092573667805803 0.8959202027634698
0.6092573667805803
Action:  51
0.12922173274596183 0.7850943493405397 0.9143160820865015
0.7850943493405397
Action:  22
0.25477239353891334 0.6803923517803574 0.9351647453192707
0.6803923517803574
Action:  54
0.1457255

 78%|███████▊  | 56/72 [10:49<02:20,  8.76s/it]

Action:  46
0.21170655992157503 0.3110343628193477 0.5227409227409228
0.3110343628193477
Action:  48
0.18480924761048934 0.4707736079723662 0.6555828555828556
0.4707736079723662
Action:  42
0.2098480516297688 0.550020908239191 0.7598689598689599
0.550020908239191
Action:  0
0.2143615717670125 0.5827446253391846 0.7971061971061971
0.5827446253391846
Action:  50
0.22079486970018788 0.5921999432946251 0.812994812994813
0.5921999432946251
Action:  43
0.2464055224246385 0.6016971256780096 0.8481026481026481
0.6016971256780096
Action:  35
0.22148925741361 0.6459418100174574 0.8674310674310675
0.6459418100174574
Action:  28 (random)
0.23112899272935217 0.6789447173443579 0.9100737100737101
0.6789447173443579
Action:  93 (random)
0.22318438036108162 0.6909297337530325 0.9141141141141141
0.6909297337530325
Action:  16 (random)
0.2292296381014623 0.6883788795070553 0.9176085176085176
0.6883788795070553
Action:  99
0.20790785066579529 0.7112294684715238 0.9191373191373191
0.7112294684715238
Actio

 79%|███████▉  | 57/72 [11:07<02:49, 11.32s/it]

Action:  46
0.17132947445791988 0.29460535739431915 0.46593483185223905
0.29460535739431915
Action:  44
0.11002388827636898 0.46074918484903027 0.5707730731253993
0.46074918484903027
Action:  42
0.175808526277104 0.4661732359346634 0.6419817622117674
0.4661732359346634
Action:  48
0.11615674384417493 0.6502369368202846 0.7663936806644596
0.6502369368202846
Action:  50
0.18054024255788312 0.6040331442110081 0.7845733867688912
0.6040331442110081
Action:  27
0.1649898934215362 0.6879461581553936 0.8529360515769298
0.6879461581553936
Action:  47
0.1903941565600882 0.6668399724984005 0.8572341290584887
0.6668399724984005
Action:  49
0.17691106210951857 0.689616207449638 0.8665272695591566
0.689616207449638
Action:  43
0.18720139654538773 0.7017455744716303 0.888946971017018
0.7017455744716303
Action:  80
0.1712146269753767 0.7211011074731335 0.8923157344485102
0.7211011074731335
Action:  82
0.18754593899301727 0.7319522314199466 0.9194981704129639
0.7319522314199466
Action:  54
0.1850652333

 81%|████████  | 58/72 [11:20<02:48, 12.04s/it]

Action:  50
0.09618777860326895 0.3109140865330295 0.4071018651362984
0.3109140865330295
Action:  48
0.1018991456166419 0.3758626908252519 0.47776183644189385
0.3758626908252519
Action:  49
0.10491734769687965 0.4027942735369797 0.5077116212338594
0.4027942735369797
Action:  95
0.2599368499257058 0.4645968659996887 0.7245337159253945
0.4645968659996887
Action:  25
0.23667347696879643 0.5498521088752972 0.7865255858440937
0.5498521088752972
Action:  44
0.12527860326894502 0.6863545865923654 0.8116331898613104
0.6863545865923654
Action:  46
0.11197529717682021 0.7073097339087848 0.819285031085605
0.7073097339087848
Action:  42
0.13674777117384845 0.7214540461384422 0.8582018173122907
0.7214540461384422
Action:  27
0.2254596953937593 0.6588062060887849 0.8842659014825442
0.6588062060887849
Action:  82
0.2731937221396731 0.6294246422792652 0.9026183644189383
0.6294246422792652
Action:  61
0.16110234026745915 0.7469560050218761 0.9080583452893353
0.7469560050218761
Action:  99
0.23792719167

 82%|████████▏ | 59/72 [11:31<02:31, 11.67s/it]

Action:  48
0.29315420031700506 0.058418845057307045 0.3515730453743121
0.058418845057307045
Action:  19 (random)
0.31281606158955394 0.10084814773450379 0.41366420932405773
0.10084814773450379
Action:  49
0.309079930560797 0.4044492979720656 0.7135292285328626
0.4044492979720656
Action:  50
0.17642840969129744 0.5727149814415029 0.7491433911328004
0.5727149814415029
Action:  61
0.29806023096082723 0.4685268316494936 0.7665870626103208
0.4685268316494936
Action:  10
0.2905124915087931 0.5024477410734932 0.7929602325822863
0.5024477410734932
Action:  46
0.1166125745339271 0.7080162282902864 0.8246288028242135
0.7080162282902864
Action:  47
0.2919088233074194 0.5515134589062656 0.843422282213685
0.5515134589062656
Action:  51
0.25503811608423277 0.6154841557463144 0.8705222718305472
0.6154841557463144
Action:  24
0.27817193750471736 0.6068867272237636 0.8850586647284809
0.6068867272237636
Action:  27
0.31089138802928523 0.581850798659532 0.8927421866888173
0.581850798659532
Action:  99
0

 83%|████████▎ | 60/72 [11:41<02:14, 11.25s/it]

Action:  0
0.15228550476656075 0.31379754592552833 0.4660830506920891
0.31379754592552833
Action:  42
0.09129797115619653 0.4756192531540061 0.5669172243102026
0.4756192531540061
Action:  99
0.14132066906449697 0.456580146775635 0.597900815840132
0.456580146775635
Action:  50
0.10723888675489751 0.5602375088817328 0.6674763956366303
0.5602375088817328
Action:  49
0.1562663686838705 0.5684563373386797 0.7247227060225502
0.5684563373386797
Action:  46
0.06868736250305549 0.6838105749797568 0.7524979374828124
0.6838105749797568
Action:  84
0.1595488354227049 0.6257660422012753 0.7853148776239802
0.6257660422012753
Action:  39
0.15563781122324266 0.6453521970268261 0.8009900082500687
0.6453521970268261
Action:  34
0.15331564060481195 0.6654120154589885 0.8187276560638005
0.6654120154589885
Action:  53
0.14985857457135873 0.6903192602439314 0.8401778348152902
0.6903192602439314
Action:  35
0.15085379055068618 0.7031658262794541 0.8540196168301403
0.7031658262794541
Action:  10
0.14446345636

 85%|████████▍ | 61/72 [12:03<02:37, 14.30s/it]

Action:  47
0.35082839521102915 0.05731988887779449 0.40814828408882364
0.05731988887779449
Action:  49
0.2797799008344419 0.5157789172973255 0.7955588181317673
0.5157789172973255
Action:  50
0.37235457733704197 0.42815927841630724 0.8005138557533492
0.42815927841630724
Action:  48
0.12051033982343694 0.6908311907326283 0.8113415305560653
0.6908311907326283
Action:  44
0.04438263393397025 0.770262255036483 0.8146448889704533
0.770262255036483
Action:  99
0.037126617486999636 0.7824733091050357 0.8195999265920353
0.7824733091050357
Action:  27
0.049522312250574435 0.7750326519630427 0.8245549642136172
0.7750326519630427
Action:  61
0.07014149232071593 0.7588179497787519 0.8289594420994678
0.7588179497787519
Action:  42
0.3709638408513726 0.46038136010293096 0.8313452009543035
0.46038136010293096
Action:  46
0.36697303180553875 0.46859312712270496 0.8355661589282437
0.46859312712270496
Action:  40
0.13272463417583746 0.7103658411407344 0.8430904753165719
0.7103658411407344
Action:  54
0.

 86%|████████▌ | 62/72 [12:14<02:13, 13.36s/it]

Action:  46
0.09866746518939962 0.24398184568349474 0.34264931087289435
0.24398184568349474
Action:  50
0.11064530618356042 0.41085801183583726 0.5215033180193976
0.41085801183583726
Action:  71 (random)
0.14530618356041325 0.4941144392063045 0.6394206227667177
0.4941144392063045
Action:  39 (random)
0.16027848480311424 0.5539251905414493 0.7142036753445635
0.5539251905414493
Action:  47
0.15588660977192195 0.5774594851744793 0.7333460949464012
0.5774594851744793
Action:  27
0.13522483405699456 0.6380013017265684 0.773226135783563
0.6380013017265684
Action:  63
0.11291610520537007 0.6792610259840123 0.7921771311893824
0.6792610259840123
Action:  49
0.1599540849428557 0.6519397384364195 0.8118938233792752
0.6519397384364195
Action:  99
0.1357987722712981 0.6961307836245672 0.8319295558958653
0.6961307836245672
Action:  54
0.1296850825971952 0.7211316606391498 0.850816743236345
0.7211316606391498
Action:  85
0.16908718870090333 0.705402346776381 0.8744895354772844
0.705402346776381
Actio

 88%|████████▊ | 63/72 [12:37<02:26, 16.24s/it]

Action:  61
0.07339867791201277 0.26899212242184606 0.3423908003338588
0.26899212242184606
Action:  50
0.264037687105843 0.36908853008788783 0.6331262171937309
0.36908853008788783
Action:  10
0.08415014056682622 0.6061957742991665 0.6903459148659927
0.6061957742991665
Action:  47
0.23649418737178027 0.505785326678113 0.7422795140498933
0.505785326678113
Action:  42
0.2541600182356964 0.508058288358016 0.7622183065937124
0.508058288358016
Action:  84
0.23805181977053416 0.5505969792649847 0.7886487990355189
0.5505969792649847
Action:  46
0.26164425195653823 0.6054613772746591 0.8671056292311973
0.6054613772746591
Action:  53
0.15496542815895448 0.7222487051991091 0.8772141333580636
0.7222487051991091
Action:  51
0.2556416685662184 0.6408620873458655 0.8965037559120839
0.6408620873458655
Action:  35
0.15295190335080922 0.7561642980773647 0.909116201428174
0.7561642980773647
Action:  34
0.23846972114580958 0.6734286373815019 0.9118983585273115
0.6734286373815019
Action:  54 (random)
0.254

 89%|████████▉ | 64/72 [12:46<01:52, 14.10s/it]

Action:  46
0.1971183153228041 0.1473938797991471 0.3445121951219512
0.1473938797991471
Action:  50
0.19781102798559158 0.3519857199818881 0.5497967479674797
0.3519857199818881
Action:  44
0.06433915211970075 0.535728598557806 0.6000677506775067
0.535728598557806
Action:  99
0.0916320310335273 0.5795061803485866 0.6711382113821138
0.5795061803485866
Action:  61
0.08362427265170408 0.6219990335813583 0.7056233062330624
0.6219990335813583
Action:  47
0.1918814075921308 0.5520210314322594 0.7439024390243902
0.5520210314322594
Action:  95
0.10967026877251317 0.6695980239104137 0.7792682926829269
0.6695980239104137
Action:  81 (random)
0.19745081740094209 0.6175220823280553 0.8149728997289973
0.6175220823280553
Action:  48
0.08376281518426157 0.7478767512114024 0.8316395663956639
0.7478767512114024
Action:  40
0.10440565253532835 0.7537244287654847 0.858130081300813
0.7537244287654847
Action:  78
0.11742865059573289 0.7562840865316384 0.8737127371273713
0.7562840865316384
Action:  28 (rando

 90%|█████████ | 65/72 [12:56<01:29, 12.81s/it]

Action:  10
0.2118491381898985 0.21654272872322536 0.42839186691312386
0.21654272872322536
Action:  53
0.09419854663383581 0.37596781196135826 0.47016635859519407
0.37596781196135826
Action:  46
0.0561527836165996 0.44654592248321556 0.5026987060998152
0.44654592248321556
Action:  50
0.05840490060657018 0.47061543211062024 0.5290203327171904
0.47061543211062024
Action:  61
0.12155426100534503 0.4448782713421595 0.5664325323475046
0.4448782713421595
Action:  51
0.07425980421596301 0.5215997151925398 0.5958595194085028
0.5215997151925398
Action:  54
0.08260765119212059 0.5525124966821863 0.6351201478743068
0.5525124966821863
Action:  99
0.19365203291093627 0.5094163589559769 0.7030683918669132
0.5094163589559769
Action:  44
0.09660080475647108 0.6294620418239356 0.7260628465804067
0.6294620418239356
Action:  48
0.08516005044742057 0.6454129624915813 0.7305730129390019
0.6454129624915813
Action:  78
0.18353252056933517 0.5645635977301102 0.7480961182994454
0.5645635977301102
Action:  42
0

In [54]:
def compute_metrics(env, iter_cnt=10, max_iter=30):
    rewards, final_rewards, novp = [], [], []
    for _ in range(iter_cnt):
        state, action, mask = env.reset()
        episode_reward = 0.0
        for t in range(max_iter):
#             action = model.act(state, mask, epsilon=0.0)
            

            s   = autograd.Variable(torch.FloatTensor(np.float32(state)).unsqueeze(0), volatile=True).cuda()
            m = autograd.Variable(torch.FloatTensor(np.float32(mask)), volatile=True).cuda()
            q_value = model.forward(s)
            q_value *= m
            a = torch.nn.functional.softmax(q_value)
            
            action = a.argmax().item()
            print(action)

            state, reward, done, info, mask = env.step(action)
            # print("REWARD: ", reward)
#             env.render(action, state)
            episode_reward += reward

            if done:
                break

        final_reward = 0
        # final_reward = env.final_reward()
        # episode_reward += 1.0 / final_reward
        rewards.append(episode_reward)
        final_rewards.append(final_reward)
        novp.append(t + 1)
    return np.mean(rewards), np.mean(final_rewards), np.mean(novp)

In [55]:
agent_func = lambda s : model.act(s, epsilon=0.0)
result = compute_metrics(env)

  # Remove the CWD from sys.path while we load stuff.
  # This is added back by InteractiveShellApp.init_path()
  


75
0.45846322352346447 0.007501328236103999 0.46596455175956847
0.007501328236103999
36
0.38924930491195553 0.41681285794445344 0.806062162856409
0.41681285794445344
60
0.4547982138343584 0.36333690047337336 0.8181351143077318
0.36333690047337336
33
0.4140197152245345 0.46807635464446107 0.8820960698689956
0.46807635464446107
83
0.4585896031679164 0.42350646670107917 0.8820960698689956
0.42350646670107917
59
0.4459937652708737 0.44445062209105796 0.8904443873619317
0.44445062209105796
85
0.4557249978936726 0.4388293303878583 0.8945543282815309
0.4388293303878583
98
0.45475608728620776 0.440825726225223 0.8955818135114307
0.440825726225223
13
0.476535512680091 0.4916124451930146 0.9681479578731056
0.4916124451930146
75
0.4717649647887324 0.08067738581875583 0.5524423506074883
0.08067738581875583
36
0.4639964788732394 0.19543818514858052 0.6594346640218199
0.19543818514858052
60
0.4647887323943662 0.24739822520543547 0.7121869575998017
0.24739822520543547
33
0.46307218309859155 0.2692610

0.4856278366111952 0.45664858615303244 0.9422764227642276
0.45664858615303244
88
0.4876449823499748 0.4554444485443342 0.943089430894309
0.4554444485443342
79
0.4868885526979324 0.45620087819637656 0.943089430894309
0.45620087819637656
95
0.402672718103883 0.5412297209205073 0.9439024390243902
0.5412297209205073
9
0.47402924861321233 0.46987319041117787 0.9439024390243902
0.46987319041117787
75
0.4959910430511413 0.3945139414234461 0.8905049844745874
0.3945139414234461
36
0.4826278532216122 0.4654028707528934 0.9480307239745056
0.4654028707528934
60
0.48949003178272177 0.47455638102982933 0.9640464128125511
0.47455638102982933
75
0.4495060373216246 0.2607704793931918 0.7102765167148164
0.2607704793931918
36
0.4339553604098061 0.2895279247738506 0.7234832851836567
0.2895279247738506
60
0.4606659348701061 0.27602411878239086 0.736690053652497
0.27602411878239086
33
0.42993047932674716 0.3174900737066824 0.7474205530334296
0.3174900737066824
83
0.4610318331503842 0.3074370071302597 0.7684

In [56]:
result

(-6.88126904400267, 0.0, 12.2)

In [151]:
print((6 * 256 * 256 * 8) / 1e9 * 3000, 'G')
print((10 * 512 * 512 * 8) / 1e9 * 5000, 'G')
print(1e6 * 10 * 512 * 512 * 4 / 2 ** 40, 'T')
print(1e5 * 64 * 64 * 64 * 4 * 4 / 2 ** 40, 'T')

9.437184 G
104.8576 G
9.5367431640625 T
0.3814697265625 T


In [None]:
TODO Today:
    1. + check DQN with A, S as input -> send Sergey
    2. + Smaller depth_maps (6 * 256 * 256)
    3. + Illustrate reward as area + novp
    4. + Distributed Buffer
    5. + Buffer on hard disk (memmap)
    6. Floats to int
    7. GPU raycasting
    8. * Greedy algo
    9. * Voxels
    10. CNN different shapes input
    11. Preprocessed depth_maps
    
    10. Overfit experiments 
    11. ABC experiment
    
    12. Experiment radius = 1.0
    13. Experiments with smaller reward fine
    14. Experiment with different DQN-s
    15. ABC - write random model reading
    

Ideas:
    1. Add fine for same view_point
    
Big Experiments:
    1. Voxels
    2. PointNet
    3. Context (N first view_points)
    4. Meta-learning, подавать от простой к сложной детальке

In [15]:
difficult = [
    "00020107_b27a1602d1d44a3d89140ce4_007.obj",
    "00010095_5ae1ee45b583467fa009adc4_006.obj",
    "00010163_ccef4063b69f428e91b498c9_008.obj",
    "00010145_77759770d8cd48af80775d86_002.obj",
    "00010153_556de37e0a7447fcbfbdfd22_000.obj",
    "00010162_ccef4063b69f428e91b498c9_007.obj",
    "00020074_37170a1ba80747f1a1478985_000.obj",
    "00020077_bf146f0c5dee4199be920a21_000.obj",
    "00020080_7a689565e1e0481ca3ad4a6f_000.obj",
    "00010164_ccef4063b69f428e91b498c9_009.obj",
    "00010179_f91d806ac1e34ea1b14e23be_000.obj",
    "00020095_842a932142a9431784488344_000.obj",
    "00020097_e24ecc9c647f4bd1832bfb1d_000.obj",
    "00020106_b27a1602d1d44a3d89140ce4_006.obj",
    "00020141_b27a1602d1d44a3d89140ce4_041.obj",
    "00020155_b27a1602d1d44a3d89140ce4_055.obj",
    "00020186_b27a1602d1d44a3d89140ce4_086.obj",
    "00020202_b27a1602d1d44a3d89140ce4_102.obj",
    "00020203_b27a1602d1d44a3d89140ce4_103.obj",
    "00020213_1f65839d7f6c42bf8c2b3391_000.obj"]