In [None]:
#!/usr/bin/env python
"""
sarsa_learner.py
An easy-to-follow script to train, test and evaluate a SARSA agent on the Mountain Car
problem using the OpenAI Gym. | Ronak Mehta
"""
import gym
import numpy as np

# Assign the hyperparameters
MAX_NUM_EPISODES = 6000  # Increase this until the best reward reaches about -119.0
STEPS_PER_EPISODE = 200  # This is specific to MountainCar. May change with env
EPSILON_MIN = 0.005  # Epsilon stops decaying once it is no longer above this value
max_num_steps = MAX_NUM_EPISODES * STEPS_PER_EPISODE
# Amount subtracted from epsilon on every SARSA.get_action() call
EPSILON_DECAY = 500 * EPSILON_MIN / max_num_steps
ALPHA = 0.05  # Learning rate
GAMMA = 0.98  # Discount factor
NUM_DISCRETE_BINS = 30  # Number of bins to Discretize each observation dim


class SARSA(object):
    """Tabular SARSA agent for an environment with a 2-D continuous observation.

    Each observation dimension is split into ``NUM_DISCRETE_BINS`` equal-width
    bins, and Q-values are stored in a dense table indexed by
    ``(bin_dim0, bin_dim1, action)``.
    """

    def __init__(self, env):
        """Read the observation bounds and action count from ``env``.

        Args:
            env: Object exposing ``observation_space.shape``,
                ``observation_space.high``, ``observation_space.low`` and
                ``action_space.n``.
        """
        self.obs_shape = env.observation_space.shape
        self.obs_high = env.observation_space.high
        self.obs_low = env.observation_space.low
        self.obs_bins = NUM_DISCRETE_BINS  # Number of bins to Discretize each observation dim
        self.bin_width = (self.obs_high - self.obs_low) / self.obs_bins
        self.action_shape = env.action_space.n
        # Create a multi-dimensional array (aka. Table) to represent the
        # Q-values. Each observation axis gets obs_bins + 1 slots so that an
        # observation equal to obs_high still maps to a valid index:
        # (obs_bins + 1) x (obs_bins + 1) x n_actions, i.e. 31 x 31 x n_actions
        # with the default 30 bins.
        self.Q = np.zeros((self.obs_bins + 1, self.obs_bins + 1,
                           self.action_shape))
        self.alpha = ALPHA  # Learning rate
        self.gamma = GAMMA  # Discount factor
        self.epsilon = 1.0  # Start fully exploratory

    def discretize(self, obs):
        """Map a continuous observation to a tuple of integer bin indices."""
        return tuple(((obs - self.obs_low) / self.bin_width).astype(int))

    def get_action(self, obs):
        """Pick an action epsilon-greedily and decay epsilon by one step.

        Epsilon is decremented on every call (including the calls made from
        ``learn``) for as long as it is above ``EPSILON_MIN``.

        Args:
            obs: Continuous observation array.

        Returns:
            Integer action index in ``[0, action_shape)``.
        """
        # Epsilon-Greedy action selection
        if self.epsilon > EPSILON_MIN:
            self.epsilon -= EPSILON_DECAY
        if np.random.random() > self.epsilon:
            return np.argmax(self.Q[self.discretize(obs)])
        # Choose a random action; an int argument samples from range(n)
        return np.random.choice(self.action_shape)

    def learn(self, obs, action, reward, next_obs):
        """Apply one SARSA update and return the action to take next.

        The next action is chosen with this agent's own epsilon-greedy policy
        and is used both in the TD target and as the return value, so the
        caller is expected to execute it on the following step.

        Args:
            obs: Observation in which ``action`` was taken.
            action: Action index that was executed.
            reward: Reward received for the transition.
            next_obs: Observation reached after the transition.

        Returns:
            The action index selected for ``next_obs``.
        """
        discretized_obs = self.discretize(obs)
        discretized_next_obs = self.discretize(next_obs)
        # Action for the next state using epsilon greedy. This must go through
        # self, not a module-level ``agent``, so the class works on its own.
        next_action = self.get_action(next_obs)
        td_target = reward + self.gamma * self.Q[discretized_next_obs][next_action]
        td_error = td_target - self.Q[discretized_obs][action]
        self.Q[discretized_obs][action] += self.alpha * td_error
        return next_action

def train(agent, env, num_episodes=None):
    """Train ``agent`` on ``env`` with SARSA and return the greedy policy.

    Args:
        agent: Learner exposing ``get_action(obs)``,
            ``learn(obs, action, reward, next_obs)`` (returning the next
            action), ``epsilon`` and a Q-table ``Q`` whose last axis indexes
            actions.
        env: Environment whose ``reset()`` returns an observation and whose
            ``step(action)`` returns ``(next_obs, reward, done, info)``.
        num_episodes: Number of episodes to run. Defaults to the module-level
            ``MAX_NUM_EPISODES`` when ``None``.

    Returns:
        Array of greedy action indices, one per discretized state, obtained by
        taking the argmax of ``agent.Q`` over its action axis.
    """
    if num_episodes is None:
        num_episodes = MAX_NUM_EPISODES
    best_reward = -float('inf')
    for episode in range(num_episodes):
        done = False
        obs = env.reset()
        total_reward = 0.0
        action = agent.get_action(obs)
        while not done:
            next_obs, reward, done, _ = env.step(action)
            # learn() returns the action it selected for next_obs; executing
            # that same action on the next step keeps the update on-policy.
            action = agent.learn(obs, action, reward, next_obs)
            obs = next_obs
            total_reward += reward
        best_reward = max(best_reward, total_reward)
        print("Episode#:{} reward:{} best_reward:{} eps:{}".format(episode,
                                     total_reward, best_reward, agent.epsilon))
    # Return the trained policy (greedy action for every discretized state)
    return np.argmax(agent.Q, axis=-1)


def test(agent, env, policy):
    """Play a single episode by following ``policy`` and return its total reward.

    Args:
        agent: Object whose ``discretize(obs)`` turns an observation into an
            index usable on ``policy``.
        env: Environment whose ``reset()`` returns an observation and whose
            ``step(action)`` returns ``(next_obs, reward, done, info)``.
        policy: Lookup from discretized observation to action.

    Returns:
        Sum of the rewards collected until the environment reports done.
    """
    episode_return = 0.0
    state = env.reset()
    finished = False
    while not finished:
        chosen = policy[agent.discretize(state)]
        # The step result replaces the current state directly.
        state, step_reward, finished, _info = env.step(chosen)
        episode_return += step_reward
    return episode_return


if __name__ == "__main__":
    # Train a SARSA agent on MountainCar, then replay the learned greedy policy.
    env = gym.make('MountainCar-v0')
    try:
        agent = SARSA(env)
        learned_policy = train(agent, env)
        # Use the Gym Monitor wrapper to evaluate the agent and record video
        gym_monitor_path = "./gym_monitor_output"
        env = gym.wrappers.Monitor(env, gym_monitor_path, force=True)
        for _ in range(1000):
            test(agent, env, learned_policy)
    finally:
        # Close the environment (or the Monitor wrapper around it) even if
        # training or evaluation is interrupted or raises.
        env.close()

Episode#:0 reward:-200.0 best_reward:-200.0 eps:0.9995812500000084
Episode#:1 reward:-200.0 best_reward:-200.0 eps:0.9991625000000168
Episode#:2 reward:-200.0 best_reward:-200.0 eps:0.9987437500000252
Episode#:3 reward:-200.0 best_reward:-200.0 eps:0.9983250000000337
Episode#:4 reward:-200.0 best_reward:-200.0 eps:0.9979062500000421
Episode#:5 reward:-200.0 best_reward:-200.0 eps:0.9974875000000505
Episode#:6 reward:-200.0 best_reward:-200.0 eps:0.9970687500000589
Episode#:7 reward:-200.0 best_reward:-200.0 eps:0.9966500000000673
Episode#:8 reward:-200.0 best_reward:-200.0 eps:0.9962312500000757
Episode#:9 reward:-200.0 best_reward:-200.0 eps:0.9958125000000841
Episode#:10 reward:-200.0 best_reward:-200.0 eps:0.9953937500000926
Episode#:11 reward:-200.0 best_reward:-200.0 eps:0.994975000000101
Episode#:12 reward:-200.0 best_reward:-200.0 eps:0.9945562500001094
Episode#:13 reward:-200.0 best_reward:-200.0 eps:0.9941375000001178
Episode#:14 reward:-200.0 best_reward:-200.0 eps:0.99371875

Episode#:122 reward:-200.0 best_reward:-200.0 eps:0.948493750001035
Episode#:123 reward:-200.0 best_reward:-200.0 eps:0.9480750000010434
Episode#:124 reward:-200.0 best_reward:-200.0 eps:0.9476562500010518
Episode#:125 reward:-200.0 best_reward:-200.0 eps:0.9472375000010602
Episode#:126 reward:-200.0 best_reward:-200.0 eps:0.9468187500010686
Episode#:127 reward:-200.0 best_reward:-200.0 eps:0.946400000001077
Episode#:128 reward:-200.0 best_reward:-200.0 eps:0.9459812500010855
Episode#:129 reward:-200.0 best_reward:-200.0 eps:0.9455625000010939
Episode#:130 reward:-200.0 best_reward:-200.0 eps:0.9451437500011023
Episode#:131 reward:-200.0 best_reward:-200.0 eps:0.9447250000011107
Episode#:132 reward:-200.0 best_reward:-200.0 eps:0.9443062500011191
Episode#:133 reward:-200.0 best_reward:-200.0 eps:0.9438875000011275
Episode#:134 reward:-200.0 best_reward:-200.0 eps:0.943468750001136
Episode#:135 reward:-200.0 best_reward:-200.0 eps:0.9430500000011444
Episode#:136 reward:-200.0 best_rewar

Episode#:245 reward:-200.0 best_reward:-200.0 eps:0.89698750000207
Episode#:246 reward:-200.0 best_reward:-200.0 eps:0.8965687500020784
Episode#:247 reward:-200.0 best_reward:-200.0 eps:0.8961500000020868
Episode#:248 reward:-200.0 best_reward:-200.0 eps:0.8957312500020952
Episode#:249 reward:-200.0 best_reward:-200.0 eps:0.8953125000021036
Episode#:250 reward:-200.0 best_reward:-200.0 eps:0.894893750002112
Episode#:251 reward:-200.0 best_reward:-200.0 eps:0.8944750000021204
Episode#:252 reward:-200.0 best_reward:-200.0 eps:0.8940562500021288
Episode#:253 reward:-200.0 best_reward:-200.0 eps:0.8936375000021373
Episode#:254 reward:-200.0 best_reward:-200.0 eps:0.8932187500021457
Episode#:255 reward:-200.0 best_reward:-200.0 eps:0.8928000000021541
Episode#:256 reward:-200.0 best_reward:-200.0 eps:0.8923812500021625
Episode#:257 reward:-200.0 best_reward:-200.0 eps:0.8919625000021709
Episode#:258 reward:-200.0 best_reward:-200.0 eps:0.8915437500021793
Episode#:259 reward:-200.0 best_rewar

Episode#:365 reward:-200.0 best_reward:-200.0 eps:0.8467375000030797
Episode#:366 reward:-200.0 best_reward:-200.0 eps:0.8463187500030881
Episode#:367 reward:-200.0 best_reward:-200.0 eps:0.8459000000030965
Episode#:368 reward:-200.0 best_reward:-200.0 eps:0.8454812500031049
Episode#:369 reward:-200.0 best_reward:-200.0 eps:0.8450625000031133
Episode#:370 reward:-200.0 best_reward:-200.0 eps:0.8446437500031218
Episode#:371 reward:-200.0 best_reward:-200.0 eps:0.8442250000031302
Episode#:372 reward:-200.0 best_reward:-200.0 eps:0.8438062500031386
Episode#:373 reward:-200.0 best_reward:-200.0 eps:0.843387500003147
Episode#:374 reward:-200.0 best_reward:-200.0 eps:0.8429687500031554
Episode#:375 reward:-200.0 best_reward:-200.0 eps:0.8425500000031638
Episode#:376 reward:-200.0 best_reward:-200.0 eps:0.8421312500031722
Episode#:377 reward:-200.0 best_reward:-200.0 eps:0.8417125000031807
Episode#:378 reward:-200.0 best_reward:-200.0 eps:0.8412937500031891
Episode#:379 reward:-200.0 best_rew

Episode#:485 reward:-200.0 best_reward:-200.0 eps:0.7964875000040894
Episode#:486 reward:-200.0 best_reward:-200.0 eps:0.7960687500040978
Episode#:487 reward:-200.0 best_reward:-200.0 eps:0.7956500000041062
Episode#:488 reward:-200.0 best_reward:-200.0 eps:0.7952312500041147
Episode#:489 reward:-200.0 best_reward:-200.0 eps:0.7948125000041231
Episode#:490 reward:-200.0 best_reward:-200.0 eps:0.7943937500041315
Episode#:491 reward:-200.0 best_reward:-200.0 eps:0.7939750000041399
Episode#:492 reward:-200.0 best_reward:-200.0 eps:0.7935562500041483
Episode#:493 reward:-200.0 best_reward:-200.0 eps:0.7931375000041567
Episode#:494 reward:-200.0 best_reward:-200.0 eps:0.7927187500041651
Episode#:495 reward:-200.0 best_reward:-200.0 eps:0.7923000000041736
Episode#:496 reward:-200.0 best_reward:-200.0 eps:0.791881250004182
Episode#:497 reward:-200.0 best_reward:-200.0 eps:0.7914625000041904
Episode#:498 reward:-200.0 best_reward:-200.0 eps:0.7910437500041988
Episode#:499 reward:-200.0 best_rew

Episode#:606 reward:-200.0 best_reward:-200.0 eps:0.7458187500051076
Episode#:607 reward:-200.0 best_reward:-200.0 eps:0.745400000005116
Episode#:608 reward:-200.0 best_reward:-200.0 eps:0.7449812500051244
Episode#:609 reward:-200.0 best_reward:-200.0 eps:0.7445625000051328
Episode#:610 reward:-200.0 best_reward:-200.0 eps:0.7441437500051412
Episode#:611 reward:-200.0 best_reward:-200.0 eps:0.7437250000051496
Episode#:612 reward:-200.0 best_reward:-200.0 eps:0.743306250005158
Episode#:613 reward:-200.0 best_reward:-200.0 eps:0.7428875000051665
Episode#:614 reward:-200.0 best_reward:-200.0 eps:0.7424687500051749
Episode#:615 reward:-200.0 best_reward:-200.0 eps:0.7420500000051833
Episode#:616 reward:-200.0 best_reward:-200.0 eps:0.7416312500051917
Episode#:617 reward:-200.0 best_reward:-200.0 eps:0.7412125000052001
Episode#:618 reward:-200.0 best_reward:-200.0 eps:0.7407937500052085
Episode#:619 reward:-200.0 best_reward:-200.0 eps:0.7403750000052169
Episode#:620 reward:-200.0 best_rewa

Episode#:726 reward:-200.0 best_reward:-200.0 eps:0.6955687500061173
Episode#:727 reward:-200.0 best_reward:-200.0 eps:0.6951500000061257
Episode#:728 reward:-200.0 best_reward:-200.0 eps:0.6947312500061341
Episode#:729 reward:-200.0 best_reward:-200.0 eps:0.6943125000061425
Episode#:730 reward:-200.0 best_reward:-200.0 eps:0.6938937500061509
Episode#:731 reward:-200.0 best_reward:-200.0 eps:0.6934750000061594
Episode#:732 reward:-200.0 best_reward:-200.0 eps:0.6930562500061678
Episode#:733 reward:-200.0 best_reward:-200.0 eps:0.6926375000061762
Episode#:734 reward:-200.0 best_reward:-200.0 eps:0.6922187500061846
Episode#:735 reward:-200.0 best_reward:-200.0 eps:0.691800000006193
Episode#:736 reward:-200.0 best_reward:-200.0 eps:0.6913812500062014
Episode#:737 reward:-200.0 best_reward:-200.0 eps:0.6909625000062098
Episode#:738 reward:-200.0 best_reward:-200.0 eps:0.6905437500062183
Episode#:739 reward:-200.0 best_reward:-200.0 eps:0.6901250000062267
Episode#:740 reward:-200.0 best_rew

Episode#:845 reward:-200.0 best_reward:-200.0 eps:0.6457375000071186
Episode#:846 reward:-200.0 best_reward:-200.0 eps:0.645318750007127
Episode#:847 reward:-200.0 best_reward:-200.0 eps:0.6449000000071354
Episode#:848 reward:-200.0 best_reward:-200.0 eps:0.6444812500071438
Episode#:849 reward:-200.0 best_reward:-200.0 eps:0.6440625000071523
Episode#:850 reward:-200.0 best_reward:-200.0 eps:0.6436437500071607
Episode#:851 reward:-200.0 best_reward:-200.0 eps:0.6432250000071691
Episode#:852 reward:-200.0 best_reward:-200.0 eps:0.6428062500071775
Episode#:853 reward:-200.0 best_reward:-200.0 eps:0.6423875000071859
Episode#:854 reward:-200.0 best_reward:-200.0 eps:0.6419687500071943
Episode#:855 reward:-200.0 best_reward:-200.0 eps:0.6415500000072027
Episode#:856 reward:-200.0 best_reward:-200.0 eps:0.6411312500072112
Episode#:857 reward:-200.0 best_reward:-200.0 eps:0.6407125000072196
Episode#:858 reward:-200.0 best_reward:-200.0 eps:0.640293750007228
Episode#:859 reward:-200.0 best_rewa

Episode#:965 reward:-200.0 best_reward:-200.0 eps:0.5954875000081283
Episode#:966 reward:-200.0 best_reward:-200.0 eps:0.5950687500081367
Episode#:967 reward:-200.0 best_reward:-200.0 eps:0.5946500000081452
Episode#:968 reward:-200.0 best_reward:-200.0 eps:0.5942312500081536
Episode#:969 reward:-200.0 best_reward:-200.0 eps:0.593812500008162
Episode#:970 reward:-200.0 best_reward:-200.0 eps:0.5933937500081704
Episode#:971 reward:-200.0 best_reward:-200.0 eps:0.5929750000081788
Episode#:972 reward:-200.0 best_reward:-200.0 eps:0.5925562500081872
Episode#:973 reward:-200.0 best_reward:-200.0 eps:0.5921375000081956
Episode#:974 reward:-200.0 best_reward:-200.0 eps:0.5917187500082041
Episode#:975 reward:-200.0 best_reward:-200.0 eps:0.5913000000082125
Episode#:976 reward:-200.0 best_reward:-200.0 eps:0.5908812500082209
Episode#:977 reward:-200.0 best_reward:-200.0 eps:0.5904625000082293
Episode#:978 reward:-200.0 best_reward:-200.0 eps:0.5900437500082377
Episode#:979 reward:-200.0 best_rew

Episode#:1086 reward:-200.0 best_reward:-200.0 eps:0.5448187500091465
Episode#:1087 reward:-200.0 best_reward:-200.0 eps:0.5444000000091549
Episode#:1088 reward:-200.0 best_reward:-200.0 eps:0.5439812500091633
Episode#:1089 reward:-200.0 best_reward:-200.0 eps:0.5435625000091717
Episode#:1090 reward:-200.0 best_reward:-200.0 eps:0.5431437500091801
Episode#:1091 reward:-200.0 best_reward:-200.0 eps:0.5427250000091886
Episode#:1092 reward:-200.0 best_reward:-200.0 eps:0.542306250009197
Episode#:1093 reward:-200.0 best_reward:-200.0 eps:0.5418875000092054
Episode#:1094 reward:-200.0 best_reward:-200.0 eps:0.5414687500092138
Episode#:1095 reward:-200.0 best_reward:-200.0 eps:0.5410500000092222
Episode#:1096 reward:-200.0 best_reward:-200.0 eps:0.5406312500092306
Episode#:1097 reward:-200.0 best_reward:-200.0 eps:0.540212500009239
Episode#:1098 reward:-200.0 best_reward:-200.0 eps:0.5397937500092475
Episode#:1099 reward:-200.0 best_reward:-200.0 eps:0.5393750000092559
Episode#:1100 reward:-

Episode#:1209 reward:-200.0 best_reward:-200.0 eps:0.49331250001000326
Episode#:1210 reward:-200.0 best_reward:-200.0 eps:0.4928937500100005
Episode#:1211 reward:-200.0 best_reward:-200.0 eps:0.4924750000099978
Episode#:1212 reward:-200.0 best_reward:-200.0 eps:0.49205625000999503
Episode#:1213 reward:-200.0 best_reward:-200.0 eps:0.4916375000099923
Episode#:1214 reward:-200.0 best_reward:-200.0 eps:0.49121875000998955
Episode#:1215 reward:-200.0 best_reward:-200.0 eps:0.4908000000099868
Episode#:1216 reward:-200.0 best_reward:-200.0 eps:0.49038125000998406
Episode#:1217 reward:-200.0 best_reward:-200.0 eps:0.4899625000099813
Episode#:1218 reward:-200.0 best_reward:-200.0 eps:0.4895437500099786
Episode#:1219 reward:-200.0 best_reward:-200.0 eps:0.48912500000997583
Episode#:1220 reward:-200.0 best_reward:-200.0 eps:0.4887062500099731
Episode#:1221 reward:-200.0 best_reward:-200.0 eps:0.48828750000997034
Episode#:1222 reward:-200.0 best_reward:-200.0 eps:0.4878687500099676
Episode#:1223 

Episode#:1330 reward:-200.0 best_reward:-200.0 eps:0.4426437500096713
Episode#:1331 reward:-200.0 best_reward:-200.0 eps:0.4422250000096686
Episode#:1332 reward:-200.0 best_reward:-200.0 eps:0.44180625000966584
Episode#:1333 reward:-200.0 best_reward:-200.0 eps:0.4413875000096631
Episode#:1334 reward:-200.0 best_reward:-200.0 eps:0.44096875000966035
Episode#:1335 reward:-200.0 best_reward:-200.0 eps:0.4405500000096576
Episode#:1336 reward:-200.0 best_reward:-200.0 eps:0.44013125000965486
Episode#:1337 reward:-200.0 best_reward:-200.0 eps:0.4397125000096521
Episode#:1338 reward:-200.0 best_reward:-200.0 eps:0.4392937500096494
Episode#:1339 reward:-200.0 best_reward:-200.0 eps:0.43887500000964663
Episode#:1340 reward:-200.0 best_reward:-200.0 eps:0.4384562500096439
Episode#:1341 reward:-200.0 best_reward:-200.0 eps:0.43803750000964115
Episode#:1342 reward:-200.0 best_reward:-200.0 eps:0.4376187500096384
Episode#:1343 reward:-200.0 best_reward:-200.0 eps:0.43720000000963566
Episode#:1344 

Episode#:1451 reward:-200.0 best_reward:-200.0 eps:0.3919750000093394
Episode#:1452 reward:-200.0 best_reward:-200.0 eps:0.39155625000933664
Episode#:1453 reward:-200.0 best_reward:-200.0 eps:0.3911375000093339
Episode#:1454 reward:-200.0 best_reward:-200.0 eps:0.39071875000933115
Episode#:1455 reward:-200.0 best_reward:-200.0 eps:0.3903000000093284
Episode#:1456 reward:-200.0 best_reward:-200.0 eps:0.38988125000932566
Episode#:1457 reward:-200.0 best_reward:-200.0 eps:0.3894625000093229
Episode#:1458 reward:-200.0 best_reward:-200.0 eps:0.3890437500093202
Episode#:1459 reward:-200.0 best_reward:-200.0 eps:0.38862500000931743
Episode#:1460 reward:-200.0 best_reward:-200.0 eps:0.3882062500093147
Episode#:1461 reward:-200.0 best_reward:-200.0 eps:0.38778750000931195
Episode#:1462 reward:-200.0 best_reward:-200.0 eps:0.3873687500093092
Episode#:1463 reward:-200.0 best_reward:-200.0 eps:0.38695000000930646
Episode#:1464 reward:-200.0 best_reward:-200.0 eps:0.3865312500093037
Episode#:1465 

Episode#:1572 reward:-200.0 best_reward:-200.0 eps:0.34130625000900744
Episode#:1573 reward:-200.0 best_reward:-200.0 eps:0.3408875000090047
Episode#:1574 reward:-200.0 best_reward:-200.0 eps:0.34046875000900195
Episode#:1575 reward:-200.0 best_reward:-200.0 eps:0.3400500000089992
Episode#:1576 reward:-200.0 best_reward:-200.0 eps:0.33963125000899647
Episode#:1577 reward:-200.0 best_reward:-200.0 eps:0.3392125000089937
Episode#:1578 reward:-200.0 best_reward:-200.0 eps:0.338793750008991
Episode#:1579 reward:-200.0 best_reward:-200.0 eps:0.33837500000898824
Episode#:1580 reward:-200.0 best_reward:-200.0 eps:0.3379562500089855
Episode#:1581 reward:-200.0 best_reward:-200.0 eps:0.33753750000898275
Episode#:1582 reward:-200.0 best_reward:-200.0 eps:0.33711875000898
Episode#:1583 reward:-200.0 best_reward:-200.0 eps:0.33670000000897726
Episode#:1584 reward:-200.0 best_reward:-200.0 eps:0.3362812500089745
Episode#:1585 reward:-200.0 best_reward:-200.0 eps:0.3358625000089718
Episode#:1586 rew

Episode#:1697 reward:-200.0 best_reward:-200.0 eps:0.2889625000086645
Episode#:1698 reward:-200.0 best_reward:-200.0 eps:0.2885437500086618
Episode#:1699 reward:-200.0 best_reward:-200.0 eps:0.28812500000865904
Episode#:1700 reward:-200.0 best_reward:-200.0 eps:0.2877062500086563
Episode#:1701 reward:-200.0 best_reward:-200.0 eps:0.28728750000865355
Episode#:1702 reward:-200.0 best_reward:-200.0 eps:0.2868687500086508
Episode#:1703 reward:-200.0 best_reward:-200.0 eps:0.28645000000864806
Episode#:1704 reward:-200.0 best_reward:-200.0 eps:0.2860312500086453
Episode#:1705 reward:-200.0 best_reward:-200.0 eps:0.2856125000086426
Episode#:1706 reward:-200.0 best_reward:-200.0 eps:0.28519375000863983
Episode#:1707 reward:-200.0 best_reward:-200.0 eps:0.2847750000086371
Episode#:1708 reward:-200.0 best_reward:-200.0 eps:0.28435625000863435
Episode#:1709 reward:-200.0 best_reward:-200.0 eps:0.2839375000086316
Episode#:1710 reward:-200.0 best_reward:-200.0 eps:0.28351875000862886
Episode#:1711 

Episode#:1819 reward:-200.0 best_reward:-199.0 eps:0.2378770833416632
Episode#:1820 reward:-200.0 best_reward:-199.0 eps:0.23745833334166044
Episode#:1821 reward:-200.0 best_reward:-199.0 eps:0.2370395833416577
Episode#:1822 reward:-200.0 best_reward:-199.0 eps:0.23662083334165496
Episode#:1823 reward:-200.0 best_reward:-199.0 eps:0.2362020833416522
Episode#:1824 reward:-200.0 best_reward:-199.0 eps:0.23578333334164947
Episode#:1825 reward:-200.0 best_reward:-199.0 eps:0.23536458334164673
Episode#:1826 reward:-200.0 best_reward:-199.0 eps:0.23494583334164398
Episode#:1827 reward:-200.0 best_reward:-199.0 eps:0.23452708334164124
Episode#:1828 reward:-200.0 best_reward:-199.0 eps:0.2341083333416385
Episode#:1829 reward:-200.0 best_reward:-199.0 eps:0.23368958334163575
Episode#:1830 reward:-200.0 best_reward:-199.0 eps:0.233270833341633
Episode#:1831 reward:-200.0 best_reward:-199.0 eps:0.23285208334163027
Episode#:1832 reward:-200.0 best_reward:-199.0 eps:0.23243333334162752
Episode#:183

Episode#:1941 reward:-200.0 best_reward:-199.0 eps:0.1867895833413285
Episode#:1942 reward:-200.0 best_reward:-199.0 eps:0.18637083334132576
Episode#:1943 reward:-200.0 best_reward:-199.0 eps:0.18595208334132302
Episode#:1944 reward:-200.0 best_reward:-199.0 eps:0.18553333334132027
Episode#:1945 reward:-200.0 best_reward:-199.0 eps:0.18511458334131753
Episode#:1946 reward:-200.0 best_reward:-199.0 eps:0.18469583334131479
Episode#:1947 reward:-200.0 best_reward:-199.0 eps:0.18427708334131204
Episode#:1948 reward:-200.0 best_reward:-199.0 eps:0.1838583333413093
Episode#:1949 reward:-200.0 best_reward:-199.0 eps:0.18343958334130656
Episode#:1950 reward:-200.0 best_reward:-199.0 eps:0.1830208333413038
Episode#:1951 reward:-200.0 best_reward:-199.0 eps:0.18260208334130107
Episode#:1952 reward:-200.0 best_reward:-199.0 eps:0.18218333334129833
Episode#:1953 reward:-200.0 best_reward:-199.0 eps:0.18176458334129558
Episode#:1954 reward:-200.0 best_reward:-199.0 eps:0.18134583334129284
Episode#:

Episode#:2066 reward:-200.0 best_reward:-160.0 eps:0.13493750000765548
Episode#:2067 reward:-200.0 best_reward:-160.0 eps:0.13451875000765273
Episode#:2068 reward:-200.0 best_reward:-160.0 eps:0.13410000000765
Episode#:2069 reward:-200.0 best_reward:-160.0 eps:0.13368125000764725
Episode#:2070 reward:-200.0 best_reward:-160.0 eps:0.1332625000076445
Episode#:2071 reward:-200.0 best_reward:-160.0 eps:0.13284375000764176
Episode#:2072 reward:-200.0 best_reward:-160.0 eps:0.13242500000763902
Episode#:2073 reward:-200.0 best_reward:-160.0 eps:0.13200625000763627
Episode#:2074 reward:-200.0 best_reward:-160.0 eps:0.13158750000763353
Episode#:2075 reward:-171.0 best_reward:-160.0 eps:0.13122916667429785
Episode#:2076 reward:-200.0 best_reward:-160.0 eps:0.1308104166742951
Episode#:2077 reward:-200.0 best_reward:-160.0 eps:0.13039166667429236
Episode#:2078 reward:-200.0 best_reward:-160.0 eps:0.12997291667428962
Episode#:2079 reward:-200.0 best_reward:-160.0 eps:0.12955416667428687
Episode#:20

Episode#:2192 reward:-200.0 best_reward:-158.0 eps:0.08232500000759507
Episode#:2193 reward:-200.0 best_reward:-158.0 eps:0.08190625000759512
Episode#:2194 reward:-200.0 best_reward:-158.0 eps:0.08148750000759516
Episode#:2195 reward:-200.0 best_reward:-158.0 eps:0.08106875000759521
Episode#:2196 reward:-200.0 best_reward:-158.0 eps:0.08065000000759526
Episode#:2197 reward:-200.0 best_reward:-158.0 eps:0.0802312500075953
Episode#:2198 reward:-200.0 best_reward:-158.0 eps:0.07981250000759535
Episode#:2199 reward:-166.0 best_reward:-158.0 eps:0.07946458334092872
Episode#:2200 reward:-200.0 best_reward:-158.0 eps:0.07904583334092877
Episode#:2201 reward:-166.0 best_reward:-158.0 eps:0.07869791667426214
Episode#:2202 reward:-200.0 best_reward:-158.0 eps:0.07827916667426218
Episode#:2203 reward:-200.0 best_reward:-158.0 eps:0.07786041667426223
Episode#:2204 reward:-200.0 best_reward:-158.0 eps:0.07744166667426228
Episode#:2205 reward:-200.0 best_reward:-158.0 eps:0.07702291667426232
Episode

Episode#:2320 reward:-200.0 best_reward:-157.0 eps:0.029152083340934262
Episode#:2321 reward:-200.0 best_reward:-157.0 eps:0.02873333334093431
Episode#:2322 reward:-200.0 best_reward:-157.0 eps:0.028314583340934354
Episode#:2323 reward:-200.0 best_reward:-157.0 eps:0.0278958333409344
Episode#:2324 reward:-200.0 best_reward:-157.0 eps:0.027477083340934447
Episode#:2325 reward:-200.0 best_reward:-157.0 eps:0.027058333340934493
Episode#:2326 reward:-200.0 best_reward:-157.0 eps:0.02663958334093454
Episode#:2327 reward:-200.0 best_reward:-157.0 eps:0.026220833340934585
Episode#:2328 reward:-200.0 best_reward:-157.0 eps:0.02580208334093463
Episode#:2329 reward:-200.0 best_reward:-157.0 eps:0.025383333340934677
Episode#:2330 reward:-200.0 best_reward:-157.0 eps:0.024964583340934723
Episode#:2331 reward:-200.0 best_reward:-157.0 eps:0.02454583334093477
Episode#:2332 reward:-200.0 best_reward:-157.0 eps:0.024127083340934816
Episode#:2333 reward:-200.0 best_reward:-157.0 eps:0.02370833334093486

Episode#:2446 reward:-200.0 best_reward:-157.0 eps:0.004997916674270256
Episode#:2447 reward:-200.0 best_reward:-157.0 eps:0.004997916674270256
Episode#:2448 reward:-200.0 best_reward:-157.0 eps:0.004997916674270256
Episode#:2449 reward:-200.0 best_reward:-157.0 eps:0.004997916674270256
Episode#:2450 reward:-200.0 best_reward:-157.0 eps:0.004997916674270256
Episode#:2451 reward:-200.0 best_reward:-157.0 eps:0.004997916674270256
Episode#:2452 reward:-200.0 best_reward:-157.0 eps:0.004997916674270256
Episode#:2453 reward:-200.0 best_reward:-157.0 eps:0.004997916674270256
Episode#:2454 reward:-200.0 best_reward:-157.0 eps:0.004997916674270256
Episode#:2455 reward:-200.0 best_reward:-157.0 eps:0.004997916674270256
Episode#:2456 reward:-200.0 best_reward:-157.0 eps:0.004997916674270256
Episode#:2457 reward:-200.0 best_reward:-157.0 eps:0.004997916674270256
Episode#:2458 reward:-200.0 best_reward:-157.0 eps:0.004997916674270256
Episode#:2459 reward:-200.0 best_reward:-157.0 eps:0.00499791667

Episode#:2574 reward:-200.0 best_reward:-157.0 eps:0.004997916674270256
Episode#:2575 reward:-200.0 best_reward:-157.0 eps:0.004997916674270256
Episode#:2576 reward:-200.0 best_reward:-157.0 eps:0.004997916674270256
Episode#:2577 reward:-200.0 best_reward:-157.0 eps:0.004997916674270256
Episode#:2578 reward:-200.0 best_reward:-157.0 eps:0.004997916674270256
Episode#:2579 reward:-200.0 best_reward:-157.0 eps:0.004997916674270256
Episode#:2580 reward:-200.0 best_reward:-157.0 eps:0.004997916674270256
Episode#:2581 reward:-200.0 best_reward:-157.0 eps:0.004997916674270256
Episode#:2582 reward:-200.0 best_reward:-157.0 eps:0.004997916674270256
Episode#:2583 reward:-200.0 best_reward:-157.0 eps:0.004997916674270256
Episode#:2584 reward:-200.0 best_reward:-157.0 eps:0.004997916674270256
Episode#:2585 reward:-200.0 best_reward:-157.0 eps:0.004997916674270256
Episode#:2586 reward:-200.0 best_reward:-157.0 eps:0.004997916674270256
Episode#:2587 reward:-200.0 best_reward:-157.0 eps:0.00499791667

Episode#:2703 reward:-200.0 best_reward:-157.0 eps:0.004997916674270256
Episode#:2704 reward:-200.0 best_reward:-157.0 eps:0.004997916674270256
Episode#:2705 reward:-200.0 best_reward:-157.0 eps:0.004997916674270256
Episode#:2706 reward:-200.0 best_reward:-157.0 eps:0.004997916674270256
Episode#:2707 reward:-198.0 best_reward:-157.0 eps:0.004997916674270256
Episode#:2708 reward:-200.0 best_reward:-157.0 eps:0.004997916674270256
Episode#:2709 reward:-199.0 best_reward:-157.0 eps:0.004997916674270256
Episode#:2710 reward:-198.0 best_reward:-157.0 eps:0.004997916674270256
Episode#:2711 reward:-200.0 best_reward:-157.0 eps:0.004997916674270256
Episode#:2712 reward:-200.0 best_reward:-157.0 eps:0.004997916674270256
Episode#:2713 reward:-159.0 best_reward:-157.0 eps:0.004997916674270256
Episode#:2714 reward:-200.0 best_reward:-157.0 eps:0.004997916674270256
Episode#:2715 reward:-200.0 best_reward:-157.0 eps:0.004997916674270256
Episode#:2716 reward:-200.0 best_reward:-157.0 eps:0.00499791667

Episode#:2831 reward:-200.0 best_reward:-157.0 eps:0.004997916674270256
Episode#:2832 reward:-200.0 best_reward:-157.0 eps:0.004997916674270256
Episode#:2833 reward:-200.0 best_reward:-157.0 eps:0.004997916674270256
Episode#:2834 reward:-200.0 best_reward:-157.0 eps:0.004997916674270256
Episode#:2835 reward:-200.0 best_reward:-157.0 eps:0.004997916674270256
Episode#:2836 reward:-200.0 best_reward:-157.0 eps:0.004997916674270256
Episode#:2837 reward:-180.0 best_reward:-157.0 eps:0.004997916674270256
Episode#:2838 reward:-200.0 best_reward:-157.0 eps:0.004997916674270256
Episode#:2839 reward:-200.0 best_reward:-157.0 eps:0.004997916674270256
Episode#:2840 reward:-164.0 best_reward:-157.0 eps:0.004997916674270256
Episode#:2841 reward:-174.0 best_reward:-157.0 eps:0.004997916674270256
Episode#:2842 reward:-200.0 best_reward:-157.0 eps:0.004997916674270256
Episode#:2843 reward:-200.0 best_reward:-157.0 eps:0.004997916674270256
Episode#:2844 reward:-200.0 best_reward:-157.0 eps:0.00499791667

Episode#:2946 reward:-200.0 best_reward:-155.0 eps:0.004997916674270256
Episode#:2947 reward:-200.0 best_reward:-155.0 eps:0.004997916674270256
Episode#:2948 reward:-200.0 best_reward:-155.0 eps:0.004997916674270256
Episode#:2949 reward:-200.0 best_reward:-155.0 eps:0.004997916674270256
Episode#:2950 reward:-200.0 best_reward:-155.0 eps:0.004997916674270256
Episode#:2951 reward:-200.0 best_reward:-155.0 eps:0.004997916674270256
Episode#:2952 reward:-200.0 best_reward:-155.0 eps:0.004997916674270256
Episode#:2953 reward:-200.0 best_reward:-155.0 eps:0.004997916674270256
Episode#:2954 reward:-200.0 best_reward:-155.0 eps:0.004997916674270256
Episode#:2955 reward:-200.0 best_reward:-155.0 eps:0.004997916674270256
Episode#:2956 reward:-200.0 best_reward:-155.0 eps:0.004997916674270256
Episode#:2957 reward:-200.0 best_reward:-155.0 eps:0.004997916674270256
Episode#:2958 reward:-200.0 best_reward:-155.0 eps:0.004997916674270256
Episode#:2959 reward:-200.0 best_reward:-155.0 eps:0.00499791667

Episode#:3074 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3075 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3076 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3077 reward:-196.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3078 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3079 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3080 reward:-198.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3081 reward:-198.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3082 reward:-196.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3083 reward:-163.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3084 reward:-198.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3085 reward:-199.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3086 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3087 reward:-200.0 best_reward:-124.0 eps:0.00499791667

Episode#:3188 reward:-173.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3189 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3190 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3191 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3192 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3193 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3194 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3195 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3196 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3197 reward:-160.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3198 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3199 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3200 reward:-177.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3201 reward:-200.0 best_reward:-124.0 eps:0.00499791667

Episode#:3312 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3313 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3314 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3315 reward:-159.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3316 reward:-199.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3317 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3318 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3319 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3320 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3321 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3322 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3323 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3324 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3325 reward:-158.0 best_reward:-124.0 eps:0.00499791667

Episode#:3438 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3439 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3440 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3441 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3442 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3443 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3444 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3445 reward:-193.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3446 reward:-195.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3447 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3448 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3449 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3450 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3451 reward:-193.0 best_reward:-124.0 eps:0.00499791667

Episode#:3564 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3565 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3566 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3567 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3568 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3569 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3570 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3571 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3572 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3573 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3574 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3575 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3576 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3577 reward:-200.0 best_reward:-124.0 eps:0.00499791667

Episode#:3692 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3693 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3694 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3695 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3696 reward:-187.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3697 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3698 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3699 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3700 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3701 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3702 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3703 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3704 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3705 reward:-200.0 best_reward:-124.0 eps:0.00499791667

Episode#:3809 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3810 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3811 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3812 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3813 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3814 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3815 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3816 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3817 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3818 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3819 reward:-190.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3820 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3821 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3822 reward:-200.0 best_reward:-124.0 eps:0.00499791667

Episode#:3923 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3924 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3925 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3926 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3927 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3928 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3929 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3930 reward:-173.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3931 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3932 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3933 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3934 reward:-156.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3935 reward:-165.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:3936 reward:-200.0 best_reward:-124.0 eps:0.00499791667

Episode#:4037 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:4038 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:4039 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:4040 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:4041 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:4042 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:4043 reward:-194.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:4044 reward:-197.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:4045 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:4046 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:4047 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:4048 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:4049 reward:-197.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:4050 reward:-200.0 best_reward:-124.0 eps:0.00499791667

Episode#:4168 reward:-177.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:4169 reward:-184.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:4170 reward:-165.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:4171 reward:-183.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:4172 reward:-184.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:4173 reward:-160.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:4174 reward:-197.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:4175 reward:-155.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:4176 reward:-197.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:4177 reward:-198.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:4178 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:4179 reward:-170.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:4180 reward:-183.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:4181 reward:-193.0 best_reward:-124.0 eps:0.00499791667

Episode#:4291 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:4292 reward:-168.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:4293 reward:-149.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:4294 reward:-165.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:4295 reward:-160.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:4296 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:4297 reward:-170.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:4298 reward:-173.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:4299 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:4300 reward:-177.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:4301 reward:-153.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:4302 reward:-197.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:4303 reward:-200.0 best_reward:-124.0 eps:0.004997916674270256
Episode#:4304 reward:-200.0 best_reward:-124.0 eps:0.00499791667

Episode#:4410 reward:-200.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:4411 reward:-200.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:4412 reward:-200.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:4413 reward:-200.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:4414 reward:-200.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:4415 reward:-200.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:4416 reward:-159.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:4417 reward:-200.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:4418 reward:-200.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:4419 reward:-162.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:4420 reward:-200.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:4421 reward:-168.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:4422 reward:-200.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:4423 reward:-200.0 best_reward:-118.0 eps:0.00499791667

Episode#:4538 reward:-200.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:4539 reward:-200.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:4540 reward:-193.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:4541 reward:-195.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:4542 reward:-200.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:4543 reward:-190.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:4544 reward:-200.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:4545 reward:-200.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:4546 reward:-200.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:4547 reward:-200.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:4548 reward:-200.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:4549 reward:-184.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:4550 reward:-200.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:4551 reward:-200.0 best_reward:-118.0 eps:0.00499791667

Episode#:4667 reward:-186.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:4668 reward:-200.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:4669 reward:-200.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:4670 reward:-184.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:4671 reward:-178.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:4672 reward:-162.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:4673 reward:-200.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:4674 reward:-181.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:4675 reward:-166.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:4676 reward:-200.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:4677 reward:-183.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:4678 reward:-200.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:4679 reward:-184.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:4680 reward:-200.0 best_reward:-118.0 eps:0.00499791667

Episode#:4785 reward:-200.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:4786 reward:-200.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:4787 reward:-200.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:4788 reward:-161.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:4789 reward:-157.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:4790 reward:-200.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:4791 reward:-154.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:4792 reward:-200.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:4793 reward:-200.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:4794 reward:-200.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:4795 reward:-200.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:4796 reward:-157.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:4797 reward:-200.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:4798 reward:-200.0 best_reward:-118.0 eps:0.00499791667

Episode#:4904 reward:-200.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:4905 reward:-200.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:4906 reward:-200.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:4907 reward:-200.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:4908 reward:-200.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:4909 reward:-160.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:4910 reward:-200.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:4911 reward:-186.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:4912 reward:-171.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:4913 reward:-200.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:4914 reward:-154.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:4915 reward:-185.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:4916 reward:-200.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:4917 reward:-200.0 best_reward:-118.0 eps:0.00499791667

Episode#:5019 reward:-167.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:5020 reward:-200.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:5021 reward:-197.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:5022 reward:-200.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:5023 reward:-200.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:5024 reward:-200.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:5025 reward:-200.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:5026 reward:-200.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:5027 reward:-200.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:5028 reward:-200.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:5029 reward:-200.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:5030 reward:-200.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:5031 reward:-168.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:5032 reward:-200.0 best_reward:-118.0 eps:0.00499791667

Episode#:5136 reward:-200.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:5137 reward:-161.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:5138 reward:-165.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:5139 reward:-197.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:5140 reward:-171.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:5141 reward:-200.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:5142 reward:-162.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:5143 reward:-169.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:5144 reward:-198.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:5145 reward:-162.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:5146 reward:-168.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:5147 reward:-164.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:5148 reward:-200.0 best_reward:-118.0 eps:0.004997916674270256
Episode#:5149 reward:-196.0 best_reward:-118.0 eps:0.00499791667

Episode#:5262 reward:-200.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5263 reward:-150.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5264 reward:-155.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5265 reward:-200.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5266 reward:-200.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5267 reward:-164.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5268 reward:-200.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5269 reward:-200.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5270 reward:-200.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5271 reward:-168.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5272 reward:-158.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5273 reward:-200.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5274 reward:-153.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5275 reward:-200.0 best_reward:-117.0 eps:0.00499791667

Episode#:5380 reward:-200.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5381 reward:-197.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5382 reward:-200.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5383 reward:-189.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5384 reward:-200.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5385 reward:-200.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5386 reward:-200.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5387 reward:-190.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5388 reward:-200.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5389 reward:-169.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5390 reward:-200.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5391 reward:-200.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5392 reward:-200.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5393 reward:-200.0 best_reward:-117.0 eps:0.00499791667

Episode#:5496 reward:-200.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5497 reward:-200.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5498 reward:-200.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5499 reward:-200.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5500 reward:-200.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5501 reward:-200.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5502 reward:-200.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5503 reward:-200.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5504 reward:-200.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5505 reward:-200.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5506 reward:-200.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5507 reward:-200.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5508 reward:-200.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5509 reward:-200.0 best_reward:-117.0 eps:0.00499791667

Episode#:5626 reward:-195.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5627 reward:-167.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5628 reward:-194.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5629 reward:-162.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5630 reward:-200.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5631 reward:-200.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5632 reward:-200.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5633 reward:-200.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5634 reward:-200.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5635 reward:-200.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5636 reward:-170.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5637 reward:-200.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5638 reward:-188.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5639 reward:-200.0 best_reward:-117.0 eps:0.00499791667

Episode#:5742 reward:-200.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5743 reward:-200.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5744 reward:-200.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5745 reward:-200.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5746 reward:-200.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5747 reward:-200.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5748 reward:-200.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5749 reward:-200.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5750 reward:-200.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5751 reward:-200.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5752 reward:-200.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5753 reward:-200.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5754 reward:-178.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5755 reward:-200.0 best_reward:-117.0 eps:0.00499791667

Episode#:5860 reward:-162.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5861 reward:-198.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5862 reward:-200.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5863 reward:-200.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5864 reward:-200.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5865 reward:-200.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5866 reward:-157.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5867 reward:-200.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5868 reward:-200.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5869 reward:-200.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5870 reward:-200.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5871 reward:-200.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5872 reward:-163.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5873 reward:-200.0 best_reward:-117.0 eps:0.00499791667

Episode#:5974 reward:-200.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5975 reward:-200.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5976 reward:-200.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5977 reward:-200.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5978 reward:-200.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5979 reward:-200.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5980 reward:-200.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5981 reward:-198.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5982 reward:-200.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5983 reward:-195.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5984 reward:-200.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5985 reward:-200.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5986 reward:-200.0 best_reward:-117.0 eps:0.004997916674270256
Episode#:5987 reward:-200.0 best_reward:-117.0 eps:0.00499791667