In [1]:
import gym
import numpy as np
rm='Pong-v0'

In [2]:
%matplotlib inline
from matplotlib import animation
import matplotlib.pyplot as plt
from IPython.display import display, HTML

def display_frames_as_gif(frames):
    plt.figure(figsize=(frames[0].shape[1] / 72.0, frames[0].shape[0] / 72.0), dpi = 144)
    patch = plt.imshow(frames[0])
    plt.axis('off')
    def animate(i):
        patch.set_data(frames[i])
    anim = animation.FuncAnimation(plt.gcf(), animate, frames = len(frames), interval=50)
    plt.close(anim._fig)
    display(HTML(anim.to_jshtml()))

In [3]:
from gym.wrappers import AtariPreprocessing
gym.new_step_api=True
env = gym.make(rm)
# model initialization
H = 400 # number of hidden layer neurons
D = 80 * 80 # input dimensionality: 80x80 grid
model = {}
model['W1'] = np.random.randn(H,D) / np.sqrt(D) # "Xavier" initialization
model['W2'] = np.random.randn(H) / np.sqrt(H)
# hyperparameters
batch_size = 10 # every how many episodes to do a param update?
learning_rate = 1e-4
gamma = 0.99 # discount factor for reward
decay_rate = 0.99 # decay factor for RMSProp leaky sum of grad^2
grad_buffer = { k : np.zeros_like(v) for k,v in model.items() } # update buffers that add up gradients over a batch
rmsprop_cache = { k : np.zeros_like(v) for k,v in model.items() } # rmsprop memory

def sigmoid(x): 
  return 1.0 / (1.0 + np.exp(-x)) # sigmoid "squashing" function to interval [0,1]

def prepro(I):
  I=np.asarray(I)
  I = I[35:195] # crop
  I = I[::2,::2,0] # downsample by factor of 2
  
  I[I == 144] = 0 # erase background (background type 1)
  
  I[I == 109] = 0 # erase background (background type 2)
  
  I[I != 0] = 1 # everything else (paddles, ball) just set to 1
  return I.astype(float).ravel()

def discount_rewards(r):
  discounted_r = np.zeros_like(r)
  running_add = 0
  for t in reversed(range(0, r.size)):
    if r[t] != 0: running_add = 0 # reset the sum, since this was a game boundary (pong specific!)
    running_add = running_add * gamma + r[t]
    discounted_r[t] = running_add
  return discounted_r

def policy_forward(x):
  h = np.dot(model['W1'], x)
  h[h<0] = 0 # ReLU nonlinearity
  logp = np.dot(model['W2'], h)
  p = sigmoid(logp)
  return p, h # return probability of taking action 2, and hidden state

def policy_backward(epx, eph, epdlogp):
  """ backward pass. (eph is array of intermediate hidden states) """
  dW2 = np.dot(eph.T, epdlogp).ravel()
  dh = np.outer(epdlogp, model['W2'])
  dh[eph <= 0] = 0 # backpro prelu
  dW1 = np.dot(dh.T, epx)
  return {'W1':dW1, 'W2':dW2}

def model_step(model, observation, prev_x):
  # preprocess the observation, set input to network to be difference image
  cur_x = prepro(observation)
  x = cur_x - prev_x if prev_x is not None else np.zeros(D)
  prev_x = cur_x
  
  # forward the policy network and sample an action from the returned probability
  aprob, _ = policy_forward(x)
  action = 2 if aprob >= 0.5 else 3 # roll the dice!
  
  return action, prev_x

def play_game(env, model):
  observation = env.reset()

  frames = []
  cumulated_reward = 0

  prev_x = None # used in computing the difference frame

  for t in range(1000):
      frames.append(env.render(mode = 'rgb_array'))
      action, prev_x = model_step(model, observation, prev_x)
      observation, reward, done, info = env.step(action)
      cumulated_reward += reward
      if done:
          print("Episode finished after {} timesteps, accumulated reward = {}".format(t+1, cumulated_reward))
          break
  print("Episode finished without success, accumulated reward = {}".format(cumulated_reward))
  env.close()
  display_frames_as_gif(frames)

def train_model(env, model, total_episodes = 100):
  hist = []
  observation = env.reset()

  prev_x = None # used in computing the difference frame
  xs,hs,dlogps,drs = [],[],[],[]
  running_reward = None
  reward_sum = 0
  episode_number = 0

  while True:
    # preprocess the observation, set input to network to be difference image
    cur_x = prepro(observation)
    x = cur_x - prev_x if prev_x is not None else np.zeros(D)
    prev_x = cur_x

    # forward the policy network and sample an action from the returned probability
    aprob, h = policy_forward(x)
    action = 2 if np.random.uniform() < aprob else 3 # roll the dice!

    # record various intermediates (needed later for backprop)
    xs.append(x) # observation
    hs.append(h) # hidden state
    y = 1 if action == 2 else 0 # a "fake label"
    dlogps.append(y - aprob) # grad that encourages the action that was taken to be taken (see http://cs231n.github.io/neural-networks-2/#losses if confused)

    # step the environment and get new measurements
    observation, reward, done, info = env.step(action)
    reward_sum += reward

    drs.append(reward) # record reward (has to be done after we call step() to get reward for previous action)

    if done: # an episode finished
      episode_number += 1

      # stack together all inputs, hidden states, action gradients, and rewards for this episode
      epx = np.vstack(xs)
      eph = np.vstack(hs)
      epdlogp = np.vstack(dlogps)
      epr = np.vstack(drs)
      xs,hs,dlogps,drs = [],[],[],[] # reset array memory

      # compute the discounted reward backwards through time
      discounted_epr = discount_rewards(epr)
      # standardize the rewards to be unit normal (helps control the gradient estimator variance)
      discounted_epr -= np.mean(discounted_epr)
      discounted_epr /= np.std(discounted_epr)

      epdlogp *= discounted_epr # modulate the gradient with advantage (PG magic happens right here.)
      grad = policy_backward(epx, eph, epdlogp)
      for k in model: grad_buffer[k] += grad[k] # accumulate grad over batch

      # perform rmsprop parameter update every batch_size episodes
      if episode_number % batch_size == 0:
        for k,v in model.items():
          g = grad_buffer[k] # gradient
          rmsprop_cache[k] = decay_rate * rmsprop_cache[k] + (1 - decay_rate) * g**2
          model[k] += learning_rate * g / (np.sqrt(rmsprop_cache[k]) + 1e-5)
          grad_buffer[k] = np.zeros_like(v) # reset batch gradient buffer

      # boring book-keeping
      running_reward = reward_sum if running_reward is None else running_reward * 0.99 + reward_sum * 0.01
      hist.append((episode_number, reward_sum, running_reward))
      print ('episode %f, reward total was %f. running mean: %f' % (episode_number, reward_sum, running_reward))
      reward_sum = 0
      observation = env.reset() # reset env
      prev_x = None
      if episode_number == total_episodes: 
        return hist

    # if reward != 0: # Pong has either +1 or -1 reward exactly when game ends.
    #   print('ep {}: game finished, reward: {}'.format(episode_number, reward) + ('' if reward == -1 else ' !!!!!!!!'))

  f"The environment {id} is out of date. You should consider "
  "Initializing wrapper in old step API which returns one bool instead of two. It is recommended to set `new_step_api=True` to use new step API. This will be the default behaviour in future."
  "Initializing environment in old step API which returns one bool instead of two. It is recommended to set `new_step_api=True` to use new step API. This will be the default behaviour in future."


In [4]:
%time hist1 = train_model(env, model, total_episodes=6000)

  "Core environment is written in old step API which returns one bool instead of two. "


episode 1.000000, reward total was -20.000000. running mean: -20.000000
episode 2.000000, reward total was -20.000000. running mean: -20.000000
episode 3.000000, reward total was -19.000000. running mean: -19.990000
episode 4.000000, reward total was -21.000000. running mean: -20.000100
episode 5.000000, reward total was -21.000000. running mean: -20.010099
episode 6.000000, reward total was -21.000000. running mean: -20.019998
episode 7.000000, reward total was -21.000000. running mean: -20.029798
episode 8.000000, reward total was -21.000000. running mean: -20.039500
episode 9.000000, reward total was -20.000000. running mean: -20.039105
episode 10.000000, reward total was -21.000000. running mean: -20.048714
episode 11.000000, reward total was -18.000000. running mean: -20.028227
episode 12.000000, reward total was -20.000000. running mean: -20.027945
episode 13.000000, reward total was -21.000000. running mean: -20.037665
episode 14.000000, reward total was -21.000000. running mean

episode 114.000000, reward total was -19.000000. running mean: -20.275008
episode 115.000000, reward total was -21.000000. running mean: -20.282258
episode 116.000000, reward total was -21.000000. running mean: -20.289435
episode 117.000000, reward total was -21.000000. running mean: -20.296541
episode 118.000000, reward total was -21.000000. running mean: -20.303575
episode 119.000000, reward total was -21.000000. running mean: -20.310540
episode 120.000000, reward total was -20.000000. running mean: -20.307434
episode 121.000000, reward total was -20.000000. running mean: -20.304360
episode 122.000000, reward total was -19.000000. running mean: -20.291316
episode 123.000000, reward total was -21.000000. running mean: -20.298403
episode 124.000000, reward total was -21.000000. running mean: -20.305419
episode 125.000000, reward total was -20.000000. running mean: -20.302365
episode 126.000000, reward total was -21.000000. running mean: -20.309341
episode 127.000000, reward total was -

episode 225.000000, reward total was -21.000000. running mean: -20.390738
episode 226.000000, reward total was -21.000000. running mean: -20.396831
episode 227.000000, reward total was -21.000000. running mean: -20.402862
episode 228.000000, reward total was -21.000000. running mean: -20.408834
episode 229.000000, reward total was -20.000000. running mean: -20.404745
episode 230.000000, reward total was -21.000000. running mean: -20.410698
episode 231.000000, reward total was -18.000000. running mean: -20.386591
episode 232.000000, reward total was -21.000000. running mean: -20.392725
episode 233.000000, reward total was -20.000000. running mean: -20.388798
episode 234.000000, reward total was -21.000000. running mean: -20.394910
episode 235.000000, reward total was -21.000000. running mean: -20.400961
episode 236.000000, reward total was -21.000000. running mean: -20.406951
episode 237.000000, reward total was -19.000000. running mean: -20.392882
episode 238.000000, reward total was -

episode 336.000000, reward total was -19.000000. running mean: -20.380060
episode 337.000000, reward total was -20.000000. running mean: -20.376260
episode 338.000000, reward total was -21.000000. running mean: -20.382497
episode 339.000000, reward total was -21.000000. running mean: -20.388672
episode 340.000000, reward total was -21.000000. running mean: -20.394786
episode 341.000000, reward total was -19.000000. running mean: -20.380838
episode 342.000000, reward total was -19.000000. running mean: -20.367029
episode 343.000000, reward total was -21.000000. running mean: -20.373359
episode 344.000000, reward total was -21.000000. running mean: -20.379625
episode 345.000000, reward total was -20.000000. running mean: -20.375829
episode 346.000000, reward total was -21.000000. running mean: -20.382071
episode 347.000000, reward total was -19.000000. running mean: -20.368250
episode 348.000000, reward total was -21.000000. running mean: -20.374568
episode 349.000000, reward total was -

episode 447.000000, reward total was -20.000000. running mean: -20.225539
episode 448.000000, reward total was -20.000000. running mean: -20.223284
episode 449.000000, reward total was -21.000000. running mean: -20.231051
episode 450.000000, reward total was -21.000000. running mean: -20.238740
episode 451.000000, reward total was -21.000000. running mean: -20.246353
episode 452.000000, reward total was -20.000000. running mean: -20.243889
episode 453.000000, reward total was -19.000000. running mean: -20.231451
episode 454.000000, reward total was -21.000000. running mean: -20.239136
episode 455.000000, reward total was -19.000000. running mean: -20.226745
episode 456.000000, reward total was -21.000000. running mean: -20.234477
episode 457.000000, reward total was -20.000000. running mean: -20.232132
episode 458.000000, reward total was -20.000000. running mean: -20.229811
episode 459.000000, reward total was -20.000000. running mean: -20.227513
episode 460.000000, reward total was -

episode 558.000000, reward total was -21.000000. running mean: -20.367338
episode 559.000000, reward total was -21.000000. running mean: -20.373664
episode 560.000000, reward total was -21.000000. running mean: -20.379928
episode 561.000000, reward total was -21.000000. running mean: -20.386128
episode 562.000000, reward total was -19.000000. running mean: -20.372267
episode 563.000000, reward total was -21.000000. running mean: -20.378544
episode 564.000000, reward total was -21.000000. running mean: -20.384759
episode 565.000000, reward total was -21.000000. running mean: -20.390911
episode 566.000000, reward total was -19.000000. running mean: -20.377002
episode 567.000000, reward total was -21.000000. running mean: -20.383232
episode 568.000000, reward total was -21.000000. running mean: -20.389400
episode 569.000000, reward total was -19.000000. running mean: -20.375506
episode 570.000000, reward total was -18.000000. running mean: -20.351751
episode 571.000000, reward total was -

episode 669.000000, reward total was -21.000000. running mean: -20.365820
episode 670.000000, reward total was -21.000000. running mean: -20.372162
episode 671.000000, reward total was -21.000000. running mean: -20.378441
episode 672.000000, reward total was -21.000000. running mean: -20.384656
episode 673.000000, reward total was -20.000000. running mean: -20.380810
episode 674.000000, reward total was -21.000000. running mean: -20.387001
episode 675.000000, reward total was -21.000000. running mean: -20.393131
episode 676.000000, reward total was -21.000000. running mean: -20.399200
episode 677.000000, reward total was -21.000000. running mean: -20.405208
episode 678.000000, reward total was -19.000000. running mean: -20.391156
episode 679.000000, reward total was -20.000000. running mean: -20.387245
episode 680.000000, reward total was -21.000000. running mean: -20.393372
episode 681.000000, reward total was -21.000000. running mean: -20.399438
episode 682.000000, reward total was -

episode 780.000000, reward total was -18.000000. running mean: -20.266072
episode 781.000000, reward total was -21.000000. running mean: -20.273411
episode 782.000000, reward total was -19.000000. running mean: -20.260677
episode 783.000000, reward total was -21.000000. running mean: -20.268070
episode 784.000000, reward total was -21.000000. running mean: -20.275389
episode 785.000000, reward total was -21.000000. running mean: -20.282635
episode 786.000000, reward total was -21.000000. running mean: -20.289809
episode 787.000000, reward total was -21.000000. running mean: -20.296911
episode 788.000000, reward total was -20.000000. running mean: -20.293942
episode 789.000000, reward total was -20.000000. running mean: -20.291002
episode 790.000000, reward total was -20.000000. running mean: -20.288092
episode 791.000000, reward total was -20.000000. running mean: -20.285211
episode 792.000000, reward total was -20.000000. running mean: -20.282359
episode 793.000000, reward total was -

episode 891.000000, reward total was -20.000000. running mean: -20.298606
episode 892.000000, reward total was -20.000000. running mean: -20.295620
episode 893.000000, reward total was -21.000000. running mean: -20.302664
episode 894.000000, reward total was -20.000000. running mean: -20.299637
episode 895.000000, reward total was -21.000000. running mean: -20.306641
episode 896.000000, reward total was -21.000000. running mean: -20.313574
episode 897.000000, reward total was -20.000000. running mean: -20.310439
episode 898.000000, reward total was -21.000000. running mean: -20.317334
episode 899.000000, reward total was -21.000000. running mean: -20.324161
episode 900.000000, reward total was -18.000000. running mean: -20.300919
episode 901.000000, reward total was -20.000000. running mean: -20.297910
episode 902.000000, reward total was -20.000000. running mean: -20.294931
episode 903.000000, reward total was -19.000000. running mean: -20.281982
episode 904.000000, reward total was -

episode 1002.000000, reward total was -20.000000. running mean: -20.330709
episode 1003.000000, reward total was -21.000000. running mean: -20.337401
episode 1004.000000, reward total was -20.000000. running mean: -20.334027
episode 1005.000000, reward total was -20.000000. running mean: -20.330687
episode 1006.000000, reward total was -21.000000. running mean: -20.337380
episode 1007.000000, reward total was -18.000000. running mean: -20.314007
episode 1008.000000, reward total was -20.000000. running mean: -20.310866
episode 1009.000000, reward total was -21.000000. running mean: -20.317758
episode 1010.000000, reward total was -19.000000. running mean: -20.304580
episode 1011.000000, reward total was -21.000000. running mean: -20.311534
episode 1012.000000, reward total was -21.000000. running mean: -20.318419
episode 1013.000000, reward total was -20.000000. running mean: -20.315235
episode 1014.000000, reward total was -21.000000. running mean: -20.322083
episode 1015.000000, rewa

episode 1112.000000, reward total was -21.000000. running mean: -20.201782
episode 1113.000000, reward total was -20.000000. running mean: -20.199764
episode 1114.000000, reward total was -21.000000. running mean: -20.207766
episode 1115.000000, reward total was -21.000000. running mean: -20.215689
episode 1116.000000, reward total was -19.000000. running mean: -20.203532
episode 1117.000000, reward total was -21.000000. running mean: -20.211496
episode 1118.000000, reward total was -21.000000. running mean: -20.219381
episode 1119.000000, reward total was -21.000000. running mean: -20.227188
episode 1120.000000, reward total was -21.000000. running mean: -20.234916
episode 1121.000000, reward total was -21.000000. running mean: -20.242567
episode 1122.000000, reward total was -21.000000. running mean: -20.250141
episode 1123.000000, reward total was -21.000000. running mean: -20.257640
episode 1124.000000, reward total was -18.000000. running mean: -20.235063
episode 1125.000000, rewa

episode 1222.000000, reward total was -21.000000. running mean: -20.180503
episode 1223.000000, reward total was -21.000000. running mean: -20.188698
episode 1224.000000, reward total was -19.000000. running mean: -20.176811
episode 1225.000000, reward total was -21.000000. running mean: -20.185043
episode 1226.000000, reward total was -21.000000. running mean: -20.193192
episode 1227.000000, reward total was -21.000000. running mean: -20.201261
episode 1228.000000, reward total was -21.000000. running mean: -20.209248
episode 1229.000000, reward total was -20.000000. running mean: -20.207155
episode 1230.000000, reward total was -21.000000. running mean: -20.215084
episode 1231.000000, reward total was -20.000000. running mean: -20.212933
episode 1232.000000, reward total was -20.000000. running mean: -20.210804
episode 1233.000000, reward total was -21.000000. running mean: -20.218696
episode 1234.000000, reward total was -21.000000. running mean: -20.226509
episode 1235.000000, rewa

episode 1332.000000, reward total was -21.000000. running mean: -20.100467
episode 1333.000000, reward total was -19.000000. running mean: -20.089463
episode 1334.000000, reward total was -20.000000. running mean: -20.088568
episode 1335.000000, reward total was -21.000000. running mean: -20.097682
episode 1336.000000, reward total was -18.000000. running mean: -20.076706
episode 1337.000000, reward total was -21.000000. running mean: -20.085939
episode 1338.000000, reward total was -21.000000. running mean: -20.095079
episode 1339.000000, reward total was -21.000000. running mean: -20.104128
episode 1340.000000, reward total was -20.000000. running mean: -20.103087
episode 1341.000000, reward total was -19.000000. running mean: -20.092056
episode 1342.000000, reward total was -20.000000. running mean: -20.091136
episode 1343.000000, reward total was -21.000000. running mean: -20.100224
episode 1344.000000, reward total was -20.000000. running mean: -20.099222
episode 1345.000000, rewa

episode 1442.000000, reward total was -21.000000. running mean: -20.210734
episode 1443.000000, reward total was -20.000000. running mean: -20.208626
episode 1444.000000, reward total was -21.000000. running mean: -20.216540
episode 1445.000000, reward total was -21.000000. running mean: -20.224375
episode 1446.000000, reward total was -21.000000. running mean: -20.232131
episode 1447.000000, reward total was -18.000000. running mean: -20.209810
episode 1448.000000, reward total was -19.000000. running mean: -20.197712
episode 1449.000000, reward total was -19.000000. running mean: -20.185735
episode 1450.000000, reward total was -20.000000. running mean: -20.183877
episode 1451.000000, reward total was -21.000000. running mean: -20.192038
episode 1452.000000, reward total was -20.000000. running mean: -20.190118
episode 1453.000000, reward total was -19.000000. running mean: -20.178217
episode 1454.000000, reward total was -21.000000. running mean: -20.186435
episode 1455.000000, rewa

episode 1552.000000, reward total was -20.000000. running mean: -20.194325
episode 1553.000000, reward total was -20.000000. running mean: -20.192382
episode 1554.000000, reward total was -20.000000. running mean: -20.190458
episode 1555.000000, reward total was -18.000000. running mean: -20.168553
episode 1556.000000, reward total was -20.000000. running mean: -20.166868
episode 1557.000000, reward total was -19.000000. running mean: -20.155199
episode 1558.000000, reward total was -19.000000. running mean: -20.143647
episode 1559.000000, reward total was -20.000000. running mean: -20.142211
episode 1560.000000, reward total was -21.000000. running mean: -20.150788
episode 1561.000000, reward total was -21.000000. running mean: -20.159281
episode 1562.000000, reward total was -21.000000. running mean: -20.167688
episode 1563.000000, reward total was -21.000000. running mean: -20.176011
episode 1564.000000, reward total was -21.000000. running mean: -20.184251
episode 1565.000000, rewa

episode 1662.000000, reward total was -21.000000. running mean: -20.096943
episode 1663.000000, reward total was -19.000000. running mean: -20.085973
episode 1664.000000, reward total was -20.000000. running mean: -20.085114
episode 1665.000000, reward total was -21.000000. running mean: -20.094263
episode 1666.000000, reward total was -21.000000. running mean: -20.103320
episode 1667.000000, reward total was -21.000000. running mean: -20.112287
episode 1668.000000, reward total was -21.000000. running mean: -20.121164
episode 1669.000000, reward total was -21.000000. running mean: -20.129952
episode 1670.000000, reward total was -21.000000. running mean: -20.138653
episode 1671.000000, reward total was -20.000000. running mean: -20.137266
episode 1672.000000, reward total was -21.000000. running mean: -20.145894
episode 1673.000000, reward total was -21.000000. running mean: -20.154435
episode 1674.000000, reward total was -21.000000. running mean: -20.162890
episode 1675.000000, rewa

episode 1772.000000, reward total was -21.000000. running mean: -20.112885
episode 1773.000000, reward total was -20.000000. running mean: -20.111756
episode 1774.000000, reward total was -20.000000. running mean: -20.110639
episode 1775.000000, reward total was -20.000000. running mean: -20.109532
episode 1776.000000, reward total was -21.000000. running mean: -20.118437
episode 1777.000000, reward total was -19.000000. running mean: -20.107252
episode 1778.000000, reward total was -20.000000. running mean: -20.106180
episode 1779.000000, reward total was -18.000000. running mean: -20.085118
episode 1780.000000, reward total was -20.000000. running mean: -20.084267
episode 1781.000000, reward total was -19.000000. running mean: -20.073424
episode 1782.000000, reward total was -21.000000. running mean: -20.082690
episode 1783.000000, reward total was -20.000000. running mean: -20.081863
episode 1784.000000, reward total was -19.000000. running mean: -20.071045
episode 1785.000000, rewa

episode 1882.000000, reward total was -19.000000. running mean: -20.091068
episode 1883.000000, reward total was -19.000000. running mean: -20.080157
episode 1884.000000, reward total was -21.000000. running mean: -20.089355
episode 1885.000000, reward total was -21.000000. running mean: -20.098462
episode 1886.000000, reward total was -20.000000. running mean: -20.097477
episode 1887.000000, reward total was -20.000000. running mean: -20.096502
episode 1888.000000, reward total was -20.000000. running mean: -20.095537
episode 1889.000000, reward total was -19.000000. running mean: -20.084582
episode 1890.000000, reward total was -21.000000. running mean: -20.093736
episode 1891.000000, reward total was -20.000000. running mean: -20.092799
episode 1892.000000, reward total was -21.000000. running mean: -20.101871
episode 1893.000000, reward total was -21.000000. running mean: -20.110852
episode 1894.000000, reward total was -20.000000. running mean: -20.109744
episode 1895.000000, rewa

episode 1992.000000, reward total was -21.000000. running mean: -20.264837
episode 1993.000000, reward total was -21.000000. running mean: -20.272188
episode 1994.000000, reward total was -21.000000. running mean: -20.279466
episode 1995.000000, reward total was -20.000000. running mean: -20.276672
episode 1996.000000, reward total was -18.000000. running mean: -20.253905
episode 1997.000000, reward total was -19.000000. running mean: -20.241366
episode 1998.000000, reward total was -21.000000. running mean: -20.248952
episode 1999.000000, reward total was -21.000000. running mean: -20.256463
episode 2000.000000, reward total was -19.000000. running mean: -20.243898
episode 2001.000000, reward total was -21.000000. running mean: -20.251459
episode 2002.000000, reward total was -21.000000. running mean: -20.258945
episode 2003.000000, reward total was -21.000000. running mean: -20.266355
episode 2004.000000, reward total was -19.000000. running mean: -20.253692
episode 2005.000000, rewa

episode 2102.000000, reward total was -21.000000. running mean: -20.277427
episode 2103.000000, reward total was -19.000000. running mean: -20.264653
episode 2104.000000, reward total was -20.000000. running mean: -20.262007
episode 2105.000000, reward total was -19.000000. running mean: -20.249387
episode 2106.000000, reward total was -20.000000. running mean: -20.246893
episode 2107.000000, reward total was -19.000000. running mean: -20.234424
episode 2108.000000, reward total was -20.000000. running mean: -20.232080
episode 2109.000000, reward total was -20.000000. running mean: -20.229759
episode 2110.000000, reward total was -19.000000. running mean: -20.217461
episode 2111.000000, reward total was -20.000000. running mean: -20.215287
episode 2112.000000, reward total was -21.000000. running mean: -20.223134
episode 2113.000000, reward total was -19.000000. running mean: -20.210902
episode 2114.000000, reward total was -20.000000. running mean: -20.208793
episode 2115.000000, rewa

episode 2212.000000, reward total was -17.000000. running mean: -20.230402
episode 2213.000000, reward total was -21.000000. running mean: -20.238098
episode 2214.000000, reward total was -19.000000. running mean: -20.225717
episode 2215.000000, reward total was -21.000000. running mean: -20.233459
episode 2216.000000, reward total was -20.000000. running mean: -20.231125
episode 2217.000000, reward total was -20.000000. running mean: -20.228814
episode 2218.000000, reward total was -21.000000. running mean: -20.236525
episode 2219.000000, reward total was -21.000000. running mean: -20.244160
episode 2220.000000, reward total was -20.000000. running mean: -20.241719
episode 2221.000000, reward total was -21.000000. running mean: -20.249301
episode 2222.000000, reward total was -19.000000. running mean: -20.236808
episode 2223.000000, reward total was -21.000000. running mean: -20.244440
episode 2224.000000, reward total was -19.000000. running mean: -20.231996
episode 2225.000000, rewa

episode 2322.000000, reward total was -20.000000. running mean: -20.167592
episode 2323.000000, reward total was -21.000000. running mean: -20.175916
episode 2324.000000, reward total was -21.000000. running mean: -20.184157
episode 2325.000000, reward total was -20.000000. running mean: -20.182315
episode 2326.000000, reward total was -20.000000. running mean: -20.180492
episode 2327.000000, reward total was -20.000000. running mean: -20.178687
episode 2328.000000, reward total was -20.000000. running mean: -20.176900
episode 2329.000000, reward total was -20.000000. running mean: -20.175131
episode 2330.000000, reward total was -21.000000. running mean: -20.183380
episode 2331.000000, reward total was -21.000000. running mean: -20.191546
episode 2332.000000, reward total was -21.000000. running mean: -20.199631
episode 2333.000000, reward total was -20.000000. running mean: -20.197635
episode 2334.000000, reward total was -21.000000. running mean: -20.205658
episode 2335.000000, rewa

episode 2432.000000, reward total was -20.000000. running mean: -20.069298
episode 2433.000000, reward total was -20.000000. running mean: -20.068605
episode 2434.000000, reward total was -21.000000. running mean: -20.077919
episode 2435.000000, reward total was -21.000000. running mean: -20.087140
episode 2436.000000, reward total was -20.000000. running mean: -20.086268
episode 2437.000000, reward total was -20.000000. running mean: -20.085406
episode 2438.000000, reward total was -20.000000. running mean: -20.084552
episode 2439.000000, reward total was -21.000000. running mean: -20.093706
episode 2440.000000, reward total was -20.000000. running mean: -20.092769
episode 2441.000000, reward total was -20.000000. running mean: -20.091841
episode 2442.000000, reward total was -20.000000. running mean: -20.090923
episode 2443.000000, reward total was -18.000000. running mean: -20.070014
episode 2444.000000, reward total was -19.000000. running mean: -20.059313
episode 2445.000000, rewa

episode 2542.000000, reward total was -21.000000. running mean: -19.918089
episode 2543.000000, reward total was -21.000000. running mean: -19.928908
episode 2544.000000, reward total was -21.000000. running mean: -19.939619
episode 2545.000000, reward total was -20.000000. running mean: -19.940223
episode 2546.000000, reward total was -21.000000. running mean: -19.950820
episode 2547.000000, reward total was -21.000000. running mean: -19.961312
episode 2548.000000, reward total was -20.000000. running mean: -19.961699
episode 2549.000000, reward total was -21.000000. running mean: -19.972082
episode 2550.000000, reward total was -19.000000. running mean: -19.962361
episode 2551.000000, reward total was -21.000000. running mean: -19.972738
episode 2552.000000, reward total was -19.000000. running mean: -19.963010
episode 2553.000000, reward total was -21.000000. running mean: -19.973380
episode 2554.000000, reward total was -20.000000. running mean: -19.973646
episode 2555.000000, rewa

episode 2652.000000, reward total was -20.000000. running mean: -20.018052
episode 2653.000000, reward total was -21.000000. running mean: -20.027872
episode 2654.000000, reward total was -20.000000. running mean: -20.027593
episode 2655.000000, reward total was -19.000000. running mean: -20.017317
episode 2656.000000, reward total was -21.000000. running mean: -20.027144
episode 2657.000000, reward total was -20.000000. running mean: -20.026872
episode 2658.000000, reward total was -20.000000. running mean: -20.026604
episode 2659.000000, reward total was -21.000000. running mean: -20.036338
episode 2660.000000, reward total was -21.000000. running mean: -20.045974
episode 2661.000000, reward total was -21.000000. running mean: -20.055514
episode 2662.000000, reward total was -21.000000. running mean: -20.064959
episode 2663.000000, reward total was -20.000000. running mean: -20.064310
episode 2664.000000, reward total was -20.000000. running mean: -20.063667
episode 2665.000000, rewa

episode 2762.000000, reward total was -20.000000. running mean: -20.111784
episode 2763.000000, reward total was -20.000000. running mean: -20.110667
episode 2764.000000, reward total was -20.000000. running mean: -20.109560
episode 2765.000000, reward total was -16.000000. running mean: -20.068464
episode 2766.000000, reward total was -19.000000. running mean: -20.057780
episode 2767.000000, reward total was -19.000000. running mean: -20.047202
episode 2768.000000, reward total was -19.000000. running mean: -20.036730
episode 2769.000000, reward total was -21.000000. running mean: -20.046363
episode 2770.000000, reward total was -19.000000. running mean: -20.035899
episode 2771.000000, reward total was -21.000000. running mean: -20.045540
episode 2772.000000, reward total was -21.000000. running mean: -20.055085
episode 2773.000000, reward total was -19.000000. running mean: -20.044534
episode 2774.000000, reward total was -21.000000. running mean: -20.054088
episode 2775.000000, rewa

episode 2872.000000, reward total was -19.000000. running mean: -19.994098
episode 2873.000000, reward total was -17.000000. running mean: -19.964157
episode 2874.000000, reward total was -21.000000. running mean: -19.974515
episode 2875.000000, reward total was -20.000000. running mean: -19.974770
episode 2876.000000, reward total was -18.000000. running mean: -19.955022
episode 2877.000000, reward total was -21.000000. running mean: -19.965472
episode 2878.000000, reward total was -20.000000. running mean: -19.965817
episode 2879.000000, reward total was -20.000000. running mean: -19.966159
episode 2880.000000, reward total was -21.000000. running mean: -19.976497
episode 2881.000000, reward total was -20.000000. running mean: -19.976733
episode 2882.000000, reward total was -21.000000. running mean: -19.986965
episode 2883.000000, reward total was -20.000000. running mean: -19.987096
episode 2884.000000, reward total was -21.000000. running mean: -19.997225
episode 2885.000000, rewa

episode 2982.000000, reward total was -20.000000. running mean: -20.012599
episode 2983.000000, reward total was -19.000000. running mean: -20.002473
episode 2984.000000, reward total was -17.000000. running mean: -19.972448
episode 2985.000000, reward total was -20.000000. running mean: -19.972724
episode 2986.000000, reward total was -20.000000. running mean: -19.972997
episode 2987.000000, reward total was -20.000000. running mean: -19.973267
episode 2988.000000, reward total was -15.000000. running mean: -19.923534
episode 2989.000000, reward total was -18.000000. running mean: -19.904299
episode 2990.000000, reward total was -21.000000. running mean: -19.915256
episode 2991.000000, reward total was -18.000000. running mean: -19.896103
episode 2992.000000, reward total was -19.000000. running mean: -19.887142
episode 2993.000000, reward total was -21.000000. running mean: -19.898271
episode 2994.000000, reward total was -19.000000. running mean: -19.889288
episode 2995.000000, rewa

episode 3092.000000, reward total was -21.000000. running mean: -19.978028
episode 3093.000000, reward total was -19.000000. running mean: -19.968248
episode 3094.000000, reward total was -17.000000. running mean: -19.938566
episode 3095.000000, reward total was -21.000000. running mean: -19.949180
episode 3096.000000, reward total was -20.000000. running mean: -19.949688
episode 3097.000000, reward total was -21.000000. running mean: -19.960191
episode 3098.000000, reward total was -19.000000. running mean: -19.950589
episode 3099.000000, reward total was -19.000000. running mean: -19.941083
episode 3100.000000, reward total was -20.000000. running mean: -19.941673
episode 3101.000000, reward total was -20.000000. running mean: -19.942256
episode 3102.000000, reward total was -19.000000. running mean: -19.932833
episode 3103.000000, reward total was -20.000000. running mean: -19.933505
episode 3104.000000, reward total was -20.000000. running mean: -19.934170
episode 3105.000000, rewa

episode 3202.000000, reward total was -20.000000. running mean: -20.005137
episode 3203.000000, reward total was -20.000000. running mean: -20.005086
episode 3204.000000, reward total was -20.000000. running mean: -20.005035
episode 3205.000000, reward total was -19.000000. running mean: -19.994985
episode 3206.000000, reward total was -21.000000. running mean: -20.005035
episode 3207.000000, reward total was -18.000000. running mean: -19.984984
episode 3208.000000, reward total was -21.000000. running mean: -19.995135
episode 3209.000000, reward total was -21.000000. running mean: -20.005183
episode 3210.000000, reward total was -19.000000. running mean: -19.995131
episode 3211.000000, reward total was -20.000000. running mean: -19.995180
episode 3212.000000, reward total was -20.000000. running mean: -19.995228
episode 3213.000000, reward total was -21.000000. running mean: -20.005276
episode 3214.000000, reward total was -20.000000. running mean: -20.005223
episode 3215.000000, rewa

episode 3312.000000, reward total was -21.000000. running mean: -19.879828
episode 3313.000000, reward total was -21.000000. running mean: -19.891030
episode 3314.000000, reward total was -20.000000. running mean: -19.892119
episode 3315.000000, reward total was -19.000000. running mean: -19.883198
episode 3316.000000, reward total was -19.000000. running mean: -19.874366
episode 3317.000000, reward total was -21.000000. running mean: -19.885622
episode 3318.000000, reward total was -20.000000. running mean: -19.886766
episode 3319.000000, reward total was -19.000000. running mean: -19.877899
episode 3320.000000, reward total was -19.000000. running mean: -19.869120
episode 3321.000000, reward total was -21.000000. running mean: -19.880428
episode 3322.000000, reward total was -18.000000. running mean: -19.861624
episode 3323.000000, reward total was -19.000000. running mean: -19.853008
episode 3324.000000, reward total was -21.000000. running mean: -19.864478
episode 3325.000000, rewa

episode 3422.000000, reward total was -21.000000. running mean: -19.816639
episode 3423.000000, reward total was -21.000000. running mean: -19.828473
episode 3424.000000, reward total was -19.000000. running mean: -19.820188
episode 3425.000000, reward total was -20.000000. running mean: -19.821986
episode 3426.000000, reward total was -18.000000. running mean: -19.803766
episode 3427.000000, reward total was -20.000000. running mean: -19.805729
episode 3428.000000, reward total was -21.000000. running mean: -19.817671
episode 3429.000000, reward total was -21.000000. running mean: -19.829495
episode 3430.000000, reward total was -21.000000. running mean: -19.841200
episode 3431.000000, reward total was -20.000000. running mean: -19.842788
episode 3432.000000, reward total was -19.000000. running mean: -19.834360
episode 3433.000000, reward total was -21.000000. running mean: -19.846016
episode 3434.000000, reward total was -17.000000. running mean: -19.817556
episode 3435.000000, rewa

episode 3532.000000, reward total was -19.000000. running mean: -19.798611
episode 3533.000000, reward total was -21.000000. running mean: -19.810625
episode 3534.000000, reward total was -21.000000. running mean: -19.822519
episode 3535.000000, reward total was -19.000000. running mean: -19.814294
episode 3536.000000, reward total was -20.000000. running mean: -19.816151
episode 3537.000000, reward total was -19.000000. running mean: -19.807989
episode 3538.000000, reward total was -19.000000. running mean: -19.799909
episode 3539.000000, reward total was -20.000000. running mean: -19.801910
episode 3540.000000, reward total was -18.000000. running mean: -19.783891
episode 3541.000000, reward total was -21.000000. running mean: -19.796052
episode 3542.000000, reward total was -19.000000. running mean: -19.788092
episode 3543.000000, reward total was -21.000000. running mean: -19.800211
episode 3544.000000, reward total was -20.000000. running mean: -19.802209
episode 3545.000000, rewa

episode 3642.000000, reward total was -21.000000. running mean: -19.732871
episode 3643.000000, reward total was -21.000000. running mean: -19.745542
episode 3644.000000, reward total was -19.000000. running mean: -19.738086
episode 3645.000000, reward total was -21.000000. running mean: -19.750706
episode 3646.000000, reward total was -20.000000. running mean: -19.753199
episode 3647.000000, reward total was -19.000000. running mean: -19.745667
episode 3648.000000, reward total was -19.000000. running mean: -19.738210
episode 3649.000000, reward total was -20.000000. running mean: -19.740828
episode 3650.000000, reward total was -21.000000. running mean: -19.753420
episode 3651.000000, reward total was -21.000000. running mean: -19.765885
episode 3652.000000, reward total was -19.000000. running mean: -19.758226
episode 3653.000000, reward total was -16.000000. running mean: -19.720644
episode 3654.000000, reward total was -18.000000. running mean: -19.703438
episode 3655.000000, rewa

episode 3752.000000, reward total was -19.000000. running mean: -19.759950
episode 3753.000000, reward total was -19.000000. running mean: -19.752350
episode 3754.000000, reward total was -18.000000. running mean: -19.734827
episode 3755.000000, reward total was -21.000000. running mean: -19.747479
episode 3756.000000, reward total was -20.000000. running mean: -19.750004
episode 3757.000000, reward total was -21.000000. running mean: -19.762504
episode 3758.000000, reward total was -20.000000. running mean: -19.764879
episode 3759.000000, reward total was -19.000000. running mean: -19.757230
episode 3760.000000, reward total was -20.000000. running mean: -19.759658
episode 3761.000000, reward total was -19.000000. running mean: -19.752061
episode 3762.000000, reward total was -20.000000. running mean: -19.754540
episode 3763.000000, reward total was -17.000000. running mean: -19.726995
episode 3764.000000, reward total was -20.000000. running mean: -19.729725
episode 3765.000000, rewa

episode 3862.000000, reward total was -21.000000. running mean: -19.640042
episode 3863.000000, reward total was -19.000000. running mean: -19.633642
episode 3864.000000, reward total was -21.000000. running mean: -19.647305
episode 3865.000000, reward total was -21.000000. running mean: -19.660832
episode 3866.000000, reward total was -19.000000. running mean: -19.654224
episode 3867.000000, reward total was -19.000000. running mean: -19.647682
episode 3868.000000, reward total was -21.000000. running mean: -19.661205
episode 3869.000000, reward total was -21.000000. running mean: -19.674593
episode 3870.000000, reward total was -18.000000. running mean: -19.657847
episode 3871.000000, reward total was -20.000000. running mean: -19.661268
episode 3872.000000, reward total was -21.000000. running mean: -19.674656
episode 3873.000000, reward total was -21.000000. running mean: -19.687909
episode 3874.000000, reward total was -21.000000. running mean: -19.701030
episode 3875.000000, rewa

episode 3972.000000, reward total was -19.000000. running mean: -19.802498
episode 3973.000000, reward total was -20.000000. running mean: -19.804473
episode 3974.000000, reward total was -21.000000. running mean: -19.816428
episode 3975.000000, reward total was -18.000000. running mean: -19.798264
episode 3976.000000, reward total was -19.000000. running mean: -19.790281
episode 3977.000000, reward total was -20.000000. running mean: -19.792378
episode 3978.000000, reward total was -18.000000. running mean: -19.774454
episode 3979.000000, reward total was -18.000000. running mean: -19.756710
episode 3980.000000, reward total was -21.000000. running mean: -19.769143
episode 3981.000000, reward total was -21.000000. running mean: -19.781451
episode 3982.000000, reward total was -19.000000. running mean: -19.773637
episode 3983.000000, reward total was -19.000000. running mean: -19.765900
episode 3984.000000, reward total was -20.000000. running mean: -19.768241
episode 3985.000000, rewa

episode 4082.000000, reward total was -18.000000. running mean: -19.660481
episode 4083.000000, reward total was -20.000000. running mean: -19.663877
episode 4084.000000, reward total was -21.000000. running mean: -19.677238
episode 4085.000000, reward total was -19.000000. running mean: -19.670465
episode 4086.000000, reward total was -19.000000. running mean: -19.663761
episode 4087.000000, reward total was -21.000000. running mean: -19.677123
episode 4088.000000, reward total was -20.000000. running mean: -19.680352
episode 4089.000000, reward total was -18.000000. running mean: -19.663548
episode 4090.000000, reward total was -20.000000. running mean: -19.666913
episode 4091.000000, reward total was -18.000000. running mean: -19.650244
episode 4092.000000, reward total was -20.000000. running mean: -19.653741
episode 4093.000000, reward total was -20.000000. running mean: -19.657204
episode 4094.000000, reward total was -21.000000. running mean: -19.670632
episode 4095.000000, rewa

episode 4192.000000, reward total was -18.000000. running mean: -19.520018
episode 4193.000000, reward total was -18.000000. running mean: -19.504818
episode 4194.000000, reward total was -19.000000. running mean: -19.499769
episode 4195.000000, reward total was -19.000000. running mean: -19.494772
episode 4196.000000, reward total was -15.000000. running mean: -19.449824
episode 4197.000000, reward total was -19.000000. running mean: -19.445326
episode 4198.000000, reward total was -20.000000. running mean: -19.450872
episode 4199.000000, reward total was -20.000000. running mean: -19.456364
episode 4200.000000, reward total was -20.000000. running mean: -19.461800
episode 4201.000000, reward total was -21.000000. running mean: -19.477182
episode 4202.000000, reward total was -20.000000. running mean: -19.482410
episode 4203.000000, reward total was -19.000000. running mean: -19.477586
episode 4204.000000, reward total was -19.000000. running mean: -19.472810
episode 4205.000000, rewa

episode 4302.000000, reward total was -20.000000. running mean: -19.518168
episode 4303.000000, reward total was -19.000000. running mean: -19.512987
episode 4304.000000, reward total was -17.000000. running mean: -19.487857
episode 4305.000000, reward total was -20.000000. running mean: -19.492978
episode 4306.000000, reward total was -18.000000. running mean: -19.478049
episode 4307.000000, reward total was -20.000000. running mean: -19.483268
episode 4308.000000, reward total was -20.000000. running mean: -19.488435
episode 4309.000000, reward total was -20.000000. running mean: -19.493551
episode 4310.000000, reward total was -19.000000. running mean: -19.488616
episode 4311.000000, reward total was -19.000000. running mean: -19.483729
episode 4312.000000, reward total was -16.000000. running mean: -19.448892
episode 4313.000000, reward total was -21.000000. running mean: -19.464403
episode 4314.000000, reward total was -21.000000. running mean: -19.479759
episode 4315.000000, rewa

episode 4412.000000, reward total was -19.000000. running mean: -19.579210
episode 4413.000000, reward total was -20.000000. running mean: -19.583418
episode 4414.000000, reward total was -20.000000. running mean: -19.587584
episode 4415.000000, reward total was -20.000000. running mean: -19.591708
episode 4416.000000, reward total was -20.000000. running mean: -19.595791
episode 4417.000000, reward total was -19.000000. running mean: -19.589833
episode 4418.000000, reward total was -18.000000. running mean: -19.573935
episode 4419.000000, reward total was -20.000000. running mean: -19.578196
episode 4420.000000, reward total was -18.000000. running mean: -19.562414
episode 4421.000000, reward total was -21.000000. running mean: -19.576790
episode 4422.000000, reward total was -20.000000. running mean: -19.581022
episode 4423.000000, reward total was -18.000000. running mean: -19.565211
episode 4424.000000, reward total was -20.000000. running mean: -19.569559
episode 4425.000000, rewa

episode 4522.000000, reward total was -20.000000. running mean: -19.432081
episode 4523.000000, reward total was -21.000000. running mean: -19.447760
episode 4524.000000, reward total was -18.000000. running mean: -19.433283
episode 4525.000000, reward total was -19.000000. running mean: -19.428950
episode 4526.000000, reward total was -18.000000. running mean: -19.414660
episode 4527.000000, reward total was -21.000000. running mean: -19.430514
episode 4528.000000, reward total was -19.000000. running mean: -19.426208
episode 4529.000000, reward total was -17.000000. running mean: -19.401946
episode 4530.000000, reward total was -21.000000. running mean: -19.417927
episode 4531.000000, reward total was -21.000000. running mean: -19.433748
episode 4532.000000, reward total was -19.000000. running mean: -19.429410
episode 4533.000000, reward total was -20.000000. running mean: -19.435116
episode 4534.000000, reward total was -21.000000. running mean: -19.450765
episode 4535.000000, rewa

episode 4632.000000, reward total was -19.000000. running mean: -19.248113
episode 4633.000000, reward total was -21.000000. running mean: -19.265632
episode 4634.000000, reward total was -20.000000. running mean: -19.272975
episode 4635.000000, reward total was -16.000000. running mean: -19.240246
episode 4636.000000, reward total was -19.000000. running mean: -19.237843
episode 4637.000000, reward total was -17.000000. running mean: -19.215465
episode 4638.000000, reward total was -19.000000. running mean: -19.213310
episode 4639.000000, reward total was -19.000000. running mean: -19.211177
episode 4640.000000, reward total was -20.000000. running mean: -19.219065
episode 4641.000000, reward total was -19.000000. running mean: -19.216874
episode 4642.000000, reward total was -19.000000. running mean: -19.214706
episode 4643.000000, reward total was -21.000000. running mean: -19.232559
episode 4644.000000, reward total was -21.000000. running mean: -19.250233
episode 4645.000000, rewa

episode 4742.000000, reward total was -19.000000. running mean: -19.439100
episode 4743.000000, reward total was -17.000000. running mean: -19.414709
episode 4744.000000, reward total was -15.000000. running mean: -19.370562
episode 4745.000000, reward total was -19.000000. running mean: -19.366856
episode 4746.000000, reward total was -19.000000. running mean: -19.363188
episode 4747.000000, reward total was -20.000000. running mean: -19.369556
episode 4748.000000, reward total was -21.000000. running mean: -19.385860
episode 4749.000000, reward total was -20.000000. running mean: -19.392002
episode 4750.000000, reward total was -19.000000. running mean: -19.388082
episode 4751.000000, reward total was -18.000000. running mean: -19.374201
episode 4752.000000, reward total was -19.000000. running mean: -19.370459
episode 4753.000000, reward total was -20.000000. running mean: -19.376754
episode 4754.000000, reward total was -20.000000. running mean: -19.382987
episode 4755.000000, rewa

episode 4852.000000, reward total was -20.000000. running mean: -19.296883
episode 4853.000000, reward total was -21.000000. running mean: -19.313914
episode 4854.000000, reward total was -17.000000. running mean: -19.290775
episode 4855.000000, reward total was -17.000000. running mean: -19.267867
episode 4856.000000, reward total was -21.000000. running mean: -19.285188
episode 4857.000000, reward total was -21.000000. running mean: -19.302336
episode 4858.000000, reward total was -19.000000. running mean: -19.299313
episode 4859.000000, reward total was -18.000000. running mean: -19.286320
episode 4860.000000, reward total was -18.000000. running mean: -19.273457
episode 4861.000000, reward total was -20.000000. running mean: -19.280722
episode 4862.000000, reward total was -19.000000. running mean: -19.277915
episode 4863.000000, reward total was -18.000000. running mean: -19.265136
episode 4864.000000, reward total was -19.000000. running mean: -19.262484
episode 4865.000000, rewa

episode 4962.000000, reward total was -21.000000. running mean: -19.352677
episode 4963.000000, reward total was -21.000000. running mean: -19.369151
episode 4964.000000, reward total was -20.000000. running mean: -19.375459
episode 4965.000000, reward total was -21.000000. running mean: -19.391704
episode 4966.000000, reward total was -20.000000. running mean: -19.397787
episode 4967.000000, reward total was -20.000000. running mean: -19.403809
episode 4968.000000, reward total was -19.000000. running mean: -19.399771
episode 4969.000000, reward total was -21.000000. running mean: -19.415774
episode 4970.000000, reward total was -20.000000. running mean: -19.421616
episode 4971.000000, reward total was -20.000000. running mean: -19.427400
episode 4972.000000, reward total was -18.000000. running mean: -19.413126
episode 4973.000000, reward total was -17.000000. running mean: -19.388995
episode 4974.000000, reward total was -18.000000. running mean: -19.375105
episode 4975.000000, rewa

episode 5072.000000, reward total was -17.000000. running mean: -19.167269
episode 5073.000000, reward total was -20.000000. running mean: -19.175596
episode 5074.000000, reward total was -19.000000. running mean: -19.173840
episode 5075.000000, reward total was -20.000000. running mean: -19.182102
episode 5076.000000, reward total was -20.000000. running mean: -19.190281
episode 5077.000000, reward total was -19.000000. running mean: -19.188378
episode 5078.000000, reward total was -20.000000. running mean: -19.196494
episode 5079.000000, reward total was -18.000000. running mean: -19.184529
episode 5080.000000, reward total was -21.000000. running mean: -19.202684
episode 5081.000000, reward total was -19.000000. running mean: -19.200657
episode 5082.000000, reward total was -20.000000. running mean: -19.208651
episode 5083.000000, reward total was -20.000000. running mean: -19.216564
episode 5084.000000, reward total was -21.000000. running mean: -19.234398
episode 5085.000000, rewa

episode 5182.000000, reward total was -21.000000. running mean: -19.136400
episode 5183.000000, reward total was -19.000000. running mean: -19.135036
episode 5184.000000, reward total was -20.000000. running mean: -19.143686
episode 5185.000000, reward total was -19.000000. running mean: -19.142249
episode 5186.000000, reward total was -20.000000. running mean: -19.150827
episode 5187.000000, reward total was -19.000000. running mean: -19.149318
episode 5188.000000, reward total was -19.000000. running mean: -19.147825
episode 5189.000000, reward total was -20.000000. running mean: -19.156347
episode 5190.000000, reward total was -21.000000. running mean: -19.174784
episode 5191.000000, reward total was -19.000000. running mean: -19.173036
episode 5192.000000, reward total was -20.000000. running mean: -19.181305
episode 5193.000000, reward total was -21.000000. running mean: -19.199492
episode 5194.000000, reward total was -20.000000. running mean: -19.207497
episode 5195.000000, rewa

episode 5292.000000, reward total was -21.000000. running mean: -19.204052
episode 5293.000000, reward total was -18.000000. running mean: -19.192011
episode 5294.000000, reward total was -20.000000. running mean: -19.200091
episode 5295.000000, reward total was -20.000000. running mean: -19.208090
episode 5296.000000, reward total was -19.000000. running mean: -19.206009
episode 5297.000000, reward total was -21.000000. running mean: -19.223949
episode 5298.000000, reward total was -19.000000. running mean: -19.221710
episode 5299.000000, reward total was -20.000000. running mean: -19.229493
episode 5300.000000, reward total was -20.000000. running mean: -19.237198
episode 5301.000000, reward total was -17.000000. running mean: -19.214826
episode 5302.000000, reward total was -19.000000. running mean: -19.212678
episode 5303.000000, reward total was -20.000000. running mean: -19.220551
episode 5304.000000, reward total was -21.000000. running mean: -19.238345
episode 5305.000000, rewa

episode 5402.000000, reward total was -19.000000. running mean: -19.146937
episode 5403.000000, reward total was -21.000000. running mean: -19.165468
episode 5404.000000, reward total was -19.000000. running mean: -19.163813
episode 5405.000000, reward total was -20.000000. running mean: -19.172175
episode 5406.000000, reward total was -21.000000. running mean: -19.190453
episode 5407.000000, reward total was -21.000000. running mean: -19.208549
episode 5408.000000, reward total was -21.000000. running mean: -19.226463
episode 5409.000000, reward total was -19.000000. running mean: -19.224198
episode 5410.000000, reward total was -20.000000. running mean: -19.231956
episode 5411.000000, reward total was -21.000000. running mean: -19.249637
episode 5412.000000, reward total was -19.000000. running mean: -19.247141
episode 5413.000000, reward total was -20.000000. running mean: -19.254669
episode 5414.000000, reward total was -20.000000. running mean: -19.262122
episode 5415.000000, rewa

episode 5512.000000, reward total was -20.000000. running mean: -19.035994
episode 5513.000000, reward total was -19.000000. running mean: -19.035634
episode 5514.000000, reward total was -19.000000. running mean: -19.035277
episode 5515.000000, reward total was -20.000000. running mean: -19.044925
episode 5516.000000, reward total was -20.000000. running mean: -19.054475
episode 5517.000000, reward total was -20.000000. running mean: -19.063931
episode 5518.000000, reward total was -19.000000. running mean: -19.063291
episode 5519.000000, reward total was -15.000000. running mean: -19.022658
episode 5520.000000, reward total was -20.000000. running mean: -19.032432
episode 5521.000000, reward total was -16.000000. running mean: -19.002108
episode 5522.000000, reward total was -17.000000. running mean: -18.982086
episode 5523.000000, reward total was -16.000000. running mean: -18.952266
episode 5524.000000, reward total was -17.000000. running mean: -18.932743
episode 5525.000000, rewa

episode 5622.000000, reward total was -21.000000. running mean: -18.915225
episode 5623.000000, reward total was -20.000000. running mean: -18.926073
episode 5624.000000, reward total was -20.000000. running mean: -18.936812
episode 5625.000000, reward total was -17.000000. running mean: -18.917444
episode 5626.000000, reward total was -18.000000. running mean: -18.908270
episode 5627.000000, reward total was -17.000000. running mean: -18.889187
episode 5628.000000, reward total was -18.000000. running mean: -18.880295
episode 5629.000000, reward total was -19.000000. running mean: -18.881492
episode 5630.000000, reward total was -16.000000. running mean: -18.852677
episode 5631.000000, reward total was -21.000000. running mean: -18.874151
episode 5632.000000, reward total was -19.000000. running mean: -18.875409
episode 5633.000000, reward total was -19.000000. running mean: -18.876655
episode 5634.000000, reward total was -21.000000. running mean: -18.897889
episode 5635.000000, rewa

episode 5732.000000, reward total was -21.000000. running mean: -18.984740
episode 5733.000000, reward total was -16.000000. running mean: -18.954893
episode 5734.000000, reward total was -18.000000. running mean: -18.945344
episode 5735.000000, reward total was -17.000000. running mean: -18.925890
episode 5736.000000, reward total was -19.000000. running mean: -18.926632
episode 5737.000000, reward total was -20.000000. running mean: -18.937365
episode 5738.000000, reward total was -19.000000. running mean: -18.937992
episode 5739.000000, reward total was -19.000000. running mean: -18.938612
episode 5740.000000, reward total was -18.000000. running mean: -18.929226
episode 5741.000000, reward total was -20.000000. running mean: -18.939933
episode 5742.000000, reward total was -21.000000. running mean: -18.960534
episode 5743.000000, reward total was -19.000000. running mean: -18.960929
episode 5744.000000, reward total was -16.000000. running mean: -18.931319
episode 5745.000000, rewa

episode 5842.000000, reward total was -20.000000. running mean: -18.869711
episode 5843.000000, reward total was -17.000000. running mean: -18.851014
episode 5844.000000, reward total was -19.000000. running mean: -18.852504
episode 5845.000000, reward total was -19.000000. running mean: -18.853979
episode 5846.000000, reward total was -17.000000. running mean: -18.835439
episode 5847.000000, reward total was -16.000000. running mean: -18.807085
episode 5848.000000, reward total was -19.000000. running mean: -18.809014
episode 5849.000000, reward total was -20.000000. running mean: -18.820924
episode 5850.000000, reward total was -20.000000. running mean: -18.832715
episode 5851.000000, reward total was -17.000000. running mean: -18.814387
episode 5852.000000, reward total was -18.000000. running mean: -18.806244
episode 5853.000000, reward total was -19.000000. running mean: -18.808181
episode 5854.000000, reward total was -19.000000. running mean: -18.810099
episode 5855.000000, rewa

episode 5952.000000, reward total was -14.000000. running mean: -18.768323
episode 5953.000000, reward total was -19.000000. running mean: -18.770640
episode 5954.000000, reward total was -17.000000. running mean: -18.752933
episode 5955.000000, reward total was -20.000000. running mean: -18.765404
episode 5956.000000, reward total was -15.000000. running mean: -18.727750
episode 5957.000000, reward total was -21.000000. running mean: -18.750473
episode 5958.000000, reward total was -16.000000. running mean: -18.722968
episode 5959.000000, reward total was -16.000000. running mean: -18.695738
episode 5960.000000, reward total was -20.000000. running mean: -18.708781
episode 5961.000000, reward total was -20.000000. running mean: -18.721693
episode 5962.000000, reward total was -19.000000. running mean: -18.724476
episode 5963.000000, reward total was -17.000000. running mean: -18.707231
episode 5964.000000, reward total was -20.000000. running mean: -18.720159
episode 5965.000000, rewa

In [5]:
play_game(env, model)

See here for more information: https://www.gymlibrary.ml/content/api/[0m
  "The argument mode in render method is deprecated; "
  "No render fps was declared in the environment (env.metadata['render_fps'] is None or not defined), rendering may occur at inconsistent fps."


Episode finished without success, accumulated reward = -6.0
