In [1]:
import gym
import numpy as np
# Gym environment id; passed to gym.make() when the environment is created.
rm='Pong-v4'

In [2]:
%matplotlib inline
from matplotlib import animation
import matplotlib.pyplot as plt
from IPython.display import display, HTML

def display_frames_as_gif(frames):
    """Show a sequence of image frames as an inline JavaScript animation.

    Args:
        frames: sequence of image arrays accepted by ``plt.imshow``. The
            figure is sized from the first frame's height and width.

    Returns:
        None. If ``frames`` is empty nothing is displayed.
    """
    if len(frames) == 0:
        # Nothing to animate; indexing frames[0] below would raise IndexError.
        return

    height, width = frames[0].shape[:2]
    # Keep an explicit handle to the figure instead of relying on plt.gcf()
    # and the animation's private `_fig` attribute.
    fig = plt.figure(figsize=(width / 72.0, height / 72.0), dpi=144)
    patch = plt.imshow(frames[0])
    plt.axis('off')

    def animate(i):
        # Swap the image data of the single imshow artist for frame i.
        patch.set_data(frames[i])

    anim = animation.FuncAnimation(fig, animate, frames=len(frames), interval=50)
    # Close the figure before rendering the HTML, as the original did
    # (presumably so the inline backend does not also draw a static copy).
    plt.close(fig)
    display(HTML(anim.to_jshtml()))

In [3]:
from gym.wrappers import AtariPreprocessing
# Assigns an attribute on the imported gym module.
# NOTE(review): nothing in this file shows whether gym consults this attribute - confirm it has any effect.
gym.new_step_api=True
# Environment instance that is later passed to train_model.
env = gym.make(rm)
# model initialization: a two-layer policy network kept as a dict of numpy weight arrays.
# NOTE(review): no random seed is set in the visible cells, so the initial weights differ between runs.
H = 800 # number of hidden layer neurons
D = 80 * 80 # input dimensionality: 80x80 grid
model = {}
# W1 has shape (H, D); the draws are scaled by 1/sqrt(D).
model['W1'] = np.random.randn(H,D) / np.sqrt(D) # "Xavier" initialization
# W2 has shape (H,); the draws are scaled by 1/sqrt(H).
model['W2'] = np.random.randn(H) / np.sqrt(H)
# hyperparameters (read as module-level globals by the training code)
batch_size = 10 # every how many episodes to do a param update?
learning_rate = 1e-4 # step size applied in the RMSProp parameter update
# alternative value left by the author: learning_rate = 1e-3
gamma = 0.99 # discount factor for reward
decay_rate = 0.99 # decay factor for RMSProp leaky sum of grad^2
# Both dicts mirror the keys and shapes of `model` and start out as zeros.
grad_buffer = { k : np.zeros_like(v) for k,v in model.items() } # update buffers that add up gradients over a batch
rmsprop_cache = { k : np.zeros_like(v) for k,v in model.items() } # rmsprop memory

def sigmoid(x):
  """Logistic "squashing" function 1 / (1 + exp(-x)), mapping inputs to [0, 1].

  Evaluated as exp(-log(1 + exp(-x))) through np.logaddexp, so large negative
  inputs no longer overflow inside exp (which raised a RuntimeWarning in the
  direct formula). Results agree with the direct formula up to floating-point
  rounding.

  Args:
    x: scalar or numpy array of real values.

  Returns:
    numpy float (for scalar input) or array of the same shape as x.
  """
  # logaddexp(0, -x) == log(exp(0) + exp(-x)) == log(1 + exp(-x)), computed stably.
  return np.exp(-np.logaddexp(0.0, -x))

def prepro(I):
  """ prepro 210x160x3 uint8 frame into 6400 (80x80) 1D float vector

  The frame is cropped to rows 35..194, every second row and column of
  channel 0 is kept, and the result is binarised: pixels equal to 0, 144 or
  109 (the two background values) become 0.0 and every other pixel becomes 1.0.

  Unlike the earlier in-place version, the input frame is never modified: the
  crop/downsample slices are views, so assigning into them wrote through to
  the caller's array.

  Args:
    I: array-like frame indexable as [row, column, channel].

  Returns:
    1D float array; 6400 elements for a 210x160x3 input.
  """
  frame = np.asarray(I)
  # crop and downsample by factor of 2 in one slice (still a read-only use of the view)
  small = frame[35:195:2, ::2, 0]
  # foreground = anything that is neither background type 1/2 nor already zero
  foreground = (small != 0) & (small != 144) & (small != 109)
  return foreground.astype(float).ravel()

def discount_rewards(r, discount_factor=None):
  """ take 1D float array of rewards and compute discounted reward

  Walks the rewards from last to first, accumulating
  running = running * factor + r[t], and restarts the accumulation at every
  non-zero reward (a game boundary, pong specific!).

  Args:
    r: array-like of rewards. A 1D array or an (N, 1) column is processed in
      time order; the result has the same shape.
    discount_factor: optional discount in place of the module-level `gamma`,
      which is used when this is None.

  Returns:
    Float array of discounted rewards. Integer input is promoted to float so
    the discounted values are not truncated; floating input keeps its dtype.
  """
  rewards = np.asarray(r)
  factor = gamma if discount_factor is None else discount_factor
  flat = rewards.ravel()
  # np.zeros_like(r) would inherit an integer dtype and silently truncate.
  out_dtype = rewards.dtype if np.issubdtype(rewards.dtype, np.floating) else float
  discounted = np.zeros(flat.shape, dtype=out_dtype)
  running_add = 0.0
  for t in range(flat.size - 1, -1, -1):
    if flat[t] != 0:
      running_add = 0.0 # reset the sum, since this was a game boundary (pong specific!)
    running_add = running_add * factor + flat[t]
    discounted[t] = running_add
  return discounted.reshape(rewards.shape)

def policy_forward(x):
  """Forward pass through the two-layer policy stored in the global `model`.

  Args:
    x: input vector multiplied by model['W1'].

  Returns:
    (p, h): p is the probability of taking action 2, h is the hidden state
    after the ReLU.
  """
  weights_in, weights_out = model['W1'], model['W2']
  pre_activation = np.dot(weights_in, x)
  # ReLU nonlinearity: negative pre-activations become zero
  h = np.where(pre_activation < 0, 0, pre_activation)
  p = sigmoid(np.dot(weights_out, h))
  return p, h

def policy_backward(epx, eph, epdlogp):
  """Backward pass over one episode, using model['W2'] from the global `model`.

  Args:
    epx: stacked inputs of the episode.
    eph: stacked hidden states of the episode (same shape as the hidden gradient).
    epdlogp: stacked, advantage-weighted output gradients.

  Returns:
    dict with the gradients for 'W1' and 'W2'.
  """
  grad_w2 = np.dot(eph.T, epdlogp).ravel()
  grad_hidden = np.outer(epdlogp, model['W2'])
  # backprop through the ReLU: no gradient where the hidden unit was not active
  grad_hidden = np.where(eph <= 0, 0, grad_hidden)
  grad_w1 = np.dot(grad_hidden.T, epx)
  return {'W1': grad_w1, 'W2': grad_w2}

def model_step(model, observation, prev_x):
  """Pick the next action greedily from the current observation.

  Args:
    model: accepted for symmetry with the callers but not read here; the
      forward pass goes through policy_forward, which uses the global model.
    observation: raw frame handed to prepro.
    prev_x: preprocessed previous frame, or None on the first step.

  Returns:
    (action, cur_x): action is 2 when the policy probability is >= 0.5 and 3
    otherwise; cur_x is the preprocessed current frame to pass back in as
    prev_x on the next call.
  """
  cur_x = prepro(observation)
  # network input is the difference image; all zeros when there is no previous frame
  if prev_x is None:
    x = np.zeros(D)
  else:
    x = cur_x - prev_x

  # deterministic choice (no sampling): threshold the probability of action 2
  aprob, _ = policy_forward(x)
  if aprob >= 0.5:
    action = 2
  else:
    action = 3

  return action, cur_x

def play_game(env, model, max_steps=1000):
  """Play one episode with the greedy policy and show it as an animation.

  Args:
    env: environment providing reset(), render(mode='rgb_array'), step() and close().
    model: forwarded to model_step.
    max_steps: maximum number of environment steps to play (default 1000,
      the previously hard-coded limit).

  Side effects:
    Prints how the episode ended, closes `env`, and displays the recorded
    frames (skipped when no frame was recorded).
  """
  observation = env.reset()

  frames = []
  cumulated_reward = 0

  prev_x = None # used in computing the difference frame

  for t in range(max_steps):
    frames.append(env.render(mode = 'rgb_array'))
    action, prev_x = model_step(model, observation, prev_x)
    observation, reward, done, info = env.step(action)
    cumulated_reward += reward
    if done:
      print("Episode finished after {} timesteps, accumulated reward = {}".format(t+1, cumulated_reward))
      break
  else:
    # Only reached when the loop ran out of steps without `done`; previously
    # this message was printed after a finished episode as well.
    print("Episode finished without success, accumulated reward = {}".format(cumulated_reward))
  env.close()
  if frames:
    display_frames_as_gif(frames)

def train_model(env, model, total_episodes = 100):
  """Train the policy network with batched policy-gradient updates.

  Each step feeds the difference of consecutive preprocessed frames through
  policy_forward, samples action 2 or 3, and records the intermediates. When
  an episode ends, the discounted and standardised rewards weight the
  recorded gradients, which are accumulated in the global `grad_buffer`.
  Every `batch_size` episodes an RMSProp step (globals `rmsprop_cache`,
  `decay_rate`, `learning_rate`) updates `model` in place.

  Args:
    env: environment providing reset() and step(action) -> (observation, reward, done, info).
    model: dict of weight arrays, updated in place. policy_forward and
      policy_backward read the module-level `model`, so this should be that
      same dict.
    total_episodes: number of episodes to train for. Values <= 0 return
      immediately with an empty history.

  Returns:
    list of (episode_number, reward_sum, running_reward) tuples, one per episode.
  """
  hist = []
  if total_episodes <= 0:
    # The previous `==` termination test could never fire here and looped forever.
    return hist

  observation = env.reset()

  prev_x = None # used in computing the difference frame
  xs,hs,dlogps,drs = [],[],[],[]
  running_reward = None
  reward_sum = 0
  episode_number = 0

  while True:
    # preprocess the observation, set input to network to be difference image
    cur_x = prepro(observation)
    x = cur_x - prev_x if prev_x is not None else np.zeros(D)
    prev_x = cur_x

    # forward the policy network and sample an action from the returned probability
    aprob, h = policy_forward(x)
    action = 2 if np.random.uniform() < aprob else 3 # roll the dice!

    # record various intermediates (needed later for backprop)
    xs.append(x) # observation
    hs.append(h) # hidden state
    y = 1 if action == 2 else 0 # a "fake label"
    dlogps.append(y - aprob) # grad that encourages the action that was taken to be taken (see http://cs231n.github.io/neural-networks-2/#losses if confused)

    # step the environment and get new measurements
    observation, reward, done, info = env.step(action)
    reward_sum += reward

    drs.append(reward) # record reward (has to be done after we call step() to get reward for previous action)

    if not done:
      continue

    # an episode finished
    episode_number += 1

    # stack together all inputs, hidden states, action gradients, and rewards for this episode
    epx = np.vstack(xs)
    eph = np.vstack(hs)
    epdlogp = np.vstack(dlogps)
    epr = np.vstack(drs)
    xs,hs,dlogps,drs = [],[],[],[] # reset array memory

    # compute the discounted reward backwards through time
    discounted_epr = discount_rewards(epr)
    # standardize the rewards to be unit normal (helps control the gradient estimator variance)
    discounted_epr -= np.mean(discounted_epr)
    reward_std = np.std(discounted_epr)
    if reward_std > 0:
      discounted_epr /= reward_std
    # else: every centred value is already zero; dividing by zero would turn
    # the gradients (and then the weights) into NaN.

    epdlogp *= discounted_epr # modulate the gradient with advantage (PG magic happens right here.)
    grad = policy_backward(epx, eph, epdlogp)
    for k in model: grad_buffer[k] += grad[k] # accumulate grad over batch

    # perform rmsprop parameter update every batch_size episodes
    if episode_number % batch_size == 0:
      for k,v in model.items():
        g = grad_buffer[k] # gradient
        rmsprop_cache[k] = decay_rate * rmsprop_cache[k] + (1 - decay_rate) * g**2
        model[k] += learning_rate * g / (np.sqrt(rmsprop_cache[k]) + 1e-5)
        grad_buffer[k] = np.zeros_like(v) # reset batch gradient buffer

    # boring book-keeping: exponentially smoothed episode reward
    running_reward = reward_sum if running_reward is None else running_reward * 0.99 + reward_sum * 0.01
    hist.append((episode_number, reward_sum, running_reward))
    print ('episode %f, reward total was %f. running mean: %f' % (episode_number, reward_sum, running_reward))
    reward_sum = 0
    observation = env.reset() # reset env
    prev_x = None
    # `>=` rather than `==` so a non-integer total_episodes still terminates
    if episode_number >= total_episodes:
      return hist

  deprecation(
  deprecation(


In [4]:
# Inspect the environment's action space.
env.action_space

Discrete(6)

In [5]:
# List the action names so the action indices 2 and 3 used by the policy can be mapped to their meanings.
env.unwrapped.get_action_meanings()

['NOOP', 'FIRE', 'RIGHT', 'LEFT', 'RIGHTFIRE', 'LEFTFIRE']

In [6]:
# Train for 6000 episodes and keep the per-episode history in hist1; %time reports how long the run takes.
%time hist1 = train_model(env, model, total_episodes=6000)

  logger.deprecation(


episode 1.000000, reward total was -21.000000. running mean: -21.000000
episode 2.000000, reward total was -20.000000. running mean: -20.990000
episode 3.000000, reward total was -21.000000. running mean: -20.990100
episode 4.000000, reward total was -21.000000. running mean: -20.990199
episode 5.000000, reward total was -21.000000. running mean: -20.990297
episode 6.000000, reward total was -20.000000. running mean: -20.980394
episode 7.000000, reward total was -21.000000. running mean: -20.980590
episode 8.000000, reward total was -20.000000. running mean: -20.970784
episode 9.000000, reward total was -20.000000. running mean: -20.961076
episode 10.000000, reward total was -21.000000. running mean: -20.961466
episode 11.000000, reward total was -21.000000. running mean: -20.961851
episode 12.000000, reward total was -21.000000. running mean: -20.962232
episode 13.000000, reward total was -21.000000. running mean: -20.962610
episode 14.000000, reward total was -19.000000. running mean

episode 114.000000, reward total was -20.000000. running mean: -20.555413
episode 115.000000, reward total was -21.000000. running mean: -20.559859
episode 116.000000, reward total was -20.000000. running mean: -20.554260
episode 117.000000, reward total was -20.000000. running mean: -20.548718
episode 118.000000, reward total was -21.000000. running mean: -20.553230
episode 119.000000, reward total was -21.000000. running mean: -20.557698
episode 120.000000, reward total was -21.000000. running mean: -20.562121
episode 121.000000, reward total was -19.000000. running mean: -20.546500
episode 122.000000, reward total was -21.000000. running mean: -20.551035
episode 123.000000, reward total was -21.000000. running mean: -20.555525
episode 124.000000, reward total was -21.000000. running mean: -20.559969
episode 125.000000, reward total was -19.000000. running mean: -20.544370
episode 126.000000, reward total was -18.000000. running mean: -20.518926
episode 127.000000, reward total was -

episode 225.000000, reward total was -21.000000. running mean: -20.414214
episode 226.000000, reward total was -21.000000. running mean: -20.420072
episode 227.000000, reward total was -21.000000. running mean: -20.425871
episode 228.000000, reward total was -21.000000. running mean: -20.431612
episode 229.000000, reward total was -20.000000. running mean: -20.427296
episode 230.000000, reward total was -21.000000. running mean: -20.433023
episode 231.000000, reward total was -18.000000. running mean: -20.408693
episode 232.000000, reward total was -21.000000. running mean: -20.414606
episode 233.000000, reward total was -21.000000. running mean: -20.420460
episode 234.000000, reward total was -20.000000. running mean: -20.416256
episode 235.000000, reward total was -20.000000. running mean: -20.412093
episode 236.000000, reward total was -21.000000. running mean: -20.417972
episode 237.000000, reward total was -20.000000. running mean: -20.413792
episode 238.000000, reward total was -

episode 336.000000, reward total was -21.000000. running mean: -20.336317
episode 337.000000, reward total was -20.000000. running mean: -20.332954
episode 338.000000, reward total was -19.000000. running mean: -20.319625
episode 339.000000, reward total was -21.000000. running mean: -20.326428
episode 340.000000, reward total was -20.000000. running mean: -20.323164
episode 341.000000, reward total was -20.000000. running mean: -20.319932
episode 342.000000, reward total was -21.000000. running mean: -20.326733
episode 343.000000, reward total was -21.000000. running mean: -20.333466
episode 344.000000, reward total was -21.000000. running mean: -20.340131
episode 345.000000, reward total was -21.000000. running mean: -20.346730
episode 346.000000, reward total was -21.000000. running mean: -20.353263
episode 347.000000, reward total was -21.000000. running mean: -20.359730
episode 348.000000, reward total was -20.000000. running mean: -20.356133
episode 349.000000, reward total was -

episode 447.000000, reward total was -19.000000. running mean: -20.344200
episode 448.000000, reward total was -21.000000. running mean: -20.350758
episode 449.000000, reward total was -20.000000. running mean: -20.347251
episode 450.000000, reward total was -20.000000. running mean: -20.343778
episode 451.000000, reward total was -20.000000. running mean: -20.340340
episode 452.000000, reward total was -21.000000. running mean: -20.346937
episode 453.000000, reward total was -21.000000. running mean: -20.353468
episode 454.000000, reward total was -21.000000. running mean: -20.359933
episode 455.000000, reward total was -21.000000. running mean: -20.366334
episode 456.000000, reward total was -21.000000. running mean: -20.372670
episode 457.000000, reward total was -21.000000. running mean: -20.378944
episode 458.000000, reward total was -20.000000. running mean: -20.375154
episode 459.000000, reward total was -21.000000. running mean: -20.381403
episode 460.000000, reward total was -

episode 558.000000, reward total was -21.000000. running mean: -20.357698
episode 559.000000, reward total was -20.000000. running mean: -20.354121
episode 560.000000, reward total was -21.000000. running mean: -20.360580
episode 561.000000, reward total was -21.000000. running mean: -20.366974
episode 562.000000, reward total was -20.000000. running mean: -20.363305
episode 563.000000, reward total was -21.000000. running mean: -20.369672
episode 564.000000, reward total was -21.000000. running mean: -20.375975
episode 565.000000, reward total was -21.000000. running mean: -20.382215
episode 566.000000, reward total was -21.000000. running mean: -20.388393
episode 567.000000, reward total was -19.000000. running mean: -20.374509
episode 568.000000, reward total was -21.000000. running mean: -20.380764
episode 569.000000, reward total was -21.000000. running mean: -20.386956
episode 570.000000, reward total was -21.000000. running mean: -20.393087
episode 571.000000, reward total was -

episode 669.000000, reward total was -21.000000. running mean: -20.294671
episode 670.000000, reward total was -20.000000. running mean: -20.291724
episode 671.000000, reward total was -20.000000. running mean: -20.288807
episode 672.000000, reward total was -20.000000. running mean: -20.285919
episode 673.000000, reward total was -18.000000. running mean: -20.263060
episode 674.000000, reward total was -21.000000. running mean: -20.270429
episode 675.000000, reward total was -21.000000. running mean: -20.277725
episode 676.000000, reward total was -19.000000. running mean: -20.264948
episode 677.000000, reward total was -20.000000. running mean: -20.262298
episode 678.000000, reward total was -18.000000. running mean: -20.239675
episode 679.000000, reward total was -20.000000. running mean: -20.237278
episode 680.000000, reward total was -20.000000. running mean: -20.234906
episode 681.000000, reward total was -19.000000. running mean: -20.222557
episode 682.000000, reward total was -

episode 780.000000, reward total was -21.000000. running mean: -20.130924
episode 781.000000, reward total was -19.000000. running mean: -20.119615
episode 782.000000, reward total was -20.000000. running mean: -20.118419
episode 783.000000, reward total was -20.000000. running mean: -20.117235
episode 784.000000, reward total was -19.000000. running mean: -20.106062
episode 785.000000, reward total was -21.000000. running mean: -20.115002
episode 786.000000, reward total was -21.000000. running mean: -20.123852
episode 787.000000, reward total was -21.000000. running mean: -20.132613
episode 788.000000, reward total was -19.000000. running mean: -20.121287
episode 789.000000, reward total was -19.000000. running mean: -20.110074
episode 790.000000, reward total was -20.000000. running mean: -20.108973
episode 791.000000, reward total was -21.000000. running mean: -20.117884
episode 792.000000, reward total was -20.000000. running mean: -20.116705
episode 793.000000, reward total was -

episode 891.000000, reward total was -21.000000. running mean: -20.139061
episode 892.000000, reward total was -21.000000. running mean: -20.147670
episode 893.000000, reward total was -21.000000. running mean: -20.156194
episode 894.000000, reward total was -19.000000. running mean: -20.144632
episode 895.000000, reward total was -17.000000. running mean: -20.113185
episode 896.000000, reward total was -21.000000. running mean: -20.122053
episode 897.000000, reward total was -21.000000. running mean: -20.130833
episode 898.000000, reward total was -21.000000. running mean: -20.139525
episode 899.000000, reward total was -21.000000. running mean: -20.148129
episode 900.000000, reward total was -20.000000. running mean: -20.146648
episode 901.000000, reward total was -21.000000. running mean: -20.155182
episode 902.000000, reward total was -20.000000. running mean: -20.153630
episode 903.000000, reward total was -18.000000. running mean: -20.132093
episode 904.000000, reward total was -

episode 1002.000000, reward total was -20.000000. running mean: -20.173433
episode 1003.000000, reward total was -21.000000. running mean: -20.181699
episode 1004.000000, reward total was -21.000000. running mean: -20.189882
episode 1005.000000, reward total was -18.000000. running mean: -20.167983
episode 1006.000000, reward total was -20.000000. running mean: -20.166303
episode 1007.000000, reward total was -18.000000. running mean: -20.144640
episode 1008.000000, reward total was -21.000000. running mean: -20.153194
episode 1009.000000, reward total was -20.000000. running mean: -20.151662
episode 1010.000000, reward total was -21.000000. running mean: -20.160145
episode 1011.000000, reward total was -19.000000. running mean: -20.148544
episode 1012.000000, reward total was -19.000000. running mean: -20.137058
episode 1013.000000, reward total was -20.000000. running mean: -20.135688
episode 1014.000000, reward total was -21.000000. running mean: -20.144331
episode 1015.000000, rewa

episode 1112.000000, reward total was -21.000000. running mean: -20.078501
episode 1113.000000, reward total was -19.000000. running mean: -20.067716
episode 1114.000000, reward total was -19.000000. running mean: -20.057039
episode 1115.000000, reward total was -20.000000. running mean: -20.056469
episode 1116.000000, reward total was -21.000000. running mean: -20.065904
episode 1117.000000, reward total was -21.000000. running mean: -20.075245
episode 1118.000000, reward total was -19.000000. running mean: -20.064492
episode 1119.000000, reward total was -20.000000. running mean: -20.063848
episode 1120.000000, reward total was -21.000000. running mean: -20.073209
episode 1121.000000, reward total was -21.000000. running mean: -20.082477
episode 1122.000000, reward total was -20.000000. running mean: -20.081652
episode 1123.000000, reward total was -21.000000. running mean: -20.090836
episode 1124.000000, reward total was -21.000000. running mean: -20.099927
episode 1125.000000, rewa

episode 1222.000000, reward total was -21.000000. running mean: -20.143935
episode 1223.000000, reward total was -19.000000. running mean: -20.132495
episode 1224.000000, reward total was -20.000000. running mean: -20.131170
episode 1225.000000, reward total was -19.000000. running mean: -20.119859
episode 1226.000000, reward total was -20.000000. running mean: -20.118660
episode 1227.000000, reward total was -21.000000. running mean: -20.127473
episode 1228.000000, reward total was -21.000000. running mean: -20.136199
episode 1229.000000, reward total was -21.000000. running mean: -20.144837
episode 1230.000000, reward total was -20.000000. running mean: -20.143388
episode 1231.000000, reward total was -21.000000. running mean: -20.151954
episode 1232.000000, reward total was -19.000000. running mean: -20.140435
episode 1233.000000, reward total was -18.000000. running mean: -20.119030
episode 1234.000000, reward total was -20.000000. running mean: -20.117840
episode 1235.000000, rewa

episode 1332.000000, reward total was -21.000000. running mean: -20.003378
episode 1333.000000, reward total was -16.000000. running mean: -19.963345
episode 1334.000000, reward total was -21.000000. running mean: -19.973711
episode 1335.000000, reward total was -20.000000. running mean: -19.973974
episode 1336.000000, reward total was -20.000000. running mean: -19.974234
episode 1337.000000, reward total was -21.000000. running mean: -19.984492
episode 1338.000000, reward total was -21.000000. running mean: -19.994647
episode 1339.000000, reward total was -20.000000. running mean: -19.994701
episode 1340.000000, reward total was -21.000000. running mean: -20.004754
episode 1341.000000, reward total was -20.000000. running mean: -20.004706
episode 1342.000000, reward total was -21.000000. running mean: -20.014659
episode 1343.000000, reward total was -20.000000. running mean: -20.014512
episode 1344.000000, reward total was -19.000000. running mean: -20.004367
episode 1345.000000, rewa

episode 1442.000000, reward total was -21.000000. running mean: -19.987555
episode 1443.000000, reward total was -21.000000. running mean: -19.997680
episode 1444.000000, reward total was -21.000000. running mean: -20.007703
episode 1445.000000, reward total was -21.000000. running mean: -20.017626
episode 1446.000000, reward total was -20.000000. running mean: -20.017450
episode 1447.000000, reward total was -19.000000. running mean: -20.007275
episode 1448.000000, reward total was -18.000000. running mean: -19.987202
episode 1449.000000, reward total was -19.000000. running mean: -19.977330
episode 1450.000000, reward total was -21.000000. running mean: -19.987557
episode 1451.000000, reward total was -19.000000. running mean: -19.977682
episode 1452.000000, reward total was -21.000000. running mean: -19.987905
episode 1453.000000, reward total was -21.000000. running mean: -19.998026
episode 1454.000000, reward total was -19.000000. running mean: -19.988045
episode 1455.000000, rewa

episode 1552.000000, reward total was -19.000000. running mean: -19.900076
episode 1553.000000, reward total was -20.000000. running mean: -19.901075
episode 1554.000000, reward total was -21.000000. running mean: -19.912065
episode 1555.000000, reward total was -19.000000. running mean: -19.902944
episode 1556.000000, reward total was -21.000000. running mean: -19.913914
episode 1557.000000, reward total was -21.000000. running mean: -19.924775
episode 1558.000000, reward total was -19.000000. running mean: -19.915528
episode 1559.000000, reward total was -21.000000. running mean: -19.926372
episode 1560.000000, reward total was -20.000000. running mean: -19.927109
episode 1561.000000, reward total was -20.000000. running mean: -19.927837
episode 1562.000000, reward total was -21.000000. running mean: -19.938559
episode 1563.000000, reward total was -21.000000. running mean: -19.949174
episode 1564.000000, reward total was -19.000000. running mean: -19.939682
episode 1565.000000, rewa

episode 1662.000000, reward total was -21.000000. running mean: -19.870372
episode 1663.000000, reward total was -20.000000. running mean: -19.871668
episode 1664.000000, reward total was -21.000000. running mean: -19.882951
episode 1665.000000, reward total was -20.000000. running mean: -19.884122
episode 1666.000000, reward total was -21.000000. running mean: -19.895280
episode 1667.000000, reward total was -21.000000. running mean: -19.906328
episode 1668.000000, reward total was -18.000000. running mean: -19.887264
episode 1669.000000, reward total was -20.000000. running mean: -19.888392
episode 1670.000000, reward total was -21.000000. running mean: -19.899508
episode 1671.000000, reward total was -20.000000. running mean: -19.900513
episode 1672.000000, reward total was -21.000000. running mean: -19.911508
episode 1673.000000, reward total was -21.000000. running mean: -19.922393
episode 1674.000000, reward total was -21.000000. running mean: -19.933169
episode 1675.000000, rewa

episode 1772.000000, reward total was -21.000000. running mean: -19.862341
episode 1773.000000, reward total was -21.000000. running mean: -19.873718
episode 1774.000000, reward total was -19.000000. running mean: -19.864981
episode 1775.000000, reward total was -19.000000. running mean: -19.856331
episode 1776.000000, reward total was -20.000000. running mean: -19.857768
episode 1777.000000, reward total was -20.000000. running mean: -19.859190
episode 1778.000000, reward total was -18.000000. running mean: -19.840598
episode 1779.000000, reward total was -18.000000. running mean: -19.822192
episode 1780.000000, reward total was -19.000000. running mean: -19.813970
episode 1781.000000, reward total was -17.000000. running mean: -19.785831
episode 1782.000000, reward total was -21.000000. running mean: -19.797972
episode 1783.000000, reward total was -19.000000. running mean: -19.789993
episode 1784.000000, reward total was -21.000000. running mean: -19.802093
episode 1785.000000, rewa

episode 1882.000000, reward total was -20.000000. running mean: -19.977691
episode 1883.000000, reward total was -21.000000. running mean: -19.987914
episode 1884.000000, reward total was -20.000000. running mean: -19.988035
episode 1885.000000, reward total was -21.000000. running mean: -19.998154
episode 1886.000000, reward total was -20.000000. running mean: -19.998173
episode 1887.000000, reward total was -14.000000. running mean: -19.938191
episode 1888.000000, reward total was -18.000000. running mean: -19.918809
episode 1889.000000, reward total was -20.000000. running mean: -19.919621
episode 1890.000000, reward total was -19.000000. running mean: -19.910425
episode 1891.000000, reward total was -21.000000. running mean: -19.921321
episode 1892.000000, reward total was -20.000000. running mean: -19.922108
episode 1893.000000, reward total was -20.000000. running mean: -19.922886
episode 1894.000000, reward total was -20.000000. running mean: -19.923658
episode 1895.000000, rewa

episode 1992.000000, reward total was -21.000000. running mean: -19.895452
episode 1993.000000, reward total was -20.000000. running mean: -19.896497
episode 1994.000000, reward total was -17.000000. running mean: -19.867532
episode 1995.000000, reward total was -18.000000. running mean: -19.848857
episode 1996.000000, reward total was -17.000000. running mean: -19.820369
episode 1997.000000, reward total was -21.000000. running mean: -19.832165
episode 1998.000000, reward total was -19.000000. running mean: -19.823843
episode 1999.000000, reward total was -20.000000. running mean: -19.825605
episode 2000.000000, reward total was -19.000000. running mean: -19.817349
episode 2001.000000, reward total was -20.000000. running mean: -19.819175
episode 2002.000000, reward total was -17.000000. running mean: -19.790984
episode 2003.000000, reward total was -21.000000. running mean: -19.803074
episode 2004.000000, reward total was -21.000000. running mean: -19.815043
episode 2005.000000, rewa

episode 2102.000000, reward total was -19.000000. running mean: -19.689169
episode 2103.000000, reward total was -18.000000. running mean: -19.672278
episode 2104.000000, reward total was -19.000000. running mean: -19.665555
episode 2105.000000, reward total was -21.000000. running mean: -19.678899
episode 2106.000000, reward total was -19.000000. running mean: -19.672110
episode 2107.000000, reward total was -20.000000. running mean: -19.675389
episode 2108.000000, reward total was -20.000000. running mean: -19.678635
episode 2109.000000, reward total was -21.000000. running mean: -19.691849
episode 2110.000000, reward total was -18.000000. running mean: -19.674931
episode 2111.000000, reward total was -21.000000. running mean: -19.688181
episode 2112.000000, reward total was -21.000000. running mean: -19.701299
episode 2113.000000, reward total was -20.000000. running mean: -19.704286
episode 2114.000000, reward total was -20.000000. running mean: -19.707244
episode 2115.000000, rewa

episode 2212.000000, reward total was -19.000000. running mean: -19.578474
episode 2213.000000, reward total was -17.000000. running mean: -19.552690
episode 2214.000000, reward total was -18.000000. running mean: -19.537163
episode 2215.000000, reward total was -19.000000. running mean: -19.531791
episode 2216.000000, reward total was -20.000000. running mean: -19.536473
episode 2217.000000, reward total was -20.000000. running mean: -19.541109
episode 2218.000000, reward total was -19.000000. running mean: -19.535697
episode 2219.000000, reward total was -20.000000. running mean: -19.540340
episode 2220.000000, reward total was -20.000000. running mean: -19.544937
episode 2221.000000, reward total was -19.000000. running mean: -19.539488
episode 2222.000000, reward total was -18.000000. running mean: -19.524093
episode 2223.000000, reward total was -21.000000. running mean: -19.538852
episode 2224.000000, reward total was -19.000000. running mean: -19.533463
episode 2225.000000, rewa

episode 2322.000000, reward total was -19.000000. running mean: -19.512270
episode 2323.000000, reward total was -19.000000. running mean: -19.507148
episode 2324.000000, reward total was -20.000000. running mean: -19.512076
episode 2325.000000, reward total was -19.000000. running mean: -19.506955
episode 2326.000000, reward total was -18.000000. running mean: -19.491886
episode 2327.000000, reward total was -19.000000. running mean: -19.486967
episode 2328.000000, reward total was -21.000000. running mean: -19.502097
episode 2329.000000, reward total was -21.000000. running mean: -19.517076
episode 2330.000000, reward total was -21.000000. running mean: -19.531905
episode 2331.000000, reward total was -19.000000. running mean: -19.526586
episode 2332.000000, reward total was -20.000000. running mean: -19.531321
episode 2333.000000, reward total was -18.000000. running mean: -19.516007
episode 2334.000000, reward total was -21.000000. running mean: -19.530847
episode 2335.000000, rewa

episode 2432.000000, reward total was -20.000000. running mean: -19.367293
episode 2433.000000, reward total was -16.000000. running mean: -19.333621
episode 2434.000000, reward total was -21.000000. running mean: -19.350284
episode 2435.000000, reward total was -18.000000. running mean: -19.336781
episode 2436.000000, reward total was -21.000000. running mean: -19.353414
episode 2437.000000, reward total was -19.000000. running mean: -19.349880
episode 2438.000000, reward total was -20.000000. running mean: -19.356381
episode 2439.000000, reward total was -21.000000. running mean: -19.372817
episode 2440.000000, reward total was -19.000000. running mean: -19.369089
episode 2441.000000, reward total was -20.000000. running mean: -19.375398
episode 2442.000000, reward total was -21.000000. running mean: -19.391644
episode 2443.000000, reward total was -21.000000. running mean: -19.407727
episode 2444.000000, reward total was -16.000000. running mean: -19.373650
episode 2445.000000, rewa

episode 2542.000000, reward total was -20.000000. running mean: -19.634132
episode 2543.000000, reward total was -19.000000. running mean: -19.627791
episode 2544.000000, reward total was -21.000000. running mean: -19.641513
episode 2545.000000, reward total was -20.000000. running mean: -19.645098
episode 2546.000000, reward total was -20.000000. running mean: -19.648647
episode 2547.000000, reward total was -17.000000. running mean: -19.622161
episode 2548.000000, reward total was -20.000000. running mean: -19.625939
episode 2549.000000, reward total was -18.000000. running mean: -19.609680
episode 2550.000000, reward total was -21.000000. running mean: -19.623583
episode 2551.000000, reward total was -21.000000. running mean: -19.637347
episode 2552.000000, reward total was -20.000000. running mean: -19.640974
episode 2553.000000, reward total was -19.000000. running mean: -19.634564
episode 2554.000000, reward total was -21.000000. running mean: -19.648218
episode 2555.000000, rewa

episode 2652.000000, reward total was -21.000000. running mean: -19.422110
episode 2653.000000, reward total was -18.000000. running mean: -19.407889
episode 2654.000000, reward total was -19.000000. running mean: -19.403810
episode 2655.000000, reward total was -19.000000. running mean: -19.399772
episode 2656.000000, reward total was -19.000000. running mean: -19.395774
episode 2657.000000, reward total was -19.000000. running mean: -19.391816
episode 2658.000000, reward total was -17.000000. running mean: -19.367898
episode 2659.000000, reward total was -21.000000. running mean: -19.384219
episode 2660.000000, reward total was -21.000000. running mean: -19.400377
episode 2661.000000, reward total was -21.000000. running mean: -19.416373
episode 2662.000000, reward total was -17.000000. running mean: -19.392209
episode 2663.000000, reward total was -18.000000. running mean: -19.378287
episode 2664.000000, reward total was -19.000000. running mean: -19.374504
episode 2665.000000, rewa

episode 2762.000000, reward total was -19.000000. running mean: -19.164700
episode 2763.000000, reward total was -19.000000. running mean: -19.163053
episode 2764.000000, reward total was -19.000000. running mean: -19.161422
episode 2765.000000, reward total was -19.000000. running mean: -19.159808
episode 2766.000000, reward total was -18.000000. running mean: -19.148210
episode 2767.000000, reward total was -18.000000. running mean: -19.136728
episode 2768.000000, reward total was -17.000000. running mean: -19.115361
episode 2769.000000, reward total was -20.000000. running mean: -19.124207
episode 2770.000000, reward total was -19.000000. running mean: -19.122965
episode 2771.000000, reward total was -20.000000. running mean: -19.131735
episode 2772.000000, reward total was -20.000000. running mean: -19.140418
episode 2773.000000, reward total was -15.000000. running mean: -19.099014
episode 2774.000000, reward total was -19.000000. running mean: -19.098024
episode 2775.000000, rewa

episode 2872.000000, reward total was -19.000000. running mean: -19.120243
episode 2873.000000, reward total was -19.000000. running mean: -19.119040
episode 2874.000000, reward total was -19.000000. running mean: -19.117850
episode 2875.000000, reward total was -20.000000. running mean: -19.126671
episode 2876.000000, reward total was -15.000000. running mean: -19.085405
episode 2877.000000, reward total was -18.000000. running mean: -19.074551
episode 2878.000000, reward total was -18.000000. running mean: -19.063805
episode 2879.000000, reward total was -18.000000. running mean: -19.053167
episode 2880.000000, reward total was -21.000000. running mean: -19.072635
episode 2881.000000, reward total was -19.000000. running mean: -19.071909
episode 2882.000000, reward total was -17.000000. running mean: -19.051190
episode 2883.000000, reward total was -19.000000. running mean: -19.050678
episode 2884.000000, reward total was -17.000000. running mean: -19.030171
episode 2885.000000, rewa

episode 2982.000000, reward total was -17.000000. running mean: -19.014108
episode 2983.000000, reward total was -20.000000. running mean: -19.023967
episode 2984.000000, reward total was -17.000000. running mean: -19.003727
episode 2985.000000, reward total was -19.000000. running mean: -19.003690
episode 2986.000000, reward total was -17.000000. running mean: -18.983653
episode 2987.000000, reward total was -19.000000. running mean: -18.983817
episode 2988.000000, reward total was -19.000000. running mean: -18.983978
episode 2989.000000, reward total was -19.000000. running mean: -18.984139
episode 2990.000000, reward total was -20.000000. running mean: -18.994297
episode 2991.000000, reward total was -21.000000. running mean: -19.014354
episode 2992.000000, reward total was -18.000000. running mean: -19.004211
episode 2993.000000, reward total was -20.000000. running mean: -19.014169
episode 2994.000000, reward total was -21.000000. running mean: -19.034027
episode 2995.000000, rewa

episode 3092.000000, reward total was -17.000000. running mean: -18.832887
episode 3093.000000, reward total was -16.000000. running mean: -18.804558
episode 3094.000000, reward total was -20.000000. running mean: -18.816512
episode 3095.000000, reward total was -18.000000. running mean: -18.808347
episode 3096.000000, reward total was -18.000000. running mean: -18.800264
episode 3097.000000, reward total was -18.000000. running mean: -18.792261
episode 3098.000000, reward total was -20.000000. running mean: -18.804338
episode 3099.000000, reward total was -18.000000. running mean: -18.796295
episode 3100.000000, reward total was -18.000000. running mean: -18.788332
episode 3101.000000, reward total was -21.000000. running mean: -18.810449
episode 3102.000000, reward total was -18.000000. running mean: -18.802344
episode 3103.000000, reward total was -17.000000. running mean: -18.784321
episode 3104.000000, reward total was -17.000000. running mean: -18.766478
episode 3105.000000, rewa

episode 3202.000000, reward total was -20.000000. running mean: -18.782821
episode 3203.000000, reward total was -20.000000. running mean: -18.794993
episode 3204.000000, reward total was -19.000000. running mean: -18.797043
episode 3205.000000, reward total was -18.000000. running mean: -18.789072
episode 3206.000000, reward total was -21.000000. running mean: -18.811182
episode 3207.000000, reward total was -21.000000. running mean: -18.833070
episode 3208.000000, reward total was -16.000000. running mean: -18.804739
episode 3209.000000, reward total was -19.000000. running mean: -18.806692
episode 3210.000000, reward total was -20.000000. running mean: -18.818625
episode 3211.000000, reward total was -19.000000. running mean: -18.820439
episode 3212.000000, reward total was -16.000000. running mean: -18.792234
episode 3213.000000, reward total was -18.000000. running mean: -18.784312
episode 3214.000000, reward total was -18.000000. running mean: -18.776469
episode 3215.000000, rewa

episode 3312.000000, reward total was -19.000000. running mean: -18.759283
episode 3313.000000, reward total was -20.000000. running mean: -18.771690
episode 3314.000000, reward total was -18.000000. running mean: -18.763973
episode 3315.000000, reward total was -17.000000. running mean: -18.746334
episode 3316.000000, reward total was -19.000000. running mean: -18.748870
episode 3317.000000, reward total was -19.000000. running mean: -18.751382
episode 3318.000000, reward total was -16.000000. running mean: -18.723868
episode 3319.000000, reward total was -21.000000. running mean: -18.746629
episode 3320.000000, reward total was -18.000000. running mean: -18.739163
episode 3321.000000, reward total was -21.000000. running mean: -18.761771
episode 3322.000000, reward total was -14.000000. running mean: -18.714153
episode 3323.000000, reward total was -20.000000. running mean: -18.727012
episode 3324.000000, reward total was -20.000000. running mean: -18.739742
episode 3325.000000, rewa

episode 3422.000000, reward total was -21.000000. running mean: -18.718560
episode 3423.000000, reward total was -19.000000. running mean: -18.721374
episode 3424.000000, reward total was -19.000000. running mean: -18.724160
episode 3425.000000, reward total was -20.000000. running mean: -18.736919
episode 3426.000000, reward total was -19.000000. running mean: -18.739550
episode 3427.000000, reward total was -20.000000. running mean: -18.752154
episode 3428.000000, reward total was -18.000000. running mean: -18.744632
episode 3429.000000, reward total was -17.000000. running mean: -18.727186
episode 3430.000000, reward total was -16.000000. running mean: -18.699914
episode 3431.000000, reward total was -17.000000. running mean: -18.682915
episode 3432.000000, reward total was -17.000000. running mean: -18.666086
episode 3433.000000, reward total was -19.000000. running mean: -18.669425
episode 3434.000000, reward total was -20.000000. running mean: -18.682731
episode 3435.000000, rewa

episode 3532.000000, reward total was -17.000000. running mean: -18.503516
episode 3533.000000, reward total was -17.000000. running mean: -18.488481
episode 3534.000000, reward total was -19.000000. running mean: -18.493596
episode 3535.000000, reward total was -20.000000. running mean: -18.508660
episode 3536.000000, reward total was -17.000000. running mean: -18.493573
episode 3537.000000, reward total was -17.000000. running mean: -18.478638
episode 3538.000000, reward total was -18.000000. running mean: -18.473851
episode 3539.000000, reward total was -21.000000. running mean: -18.499113
episode 3540.000000, reward total was -19.000000. running mean: -18.504121
episode 3541.000000, reward total was -17.000000. running mean: -18.489080
episode 3542.000000, reward total was -19.000000. running mean: -18.494189
episode 3543.000000, reward total was -18.000000. running mean: -18.489248
episode 3544.000000, reward total was -18.000000. running mean: -18.484355
episode 3545.000000, rewa

episode 3642.000000, reward total was -19.000000. running mean: -18.352748
episode 3643.000000, reward total was -19.000000. running mean: -18.359221
episode 3644.000000, reward total was -19.000000. running mean: -18.365628
episode 3645.000000, reward total was -18.000000. running mean: -18.361972
episode 3646.000000, reward total was -20.000000. running mean: -18.378352
episode 3647.000000, reward total was -20.000000. running mean: -18.394569
episode 3648.000000, reward total was -19.000000. running mean: -18.400623
episode 3649.000000, reward total was -20.000000. running mean: -18.416617
episode 3650.000000, reward total was -21.000000. running mean: -18.442451
episode 3651.000000, reward total was -18.000000. running mean: -18.438026
episode 3652.000000, reward total was -20.000000. running mean: -18.453646
episode 3653.000000, reward total was -16.000000. running mean: -18.429110
episode 3654.000000, reward total was -16.000000. running mean: -18.404818
episode 3655.000000, rewa

episode 3752.000000, reward total was -18.000000. running mean: -18.188660
episode 3753.000000, reward total was -21.000000. running mean: -18.216773
episode 3754.000000, reward total was -19.000000. running mean: -18.224606
episode 3755.000000, reward total was -20.000000. running mean: -18.242360
episode 3756.000000, reward total was -17.000000. running mean: -18.229936
episode 3757.000000, reward total was -15.000000. running mean: -18.197637
episode 3758.000000, reward total was -18.000000. running mean: -18.195660
episode 3759.000000, reward total was -18.000000. running mean: -18.193704
episode 3760.000000, reward total was -19.000000. running mean: -18.201767
episode 3761.000000, reward total was -20.000000. running mean: -18.219749
episode 3762.000000, reward total was -21.000000. running mean: -18.247551
episode 3763.000000, reward total was -18.000000. running mean: -18.245076
episode 3764.000000, reward total was -18.000000. running mean: -18.242625
episode 3765.000000, rewa

episode 3862.000000, reward total was -19.000000. running mean: -18.274234
episode 3863.000000, reward total was -17.000000. running mean: -18.261492
episode 3864.000000, reward total was -20.000000. running mean: -18.278877
episode 3865.000000, reward total was -18.000000. running mean: -18.276088
episode 3866.000000, reward total was -20.000000. running mean: -18.293327
episode 3867.000000, reward total was -19.000000. running mean: -18.300394
episode 3868.000000, reward total was -20.000000. running mean: -18.317390
episode 3869.000000, reward total was -12.000000. running mean: -18.254216
episode 3870.000000, reward total was -20.000000. running mean: -18.271674
episode 3871.000000, reward total was -18.000000. running mean: -18.268957
episode 3872.000000, reward total was -18.000000. running mean: -18.266267
episode 3873.000000, reward total was -20.000000. running mean: -18.283605
episode 3874.000000, reward total was -18.000000. running mean: -18.280769
episode 3875.000000, rewa

episode 3972.000000, reward total was -15.000000. running mean: -18.051960
episode 3973.000000, reward total was -18.000000. running mean: -18.051440
episode 3974.000000, reward total was -18.000000. running mean: -18.050926
episode 3975.000000, reward total was -17.000000. running mean: -18.040416
episode 3976.000000, reward total was -19.000000. running mean: -18.050012
episode 3977.000000, reward total was -21.000000. running mean: -18.079512
episode 3978.000000, reward total was -20.000000. running mean: -18.098717
episode 3979.000000, reward total was -19.000000. running mean: -18.107730
episode 3980.000000, reward total was -19.000000. running mean: -18.116652
episode 3981.000000, reward total was -17.000000. running mean: -18.105486
episode 3982.000000, reward total was -15.000000. running mean: -18.074431
episode 3983.000000, reward total was -20.000000. running mean: -18.093687
episode 3984.000000, reward total was -20.000000. running mean: -18.112750
episode 3985.000000, rewa

episode 4082.000000, reward total was -18.000000. running mean: -17.903754
episode 4083.000000, reward total was -15.000000. running mean: -17.874716
episode 4084.000000, reward total was -15.000000. running mean: -17.845969
episode 4085.000000, reward total was -20.000000. running mean: -17.867510
episode 4086.000000, reward total was -17.000000. running mean: -17.858835
episode 4087.000000, reward total was -19.000000. running mean: -17.870246
episode 4088.000000, reward total was -20.000000. running mean: -17.891544
episode 4089.000000, reward total was -18.000000. running mean: -17.892628
episode 4090.000000, reward total was -17.000000. running mean: -17.883702
episode 4091.000000, reward total was -15.000000. running mean: -17.854865
episode 4092.000000, reward total was -14.000000. running mean: -17.816316
episode 4093.000000, reward total was -19.000000. running mean: -17.828153
episode 4094.000000, reward total was -17.000000. running mean: -17.819872
episode 4095.000000, rewa

episode 4192.000000, reward total was -17.000000. running mean: -17.700888
episode 4193.000000, reward total was -16.000000. running mean: -17.683879
episode 4194.000000, reward total was -16.000000. running mean: -17.667041
episode 4195.000000, reward total was -16.000000. running mean: -17.650370
episode 4196.000000, reward total was -19.000000. running mean: -17.663867
episode 4197.000000, reward total was -20.000000. running mean: -17.687228
episode 4198.000000, reward total was -14.000000. running mean: -17.650356
episode 4199.000000, reward total was -20.000000. running mean: -17.673852
episode 4200.000000, reward total was -18.000000. running mean: -17.677113
episode 4201.000000, reward total was -16.000000. running mean: -17.660342
episode 4202.000000, reward total was -18.000000. running mean: -17.663739
episode 4203.000000, reward total was -19.000000. running mean: -17.677102
episode 4204.000000, reward total was -18.000000. running mean: -17.680331
episode 4205.000000, rewa

episode 4302.000000, reward total was -18.000000. running mean: -17.392031
episode 4303.000000, reward total was -13.000000. running mean: -17.348111
episode 4304.000000, reward total was -15.000000. running mean: -17.324630
episode 4305.000000, reward total was -16.000000. running mean: -17.311383
episode 4306.000000, reward total was -17.000000. running mean: -17.308269
episode 4307.000000, reward total was -20.000000. running mean: -17.335187
episode 4308.000000, reward total was -17.000000. running mean: -17.331835
episode 4309.000000, reward total was -14.000000. running mean: -17.298517
episode 4310.000000, reward total was -19.000000. running mean: -17.315531
episode 4311.000000, reward total was -18.000000. running mean: -17.322376
episode 4312.000000, reward total was -13.000000. running mean: -17.279152
episode 4313.000000, reward total was -19.000000. running mean: -17.296361
episode 4314.000000, reward total was -16.000000. running mean: -17.283397
episode 4315.000000, rewa

episode 4412.000000, reward total was -19.000000. running mean: -17.646139
episode 4413.000000, reward total was -15.000000. running mean: -17.619678
episode 4414.000000, reward total was -19.000000. running mean: -17.633481
episode 4415.000000, reward total was -19.000000. running mean: -17.647146
episode 4416.000000, reward total was -18.000000. running mean: -17.650675
episode 4417.000000, reward total was -19.000000. running mean: -17.664168
episode 4418.000000, reward total was -17.000000. running mean: -17.657526
episode 4419.000000, reward total was -14.000000. running mean: -17.620951
episode 4420.000000, reward total was -13.000000. running mean: -17.574742
episode 4421.000000, reward total was -16.000000. running mean: -17.558994
episode 4422.000000, reward total was -16.000000. running mean: -17.543404
episode 4423.000000, reward total was -19.000000. running mean: -17.557970
episode 4424.000000, reward total was -19.000000. running mean: -17.572391
episode 4425.000000, rewa

episode 4522.000000, reward total was -15.000000. running mean: -17.228241
episode 4523.000000, reward total was -15.000000. running mean: -17.205958
episode 4524.000000, reward total was -18.000000. running mean: -17.213899
episode 4525.000000, reward total was -19.000000. running mean: -17.231760
episode 4526.000000, reward total was -18.000000. running mean: -17.239442
episode 4527.000000, reward total was -18.000000. running mean: -17.247048
episode 4528.000000, reward total was -15.000000. running mean: -17.224577
episode 4529.000000, reward total was -18.000000. running mean: -17.232332
episode 4530.000000, reward total was -21.000000. running mean: -17.270008
episode 4531.000000, reward total was -14.000000. running mean: -17.237308
episode 4532.000000, reward total was -16.000000. running mean: -17.224935
episode 4533.000000, reward total was -21.000000. running mean: -17.262686
episode 4534.000000, reward total was -18.000000. running mean: -17.270059
episode 4535.000000, rewa

episode 4632.000000, reward total was -17.000000. running mean: -17.173953
episode 4633.000000, reward total was -21.000000. running mean: -17.212213
episode 4634.000000, reward total was -13.000000. running mean: -17.170091
episode 4635.000000, reward total was -19.000000. running mean: -17.188390
episode 4636.000000, reward total was -13.000000. running mean: -17.146506
episode 4637.000000, reward total was -20.000000. running mean: -17.175041
episode 4638.000000, reward total was -16.000000. running mean: -17.163291
episode 4639.000000, reward total was -16.000000. running mean: -17.151658
episode 4640.000000, reward total was -18.000000. running mean: -17.160141
episode 4641.000000, reward total was -19.000000. running mean: -17.178540
episode 4642.000000, reward total was -16.000000. running mean: -17.166754
episode 4643.000000, reward total was -19.000000. running mean: -17.185087
episode 4644.000000, reward total was -19.000000. running mean: -17.203236
episode 4645.000000, rewa

episode 4742.000000, reward total was -17.000000. running mean: -16.921706
episode 4743.000000, reward total was -19.000000. running mean: -16.942489
episode 4744.000000, reward total was -20.000000. running mean: -16.973064
episode 4745.000000, reward total was -20.000000. running mean: -17.003333
episode 4746.000000, reward total was -14.000000. running mean: -16.973300
episode 4747.000000, reward total was -18.000000. running mean: -16.983567
episode 4748.000000, reward total was -15.000000. running mean: -16.963731
episode 4749.000000, reward total was -18.000000. running mean: -16.974094
episode 4750.000000, reward total was -16.000000. running mean: -16.964353
episode 4751.000000, reward total was -16.000000. running mean: -16.954709
episode 4752.000000, reward total was -18.000000. running mean: -16.965162
episode 4753.000000, reward total was -19.000000. running mean: -16.985511
episode 4754.000000, reward total was -17.000000. running mean: -16.985656
episode 4755.000000, rewa

episode 4852.000000, reward total was -15.000000. running mean: -16.707003
episode 4853.000000, reward total was -18.000000. running mean: -16.719933
episode 4854.000000, reward total was -15.000000. running mean: -16.702733
episode 4855.000000, reward total was -17.000000. running mean: -16.705706
episode 4856.000000, reward total was -13.000000. running mean: -16.668649
episode 4857.000000, reward total was -16.000000. running mean: -16.661962
episode 4858.000000, reward total was -18.000000. running mean: -16.675343
episode 4859.000000, reward total was -15.000000. running mean: -16.658589
episode 4860.000000, reward total was -21.000000. running mean: -16.702003
episode 4861.000000, reward total was -19.000000. running mean: -16.724983
episode 4862.000000, reward total was -17.000000. running mean: -16.727734
episode 4863.000000, reward total was -18.000000. running mean: -16.740456
episode 4864.000000, reward total was -17.000000. running mean: -16.743052
episode 4865.000000, rewa

episode 4962.000000, reward total was -17.000000. running mean: -16.773766
episode 4963.000000, reward total was -18.000000. running mean: -16.786028
episode 4964.000000, reward total was -14.000000. running mean: -16.758168
episode 4965.000000, reward total was -16.000000. running mean: -16.750586
episode 4966.000000, reward total was -15.000000. running mean: -16.733080
episode 4967.000000, reward total was -20.000000. running mean: -16.765749
episode 4968.000000, reward total was -17.000000. running mean: -16.768092
episode 4969.000000, reward total was -16.000000. running mean: -16.760411
episode 4970.000000, reward total was -17.000000. running mean: -16.762807
episode 4971.000000, reward total was -18.000000. running mean: -16.775179
episode 4972.000000, reward total was -15.000000. running mean: -16.757427
episode 4973.000000, reward total was -15.000000. running mean: -16.739853
episode 4974.000000, reward total was -15.000000. running mean: -16.722454
episode 4975.000000, rewa

episode 5072.000000, reward total was -17.000000. running mean: -16.592024
episode 5073.000000, reward total was -19.000000. running mean: -16.616104
episode 5074.000000, reward total was -19.000000. running mean: -16.639943
episode 5075.000000, reward total was -16.000000. running mean: -16.633543
episode 5076.000000, reward total was -15.000000. running mean: -16.617208
episode 5077.000000, reward total was -19.000000. running mean: -16.641036
episode 5078.000000, reward total was -18.000000. running mean: -16.654625
episode 5079.000000, reward total was -17.000000. running mean: -16.658079
episode 5080.000000, reward total was -13.000000. running mean: -16.621498
episode 5081.000000, reward total was -18.000000. running mean: -16.635283
episode 5082.000000, reward total was -16.000000. running mean: -16.628930
episode 5083.000000, reward total was -17.000000. running mean: -16.632641
episode 5084.000000, reward total was -17.000000. running mean: -16.636315
episode 5085.000000, rewa

episode 5182.000000, reward total was -15.000000. running mean: -16.255468
episode 5183.000000, reward total was -14.000000. running mean: -16.232913
episode 5184.000000, reward total was -16.000000. running mean: -16.230584
episode 5185.000000, reward total was -15.000000. running mean: -16.218278
episode 5186.000000, reward total was -13.000000. running mean: -16.186095
episode 5187.000000, reward total was -13.000000. running mean: -16.154234
episode 5188.000000, reward total was -17.000000. running mean: -16.162692
episode 5189.000000, reward total was -13.000000. running mean: -16.131065
episode 5190.000000, reward total was -16.000000. running mean: -16.129754
episode 5191.000000, reward total was -19.000000. running mean: -16.158457
episode 5192.000000, reward total was -21.000000. running mean: -16.206872
episode 5193.000000, reward total was -17.000000. running mean: -16.214803
episode 5194.000000, reward total was -17.000000. running mean: -16.222655
episode 5195.000000, rewa

episode 5292.000000, reward total was -15.000000. running mean: -15.879504
episode 5293.000000, reward total was -10.000000. running mean: -15.820709
episode 5294.000000, reward total was -17.000000. running mean: -15.832502
episode 5295.000000, reward total was -18.000000. running mean: -15.854177
episode 5296.000000, reward total was -15.000000. running mean: -15.845635
episode 5297.000000, reward total was -17.000000. running mean: -15.857178
episode 5298.000000, reward total was -17.000000. running mean: -15.868607
episode 5299.000000, reward total was -17.000000. running mean: -15.879921
episode 5300.000000, reward total was -17.000000. running mean: -15.891121
episode 5301.000000, reward total was -19.000000. running mean: -15.922210
episode 5302.000000, reward total was -18.000000. running mean: -15.942988
episode 5303.000000, reward total was -17.000000. running mean: -15.953558
episode 5304.000000, reward total was -17.000000. running mean: -15.964023
episode 5305.000000, rewa

episode 5402.000000, reward total was -15.000000. running mean: -15.774470
episode 5403.000000, reward total was -20.000000. running mean: -15.816726
episode 5404.000000, reward total was -19.000000. running mean: -15.848558
episode 5405.000000, reward total was -17.000000. running mean: -15.860073
episode 5406.000000, reward total was -17.000000. running mean: -15.871472
episode 5407.000000, reward total was -13.000000. running mean: -15.842757
episode 5408.000000, reward total was -15.000000. running mean: -15.834330
episode 5409.000000, reward total was -15.000000. running mean: -15.825986
episode 5410.000000, reward total was -13.000000. running mean: -15.797727
episode 5411.000000, reward total was -16.000000. running mean: -15.799749
episode 5412.000000, reward total was -15.000000. running mean: -15.791752
episode 5413.000000, reward total was -15.000000. running mean: -15.783834
episode 5414.000000, reward total was -19.000000. running mean: -15.815996
episode 5415.000000, rewa

episode 5512.000000, reward total was -18.000000. running mean: -15.590828
episode 5513.000000, reward total was -13.000000. running mean: -15.564920
episode 5514.000000, reward total was -18.000000. running mean: -15.589271
episode 5515.000000, reward total was -9.000000. running mean: -15.523378
episode 5516.000000, reward total was -12.000000. running mean: -15.488144
episode 5517.000000, reward total was -15.000000. running mean: -15.483263
episode 5518.000000, reward total was -19.000000. running mean: -15.518430
episode 5519.000000, reward total was -17.000000. running mean: -15.533246
episode 5520.000000, reward total was -15.000000. running mean: -15.527913
episode 5521.000000, reward total was -17.000000. running mean: -15.542634
episode 5522.000000, reward total was -17.000000. running mean: -15.557208
episode 5523.000000, reward total was -18.000000. running mean: -15.581636
episode 5524.000000, reward total was -14.000000. running mean: -15.565819
episode 5525.000000, rewar

episode 5622.000000, reward total was -15.000000. running mean: -14.888647
episode 5623.000000, reward total was -12.000000. running mean: -14.859761
episode 5624.000000, reward total was -16.000000. running mean: -14.871163
episode 5625.000000, reward total was -13.000000. running mean: -14.852452
episode 5626.000000, reward total was -16.000000. running mean: -14.863927
episode 5627.000000, reward total was -15.000000. running mean: -14.865288
episode 5628.000000, reward total was -18.000000. running mean: -14.896635
episode 5629.000000, reward total was -19.000000. running mean: -14.937669
episode 5630.000000, reward total was -15.000000. running mean: -14.938292
episode 5631.000000, reward total was -15.000000. running mean: -14.938909
episode 5632.000000, reward total was -13.000000. running mean: -14.919520
episode 5633.000000, reward total was -17.000000. running mean: -14.940325
episode 5634.000000, reward total was -18.000000. running mean: -14.970922
episode 5635.000000, rewa

episode 5732.000000, reward total was -14.000000. running mean: -14.925000
episode 5733.000000, reward total was -12.000000. running mean: -14.895750
episode 5734.000000, reward total was -14.000000. running mean: -14.886792
episode 5735.000000, reward total was -14.000000. running mean: -14.877924
episode 5736.000000, reward total was -17.000000. running mean: -14.899145
episode 5737.000000, reward total was -13.000000. running mean: -14.880153
episode 5738.000000, reward total was -14.000000. running mean: -14.871352
episode 5739.000000, reward total was -18.000000. running mean: -14.902638
episode 5740.000000, reward total was -16.000000. running mean: -14.913612
episode 5741.000000, reward total was -17.000000. running mean: -14.934476
episode 5742.000000, reward total was -15.000000. running mean: -14.935131
episode 5743.000000, reward total was -17.000000. running mean: -14.955780
episode 5744.000000, reward total was -18.000000. running mean: -14.986222
episode 5745.000000, rewa

episode 5842.000000, reward total was -16.000000. running mean: -14.910850
episode 5843.000000, reward total was -17.000000. running mean: -14.931742
episode 5844.000000, reward total was -16.000000. running mean: -14.942424
episode 5845.000000, reward total was -16.000000. running mean: -14.953000
episode 5846.000000, reward total was -13.000000. running mean: -14.933470
episode 5847.000000, reward total was -8.000000. running mean: -14.864135
episode 5848.000000, reward total was -17.000000. running mean: -14.885494
episode 5849.000000, reward total was -19.000000. running mean: -14.926639
episode 5850.000000, reward total was -13.000000. running mean: -14.907373
episode 5851.000000, reward total was -9.000000. running mean: -14.848299
episode 5852.000000, reward total was -15.000000. running mean: -14.849816
episode 5853.000000, reward total was -12.000000. running mean: -14.821318
episode 5854.000000, reward total was -16.000000. running mean: -14.833104
episode 5855.000000, reward

episode 5952.000000, reward total was -17.000000. running mean: -14.881753
episode 5953.000000, reward total was -12.000000. running mean: -14.852936
episode 5954.000000, reward total was -10.000000. running mean: -14.804406
episode 5955.000000, reward total was -17.000000. running mean: -14.826362
episode 5956.000000, reward total was -13.000000. running mean: -14.808099
episode 5957.000000, reward total was -14.000000. running mean: -14.800018
episode 5958.000000, reward total was -15.000000. running mean: -14.802018
episode 5959.000000, reward total was -13.000000. running mean: -14.783997
episode 5960.000000, reward total was -9.000000. running mean: -14.726157
episode 5961.000000, reward total was -20.000000. running mean: -14.778896
episode 5962.000000, reward total was -18.000000. running mean: -14.811107
episode 5963.000000, reward total was -18.000000. running mean: -14.842996
episode 5964.000000, reward total was -16.000000. running mean: -14.854566
episode 5965.000000, rewar

In [7]:
# Invoke play_game on the gym environment (`env`, created via gym.make(rm))
# and the weight dict (`model`, holding 'W1' and 'W2') set up earlier in the notebook.
# NOTE(review): play_game is defined in an earlier cell not visible here; presumably it
# plays an episode with the current weights and reports the accumulated reward — confirm.
play_game(env, model)

See here for more information: https://www.gymlibrary.ml/content/api/
  deprecation(
  logger.warn(


Episode finished without success, accumulated reward = -3.0
