In [1]:
import gym
import numpy as np
rm='Pong-v0'

In [2]:
%matplotlib inline
from matplotlib import animation
import matplotlib.pyplot as plt
from IPython.display import display, HTML

def display_frames_as_gif(frames):
    plt.figure(figsize=(frames[0].shape[1] / 72.0, frames[0].shape[0] / 72.0), dpi = 144)
    patch = plt.imshow(frames[0])
    plt.axis('off')
    def animate(i):
        patch.set_data(frames[i])
    anim = animation.FuncAnimation(plt.gcf(), animate, frames = len(frames), interval=50)
    plt.close(anim._fig)
    display(HTML(anim.to_jshtml()))

In [3]:
from gym.wrappers import AtariPreprocessing
gym.new_step_api=True
env = gym.make(rm)
# model initialization
H = 200 # number of hidden layer neurons
D = 80 * 80 # input dimensionality: 80x80 grid
model = {}
model['W1'] = np.random.randn(H,D) / np.sqrt(D) # "Xavier" initialization
model['W2'] = np.random.randn(H) / np.sqrt(H)
# hyperparameters
batch_size = 10 # every how many episodes to do a param update?
learning_rate = 1e-1
gamma = 0.99 # discount factor for reward
decay_rate = 0.99 # decay factor for RMSProp leaky sum of grad^2
grad_buffer = { k : np.zeros_like(v) for k,v in model.items() } # update buffers that add up gradients over a batch
rmsprop_cache = { k : np.zeros_like(v) for k,v in model.items() } # rmsprop memory

def sigmoid(x): 
  return 1.0 / (1.0 + np.exp(-x)) # sigmoid "squashing" function to interval [0,1]

def prepro(I):
  I=np.asarray(I)
  I = I[35:195] # crop
  I = I[::2,::2,0] # downsample by factor of 2
  
  I[I == 144] = 0 # erase background (background type 1)
  
  I[I == 109] = 0 # erase background (background type 2)
  
  I[I != 0] = 1 # everything else (paddles, ball) just set to 1
  return I.astype(float).ravel()

def discount_rewards(r):
  discounted_r = np.zeros_like(r)
  running_add = 0
  for t in reversed(range(0, r.size)):
    if r[t] != 0: running_add = 0 # reset the sum, since this was a game boundary (pong specific!)
    running_add = running_add * gamma + r[t]
    discounted_r[t] = running_add
  return discounted_r

def policy_forward(x):
  h = np.dot(model['W1'], x)
  h[h<0] = 0 # ReLU nonlinearity
  logp = np.dot(model['W2'], h)
  p = sigmoid(logp)
  return p, h # return probability of taking action 2, and hidden state

def policy_backward(epx, eph, epdlogp):
  """ backward pass. (eph is array of intermediate hidden states) """
  dW2 = np.dot(eph.T, epdlogp).ravel()
  dh = np.outer(epdlogp, model['W2'])
  dh[eph <= 0] = 0 # backpro prelu
  dW1 = np.dot(dh.T, epx)
  return {'W1':dW1, 'W2':dW2}

def model_step(model, observation, prev_x):
  # preprocess the observation, set input to network to be difference image
  cur_x = prepro(observation)
  x = cur_x - prev_x if prev_x is not None else np.zeros(D)
  prev_x = cur_x
  
  # forward the policy network and sample an action from the returned probability
  aprob, _ = policy_forward(x)
  action = 2 if aprob >= 0.5 else 3 # roll the dice!
  
  return action, prev_x

def play_game(env, model):
  observation = env.reset()

  frames = []
  cumulated_reward = 0

  prev_x = None # used in computing the difference frame

  for t in range(1000):
      frames.append(env.render(mode = 'rgb_array'))
      action, prev_x = model_step(model, observation, prev_x)
      observation, reward, done, info = env.step(action)
      cumulated_reward += reward
      if done:
          print("Episode finished after {} timesteps, accumulated reward = {}".format(t+1, cumulated_reward))
          break
  print("Episode finished without success, accumulated reward = {}".format(cumulated_reward))
  env.close()
  display_frames_as_gif(frames)

def train_model(env, model, total_episodes = 100):
  hist = []
  observation = env.reset()

  prev_x = None # used in computing the difference frame
  xs,hs,dlogps,drs = [],[],[],[]
  running_reward = None
  reward_sum = 0
  episode_number = 0

  while True:
    # preprocess the observation, set input to network to be difference image
    cur_x = prepro(observation)
    x = cur_x - prev_x if prev_x is not None else np.zeros(D)
    prev_x = cur_x

    # forward the policy network and sample an action from the returned probability
    aprob, h = policy_forward(x)
    action = 2 if np.random.uniform() < aprob else 3 # roll the dice!

    # record various intermediates (needed later for backprop)
    xs.append(x) # observation
    hs.append(h) # hidden state
    y = 1 if action == 2 else 0 # a "fake label"
    dlogps.append(y - aprob) # grad that encourages the action that was taken to be taken (see http://cs231n.github.io/neural-networks-2/#losses if confused)

    # step the environment and get new measurements
    observation, reward, done, info = env.step(action)
    reward_sum += reward

    drs.append(reward) # record reward (has to be done after we call step() to get reward for previous action)

    if done: # an episode finished
      episode_number += 1

      # stack together all inputs, hidden states, action gradients, and rewards for this episode
      epx = np.vstack(xs)
      eph = np.vstack(hs)
      epdlogp = np.vstack(dlogps)
      epr = np.vstack(drs)
      xs,hs,dlogps,drs = [],[],[],[] # reset array memory

      # compute the discounted reward backwards through time
      discounted_epr = discount_rewards(epr)
      # standardize the rewards to be unit normal (helps control the gradient estimator variance)
      discounted_epr -= np.mean(discounted_epr)
      discounted_epr /= np.std(discounted_epr)

      epdlogp *= discounted_epr # modulate the gradient with advantage (PG magic happens right here.)
      grad = policy_backward(epx, eph, epdlogp)
      for k in model: grad_buffer[k] += grad[k] # accumulate grad over batch

      # perform rmsprop parameter update every batch_size episodes
      if episode_number % batch_size == 0:
        for k,v in model.items():
          g = grad_buffer[k] # gradient
          rmsprop_cache[k] = decay_rate * rmsprop_cache[k] + (1 - decay_rate) * g**2
          model[k] += learning_rate * g / (np.sqrt(rmsprop_cache[k]) + 1e-5)
          grad_buffer[k] = np.zeros_like(v) # reset batch gradient buffer

      # boring book-keeping
      running_reward = reward_sum if running_reward is None else running_reward * 0.99 + reward_sum * 0.01
      hist.append((episode_number, reward_sum, running_reward))
      print ('episode %f, reward total was %f. running mean: %f' % (episode_number, reward_sum, running_reward))
      reward_sum = 0
      observation = env.reset() # reset env
      prev_x = None
      if episode_number == total_episodes: 
        return hist

    # if reward != 0: # Pong has either +1 or -1 reward exactly when game ends.
    #   print('ep {}: game finished, reward: {}'.format(episode_number, reward) + ('' if reward == -1 else ' !!!!!!!!'))

  logger.warn(
  deprecation(
  deprecation(


In [4]:
%time hist1 = train_model(env, model, total_episodes=6000)

  logger.deprecation(


episode 1.000000, reward total was -20.000000. running mean: -20.000000
episode 2.000000, reward total was -20.000000. running mean: -20.000000
episode 3.000000, reward total was -21.000000. running mean: -20.010000
episode 4.000000, reward total was -21.000000. running mean: -20.019900
episode 5.000000, reward total was -21.000000. running mean: -20.029701
episode 6.000000, reward total was -20.000000. running mean: -20.029404
episode 7.000000, reward total was -21.000000. running mean: -20.039110
episode 8.000000, reward total was -21.000000. running mean: -20.048719
episode 9.000000, reward total was -20.000000. running mean: -20.048232
episode 10.000000, reward total was -21.000000. running mean: -20.057749


  return 1.0 / (1.0 + np.exp(-x)) # sigmoid "squashing" function to interval [0,1]


episode 11.000000, reward total was -21.000000. running mean: -20.067172
episode 12.000000, reward total was -21.000000. running mean: -20.076500
episode 13.000000, reward total was -21.000000. running mean: -20.085735
episode 14.000000, reward total was -21.000000. running mean: -20.094878
episode 15.000000, reward total was -21.000000. running mean: -20.103929
episode 16.000000, reward total was -21.000000. running mean: -20.112890
episode 17.000000, reward total was -21.000000. running mean: -20.121761
episode 18.000000, reward total was -21.000000. running mean: -20.130543
episode 19.000000, reward total was -21.000000. running mean: -20.139238
episode 20.000000, reward total was -21.000000. running mean: -20.147845
episode 21.000000, reward total was -21.000000. running mean: -20.156367
episode 22.000000, reward total was -21.000000. running mean: -20.164803
episode 23.000000, reward total was -21.000000. running mean: -20.173155
episode 24.000000, reward total was -21.000000. run

episode 123.000000, reward total was -21.000000. running mean: -20.697348
episode 124.000000, reward total was -21.000000. running mean: -20.700375
episode 125.000000, reward total was -21.000000. running mean: -20.703371
episode 126.000000, reward total was -21.000000. running mean: -20.706337
episode 127.000000, reward total was -21.000000. running mean: -20.709274
episode 128.000000, reward total was -21.000000. running mean: -20.712181
episode 129.000000, reward total was -21.000000. running mean: -20.715059
episode 130.000000, reward total was -21.000000. running mean: -20.717909
episode 131.000000, reward total was -21.000000. running mean: -20.720730
episode 132.000000, reward total was -21.000000. running mean: -20.723522
episode 133.000000, reward total was -21.000000. running mean: -20.726287
episode 134.000000, reward total was -21.000000. running mean: -20.729024
episode 135.000000, reward total was -21.000000. running mean: -20.731734
episode 136.000000, reward total was -

episode 234.000000, reward total was -21.000000. running mean: -20.900814
episode 235.000000, reward total was -21.000000. running mean: -20.901806
episode 236.000000, reward total was -21.000000. running mean: -20.902788
episode 237.000000, reward total was -21.000000. running mean: -20.903760
episode 238.000000, reward total was -21.000000. running mean: -20.904722
episode 239.000000, reward total was -21.000000. running mean: -20.905675
episode 240.000000, reward total was -21.000000. running mean: -20.906618
episode 241.000000, reward total was -21.000000. running mean: -20.907552
episode 242.000000, reward total was -21.000000. running mean: -20.908477
episode 243.000000, reward total was -21.000000. running mean: -20.909392
episode 244.000000, reward total was -21.000000. running mean: -20.910298
episode 245.000000, reward total was -21.000000. running mean: -20.911195
episode 246.000000, reward total was -21.000000. running mean: -20.912083
episode 247.000000, reward total was -

episode 345.000000, reward total was -21.000000. running mean: -20.967495
episode 346.000000, reward total was -21.000000. running mean: -20.967820
episode 347.000000, reward total was -21.000000. running mean: -20.968141
episode 348.000000, reward total was -21.000000. running mean: -20.968460
episode 349.000000, reward total was -21.000000. running mean: -20.968775
episode 350.000000, reward total was -21.000000. running mean: -20.969088
episode 351.000000, reward total was -21.000000. running mean: -20.969397
episode 352.000000, reward total was -21.000000. running mean: -20.969703
episode 353.000000, reward total was -21.000000. running mean: -20.970006
episode 354.000000, reward total was -21.000000. running mean: -20.970306
episode 355.000000, reward total was -21.000000. running mean: -20.970603
episode 356.000000, reward total was -21.000000. running mean: -20.970897
episode 357.000000, reward total was -21.000000. running mean: -20.971188
episode 358.000000, reward total was -

episode 456.000000, reward total was -21.000000. running mean: -20.989347
episode 457.000000, reward total was -21.000000. running mean: -20.989454
episode 458.000000, reward total was -21.000000. running mean: -20.989559
episode 459.000000, reward total was -21.000000. running mean: -20.989664
episode 460.000000, reward total was -21.000000. running mean: -20.989767
episode 461.000000, reward total was -21.000000. running mean: -20.989869
episode 462.000000, reward total was -21.000000. running mean: -20.989971
episode 463.000000, reward total was -21.000000. running mean: -20.990071
episode 464.000000, reward total was -21.000000. running mean: -20.990170
episode 465.000000, reward total was -21.000000. running mean: -20.990268
episode 466.000000, reward total was -21.000000. running mean: -20.990366
episode 467.000000, reward total was -21.000000. running mean: -20.990462
episode 468.000000, reward total was -21.000000. running mean: -20.990558
episode 469.000000, reward total was -

episode 567.000000, reward total was -21.000000. running mean: -20.996509
episode 568.000000, reward total was -21.000000. running mean: -20.996544
episode 569.000000, reward total was -21.000000. running mean: -20.996578
episode 570.000000, reward total was -21.000000. running mean: -20.996613
episode 571.000000, reward total was -21.000000. running mean: -20.996646
episode 572.000000, reward total was -21.000000. running mean: -20.996680
episode 573.000000, reward total was -21.000000. running mean: -20.996713
episode 574.000000, reward total was -21.000000. running mean: -20.996746
episode 575.000000, reward total was -21.000000. running mean: -20.996779
episode 576.000000, reward total was -21.000000. running mean: -20.996811
episode 577.000000, reward total was -21.000000. running mean: -20.996843
episode 578.000000, reward total was -21.000000. running mean: -20.996874
episode 579.000000, reward total was -21.000000. running mean: -20.996905
episode 580.000000, reward total was -

episode 678.000000, reward total was -21.000000. running mean: -20.998856
episode 679.000000, reward total was -21.000000. running mean: -20.998867
episode 680.000000, reward total was -21.000000. running mean: -20.998879
episode 681.000000, reward total was -21.000000. running mean: -20.998890
episode 682.000000, reward total was -21.000000. running mean: -20.998901
episode 683.000000, reward total was -21.000000. running mean: -20.998912
episode 684.000000, reward total was -21.000000. running mean: -20.998923
episode 685.000000, reward total was -21.000000. running mean: -20.998934
episode 686.000000, reward total was -21.000000. running mean: -20.998944
episode 687.000000, reward total was -21.000000. running mean: -20.998955
episode 688.000000, reward total was -21.000000. running mean: -20.998965
episode 689.000000, reward total was -21.000000. running mean: -20.998976
episode 690.000000, reward total was -21.000000. running mean: -20.998986
episode 691.000000, reward total was -

episode 789.000000, reward total was -21.000000. running mean: -20.999625
episode 790.000000, reward total was -21.000000. running mean: -20.999629
episode 791.000000, reward total was -21.000000. running mean: -20.999633
episode 792.000000, reward total was -21.000000. running mean: -20.999636
episode 793.000000, reward total was -21.000000. running mean: -20.999640
episode 794.000000, reward total was -21.000000. running mean: -20.999643
episode 795.000000, reward total was -21.000000. running mean: -20.999647
episode 796.000000, reward total was -21.000000. running mean: -20.999651
episode 797.000000, reward total was -21.000000. running mean: -20.999654
episode 798.000000, reward total was -21.000000. running mean: -20.999657
episode 799.000000, reward total was -21.000000. running mean: -20.999661
episode 800.000000, reward total was -21.000000. running mean: -20.999664
episode 801.000000, reward total was -21.000000. running mean: -20.999668
episode 802.000000, reward total was -

episode 900.000000, reward total was -21.000000. running mean: -20.999877
episode 901.000000, reward total was -21.000000. running mean: -20.999878
episode 902.000000, reward total was -21.000000. running mean: -20.999880
episode 903.000000, reward total was -21.000000. running mean: -20.999881
episode 904.000000, reward total was -21.000000. running mean: -20.999882
episode 905.000000, reward total was -21.000000. running mean: -20.999883
episode 906.000000, reward total was -21.000000. running mean: -20.999884
episode 907.000000, reward total was -21.000000. running mean: -20.999885
episode 908.000000, reward total was -21.000000. running mean: -20.999887
episode 909.000000, reward total was -21.000000. running mean: -20.999888
episode 910.000000, reward total was -21.000000. running mean: -20.999889
episode 911.000000, reward total was -21.000000. running mean: -20.999890
episode 912.000000, reward total was -21.000000. running mean: -20.999891
episode 913.000000, reward total was -

episode 1011.000000, reward total was -21.000000. running mean: -20.999960
episode 1012.000000, reward total was -21.000000. running mean: -20.999960
episode 1013.000000, reward total was -21.000000. running mean: -20.999961
episode 1014.000000, reward total was -21.000000. running mean: -20.999961
episode 1015.000000, reward total was -21.000000. running mean: -20.999961
episode 1016.000000, reward total was -21.000000. running mean: -20.999962
episode 1017.000000, reward total was -21.000000. running mean: -20.999962
episode 1018.000000, reward total was -21.000000. running mean: -20.999962
episode 1019.000000, reward total was -21.000000. running mean: -20.999963
episode 1020.000000, reward total was -21.000000. running mean: -20.999963
episode 1021.000000, reward total was -21.000000. running mean: -20.999964
episode 1022.000000, reward total was -21.000000. running mean: -20.999964
episode 1023.000000, reward total was -21.000000. running mean: -20.999964
episode 1024.000000, rewa

episode 1121.000000, reward total was -21.000000. running mean: -20.999987
episode 1122.000000, reward total was -21.000000. running mean: -20.999987
episode 1123.000000, reward total was -21.000000. running mean: -20.999987
episode 1124.000000, reward total was -21.000000. running mean: -20.999987
episode 1125.000000, reward total was -21.000000. running mean: -20.999987
episode 1126.000000, reward total was -21.000000. running mean: -20.999987
episode 1127.000000, reward total was -21.000000. running mean: -20.999987
episode 1128.000000, reward total was -21.000000. running mean: -20.999988
episode 1129.000000, reward total was -21.000000. running mean: -20.999988
episode 1130.000000, reward total was -21.000000. running mean: -20.999988
episode 1131.000000, reward total was -21.000000. running mean: -20.999988
episode 1132.000000, reward total was -21.000000. running mean: -20.999988
episode 1133.000000, reward total was -21.000000. running mean: -20.999988
episode 1134.000000, rewa

episode 1231.000000, reward total was -21.000000. running mean: -20.999996
episode 1232.000000, reward total was -21.000000. running mean: -20.999996
episode 1233.000000, reward total was -21.000000. running mean: -20.999996
episode 1234.000000, reward total was -21.000000. running mean: -20.999996
episode 1235.000000, reward total was -21.000000. running mean: -20.999996
episode 1236.000000, reward total was -21.000000. running mean: -20.999996
episode 1237.000000, reward total was -21.000000. running mean: -20.999996
episode 1238.000000, reward total was -21.000000. running mean: -20.999996
episode 1239.000000, reward total was -21.000000. running mean: -20.999996
episode 1240.000000, reward total was -21.000000. running mean: -20.999996
episode 1241.000000, reward total was -21.000000. running mean: -20.999996
episode 1242.000000, reward total was -21.000000. running mean: -20.999996
episode 1243.000000, reward total was -21.000000. running mean: -20.999996
episode 1244.000000, rewa

episode 1341.000000, reward total was -21.000000. running mean: -20.999999
episode 1342.000000, reward total was -21.000000. running mean: -20.999999
episode 1343.000000, reward total was -21.000000. running mean: -20.999999
episode 1344.000000, reward total was -21.000000. running mean: -20.999999
episode 1345.000000, reward total was -21.000000. running mean: -20.999999
episode 1346.000000, reward total was -21.000000. running mean: -20.999999
episode 1347.000000, reward total was -21.000000. running mean: -20.999999
episode 1348.000000, reward total was -21.000000. running mean: -20.999999
episode 1349.000000, reward total was -21.000000. running mean: -20.999999
episode 1350.000000, reward total was -21.000000. running mean: -20.999999
episode 1351.000000, reward total was -21.000000. running mean: -20.999999
episode 1352.000000, reward total was -21.000000. running mean: -20.999999
episode 1353.000000, reward total was -21.000000. running mean: -20.999999
episode 1354.000000, rewa

episode 1451.000000, reward total was -21.000000. running mean: -21.000000
episode 1452.000000, reward total was -21.000000. running mean: -21.000000
episode 1453.000000, reward total was -21.000000. running mean: -21.000000
episode 1454.000000, reward total was -21.000000. running mean: -21.000000
episode 1455.000000, reward total was -21.000000. running mean: -21.000000
episode 1456.000000, reward total was -21.000000. running mean: -21.000000
episode 1457.000000, reward total was -21.000000. running mean: -21.000000
episode 1458.000000, reward total was -21.000000. running mean: -21.000000
episode 1459.000000, reward total was -21.000000. running mean: -21.000000
episode 1460.000000, reward total was -21.000000. running mean: -21.000000
episode 1461.000000, reward total was -21.000000. running mean: -21.000000
episode 1462.000000, reward total was -21.000000. running mean: -21.000000
episode 1463.000000, reward total was -21.000000. running mean: -21.000000
episode 1464.000000, rewa

episode 1561.000000, reward total was -21.000000. running mean: -21.000000
episode 1562.000000, reward total was -21.000000. running mean: -21.000000
episode 1563.000000, reward total was -21.000000. running mean: -21.000000
episode 1564.000000, reward total was -21.000000. running mean: -21.000000
episode 1565.000000, reward total was -21.000000. running mean: -21.000000
episode 1566.000000, reward total was -21.000000. running mean: -21.000000
episode 1567.000000, reward total was -21.000000. running mean: -21.000000
episode 1568.000000, reward total was -21.000000. running mean: -21.000000
episode 1569.000000, reward total was -21.000000. running mean: -21.000000
episode 1570.000000, reward total was -21.000000. running mean: -21.000000
episode 1571.000000, reward total was -21.000000. running mean: -21.000000
episode 1572.000000, reward total was -21.000000. running mean: -21.000000
episode 1573.000000, reward total was -21.000000. running mean: -21.000000
episode 1574.000000, rewa

episode 1671.000000, reward total was -21.000000. running mean: -21.000000
episode 1672.000000, reward total was -21.000000. running mean: -21.000000
episode 1673.000000, reward total was -21.000000. running mean: -21.000000
episode 1674.000000, reward total was -21.000000. running mean: -21.000000
episode 1675.000000, reward total was -21.000000. running mean: -21.000000
episode 1676.000000, reward total was -21.000000. running mean: -21.000000
episode 1677.000000, reward total was -21.000000. running mean: -21.000000
episode 1678.000000, reward total was -21.000000. running mean: -21.000000
episode 1679.000000, reward total was -21.000000. running mean: -21.000000
episode 1680.000000, reward total was -21.000000. running mean: -21.000000
episode 1681.000000, reward total was -21.000000. running mean: -21.000000
episode 1682.000000, reward total was -21.000000. running mean: -21.000000
episode 1683.000000, reward total was -21.000000. running mean: -21.000000
episode 1684.000000, rewa

episode 1781.000000, reward total was -21.000000. running mean: -21.000000
episode 1782.000000, reward total was -21.000000. running mean: -21.000000
episode 1783.000000, reward total was -21.000000. running mean: -21.000000
episode 1784.000000, reward total was -21.000000. running mean: -21.000000
episode 1785.000000, reward total was -21.000000. running mean: -21.000000
episode 1786.000000, reward total was -21.000000. running mean: -21.000000
episode 1787.000000, reward total was -21.000000. running mean: -21.000000
episode 1788.000000, reward total was -21.000000. running mean: -21.000000
episode 1789.000000, reward total was -21.000000. running mean: -21.000000
episode 1790.000000, reward total was -21.000000. running mean: -21.000000
episode 1791.000000, reward total was -21.000000. running mean: -21.000000
episode 1792.000000, reward total was -21.000000. running mean: -21.000000
episode 1793.000000, reward total was -21.000000. running mean: -21.000000
episode 1794.000000, rewa

episode 1891.000000, reward total was -21.000000. running mean: -21.000000
episode 1892.000000, reward total was -21.000000. running mean: -21.000000
episode 1893.000000, reward total was -21.000000. running mean: -21.000000
episode 1894.000000, reward total was -21.000000. running mean: -21.000000
episode 1895.000000, reward total was -21.000000. running mean: -21.000000
episode 1896.000000, reward total was -21.000000. running mean: -21.000000
episode 1897.000000, reward total was -21.000000. running mean: -21.000000
episode 1898.000000, reward total was -21.000000. running mean: -21.000000
episode 1899.000000, reward total was -21.000000. running mean: -21.000000
episode 1900.000000, reward total was -21.000000. running mean: -21.000000
episode 1901.000000, reward total was -21.000000. running mean: -21.000000
episode 1902.000000, reward total was -21.000000. running mean: -21.000000
episode 1903.000000, reward total was -21.000000. running mean: -21.000000
episode 1904.000000, rewa

episode 2001.000000, reward total was -21.000000. running mean: -21.000000
episode 2002.000000, reward total was -21.000000. running mean: -21.000000
episode 2003.000000, reward total was -21.000000. running mean: -21.000000
episode 2004.000000, reward total was -21.000000. running mean: -21.000000
episode 2005.000000, reward total was -21.000000. running mean: -21.000000
episode 2006.000000, reward total was -21.000000. running mean: -21.000000
episode 2007.000000, reward total was -21.000000. running mean: -21.000000
episode 2008.000000, reward total was -21.000000. running mean: -21.000000
episode 2009.000000, reward total was -21.000000. running mean: -21.000000
episode 2010.000000, reward total was -21.000000. running mean: -21.000000
episode 2011.000000, reward total was -21.000000. running mean: -21.000000
episode 2012.000000, reward total was -21.000000. running mean: -21.000000
episode 2013.000000, reward total was -21.000000. running mean: -21.000000
episode 2014.000000, rewa

episode 2111.000000, reward total was -21.000000. running mean: -21.000000
episode 2112.000000, reward total was -21.000000. running mean: -21.000000
episode 2113.000000, reward total was -21.000000. running mean: -21.000000
episode 2114.000000, reward total was -21.000000. running mean: -21.000000
episode 2115.000000, reward total was -21.000000. running mean: -21.000000
episode 2116.000000, reward total was -21.000000. running mean: -21.000000
episode 2117.000000, reward total was -21.000000. running mean: -21.000000
episode 2118.000000, reward total was -21.000000. running mean: -21.000000
episode 2119.000000, reward total was -21.000000. running mean: -21.000000
episode 2120.000000, reward total was -21.000000. running mean: -21.000000
episode 2121.000000, reward total was -21.000000. running mean: -21.000000
episode 2122.000000, reward total was -21.000000. running mean: -21.000000
episode 2123.000000, reward total was -21.000000. running mean: -21.000000
episode 2124.000000, rewa

episode 2221.000000, reward total was -21.000000. running mean: -21.000000
episode 2222.000000, reward total was -21.000000. running mean: -21.000000
episode 2223.000000, reward total was -21.000000. running mean: -21.000000
episode 2224.000000, reward total was -21.000000. running mean: -21.000000
episode 2225.000000, reward total was -21.000000. running mean: -21.000000
episode 2226.000000, reward total was -21.000000. running mean: -21.000000
episode 2227.000000, reward total was -21.000000. running mean: -21.000000
episode 2228.000000, reward total was -21.000000. running mean: -21.000000
episode 2229.000000, reward total was -21.000000. running mean: -21.000000
episode 2230.000000, reward total was -21.000000. running mean: -21.000000
episode 2231.000000, reward total was -21.000000. running mean: -21.000000
episode 2232.000000, reward total was -21.000000. running mean: -21.000000
episode 2233.000000, reward total was -21.000000. running mean: -21.000000
episode 2234.000000, rewa

episode 2331.000000, reward total was -21.000000. running mean: -21.000000
episode 2332.000000, reward total was -21.000000. running mean: -21.000000
episode 2333.000000, reward total was -21.000000. running mean: -21.000000
episode 2334.000000, reward total was -21.000000. running mean: -21.000000
episode 2335.000000, reward total was -21.000000. running mean: -21.000000
episode 2336.000000, reward total was -21.000000. running mean: -21.000000
episode 2337.000000, reward total was -21.000000. running mean: -21.000000
episode 2338.000000, reward total was -21.000000. running mean: -21.000000
episode 2339.000000, reward total was -21.000000. running mean: -21.000000
episode 2340.000000, reward total was -21.000000. running mean: -21.000000
episode 2341.000000, reward total was -21.000000. running mean: -21.000000
episode 2342.000000, reward total was -21.000000. running mean: -21.000000
episode 2343.000000, reward total was -21.000000. running mean: -21.000000
episode 2344.000000, rewa

episode 2441.000000, reward total was -21.000000. running mean: -21.000000
episode 2442.000000, reward total was -21.000000. running mean: -21.000000
episode 2443.000000, reward total was -21.000000. running mean: -21.000000
episode 2444.000000, reward total was -21.000000. running mean: -21.000000
episode 2445.000000, reward total was -21.000000. running mean: -21.000000
episode 2446.000000, reward total was -21.000000. running mean: -21.000000
episode 2447.000000, reward total was -21.000000. running mean: -21.000000
episode 2448.000000, reward total was -21.000000. running mean: -21.000000
episode 2449.000000, reward total was -21.000000. running mean: -21.000000
episode 2450.000000, reward total was -21.000000. running mean: -21.000000
episode 2451.000000, reward total was -21.000000. running mean: -21.000000
episode 2452.000000, reward total was -21.000000. running mean: -21.000000
episode 2453.000000, reward total was -21.000000. running mean: -21.000000
episode 2454.000000, rewa

episode 2551.000000, reward total was -21.000000. running mean: -21.000000
episode 2552.000000, reward total was -21.000000. running mean: -21.000000
episode 2553.000000, reward total was -21.000000. running mean: -21.000000
episode 2554.000000, reward total was -21.000000. running mean: -21.000000
episode 2555.000000, reward total was -21.000000. running mean: -21.000000
episode 2556.000000, reward total was -21.000000. running mean: -21.000000
episode 2557.000000, reward total was -21.000000. running mean: -21.000000
episode 2558.000000, reward total was -21.000000. running mean: -21.000000
episode 2559.000000, reward total was -21.000000. running mean: -21.000000
episode 2560.000000, reward total was -21.000000. running mean: -21.000000
episode 2561.000000, reward total was -21.000000. running mean: -21.000000
episode 2562.000000, reward total was -21.000000. running mean: -21.000000
episode 2563.000000, reward total was -21.000000. running mean: -21.000000
episode 2564.000000, rewa

episode 2661.000000, reward total was -21.000000. running mean: -21.000000
episode 2662.000000, reward total was -21.000000. running mean: -21.000000
episode 2663.000000, reward total was -21.000000. running mean: -21.000000
episode 2664.000000, reward total was -21.000000. running mean: -21.000000
episode 2665.000000, reward total was -21.000000. running mean: -21.000000
episode 2666.000000, reward total was -21.000000. running mean: -21.000000
episode 2667.000000, reward total was -21.000000. running mean: -21.000000
episode 2668.000000, reward total was -21.000000. running mean: -21.000000
episode 2669.000000, reward total was -21.000000. running mean: -21.000000
episode 2670.000000, reward total was -21.000000. running mean: -21.000000
episode 2671.000000, reward total was -21.000000. running mean: -21.000000
episode 2672.000000, reward total was -21.000000. running mean: -21.000000
episode 2673.000000, reward total was -21.000000. running mean: -21.000000
episode 2674.000000, rewa

episode 2771.000000, reward total was -21.000000. running mean: -21.000000
episode 2772.000000, reward total was -21.000000. running mean: -21.000000
episode 2773.000000, reward total was -21.000000. running mean: -21.000000
episode 2774.000000, reward total was -21.000000. running mean: -21.000000
episode 2775.000000, reward total was -21.000000. running mean: -21.000000
episode 2776.000000, reward total was -21.000000. running mean: -21.000000
episode 2777.000000, reward total was -21.000000. running mean: -21.000000
episode 2778.000000, reward total was -21.000000. running mean: -21.000000
episode 2779.000000, reward total was -21.000000. running mean: -21.000000
episode 2780.000000, reward total was -21.000000. running mean: -21.000000
episode 2781.000000, reward total was -21.000000. running mean: -21.000000
episode 2782.000000, reward total was -21.000000. running mean: -21.000000
episode 2783.000000, reward total was -21.000000. running mean: -21.000000
episode 2784.000000, rewa

episode 2881.000000, reward total was -21.000000. running mean: -21.000000
episode 2882.000000, reward total was -21.000000. running mean: -21.000000
episode 2883.000000, reward total was -21.000000. running mean: -21.000000
episode 2884.000000, reward total was -21.000000. running mean: -21.000000
episode 2885.000000, reward total was -21.000000. running mean: -21.000000
episode 2886.000000, reward total was -21.000000. running mean: -21.000000
episode 2887.000000, reward total was -21.000000. running mean: -21.000000
episode 2888.000000, reward total was -21.000000. running mean: -21.000000
episode 2889.000000, reward total was -21.000000. running mean: -21.000000
episode 2890.000000, reward total was -21.000000. running mean: -21.000000
episode 2891.000000, reward total was -21.000000. running mean: -21.000000
episode 2892.000000, reward total was -21.000000. running mean: -21.000000
episode 2893.000000, reward total was -21.000000. running mean: -21.000000
episode 2894.000000, rewa

episode 2991.000000, reward total was -21.000000. running mean: -21.000000
episode 2992.000000, reward total was -21.000000. running mean: -21.000000
episode 2993.000000, reward total was -21.000000. running mean: -21.000000
episode 2994.000000, reward total was -21.000000. running mean: -21.000000
episode 2995.000000, reward total was -21.000000. running mean: -21.000000
episode 2996.000000, reward total was -21.000000. running mean: -21.000000
episode 2997.000000, reward total was -21.000000. running mean: -21.000000
episode 2998.000000, reward total was -21.000000. running mean: -21.000000
episode 2999.000000, reward total was -21.000000. running mean: -21.000000
episode 3000.000000, reward total was -21.000000. running mean: -21.000000
episode 3001.000000, reward total was -21.000000. running mean: -21.000000
episode 3002.000000, reward total was -21.000000. running mean: -21.000000
episode 3003.000000, reward total was -21.000000. running mean: -21.000000
episode 3004.000000, rewa

episode 3101.000000, reward total was -21.000000. running mean: -21.000000
episode 3102.000000, reward total was -21.000000. running mean: -21.000000
episode 3103.000000, reward total was -21.000000. running mean: -21.000000
episode 3104.000000, reward total was -21.000000. running mean: -21.000000
episode 3105.000000, reward total was -21.000000. running mean: -21.000000
episode 3106.000000, reward total was -21.000000. running mean: -21.000000
episode 3107.000000, reward total was -21.000000. running mean: -21.000000
episode 3108.000000, reward total was -21.000000. running mean: -21.000000
episode 3109.000000, reward total was -21.000000. running mean: -21.000000
episode 3110.000000, reward total was -21.000000. running mean: -21.000000
episode 3111.000000, reward total was -21.000000. running mean: -21.000000
episode 3112.000000, reward total was -21.000000. running mean: -21.000000
episode 3113.000000, reward total was -21.000000. running mean: -21.000000
episode 3114.000000, rewa

episode 3211.000000, reward total was -21.000000. running mean: -21.000000
episode 3212.000000, reward total was -21.000000. running mean: -21.000000
episode 3213.000000, reward total was -21.000000. running mean: -21.000000
episode 3214.000000, reward total was -21.000000. running mean: -21.000000
episode 3215.000000, reward total was -21.000000. running mean: -21.000000
episode 3216.000000, reward total was -21.000000. running mean: -21.000000
episode 3217.000000, reward total was -21.000000. running mean: -21.000000
episode 3218.000000, reward total was -21.000000. running mean: -21.000000
episode 3219.000000, reward total was -21.000000. running mean: -21.000000
episode 3220.000000, reward total was -21.000000. running mean: -21.000000
episode 3221.000000, reward total was -21.000000. running mean: -21.000000
episode 3222.000000, reward total was -21.000000. running mean: -21.000000
episode 3223.000000, reward total was -21.000000. running mean: -21.000000
episode 3224.000000, rewa

episode 3321.000000, reward total was -21.000000. running mean: -21.000000
episode 3322.000000, reward total was -21.000000. running mean: -21.000000
episode 3323.000000, reward total was -21.000000. running mean: -21.000000
episode 3324.000000, reward total was -21.000000. running mean: -21.000000
episode 3325.000000, reward total was -21.000000. running mean: -21.000000
episode 3326.000000, reward total was -21.000000. running mean: -21.000000
episode 3327.000000, reward total was -21.000000. running mean: -21.000000
episode 3328.000000, reward total was -21.000000. running mean: -21.000000
episode 3329.000000, reward total was -21.000000. running mean: -21.000000
episode 3330.000000, reward total was -21.000000. running mean: -21.000000
episode 3331.000000, reward total was -21.000000. running mean: -21.000000
episode 3332.000000, reward total was -21.000000. running mean: -21.000000
episode 3333.000000, reward total was -21.000000. running mean: -21.000000
episode 3334.000000, rewa

episode 3431.000000, reward total was -21.000000. running mean: -21.000000
episode 3432.000000, reward total was -21.000000. running mean: -21.000000
episode 3433.000000, reward total was -21.000000. running mean: -21.000000
episode 3434.000000, reward total was -21.000000. running mean: -21.000000
episode 3435.000000, reward total was -21.000000. running mean: -21.000000
episode 3436.000000, reward total was -21.000000. running mean: -21.000000
episode 3437.000000, reward total was -21.000000. running mean: -21.000000
episode 3438.000000, reward total was -21.000000. running mean: -21.000000
episode 3439.000000, reward total was -21.000000. running mean: -21.000000
episode 3440.000000, reward total was -21.000000. running mean: -21.000000
episode 3441.000000, reward total was -21.000000. running mean: -21.000000
episode 3442.000000, reward total was -21.000000. running mean: -21.000000
episode 3443.000000, reward total was -21.000000. running mean: -21.000000
episode 3444.000000, rewa

episode 3541.000000, reward total was -21.000000. running mean: -21.000000
episode 3542.000000, reward total was -21.000000. running mean: -21.000000
episode 3543.000000, reward total was -21.000000. running mean: -21.000000
episode 3544.000000, reward total was -21.000000. running mean: -21.000000
episode 3545.000000, reward total was -21.000000. running mean: -21.000000
episode 3546.000000, reward total was -21.000000. running mean: -21.000000
episode 3547.000000, reward total was -21.000000. running mean: -21.000000
episode 3548.000000, reward total was -21.000000. running mean: -21.000000
episode 3549.000000, reward total was -21.000000. running mean: -21.000000
episode 3550.000000, reward total was -21.000000. running mean: -21.000000
episode 3551.000000, reward total was -21.000000. running mean: -21.000000
episode 3552.000000, reward total was -21.000000. running mean: -21.000000
episode 3553.000000, reward total was -21.000000. running mean: -21.000000
episode 3554.000000, rewa

episode 3651.000000, reward total was -21.000000. running mean: -21.000000
episode 3652.000000, reward total was -21.000000. running mean: -21.000000
episode 3653.000000, reward total was -21.000000. running mean: -21.000000
episode 3654.000000, reward total was -21.000000. running mean: -21.000000
episode 3655.000000, reward total was -21.000000. running mean: -21.000000
episode 3656.000000, reward total was -21.000000. running mean: -21.000000
episode 3657.000000, reward total was -21.000000. running mean: -21.000000
episode 3658.000000, reward total was -21.000000. running mean: -21.000000
episode 3659.000000, reward total was -21.000000. running mean: -21.000000
episode 3660.000000, reward total was -21.000000. running mean: -21.000000
episode 3661.000000, reward total was -21.000000. running mean: -21.000000
episode 3662.000000, reward total was -21.000000. running mean: -21.000000
episode 3663.000000, reward total was -21.000000. running mean: -21.000000
episode 3664.000000, rewa

episode 3761.000000, reward total was -21.000000. running mean: -21.000000
episode 3762.000000, reward total was -21.000000. running mean: -21.000000
episode 3763.000000, reward total was -21.000000. running mean: -21.000000
episode 3764.000000, reward total was -21.000000. running mean: -21.000000
episode 3765.000000, reward total was -21.000000. running mean: -21.000000
episode 3766.000000, reward total was -21.000000. running mean: -21.000000
episode 3767.000000, reward total was -21.000000. running mean: -21.000000
episode 3768.000000, reward total was -21.000000. running mean: -21.000000
episode 3769.000000, reward total was -21.000000. running mean: -21.000000
episode 3770.000000, reward total was -21.000000. running mean: -21.000000
episode 3771.000000, reward total was -21.000000. running mean: -21.000000
episode 3772.000000, reward total was -21.000000. running mean: -21.000000
episode 3773.000000, reward total was -21.000000. running mean: -21.000000
episode 3774.000000, rewa

episode 3871.000000, reward total was -21.000000. running mean: -21.000000
episode 3872.000000, reward total was -21.000000. running mean: -21.000000
episode 3873.000000, reward total was -21.000000. running mean: -21.000000
episode 3874.000000, reward total was -21.000000. running mean: -21.000000
episode 3875.000000, reward total was -21.000000. running mean: -21.000000
episode 3876.000000, reward total was -21.000000. running mean: -21.000000
episode 3877.000000, reward total was -21.000000. running mean: -21.000000
episode 3878.000000, reward total was -21.000000. running mean: -21.000000
episode 3879.000000, reward total was -21.000000. running mean: -21.000000
episode 3880.000000, reward total was -21.000000. running mean: -21.000000
episode 3881.000000, reward total was -21.000000. running mean: -21.000000
episode 3882.000000, reward total was -21.000000. running mean: -21.000000
episode 3883.000000, reward total was -21.000000. running mean: -21.000000
episode 3884.000000, rewa

episode 3981.000000, reward total was -21.000000. running mean: -21.000000
episode 3982.000000, reward total was -21.000000. running mean: -21.000000
episode 3983.000000, reward total was -21.000000. running mean: -21.000000
episode 3984.000000, reward total was -21.000000. running mean: -21.000000
episode 3985.000000, reward total was -21.000000. running mean: -21.000000
episode 3986.000000, reward total was -21.000000. running mean: -21.000000
episode 3987.000000, reward total was -21.000000. running mean: -21.000000
episode 3988.000000, reward total was -21.000000. running mean: -21.000000
episode 3989.000000, reward total was -21.000000. running mean: -21.000000
episode 3990.000000, reward total was -21.000000. running mean: -21.000000
episode 3991.000000, reward total was -21.000000. running mean: -21.000000
episode 3992.000000, reward total was -21.000000. running mean: -21.000000
episode 3993.000000, reward total was -21.000000. running mean: -21.000000
episode 3994.000000, rewa

episode 4091.000000, reward total was -21.000000. running mean: -21.000000
episode 4092.000000, reward total was -21.000000. running mean: -21.000000
episode 4093.000000, reward total was -21.000000. running mean: -21.000000
episode 4094.000000, reward total was -21.000000. running mean: -21.000000
episode 4095.000000, reward total was -21.000000. running mean: -21.000000
episode 4096.000000, reward total was -21.000000. running mean: -21.000000
episode 4097.000000, reward total was -21.000000. running mean: -21.000000
episode 4098.000000, reward total was -21.000000. running mean: -21.000000
episode 4099.000000, reward total was -21.000000. running mean: -21.000000
episode 4100.000000, reward total was -21.000000. running mean: -21.000000
episode 4101.000000, reward total was -21.000000. running mean: -21.000000
episode 4102.000000, reward total was -21.000000. running mean: -21.000000
episode 4103.000000, reward total was -21.000000. running mean: -21.000000
episode 4104.000000, rewa

episode 4201.000000, reward total was -21.000000. running mean: -21.000000
episode 4202.000000, reward total was -21.000000. running mean: -21.000000
episode 4203.000000, reward total was -21.000000. running mean: -21.000000
episode 4204.000000, reward total was -21.000000. running mean: -21.000000
episode 4205.000000, reward total was -21.000000. running mean: -21.000000
episode 4206.000000, reward total was -21.000000. running mean: -21.000000
episode 4207.000000, reward total was -21.000000. running mean: -21.000000
episode 4208.000000, reward total was -21.000000. running mean: -21.000000
episode 4209.000000, reward total was -21.000000. running mean: -21.000000
episode 4210.000000, reward total was -21.000000. running mean: -21.000000
episode 4211.000000, reward total was -21.000000. running mean: -21.000000
episode 4212.000000, reward total was -21.000000. running mean: -21.000000
episode 4213.000000, reward total was -21.000000. running mean: -21.000000
episode 4214.000000, rewa

episode 4311.000000, reward total was -21.000000. running mean: -21.000000
episode 4312.000000, reward total was -21.000000. running mean: -21.000000
episode 4313.000000, reward total was -21.000000. running mean: -21.000000
episode 4314.000000, reward total was -21.000000. running mean: -21.000000
episode 4315.000000, reward total was -21.000000. running mean: -21.000000
episode 4316.000000, reward total was -21.000000. running mean: -21.000000
episode 4317.000000, reward total was -21.000000. running mean: -21.000000
episode 4318.000000, reward total was -21.000000. running mean: -21.000000
episode 4319.000000, reward total was -21.000000. running mean: -21.000000
episode 4320.000000, reward total was -21.000000. running mean: -21.000000
episode 4321.000000, reward total was -21.000000. running mean: -21.000000
episode 4322.000000, reward total was -21.000000. running mean: -21.000000
episode 4323.000000, reward total was -21.000000. running mean: -21.000000
episode 4324.000000, rewa

episode 4421.000000, reward total was -21.000000. running mean: -21.000000
episode 4422.000000, reward total was -21.000000. running mean: -21.000000
episode 4423.000000, reward total was -21.000000. running mean: -21.000000
episode 4424.000000, reward total was -21.000000. running mean: -21.000000
episode 4425.000000, reward total was -21.000000. running mean: -21.000000
episode 4426.000000, reward total was -21.000000. running mean: -21.000000
episode 4427.000000, reward total was -21.000000. running mean: -21.000000
episode 4428.000000, reward total was -21.000000. running mean: -21.000000
episode 4429.000000, reward total was -21.000000. running mean: -21.000000
episode 4430.000000, reward total was -21.000000. running mean: -21.000000
episode 4431.000000, reward total was -21.000000. running mean: -21.000000
episode 4432.000000, reward total was -21.000000. running mean: -21.000000
episode 4433.000000, reward total was -21.000000. running mean: -21.000000
episode 4434.000000, rewa

episode 4531.000000, reward total was -21.000000. running mean: -21.000000
episode 4532.000000, reward total was -21.000000. running mean: -21.000000
episode 4533.000000, reward total was -21.000000. running mean: -21.000000
episode 4534.000000, reward total was -21.000000. running mean: -21.000000
episode 4535.000000, reward total was -21.000000. running mean: -21.000000
episode 4536.000000, reward total was -21.000000. running mean: -21.000000
episode 4537.000000, reward total was -21.000000. running mean: -21.000000
episode 4538.000000, reward total was -21.000000. running mean: -21.000000
episode 4539.000000, reward total was -21.000000. running mean: -21.000000
episode 4540.000000, reward total was -21.000000. running mean: -21.000000
episode 4541.000000, reward total was -21.000000. running mean: -21.000000
episode 4542.000000, reward total was -21.000000. running mean: -21.000000
episode 4543.000000, reward total was -21.000000. running mean: -21.000000
episode 4544.000000, rewa

episode 4641.000000, reward total was -21.000000. running mean: -21.000000
episode 4642.000000, reward total was -21.000000. running mean: -21.000000
episode 4643.000000, reward total was -21.000000. running mean: -21.000000
episode 4644.000000, reward total was -21.000000. running mean: -21.000000
episode 4645.000000, reward total was -21.000000. running mean: -21.000000
episode 4646.000000, reward total was -21.000000. running mean: -21.000000
episode 4647.000000, reward total was -21.000000. running mean: -21.000000
episode 4648.000000, reward total was -21.000000. running mean: -21.000000
episode 4649.000000, reward total was -21.000000. running mean: -21.000000
episode 4650.000000, reward total was -21.000000. running mean: -21.000000
episode 4651.000000, reward total was -21.000000. running mean: -21.000000
episode 4652.000000, reward total was -21.000000. running mean: -21.000000
episode 4653.000000, reward total was -21.000000. running mean: -21.000000
episode 4654.000000, rewa

episode 4751.000000, reward total was -21.000000. running mean: -21.000000
episode 4752.000000, reward total was -21.000000. running mean: -21.000000
episode 4753.000000, reward total was -21.000000. running mean: -21.000000
episode 4754.000000, reward total was -21.000000. running mean: -21.000000
episode 4755.000000, reward total was -21.000000. running mean: -21.000000
episode 4756.000000, reward total was -21.000000. running mean: -21.000000
episode 4757.000000, reward total was -21.000000. running mean: -21.000000
episode 4758.000000, reward total was -21.000000. running mean: -21.000000
episode 4759.000000, reward total was -21.000000. running mean: -21.000000
episode 4760.000000, reward total was -21.000000. running mean: -21.000000
episode 4761.000000, reward total was -21.000000. running mean: -21.000000
episode 4762.000000, reward total was -21.000000. running mean: -21.000000
episode 4763.000000, reward total was -21.000000. running mean: -21.000000
episode 4764.000000, rewa

episode 4861.000000, reward total was -21.000000. running mean: -21.000000
episode 4862.000000, reward total was -21.000000. running mean: -21.000000
episode 4863.000000, reward total was -21.000000. running mean: -21.000000
episode 4864.000000, reward total was -21.000000. running mean: -21.000000
episode 4865.000000, reward total was -21.000000. running mean: -21.000000
episode 4866.000000, reward total was -21.000000. running mean: -21.000000
episode 4867.000000, reward total was -21.000000. running mean: -21.000000
episode 4868.000000, reward total was -21.000000. running mean: -21.000000
episode 4869.000000, reward total was -21.000000. running mean: -21.000000
episode 4870.000000, reward total was -21.000000. running mean: -21.000000
episode 4871.000000, reward total was -21.000000. running mean: -21.000000
episode 4872.000000, reward total was -21.000000. running mean: -21.000000
episode 4873.000000, reward total was -21.000000. running mean: -21.000000
episode 4874.000000, rewa

episode 4971.000000, reward total was -21.000000. running mean: -21.000000
episode 4972.000000, reward total was -21.000000. running mean: -21.000000
episode 4973.000000, reward total was -21.000000. running mean: -21.000000
episode 4974.000000, reward total was -21.000000. running mean: -21.000000
episode 4975.000000, reward total was -21.000000. running mean: -21.000000
episode 4976.000000, reward total was -21.000000. running mean: -21.000000
episode 4977.000000, reward total was -21.000000. running mean: -21.000000
episode 4978.000000, reward total was -21.000000. running mean: -21.000000
episode 4979.000000, reward total was -21.000000. running mean: -21.000000
episode 4980.000000, reward total was -21.000000. running mean: -21.000000
episode 4981.000000, reward total was -21.000000. running mean: -21.000000
episode 4982.000000, reward total was -21.000000. running mean: -21.000000
episode 4983.000000, reward total was -21.000000. running mean: -21.000000
episode 4984.000000, rewa

episode 5081.000000, reward total was -21.000000. running mean: -21.000000
episode 5082.000000, reward total was -21.000000. running mean: -21.000000
episode 5083.000000, reward total was -21.000000. running mean: -21.000000
episode 5084.000000, reward total was -21.000000. running mean: -21.000000
episode 5085.000000, reward total was -21.000000. running mean: -21.000000
episode 5086.000000, reward total was -21.000000. running mean: -21.000000
episode 5087.000000, reward total was -21.000000. running mean: -21.000000
episode 5088.000000, reward total was -21.000000. running mean: -21.000000
episode 5089.000000, reward total was -21.000000. running mean: -21.000000
episode 5090.000000, reward total was -21.000000. running mean: -21.000000
episode 5091.000000, reward total was -21.000000. running mean: -21.000000
episode 5092.000000, reward total was -21.000000. running mean: -21.000000
episode 5093.000000, reward total was -21.000000. running mean: -21.000000
episode 5094.000000, rewa

episode 5191.000000, reward total was -21.000000. running mean: -21.000000
episode 5192.000000, reward total was -21.000000. running mean: -21.000000
episode 5193.000000, reward total was -21.000000. running mean: -21.000000
episode 5194.000000, reward total was -21.000000. running mean: -21.000000
episode 5195.000000, reward total was -21.000000. running mean: -21.000000
episode 5196.000000, reward total was -21.000000. running mean: -21.000000
episode 5197.000000, reward total was -21.000000. running mean: -21.000000
episode 5198.000000, reward total was -21.000000. running mean: -21.000000
episode 5199.000000, reward total was -21.000000. running mean: -21.000000
episode 5200.000000, reward total was -21.000000. running mean: -21.000000
episode 5201.000000, reward total was -21.000000. running mean: -21.000000
episode 5202.000000, reward total was -21.000000. running mean: -21.000000
episode 5203.000000, reward total was -21.000000. running mean: -21.000000
episode 5204.000000, rewa

episode 5301.000000, reward total was -21.000000. running mean: -21.000000
episode 5302.000000, reward total was -21.000000. running mean: -21.000000
episode 5303.000000, reward total was -21.000000. running mean: -21.000000
episode 5304.000000, reward total was -21.000000. running mean: -21.000000
episode 5305.000000, reward total was -21.000000. running mean: -21.000000
episode 5306.000000, reward total was -21.000000. running mean: -21.000000
episode 5307.000000, reward total was -21.000000. running mean: -21.000000
episode 5308.000000, reward total was -21.000000. running mean: -21.000000
episode 5309.000000, reward total was -21.000000. running mean: -21.000000
episode 5310.000000, reward total was -21.000000. running mean: -21.000000
episode 5311.000000, reward total was -21.000000. running mean: -21.000000
episode 5312.000000, reward total was -21.000000. running mean: -21.000000
episode 5313.000000, reward total was -21.000000. running mean: -21.000000
episode 5314.000000, rewa

episode 5411.000000, reward total was -21.000000. running mean: -21.000000
episode 5412.000000, reward total was -21.000000. running mean: -21.000000
episode 5413.000000, reward total was -21.000000. running mean: -21.000000
episode 5414.000000, reward total was -21.000000. running mean: -21.000000
episode 5415.000000, reward total was -21.000000. running mean: -21.000000
episode 5416.000000, reward total was -21.000000. running mean: -21.000000
episode 5417.000000, reward total was -21.000000. running mean: -21.000000
episode 5418.000000, reward total was -21.000000. running mean: -21.000000
episode 5419.000000, reward total was -21.000000. running mean: -21.000000
episode 5420.000000, reward total was -21.000000. running mean: -21.000000
episode 5421.000000, reward total was -21.000000. running mean: -21.000000
episode 5422.000000, reward total was -21.000000. running mean: -21.000000
episode 5423.000000, reward total was -21.000000. running mean: -21.000000
episode 5424.000000, rewa

episode 5521.000000, reward total was -21.000000. running mean: -21.000000
episode 5522.000000, reward total was -21.000000. running mean: -21.000000
episode 5523.000000, reward total was -21.000000. running mean: -21.000000
episode 5524.000000, reward total was -21.000000. running mean: -21.000000
episode 5525.000000, reward total was -21.000000. running mean: -21.000000
episode 5526.000000, reward total was -21.000000. running mean: -21.000000
episode 5527.000000, reward total was -21.000000. running mean: -21.000000
episode 5528.000000, reward total was -21.000000. running mean: -21.000000
episode 5529.000000, reward total was -21.000000. running mean: -21.000000
episode 5530.000000, reward total was -21.000000. running mean: -21.000000
episode 5531.000000, reward total was -21.000000. running mean: -21.000000
episode 5532.000000, reward total was -21.000000. running mean: -21.000000
episode 5533.000000, reward total was -21.000000. running mean: -21.000000
episode 5534.000000, rewa

episode 5631.000000, reward total was -21.000000. running mean: -21.000000
episode 5632.000000, reward total was -21.000000. running mean: -21.000000
episode 5633.000000, reward total was -21.000000. running mean: -21.000000
episode 5634.000000, reward total was -21.000000. running mean: -21.000000
episode 5635.000000, reward total was -21.000000. running mean: -21.000000
episode 5636.000000, reward total was -21.000000. running mean: -21.000000
episode 5637.000000, reward total was -21.000000. running mean: -21.000000
episode 5638.000000, reward total was -21.000000. running mean: -21.000000
episode 5639.000000, reward total was -21.000000. running mean: -21.000000
episode 5640.000000, reward total was -21.000000. running mean: -21.000000
episode 5641.000000, reward total was -21.000000. running mean: -21.000000
episode 5642.000000, reward total was -21.000000. running mean: -21.000000
episode 5643.000000, reward total was -21.000000. running mean: -21.000000
episode 5644.000000, rewa

episode 5741.000000, reward total was -21.000000. running mean: -21.000000
episode 5742.000000, reward total was -21.000000. running mean: -21.000000
episode 5743.000000, reward total was -21.000000. running mean: -21.000000
episode 5744.000000, reward total was -21.000000. running mean: -21.000000
episode 5745.000000, reward total was -21.000000. running mean: -21.000000
episode 5746.000000, reward total was -21.000000. running mean: -21.000000
episode 5747.000000, reward total was -21.000000. running mean: -21.000000
episode 5748.000000, reward total was -21.000000. running mean: -21.000000
episode 5749.000000, reward total was -21.000000. running mean: -21.000000
episode 5750.000000, reward total was -21.000000. running mean: -21.000000
episode 5751.000000, reward total was -21.000000. running mean: -21.000000
episode 5752.000000, reward total was -21.000000. running mean: -21.000000
episode 5753.000000, reward total was -21.000000. running mean: -21.000000
episode 5754.000000, rewa

episode 5851.000000, reward total was -21.000000. running mean: -21.000000
episode 5852.000000, reward total was -21.000000. running mean: -21.000000
episode 5853.000000, reward total was -21.000000. running mean: -21.000000
episode 5854.000000, reward total was -21.000000. running mean: -21.000000
episode 5855.000000, reward total was -21.000000. running mean: -21.000000
episode 5856.000000, reward total was -21.000000. running mean: -21.000000
episode 5857.000000, reward total was -21.000000. running mean: -21.000000
episode 5858.000000, reward total was -21.000000. running mean: -21.000000
episode 5859.000000, reward total was -21.000000. running mean: -21.000000
episode 5860.000000, reward total was -21.000000. running mean: -21.000000
episode 5861.000000, reward total was -21.000000. running mean: -21.000000
episode 5862.000000, reward total was -21.000000. running mean: -21.000000
episode 5863.000000, reward total was -21.000000. running mean: -21.000000
episode 5864.000000, rewa

episode 5961.000000, reward total was -21.000000. running mean: -21.000000
episode 5962.000000, reward total was -21.000000. running mean: -21.000000
episode 5963.000000, reward total was -21.000000. running mean: -21.000000
episode 5964.000000, reward total was -21.000000. running mean: -21.000000
episode 5965.000000, reward total was -21.000000. running mean: -21.000000
episode 5966.000000, reward total was -21.000000. running mean: -21.000000
episode 5967.000000, reward total was -21.000000. running mean: -21.000000
episode 5968.000000, reward total was -21.000000. running mean: -21.000000
episode 5969.000000, reward total was -21.000000. running mean: -21.000000
episode 5970.000000, reward total was -21.000000. running mean: -21.000000
episode 5971.000000, reward total was -21.000000. running mean: -21.000000
episode 5972.000000, reward total was -21.000000. running mean: -21.000000
episode 5973.000000, reward total was -21.000000. running mean: -21.000000
episode 5974.000000, rewa

In [5]:
play_game(env, model)

See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.warn(
  return 1.0 / (1.0 + np.exp(-x)) # sigmoid "squashing" function to interval [0,1]


Episode finished without success, accumulated reward = -20.0
