In [None]:
""" Trains an agent with (stochastic) Policy Gradients on Pong. Uses OpenAI Gym. """
import numpy as np
import _pickle as pickle
import gym

# hyperparameters
H = 200 # number of hidden layer neurons
batch_size = 10 # every how many episodes to do a param update?
learning_rate = 1e-4 # step size of the RMSProp parameter update
gamma = 0.99 # discount factor for reward
decay_rate = 0.99 # decay factor for RMSProp leaky sum of grad^2
resume = False # resume from previous checkpoint?
render = True # call env.render() on every step of the training loop

# model initialization
D = 80 * 80 # input dimensionality: 80x80 grid
if resume:
  # NOTE: unpickling executes whatever the file encodes; only resume from a 'save.p' this script wrote.
  with open('save.p', 'rb') as checkpoint: # context manager closes the handle once the model is loaded
    model = pickle.load(checkpoint)
else:
  model = {
    'W1': np.random.randn(H,D) / np.sqrt(D), # "Xavier" initialization
    'W2': np.random.randn(H) / np.sqrt(H),
  }

# one zero-filled array per weight matrix, same shape as the weights
grad_buffer = { k : np.zeros_like(v) for k,v in model.items() } # update buffers that add up gradients over a batch
rmsprop_cache = { k : np.zeros_like(v) for k,v in model.items() } # rmsprop memory

def sigmoid(x):
  """ logistic "squashing" function: maps x (scalar or array, elementwise) into the interval [0,1] """
  denominator = 1.0 + np.exp(-x)
  return 1.0 / denominator

def prepro(I):
  """ prepro 210x160x3 uint8 frame into 6400 (80x80) 1D float vector

  I: frame array indexed [row, column, channel]; it is only read, never written.
  returns: flat float64 array that is 1.0 where the kept channel-0 pixel is neither 0
  nor one of the two background values (144, 109), and 0.0 everywhere else.
  """
  frame = I[35:195:2, ::2, 0] # crop rows 35..194, then keep every 2nd row/column of channel 0
  # the comparisons allocate a fresh boolean mask, so the caller's frame is not modified
  foreground = (frame != 0) & (frame != 144) & (frame != 109)
  # np.float64 instead of the np.float alias, which NumPy 1.24 removed
  return foreground.astype(np.float64).ravel()

def discount_rewards(r, discount=None):
  """ take 1D float array of rewards and compute discounted reward

  r: rewards ordered in time (array-like). Each r[t] must be a single value, so a 1D
     array or a column with one entry per row both work. Non-float rewards are
     promoted to float64 so the discounted values are not truncated when stored.
  discount: per-step discount factor; when omitted the module-level gamma is used.
  returns: array shaped like r holding the discounted return for every time step.
  """
  if discount is None: discount = gamma
  r = np.asarray(r)
  # keep a float input's dtype; anything else (e.g. int rewards) would truncate the result
  out_dtype = r.dtype if np.issubdtype(r.dtype, np.floating) else np.float64
  discounted_r = np.zeros_like(r, dtype=out_dtype)
  running_add = 0
  for t in reversed(range(r.size)):
    if r[t] != 0: running_add = 0 # reset the sum, since this was a game boundary (pong specific!)
    running_add = running_add * discount + r[t]
    discounted_r[t] = running_add
  return discounted_r

def policy_forward(x):
  """ forward pass of the two-layer policy stored in the module-level model dict.
  returns (p, h): p is the sigmoid of the output unit, h the hidden activations after the ReLU """
  hidden = np.dot(model['W1'], x)
  np.putmask(hidden, hidden < 0, 0) # ReLU nonlinearity, applied in place
  logit = np.dot(model['W2'], hidden)
  return sigmoid(logit), hidden # probability of taking action 2, and hidden state

def policy_backward(eph, epdlogp):
  """ backward pass: turn the per-step output gradients into gradients for both weight matrices.
  eph is the array of stacked hidden states, epdlogp the stacked output gradients.
  NOTE: besides its arguments this reads the module-level model['W2'] and epx (the stacked inputs).
  returns a dict with the same keys as model ('W1', 'W2'). """
  grad_w2 = eph.T.dot(epdlogp).ravel()
  grad_hidden = np.outer(epdlogp, model['W2'])
  np.putmask(grad_hidden, eph <= 0, 0) # backprop through the ReLU: no gradient where the unit was not active
  grad_w1 = grad_hidden.T.dot(epx)
  return {'W1': grad_w1, 'W2': grad_w2}

# Training loop: play Pong forever, one env step per iteration, and update the policy
# from the accumulated gradients every batch_size episodes.
# Uses the gym API in which reset() returns the observation and step() returns a 4-tuple.
env = gym.make("Pong-v0")
observation = env.reset()
prev_x = None # used in computing the difference frame
xs,hs,dlogps,drs = [],[],[],[] # per-step inputs, hidden states, action gradients, rewards of the current episode
running_reward = None # exponential moving average of the episode reward totals
reward_sum = 0
episode_number = 0
while True:
  if render: env.render()

  # preprocess the observation, set input to network to be difference image
  cur_x = prepro(observation)
  x = cur_x - prev_x if prev_x is not None else np.zeros(D)
  prev_x = cur_x

  # forward the policy network and sample an action from the returned probability
  aprob, h = policy_forward(x)
  action = 2 if np.random.uniform() < aprob else 3 # roll the dice!

  # record various intermediates (needed later for backprop)
  xs.append(x) # observation
  hs.append(h) # hidden state
  y = 1 if action == 2 else 0 # a "fake label"
  dlogps.append(y - aprob) # grad that encourages the action that was taken to be taken (see http://cs231n.github.io/neural-networks-2/#losses if confused)

  # step the environment and get new measurements
  observation, reward, done, info = env.step(action)
  reward_sum += reward

  drs.append(reward) # record reward (has to be done after we call step() to get reward for previous action)

  # Report the point before the episode bookkeeping below bumps episode_number, so the
  # last point of an episode is labelled with the episode it belongs to.
  if reward != 0: # Pong has either +1 or -1 reward exactly when game ends.
    print('ep %d: game finished, reward: %f%s' % (episode_number, reward, '' if reward == -1 else ' !!!!!!!!'))

  if done: # an episode finished
    episode_number += 1

    # stack together all inputs, hidden states, action gradients, and rewards for this episode
    epx = np.vstack(xs) # must keep this name: policy_backward reads it as a global
    eph = np.vstack(hs)
    epdlogp = np.vstack(dlogps)
    epr = np.vstack(drs)
    xs,hs,dlogps,drs = [],[],[],[] # reset array memory

    # compute the discounted reward backwards through time
    discounted_epr = discount_rewards(epr)
    # standardize the rewards to be unit normal (helps control the gradient estimator variance)
    discounted_epr -= np.mean(discounted_epr)
    reward_std = np.std(discounted_epr)
    if reward_std > 0: discounted_epr /= reward_std # a zero spread would otherwise put NaN into the weights

    epdlogp *= discounted_epr # modulate the gradient with advantage (PG magic happens right here.)
    grad = policy_backward(eph, epdlogp)
    for k in model: grad_buffer[k] += grad[k] # accumulate grad over batch

    # perform rmsprop parameter update every batch_size episodes
    if episode_number % batch_size == 0:
      for k,v in model.items():
        g = grad_buffer[k] # gradient
        rmsprop_cache[k] = decay_rate * rmsprop_cache[k] + (1 - decay_rate) * g**2
        model[k] += learning_rate * g / (np.sqrt(rmsprop_cache[k]) + 1e-5) # "+=": step along the accumulated gradient
        grad_buffer[k] = np.zeros_like(v) # reset batch gradient buffer

    # boring book-keeping
    running_reward = reward_sum if running_reward is None else running_reward * 0.99 + reward_sum * 0.01
    print('resetting env. episode reward total was %f. running mean: %f' % (reward_sum, running_reward))
    if episode_number % 100 == 0:
      with open('save.p', 'wb') as checkpoint: # close the checkpoint file as soon as it is written
        pickle.dump(model, checkpoint)
    reward_sum = 0
    observation = env.reset() # reset env
    prev_x = None

ep %d: game finished, reward: %f 0 -1.0 
ep %d: game finished, reward: %f 0 -1.0 
ep %d: game finished, reward: %f 0 -1.0 
ep %d: game finished, reward: %f 0 -1.0 
ep %d: game finished, reward: %f 0 -1.0 
ep %d: game finished, reward: %f 0 -1.0 
ep %d: game finished, reward: %f 0 -1.0 
ep %d: game finished, reward: %f 0 -1.0 
ep %d: game finished, reward: %f 0 -1.0 
ep %d: game finished, reward: %f 0 -1.0 
ep %d: game finished, reward: %f 0 -1.0 
ep %d: game finished, reward: %f 0 -1.0 
ep %d: game finished, reward: %f 0 -1.0 
ep %d: game finished, reward: %f 0 -1.0 
ep %d: game finished, reward: %f 0 -1.0 
ep %d: game finished, reward: %f 0 -1.0 
ep %d: game finished, reward: %f 0 -1.0 
ep %d: game finished, reward: %f 0 -1.0 
ep %d: game finished, reward: %f 0 -1.0 
ep %d: game finished, reward: %f 0 -1.0 
resetting env. episode reward total was %f. running mean: %f -21.0 -21.0
ep %d: game finished, reward: %f 1 -1.0 
ep %d: game finished, reward: %f 1 1.0  !!!!!!!!
ep %d: game finis

ep %d: game finished, reward: %f 8 -1.0 
ep %d: game finished, reward: %f 8 1.0  !!!!!!!!
ep %d: game finished, reward: %f 8 -1.0 
ep %d: game finished, reward: %f 8 -1.0 
ep %d: game finished, reward: %f 8 -1.0 
ep %d: game finished, reward: %f 8 -1.0 
ep %d: game finished, reward: %f 8 -1.0 
ep %d: game finished, reward: %f 8 -1.0 
ep %d: game finished, reward: %f 8 -1.0 
ep %d: game finished, reward: %f 8 -1.0 
ep %d: game finished, reward: %f 8 -1.0 
ep %d: game finished, reward: %f 8 -1.0 
resetting env. episode reward total was %f. running mean: %f -19.0 -20.93235156572193
ep %d: game finished, reward: %f 9 -1.0 
ep %d: game finished, reward: %f 9 -1.0 
ep %d: game finished, reward: %f 9 -1.0 
ep %d: game finished, reward: %f 9 -1.0 
ep %d: game finished, reward: %f 9 -1.0 
ep %d: game finished, reward: %f 9 -1.0 
ep %d: game finished, reward: %f 9 -1.0 
ep %d: game finished, reward: %f 9 -1.0 
ep %d: game finished, reward: %f 9 -1.0 
ep %d: game finished, reward: %f 9 -1.0 
ep %

ep %d: game finished, reward: %f 16 -1.0 
ep %d: game finished, reward: %f 16 -1.0 
ep %d: game finished, reward: %f 16 1.0  !!!!!!!!
ep %d: game finished, reward: %f 16 -1.0 
ep %d: game finished, reward: %f 16 -1.0 
ep %d: game finished, reward: %f 16 -1.0 
ep %d: game finished, reward: %f 16 1.0  !!!!!!!!
ep %d: game finished, reward: %f 16 -1.0 
resetting env. episode reward total was %f. running mean: %f -19.0 -20.86964305498556
ep %d: game finished, reward: %f 17 -1.0 
ep %d: game finished, reward: %f 17 -1.0 
ep %d: game finished, reward: %f 17 -1.0 
ep %d: game finished, reward: %f 17 -1.0 
ep %d: game finished, reward: %f 17 -1.0 
ep %d: game finished, reward: %f 17 -1.0 
ep %d: game finished, reward: %f 17 -1.0 
ep %d: game finished, reward: %f 17 -1.0 
ep %d: game finished, reward: %f 17 1.0  !!!!!!!!
ep %d: game finished, reward: %f 17 -1.0 
ep %d: game finished, reward: %f 17 -1.0 
ep %d: game finished, reward: %f 17 -1.0 
ep %d: game finished, reward: %f 17 -1.0 
ep %d: g

resetting env. episode reward total was %f. running mean: %f -21.0 -20.851180276628018
ep %d: game finished, reward: %f 25 -1.0 
ep %d: game finished, reward: %f 25 -1.0 
ep %d: game finished, reward: %f 25 -1.0 
ep %d: game finished, reward: %f 25 -1.0 
ep %d: game finished, reward: %f 25 -1.0 
ep %d: game finished, reward: %f 25 -1.0 
ep %d: game finished, reward: %f 25 -1.0 
ep %d: game finished, reward: %f 25 -1.0 
ep %d: game finished, reward: %f 25 -1.0 
ep %d: game finished, reward: %f 25 -1.0 
ep %d: game finished, reward: %f 25 -1.0 
ep %d: game finished, reward: %f 25 -1.0 
ep %d: game finished, reward: %f 25 -1.0 
ep %d: game finished, reward: %f 25 -1.0 
ep %d: game finished, reward: %f 25 1.0  !!!!!!!!
ep %d: game finished, reward: %f 25 -1.0 
ep %d: game finished, reward: %f 25 -1.0 
ep %d: game finished, reward: %f 25 -1.0 
ep %d: game finished, reward: %f 25 -1.0 
ep %d: game finished, reward: %f 25 -1.0 
ep %d: game finished, reward: %f 25 -1.0 
ep %d: game finished, r

ep %d: game finished, reward: %f 33 -1.0 
ep %d: game finished, reward: %f 33 -1.0 
ep %d: game finished, reward: %f 33 -1.0 
ep %d: game finished, reward: %f 33 -1.0 
ep %d: game finished, reward: %f 33 -1.0 
ep %d: game finished, reward: %f 33 -1.0 
ep %d: game finished, reward: %f 33 -1.0 
ep %d: game finished, reward: %f 33 -1.0 
ep %d: game finished, reward: %f 33 -1.0 
ep %d: game finished, reward: %f 33 -1.0 
ep %d: game finished, reward: %f 33 -1.0 
ep %d: game finished, reward: %f 33 1.0  !!!!!!!!
ep %d: game finished, reward: %f 33 -1.0 
ep %d: game finished, reward: %f 33 -1.0 
ep %d: game finished, reward: %f 33 -1.0 
ep %d: game finished, reward: %f 33 -1.0 
ep %d: game finished, reward: %f 33 -1.0 
resetting env. episode reward total was %f. running mean: %f -20.0 -20.816093566001648
ep %d: game finished, reward: %f 34 -1.0 
ep %d: game finished, reward: %f 34 -1.0 
ep %d: game finished, reward: %f 34 -1.0 
ep %d: game finished, reward: %f 34 -1.0 
ep %d: game finished, r

ep %d: game finished, reward: %f 41 -1.0 
ep %d: game finished, reward: %f 41 -1.0 
ep %d: game finished, reward: %f 41 -1.0 
ep %d: game finished, reward: %f 41 -1.0 
ep %d: game finished, reward: %f 41 1.0  !!!!!!!!
ep %d: game finished, reward: %f 41 -1.0 
ep %d: game finished, reward: %f 41 -1.0 
ep %d: game finished, reward: %f 41 -1.0 
ep %d: game finished, reward: %f 41 -1.0 
ep %d: game finished, reward: %f 41 -1.0 
ep %d: game finished, reward: %f 41 -1.0 
resetting env. episode reward total was %f. running mean: %f -19.0 -20.790797323756863
ep %d: game finished, reward: %f 42 -1.0 
ep %d: game finished, reward: %f 42 -1.0 
ep %d: game finished, reward: %f 42 -1.0 
ep %d: game finished, reward: %f 42 -1.0 
ep %d: game finished, reward: %f 42 -1.0 
ep %d: game finished, reward: %f 42 -1.0 
ep %d: game finished, reward: %f 42 -1.0 
ep %d: game finished, reward: %f 42 -1.0 
ep %d: game finished, reward: %f 42 -1.0 
ep %d: game finished, reward: %f 42 -1.0 
ep %d: game finished, r

ep %d: game finished, reward: %f 49 -1.0 
ep %d: game finished, reward: %f 49 -1.0 
ep %d: game finished, reward: %f 49 -1.0 
ep %d: game finished, reward: %f 49 -1.0 
resetting env. episode reward total was %f. running mean: %f -21.0 -20.778520895463444
ep %d: game finished, reward: %f 50 -1.0 
ep %d: game finished, reward: %f 50 -1.0 
ep %d: game finished, reward: %f 50 -1.0 
ep %d: game finished, reward: %f 50 -1.0 
ep %d: game finished, reward: %f 50 -1.0 
ep %d: game finished, reward: %f 50 -1.0 
ep %d: game finished, reward: %f 50 -1.0 
ep %d: game finished, reward: %f 50 -1.0 
ep %d: game finished, reward: %f 50 -1.0 
ep %d: game finished, reward: %f 50 -1.0 
ep %d: game finished, reward: %f 50 -1.0 
ep %d: game finished, reward: %f 50 -1.0 
ep %d: game finished, reward: %f 50 -1.0 
ep %d: game finished, reward: %f 50 -1.0 
ep %d: game finished, reward: %f 50 1.0  !!!!!!!!
ep %d: game finished, reward: %f 50 -1.0 
ep %d: game finished, reward: %f 50 -1.0 
ep %d: game finished, r

resetting env. episode reward total was %f. running mean: %f -21.0 -20.728181926785176
ep %d: game finished, reward: %f 58 -1.0 
ep %d: game finished, reward: %f 58 -1.0 
ep %d: game finished, reward: %f 58 -1.0 
ep %d: game finished, reward: %f 58 -1.0 
ep %d: game finished, reward: %f 58 -1.0 
ep %d: game finished, reward: %f 58 -1.0 
ep %d: game finished, reward: %f 58 -1.0 
ep %d: game finished, reward: %f 58 -1.0 
ep %d: game finished, reward: %f 58 -1.0 
ep %d: game finished, reward: %f 58 -1.0 
ep %d: game finished, reward: %f 58 -1.0 
ep %d: game finished, reward: %f 58 -1.0 
ep %d: game finished, reward: %f 58 -1.0 
ep %d: game finished, reward: %f 58 -1.0 
ep %d: game finished, reward: %f 58 -1.0 
ep %d: game finished, reward: %f 58 -1.0 
ep %d: game finished, reward: %f 58 -1.0 
ep %d: game finished, reward: %f 58 -1.0 
ep %d: game finished, reward: %f 58 -1.0 
ep %d: game finished, reward: %f 58 -1.0 
ep %d: game finished, reward: %f 58 -1.0 
resetting env. episode reward t

ep %d: game finished, reward: %f 66 -1.0 
ep %d: game finished, reward: %f 66 -1.0 
ep %d: game finished, reward: %f 66 -1.0 
ep %d: game finished, reward: %f 66 -1.0 
ep %d: game finished, reward: %f 66 -1.0 
ep %d: game finished, reward: %f 66 -1.0 
ep %d: game finished, reward: %f 66 -1.0 
ep %d: game finished, reward: %f 66 -1.0 
ep %d: game finished, reward: %f 66 -1.0 
ep %d: game finished, reward: %f 66 -1.0 
ep %d: game finished, reward: %f 66 -1.0 
ep %d: game finished, reward: %f 66 -1.0 
ep %d: game finished, reward: %f 66 -1.0 
ep %d: game finished, reward: %f 66 -1.0 
ep %d: game finished, reward: %f 66 -1.0 
ep %d: game finished, reward: %f 66 -1.0 
ep %d: game finished, reward: %f 66 -1.0 
ep %d: game finished, reward: %f 66 -1.0 
ep %d: game finished, reward: %f 66 -1.0 
ep %d: game finished, reward: %f 66 -1.0 
resetting env. episode reward total was %f. running mean: %f -21.0 -20.685198927924215
ep %d: game finished, reward: %f 67 -1.0 
ep %d: game finished, reward: %

ep %d: game finished, reward: %f 74 -1.0 
ep %d: game finished, reward: %f 74 -1.0 
ep %d: game finished, reward: %f 74 -1.0 
ep %d: game finished, reward: %f 74 -1.0 
ep %d: game finished, reward: %f 74 -1.0 
ep %d: game finished, reward: %f 74 -1.0 
ep %d: game finished, reward: %f 74 -1.0 
ep %d: game finished, reward: %f 74 -1.0 
ep %d: game finished, reward: %f 74 -1.0 
ep %d: game finished, reward: %f 74 -1.0 
ep %d: game finished, reward: %f 74 -1.0 
ep %d: game finished, reward: %f 74 -1.0 
ep %d: game finished, reward: %f 74 -1.0 
ep %d: game finished, reward: %f 74 -1.0 
resetting env. episode reward total was %f. running mean: %f -21.0 -20.661385328848834
ep %d: game finished, reward: %f 75 -1.0 
ep %d: game finished, reward: %f 75 -1.0 
ep %d: game finished, reward: %f 75 -1.0 
ep %d: game finished, reward: %f 75 -1.0 
ep %d: game finished, reward: %f 75 -1.0 
ep %d: game finished, reward: %f 75 -1.0 
ep %d: game finished, reward: %f 75 -1.0 
ep %d: game finished, reward: %

ep %d: game finished, reward: %f 82 -1.0 
ep %d: game finished, reward: %f 82 -1.0 
ep %d: game finished, reward: %f 82 -1.0 
ep %d: game finished, reward: %f 82 -1.0 
ep %d: game finished, reward: %f 82 -1.0 
ep %d: game finished, reward: %f 82 -1.0 
ep %d: game finished, reward: %f 82 -1.0 
resetting env. episode reward total was %f. running mean: %f -21.0 -20.658424455260743
ep %d: game finished, reward: %f 83 -1.0 
ep %d: game finished, reward: %f 83 -1.0 
ep %d: game finished, reward: %f 83 -1.0 
ep %d: game finished, reward: %f 83 -1.0 
ep %d: game finished, reward: %f 83 -1.0 
ep %d: game finished, reward: %f 83 -1.0 
ep %d: game finished, reward: %f 83 -1.0 
ep %d: game finished, reward: %f 83 1.0  !!!!!!!!
ep %d: game finished, reward: %f 83 -1.0 
ep %d: game finished, reward: %f 83 -1.0 
ep %d: game finished, reward: %f 83 -1.0 
ep %d: game finished, reward: %f 83 -1.0 
ep %d: game finished, reward: %f 83 -1.0 
ep %d: game finished, reward: %f 83 -1.0 
ep %d: game finished, r

ep %d: game finished, reward: %f 90 -1.0 
ep %d: game finished, reward: %f 90 -1.0 
ep %d: game finished, reward: %f 90 -1.0 
ep %d: game finished, reward: %f 90 -1.0 
ep %d: game finished, reward: %f 90 -1.0 
ep %d: game finished, reward: %f 90 -1.0 
resetting env. episode reward total was %f. running mean: %f -21.0 -20.59912205058043
ep %d: game finished, reward: %f 91 -1.0 
ep %d: game finished, reward: %f 91 -1.0 
ep %d: game finished, reward: %f 91 -1.0 
ep %d: game finished, reward: %f 91 -1.0 
ep %d: game finished, reward: %f 91 -1.0 
ep %d: game finished, reward: %f 91 -1.0 
ep %d: game finished, reward: %f 91 -1.0 
ep %d: game finished, reward: %f 91 -1.0 
ep %d: game finished, reward: %f 91 -1.0 
ep %d: game finished, reward: %f 91 -1.0 
ep %d: game finished, reward: %f 91 -1.0 
ep %d: game finished, reward: %f 91 1.0  !!!!!!!!
ep %d: game finished, reward: %f 91 -1.0 
ep %d: game finished, reward: %f 91 -1.0 
ep %d: game finished, reward: %f 91 -1.0 
ep %d: game finished, re

ep %d: game finished, reward: %f 99 -1.0 
ep %d: game finished, reward: %f 99 -1.0 
ep %d: game finished, reward: %f 99 -1.0 
ep %d: game finished, reward: %f 99 -1.0 
ep %d: game finished, reward: %f 99 -1.0 
ep %d: game finished, reward: %f 99 -1.0 
ep %d: game finished, reward: %f 99 -1.0 
ep %d: game finished, reward: %f 99 -1.0 
ep %d: game finished, reward: %f 99 -1.0 
ep %d: game finished, reward: %f 99 -1.0 
ep %d: game finished, reward: %f 99 -1.0 
ep %d: game finished, reward: %f 99 -1.0 
ep %d: game finished, reward: %f 99 -1.0 
ep %d: game finished, reward: %f 99 -1.0 
ep %d: game finished, reward: %f 99 -1.0 
ep %d: game finished, reward: %f 99 -1.0 
ep %d: game finished, reward: %f 99 -1.0 
ep %d: game finished, reward: %f 99 -1.0 
ep %d: game finished, reward: %f 99 -1.0 
ep %d: game finished, reward: %f 99 -1.0 
resetting env. episode reward total was %f. running mean: %f -21.0 -20.605730225080798
ep %d: game finished, reward: %f 100 -1.0 
ep %d: game finished, reward: 

ep %d: game finished, reward: %f 107 -1.0 
ep %d: game finished, reward: %f 107 -1.0 
ep %d: game finished, reward: %f 107 -1.0 
ep %d: game finished, reward: %f 107 -1.0 
ep %d: game finished, reward: %f 107 -1.0 
ep %d: game finished, reward: %f 107 -1.0 
ep %d: game finished, reward: %f 107 -1.0 
ep %d: game finished, reward: %f 107 -1.0 
ep %d: game finished, reward: %f 107 -1.0 
ep %d: game finished, reward: %f 107 -1.0 
ep %d: game finished, reward: %f 107 -1.0 
ep %d: game finished, reward: %f 107 -1.0 
ep %d: game finished, reward: %f 107 -1.0 
ep %d: game finished, reward: %f 107 -1.0 
ep %d: game finished, reward: %f 107 -1.0 
ep %d: game finished, reward: %f 107 -1.0 
ep %d: game finished, reward: %f 107 -1.0 
ep %d: game finished, reward: %f 107 -1.0 
ep %d: game finished, reward: %f 107 -1.0 
ep %d: game finished, reward: %f 107 -1.0 
resetting env. episode reward total was %f. running mean: %f -21.0 -20.578157975027008
ep %d: game finished, reward: %f 108 -1.0 
ep %d: gam

ep %d: game finished, reward: %f 115 -1.0 
ep %d: game finished, reward: %f 115 -1.0 
ep %d: game finished, reward: %f 115 -1.0 
ep %d: game finished, reward: %f 115 -1.0 
ep %d: game finished, reward: %f 115 -1.0 
ep %d: game finished, reward: %f 115 -1.0 
ep %d: game finished, reward: %f 115 -1.0 
ep %d: game finished, reward: %f 115 -1.0 
ep %d: game finished, reward: %f 115 -1.0 
ep %d: game finished, reward: %f 115 -1.0 
ep %d: game finished, reward: %f 115 -1.0 
ep %d: game finished, reward: %f 115 -1.0 
ep %d: game finished, reward: %f 115 -1.0 
ep %d: game finished, reward: %f 115 -1.0 
resetting env. episode reward total was %f. running mean: %f -21.0 -20.59153170807543
ep %d: game finished, reward: %f 116 -1.0 
ep %d: game finished, reward: %f 116 -1.0 
ep %d: game finished, reward: %f 116 -1.0 
ep %d: game finished, reward: %f 116 -1.0 
ep %d: game finished, reward: %f 116 -1.0 
ep %d: game finished, reward: %f 116 -1.0 
ep %d: game finished, reward: %f 116 -1.0 
ep %d: game

ep %d: game finished, reward: %f 123 -1.0 
ep %d: game finished, reward: %f 123 -1.0 
ep %d: game finished, reward: %f 123 -1.0 
ep %d: game finished, reward: %f 123 -1.0 
ep %d: game finished, reward: %f 123 -1.0 
ep %d: game finished, reward: %f 123 -1.0 
ep %d: game finished, reward: %f 123 -1.0 
ep %d: game finished, reward: %f 123 -1.0 
ep %d: game finished, reward: %f 123 -1.0 
ep %d: game finished, reward: %f 123 -1.0 
ep %d: game finished, reward: %f 123 -1.0 
ep %d: game finished, reward: %f 123 -1.0 
resetting env. episode reward total was %f. running mean: %f -21.0 -20.575531833626435
ep %d: game finished, reward: %f 124 -1.0 
ep %d: game finished, reward: %f 124 -1.0 
ep %d: game finished, reward: %f 124 -1.0 
ep %d: game finished, reward: %f 124 -1.0 
ep %d: game finished, reward: %f 124 -1.0 
ep %d: game finished, reward: %f 124 -1.0 
ep %d: game finished, reward: %f 124 -1.0 
ep %d: game finished, reward: %f 124 -1.0 
ep %d: game finished, reward: %f 124 -1.0 
ep %d: gam

ep %d: game finished, reward: %f 131 -1.0 
ep %d: game finished, reward: %f 131 -1.0 
ep %d: game finished, reward: %f 131 -1.0 
ep %d: game finished, reward: %f 131 -1.0 
ep %d: game finished, reward: %f 131 -1.0 
ep %d: game finished, reward: %f 131 -1.0 
ep %d: game finished, reward: %f 131 -1.0 
ep %d: game finished, reward: %f 131 -1.0 
ep %d: game finished, reward: %f 131 -1.0 
ep %d: game finished, reward: %f 131 -1.0 
ep %d: game finished, reward: %f 131 -1.0 
resetting env. episode reward total was %f. running mean: %f -21.0 -20.56029251042725
ep %d: game finished, reward: %f 132 -1.0 
ep %d: game finished, reward: %f 132 -1.0 
ep %d: game finished, reward: %f 132 -1.0 
ep %d: game finished, reward: %f 132 -1.0 
ep %d: game finished, reward: %f 132 -1.0 
ep %d: game finished, reward: %f 132 -1.0 
ep %d: game finished, reward: %f 132 -1.0 
ep %d: game finished, reward: %f 132 -1.0 
ep %d: game finished, reward: %f 132 -1.0 
ep %d: game finished, reward: %f 132 -1.0 
ep %d: game

ep %d: game finished, reward: %f 139 -1.0 
ep %d: game finished, reward: %f 139 -1.0 
ep %d: game finished, reward: %f 139 1.0  !!!!!!!!
ep %d: game finished, reward: %f 139 -1.0 
ep %d: game finished, reward: %f 139 -1.0 
ep %d: game finished, reward: %f 139 -1.0 
ep %d: game finished, reward: %f 139 -1.0 
ep %d: game finished, reward: %f 139 -1.0 
ep %d: game finished, reward: %f 139 -1.0 
resetting env. episode reward total was %f. running mean: %f -20.0 -20.555150326696523
ep %d: game finished, reward: %f 140 -1.0 
ep %d: game finished, reward: %f 140 -1.0 
ep %d: game finished, reward: %f 140 -1.0 
ep %d: game finished, reward: %f 140 -1.0 
ep %d: game finished, reward: %f 140 -1.0 
ep %d: game finished, reward: %f 140 -1.0 
ep %d: game finished, reward: %f 140 -1.0 
ep %d: game finished, reward: %f 140 -1.0 
ep %d: game finished, reward: %f 140 -1.0 
ep %d: game finished, reward: %f 140 -1.0 
ep %d: game finished, reward: %f 140 -1.0 
ep %d: game finished, reward: %f 140 -1.0 
ep

ep %d: game finished, reward: %f 147 -1.0 
ep %d: game finished, reward: %f 147 -1.0 
ep %d: game finished, reward: %f 147 -1.0 
ep %d: game finished, reward: %f 147 -1.0 
ep %d: game finished, reward: %f 147 -1.0 
ep %d: game finished, reward: %f 147 -1.0 
ep %d: game finished, reward: %f 147 -1.0 
resetting env. episode reward total was %f. running mean: %f -20.0 -20.540700463542223
ep %d: game finished, reward: %f 148 -1.0 
ep %d: game finished, reward: %f 148 -1.0 
ep %d: game finished, reward: %f 148 -1.0 
ep %d: game finished, reward: %f 148 -1.0 
ep %d: game finished, reward: %f 148 -1.0 
ep %d: game finished, reward: %f 148 -1.0 
ep %d: game finished, reward: %f 148 -1.0 
ep %d: game finished, reward: %f 148 -1.0 
ep %d: game finished, reward: %f 148 -1.0 
ep %d: game finished, reward: %f 148 -1.0 
ep %d: game finished, reward: %f 148 -1.0 
ep %d: game finished, reward: %f 148 -1.0 
ep %d: game finished, reward: %f 148 -1.0 
ep %d: game finished, reward: %f 148 -1.0 
ep %d: gam

ep %d: game finished, reward: %f 155 -1.0 
ep %d: game finished, reward: %f 155 -1.0 
ep %d: game finished, reward: %f 155 -1.0 
resetting env. episode reward total was %f. running mean: %f -21.0 -20.547162136101313
ep %d: game finished, reward: %f 156 -1.0 
ep %d: game finished, reward: %f 156 -1.0 
ep %d: game finished, reward: %f 156 -1.0 
ep %d: game finished, reward: %f 156 -1.0 
ep %d: game finished, reward: %f 156 -1.0 
ep %d: game finished, reward: %f 156 -1.0 
ep %d: game finished, reward: %f 156 -1.0 
ep %d: game finished, reward: %f 156 -1.0 
ep %d: game finished, reward: %f 156 -1.0 
ep %d: game finished, reward: %f 156 -1.0 
ep %d: game finished, reward: %f 156 -1.0 
ep %d: game finished, reward: %f 156 -1.0 
ep %d: game finished, reward: %f 156 -1.0 
ep %d: game finished, reward: %f 156 -1.0 
ep %d: game finished, reward: %f 156 -1.0 
ep %d: game finished, reward: %f 156 -1.0 
ep %d: game finished, reward: %f 156 -1.0 
ep %d: game finished, reward: %f 156 -1.0 
ep %d: gam

ep %d: game finished, reward: %f 164 -1.0 
ep %d: game finished, reward: %f 164 -1.0 
ep %d: game finished, reward: %f 164 -1.0 
ep %d: game finished, reward: %f 164 -1.0 
ep %d: game finished, reward: %f 164 -1.0 
ep %d: game finished, reward: %f 164 -1.0 
ep %d: game finished, reward: %f 164 -1.0 
ep %d: game finished, reward: %f 164 -1.0 
ep %d: game finished, reward: %f 164 -1.0 
ep %d: game finished, reward: %f 164 -1.0 
ep %d: game finished, reward: %f 164 -1.0 
ep %d: game finished, reward: %f 164 -1.0 
ep %d: game finished, reward: %f 164 -1.0 
ep %d: game finished, reward: %f 164 -1.0 
ep %d: game finished, reward: %f 164 -1.0 
ep %d: game finished, reward: %f 164 -1.0 
ep %d: game finished, reward: %f 164 -1.0 
ep %d: game finished, reward: %f 164 -1.0 
ep %d: game finished, reward: %f 164 -1.0 
ep %d: game finished, reward: %f 164 -1.0 
resetting env. episode reward total was %f. running mean: %f -21.0 -20.557212880814905
ep %d: game finished, reward: %f 165 -1.0 
ep %d: gam

ep %d: game finished, reward: %f 172 -1.0 
ep %d: game finished, reward: %f 172 -1.0 
ep %d: game finished, reward: %f 172 -1.0 
ep %d: game finished, reward: %f 172 -1.0 
ep %d: game finished, reward: %f 172 -1.0 
ep %d: game finished, reward: %f 172 -1.0 
ep %d: game finished, reward: %f 172 -1.0 
ep %d: game finished, reward: %f 172 -1.0 
ep %d: game finished, reward: %f 172 -1.0 
ep %d: game finished, reward: %f 172 -1.0 
ep %d: game finished, reward: %f 172 -1.0 
ep %d: game finished, reward: %f 172 -1.0 
ep %d: game finished, reward: %f 172 -1.0 
ep %d: game finished, reward: %f 172 -1.0 
resetting env. episode reward total was %f. running mean: %f -21.0 -20.58152053501093
ep %d: game finished, reward: %f 173 -1.0 
ep %d: game finished, reward: %f 173 -1.0 
ep %d: game finished, reward: %f 173 -1.0 
ep %d: game finished, reward: %f 173 -1.0 
ep %d: game finished, reward: %f 173 -1.0 
ep %d: game finished, reward: %f 173 -1.0 
ep %d: game finished, reward: %f 173 -1.0 
ep %d: game

ep %d: game finished, reward: %f 180 -1.0 
ep %d: game finished, reward: %f 180 -1.0 
ep %d: game finished, reward: %f 180 -1.0 
ep %d: game finished, reward: %f 180 -1.0 
ep %d: game finished, reward: %f 180 -1.0 
ep %d: game finished, reward: %f 180 -1.0 
ep %d: game finished, reward: %f 180 -1.0 
ep %d: game finished, reward: %f 180 -1.0 
ep %d: game finished, reward: %f 180 -1.0 
ep %d: game finished, reward: %f 180 1.0  !!!!!!!!
ep %d: game finished, reward: %f 180 1.0  !!!!!!!!
ep %d: game finished, reward: %f 180 -1.0 
ep %d: game finished, reward: %f 180 -1.0 
ep %d: game finished, reward: %f 180 -1.0 
ep %d: game finished, reward: %f 180 -1.0 
ep %d: game finished, reward: %f 180 -1.0 
ep %d: game finished, reward: %f 180 -1.0 
ep %d: game finished, reward: %f 180 -1.0 
resetting env. episode reward total was %f. running mean: %f -19.0 -20.516499928781222
ep %d: game finished, reward: %f 181 -1.0 
ep %d: game finished, reward: %f 181 -1.0 
ep %d: game finished, reward: %f 181 

ep %d: game finished, reward: %f 188 -1.0 
ep %d: game finished, reward: %f 188 -1.0 
ep %d: game finished, reward: %f 188 -1.0 
ep %d: game finished, reward: %f 188 -1.0 
ep %d: game finished, reward: %f 188 -1.0 
ep %d: game finished, reward: %f 188 -1.0 
ep %d: game finished, reward: %f 188 -1.0 
ep %d: game finished, reward: %f 188 -1.0 
ep %d: game finished, reward: %f 188 -1.0 
ep %d: game finished, reward: %f 188 -1.0 
ep %d: game finished, reward: %f 188 -1.0 
ep %d: game finished, reward: %f 188 -1.0 
ep %d: game finished, reward: %f 188 -1.0 
ep %d: game finished, reward: %f 188 -1.0 
resetting env. episode reward total was %f. running mean: %f -21.0 -20.535117419554272
ep %d: game finished, reward: %f 189 -1.0 
ep %d: game finished, reward: %f 189 -1.0 
ep %d: game finished, reward: %f 189 -1.0 
ep %d: game finished, reward: %f 189 -1.0 
ep %d: game finished, reward: %f 189 -1.0 
ep %d: game finished, reward: %f 189 -1.0 
ep %d: game finished, reward: %f 189 -1.0 
ep %d: gam

ep %d: game finished, reward: %f 196 -1.0 
ep %d: game finished, reward: %f 196 -1.0 
ep %d: game finished, reward: %f 196 -1.0 
ep %d: game finished, reward: %f 196 -1.0 
ep %d: game finished, reward: %f 196 -1.0 
ep %d: game finished, reward: %f 196 -1.0 
ep %d: game finished, reward: %f 196 -1.0 
ep %d: game finished, reward: %f 196 -1.0 
ep %d: game finished, reward: %f 196 -1.0 
ep %d: game finished, reward: %f 196 -1.0 
ep %d: game finished, reward: %f 196 -1.0 
resetting env. episode reward total was %f. running mean: %f -21.0 -20.541919174862745
ep %d: game finished, reward: %f 197 -1.0 
ep %d: game finished, reward: %f 197 -1.0 
ep %d: game finished, reward: %f 197 -1.0 
ep %d: game finished, reward: %f 197 -1.0 
ep %d: game finished, reward: %f 197 -1.0 
ep %d: game finished, reward: %f 197 -1.0 
ep %d: game finished, reward: %f 197 -1.0 
ep %d: game finished, reward: %f 197 -1.0 
ep %d: game finished, reward: %f 197 -1.0 
ep %d: game finished, reward: %f 197 -1.0 
ep %d: gam

ep %d: game finished, reward: %f 204 -1.0 
ep %d: game finished, reward: %f 204 -1.0 
ep %d: game finished, reward: %f 204 -1.0 
ep %d: game finished, reward: %f 204 -1.0 
ep %d: game finished, reward: %f 204 -1.0 
ep %d: game finished, reward: %f 204 -1.0 
ep %d: game finished, reward: %f 204 -1.0 
ep %d: game finished, reward: %f 204 -1.0 
ep %d: game finished, reward: %f 204 -1.0 
resetting env. episode reward total was %f. running mean: %f -21.0 -20.539554044004408
ep %d: game finished, reward: %f 205 -1.0 
ep %d: game finished, reward: %f 205 -1.0 
ep %d: game finished, reward: %f 205 -1.0 
ep %d: game finished, reward: %f 205 -1.0 
ep %d: game finished, reward: %f 205 -1.0 
ep %d: game finished, reward: %f 205 -1.0 
ep %d: game finished, reward: %f 205 -1.0 
ep %d: game finished, reward: %f 205 -1.0 
ep %d: game finished, reward: %f 205 -1.0 
ep %d: game finished, reward: %f 205 -1.0 
ep %d: game finished, reward: %f 205 -1.0 
ep %d: game finished, reward: %f 205 -1.0 
ep %d: gam

ep %d: game finished, reward: %f 212 -1.0 
ep %d: game finished, reward: %f 212 -1.0 
ep %d: game finished, reward: %f 212 -1.0 
ep %d: game finished, reward: %f 212 -1.0 
ep %d: game finished, reward: %f 212 -1.0 
ep %d: game finished, reward: %f 212 -1.0 
ep %d: game finished, reward: %f 212 -1.0 
ep %d: game finished, reward: %f 212 -1.0 
ep %d: game finished, reward: %f 212 -1.0 
ep %d: game finished, reward: %f 212 1.0  !!!!!!!!
resetting env. episode reward total was %f. running mean: %f -19.0 -20.506996205440267
ep %d: game finished, reward: %f 213 -1.0 
ep %d: game finished, reward: %f 213 -1.0 
ep %d: game finished, reward: %f 213 -1.0 
ep %d: game finished, reward: %f 213 -1.0 
ep %d: game finished, reward: %f 213 -1.0 
ep %d: game finished, reward: %f 213 -1.0 
ep %d: game finished, reward: %f 213 -1.0 
ep %d: game finished, reward: %f 213 -1.0 
ep %d: game finished, reward: %f 213 -1.0 
ep %d: game finished, reward: %f 213 -1.0 
ep %d: game finished, reward: %f 213 -1.0 
ep

ep %d: game finished, reward: %f 220 -1.0 
ep %d: game finished, reward: %f 220 -1.0 
ep %d: game finished, reward: %f 220 -1.0 
ep %d: game finished, reward: %f 220 -1.0 
ep %d: game finished, reward: %f 220 -1.0 
ep %d: game finished, reward: %f 220 -1.0 
ep %d: game finished, reward: %f 220 -1.0 
ep %d: game finished, reward: %f 220 -1.0 
ep %d: game finished, reward: %f 220 -1.0 
resetting env. episode reward total was %f. running mean: %f -20.0 -20.496067562743168
ep %d: game finished, reward: %f 221 -1.0 
ep %d: game finished, reward: %f 221 -1.0 
ep %d: game finished, reward: %f 221 -1.0 
ep %d: game finished, reward: %f 221 -1.0 
ep %d: game finished, reward: %f 221 -1.0 
ep %d: game finished, reward: %f 221 -1.0 
ep %d: game finished, reward: %f 221 -1.0 
ep %d: game finished, reward: %f 221 -1.0 
ep %d: game finished, reward: %f 221 -1.0 
ep %d: game finished, reward: %f 221 -1.0 
ep %d: game finished, reward: %f 221 -1.0 
ep %d: game finished, reward: %f 221 -1.0 
ep %d: gam

ep %d: game finished, reward: %f 228 -1.0 
ep %d: game finished, reward: %f 228 -1.0 
ep %d: game finished, reward: %f 228 -1.0 
ep %d: game finished, reward: %f 228 -1.0 
ep %d: game finished, reward: %f 228 -1.0 
resetting env. episode reward total was %f. running mean: %f -21.0 -20.505883215677116
ep %d: game finished, reward: %f 229 -1.0 
ep %d: game finished, reward: %f 229 -1.0 
ep %d: game finished, reward: %f 229 -1.0 
ep %d: game finished, reward: %f 229 -1.0 
ep %d: game finished, reward: %f 229 -1.0 
ep %d: game finished, reward: %f 229 -1.0 
ep %d: game finished, reward: %f 229 -1.0 
ep %d: game finished, reward: %f 229 -1.0 
ep %d: game finished, reward: %f 229 -1.0 
ep %d: game finished, reward: %f 229 -1.0 
ep %d: game finished, reward: %f 229 1.0  !!!!!!!!
ep %d: game finished, reward: %f 229 -1.0 
ep %d: game finished, reward: %f 229 -1.0 
ep %d: game finished, reward: %f 229 -1.0 
ep %d: game finished, reward: %f 229 -1.0 
ep %d: game finished, reward: %f 229 -1.0 
ep

ep %d: game finished, reward: %f 236 -1.0 
ep %d: game finished, reward: %f 236 -1.0 
ep %d: game finished, reward: %f 236 -1.0 
ep %d: game finished, reward: %f 236 -1.0 
ep %d: game finished, reward: %f 236 -1.0 
ep %d: game finished, reward: %f 236 -1.0 
ep %d: game finished, reward: %f 236 -1.0 
resetting env. episode reward total was %f. running mean: %f -21.0 -20.467295034061202
ep %d: game finished, reward: %f 237 -1.0 
ep %d: game finished, reward: %f 237 -1.0 
ep %d: game finished, reward: %f 237 -1.0 
ep %d: game finished, reward: %f 237 -1.0 
ep %d: game finished, reward: %f 237 -1.0 
ep %d: game finished, reward: %f 237 1.0  !!!!!!!!
ep %d: game finished, reward: %f 237 -1.0 
ep %d: game finished, reward: %f 237 -1.0 
ep %d: game finished, reward: %f 237 -1.0 
ep %d: game finished, reward: %f 237 -1.0 
ep %d: game finished, reward: %f 237 -1.0 
ep %d: game finished, reward: %f 237 -1.0 
ep %d: game finished, reward: %f 237 -1.0 
ep %d: game finished, reward: %f 237 1.0  !!!

ep %d: game finished, reward: %f 244 -1.0 
ep %d: game finished, reward: %f 244 -1.0 
ep %d: game finished, reward: %f 244 -1.0 
ep %d: game finished, reward: %f 244 -1.0 
ep %d: game finished, reward: %f 244 -1.0 
ep %d: game finished, reward: %f 244 -1.0 
ep %d: game finished, reward: %f 244 1.0  !!!!!!!!
ep %d: game finished, reward: %f 244 -1.0 
ep %d: game finished, reward: %f 244 -1.0 
resetting env. episode reward total was %f. running mean: %f -20.0 -20.431970330728433
ep %d: game finished, reward: %f 245 -1.0 
ep %d: game finished, reward: %f 245 -1.0 
ep %d: game finished, reward: %f 245 -1.0 
ep %d: game finished, reward: %f 245 -1.0 
ep %d: game finished, reward: %f 245 -1.0 
ep %d: game finished, reward: %f 245 -1.0 
ep %d: game finished, reward: %f 245 -1.0 
ep %d: game finished, reward: %f 245 -1.0 
ep %d: game finished, reward: %f 245 -1.0 
ep %d: game finished, reward: %f 245 -1.0 
ep %d: game finished, reward: %f 245 -1.0 
ep %d: game finished, reward: %f 245 -1.0 
ep

ep %d: game finished, reward: %f 252 -1.0 
ep %d: game finished, reward: %f 252 -1.0 
ep %d: game finished, reward: %f 252 -1.0 
ep %d: game finished, reward: %f 252 -1.0 
ep %d: game finished, reward: %f 252 -1.0 
ep %d: game finished, reward: %f 252 -1.0 
resetting env. episode reward total was %f. running mean: %f -21.0 -20.446934844908004
ep %d: game finished, reward: %f 253 -1.0 
ep %d: game finished, reward: %f 253 -1.0 
ep %d: game finished, reward: %f 253 -1.0 
ep %d: game finished, reward: %f 253 -1.0 
ep %d: game finished, reward: %f 253 1.0  !!!!!!!!
ep %d: game finished, reward: %f 253 -1.0 
ep %d: game finished, reward: %f 253 1.0  !!!!!!!!
ep %d: game finished, reward: %f 253 -1.0 
ep %d: game finished, reward: %f 253 -1.0 
ep %d: game finished, reward: %f 253 -1.0 
ep %d: game finished, reward: %f 253 -1.0 
ep %d: game finished, reward: %f 253 -1.0 
ep %d: game finished, reward: %f 253 -1.0 
ep %d: game finished, reward: %f 253 -1.0 
ep %d: game finished, reward: %f 253 

ep %d: game finished, reward: %f 260 -1.0 
ep %d: game finished, reward: %f 260 -1.0 
ep %d: game finished, reward: %f 260 -1.0 
ep %d: game finished, reward: %f 260 -1.0 
ep %d: game finished, reward: %f 260 -1.0 
ep %d: game finished, reward: %f 260 -1.0 
resetting env. episode reward total was %f. running mean: %f -21.0 -20.43230289490877
ep %d: game finished, reward: %f 261 -1.0 
ep %d: game finished, reward: %f 261 -1.0 
ep %d: game finished, reward: %f 261 -1.0 
ep %d: game finished, reward: %f 261 -1.0 
ep %d: game finished, reward: %f 261 -1.0 
ep %d: game finished, reward: %f 261 -1.0 
ep %d: game finished, reward: %f 261 -1.0 
ep %d: game finished, reward: %f 261 -1.0 
ep %d: game finished, reward: %f 261 -1.0 
ep %d: game finished, reward: %f 261 -1.0 
ep %d: game finished, reward: %f 261 -1.0 
ep %d: game finished, reward: %f 261 -1.0 
ep %d: game finished, reward: %f 261 -1.0 
ep %d: game finished, reward: %f 261 -1.0 
ep %d: game finished, reward: %f 261 -1.0 
ep %d: game

ep %d: game finished, reward: %f 268 -1.0 
ep %d: game finished, reward: %f 268 -1.0 
ep %d: game finished, reward: %f 268 -1.0 
resetting env. episode reward total was %f. running mean: %f -21.0 -20.43802000474796
ep %d: game finished, reward: %f 269 -1.0 
ep %d: game finished, reward: %f 269 -1.0 
ep %d: game finished, reward: %f 269 -1.0 
ep %d: game finished, reward: %f 269 -1.0 
ep %d: game finished, reward: %f 269 -1.0 
ep %d: game finished, reward: %f 269 -1.0 
ep %d: game finished, reward: %f 269 -1.0 
ep %d: game finished, reward: %f 269 -1.0 
ep %d: game finished, reward: %f 269 -1.0 
ep %d: game finished, reward: %f 269 -1.0 
ep %d: game finished, reward: %f 269 -1.0 
ep %d: game finished, reward: %f 269 -1.0 
ep %d: game finished, reward: %f 269 -1.0 
ep %d: game finished, reward: %f 269 -1.0 
ep %d: game finished, reward: %f 269 -1.0 
ep %d: game finished, reward: %f 269 -1.0 
ep %d: game finished, reward: %f 269 -1.0 
ep %d: game finished, reward: %f 269 -1.0 
ep %d: game

resetting env. episode reward total was %f. running mean: %f -21.0 -20.451834941006553
ep %d: game finished, reward: %f 277 -1.0 
ep %d: game finished, reward: %f 277 -1.0 
ep %d: game finished, reward: %f 277 -1.0 
ep %d: game finished, reward: %f 277 -1.0 
ep %d: game finished, reward: %f 277 -1.0 
ep %d: game finished, reward: %f 277 -1.0 
ep %d: game finished, reward: %f 277 -1.0 
ep %d: game finished, reward: %f 277 -1.0 
ep %d: game finished, reward: %f 277 -1.0 
ep %d: game finished, reward: %f 277 -1.0 
ep %d: game finished, reward: %f 277 -1.0 
ep %d: game finished, reward: %f 277 -1.0 
ep %d: game finished, reward: %f 277 -1.0 
ep %d: game finished, reward: %f 277 -1.0 
ep %d: game finished, reward: %f 277 -1.0 
ep %d: game finished, reward: %f 277 -1.0 
ep %d: game finished, reward: %f 277 -1.0 
ep %d: game finished, reward: %f 277 -1.0 
ep %d: game finished, reward: %f 277 -1.0 
ep %d: game finished, reward: %f 277 -1.0 
ep %d: game finished, reward: %f 277 -1.0 
resetting 

ep %d: game finished, reward: %f 285 -1.0 
ep %d: game finished, reward: %f 285 -1.0 
ep %d: game finished, reward: %f 285 -1.0 
ep %d: game finished, reward: %f 285 -1.0 
ep %d: game finished, reward: %f 285 -1.0 
ep %d: game finished, reward: %f 285 -1.0 
ep %d: game finished, reward: %f 285 -1.0 
ep %d: game finished, reward: %f 285 -1.0 
ep %d: game finished, reward: %f 285 -1.0 
ep %d: game finished, reward: %f 285 -1.0 
ep %d: game finished, reward: %f 285 -1.0 
ep %d: game finished, reward: %f 285 -1.0 
ep %d: game finished, reward: %f 285 -1.0 
ep %d: game finished, reward: %f 285 -1.0 
ep %d: game finished, reward: %f 285 -1.0 
ep %d: game finished, reward: %f 285 -1.0 
ep %d: game finished, reward: %f 285 -1.0 
ep %d: game finished, reward: %f 285 -1.0 
ep %d: game finished, reward: %f 285 -1.0 
ep %d: game finished, reward: %f 285 -1.0 
resetting env. episode reward total was %f. running mean: %f -21.0 -20.460327873642598
ep %d: game finished, reward: %f 286 -1.0 
ep %d: gam

ep %d: game finished, reward: %f 293 -1.0 
ep %d: game finished, reward: %f 293 -1.0 
ep %d: game finished, reward: %f 293 -1.0 
ep %d: game finished, reward: %f 293 -1.0 
ep %d: game finished, reward: %f 293 -1.0 
ep %d: game finished, reward: %f 293 -1.0 
ep %d: game finished, reward: %f 293 -1.0 
ep %d: game finished, reward: %f 293 -1.0 
ep %d: game finished, reward: %f 293 -1.0 
ep %d: game finished, reward: %f 293 -1.0 
ep %d: game finished, reward: %f 293 -1.0 
ep %d: game finished, reward: %f 293 -1.0 
ep %d: game finished, reward: %f 293 -1.0 
ep %d: game finished, reward: %f 293 -1.0 
ep %d: game finished, reward: %f 293 -1.0 
ep %d: game finished, reward: %f 293 -1.0 
ep %d: game finished, reward: %f 293 -1.0 
ep %d: game finished, reward: %f 293 -1.0 
resetting env. episode reward total was %f. running mean: %f -21.0 -20.453983786480062
ep %d: game finished, reward: %f 294 -1.0 
ep %d: game finished, reward: %f 294 -1.0 
ep %d: game finished, reward: %f 294 -1.0 
ep %d: gam

ep %d: game finished, reward: %f 301 -1.0 
ep %d: game finished, reward: %f 301 -1.0 
ep %d: game finished, reward: %f 301 -1.0 
ep %d: game finished, reward: %f 301 -1.0 
ep %d: game finished, reward: %f 301 -1.0 
ep %d: game finished, reward: %f 301 -1.0 
ep %d: game finished, reward: %f 301 -1.0 
ep %d: game finished, reward: %f 301 -1.0 
ep %d: game finished, reward: %f 301 -1.0 
ep %d: game finished, reward: %f 301 -1.0 
ep %d: game finished, reward: %f 301 -1.0 
ep %d: game finished, reward: %f 301 -1.0 
ep %d: game finished, reward: %f 301 -1.0 
ep %d: game finished, reward: %f 301 -1.0 
ep %d: game finished, reward: %f 301 -1.0 
ep %d: game finished, reward: %f 301 -1.0 
resetting env. episode reward total was %f. running mean: %f -20.0 -20.44811522844572
ep %d: game finished, reward: %f 302 -1.0 
ep %d: game finished, reward: %f 302 -1.0 
ep %d: game finished, reward: %f 302 -1.0 
ep %d: game finished, reward: %f 302 -1.0 
ep %d: game finished, reward: %f 302 -1.0 
ep %d: game

ep %d: game finished, reward: %f 309 -1.0 
ep %d: game finished, reward: %f 309 -1.0 
ep %d: game finished, reward: %f 309 -1.0 
ep %d: game finished, reward: %f 309 1.0  !!!!!!!!
ep %d: game finished, reward: %f 309 -1.0 
ep %d: game finished, reward: %f 309 1.0  !!!!!!!!
ep %d: game finished, reward: %f 309 -1.0 
ep %d: game finished, reward: %f 309 -1.0 
ep %d: game finished, reward: %f 309 -1.0 
ep %d: game finished, reward: %f 309 -1.0 
ep %d: game finished, reward: %f 309 -1.0 
ep %d: game finished, reward: %f 309 -1.0 
ep %d: game finished, reward: %f 309 1.0  !!!!!!!!
ep %d: game finished, reward: %f 309 -1.0 
ep %d: game finished, reward: %f 309 -1.0 
ep %d: game finished, reward: %f 309 1.0  !!!!!!!!
ep %d: game finished, reward: %f 309 -1.0 
ep %d: game finished, reward: %f 309 -1.0 
resetting env. episode reward total was %f. running mean: %f -17.0 -20.411833463618716
ep %d: game finished, reward: %f 310 -1.0 
ep %d: game finished, reward: %f 310 -1.0 
ep %d: game finished,

ep %d: game finished, reward: %f 317 -1.0 
ep %d: game finished, reward: %f 317 -1.0 
ep %d: game finished, reward: %f 317 -1.0 
ep %d: game finished, reward: %f 317 -1.0 
ep %d: game finished, reward: %f 317 1.0  !!!!!!!!
ep %d: game finished, reward: %f 317 -1.0 
ep %d: game finished, reward: %f 317 -1.0 
ep %d: game finished, reward: %f 317 -1.0 
ep %d: game finished, reward: %f 317 -1.0 
ep %d: game finished, reward: %f 317 -1.0 
ep %d: game finished, reward: %f 317 -1.0 
ep %d: game finished, reward: %f 317 -1.0 
ep %d: game finished, reward: %f 317 -1.0 
ep %d: game finished, reward: %f 317 -1.0 
ep %d: game finished, reward: %f 317 -1.0 
ep %d: game finished, reward: %f 317 -1.0 
resetting env. episode reward total was %f. running mean: %f -20.0 -20.41883593503605
ep %d: game finished, reward: %f 318 -1.0 
ep %d: game finished, reward: %f 318 -1.0 
ep %d: game finished, reward: %f 318 -1.0 
ep %d: game finished, reward: %f 318 -1.0 
ep %d: game finished, reward: %f 318 -1.0 
ep 

ep %d: game finished, reward: %f 325 1.0  !!!!!!!!
ep %d: game finished, reward: %f 325 -1.0 
ep %d: game finished, reward: %f 325 -1.0 
ep %d: game finished, reward: %f 325 -1.0 
ep %d: game finished, reward: %f 325 -1.0 
ep %d: game finished, reward: %f 325 -1.0 
ep %d: game finished, reward: %f 325 -1.0 
ep %d: game finished, reward: %f 325 -1.0 
ep %d: game finished, reward: %f 325 -1.0 
ep %d: game finished, reward: %f 325 -1.0 
ep %d: game finished, reward: %f 325 -1.0 
ep %d: game finished, reward: %f 325 -1.0 
ep %d: game finished, reward: %f 325 -1.0 
ep %d: game finished, reward: %f 325 -1.0 
ep %d: game finished, reward: %f 325 -1.0 
ep %d: game finished, reward: %f 325 -1.0 
resetting env. episode reward total was %f. running mean: %f -20.0 -20.405309190868337
ep %d: game finished, reward: %f 326 -1.0 
ep %d: game finished, reward: %f 326 -1.0 
ep %d: game finished, reward: %f 326 -1.0 
ep %d: game finished, reward: %f 326 -1.0 
ep %d: game finished, reward: %f 326 -1.0 
ep

ep %d: game finished, reward: %f 333 -1.0 
ep %d: game finished, reward: %f 333 -1.0 
ep %d: game finished, reward: %f 333 -1.0 
ep %d: game finished, reward: %f 333 -1.0 
ep %d: game finished, reward: %f 333 -1.0 
ep %d: game finished, reward: %f 333 -1.0 
ep %d: game finished, reward: %f 333 -1.0 
ep %d: game finished, reward: %f 333 -1.0 
ep %d: game finished, reward: %f 333 -1.0 
ep %d: game finished, reward: %f 333 -1.0 
ep %d: game finished, reward: %f 333 -1.0 
ep %d: game finished, reward: %f 333 -1.0 
ep %d: game finished, reward: %f 333 -1.0 
ep %d: game finished, reward: %f 333 -1.0 
resetting env. episode reward total was %f. running mean: %f -21.0 -20.412825449950713
ep %d: game finished, reward: %f 334 -1.0 
ep %d: game finished, reward: %f 334 -1.0 
ep %d: game finished, reward: %f 334 -1.0 
ep %d: game finished, reward: %f 334 -1.0 
ep %d: game finished, reward: %f 334 -1.0 
ep %d: game finished, reward: %f 334 -1.0 
ep %d: game finished, reward: %f 334 -1.0 
ep %d: gam

ep %d: game finished, reward: %f 341 -1.0 
ep %d: game finished, reward: %f 341 -1.0 
ep %d: game finished, reward: %f 341 -1.0 
ep %d: game finished, reward: %f 341 -1.0 
ep %d: game finished, reward: %f 341 -1.0 
ep %d: game finished, reward: %f 341 -1.0 
ep %d: game finished, reward: %f 341 -1.0 
ep %d: game finished, reward: %f 341 -1.0 
ep %d: game finished, reward: %f 341 -1.0 
resetting env. episode reward total was %f. running mean: %f -21.0 -20.438781819238923
ep %d: game finished, reward: %f 342 -1.0 
ep %d: game finished, reward: %f 342 -1.0 
ep %d: game finished, reward: %f 342 -1.0 
ep %d: game finished, reward: %f 342 -1.0 
ep %d: game finished, reward: %f 342 -1.0 
ep %d: game finished, reward: %f 342 -1.0 
ep %d: game finished, reward: %f 342 -1.0 
ep %d: game finished, reward: %f 342 -1.0 
ep %d: game finished, reward: %f 342 -1.0 
ep %d: game finished, reward: %f 342 -1.0 
ep %d: game finished, reward: %f 342 -1.0 
ep %d: game finished, reward: %f 342 -1.0 
ep %d: gam

ep %d: game finished, reward: %f 349 -1.0 
ep %d: game finished, reward: %f 349 -1.0 
ep %d: game finished, reward: %f 349 1.0  !!!!!!!!
ep %d: game finished, reward: %f 349 -1.0 
ep %d: game finished, reward: %f 349 -1.0 
ep %d: game finished, reward: %f 349 -1.0 
resetting env. episode reward total was %f. running mean: %f -20.0 -20.442831941186224
ep %d: game finished, reward: %f 350 -1.0 
ep %d: game finished, reward: %f 350 -1.0 
ep %d: game finished, reward: %f 350 -1.0 
ep %d: game finished, reward: %f 350 -1.0 
ep %d: game finished, reward: %f 350 -1.0 
ep %d: game finished, reward: %f 350 -1.0 
ep %d: game finished, reward: %f 350 -1.0 
ep %d: game finished, reward: %f 350 -1.0 
ep %d: game finished, reward: %f 350 -1.0 
ep %d: game finished, reward: %f 350 -1.0 
ep %d: game finished, reward: %f 350 -1.0 
ep %d: game finished, reward: %f 350 -1.0 
ep %d: game finished, reward: %f 350 -1.0 
ep %d: game finished, reward: %f 350 -1.0 
ep %d: game finished, reward: %f 350 -1.0 
ep

ep %d: game finished, reward: %f 357 -1.0 
ep %d: game finished, reward: %f 357 -1.0 
ep %d: game finished, reward: %f 357 -1.0 
ep %d: game finished, reward: %f 357 -1.0 
ep %d: game finished, reward: %f 357 -1.0 
ep %d: game finished, reward: %f 357 -1.0 
ep %d: game finished, reward: %f 357 -1.0 
resetting env. episode reward total was %f. running mean: %f -19.0 -20.417448437831872
ep %d: game finished, reward: %f 358 -1.0 
ep %d: game finished, reward: %f 358 -1.0 
ep %d: game finished, reward: %f 358 -1.0 
ep %d: game finished, reward: %f 358 -1.0 
ep %d: game finished, reward: %f 358 -1.0 
ep %d: game finished, reward: %f 358 -1.0 
ep %d: game finished, reward: %f 358 -1.0 
ep %d: game finished, reward: %f 358 -1.0 
ep %d: game finished, reward: %f 358 -1.0 
ep %d: game finished, reward: %f 358 -1.0 
ep %d: game finished, reward: %f 358 -1.0 
ep %d: game finished, reward: %f 358 -1.0 
ep %d: game finished, reward: %f 358 -1.0 
ep %d: game finished, reward: %f 358 -1.0 
ep %d: gam

ep %d: game finished, reward: %f 365 -1.0 
ep %d: game finished, reward: %f 365 -1.0 
ep %d: game finished, reward: %f 365 -1.0 
ep %d: game finished, reward: %f 365 -1.0 
resetting env. episode reward total was %f. running mean: %f -21.0 -20.42372899329959
ep %d: game finished, reward: %f 366 -1.0 
ep %d: game finished, reward: %f 366 -1.0 
ep %d: game finished, reward: %f 366 -1.0 
ep %d: game finished, reward: %f 366 -1.0 
ep %d: game finished, reward: %f 366 -1.0 
ep %d: game finished, reward: %f 366 -1.0 
ep %d: game finished, reward: %f 366 -1.0 
ep %d: game finished, reward: %f 366 -1.0 
ep %d: game finished, reward: %f 366 -1.0 
ep %d: game finished, reward: %f 366 1.0  !!!!!!!!
ep %d: game finished, reward: %f 366 -1.0 
ep %d: game finished, reward: %f 366 -1.0 
ep %d: game finished, reward: %f 366 -1.0 
ep %d: game finished, reward: %f 366 -1.0 
ep %d: game finished, reward: %f 366 -1.0 
ep %d: game finished, reward: %f 366 -1.0 
ep %d: game finished, reward: %f 366 -1.0 
ep 

ep %d: game finished, reward: %f 373 -1.0 
ep %d: game finished, reward: %f 373 -1.0 
ep %d: game finished, reward: %f 373 -1.0 
ep %d: game finished, reward: %f 373 -1.0 
resetting env. episode reward total was %f. running mean: %f -21.0 -20.410896591437492
ep %d: game finished, reward: %f 374 -1.0 
ep %d: game finished, reward: %f 374 -1.0 
ep %d: game finished, reward: %f 374 -1.0 
ep %d: game finished, reward: %f 374 -1.0 
ep %d: game finished, reward: %f 374 -1.0 
ep %d: game finished, reward: %f 374 -1.0 
ep %d: game finished, reward: %f 374 1.0  !!!!!!!!
ep %d: game finished, reward: %f 374 -1.0 
ep %d: game finished, reward: %f 374 -1.0 
ep %d: game finished, reward: %f 374 1.0  !!!!!!!!
ep %d: game finished, reward: %f 374 -1.0 
ep %d: game finished, reward: %f 374 -1.0 
ep %d: game finished, reward: %f 374 -1.0 
ep %d: game finished, reward: %f 374 -1.0 
ep %d: game finished, reward: %f 374 -1.0 
ep %d: game finished, reward: %f 374 -1.0 
ep %d: game finished, reward: %f 374 

ep %d: game finished, reward: %f 381 -1.0 
ep %d: game finished, reward: %f 381 -1.0 
ep %d: game finished, reward: %f 381 -1.0 
ep %d: game finished, reward: %f 381 -1.0 
ep %d: game finished, reward: %f 381 -1.0 
ep %d: game finished, reward: %f 381 -1.0 
ep %d: game finished, reward: %f 381 -1.0 
ep %d: game finished, reward: %f 381 -1.0 
ep %d: game finished, reward: %f 381 -1.0 
ep %d: game finished, reward: %f 381 -1.0 
ep %d: game finished, reward: %f 381 1.0  !!!!!!!!
resetting env. episode reward total was %f. running mean: %f -18.0 -20.330616214043328
ep %d: game finished, reward: %f 382 -1.0 
ep %d: game finished, reward: %f 382 -1.0 
ep %d: game finished, reward: %f 382 -1.0 
ep %d: game finished, reward: %f 382 -1.0 
ep %d: game finished, reward: %f 382 -1.0 
ep %d: game finished, reward: %f 382 -1.0 
ep %d: game finished, reward: %f 382 -1.0 
ep %d: game finished, reward: %f 382 -1.0 
ep %d: game finished, reward: %f 382 -1.0 
ep %d: game finished, reward: %f 382 -1.0 
ep

ep %d: game finished, reward: %f 389 -1.0 
ep %d: game finished, reward: %f 389 -1.0 
ep %d: game finished, reward: %f 389 -1.0 
ep %d: game finished, reward: %f 389 -1.0 
ep %d: game finished, reward: %f 389 -1.0 
ep %d: game finished, reward: %f 389 -1.0 
ep %d: game finished, reward: %f 389 -1.0 
ep %d: game finished, reward: %f 389 -1.0 
resetting env. episode reward total was %f. running mean: %f -21.0 -20.343607901378395
ep %d: game finished, reward: %f 390 -1.0 
ep %d: game finished, reward: %f 390 1.0  !!!!!!!!
ep %d: game finished, reward: %f 390 -1.0 
ep %d: game finished, reward: %f 390 -1.0 
ep %d: game finished, reward: %f 390 -1.0 
ep %d: game finished, reward: %f 390 -1.0 
ep %d: game finished, reward: %f 390 1.0  !!!!!!!!
ep %d: game finished, reward: %f 390 -1.0 
ep %d: game finished, reward: %f 390 -1.0 
ep %d: game finished, reward: %f 390 -1.0 
ep %d: game finished, reward: %f 390 -1.0 
ep %d: game finished, reward: %f 390 -1.0 
ep %d: game finished, reward: %f 390 

ep %d: game finished, reward: %f 397 -1.0 
ep %d: game finished, reward: %f 397 -1.0 
ep %d: game finished, reward: %f 397 -1.0 
ep %d: game finished, reward: %f 397 -1.0 
resetting env. episode reward total was %f. running mean: %f -21.0 -20.366166466075377
ep %d: game finished, reward: %f 398 -1.0 
ep %d: game finished, reward: %f 398 -1.0 
ep %d: game finished, reward: %f 398 -1.0 
ep %d: game finished, reward: %f 398 -1.0 
ep %d: game finished, reward: %f 398 -1.0 
ep %d: game finished, reward: %f 398 1.0  !!!!!!!!
ep %d: game finished, reward: %f 398 -1.0 
ep %d: game finished, reward: %f 398 -1.0 
ep %d: game finished, reward: %f 398 -1.0 
ep %d: game finished, reward: %f 398 -1.0 
ep %d: game finished, reward: %f 398 -1.0 
ep %d: game finished, reward: %f 398 -1.0 
ep %d: game finished, reward: %f 398 -1.0 
ep %d: game finished, reward: %f 398 -1.0 
ep %d: game finished, reward: %f 398 -1.0 
ep %d: game finished, reward: %f 398 -1.0 
ep %d: game finished, reward: %f 398 -1.0 
ep

ep %d: game finished, reward: %f 405 -1.0 
resetting env. episode reward total was %f. running mean: %f -20.0 -20.386398014447476
ep %d: game finished, reward: %f 406 -1.0 
ep %d: game finished, reward: %f 406 -1.0 
ep %d: game finished, reward: %f 406 -1.0 
ep %d: game finished, reward: %f 406 -1.0 
ep %d: game finished, reward: %f 406 -1.0 
ep %d: game finished, reward: %f 406 -1.0 
ep %d: game finished, reward: %f 406 -1.0 
ep %d: game finished, reward: %f 406 -1.0 
ep %d: game finished, reward: %f 406 -1.0 
ep %d: game finished, reward: %f 406 -1.0 
ep %d: game finished, reward: %f 406 -1.0 
ep %d: game finished, reward: %f 406 -1.0 
ep %d: game finished, reward: %f 406 -1.0 
ep %d: game finished, reward: %f 406 -1.0 
ep %d: game finished, reward: %f 406 -1.0 
ep %d: game finished, reward: %f 406 -1.0 
ep %d: game finished, reward: %f 406 -1.0 
ep %d: game finished, reward: %f 406 -1.0 
ep %d: game finished, reward: %f 406 -1.0 
ep %d: game finished, reward: %f 406 -1.0 
ep %d: gam

ep %d: game finished, reward: %f 413 -1.0 
resetting env. episode reward total was %f. running mean: %f -20.0 -20.375379202641966
ep %d: game finished, reward: %f 414 -1.0 
ep %d: game finished, reward: %f 414 -1.0 
ep %d: game finished, reward: %f 414 -1.0 
ep %d: game finished, reward: %f 414 -1.0 
ep %d: game finished, reward: %f 414 -1.0 
ep %d: game finished, reward: %f 414 -1.0 
ep %d: game finished, reward: %f 414 -1.0 
ep %d: game finished, reward: %f 414 -1.0 
ep %d: game finished, reward: %f 414 -1.0 
ep %d: game finished, reward: %f 414 -1.0 
ep %d: game finished, reward: %f 414 -1.0 
ep %d: game finished, reward: %f 414 -1.0 
ep %d: game finished, reward: %f 414 1.0  !!!!!!!!
ep %d: game finished, reward: %f 414 -1.0 
ep %d: game finished, reward: %f 414 -1.0 
ep %d: game finished, reward: %f 414 -1.0 
ep %d: game finished, reward: %f 414 -1.0 
ep %d: game finished, reward: %f 414 -1.0 
ep %d: game finished, reward: %f 414 -1.0 
ep %d: game finished, reward: %f 414 -1.0 
ep

resetting env. episode reward total was %f. running mean: %f -21.0 -20.375496959130466
ep %d: game finished, reward: %f 422 -1.0 
ep %d: game finished, reward: %f 422 -1.0 
ep %d: game finished, reward: %f 422 -1.0 
ep %d: game finished, reward: %f 422 -1.0 
ep %d: game finished, reward: %f 422 -1.0 
ep %d: game finished, reward: %f 422 -1.0 
ep %d: game finished, reward: %f 422 -1.0 
ep %d: game finished, reward: %f 422 -1.0 
ep %d: game finished, reward: %f 422 -1.0 
ep %d: game finished, reward: %f 422 -1.0 
ep %d: game finished, reward: %f 422 -1.0 
ep %d: game finished, reward: %f 422 -1.0 
ep %d: game finished, reward: %f 422 -1.0 
ep %d: game finished, reward: %f 422 -1.0 
ep %d: game finished, reward: %f 422 -1.0 
ep %d: game finished, reward: %f 422 -1.0 
ep %d: game finished, reward: %f 422 -1.0 
ep %d: game finished, reward: %f 422 -1.0 
ep %d: game finished, reward: %f 422 -1.0 
ep %d: game finished, reward: %f 422 -1.0 
ep %d: game finished, reward: %f 422 -1.0 
resetting 

ep %d: game finished, reward: %f 430 -1.0 
ep %d: game finished, reward: %f 430 -1.0 
ep %d: game finished, reward: %f 430 -1.0 
ep %d: game finished, reward: %f 430 -1.0 
ep %d: game finished, reward: %f 430 -1.0 
ep %d: game finished, reward: %f 430 -1.0 
ep %d: game finished, reward: %f 430 -1.0 
ep %d: game finished, reward: %f 430 -1.0 
ep %d: game finished, reward: %f 430 -1.0 
ep %d: game finished, reward: %f 430 -1.0 
ep %d: game finished, reward: %f 430 -1.0 
ep %d: game finished, reward: %f 430 -1.0 
ep %d: game finished, reward: %f 430 -1.0 
ep %d: game finished, reward: %f 430 -1.0 
ep %d: game finished, reward: %f 430 -1.0 
ep %d: game finished, reward: %f 430 -1.0 
ep %d: game finished, reward: %f 430 -1.0 
ep %d: game finished, reward: %f 430 -1.0 
resetting env. episode reward total was %f. running mean: %f -21.0 -20.410579087480638
ep %d: game finished, reward: %f 431 -1.0 
ep %d: game finished, reward: %f 431 -1.0 
ep %d: game finished, reward: %f 431 -1.0 
ep %d: gam

ep %d: game finished, reward: %f 438 -1.0 
ep %d: game finished, reward: %f 438 -1.0 
ep %d: game finished, reward: %f 438 -1.0 
ep %d: game finished, reward: %f 438 -1.0 
ep %d: game finished, reward: %f 438 -1.0 
ep %d: game finished, reward: %f 438 -1.0 
ep %d: game finished, reward: %f 438 -1.0 
ep %d: game finished, reward: %f 438 -1.0 
ep %d: game finished, reward: %f 438 -1.0 
ep %d: game finished, reward: %f 438 -1.0 
ep %d: game finished, reward: %f 438 1.0  !!!!!!!!
ep %d: game finished, reward: %f 438 -1.0 
ep %d: game finished, reward: %f 438 -1.0 
ep %d: game finished, reward: %f 438 -1.0 
ep %d: game finished, reward: %f 438 -1.0 
ep %d: game finished, reward: %f 438 1.0  !!!!!!!!
ep %d: game finished, reward: %f 438 -1.0 
ep %d: game finished, reward: %f 438 -1.0 
resetting env. episode reward total was %f. running mean: %f -19.0 -20.3974921891899
ep %d: game finished, reward: %f 439 -1.0 
ep %d: game finished, reward: %f 439 -1.0 
ep %d: game finished, reward: %f 439 -1

ep %d: game finished, reward: %f 446 -1.0 
ep %d: game finished, reward: %f 446 -1.0 
ep %d: game finished, reward: %f 446 -1.0 
ep %d: game finished, reward: %f 446 -1.0 
ep %d: game finished, reward: %f 446 -1.0 
ep %d: game finished, reward: %f 446 -1.0 
ep %d: game finished, reward: %f 446 -1.0 
ep %d: game finished, reward: %f 446 -1.0 
ep %d: game finished, reward: %f 446 -1.0 
ep %d: game finished, reward: %f 446 -1.0 
ep %d: game finished, reward: %f 446 -1.0 
ep %d: game finished, reward: %f 446 -1.0 
ep %d: game finished, reward: %f 446 -1.0 
ep %d: game finished, reward: %f 446 -1.0 
resetting env. episode reward total was %f. running mean: %f -21.0 -20.424633134223605
ep %d: game finished, reward: %f 447 -1.0 
ep %d: game finished, reward: %f 447 -1.0 
ep %d: game finished, reward: %f 447 -1.0 
ep %d: game finished, reward: %f 447 -1.0 
ep %d: game finished, reward: %f 447 -1.0 
ep %d: game finished, reward: %f 447 -1.0 
ep %d: game finished, reward: %f 447 -1.0 
ep %d: gam

ep %d: game finished, reward: %f 454 -1.0 
ep %d: game finished, reward: %f 454 -1.0 
ep %d: game finished, reward: %f 454 -1.0 
ep %d: game finished, reward: %f 454 -1.0 
ep %d: game finished, reward: %f 454 -1.0 
ep %d: game finished, reward: %f 454 -1.0 
ep %d: game finished, reward: %f 454 -1.0 
ep %d: game finished, reward: %f 454 -1.0 
resetting env. episode reward total was %f. running mean: %f -21.0 -20.450252723277142
ep %d: game finished, reward: %f 455 -1.0 
ep %d: game finished, reward: %f 455 -1.0 
ep %d: game finished, reward: %f 455 -1.0 
ep %d: game finished, reward: %f 455 -1.0 
ep %d: game finished, reward: %f 455 -1.0 
ep %d: game finished, reward: %f 455 -1.0 
ep %d: game finished, reward: %f 455 -1.0 
ep %d: game finished, reward: %f 455 -1.0 
ep %d: game finished, reward: %f 455 -1.0 
ep %d: game finished, reward: %f 455 -1.0 
ep %d: game finished, reward: %f 455 -1.0 
ep %d: game finished, reward: %f 455 -1.0 
ep %d: game finished, reward: %f 455 -1.0 
ep %d: gam

ep %d: game finished, reward: %f 462 -1.0 
ep %d: game finished, reward: %f 462 -1.0 
resetting env. episode reward total was %f. running mean: %f -20.0 -20.482723617127785
ep %d: game finished, reward: %f 463 -1.0 
ep %d: game finished, reward: %f 463 -1.0 
ep %d: game finished, reward: %f 463 -1.0 
ep %d: game finished, reward: %f 463 -1.0 
ep %d: game finished, reward: %f 463 -1.0 
ep %d: game finished, reward: %f 463 -1.0 
ep %d: game finished, reward: %f 463 -1.0 
ep %d: game finished, reward: %f 463 -1.0 
ep %d: game finished, reward: %f 463 -1.0 
ep %d: game finished, reward: %f 463 -1.0 
ep %d: game finished, reward: %f 463 -1.0 
ep %d: game finished, reward: %f 463 -1.0 
ep %d: game finished, reward: %f 463 -1.0 
ep %d: game finished, reward: %f 463 1.0  !!!!!!!!
ep %d: game finished, reward: %f 463 -1.0 
ep %d: game finished, reward: %f 463 -1.0 
ep %d: game finished, reward: %f 463 -1.0 
ep %d: game finished, reward: %f 463 -1.0 
ep %d: game finished, reward: %f 463 -1.0 
ep

ep %d: game finished, reward: %f 471 -1.0 
ep %d: game finished, reward: %f 471 -1.0 
ep %d: game finished, reward: %f 471 -1.0 
ep %d: game finished, reward: %f 471 -1.0 
ep %d: game finished, reward: %f 471 -1.0 
ep %d: game finished, reward: %f 471 -1.0 
ep %d: game finished, reward: %f 471 -1.0 
ep %d: game finished, reward: %f 471 -1.0 
ep %d: game finished, reward: %f 471 -1.0 
ep %d: game finished, reward: %f 471 -1.0 
ep %d: game finished, reward: %f 471 -1.0 
ep %d: game finished, reward: %f 471 -1.0 
ep %d: game finished, reward: %f 471 -1.0 
ep %d: game finished, reward: %f 471 -1.0 
ep %d: game finished, reward: %f 471 -1.0 
ep %d: game finished, reward: %f 471 -1.0 
ep %d: game finished, reward: %f 471 -1.0 
ep %d: game finished, reward: %f 471 -1.0 
ep %d: game finished, reward: %f 471 -1.0 
ep %d: game finished, reward: %f 471 -1.0 
resetting env. episode reward total was %f. running mean: %f -21.0 -20.499203208641724
ep %d: game finished, reward: %f 472 -1.0 
ep %d: gam

ep %d: game finished, reward: %f 479 -1.0 
ep %d: game finished, reward: %f 479 -1.0 
ep %d: game finished, reward: %f 479 -1.0 
ep %d: game finished, reward: %f 479 -1.0 
ep %d: game finished, reward: %f 479 -1.0 
ep %d: game finished, reward: %f 479 -1.0 
ep %d: game finished, reward: %f 479 -1.0 
ep %d: game finished, reward: %f 479 -1.0 
ep %d: game finished, reward: %f 479 1.0  !!!!!!!!
ep %d: game finished, reward: %f 479 -1.0 
ep %d: game finished, reward: %f 479 -1.0 
ep %d: game finished, reward: %f 479 -1.0 
ep %d: game finished, reward: %f 479 -1.0 
ep %d: game finished, reward: %f 479 -1.0 
ep %d: game finished, reward: %f 479 -1.0 
ep %d: game finished, reward: %f 479 1.0  !!!!!!!!
ep %d: game finished, reward: %f 479 -1.0 
ep %d: game finished, reward: %f 479 -1.0 
ep %d: game finished, reward: %f 479 -1.0 
ep %d: game finished, reward: %f 479 -1.0 
resetting env. episode reward total was %f. running mean: %f -19.0 -20.47975482529562
ep %d: game finished, reward: %f 480 -

ep %d: game finished, reward: %f 487 -1.0 
ep %d: game finished, reward: %f 487 -1.0 
ep %d: game finished, reward: %f 487 1.0  !!!!!!!!
ep %d: game finished, reward: %f 487 -1.0 
ep %d: game finished, reward: %f 487 -1.0 
ep %d: game finished, reward: %f 487 -1.0 
ep %d: game finished, reward: %f 487 -1.0 
ep %d: game finished, reward: %f 487 -1.0 
ep %d: game finished, reward: %f 487 -1.0 
ep %d: game finished, reward: %f 487 -1.0 
ep %d: game finished, reward: %f 487 -1.0 
ep %d: game finished, reward: %f 487 -1.0 
ep %d: game finished, reward: %f 487 -1.0 
ep %d: game finished, reward: %f 487 -1.0 
ep %d: game finished, reward: %f 487 -1.0 
ep %d: game finished, reward: %f 487 -1.0 
ep %d: game finished, reward: %f 487 -1.0 
ep %d: game finished, reward: %f 487 -1.0 
ep %d: game finished, reward: %f 487 -1.0 
resetting env. episode reward total was %f. running mean: %f -20.0 -20.462189416787652
ep %d: game finished, reward: %f 488 -1.0 
ep %d: game finished, reward: %f 488 -1.0 
ep

ep %d: game finished, reward: %f 495 -1.0 
ep %d: game finished, reward: %f 495 -1.0 
ep %d: game finished, reward: %f 495 -1.0 
ep %d: game finished, reward: %f 495 -1.0 
ep %d: game finished, reward: %f 495 -1.0 
ep %d: game finished, reward: %f 495 -1.0 
ep %d: game finished, reward: %f 495 -1.0 
ep %d: game finished, reward: %f 495 -1.0 
ep %d: game finished, reward: %f 495 -1.0 
ep %d: game finished, reward: %f 495 -1.0 
ep %d: game finished, reward: %f 495 -1.0 
ep %d: game finished, reward: %f 495 -1.0 
ep %d: game finished, reward: %f 495 -1.0 
ep %d: game finished, reward: %f 495 -1.0 
ep %d: game finished, reward: %f 495 -1.0 
ep %d: game finished, reward: %f 495 -1.0 
ep %d: game finished, reward: %f 495 -1.0 
ep %d: game finished, reward: %f 495 -1.0 
resetting env. episode reward total was %f. running mean: %f -21.0 -20.456372079772525
ep %d: game finished, reward: %f 496 -1.0 
ep %d: game finished, reward: %f 496 -1.0 
ep %d: game finished, reward: %f 496 -1.0 
ep %d: gam

ep %d: game finished, reward: %f 503 -1.0 
ep %d: game finished, reward: %f 503 -1.0 
ep %d: game finished, reward: %f 503 -1.0 
ep %d: game finished, reward: %f 503 -1.0 
ep %d: game finished, reward: %f 503 -1.0 
ep %d: game finished, reward: %f 503 -1.0 
ep %d: game finished, reward: %f 503 -1.0 
ep %d: game finished, reward: %f 503 -1.0 
ep %d: game finished, reward: %f 503 -1.0 
ep %d: game finished, reward: %f 503 -1.0 
ep %d: game finished, reward: %f 503 -1.0 
ep %d: game finished, reward: %f 503 -1.0 
ep %d: game finished, reward: %f 503 -1.0 
ep %d: game finished, reward: %f 503 -1.0 
ep %d: game finished, reward: %f 503 -1.0 
ep %d: game finished, reward: %f 503 -1.0 
ep %d: game finished, reward: %f 503 -1.0 
ep %d: game finished, reward: %f 503 -1.0 
resetting env. episode reward total was %f. running mean: %f -21.0 -20.429562260767216
ep %d: game finished, reward: %f 504 -1.0 
ep %d: game finished, reward: %f 504 -1.0 
ep %d: game finished, reward: %f 504 -1.0 
ep %d: gam

ep %d: game finished, reward: %f 511 -1.0 
ep %d: game finished, reward: %f 511 -1.0 
ep %d: game finished, reward: %f 511 -1.0 
ep %d: game finished, reward: %f 511 -1.0 
ep %d: game finished, reward: %f 511 -1.0 
ep %d: game finished, reward: %f 511 -1.0 
ep %d: game finished, reward: %f 511 -1.0 
ep %d: game finished, reward: %f 511 -1.0 
ep %d: game finished, reward: %f 511 -1.0 
ep %d: game finished, reward: %f 511 -1.0 
ep %d: game finished, reward: %f 511 -1.0 
ep %d: game finished, reward: %f 511 -1.0 
ep %d: game finished, reward: %f 511 -1.0 
ep %d: game finished, reward: %f 511 -1.0 
ep %d: game finished, reward: %f 511 -1.0 
ep %d: game finished, reward: %f 511 -1.0 
resetting env. episode reward total was %f. running mean: %f -21.0 -20.435195147648407
ep %d: game finished, reward: %f 512 -1.0 
ep %d: game finished, reward: %f 512 -1.0 
ep %d: game finished, reward: %f 512 -1.0 
ep %d: game finished, reward: %f 512 -1.0 
ep %d: game finished, reward: %f 512 -1.0 
ep %d: gam

ep %d: game finished, reward: %f 519 -1.0 
ep %d: game finished, reward: %f 519 -1.0 
ep %d: game finished, reward: %f 519 -1.0 
ep %d: game finished, reward: %f 519 -1.0 
ep %d: game finished, reward: %f 519 -1.0 
ep %d: game finished, reward: %f 519 -1.0 
ep %d: game finished, reward: %f 519 -1.0 
ep %d: game finished, reward: %f 519 -1.0 
ep %d: game finished, reward: %f 519 -1.0 
ep %d: game finished, reward: %f 519 -1.0 
ep %d: game finished, reward: %f 519 -1.0 
ep %d: game finished, reward: %f 519 -1.0 
ep %d: game finished, reward: %f 519 -1.0 
resetting env. episode reward total was %f. running mean: %f -21.0 -20.440293864132336
ep %d: game finished, reward: %f 520 -1.0 
ep %d: game finished, reward: %f 520 -1.0 
ep %d: game finished, reward: %f 520 -1.0 
ep %d: game finished, reward: %f 520 -1.0 
ep %d: game finished, reward: %f 520 -1.0 
ep %d: game finished, reward: %f 520 -1.0 
ep %d: game finished, reward: %f 520 1.0  !!!!!!!!
ep %d: game finished, reward: %f 520 1.0  !!!

ep %d: game finished, reward: %f 527 -1.0 
ep %d: game finished, reward: %f 527 -1.0 
ep %d: game finished, reward: %f 527 -1.0 
ep %d: game finished, reward: %f 527 -1.0 
ep %d: game finished, reward: %f 527 -1.0 
ep %d: game finished, reward: %f 527 -1.0 
ep %d: game finished, reward: %f 527 -1.0 
ep %d: game finished, reward: %f 527 -1.0 
ep %d: game finished, reward: %f 527 -1.0 
ep %d: game finished, reward: %f 527 -1.0 
ep %d: game finished, reward: %f 527 -1.0 
ep %d: game finished, reward: %f 527 -1.0 
ep %d: game finished, reward: %f 527 -1.0 
ep %d: game finished, reward: %f 527 -1.0 
ep %d: game finished, reward: %f 527 -1.0 
ep %d: game finished, reward: %f 527 -1.0 
ep %d: game finished, reward: %f 527 -1.0 
ep %d: game finished, reward: %f 527 -1.0 
resetting env. episode reward total was %f. running mean: %f -21.0 -20.388031560059137
ep %d: game finished, reward: %f 528 -1.0 
ep %d: game finished, reward: %f 528 -1.0 
ep %d: game finished, reward: %f 528 -1.0 
ep %d: gam

ep %d: game finished, reward: %f 535 -1.0 
ep %d: game finished, reward: %f 535 -1.0 
ep %d: game finished, reward: %f 535 -1.0 
ep %d: game finished, reward: %f 535 -1.0 
ep %d: game finished, reward: %f 535 -1.0 
ep %d: game finished, reward: %f 535 -1.0 
ep %d: game finished, reward: %f 535 -1.0 
ep %d: game finished, reward: %f 535 -1.0 
ep %d: game finished, reward: %f 535 -1.0 
ep %d: game finished, reward: %f 535 -1.0 
ep %d: game finished, reward: %f 535 -1.0 
ep %d: game finished, reward: %f 535 -1.0 
ep %d: game finished, reward: %f 535 -1.0 
ep %d: game finished, reward: %f 535 1.0  !!!!!!!!
ep %d: game finished, reward: %f 535 -1.0 
ep %d: game finished, reward: %f 535 -1.0 
ep %d: game finished, reward: %f 535 -1.0 
resetting env. episode reward total was %f. running mean: %f -20.0 -20.38726784569922
ep %d: game finished, reward: %f 536 -1.0 
ep %d: game finished, reward: %f 536 -1.0 
ep %d: game finished, reward: %f 536 -1.0 
ep %d: game finished, reward: %f 536 -1.0 
ep 

ep %d: game finished, reward: %f 543 -1.0 
ep %d: game finished, reward: %f 543 -1.0 
ep %d: game finished, reward: %f 543 -1.0 
ep %d: game finished, reward: %f 543 -1.0 
ep %d: game finished, reward: %f 543 -1.0 
ep %d: game finished, reward: %f 543 -1.0 
ep %d: game finished, reward: %f 543 -1.0 
ep %d: game finished, reward: %f 543 -1.0 
ep %d: game finished, reward: %f 543 -1.0 
ep %d: game finished, reward: %f 543 -1.0 
ep %d: game finished, reward: %f 543 -1.0 
ep %d: game finished, reward: %f 543 -1.0 
ep %d: game finished, reward: %f 543 -1.0 
ep %d: game finished, reward: %f 543 -1.0 
ep %d: game finished, reward: %f 543 -1.0 
ep %d: game finished, reward: %f 543 -1.0 
ep %d: game finished, reward: %f 543 -1.0 
ep %d: game finished, reward: %f 543 -1.0 
ep %d: game finished, reward: %f 543 -1.0 
ep %d: game finished, reward: %f 543 -1.0 
resetting env. episode reward total was %f. running mean: %f -21.0 -20.33765613282156
ep %d: game finished, reward: %f 544 -1.0 
ep %d: game

ep %d: game finished, reward: %f 551 -1.0 
ep %d: game finished, reward: %f 551 -1.0 
ep %d: game finished, reward: %f 551 -1.0 
ep %d: game finished, reward: %f 551 -1.0 
ep %d: game finished, reward: %f 551 -1.0 
ep %d: game finished, reward: %f 551 -1.0 
ep %d: game finished, reward: %f 551 -1.0 
ep %d: game finished, reward: %f 551 -1.0 
ep %d: game finished, reward: %f 551 -1.0 
ep %d: game finished, reward: %f 551 -1.0 
ep %d: game finished, reward: %f 551 -1.0 
ep %d: game finished, reward: %f 551 1.0  !!!!!!!!
ep %d: game finished, reward: %f 551 -1.0 
ep %d: game finished, reward: %f 551 -1.0 
ep %d: game finished, reward: %f 551 -1.0 
ep %d: game finished, reward: %f 551 -1.0 
ep %d: game finished, reward: %f 551 -1.0 
ep %d: game finished, reward: %f 551 -1.0 
ep %d: game finished, reward: %f 551 -1.0 
ep %d: game finished, reward: %f 551 -1.0 
ep %d: game finished, reward: %f 551 -1.0 
resetting env. episode reward total was %f. running mean: %f -20.0 -20.32069116669615
ep 

resetting env. episode reward total was %f. running mean: %f -21.0 -20.299005123857338
ep %d: game finished, reward: %f 559 -1.0 
ep %d: game finished, reward: %f 559 -1.0 
ep %d: game finished, reward: %f 559 -1.0 
ep %d: game finished, reward: %f 559 -1.0 
ep %d: game finished, reward: %f 559 -1.0 
ep %d: game finished, reward: %f 559 -1.0 
ep %d: game finished, reward: %f 559 -1.0 
ep %d: game finished, reward: %f 559 -1.0 
ep %d: game finished, reward: %f 559 -1.0 
ep %d: game finished, reward: %f 559 -1.0 
ep %d: game finished, reward: %f 559 -1.0 
ep %d: game finished, reward: %f 559 -1.0 
ep %d: game finished, reward: %f 559 -1.0 
ep %d: game finished, reward: %f 559 -1.0 
ep %d: game finished, reward: %f 559 -1.0 
ep %d: game finished, reward: %f 559 1.0  !!!!!!!!
ep %d: game finished, reward: %f 559 -1.0 
ep %d: game finished, reward: %f 559 -1.0 
ep %d: game finished, reward: %f 559 -1.0 
ep %d: game finished, reward: %f 559 -1.0 
ep %d: game finished, reward: %f 559 -1.0 
ep

resetting env. episode reward total was %f. running mean: %f -20.0 -20.295990639753114
ep %d: game finished, reward: %f 567 -1.0 
ep %d: game finished, reward: %f 567 -1.0 
ep %d: game finished, reward: %f 567 -1.0 
ep %d: game finished, reward: %f 567 -1.0 
ep %d: game finished, reward: %f 567 -1.0 
ep %d: game finished, reward: %f 567 -1.0 
ep %d: game finished, reward: %f 567 -1.0 
ep %d: game finished, reward: %f 567 -1.0 
ep %d: game finished, reward: %f 567 -1.0 
ep %d: game finished, reward: %f 567 -1.0 
ep %d: game finished, reward: %f 567 -1.0 
ep %d: game finished, reward: %f 567 -1.0 
ep %d: game finished, reward: %f 567 -1.0 
ep %d: game finished, reward: %f 567 -1.0 
ep %d: game finished, reward: %f 567 -1.0 
ep %d: game finished, reward: %f 567 -1.0 
ep %d: game finished, reward: %f 567 -1.0 
ep %d: game finished, reward: %f 567 -1.0 
ep %d: game finished, reward: %f 567 -1.0 
ep %d: game finished, reward: %f 567 -1.0 
ep %d: game finished, reward: %f 567 -1.0 
resetting 

ep %d: game finished, reward: %f 574 -1.0 
ep %d: game finished, reward: %f 574 -1.0 
resetting env. episode reward total was %f. running mean: %f -20.0 -20.273608932424562
ep %d: game finished, reward: %f 575 -1.0 
ep %d: game finished, reward: %f 575 -1.0 
ep %d: game finished, reward: %f 575 -1.0 
ep %d: game finished, reward: %f 575 -1.0 
ep %d: game finished, reward: %f 575 -1.0 
ep %d: game finished, reward: %f 575 -1.0 
ep %d: game finished, reward: %f 575 -1.0 
ep %d: game finished, reward: %f 575 -1.0 
ep %d: game finished, reward: %f 575 -1.0 
ep %d: game finished, reward: %f 575 -1.0 
ep %d: game finished, reward: %f 575 -1.0 
ep %d: game finished, reward: %f 575 -1.0 
ep %d: game finished, reward: %f 575 -1.0 
ep %d: game finished, reward: %f 575 -1.0 
ep %d: game finished, reward: %f 575 -1.0 
ep %d: game finished, reward: %f 575 -1.0 
ep %d: game finished, reward: %f 575 -1.0 
ep %d: game finished, reward: %f 575 -1.0 
ep %d: game finished, reward: %f 575 -1.0 
ep %d: gam

ep %d: game finished, reward: %f 582 -1.0 
ep %d: game finished, reward: %f 582 -1.0 
resetting env. episode reward total was %f. running mean: %f -21.0 -20.261501744720924
ep %d: game finished, reward: %f 583 -1.0 
ep %d: game finished, reward: %f 583 -1.0 
ep %d: game finished, reward: %f 583 -1.0 
ep %d: game finished, reward: %f 583 -1.0 
ep %d: game finished, reward: %f 583 -1.0 
ep %d: game finished, reward: %f 583 -1.0 
ep %d: game finished, reward: %f 583 -1.0 
ep %d: game finished, reward: %f 583 -1.0 
ep %d: game finished, reward: %f 583 -1.0 
ep %d: game finished, reward: %f 583 1.0  !!!!!!!!
ep %d: game finished, reward: %f 583 -1.0 
ep %d: game finished, reward: %f 583 -1.0 
ep %d: game finished, reward: %f 583 -1.0 
ep %d: game finished, reward: %f 583 -1.0 
ep %d: game finished, reward: %f 583 -1.0 
ep %d: game finished, reward: %f 583 -1.0 
ep %d: game finished, reward: %f 583 -1.0 
ep %d: game finished, reward: %f 583 -1.0 
ep %d: game finished, reward: %f 583 -1.0 
ep

ep %d: game finished, reward: %f 590 -1.0 
resetting env. episode reward total was %f. running mean: %f -20.0 -20.27079555464481
ep %d: game finished, reward: %f 591 -1.0 
ep %d: game finished, reward: %f 591 -1.0 
ep %d: game finished, reward: %f 591 -1.0 
ep %d: game finished, reward: %f 591 -1.0 
ep %d: game finished, reward: %f 591 -1.0 
ep %d: game finished, reward: %f 591 -1.0 
ep %d: game finished, reward: %f 591 -1.0 
ep %d: game finished, reward: %f 591 -1.0 
ep %d: game finished, reward: %f 591 -1.0 
ep %d: game finished, reward: %f 591 -1.0 
ep %d: game finished, reward: %f 591 -1.0 
ep %d: game finished, reward: %f 591 -1.0 
ep %d: game finished, reward: %f 591 -1.0 
ep %d: game finished, reward: %f 591 -1.0 
ep %d: game finished, reward: %f 591 -1.0 
ep %d: game finished, reward: %f 591 1.0  !!!!!!!!
ep %d: game finished, reward: %f 591 -1.0 
ep %d: game finished, reward: %f 591 -1.0 
ep %d: game finished, reward: %f 591 -1.0 
ep %d: game finished, reward: %f 591 1.0  !!!!

ep %d: game finished, reward: %f 599 -1.0 
ep %d: game finished, reward: %f 599 -1.0 
ep %d: game finished, reward: %f 599 -1.0 
ep %d: game finished, reward: %f 599 -1.0 
ep %d: game finished, reward: %f 599 -1.0 
ep %d: game finished, reward: %f 599 -1.0 
ep %d: game finished, reward: %f 599 -1.0 
ep %d: game finished, reward: %f 599 -1.0 
ep %d: game finished, reward: %f 599 -1.0 
ep %d: game finished, reward: %f 599 -1.0 
ep %d: game finished, reward: %f 599 -1.0 
ep %d: game finished, reward: %f 599 -1.0 
ep %d: game finished, reward: %f 599 -1.0 
ep %d: game finished, reward: %f 599 -1.0 
ep %d: game finished, reward: %f 599 -1.0 
ep %d: game finished, reward: %f 599 -1.0 
ep %d: game finished, reward: %f 599 -1.0 
ep %d: game finished, reward: %f 599 -1.0 
ep %d: game finished, reward: %f 599 -1.0 
ep %d: game finished, reward: %f 599 -1.0 
resetting env. episode reward total was %f. running mean: %f -21.0 -20.295604268337744
ep %d: game finished, reward: %f 600 -1.0 
ep %d: gam

ep %d: game finished, reward: %f 607 -1.0 
ep %d: game finished, reward: %f 607 -1.0 
ep %d: game finished, reward: %f 607 -1.0 
ep %d: game finished, reward: %f 607 -1.0 
ep %d: game finished, reward: %f 607 -1.0 
ep %d: game finished, reward: %f 607 -1.0 
ep %d: game finished, reward: %f 607 -1.0 
ep %d: game finished, reward: %f 607 -1.0 
ep %d: game finished, reward: %f 607 -1.0 
ep %d: game finished, reward: %f 607 1.0  !!!!!!!!
ep %d: game finished, reward: %f 607 -1.0 
ep %d: game finished, reward: %f 607 -1.0 
ep %d: game finished, reward: %f 607 -1.0 
ep %d: game finished, reward: %f 607 -1.0 
ep %d: game finished, reward: %f 607 -1.0 
ep %d: game finished, reward: %f 607 -1.0 
ep %d: game finished, reward: %f 607 -1.0 
ep %d: game finished, reward: %f 607 -1.0 
ep %d: game finished, reward: %f 607 -1.0 
resetting env. episode reward total was %f. running mean: %f -20.0 -20.301593883837967
ep %d: game finished, reward: %f 608 -1.0 
ep %d: game finished, reward: %f 608 -1.0 
ep

ep %d: game finished, reward: %f 615 -1.0 
ep %d: game finished, reward: %f 615 -1.0 
ep %d: game finished, reward: %f 615 -1.0 
ep %d: game finished, reward: %f 615 -1.0 
ep %d: game finished, reward: %f 615 -1.0 
ep %d: game finished, reward: %f 615 -1.0 
ep %d: game finished, reward: %f 615 -1.0 
ep %d: game finished, reward: %f 615 -1.0 
ep %d: game finished, reward: %f 615 -1.0 
ep %d: game finished, reward: %f 615 -1.0 
ep %d: game finished, reward: %f 615 1.0  !!!!!!!!
ep %d: game finished, reward: %f 615 -1.0 
ep %d: game finished, reward: %f 615 -1.0 
ep %d: game finished, reward: %f 615 -1.0 
ep %d: game finished, reward: %f 615 -1.0 
ep %d: game finished, reward: %f 615 -1.0 
ep %d: game finished, reward: %f 615 -1.0 
resetting env. episode reward total was %f. running mean: %f -20.0 -20.316334660261465
ep %d: game finished, reward: %f 616 -1.0 
ep %d: game finished, reward: %f 616 -1.0 
ep %d: game finished, reward: %f 616 -1.0 
ep %d: game finished, reward: %f 616 -1.0 
ep

ep %d: game finished, reward: %f 623 -1.0 
ep %d: game finished, reward: %f 623 -1.0 
ep %d: game finished, reward: %f 623 -1.0 
ep %d: game finished, reward: %f 623 1.0  !!!!!!!!
ep %d: game finished, reward: %f 623 -1.0 
ep %d: game finished, reward: %f 623 -1.0 
ep %d: game finished, reward: %f 623 -1.0 
ep %d: game finished, reward: %f 623 -1.0 
ep %d: game finished, reward: %f 623 -1.0 
ep %d: game finished, reward: %f 623 -1.0 
ep %d: game finished, reward: %f 623 -1.0 
ep %d: game finished, reward: %f 623 -1.0 
ep %d: game finished, reward: %f 623 -1.0 
ep %d: game finished, reward: %f 623 -1.0 
ep %d: game finished, reward: %f 623 -1.0 
resetting env. episode reward total was %f. running mean: %f -19.0 -20.320428643994006
ep %d: game finished, reward: %f 624 -1.0 
ep %d: game finished, reward: %f 624 -1.0 
ep %d: game finished, reward: %f 624 -1.0 
ep %d: game finished, reward: %f 624 -1.0 
ep %d: game finished, reward: %f 624 -1.0 
ep %d: game finished, reward: %f 624 -1.0 
ep

ep %d: game finished, reward: %f 631 -1.0 
ep %d: game finished, reward: %f 631 -1.0 
ep %d: game finished, reward: %f 631 -1.0 
ep %d: game finished, reward: %f 631 -1.0 
ep %d: game finished, reward: %f 631 -1.0 
ep %d: game finished, reward: %f 631 -1.0 
ep %d: game finished, reward: %f 631 -1.0 
ep %d: game finished, reward: %f 631 1.0  !!!!!!!!
ep %d: game finished, reward: %f 631 -1.0 
ep %d: game finished, reward: %f 631 -1.0 
ep %d: game finished, reward: %f 631 -1.0 
ep %d: game finished, reward: %f 631 1.0  !!!!!!!!
ep %d: game finished, reward: %f 631 -1.0 
ep %d: game finished, reward: %f 631 -1.0 
ep %d: game finished, reward: %f 631 1.0  !!!!!!!!
ep %d: game finished, reward: %f 631 -1.0 
ep %d: game finished, reward: %f 631 -1.0 
ep %d: game finished, reward: %f 631 -1.0 
resetting env. episode reward total was %f. running mean: %f -18.0 -20.285090544268268
ep %d: game finished, reward: %f 632 -1.0 
ep %d: game finished, reward: %f 632 -1.0 
ep %d: game finished, reward:

ep %d: game finished, reward: %f 639 -1.0 
ep %d: game finished, reward: %f 639 -1.0 
ep %d: game finished, reward: %f 639 -1.0 
ep %d: game finished, reward: %f 639 -1.0 
ep %d: game finished, reward: %f 639 -1.0 
ep %d: game finished, reward: %f 639 -1.0 
ep %d: game finished, reward: %f 639 -1.0 
ep %d: game finished, reward: %f 639 -1.0 
ep %d: game finished, reward: %f 639 -1.0 
ep %d: game finished, reward: %f 639 -1.0 
ep %d: game finished, reward: %f 639 -1.0 
ep %d: game finished, reward: %f 639 -1.0 
ep %d: game finished, reward: %f 639 -1.0 
ep %d: game finished, reward: %f 639 -1.0 
ep %d: game finished, reward: %f 639 -1.0 
ep %d: game finished, reward: %f 639 -1.0 
ep %d: game finished, reward: %f 639 -1.0 
ep %d: game finished, reward: %f 639 -1.0 
ep %d: game finished, reward: %f 639 -1.0 
ep %d: game finished, reward: %f 639 -1.0 
resetting env. episode reward total was %f. running mean: %f -21.0 -20.27257767765411
ep %d: game finished, reward: %f 640 -1.0 
ep %d: game

ep %d: game finished, reward: %f 646 -1.0 
ep %d: game finished, reward: %f 646 -1.0 
ep %d: game finished, reward: %f 646 -1.0 
ep %d: game finished, reward: %f 646 -1.0 
ep %d: game finished, reward: %f 646 -1.0 
ep %d: game finished, reward: %f 646 -1.0 
resetting env. episode reward total was %f. running mean: %f -19.0 -20.185343386759342
ep %d: game finished, reward: %f 647 -1.0 
ep %d: game finished, reward: %f 647 -1.0 
ep %d: game finished, reward: %f 647 -1.0 
ep %d: game finished, reward: %f 647 -1.0 
ep %d: game finished, reward: %f 647 -1.0 
ep %d: game finished, reward: %f 647 -1.0 
ep %d: game finished, reward: %f 647 -1.0 
ep %d: game finished, reward: %f 647 -1.0 
ep %d: game finished, reward: %f 647 -1.0 
ep %d: game finished, reward: %f 647 -1.0 
ep %d: game finished, reward: %f 647 -1.0 
ep %d: game finished, reward: %f 647 -1.0 
ep %d: game finished, reward: %f 647 -1.0 
ep %d: game finished, reward: %f 647 -1.0 
ep %d: game finished, reward: %f 647 -1.0 
ep %d: gam

ep %d: game finished, reward: %f 654 -1.0 
ep %d: game finished, reward: %f 654 -1.0 
ep %d: game finished, reward: %f 654 -1.0 
ep %d: game finished, reward: %f 654 -1.0 
ep %d: game finished, reward: %f 654 -1.0 
resetting env. episode reward total was %f. running mean: %f -19.0 -20.19916801215157
ep %d: game finished, reward: %f 655 -1.0 
ep %d: game finished, reward: %f 655 -1.0 
ep %d: game finished, reward: %f 655 -1.0 
ep %d: game finished, reward: %f 655 -1.0 
ep %d: game finished, reward: %f 655 -1.0 
ep %d: game finished, reward: %f 655 -1.0 
ep %d: game finished, reward: %f 655 -1.0 
ep %d: game finished, reward: %f 655 -1.0 
ep %d: game finished, reward: %f 655 -1.0 
ep %d: game finished, reward: %f 655 -1.0 
ep %d: game finished, reward: %f 655 -1.0 
ep %d: game finished, reward: %f 655 1.0  !!!!!!!!
ep %d: game finished, reward: %f 655 -1.0 
ep %d: game finished, reward: %f 655 -1.0 
ep %d: game finished, reward: %f 655 -1.0 
ep %d: game finished, reward: %f 655 -1.0 
ep 

ep %d: game finished, reward: %f 662 1.0  !!!!!!!!
ep %d: game finished, reward: %f 662 -1.0 
ep %d: game finished, reward: %f 662 -1.0 
ep %d: game finished, reward: %f 662 -1.0 
resetting env. episode reward total was %f. running mean: %f -20.0 -20.20386647461961
ep %d: game finished, reward: %f 663 -1.0 
ep %d: game finished, reward: %f 663 -1.0 
ep %d: game finished, reward: %f 663 -1.0 
ep %d: game finished, reward: %f 663 -1.0 
ep %d: game finished, reward: %f 663 -1.0 
ep %d: game finished, reward: %f 663 -1.0 
ep %d: game finished, reward: %f 663 -1.0 
ep %d: game finished, reward: %f 663 -1.0 
ep %d: game finished, reward: %f 663 -1.0 
ep %d: game finished, reward: %f 663 -1.0 
ep %d: game finished, reward: %f 663 -1.0 
ep %d: game finished, reward: %f 663 -1.0 
ep %d: game finished, reward: %f 663 -1.0 
ep %d: game finished, reward: %f 663 -1.0 
ep %d: game finished, reward: %f 663 -1.0 
ep %d: game finished, reward: %f 663 -1.0 
ep %d: game finished, reward: %f 663 1.0  !!!!

ep %d: game finished, reward: %f 670 -1.0 
ep %d: game finished, reward: %f 670 -1.0 
ep %d: game finished, reward: %f 670 -1.0 
resetting env. episode reward total was %f. running mean: %f -20.0 -20.207427608325965
ep %d: game finished, reward: %f 671 -1.0 
ep %d: game finished, reward: %f 671 -1.0 
ep %d: game finished, reward: %f 671 -1.0 
ep %d: game finished, reward: %f 671 -1.0 
ep %d: game finished, reward: %f 671 -1.0 
ep %d: game finished, reward: %f 671 -1.0 
ep %d: game finished, reward: %f 671 -1.0 
ep %d: game finished, reward: %f 671 -1.0 
ep %d: game finished, reward: %f 671 -1.0 
ep %d: game finished, reward: %f 671 -1.0 
ep %d: game finished, reward: %f 671 -1.0 
ep %d: game finished, reward: %f 671 -1.0 
ep %d: game finished, reward: %f 671 -1.0 
ep %d: game finished, reward: %f 671 -1.0 
ep %d: game finished, reward: %f 671 -1.0 
ep %d: game finished, reward: %f 671 -1.0 
ep %d: game finished, reward: %f 671 -1.0 
ep %d: game finished, reward: %f 671 -1.0 
ep %d: gam

ep %d: game finished, reward: %f 679 -1.0 
ep %d: game finished, reward: %f 679 -1.0 
ep %d: game finished, reward: %f 679 -1.0 
ep %d: game finished, reward: %f 679 -1.0 
ep %d: game finished, reward: %f 679 -1.0 
ep %d: game finished, reward: %f 679 -1.0 
ep %d: game finished, reward: %f 679 -1.0 
ep %d: game finished, reward: %f 679 -1.0 
ep %d: game finished, reward: %f 679 -1.0 
ep %d: game finished, reward: %f 679 -1.0 
ep %d: game finished, reward: %f 679 -1.0 
ep %d: game finished, reward: %f 679 -1.0 
ep %d: game finished, reward: %f 679 -1.0 
ep %d: game finished, reward: %f 679 -1.0 
ep %d: game finished, reward: %f 679 -1.0 
ep %d: game finished, reward: %f 679 -1.0 
ep %d: game finished, reward: %f 679 -1.0 
ep %d: game finished, reward: %f 679 -1.0 
ep %d: game finished, reward: %f 679 -1.0 
ep %d: game finished, reward: %f 679 -1.0 
resetting env. episode reward total was %f. running mean: %f -21.0 -20.25646746032642
ep %d: game finished, reward: %f 680 -1.0 
ep %d: game

ep %d: game finished, reward: %f 687 -1.0 
ep %d: game finished, reward: %f 687 -1.0 
ep %d: game finished, reward: %f 687 -1.0 
ep %d: game finished, reward: %f 687 -1.0 
ep %d: game finished, reward: %f 687 -1.0 
ep %d: game finished, reward: %f 687 -1.0 
ep %d: game finished, reward: %f 687 -1.0 
ep %d: game finished, reward: %f 687 -1.0 
ep %d: game finished, reward: %f 687 -1.0 
ep %d: game finished, reward: %f 687 -1.0 
ep %d: game finished, reward: %f 687 -1.0 
ep %d: game finished, reward: %f 687 -1.0 
ep %d: game finished, reward: %f 687 -1.0 
ep %d: game finished, reward: %f 687 -1.0 
ep %d: game finished, reward: %f 687 -1.0 
ep %d: game finished, reward: %f 687 -1.0 
ep %d: game finished, reward: %f 687 -1.0 
ep %d: game finished, reward: %f 687 -1.0 
resetting env. episode reward total was %f. running mean: %f -21.0 -20.275483512883692
ep %d: game finished, reward: %f 688 -1.0 
ep %d: game finished, reward: %f 688 -1.0 
ep %d: game finished, reward: %f 688 -1.0 
ep %d: gam

ep %d: game finished, reward: %f 695 -1.0 
ep %d: game finished, reward: %f 695 -1.0 
ep %d: game finished, reward: %f 695 -1.0 
ep %d: game finished, reward: %f 695 -1.0 
ep %d: game finished, reward: %f 695 1.0  !!!!!!!!
ep %d: game finished, reward: %f 695 -1.0 
ep %d: game finished, reward: %f 695 -1.0 
ep %d: game finished, reward: %f 695 -1.0 
ep %d: game finished, reward: %f 695 -1.0 
ep %d: game finished, reward: %f 695 -1.0 
ep %d: game finished, reward: %f 695 -1.0 
ep %d: game finished, reward: %f 695 -1.0 
ep %d: game finished, reward: %f 695 -1.0 
ep %d: game finished, reward: %f 695 -1.0 
resetting env. episode reward total was %f. running mean: %f -20.0 -20.30243645448988
ep %d: game finished, reward: %f 696 -1.0 
ep %d: game finished, reward: %f 696 -1.0 
ep %d: game finished, reward: %f 696 -1.0 
ep %d: game finished, reward: %f 696 -1.0 
ep %d: game finished, reward: %f 696 -1.0 
ep %d: game finished, reward: %f 696 -1.0 
ep %d: game finished, reward: %f 696 -1.0 
ep 

ep %d: game finished, reward: %f 703 -1.0 
ep %d: game finished, reward: %f 703 -1.0 
ep %d: game finished, reward: %f 703 -1.0 
ep %d: game finished, reward: %f 703 -1.0 
ep %d: game finished, reward: %f 703 -1.0 
ep %d: game finished, reward: %f 703 -1.0 
ep %d: game finished, reward: %f 703 -1.0 
ep %d: game finished, reward: %f 703 -1.0 
ep %d: game finished, reward: %f 703 -1.0 
ep %d: game finished, reward: %f 703 -1.0 
ep %d: game finished, reward: %f 703 -1.0 
ep %d: game finished, reward: %f 703 -1.0 
ep %d: game finished, reward: %f 703 -1.0 
resetting env. episode reward total was %f. running mean: %f -21.0 -20.298198217660197
ep %d: game finished, reward: %f 704 -1.0 
ep %d: game finished, reward: %f 704 -1.0 
ep %d: game finished, reward: %f 704 -1.0 
ep %d: game finished, reward: %f 704 -1.0 
ep %d: game finished, reward: %f 704 -1.0 
ep %d: game finished, reward: %f 704 -1.0 
ep %d: game finished, reward: %f 704 -1.0 
ep %d: game finished, reward: %f 704 -1.0 
ep %d: gam

ep %d: game finished, reward: %f 711 -1.0 
ep %d: game finished, reward: %f 711 -1.0 
ep %d: game finished, reward: %f 711 -1.0 
ep %d: game finished, reward: %f 711 -1.0 
ep %d: game finished, reward: %f 711 -1.0 
ep %d: game finished, reward: %f 711 -1.0 
ep %d: game finished, reward: %f 711 -1.0 
ep %d: game finished, reward: %f 711 -1.0 
resetting env. episode reward total was %f. running mean: %f -21.0 -20.333585574827822
ep %d: game finished, reward: %f 712 -1.0 
ep %d: game finished, reward: %f 712 -1.0 
ep %d: game finished, reward: %f 712 -1.0 
ep %d: game finished, reward: %f 712 -1.0 
ep %d: game finished, reward: %f 712 -1.0 
ep %d: game finished, reward: %f 712 -1.0 
ep %d: game finished, reward: %f 712 -1.0 
ep %d: game finished, reward: %f 712 -1.0 
ep %d: game finished, reward: %f 712 -1.0 
ep %d: game finished, reward: %f 712 -1.0 
ep %d: game finished, reward: %f 712 -1.0 
ep %d: game finished, reward: %f 712 -1.0 
ep %d: game finished, reward: %f 712 -1.0 
ep %d: gam

ep %d: game finished, reward: %f 719 -1.0 
ep %d: game finished, reward: %f 719 -1.0 
ep %d: game finished, reward: %f 719 -1.0 
ep %d: game finished, reward: %f 719 -1.0 
ep %d: game finished, reward: %f 719 -1.0 
ep %d: game finished, reward: %f 719 -1.0 
ep %d: game finished, reward: %f 719 -1.0 
resetting env. episode reward total was %f. running mean: %f -20.0 -20.326454704682146
ep %d: game finished, reward: %f 720 -1.0 
ep %d: game finished, reward: %f 720 -1.0 
ep %d: game finished, reward: %f 720 -1.0 
ep %d: game finished, reward: %f 720 -1.0 
ep %d: game finished, reward: %f 720 -1.0 
ep %d: game finished, reward: %f 720 -1.0 
ep %d: game finished, reward: %f 720 -1.0 
ep %d: game finished, reward: %f 720 -1.0 
ep %d: game finished, reward: %f 720 -1.0 
ep %d: game finished, reward: %f 720 -1.0 
ep %d: game finished, reward: %f 720 -1.0 
ep %d: game finished, reward: %f 720 -1.0 
ep %d: game finished, reward: %f 720 -1.0 
ep %d: game finished, reward: %f 720 -1.0 
ep %d: gam

ep %d: game finished, reward: %f 727 -1.0 
ep %d: game finished, reward: %f 727 -1.0 
ep %d: game finished, reward: %f 727 -1.0 
ep %d: game finished, reward: %f 727 -1.0 
ep %d: game finished, reward: %f 727 -1.0 
ep %d: game finished, reward: %f 727 -1.0 
ep %d: game finished, reward: %f 727 -1.0 
resetting env. episode reward total was %f. running mean: %f -21.0 -20.320746217315484
ep %d: game finished, reward: %f 728 -1.0 
ep %d: game finished, reward: %f 728 -1.0 
ep %d: game finished, reward: %f 728 -1.0 
ep %d: game finished, reward: %f 728 -1.0 
ep %d: game finished, reward: %f 728 -1.0 
ep %d: game finished, reward: %f 728 -1.0 
ep %d: game finished, reward: %f 728 -1.0 
ep %d: game finished, reward: %f 728 -1.0 
ep %d: game finished, reward: %f 728 -1.0 
ep %d: game finished, reward: %f 728 -1.0 
ep %d: game finished, reward: %f 728 -1.0 
ep %d: game finished, reward: %f 728 -1.0 
ep %d: game finished, reward: %f 728 -1.0 
ep %d: game finished, reward: %f 728 -1.0 
ep %d: gam

ep %d: game finished, reward: %f 735 -1.0 
ep %d: game finished, reward: %f 735 -1.0 
ep %d: game finished, reward: %f 735 -1.0 
resetting env. episode reward total was %f. running mean: %f -20.0 -20.34400928535877
ep %d: game finished, reward: %f 736 -1.0 
ep %d: game finished, reward: %f 736 -1.0 
ep %d: game finished, reward: %f 736 -1.0 
ep %d: game finished, reward: %f 736 -1.0 
ep %d: game finished, reward: %f 736 -1.0 
ep %d: game finished, reward: %f 736 -1.0 
ep %d: game finished, reward: %f 736 -1.0 
ep %d: game finished, reward: %f 736 -1.0 
ep %d: game finished, reward: %f 736 -1.0 
ep %d: game finished, reward: %f 736 -1.0 
ep %d: game finished, reward: %f 736 -1.0 
ep %d: game finished, reward: %f 736 -1.0 
ep %d: game finished, reward: %f 736 -1.0 
ep %d: game finished, reward: %f 736 -1.0 
ep %d: game finished, reward: %f 736 -1.0 
ep %d: game finished, reward: %f 736 -1.0 
ep %d: game finished, reward: %f 736 -1.0 
ep %d: game finished, reward: %f 736 -1.0 
ep %d: game

resetting env. episode reward total was %f. running mean: %f -20.0 -20.356348544983806
ep %d: game finished, reward: %f 744 -1.0 
ep %d: game finished, reward: %f 744 -1.0 
ep %d: game finished, reward: %f 744 -1.0 
ep %d: game finished, reward: %f 744 -1.0 
ep %d: game finished, reward: %f 744 -1.0 
ep %d: game finished, reward: %f 744 -1.0 
ep %d: game finished, reward: %f 744 -1.0 
ep %d: game finished, reward: %f 744 -1.0 
ep %d: game finished, reward: %f 744 -1.0 
ep %d: game finished, reward: %f 744 -1.0 
ep %d: game finished, reward: %f 744 -1.0 
ep %d: game finished, reward: %f 744 -1.0 
ep %d: game finished, reward: %f 744 -1.0 
ep %d: game finished, reward: %f 744 -1.0 
ep %d: game finished, reward: %f 744 -1.0 
ep %d: game finished, reward: %f 744 -1.0 
ep %d: game finished, reward: %f 744 -1.0 
ep %d: game finished, reward: %f 744 1.0  !!!!!!!!
ep %d: game finished, reward: %f 744 -1.0 
ep %d: game finished, reward: %f 744 -1.0 
ep %d: game finished, reward: %f 744 -1.0 
ep

ep %d: game finished, reward: %f 751 -1.0 
ep %d: game finished, reward: %f 751 -1.0 
resetting env. episode reward total was %f. running mean: %f -21.0 -20.328233580345927
ep %d: game finished, reward: %f 752 -1.0 
ep %d: game finished, reward: %f 752 -1.0 
ep %d: game finished, reward: %f 752 -1.0 
ep %d: game finished, reward: %f 752 -1.0 
ep %d: game finished, reward: %f 752 -1.0 
ep %d: game finished, reward: %f 752 -1.0 
ep %d: game finished, reward: %f 752 -1.0 
ep %d: game finished, reward: %f 752 -1.0 
ep %d: game finished, reward: %f 752 -1.0 
ep %d: game finished, reward: %f 752 -1.0 
ep %d: game finished, reward: %f 752 -1.0 
ep %d: game finished, reward: %f 752 -1.0 
ep %d: game finished, reward: %f 752 -1.0 
ep %d: game finished, reward: %f 752 -1.0 
ep %d: game finished, reward: %f 752 -1.0 
ep %d: game finished, reward: %f 752 -1.0 
ep %d: game finished, reward: %f 752 1.0  !!!!!!!!
ep %d: game finished, reward: %f 752 -1.0 
ep %d: game finished, reward: %f 752 -1.0 
ep

ep %d: game finished, reward: %f 759 -1.0 
resetting env. episode reward total was %f. running mean: %f -21.0 -20.322777745196287
ep %d: game finished, reward: %f 760 -1.0 
ep %d: game finished, reward: %f 760 -1.0 
ep %d: game finished, reward: %f 760 -1.0 
ep %d: game finished, reward: %f 760 -1.0 
ep %d: game finished, reward: %f 760 -1.0 
ep %d: game finished, reward: %f 760 -1.0 
ep %d: game finished, reward: %f 760 -1.0 
ep %d: game finished, reward: %f 760 -1.0 
ep %d: game finished, reward: %f 760 -1.0 
ep %d: game finished, reward: %f 760 -1.0 
ep %d: game finished, reward: %f 760 -1.0 
ep %d: game finished, reward: %f 760 -1.0 
ep %d: game finished, reward: %f 760 -1.0 
ep %d: game finished, reward: %f 760 -1.0 
ep %d: game finished, reward: %f 760 -1.0 
ep %d: game finished, reward: %f 760 -1.0 
ep %d: game finished, reward: %f 760 -1.0 
ep %d: game finished, reward: %f 760 -1.0 
ep %d: game finished, reward: %f 760 -1.0 
ep %d: game finished, reward: %f 760 -1.0 
ep %d: gam

ep %d: game finished, reward: %f 767 1.0  !!!!!!!!
ep %d: game finished, reward: %f 767 -1.0 
ep %d: game finished, reward: %f 767 -1.0 
ep %d: game finished, reward: %f 767 -1.0 
ep %d: game finished, reward: %f 767 1.0  !!!!!!!!
ep %d: game finished, reward: %f 767 -1.0 
resetting env. episode reward total was %f. running mean: %f -17.0 -20.267363055737352
ep %d: game finished, reward: %f 768 -1.0 
ep %d: game finished, reward: %f 768 -1.0 
ep %d: game finished, reward: %f 768 -1.0 
ep %d: game finished, reward: %f 768 -1.0 
ep %d: game finished, reward: %f 768 -1.0 
ep %d: game finished, reward: %f 768 -1.0 
ep %d: game finished, reward: %f 768 -1.0 
ep %d: game finished, reward: %f 768 -1.0 
ep %d: game finished, reward: %f 768 -1.0 
ep %d: game finished, reward: %f 768 -1.0 
ep %d: game finished, reward: %f 768 -1.0 
ep %d: game finished, reward: %f 768 -1.0 
ep %d: game finished, reward: %f 768 -1.0 
ep %d: game finished, reward: %f 768 -1.0 
ep %d: game finished, reward: %f 768 

ep %d: game finished, reward: %f 775 -1.0 
ep %d: game finished, reward: %f 775 -1.0 
ep %d: game finished, reward: %f 775 -1.0 
resetting env. episode reward total was %f. running mean: %f -21.0 -20.285244296140757
ep %d: game finished, reward: %f 776 -1.0 
ep %d: game finished, reward: %f 776 -1.0 
ep %d: game finished, reward: %f 776 -1.0 
ep %d: game finished, reward: %f 776 -1.0 
ep %d: game finished, reward: %f 776 -1.0 
ep %d: game finished, reward: %f 776 -1.0 
ep %d: game finished, reward: %f 776 -1.0 
ep %d: game finished, reward: %f 776 1.0  !!!!!!!!
ep %d: game finished, reward: %f 776 -1.0 
ep %d: game finished, reward: %f 776 -1.0 
ep %d: game finished, reward: %f 776 -1.0 
ep %d: game finished, reward: %f 776 -1.0 
ep %d: game finished, reward: %f 776 -1.0 
ep %d: game finished, reward: %f 776 -1.0 
ep %d: game finished, reward: %f 776 -1.0 
ep %d: game finished, reward: %f 776 -1.0 
ep %d: game finished, reward: %f 776 -1.0 
ep %d: game finished, reward: %f 776 -1.0 
ep

ep %d: game finished, reward: %f 783 -1.0 
ep %d: game finished, reward: %f 783 -1.0 
ep %d: game finished, reward: %f 783 -1.0 
ep %d: game finished, reward: %f 783 -1.0 
resetting env. episode reward total was %f. running mean: %f -20.0 -20.273009650879704
ep %d: game finished, reward: %f 784 -1.0 
ep %d: game finished, reward: %f 784 -1.0 
ep %d: game finished, reward: %f 784 -1.0 
ep %d: game finished, reward: %f 784 -1.0 
ep %d: game finished, reward: %f 784 -1.0 
ep %d: game finished, reward: %f 784 -1.0 
ep %d: game finished, reward: %f 784 1.0  !!!!!!!!
ep %d: game finished, reward: %f 784 -1.0 
ep %d: game finished, reward: %f 784 -1.0 
ep %d: game finished, reward: %f 784 -1.0 
ep %d: game finished, reward: %f 784 -1.0 
ep %d: game finished, reward: %f 784 -1.0 
ep %d: game finished, reward: %f 784 -1.0 
ep %d: game finished, reward: %f 784 -1.0 
ep %d: game finished, reward: %f 784 -1.0 
ep %d: game finished, reward: %f 784 -1.0 
ep %d: game finished, reward: %f 784 -1.0 
ep

ep %d: game finished, reward: %f 791 -1.0 
ep %d: game finished, reward: %f 791 -1.0 
ep %d: game finished, reward: %f 791 -1.0 
ep %d: game finished, reward: %f 791 -1.0 
ep %d: game finished, reward: %f 791 -1.0 
resetting env. episode reward total was %f. running mean: %f -20.0 -20.261421304991806
ep %d: game finished, reward: %f 792 -1.0 
ep %d: game finished, reward: %f 792 -1.0 
ep %d: game finished, reward: %f 792 -1.0 
ep %d: game finished, reward: %f 792 -1.0 
ep %d: game finished, reward: %f 792 -1.0 
ep %d: game finished, reward: %f 792 -1.0 
ep %d: game finished, reward: %f 792 -1.0 
ep %d: game finished, reward: %f 792 -1.0 
ep %d: game finished, reward: %f 792 -1.0 
ep %d: game finished, reward: %f 792 -1.0 
ep %d: game finished, reward: %f 792 -1.0 
ep %d: game finished, reward: %f 792 -1.0 
ep %d: game finished, reward: %f 792 1.0  !!!!!!!!
ep %d: game finished, reward: %f 792 -1.0 
ep %d: game finished, reward: %f 792 -1.0 
ep %d: game finished, reward: %f 792 -1.0 
ep

ep %d: game finished, reward: %f 799 1.0  !!!!!!!!
ep %d: game finished, reward: %f 799 -1.0 
ep %d: game finished, reward: %f 799 -1.0 
ep %d: game finished, reward: %f 799 -1.0 
resetting env. episode reward total was %f. running mean: %f -20.0 -20.27053401318662
ep %d: game finished, reward: %f 800 -1.0 
ep %d: game finished, reward: %f 800 -1.0 
ep %d: game finished, reward: %f 800 -1.0 
ep %d: game finished, reward: %f 800 -1.0 
ep %d: game finished, reward: %f 800 -1.0 
ep %d: game finished, reward: %f 800 -1.0 
ep %d: game finished, reward: %f 800 -1.0 
ep %d: game finished, reward: %f 800 -1.0 
ep %d: game finished, reward: %f 800 -1.0 
ep %d: game finished, reward: %f 800 -1.0 
ep %d: game finished, reward: %f 800 -1.0 
ep %d: game finished, reward: %f 800 -1.0 
ep %d: game finished, reward: %f 800 -1.0 
ep %d: game finished, reward: %f 800 -1.0 
ep %d: game finished, reward: %f 800 -1.0 
ep %d: game finished, reward: %f 800 -1.0 
ep %d: game finished, reward: %f 800 -1.0 
ep 

ep %d: game finished, reward: %f 808 -1.0 
ep %d: game finished, reward: %f 808 -1.0 
ep %d: game finished, reward: %f 808 -1.0 
ep %d: game finished, reward: %f 808 -1.0 
ep %d: game finished, reward: %f 808 -1.0 
ep %d: game finished, reward: %f 808 -1.0 
ep %d: game finished, reward: %f 808 -1.0 
ep %d: game finished, reward: %f 808 -1.0 
ep %d: game finished, reward: %f 808 -1.0 
ep %d: game finished, reward: %f 808 -1.0 
ep %d: game finished, reward: %f 808 -1.0 
ep %d: game finished, reward: %f 808 -1.0 
ep %d: game finished, reward: %f 808 -1.0 
ep %d: game finished, reward: %f 808 -1.0 
ep %d: game finished, reward: %f 808 -1.0 
ep %d: game finished, reward: %f 808 -1.0 
ep %d: game finished, reward: %f 808 -1.0 
ep %d: game finished, reward: %f 808 -1.0 
ep %d: game finished, reward: %f 808 -1.0 
ep %d: game finished, reward: %f 808 -1.0 
resetting env. episode reward total was %f. running mean: %f -21.0 -20.324392792649025
ep %d: game finished, reward: %f 809 -1.0 
ep %d: gam

ep %d: game finished, reward: %f 816 -1.0 
ep %d: game finished, reward: %f 816 -1.0 
ep %d: game finished, reward: %f 816 -1.0 
ep %d: game finished, reward: %f 816 -1.0 
ep %d: game finished, reward: %f 816 -1.0 
ep %d: game finished, reward: %f 816 -1.0 
ep %d: game finished, reward: %f 816 -1.0 
ep %d: game finished, reward: %f 816 -1.0 
ep %d: game finished, reward: %f 816 -1.0 
ep %d: game finished, reward: %f 816 -1.0 
ep %d: game finished, reward: %f 816 -1.0 
ep %d: game finished, reward: %f 816 -1.0 
ep %d: game finished, reward: %f 816 -1.0 
ep %d: game finished, reward: %f 816 -1.0 
ep %d: game finished, reward: %f 816 -1.0 
ep %d: game finished, reward: %f 816 -1.0 
ep %d: game finished, reward: %f 816 -1.0 
ep %d: game finished, reward: %f 816 -1.0 
resetting env. episode reward total was %f. running mean: %f -21.0 -20.338446530412607
ep %d: game finished, reward: %f 817 -1.0 
ep %d: game finished, reward: %f 817 -1.0 
ep %d: game finished, reward: %f 817 1.0  !!!!!!!!
ep

resetting env. episode reward total was %f. running mean: %f -21.0 -20.286630610527926
ep %d: game finished, reward: %f 824 -1.0 
ep %d: game finished, reward: %f 824 -1.0 
ep %d: game finished, reward: %f 824 -1.0 
ep %d: game finished, reward: %f 824 -1.0 
ep %d: game finished, reward: %f 824 -1.0 
ep %d: game finished, reward: %f 824 -1.0 
ep %d: game finished, reward: %f 824 -1.0 
ep %d: game finished, reward: %f 824 -1.0 
ep %d: game finished, reward: %f 824 -1.0 
ep %d: game finished, reward: %f 824 -1.0 
ep %d: game finished, reward: %f 824 -1.0 
ep %d: game finished, reward: %f 824 -1.0 
ep %d: game finished, reward: %f 824 -1.0 
ep %d: game finished, reward: %f 824 1.0  !!!!!!!!
ep %d: game finished, reward: %f 824 -1.0 
ep %d: game finished, reward: %f 824 -1.0 
ep %d: game finished, reward: %f 824 -1.0 
ep %d: game finished, reward: %f 824 -1.0 
ep %d: game finished, reward: %f 824 1.0  !!!!!!!!
ep %d: game finished, reward: %f 824 -1.0 
ep %d: game finished, reward: %f 824 

ep %d: game finished, reward: %f 831 -1.0 
resetting env. episode reward total was %f. running mean: %f -20.0 -20.274090923639218
ep %d: game finished, reward: %f 832 -1.0 
ep %d: game finished, reward: %f 832 -1.0 
ep %d: game finished, reward: %f 832 -1.0 
ep %d: game finished, reward: %f 832 -1.0 
ep %d: game finished, reward: %f 832 -1.0 
ep %d: game finished, reward: %f 832 -1.0 
ep %d: game finished, reward: %f 832 -1.0 
ep %d: game finished, reward: %f 832 -1.0 
ep %d: game finished, reward: %f 832 -1.0 
ep %d: game finished, reward: %f 832 -1.0 
ep %d: game finished, reward: %f 832 -1.0 
ep %d: game finished, reward: %f 832 -1.0 
ep %d: game finished, reward: %f 832 -1.0 
ep %d: game finished, reward: %f 832 -1.0 
ep %d: game finished, reward: %f 832 -1.0 
ep %d: game finished, reward: %f 832 -1.0 
ep %d: game finished, reward: %f 832 -1.0 
ep %d: game finished, reward: %f 832 -1.0 
ep %d: game finished, reward: %f 832 -1.0 
ep %d: game finished, reward: %f 832 -1.0 
ep %d: gam

ep %d: game finished, reward: %f 840 -1.0 
ep %d: game finished, reward: %f 840 -1.0 
ep %d: game finished, reward: %f 840 -1.0 
ep %d: game finished, reward: %f 840 -1.0 
ep %d: game finished, reward: %f 840 -1.0 
ep %d: game finished, reward: %f 840 -1.0 
ep %d: game finished, reward: %f 840 -1.0 
ep %d: game finished, reward: %f 840 -1.0 
ep %d: game finished, reward: %f 840 -1.0 
ep %d: game finished, reward: %f 840 -1.0 
ep %d: game finished, reward: %f 840 -1.0 
ep %d: game finished, reward: %f 840 -1.0 
ep %d: game finished, reward: %f 840 -1.0 
ep %d: game finished, reward: %f 840 -1.0 
ep %d: game finished, reward: %f 840 -1.0 
ep %d: game finished, reward: %f 840 -1.0 
ep %d: game finished, reward: %f 840 -1.0 
ep %d: game finished, reward: %f 840 -1.0 
ep %d: game finished, reward: %f 840 -1.0 
ep %d: game finished, reward: %f 840 -1.0 
resetting env. episode reward total was %f. running mean: %f -21.0 -20.30775761843951
ep %d: game finished, reward: %f 841 -1.0 
ep %d: game

ep %d: game finished, reward: %f 848 -1.0 
ep %d: game finished, reward: %f 848 -1.0 
ep %d: game finished, reward: %f 848 -1.0 
ep %d: game finished, reward: %f 848 -1.0 
ep %d: game finished, reward: %f 848 -1.0 
ep %d: game finished, reward: %f 848 -1.0 
ep %d: game finished, reward: %f 848 -1.0 
ep %d: game finished, reward: %f 848 -1.0 
ep %d: game finished, reward: %f 848 -1.0 
ep %d: game finished, reward: %f 848 -1.0 
ep %d: game finished, reward: %f 848 -1.0 
ep %d: game finished, reward: %f 848 -1.0 
ep %d: game finished, reward: %f 848 -1.0 
ep %d: game finished, reward: %f 848 -1.0 
resetting env. episode reward total was %f. running mean: %f -21.0 -20.34221337167784
ep %d: game finished, reward: %f 849 -1.0 
ep %d: game finished, reward: %f 849 -1.0 
ep %d: game finished, reward: %f 849 -1.0 
ep %d: game finished, reward: %f 849 -1.0 
ep %d: game finished, reward: %f 849 -1.0 
ep %d: game finished, reward: %f 849 -1.0 
ep %d: game finished, reward: %f 849 -1.0 
ep %d: game

ep %d: game finished, reward: %f 856 -1.0 
ep %d: game finished, reward: %f 856 -1.0 
ep %d: game finished, reward: %f 856 -1.0 
ep %d: game finished, reward: %f 856 -1.0 
ep %d: game finished, reward: %f 856 -1.0 
ep %d: game finished, reward: %f 856 -1.0 
ep %d: game finished, reward: %f 856 -1.0 
ep %d: game finished, reward: %f 856 -1.0 
ep %d: game finished, reward: %f 856 -1.0 
ep %d: game finished, reward: %f 856 -1.0 
ep %d: game finished, reward: %f 856 -1.0 
ep %d: game finished, reward: %f 856 -1.0 
ep %d: game finished, reward: %f 856 -1.0 
resetting env. episode reward total was %f. running mean: %f -21.0 -20.334897285662077
ep %d: game finished, reward: %f 857 -1.0 
ep %d: game finished, reward: %f 857 -1.0 
ep %d: game finished, reward: %f 857 -1.0 
ep %d: game finished, reward: %f 857 -1.0 
ep %d: game finished, reward: %f 857 -1.0 
ep %d: game finished, reward: %f 857 -1.0 
ep %d: game finished, reward: %f 857 -1.0 
ep %d: game finished, reward: %f 857 -1.0 
ep %d: gam

ep %d: game finished, reward: %f 864 -1.0 
ep %d: game finished, reward: %f 864 -1.0 
ep %d: game finished, reward: %f 864 -1.0 
ep %d: game finished, reward: %f 864 -1.0 
ep %d: game finished, reward: %f 864 -1.0 
ep %d: game finished, reward: %f 864 -1.0 
ep %d: game finished, reward: %f 864 -1.0 
ep %d: game finished, reward: %f 864 -1.0 
ep %d: game finished, reward: %f 864 -1.0 
ep %d: game finished, reward: %f 864 -1.0 
resetting env. episode reward total was %f. running mean: %f -21.0 -20.348039544618004
ep %d: game finished, reward: %f 865 -1.0 
ep %d: game finished, reward: %f 865 -1.0 
ep %d: game finished, reward: %f 865 -1.0 
ep %d: game finished, reward: %f 865 -1.0 
ep %d: game finished, reward: %f 865 -1.0 
ep %d: game finished, reward: %f 865 -1.0 
ep %d: game finished, reward: %f 865 -1.0 
ep %d: game finished, reward: %f 865 -1.0 
ep %d: game finished, reward: %f 865 -1.0 
ep %d: game finished, reward: %f 865 -1.0 
ep %d: game finished, reward: %f 865 -1.0 
ep %d: gam

ep %d: game finished, reward: %f 872 -1.0 
ep %d: game finished, reward: %f 872 -1.0 
ep %d: game finished, reward: %f 872 -1.0 
resetting env. episode reward total was %f. running mean: %f -21.0 -20.38860594881945
ep %d: game finished, reward: %f 873 -1.0 
ep %d: game finished, reward: %f 873 -1.0 
ep %d: game finished, reward: %f 873 -1.0 
ep %d: game finished, reward: %f 873 -1.0 
ep %d: game finished, reward: %f 873 -1.0 
ep %d: game finished, reward: %f 873 -1.0 
ep %d: game finished, reward: %f 873 -1.0 
ep %d: game finished, reward: %f 873 -1.0 
ep %d: game finished, reward: %f 873 -1.0 
ep %d: game finished, reward: %f 873 -1.0 
ep %d: game finished, reward: %f 873 -1.0 
ep %d: game finished, reward: %f 873 -1.0 
ep %d: game finished, reward: %f 873 -1.0 
ep %d: game finished, reward: %f 873 -1.0 
ep %d: game finished, reward: %f 873 -1.0 
ep %d: game finished, reward: %f 873 -1.0 
ep %d: game finished, reward: %f 873 -1.0 
ep %d: game finished, reward: %f 873 -1.0 
ep %d: game

ep %d: game finished, reward: %f 881 -1.0 
ep %d: game finished, reward: %f 881 -1.0 
ep %d: game finished, reward: %f 881 -1.0 
ep %d: game finished, reward: %f 881 -1.0 
ep %d: game finished, reward: %f 881 -1.0 
ep %d: game finished, reward: %f 881 -1.0 
ep %d: game finished, reward: %f 881 -1.0 
ep %d: game finished, reward: %f 881 -1.0 
ep %d: game finished, reward: %f 881 -1.0 
ep %d: game finished, reward: %f 881 -1.0 
ep %d: game finished, reward: %f 881 -1.0 
ep %d: game finished, reward: %f 881 -1.0 
ep %d: game finished, reward: %f 881 -1.0 
ep %d: game finished, reward: %f 881 -1.0 
ep %d: game finished, reward: %f 881 -1.0 
ep %d: game finished, reward: %f 881 -1.0 
ep %d: game finished, reward: %f 881 -1.0 
ep %d: game finished, reward: %f 881 -1.0 
ep %d: game finished, reward: %f 881 -1.0 
ep %d: game finished, reward: %f 881 -1.0 
resetting env. episode reward total was %f. running mean: %f -21.0 -20.422265187743662
ep %d: game finished, reward: %f 882 -1.0 
ep %d: gam

ep %d: game finished, reward: %f 889 1.0  !!!!!!!!
ep %d: game finished, reward: %f 889 -1.0 
ep %d: game finished, reward: %f 889 -1.0 
ep %d: game finished, reward: %f 889 -1.0 
ep %d: game finished, reward: %f 889 -1.0 
ep %d: game finished, reward: %f 889 -1.0 
ep %d: game finished, reward: %f 889 -1.0 
ep %d: game finished, reward: %f 889 -1.0 
ep %d: game finished, reward: %f 889 -1.0 
ep %d: game finished, reward: %f 889 -1.0 
ep %d: game finished, reward: %f 889 -1.0 
ep %d: game finished, reward: %f 889 -1.0 
ep %d: game finished, reward: %f 889 -1.0 
ep %d: game finished, reward: %f 889 -1.0 
ep %d: game finished, reward: %f 889 -1.0 
ep %d: game finished, reward: %f 889 -1.0 
ep %d: game finished, reward: %f 889 -1.0 
ep %d: game finished, reward: %f 889 -1.0 
resetting env. episode reward total was %f. running mean: %f -20.0 -20.42855685213107
ep %d: game finished, reward: %f 890 -1.0 
ep %d: game finished, reward: %f 890 -1.0 
ep %d: game finished, reward: %f 890 -1.0 
ep 

ep %d: game finished, reward: %f 897 -1.0 
ep %d: game finished, reward: %f 897 -1.0 
ep %d: game finished, reward: %f 897 -1.0 
ep %d: game finished, reward: %f 897 -1.0 
ep %d: game finished, reward: %f 897 -1.0 
ep %d: game finished, reward: %f 897 -1.0 
ep %d: game finished, reward: %f 897 -1.0 
ep %d: game finished, reward: %f 897 -1.0 
ep %d: game finished, reward: %f 897 -1.0 
ep %d: game finished, reward: %f 897 -1.0 
ep %d: game finished, reward: %f 897 -1.0 
ep %d: game finished, reward: %f 897 -1.0 
ep %d: game finished, reward: %f 897 -1.0 
ep %d: game finished, reward: %f 897 -1.0 
ep %d: game finished, reward: %f 897 -1.0 
ep %d: game finished, reward: %f 897 -1.0 
ep %d: game finished, reward: %f 897 -1.0 
ep %d: game finished, reward: %f 897 -1.0 
resetting env. episode reward total was %f. running mean: %f -21.0 -20.406605284506536
ep %d: game finished, reward: %f 898 -1.0 
ep %d: game finished, reward: %f 898 -1.0 
ep %d: game finished, reward: %f 898 -1.0 
ep %d: gam

ep %d: game finished, reward: %f 905 1.0  !!!!!!!!
ep %d: game finished, reward: %f 905 -1.0 
ep %d: game finished, reward: %f 905 -1.0 
ep %d: game finished, reward: %f 905 -1.0 
ep %d: game finished, reward: %f 905 -1.0 
ep %d: game finished, reward: %f 905 -1.0 
ep %d: game finished, reward: %f 905 -1.0 
ep %d: game finished, reward: %f 905 -1.0 
ep %d: game finished, reward: %f 905 -1.0 
ep %d: game finished, reward: %f 905 -1.0 
ep %d: game finished, reward: %f 905 -1.0 
ep %d: game finished, reward: %f 905 -1.0 
ep %d: game finished, reward: %f 905 -1.0 
ep %d: game finished, reward: %f 905 -1.0 
ep %d: game finished, reward: %f 905 -1.0 
ep %d: game finished, reward: %f 905 -1.0 
ep %d: game finished, reward: %f 905 -1.0 
ep %d: game finished, reward: %f 905 -1.0 
ep %d: game finished, reward: %f 905 -1.0 
ep %d: game finished, reward: %f 905 -1.0 
ep %d: game finished, reward: %f 905 -1.0 
resetting env. episode reward total was %f. running mean: %f -20.0 -20.35432930407784
ep 

ep %d: game finished, reward: %f 913 -1.0 
ep %d: game finished, reward: %f 913 -1.0 
ep %d: game finished, reward: %f 913 -1.0 
ep %d: game finished, reward: %f 913 -1.0 
ep %d: game finished, reward: %f 913 -1.0 
ep %d: game finished, reward: %f 913 -1.0 
ep %d: game finished, reward: %f 913 -1.0 
ep %d: game finished, reward: %f 913 -1.0 
ep %d: game finished, reward: %f 913 -1.0 
ep %d: game finished, reward: %f 913 -1.0 
ep %d: game finished, reward: %f 913 -1.0 
ep %d: game finished, reward: %f 913 -1.0 
ep %d: game finished, reward: %f 913 -1.0 
ep %d: game finished, reward: %f 913 -1.0 
ep %d: game finished, reward: %f 913 -1.0 
ep %d: game finished, reward: %f 913 -1.0 
ep %d: game finished, reward: %f 913 -1.0 
ep %d: game finished, reward: %f 913 -1.0 
resetting env. episode reward total was %f. running mean: %f -21.0 -20.375870336513184
ep %d: game finished, reward: %f 914 -1.0 
ep %d: game finished, reward: %f 914 -1.0 
ep %d: game finished, reward: %f 914 -1.0 
ep %d: gam

ep %d: game finished, reward: %f 921 -1.0 
ep %d: game finished, reward: %f 921 -1.0 
ep %d: game finished, reward: %f 921 -1.0 
ep %d: game finished, reward: %f 921 -1.0 
ep %d: game finished, reward: %f 921 -1.0 
ep %d: game finished, reward: %f 921 -1.0 
ep %d: game finished, reward: %f 921 1.0  !!!!!!!!
ep %d: game finished, reward: %f 921 -1.0 
ep %d: game finished, reward: %f 921 -1.0 
ep %d: game finished, reward: %f 921 -1.0 
ep %d: game finished, reward: %f 921 -1.0 
ep %d: game finished, reward: %f 921 -1.0 
ep %d: game finished, reward: %f 921 -1.0 
ep %d: game finished, reward: %f 921 -1.0 
resetting env. episode reward total was %f. running mean: %f -20.0 -20.39506402090339
ep %d: game finished, reward: %f 922 -1.0 
ep %d: game finished, reward: %f 922 -1.0 
ep %d: game finished, reward: %f 922 -1.0 
ep %d: game finished, reward: %f 922 -1.0 
ep %d: game finished, reward: %f 922 -1.0 
ep %d: game finished, reward: %f 922 -1.0 
ep %d: game finished, reward: %f 922 -1.0 
ep 

ep %d: game finished, reward: %f 929 -1.0 
ep %d: game finished, reward: %f 929 -1.0 
ep %d: game finished, reward: %f 929 -1.0 
ep %d: game finished, reward: %f 929 -1.0 
ep %d: game finished, reward: %f 929 -1.0 
ep %d: game finished, reward: %f 929 -1.0 
ep %d: game finished, reward: %f 929 -1.0 
ep %d: game finished, reward: %f 929 -1.0 
ep %d: game finished, reward: %f 929 -1.0 
ep %d: game finished, reward: %f 929 -1.0 
ep %d: game finished, reward: %f 929 -1.0 
ep %d: game finished, reward: %f 929 -1.0 
ep %d: game finished, reward: %f 929 -1.0 
resetting env. episode reward total was %f. running mean: %f -21.0 -20.394053070641974
ep %d: game finished, reward: %f 930 -1.0 
ep %d: game finished, reward: %f 930 -1.0 
ep %d: game finished, reward: %f 930 -1.0 
ep %d: game finished, reward: %f 930 -1.0 
ep %d: game finished, reward: %f 930 -1.0 
ep %d: game finished, reward: %f 930 -1.0 
ep %d: game finished, reward: %f 930 -1.0 
ep %d: game finished, reward: %f 930 -1.0 
ep %d: gam

ep %d: game finished, reward: %f 937 -1.0 
ep %d: game finished, reward: %f 937 -1.0 
ep %d: game finished, reward: %f 937 -1.0 
ep %d: game finished, reward: %f 937 -1.0 
ep %d: game finished, reward: %f 937 -1.0 
ep %d: game finished, reward: %f 937 -1.0 
ep %d: game finished, reward: %f 937 -1.0 
resetting env. episode reward total was %f. running mean: %f -21.0 -20.42174403235093
ep %d: game finished, reward: %f 938 -1.0 
ep %d: game finished, reward: %f 938 -1.0 
ep %d: game finished, reward: %f 938 -1.0 
ep %d: game finished, reward: %f 938 -1.0 
ep %d: game finished, reward: %f 938 -1.0 
ep %d: game finished, reward: %f 938 -1.0 
ep %d: game finished, reward: %f 938 -1.0 
ep %d: game finished, reward: %f 938 -1.0 
ep %d: game finished, reward: %f 938 -1.0 
ep %d: game finished, reward: %f 938 -1.0 
ep %d: game finished, reward: %f 938 -1.0 
ep %d: game finished, reward: %f 938 -1.0 
ep %d: game finished, reward: %f 938 -1.0 
ep %d: game finished, reward: %f 938 -1.0 
ep %d: game