In [1]:
import numpy as np
from PIL import Image

In [2]:

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as T

import argparse
import time
import numpy as np
import collections

from tensorboardX import SummaryWriter

In [3]:
import gym

env = gym.make("FetchReach-v1")
DEFAULT_ENV_NAME="FetchReach-v1"

  result = entry_point.load(False)


In [4]:
print(env.action_space)
print(env.action_space.sample())

Box(4,)
[0.09762701 0.43037874 0.20552675 0.08976637]


In [None]:
print(env.observation_space)

Dict(achieved_goal:Box(3,), desired_goal:Box(3,), observation:Box(10,))


In [None]:
class DQN(nn.Module):

    def __init__(self):
        super(DQN, self).__init__()
        # 4 input image channel, 32 output channels, 3x3 square convolution
        # kernel
        self.pipe = nn.Sequential(
            nn.Linear(16,128),
            nn.ReLU(),
            nn.Linear(128,256),
            nn.ReLU(),
            nn.Linear(256,4)
        )

    def forward(self, x):  
        x= x.float()
        return self.pipe(x)


In [None]:
GAMMA = 0.99
BATCH_SIZE = 32
REPLAY_SIZE = 10000
REPLAY_START_SIZE = 10000
LEARNING_RATE = 1e-4
SYNC_TARGET_FRAMES = 1000

In [None]:
#epsilon decay, for choosing random actions to explore space
EPSILON_DECAY_LAST_FRAME = 10**5
EPSILON_START = 1.0
EPSILON_FINAL = 0.02
#Going from 1 to .02 in 100000 frames
MEAN_REWARD_BOUND=50

In [None]:
#define what experience is
Experience = collections.namedtuple("Experience", 
                        field_names = ['state','action','reward', 'done','new_state'])

class ExperienceBuffer:
     #This acts as our replay buffer
    def __init__(self,capacity):
        self.buffer = collections.deque(maxlen=capacity) #rotating buffer
    
    def __len__(self):
        return len(self.buffer)
    
    def append(self, experience):
        self.buffer.append(experience)
        
    def sample(self,batch_size):
        indices = np.random.choice(
            len(self.buffer), batch_size, replace =False)
        
        states,actions,rewards,dones, next_states = \
            zip(*[self.buffer[idx] for idx in indices])
        
        states,actions = np.array(states),np.array(actions)
        rewards = np.array(rewards,dtype=np.float32)
        dones =np.array(dones, dtype=np.float32)
        return states,actions,rewards,dones,next_states

In [None]:
#make our agent that acts and learns in the environment
class Agent:
    def __init__(self, env, exp_buffer):
        self.env = env
        self.exp_buffer = exp_buffer
        self._reset()
        
    def _reset(self):
        state = env.reset()
        
        
        self.state = np.concatenate(list(state.values()))
        self.total_reward = 0.0
    
    def play_step(self, net, epsilong=0.0, device ='cpu'):
        done_reward = None
        
        if np.random.random() < epsilon:
            action = env.action_space.sample()
        else:
            state_a = np.array([self.state], copy=False)
            state_v = torch.tensor(state_a).to(device)
            q_vals_v =  net(state_v)
            _, act_v = torch.max(q_vals_v,dim=1)
            
            #print("whats going on here?")
            action = int(act_v.item())
            
            #take the action and update
            #env.show()
            new_state, reward, is_done, _ = self.env.step(action)  
            new_state = np.concatenate(list(new_state.values()))
            reward +=1.0
            self.total_reward += reward
            #do we need this? is it doing a copy?
            new_state = new_state
            
            exp = Experience(self.state,action,reward,is_done,new_state)
            
            self.exp_buffer.append(exp)
            self.state = new_state
            
            if(is_done):
                done_reward = self.total_reward
                self._reset()
            return done_reward
        
def calc_loss(batch, net, target_net, device='cpu'):
    states,actions,rewards,dones,next_states = batch
    
    
    states_v = torch.tensor(states).to(device)
    next_states_v = torch.tensor(next_states).to(device)
    actions_v = torch.tensor(actions).to(device)
    rewards_v = torch.tensor(rewards).to(device)
    done_mask = torch.ByteTensor(dones).to(device)

    #predictions
    #look into gather further
    state_action_values = net(states_v).gather(1,actions_v.unsqueeze(-1)).squeeze(-1)

    #what is the value of being in the state that our action took us to?
    next_state_values = target_net(next_states_v).max(1)[0] #take the max along the first axis


    # saying the reward of the step after finishing is zero
    # required to converege
    next_state_values[done_mask] = 0.0

    #we do not want to perform backprop on this 
    next_state_values = next_state_values.detach() 

    #-------------Bellman equation------------------
    expected_state_action_values = next_state_values * GAMMA + rewards_v

    return nn.MSELoss()(state_action_values, expected_state_action_values)

In [None]:
if __name__ == "__main__":
    #parser = argparse.ArgumentParser()
    #parser.add_argument("--cude", default=False, action='store_true', help="Enable cuda")
    
    #parser.add_argument("--env", default= DEFAULT_ENV_NAME, 
    #                    help="name of the environment, default="+DEFAULT_ENV_NAME)
    
    #parser.add_argument("--reward", type=float, default=MEAN_REWARD_BOUND, 
    #                    help="Mean reward boundary for stop of training, default= %.2f"% MEAN_REWARD_BOUND)
    
    #args = parser.parse_args()
    args= [False,DEFAULT_ENV_NAME,MEAN_REWARD_BOUND]
    #device= torch.device("cuda" if args.cuda else "cpu")
    device = 'cpu'
    
    env = gym.make(DEFAULT_ENV_NAME)#args.env)
    net = DQN().to(device)
    
    target_net = DQN().to(device)
    
    writer = SummaryWriter(comment='-'+DEFAULT_ENV_NAME)#args.env)
    print(net)
    
    buffer = ExperienceBuffer(REPLAY_SIZE)
    agent = Agent(env, buffer)
    
    epsilon = EPSILON_START
    
    optimizer = optim.Adam(net.parameters(), lr=LEARNING_RATE)
    total_rewards = []
    frame_idx = 0
    ts_frame = 0
    ts = time.time()
    
    best_mean_reward = None
    
    while True:
        frame_idx += 1
        epsilon = max(EPSILON_FINAL, EPSILON_START - (frame_idx / EPSILON_DECAY_LAST_FRAME))
        
        reward = agent.play_step(net, epsilon, device=device)
        if reward is not None:
            total_rewards.append(reward)
            speed = (frame_idx - ts_frame) / (time.time() - ts)
            ts_frame = frame_idx
            ts = time.time()
            
            mean_reward = np.mean(total_rewards[-100:])
            print("%d: done %d games, mean reward %.3f, eps %.2f, speed %.2f f/s" %(
                frame_idx, len(total_rewards), mean_reward, epsilon, speed
            ))
            
            writer.add_scalar("epsilon", epsilon, frame_idx)
            writer.add_scalar("speed", speed, frame_idx)
            writer.add_scalar("reward_100", mean_reward, frame_idx)
            writer.add_scalar("reward", reward, frame_idx)
            
            if best_mean_reward  is None or best_mean_reward < mean_reward:
                torch.save(net.state_dict(), DEFAULT_ENV_NAME+"-best.dat")
                if(best_mean_reward is not None):
                    print('best mean reward updated %.3f -> %.3f, model saved'%
                             (best_mean_reward, mean_reward))
                    best_mean_reward = mean_reward
                if(mean_reward > MEAN_REWARD_BOUND):
                    print("solved in %d frames!" % frame_idx)
                    break
                
            if(len(buffer) < REPLAY_START_SIZE):
                continue
            
            if(frame_idx % SYNC_TARGET_FRAMES == 0):
                target_net.load_state_dict(net.state_dict())
            
            optimizer.zero_grad()
            batch = buffer.sample(BATCH_SIZE)
            loss_t = calc_loss(batch, net, target_net, device=device)
            
            loss_t.backward()
            optimizer.step()

DQN(
  (pipe): Sequential(
    (0): Linear(in_features=16, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=256, bias=True)
    (3): ReLU()
    (4): Linear(in_features=256, out_features=4, bias=True)
  )
)
3718: done 1 games, mean reward 0.000, eps 0.96, speed 21183.70 f/s
4767: done 2 games, mean reward 0.500, eps 0.95, speed 9019.97 f/s
5606: done 3 games, mean reward 0.333, eps 0.94, speed 8220.09 f/s
6450: done 4 games, mean reward 0.250, eps 0.94, speed 8127.77 f/s
7146: done 5 games, mean reward 0.400, eps 0.93, speed 6285.20 f/s
7945: done 6 games, mean reward 0.333, eps 0.92, speed 6894.99 f/s
8480: done 7 games, mean reward 0.429, eps 0.92, speed 4919.51 f/s
9052: done 8 games, mean reward 0.375, eps 0.91, speed 5485.77 f/s
9647: done 9 games, mean reward 0.333, eps 0.90, speed 5484.47 f/s
10036: done 10 games, mean reward 0.500, eps 0.90, speed 3993.85 f/s
10431: done 11 games, mean reward 0.455, eps 0.90, speed 3693.94 f/s
10886: don

34638: done 119 games, mean reward 0.110, eps 0.65, speed 1617.72 f/s
34776: done 120 games, mean reward 0.110, eps 0.65, speed 1499.37 f/s
34952: done 121 games, mean reward 0.110, eps 0.65, speed 1806.24 f/s
35122: done 122 games, mean reward 0.110, eps 0.65, speed 1584.34 f/s
35245: done 123 games, mean reward 0.110, eps 0.65, speed 1270.93 f/s
35391: done 124 games, mean reward 0.110, eps 0.65, speed 1590.61 f/s
35534: done 125 games, mean reward 0.110, eps 0.64, speed 1276.41 f/s
35674: done 126 games, mean reward 0.110, eps 0.64, speed 1533.58 f/s
35840: done 127 games, mean reward 0.110, eps 0.64, speed 1811.77 f/s
35986: done 128 games, mean reward 0.110, eps 0.64, speed 1400.31 f/s
36108: done 129 games, mean reward 0.110, eps 0.64, speed 1342.80 f/s
36245: done 130 games, mean reward 0.110, eps 0.64, speed 1501.40 f/s
36374: done 131 games, mean reward 0.110, eps 0.64, speed 1231.70 f/s
36478: done 132 games, mean reward 0.120, eps 0.64, speed 1141.56 f/s
36609: done 133 game

48826: done 237 games, mean reward 0.040, eps 0.51, speed 958.98 f/s
48919: done 238 games, mean reward 0.040, eps 0.51, speed 652.32 f/s
49016: done 239 games, mean reward 0.040, eps 0.51, speed 897.57 f/s
49104: done 240 games, mean reward 0.040, eps 0.51, speed 810.43 f/s
49195: done 241 games, mean reward 0.040, eps 0.51, speed 547.34 f/s
49298: done 242 games, mean reward 0.040, eps 0.51, speed 1034.37 f/s
49397: done 243 games, mean reward 0.040, eps 0.51, speed 1024.81 f/s
49505: done 244 games, mean reward 0.030, eps 0.50, speed 1019.94 f/s
49616: done 245 games, mean reward 0.030, eps 0.50, speed 1009.43 f/s
49722: done 246 games, mean reward 0.030, eps 0.50, speed 978.85 f/s
49822: done 247 games, mean reward 0.030, eps 0.50, speed 772.86 f/s
49924: done 248 games, mean reward 0.030, eps 0.50, speed 955.25 f/s
50021: done 249 games, mean reward 0.030, eps 0.50, speed 851.98 f/s
50124: done 250 games, mean reward 0.030, eps 0.50, speed 1035.88 f/s
50218: done 251 games, mean r

59706: done 356 games, mean reward 0.030, eps 0.40, speed 729.53 f/s
59790: done 357 games, mean reward 0.030, eps 0.40, speed 828.14 f/s
59870: done 358 games, mean reward 0.030, eps 0.40, speed 789.91 f/s
59953: done 359 games, mean reward 0.020, eps 0.40, speed 762.10 f/s
60023: done 360 games, mean reward 0.020, eps 0.40, speed 752.56 f/s
60110: done 361 games, mean reward 0.020, eps 0.40, speed 834.28 f/s
60208: done 362 games, mean reward 0.020, eps 0.40, speed 1015.18 f/s
60291: done 363 games, mean reward 0.020, eps 0.40, speed 771.84 f/s
60373: done 364 games, mean reward 0.020, eps 0.40, speed 788.97 f/s
60459: done 365 games, mean reward 0.020, eps 0.40, speed 871.92 f/s
60549: done 366 games, mean reward 0.020, eps 0.39, speed 689.78 f/s
60629: done 367 games, mean reward 0.020, eps 0.39, speed 756.34 f/s
60709: done 368 games, mean reward 0.020, eps 0.39, speed 691.34 f/s
60795: done 369 games, mean reward 0.020, eps 0.39, speed 806.34 f/s
60871: done 370 games, mean rewar

69032: done 477 games, mean reward 0.060, eps 0.31, speed 609.43 f/s
69106: done 478 games, mean reward 0.060, eps 0.31, speed 753.21 f/s
69172: done 479 games, mean reward 0.060, eps 0.31, speed 606.63 f/s
69244: done 480 games, mean reward 0.060, eps 0.31, speed 485.02 f/s
69315: done 481 games, mean reward 0.060, eps 0.31, speed 544.94 f/s
69387: done 482 games, mean reward 0.060, eps 0.31, speed 599.03 f/s
69466: done 483 games, mean reward 0.060, eps 0.31, speed 762.37 f/s
69534: done 484 games, mean reward 0.060, eps 0.30, speed 622.35 f/s
69598: done 485 games, mean reward 0.060, eps 0.30, speed 651.79 f/s
69675: done 486 games, mean reward 0.060, eps 0.30, speed 709.58 f/s
69754: done 487 games, mean reward 0.060, eps 0.30, speed 808.02 f/s
69832: done 488 games, mean reward 0.060, eps 0.30, speed 659.91 f/s
69903: done 489 games, mean reward 0.060, eps 0.30, speed 655.72 f/s
69971: done 490 games, mean reward 0.060, eps 0.30, speed 698.39 f/s
70043: done 491 games, mean reward

77215: done 596 games, mean reward 0.100, eps 0.23, speed 597.93 f/s
77272: done 597 games, mean reward 0.100, eps 0.23, speed 579.35 f/s
77329: done 598 games, mean reward 0.100, eps 0.23, speed 613.13 f/s
77394: done 599 games, mean reward 0.100, eps 0.23, speed 617.75 f/s
77464: done 600 games, mean reward 0.100, eps 0.23, speed 749.90 f/s
77537: done 601 games, mean reward 0.100, eps 0.22, speed 749.44 f/s
77598: done 602 games, mean reward 0.100, eps 0.22, speed 588.62 f/s
77665: done 603 games, mean reward 0.100, eps 0.22, speed 713.87 f/s
77739: done 604 games, mean reward 0.100, eps 0.22, speed 798.93 f/s
77806: done 605 games, mean reward 0.100, eps 0.22, speed 623.50 f/s
77872: done 606 games, mean reward 0.120, eps 0.22, speed 690.83 f/s
77939: done 607 games, mean reward 0.120, eps 0.22, speed 669.97 f/s
78002: done 608 games, mean reward 0.120, eps 0.22, speed 581.76 f/s
78066: done 609 games, mean reward 0.120, eps 0.22, speed 671.58 f/s
78130: done 610 games, mean reward

84614: done 716 games, mean reward 0.060, eps 0.15, speed 558.91 f/s
84682: done 717 games, mean reward 0.060, eps 0.15, speed 732.92 f/s
84741: done 718 games, mean reward 0.060, eps 0.15, speed 624.94 f/s
84804: done 719 games, mean reward 0.060, eps 0.15, speed 567.24 f/s
84864: done 720 games, mean reward 0.060, eps 0.15, speed 644.66 f/s
84918: done 721 games, mean reward 0.060, eps 0.15, speed 388.52 f/s
84981: done 722 games, mean reward 0.060, eps 0.15, speed 533.57 f/s
85042: done 723 games, mean reward 0.060, eps 0.15, speed 569.59 f/s
85100: done 724 games, mean reward 0.060, eps 0.15, speed 579.11 f/s
85161: done 725 games, mean reward 0.060, eps 0.15, speed 606.67 f/s
85219: done 726 games, mean reward 0.060, eps 0.15, speed 550.80 f/s
85274: done 727 games, mean reward 0.060, eps 0.15, speed 539.08 f/s
85332: done 728 games, mean reward 0.060, eps 0.15, speed 550.10 f/s
85393: done 729 games, mean reward 0.060, eps 0.15, speed 312.33 f/s
85448: done 730 games, mean reward

91353: done 835 games, mean reward 0.100, eps 0.09, speed 525.42 f/s
91409: done 836 games, mean reward 0.100, eps 0.09, speed 606.99 f/s
91461: done 837 games, mean reward 0.100, eps 0.09, speed 565.32 f/s
91516: done 838 games, mean reward 0.100, eps 0.08, speed 531.84 f/s
91569: done 839 games, mean reward 0.100, eps 0.08, speed 515.37 f/s
91624: done 840 games, mean reward 0.100, eps 0.08, speed 595.37 f/s
91677: done 841 games, mean reward 0.090, eps 0.08, speed 494.59 f/s
91733: done 842 games, mean reward 0.090, eps 0.08, speed 607.83 f/s
91785: done 843 games, mean reward 0.090, eps 0.08, speed 564.69 f/s
91844: done 844 games, mean reward 0.090, eps 0.08, speed 575.86 f/s
91896: done 845 games, mean reward 0.090, eps 0.08, speed 566.00 f/s
91951: done 846 games, mean reward 0.090, eps 0.08, speed 596.54 f/s
92002: done 847 games, mean reward 0.090, eps 0.08, speed 480.45 f/s
92056: done 848 games, mean reward 0.090, eps 0.08, speed 568.28 f/s
92110: done 849 games, mean reward

97746: done 956 games, mean reward 0.090, eps 0.02, speed 499.54 f/s
97797: done 957 games, mean reward 0.090, eps 0.02, speed 550.36 f/s
97848: done 958 games, mean reward 0.090, eps 0.02, speed 493.41 f/s
97898: done 959 games, mean reward 0.090, eps 0.02, speed 458.12 f/s
97949: done 960 games, mean reward 0.090, eps 0.02, speed 511.64 f/s
97999: done 961 games, mean reward 0.090, eps 0.02, speed 460.04 f/s
98050: done 962 games, mean reward 0.090, eps 0.02, speed 440.57 f/s
98102: done 963 games, mean reward 0.090, eps 0.02, speed 492.33 f/s
98154: done 964 games, mean reward 0.090, eps 0.02, speed 498.98 f/s
98205: done 965 games, mean reward 0.090, eps 0.02, speed 529.70 f/s
98256: done 966 games, mean reward 0.090, eps 0.02, speed 491.26 f/s
98309: done 967 games, mean reward 0.090, eps 0.02, speed 542.06 f/s
98360: done 968 games, mean reward 0.090, eps 0.02, speed 532.03 f/s
98412: done 969 games, mean reward 0.090, eps 0.02, speed 479.74 f/s
98463: done 970 games, mean reward

103737: done 1073 games, mean reward 0.060, eps 0.02, speed 193.87 f/s
103789: done 1074 games, mean reward 0.060, eps 0.02, speed 211.97 f/s
103840: done 1075 games, mean reward 0.060, eps 0.02, speed 195.96 f/s
103892: done 1076 games, mean reward 0.060, eps 0.02, speed 221.98 f/s
103942: done 1077 games, mean reward 0.060, eps 0.02, speed 220.11 f/s
103993: done 1078 games, mean reward 0.060, eps 0.02, speed 194.05 f/s
104044: done 1079 games, mean reward 0.050, eps 0.02, speed 190.03 f/s
104099: done 1080 games, mean reward 0.050, eps 0.02, speed 260.63 f/s
104150: done 1081 games, mean reward 0.050, eps 0.02, speed 238.37 f/s
104201: done 1082 games, mean reward 0.050, eps 0.02, speed 227.80 f/s
104251: done 1083 games, mean reward 0.050, eps 0.02, speed 220.10 f/s
104304: done 1084 games, mean reward 0.050, eps 0.02, speed 201.27 f/s
104355: done 1085 games, mean reward 0.050, eps 0.02, speed 186.87 f/s
104405: done 1086 games, mean reward 0.050, eps 0.02, speed 222.05 f/s
104457

109668: done 1189 games, mean reward 0.060, eps 0.02, speed 436.50 f/s
109718: done 1190 games, mean reward 0.060, eps 0.02, speed 258.62 f/s
109769: done 1191 games, mean reward 0.060, eps 0.02, speed 280.43 f/s
109819: done 1192 games, mean reward 0.060, eps 0.02, speed 479.66 f/s
109870: done 1193 games, mean reward 0.060, eps 0.02, speed 496.48 f/s
109921: done 1194 games, mean reward 0.060, eps 0.02, speed 409.06 f/s
109971: done 1195 games, mean reward 0.060, eps 0.02, speed 285.06 f/s
110022: done 1196 games, mean reward 0.060, eps 0.02, speed 260.08 f/s
110073: done 1197 games, mean reward 0.060, eps 0.02, speed 496.05 f/s
110124: done 1198 games, mean reward 0.060, eps 0.02, speed 556.04 f/s
110177: done 1199 games, mean reward 0.060, eps 0.02, speed 575.72 f/s
110227: done 1200 games, mean reward 0.060, eps 0.02, speed 536.28 f/s
110278: done 1201 games, mean reward 0.060, eps 0.02, speed 524.96 f/s
110329: done 1202 games, mean reward 0.060, eps 0.02, speed 544.17 f/s
110380

115689: done 1307 games, mean reward 0.050, eps 0.02, speed 543.43 f/s
115741: done 1308 games, mean reward 0.050, eps 0.02, speed 563.63 f/s
115791: done 1309 games, mean reward 0.050, eps 0.02, speed 513.92 f/s
115842: done 1310 games, mean reward 0.050, eps 0.02, speed 512.09 f/s
115893: done 1311 games, mean reward 0.060, eps 0.02, speed 539.83 f/s
115943: done 1312 games, mean reward 0.060, eps 0.02, speed 527.68 f/s
115995: done 1313 games, mean reward 0.060, eps 0.02, speed 543.73 f/s
116045: done 1314 games, mean reward 0.060, eps 0.02, speed 530.57 f/s
116095: done 1315 games, mean reward 0.060, eps 0.02, speed 532.33 f/s
116145: done 1316 games, mean reward 0.060, eps 0.02, speed 526.14 f/s
116197: done 1317 games, mean reward 0.060, eps 0.02, speed 551.27 f/s
116247: done 1318 games, mean reward 0.060, eps 0.02, speed 531.20 f/s
116297: done 1319 games, mean reward 0.060, eps 0.02, speed 506.17 f/s
116347: done 1320 games, mean reward 0.060, eps 0.02, speed 518.44 f/s
116398

121653: done 1424 games, mean reward 0.080, eps 0.02, speed 541.43 f/s
121703: done 1425 games, mean reward 0.080, eps 0.02, speed 544.65 f/s
121755: done 1426 games, mean reward 0.080, eps 0.02, speed 565.20 f/s
121806: done 1427 games, mean reward 0.080, eps 0.02, speed 543.66 f/s
121859: done 1428 games, mean reward 0.080, eps 0.02, speed 574.05 f/s
121910: done 1429 games, mean reward 0.060, eps 0.02, speed 556.27 f/s
121960: done 1430 games, mean reward 0.060, eps 0.02, speed 533.30 f/s
122011: done 1431 games, mean reward 0.060, eps 0.02, speed 549.28 f/s
122061: done 1432 games, mean reward 0.060, eps 0.02, speed 514.51 f/s
122115: done 1433 games, mean reward 0.060, eps 0.02, speed 493.51 f/s
122167: done 1434 games, mean reward 0.070, eps 0.02, speed 470.43 f/s
122218: done 1435 games, mean reward 0.070, eps 0.02, speed 365.49 f/s
122269: done 1436 games, mean reward 0.070, eps 0.02, speed 472.24 f/s
122322: done 1437 games, mean reward 0.070, eps 0.02, speed 556.86 f/s
122374

127623: done 1541 games, mean reward 0.060, eps 0.02, speed 466.95 f/s
127674: done 1542 games, mean reward 0.060, eps 0.02, speed 484.30 f/s
127726: done 1543 games, mean reward 0.060, eps 0.02, speed 548.61 f/s
127776: done 1544 games, mean reward 0.060, eps 0.02, speed 489.23 f/s
127826: done 1545 games, mean reward 0.060, eps 0.02, speed 527.57 f/s
127878: done 1546 games, mean reward 0.060, eps 0.02, speed 542.62 f/s
127929: done 1547 games, mean reward 0.060, eps 0.02, speed 414.96 f/s
127980: done 1548 games, mean reward 0.060, eps 0.02, speed 392.17 f/s
128030: done 1549 games, mean reward 0.060, eps 0.02, speed 502.70 f/s
128080: done 1550 games, mean reward 0.060, eps 0.02, speed 468.44 f/s
128130: done 1551 games, mean reward 0.060, eps 0.02, speed 419.07 f/s
128181: done 1552 games, mean reward 0.060, eps 0.02, speed 265.05 f/s
128232: done 1553 games, mean reward 0.060, eps 0.02, speed 498.50 f/s
128286: done 1554 games, mean reward 0.060, eps 0.02, speed 541.86 f/s
128337

133600: done 1658 games, mean reward 0.090, eps 0.02, speed 509.94 f/s
133650: done 1659 games, mean reward 0.090, eps 0.02, speed 492.26 f/s
133700: done 1660 games, mean reward 0.090, eps 0.02, speed 527.52 f/s
133750: done 1661 games, mean reward 0.090, eps 0.02, speed 525.16 f/s
133802: done 1662 games, mean reward 0.090, eps 0.02, speed 549.45 f/s
133852: done 1663 games, mean reward 0.090, eps 0.02, speed 528.07 f/s
133902: done 1664 games, mean reward 0.090, eps 0.02, speed 524.17 f/s
133952: done 1665 games, mean reward 0.090, eps 0.02, speed 528.02 f/s
134004: done 1666 games, mean reward 0.090, eps 0.02, speed 547.83 f/s
134056: done 1667 games, mean reward 0.090, eps 0.02, speed 517.73 f/s
134108: done 1668 games, mean reward 0.090, eps 0.02, speed 546.04 f/s
134159: done 1669 games, mean reward 0.090, eps 0.02, speed 514.35 f/s
134211: done 1670 games, mean reward 0.090, eps 0.02, speed 539.08 f/s
134263: done 1671 games, mean reward 0.080, eps 0.02, speed 544.65 f/s
134314

139608: done 1776 games, mean reward 0.090, eps 0.02, speed 518.26 f/s
139659: done 1777 games, mean reward 0.090, eps 0.02, speed 549.69 f/s
139712: done 1778 games, mean reward 0.090, eps 0.02, speed 572.08 f/s
139763: done 1779 games, mean reward 0.090, eps 0.02, speed 544.90 f/s
139815: done 1780 games, mean reward 0.090, eps 0.02, speed 560.26 f/s
139867: done 1781 games, mean reward 0.090, eps 0.02, speed 561.45 f/s
139919: done 1782 games, mean reward 0.090, eps 0.02, speed 552.80 f/s
139970: done 1783 games, mean reward 0.090, eps 0.02, speed 548.99 f/s
140020: done 1784 games, mean reward 0.090, eps 0.02, speed 538.73 f/s
140071: done 1785 games, mean reward 0.090, eps 0.02, speed 521.41 f/s
140123: done 1786 games, mean reward 0.090, eps 0.02, speed 534.60 f/s
140173: done 1787 games, mean reward 0.090, eps 0.02, speed 543.58 f/s
140224: done 1788 games, mean reward 0.090, eps 0.02, speed 542.21 f/s
140274: done 1789 games, mean reward 0.070, eps 0.02, speed 544.40 f/s
140325

145578: done 1893 games, mean reward 0.070, eps 0.02, speed 299.77 f/s
145629: done 1894 games, mean reward 0.070, eps 0.02, speed 320.20 f/s
145680: done 1895 games, mean reward 0.070, eps 0.02, speed 289.04 f/s
145730: done 1896 games, mean reward 0.070, eps 0.02, speed 278.27 f/s
145780: done 1897 games, mean reward 0.070, eps 0.02, speed 392.78 f/s
145832: done 1898 games, mean reward 0.070, eps 0.02, speed 534.04 f/s
145883: done 1899 games, mean reward 0.070, eps 0.02, speed 509.06 f/s
145934: done 1900 games, mean reward 0.070, eps 0.02, speed 466.51 f/s
145985: done 1901 games, mean reward 0.070, eps 0.02, speed 538.57 f/s
146036: done 1902 games, mean reward 0.070, eps 0.02, speed 528.72 f/s
146087: done 1903 games, mean reward 0.070, eps 0.02, speed 448.09 f/s
146138: done 1904 games, mean reward 0.070, eps 0.02, speed 520.14 f/s
146189: done 1905 games, mean reward 0.070, eps 0.02, speed 512.19 f/s
146239: done 1906 games, mean reward 0.080, eps 0.02, speed 370.27 f/s
146290

151562: done 2010 games, mean reward 0.140, eps 0.02, speed 474.60 f/s
151613: done 2011 games, mean reward 0.140, eps 0.02, speed 534.52 f/s
151663: done 2012 games, mean reward 0.140, eps 0.02, speed 515.51 f/s
151715: done 2013 games, mean reward 0.140, eps 0.02, speed 523.25 f/s
151765: done 2014 games, mean reward 0.140, eps 0.02, speed 523.50 f/s
151816: done 2015 games, mean reward 0.140, eps 0.02, speed 507.45 f/s
151866: done 2016 games, mean reward 0.140, eps 0.02, speed 520.49 f/s
151916: done 2017 games, mean reward 0.140, eps 0.02, speed 523.64 f/s
151966: done 2018 games, mean reward 0.140, eps 0.02, speed 398.16 f/s
152018: done 2019 games, mean reward 0.130, eps 0.02, speed 354.17 f/s
152069: done 2020 games, mean reward 0.130, eps 0.02, speed 458.44 f/s
152119: done 2021 games, mean reward 0.130, eps 0.02, speed 489.03 f/s
152171: done 2022 games, mean reward 0.130, eps 0.02, speed 542.90 f/s
152223: done 2023 games, mean reward 0.130, eps 0.02, speed 542.42 f/s
152273

157476: done 2126 games, mean reward 0.060, eps 0.02, speed 437.26 f/s
157526: done 2127 games, mean reward 0.060, eps 0.02, speed 544.41 f/s
157577: done 2128 games, mean reward 0.060, eps 0.02, speed 555.46 f/s
157627: done 2129 games, mean reward 0.060, eps 0.02, speed 537.59 f/s
157678: done 2130 games, mean reward 0.060, eps 0.02, speed 553.37 f/s
157733: done 2131 games, mean reward 0.060, eps 0.02, speed 553.74 f/s
157784: done 2132 games, mean reward 0.060, eps 0.02, speed 543.30 f/s
157835: done 2133 games, mean reward 0.060, eps 0.02, speed 554.35 f/s
157887: done 2134 games, mean reward 0.060, eps 0.02, speed 566.45 f/s
157940: done 2135 games, mean reward 0.060, eps 0.02, speed 568.57 f/s
157991: done 2136 games, mean reward 0.060, eps 0.02, speed 555.75 f/s
158041: done 2137 games, mean reward 0.060, eps 0.02, speed 546.90 f/s
158092: done 2138 games, mean reward 0.040, eps 0.02, speed 548.20 f/s
158143: done 2139 games, mean reward 0.040, eps 0.02, speed 552.60 f/s
158195

163497: done 2244 games, mean reward 0.030, eps 0.02, speed 333.18 f/s
163549: done 2245 games, mean reward 0.030, eps 0.02, speed 310.11 f/s
163599: done 2246 games, mean reward 0.030, eps 0.02, speed 288.13 f/s
163649: done 2247 games, mean reward 0.030, eps 0.02, speed 263.88 f/s
163699: done 2248 games, mean reward 0.030, eps 0.02, speed 274.73 f/s
163750: done 2249 games, mean reward 0.030, eps 0.02, speed 337.34 f/s
163800: done 2250 games, mean reward 0.030, eps 0.02, speed 336.55 f/s
163851: done 2251 games, mean reward 0.030, eps 0.02, speed 372.65 f/s
163902: done 2252 games, mean reward 0.030, eps 0.02, speed 301.64 f/s
163955: done 2253 games, mean reward 0.020, eps 0.02, speed 319.67 f/s
164006: done 2254 games, mean reward 0.020, eps 0.02, speed 260.30 f/s
164056: done 2255 games, mean reward 0.020, eps 0.02, speed 247.70 f/s
164107: done 2256 games, mean reward 0.020, eps 0.02, speed 269.20 f/s
164157: done 2257 games, mean reward 0.020, eps 0.02, speed 427.40 f/s
164208

169408: done 2360 games, mean reward 0.070, eps 0.02, speed 536.13 f/s
169458: done 2361 games, mean reward 0.070, eps 0.02, speed 542.64 f/s
169510: done 2362 games, mean reward 0.070, eps 0.02, speed 544.42 f/s
169560: done 2363 games, mean reward 0.070, eps 0.02, speed 531.58 f/s
169610: done 2364 games, mean reward 0.070, eps 0.02, speed 541.47 f/s
169661: done 2365 games, mean reward 0.070, eps 0.02, speed 531.59 f/s
169711: done 2366 games, mean reward 0.070, eps 0.02, speed 527.54 f/s
169763: done 2367 games, mean reward 0.070, eps 0.02, speed 562.72 f/s
169815: done 2368 games, mean reward 0.070, eps 0.02, speed 564.75 f/s
169866: done 2369 games, mean reward 0.070, eps 0.02, speed 528.36 f/s
169917: done 2370 games, mean reward 0.070, eps 0.02, speed 537.10 f/s
169968: done 2371 games, mean reward 0.050, eps 0.02, speed 538.17 f/s
170019: done 2372 games, mean reward 0.050, eps 0.02, speed 515.81 f/s
170071: done 2373 games, mean reward 0.050, eps 0.02, speed 547.49 f/s
170123

175427: done 2478 games, mean reward 0.030, eps 0.02, speed 534.26 f/s
175478: done 2479 games, mean reward 0.030, eps 0.02, speed 353.29 f/s
175529: done 2480 games, mean reward 0.030, eps 0.02, speed 322.57 f/s
175579: done 2481 games, mean reward 0.030, eps 0.02, speed 271.97 f/s
175629: done 2482 games, mean reward 0.030, eps 0.02, speed 259.25 f/s
175681: done 2483 games, mean reward 0.030, eps 0.02, speed 555.13 f/s
175731: done 2484 games, mean reward 0.030, eps 0.02, speed 542.94 f/s
175783: done 2485 games, mean reward 0.030, eps 0.02, speed 553.38 f/s
175835: done 2486 games, mean reward 0.030, eps 0.02, speed 561.52 f/s
175888: done 2487 games, mean reward 0.030, eps 0.02, speed 535.91 f/s
175940: done 2488 games, mean reward 0.030, eps 0.02, speed 528.76 f/s
175994: done 2489 games, mean reward 0.030, eps 0.02, speed 579.64 f/s
176044: done 2490 games, mean reward 0.030, eps 0.02, speed 538.85 f/s
176094: done 2491 games, mean reward 0.030, eps 0.02, speed 530.01 f/s
176148

181389: done 2595 games, mean reward 0.080, eps 0.02, speed 420.46 f/s
181441: done 2596 games, mean reward 0.080, eps 0.02, speed 557.93 f/s
181492: done 2597 games, mean reward 0.080, eps 0.02, speed 407.23 f/s
181544: done 2598 games, mean reward 0.080, eps 0.02, speed 474.15 f/s
181594: done 2599 games, mean reward 0.080, eps 0.02, speed 450.23 f/s
181645: done 2600 games, mean reward 0.080, eps 0.02, speed 433.05 f/s
181695: done 2601 games, mean reward 0.080, eps 0.02, speed 493.14 f/s
181745: done 2602 games, mean reward 0.080, eps 0.02, speed 475.99 f/s
181795: done 2603 games, mean reward 0.080, eps 0.02, speed 524.08 f/s
181845: done 2604 games, mean reward 0.080, eps 0.02, speed 396.75 f/s
181898: done 2605 games, mean reward 0.080, eps 0.02, speed 553.81 f/s
181949: done 2606 games, mean reward 0.080, eps 0.02, speed 531.18 f/s
181999: done 2607 games, mean reward 0.080, eps 0.02, speed 433.41 f/s
182049: done 2608 games, mean reward 0.080, eps 0.02, speed 495.64 f/s
182100

187306: done 2711 games, mean reward 0.040, eps 0.02, speed 505.02 f/s
187356: done 2712 games, mean reward 0.040, eps 0.02, speed 469.93 f/s
187407: done 2713 games, mean reward 0.040, eps 0.02, speed 485.98 f/s
187457: done 2714 games, mean reward 0.040, eps 0.02, speed 407.49 f/s
187508: done 2715 games, mean reward 0.040, eps 0.02, speed 457.43 f/s
187559: done 2716 games, mean reward 0.040, eps 0.02, speed 519.17 f/s
187612: done 2717 games, mean reward 0.040, eps 0.02, speed 558.92 f/s
187663: done 2718 games, mean reward 0.060, eps 0.02, speed 353.93 f/s
187714: done 2719 games, mean reward 0.060, eps 0.02, speed 322.88 f/s
187764: done 2720 games, mean reward 0.060, eps 0.02, speed 298.95 f/s
187814: done 2721 games, mean reward 0.060, eps 0.02, speed 383.90 f/s
187864: done 2722 games, mean reward 0.060, eps 0.02, speed 323.20 f/s
187915: done 2723 games, mean reward 0.060, eps 0.02, speed 385.61 f/s
187965: done 2724 games, mean reward 0.060, eps 0.02, speed 471.25 f/s
188016

193256: done 2828 games, mean reward 0.050, eps 0.02, speed 392.46 f/s
193306: done 2829 games, mean reward 0.050, eps 0.02, speed 472.65 f/s
193356: done 2830 games, mean reward 0.050, eps 0.02, speed 525.44 f/s
193407: done 2831 games, mean reward 0.050, eps 0.02, speed 528.61 f/s
193458: done 2832 games, mean reward 0.050, eps 0.02, speed 478.39 f/s
193508: done 2833 games, mean reward 0.050, eps 0.02, speed 513.26 f/s
193558: done 2834 games, mean reward 0.050, eps 0.02, speed 496.56 f/s
193611: done 2835 games, mean reward 0.050, eps 0.02, speed 559.76 f/s
193662: done 2836 games, mean reward 0.050, eps 0.02, speed 511.24 f/s
193715: done 2837 games, mean reward 0.050, eps 0.02, speed 534.03 f/s
193765: done 2838 games, mean reward 0.050, eps 0.02, speed 504.32 f/s
193817: done 2839 games, mean reward 0.050, eps 0.02, speed 480.34 f/s
193867: done 2840 games, mean reward 0.050, eps 0.02, speed 519.59 f/s
193919: done 2841 games, mean reward 0.050, eps 0.02, speed 520.22 f/s
193969

199169: done 2944 games, mean reward 0.070, eps 0.02, speed 518.16 f/s
199219: done 2945 games, mean reward 0.070, eps 0.02, speed 543.95 f/s
199269: done 2946 games, mean reward 0.070, eps 0.02, speed 524.99 f/s
199319: done 2947 games, mean reward 0.070, eps 0.02, speed 534.96 f/s
199369: done 2948 games, mean reward 0.070, eps 0.02, speed 522.22 f/s
199422: done 2949 games, mean reward 0.070, eps 0.02, speed 416.14 f/s
199472: done 2950 games, mean reward 0.070, eps 0.02, speed 505.66 f/s
199522: done 2951 games, mean reward 0.070, eps 0.02, speed 525.08 f/s
199573: done 2952 games, mean reward 0.070, eps 0.02, speed 516.97 f/s
199623: done 2953 games, mean reward 0.070, eps 0.02, speed 518.19 f/s
199674: done 2954 games, mean reward 0.070, eps 0.02, speed 521.74 f/s
199727: done 2955 games, mean reward 0.070, eps 0.02, speed 557.09 f/s
199777: done 2956 games, mean reward 0.070, eps 0.02, speed 529.21 f/s
199827: done 2957 games, mean reward 0.070, eps 0.02, speed 530.38 f/s
199878

205172: done 3062 games, mean reward 0.090, eps 0.02, speed 522.22 f/s
205223: done 3063 games, mean reward 0.090, eps 0.02, speed 507.54 f/s
205274: done 3064 games, mean reward 0.090, eps 0.02, speed 480.38 f/s
205324: done 3065 games, mean reward 0.090, eps 0.02, speed 298.29 f/s
205376: done 3066 games, mean reward 0.090, eps 0.02, speed 513.18 f/s
205427: done 3067 games, mean reward 0.090, eps 0.02, speed 543.87 f/s
205477: done 3068 games, mean reward 0.090, eps 0.02, speed 515.07 f/s
205529: done 3069 games, mean reward 0.090, eps 0.02, speed 485.63 f/s
205580: done 3070 games, mean reward 0.090, eps 0.02, speed 489.23 f/s
205631: done 3071 games, mean reward 0.090, eps 0.02, speed 480.34 f/s
205682: done 3072 games, mean reward 0.080, eps 0.02, speed 510.96 f/s
205733: done 3073 games, mean reward 0.090, eps 0.02, speed 527.79 f/s
205784: done 3074 games, mean reward 0.090, eps 0.02, speed 511.25 f/s
205836: done 3075 games, mean reward 0.090, eps 0.02, speed 537.00 f/s
205886

211149: done 3179 games, mean reward 0.060, eps 0.02, speed 525.59 f/s
211199: done 3180 games, mean reward 0.060, eps 0.02, speed 500.66 f/s
211250: done 3181 games, mean reward 0.060, eps 0.02, speed 537.74 f/s
211301: done 3182 games, mean reward 0.060, eps 0.02, speed 534.21 f/s
211354: done 3183 games, mean reward 0.060, eps 0.02, speed 558.34 f/s
211405: done 3184 games, mean reward 0.060, eps 0.02, speed 488.45 f/s
211458: done 3185 games, mean reward 0.060, eps 0.02, speed 513.08 f/s
211508: done 3186 games, mean reward 0.060, eps 0.02, speed 539.20 f/s
211559: done 3187 games, mean reward 0.060, eps 0.02, speed 555.72 f/s
211609: done 3188 games, mean reward 0.060, eps 0.02, speed 538.35 f/s
211659: done 3189 games, mean reward 0.060, eps 0.02, speed 543.62 f/s
211709: done 3190 games, mean reward 0.060, eps 0.02, speed 543.33 f/s
211762: done 3191 games, mean reward 0.070, eps 0.02, speed 536.76 f/s
211813: done 3192 games, mean reward 0.070, eps 0.02, speed 551.12 f/s
211864

217083: done 3295 games, mean reward 0.150, eps 0.02, speed 523.29 f/s
217133: done 3296 games, mean reward 0.150, eps 0.02, speed 496.47 f/s
217185: done 3297 games, mean reward 0.150, eps 0.02, speed 543.37 f/s
217236: done 3298 games, mean reward 0.150, eps 0.02, speed 533.15 f/s
217287: done 3299 games, mean reward 0.150, eps 0.02, speed 507.34 f/s
217337: done 3300 games, mean reward 0.150, eps 0.02, speed 500.69 f/s
217390: done 3301 games, mean reward 0.150, eps 0.02, speed 529.51 f/s
217442: done 3302 games, mean reward 0.150, eps 0.02, speed 559.76 f/s
217493: done 3303 games, mean reward 0.150, eps 0.02, speed 552.77 f/s
217545: done 3304 games, mean reward 0.150, eps 0.02, speed 527.57 f/s
217597: done 3305 games, mean reward 0.150, eps 0.02, speed 561.11 f/s
217647: done 3306 games, mean reward 0.150, eps 0.02, speed 495.31 f/s
217699: done 3307 games, mean reward 0.140, eps 0.02, speed 524.31 f/s
217749: done 3308 games, mean reward 0.140, eps 0.02, speed 525.47 f/s
217799

223043: done 3412 games, mean reward 0.060, eps 0.02, speed 376.14 f/s
223094: done 3413 games, mean reward 0.060, eps 0.02, speed 539.39 f/s
223145: done 3414 games, mean reward 0.060, eps 0.02, speed 540.35 f/s
223196: done 3415 games, mean reward 0.060, eps 0.02, speed 507.86 f/s
223246: done 3416 games, mean reward 0.060, eps 0.02, speed 490.10 f/s
223296: done 3417 games, mean reward 0.060, eps 0.02, speed 522.42 f/s
223348: done 3418 games, mean reward 0.060, eps 0.02, speed 522.62 f/s
223399: done 3419 games, mean reward 0.060, eps 0.02, speed 527.60 f/s
223450: done 3420 games, mean reward 0.060, eps 0.02, speed 539.89 f/s
223501: done 3421 games, mean reward 0.070, eps 0.02, speed 516.69 f/s
223554: done 3422 games, mean reward 0.070, eps 0.02, speed 481.08 f/s
223607: done 3423 games, mean reward 0.070, eps 0.02, speed 524.02 f/s
223657: done 3424 games, mean reward 0.070, eps 0.02, speed 526.05 f/s
223710: done 3425 games, mean reward 0.070, eps 0.02, speed 559.06 f/s
223760

228959: done 3528 games, mean reward 0.050, eps 0.02, speed 544.12 f/s
229011: done 3529 games, mean reward 0.050, eps 0.02, speed 547.44 f/s
229063: done 3530 games, mean reward 0.040, eps 0.02, speed 514.04 f/s
229113: done 3531 games, mean reward 0.040, eps 0.02, speed 499.97 f/s
229166: done 3532 games, mean reward 0.040, eps 0.02, speed 527.98 f/s
229217: done 3533 games, mean reward 0.040, eps 0.02, speed 512.40 f/s
229274: done 3534 games, mean reward 0.040, eps 0.02, speed 587.84 f/s
229324: done 3535 games, mean reward 0.040, eps 0.02, speed 500.58 f/s
229375: done 3536 games, mean reward 0.040, eps 0.02, speed 539.40 f/s
229427: done 3537 games, mean reward 0.050, eps 0.02, speed 547.03 f/s
229479: done 3538 games, mean reward 0.060, eps 0.02, speed 549.96 f/s
229530: done 3539 games, mean reward 0.060, eps 0.02, speed 510.42 f/s
229580: done 3540 games, mean reward 0.060, eps 0.02, speed 501.84 f/s
229631: done 3541 games, mean reward 0.060, eps 0.02, speed 508.41 f/s
229682

234999: done 3646 games, mean reward 0.080, eps 0.02, speed 518.96 f/s
235049: done 3647 games, mean reward 0.080, eps 0.02, speed 531.35 f/s
235099: done 3648 games, mean reward 0.080, eps 0.02, speed 531.54 f/s
235149: done 3649 games, mean reward 0.080, eps 0.02, speed 495.55 f/s
235200: done 3650 games, mean reward 0.060, eps 0.02, speed 536.53 f/s
235251: done 3651 games, mean reward 0.060, eps 0.02, speed 540.74 f/s
