In [None]:
import os, random
import gymnasium as gym
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from collections import deque
from tqdm import trange
import ale_py

class DQNBreakout(gym.Wrapper):
    """Breakout environment wrapper with action repeat and tensor observations.

    Each call to :meth:`step` repeats the chosen action up to ``repeat``
    times, sums the rewards, and returns the element-wise maximum of the two
    most recent raw frames as a float tensor of shape ``(1, C, H, W)`` scaled
    by ``1 / 255`` and placed on ``device``.

    Note that :meth:`step` returns a 4-tuple ``(obs, reward, done, info)``
    where ``done`` is ``terminated or truncated``; this differs from the
    5-tuple returned by the wrapped Gymnasium environment.
    """

    def __init__(self, render_mode='human', repeat=4, device='cpu'):
        """Create the wrapped ``ALE/Breakout-v5`` environment.

        Args:
            render_mode: Passed straight to ``gym.make``.
            repeat: Number of times each action is repeated per ``step``.
                Must be at least 1.
            device: Torch device the observation tensors are moved to.

        Raises:
            ValueError: If ``repeat`` is smaller than 1 (``step`` would have
                no frame or ``info`` to return).
        """
        if repeat < 1:
            raise ValueError(f"repeat must be >= 1, got {repeat}")
        env = gym.make("ALE/Breakout-v5", render_mode=render_mode)
        super().__init__(env)
        self.repeat = repeat
        self.device = device
        # Holds at most the two most recent raw frames (used for max-pooling).
        self.frame_buffer = []

    def _to_tensor(self, frame):
        """Convert an ``(H, W, C)`` numpy frame to a ``(1, C, H, W)`` float tensor."""
        return (
            torch.from_numpy(frame)
                 .permute(2, 0, 1)
                 .unsqueeze(0)
                 .to(self.device)
                 .float()
                 .div(255.0)
        )

    def reset(self, *, seed=None, options=None):
        """Reset the environment and return ``(tensor_state, info)``.

        ``seed`` and ``options`` are forwarded to the wrapped environment so
        the wrapper stays compatible with the Gymnasium ``reset`` signature.
        """
        obs, info = self.env.reset(seed=seed, options=options)
        self.frame_buffer = [obs]
        return self._to_tensor(obs), info

    def step(self, action):
        """Repeat ``action`` and return ``(tensor_frame, total_reward, done, info)``.

        The loop stops early as soon as the wrapped environment reports
        termination or truncation. ``info`` is the one from the last
        emulator step that was executed.
        """
        total_reward = 0
        terminated = False
        truncated = False
        for _ in range(self.repeat):
            obs, reward, terminated, truncated, info = self.env.step(action)
            total_reward += reward
            self.frame_buffer.append(obs)
            # Only the last two frames are ever read; drop older ones so the
            # buffer does not grow with the length of the episode.
            del self.frame_buffer[:-2]
            if terminated or truncated:
                break
        # Max-pool over the two most recent frames.
        max_frame = np.max(self.frame_buffer[-2:], axis=0)
        return self._to_tensor(max_frame), total_reward, terminated or truncated, info

    def render(self):
        """Delegate rendering to the wrapped environment."""
        return self.env.render()

class SimpleDQN(nn.Module):
    """Small convolutional Q-network.

    Two unpadded convolutions (8x8 stride 4, then 4x4 stride 2), each followed
    by ReLU, feed a fully connected head that outputs one Q-value per action.

    Args:
        input_shape: ``(channels, height, width)`` of a single observation.
        n_actions: Number of discrete actions, i.e. the output width.
    """

    def __init__(self, input_shape, n_actions):
        super().__init__()
        channels, height, width = input_shape

        feature_layers = [
            nn.Conv2d(channels, 16, kernel_size=8, stride=4),
            nn.ReLU(),
            nn.Conv2d(16, 32, kernel_size=4, stride=2),
            nn.ReLU(),
        ]
        self.conv = nn.Sequential(*feature_layers)

        # Track the spatial size through both unpadded convolutions so the
        # first linear layer can be sized without running a forward pass.
        for kernel, stride in ((8, 4), (4, 2)):
            height = (height - kernel) // stride + 1
            width = (width - kernel) // stride + 1
        flat_features = height * width * 32

        self.fc = nn.Sequential(
            nn.Flatten(),
            nn.Linear(flat_features, 256),
            nn.ReLU(),
            # One Q-value (estimated future reward) per possible action:
            # [Q(s, a1), Q(s, a2), ...]
            nn.Linear(256, n_actions),
        )

    def forward(self, x):
        """Return the Q-values for a batch of observations ``x``."""
        features = self.conv(x)
        q_values = self.fc(features)
        return q_values

class ReplayBuffer:
    """Fixed-capacity FIFO store of transitions for experience replay.

    Once ``capacity`` transitions are stored, pushing a new one discards the
    oldest.
    """

    def __init__(self, capacity):
        self.buf = deque(maxlen=capacity)

    def push(self, s, a, r, s2, d):
        """Store one transition.

        Args:
            s: Current state tensor (batch dimension first).
            a: Action taken.
            r: Reward received.
            s2: Next state tensor reached by taking ``a`` in ``s``.
            d: Whether the episode ended on this transition.
        """
        self.buf.append((s, a, r, s2, d))

    def sample(self, batch_size):
        """Draw ``batch_size`` distinct transitions uniformly at random.

        Returns:
            ``(states, actions, rewards, next_states, dones)`` where states
            and next_states are concatenated along dimension 0, rewards are
            float tensors and dones are uint8 tensors. The scalar tensors are
            created on the same device as the stored states.

        Raises:
            ValueError: If ``batch_size`` exceeds the number of stored
                transitions (raised by ``random.sample``).
        """
        # Sampling at random breaks the temporal correlation between
        # consecutive transitions.
        batch = random.sample(self.buf, batch_size)
        states, actions, rewards, next_states, dones = zip(*batch)
        state_batch = torch.cat(states)
        # Take the device from the stored tensors instead of relying on a
        # module-level global that only exists when run as a script.
        target_device = state_batch.device
        return (
            state_batch,
            torch.tensor(actions, device=target_device),
            torch.tensor(rewards, device=target_device, dtype=torch.float),
            torch.cat(next_states),
            torch.tensor(dones, device=target_device, dtype=torch.uint8),
        )

    def __len__(self):
        return len(self.buf)

def select_action(net, state, eps, n_actions):
    """Choose an action with an epsilon-greedy policy.

    With probability ``eps`` a uniformly random action index in
    ``[0, n_actions)`` is returned (exploration); otherwise the index of the
    largest Q-value that ``net`` predicts for ``state`` (exploitation).
    """
    explore = random.random() < eps
    if not explore:
        # Greedy branch: no gradients are needed for action selection.
        with torch.no_grad():
            q_values = net(state)
            best = q_values.argmax(dim=1)
            return best.item()
    return random.randrange(n_actions)


# Training

if __name__ == "__main__":
    # Tolerate duplicate OpenMP runtimes being loaded.
    # NOTE(review): this is set after torch has been imported; confirm it
    # still takes effect on the target platform.
    os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE'
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    # NOTE(review): render_mode='human' opens a live window; presumably this
    # limits training throughput. Consider render_mode=None for unattended runs.
    env = DQNBreakout(render_mode='human', device=device)
    try:
        state, _ = env.reset()
        _, c, h, w = state.shape
        n_actions = env.action_space.n

        policy_net = SimpleDQN((c, h, w), n_actions).to(device)
        target_net = SimpleDQN((c, h, w), n_actions).to(device)
        target_net.load_state_dict(policy_net.state_dict())

        optimizer = optim.Adam(policy_net.parameters(), lr=1e-4)
        # Built once; the loss module is stateless, so there is no need to
        # re-create it on every training step.
        loss_fn = nn.MSELoss()
        buffer = ReplayBuffer(capacity=100_000)

        # Total number of agent steps (each repeats the action in the env).
        num_steps = 58_000
        batch_size = 32
        gamma = 0.99
        # Copy the policy weights into the target network every N steps.
        target_update = 1_000

        episode_reward = 0
        best_reward = -float('inf')
        for step in trange(num_steps, desc="Training DQN"):
            # Epsilon decays linearly from 1 to 0.01 over the first half of
            # training and then stays at 0.01.
            eps = max(0.01, 1 - step / (num_steps * 0.5))
            action = select_action(policy_net, state, eps, n_actions)

            # Advance the environment and record the transition.
            next_state, reward, done, _ = env.step(action)
            episode_reward += reward
            buffer.push(state, action, reward, next_state, done)
            state = next_state

            # One gradient step as soon as a full batch is available.
            if len(buffer) >= batch_size:
                states, actions, rewards, next_states, dones = buffer.sample(batch_size)
                q_vals = policy_net(states).gather(1, actions.unsqueeze(1)).squeeze(1)
                with torch.no_grad():
                    next_q = target_net(next_states).max(1)[0]
                    # Bellman target; (1 - dones) removes the bootstrap term
                    # for transitions that ended an episode.
                    target = rewards + gamma * next_q * (1 - dones)
                loss = loss_fn(q_vals, target)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            if step % target_update == 0:
                target_net.load_state_dict(policy_net.state_dict())

            if done:
                print(f"\nEpisode slut – belöning: {episode_reward:.1f}")
                # Keep a checkpoint of the policy from the best episode so far.
                if episode_reward > best_reward:
                    best_reward = episode_reward
                    torch.save(policy_net.state_dict(), "best_model.pth")
                state, _ = env.reset()
                episode_reward = 0

        print("Träning klar!")
    finally:
        # Always release the emulator (and its render window), even if
        # training is interrupted or raises.
        env.close()


Training DQN:   0%|          | 39/58000 [00:19<8:51:06,  1.82it/s] 


Episode slut – belöning: 0.0


Training DQN:   0%|          | 89/58000 [00:53<9:25:46,  1.71it/s] 


Episode slut – belöning: 1.0


Training DQN:   0%|          | 142/58000 [01:27<8:12:08,  1.96it/s] 


Episode slut – belöning: 1.0


Training DQN:   0%|          | 181/58000 [01:52<8:30:37,  1.89it/s] 


Episode slut – belöning: 0.0


Training DQN:   0%|          | 221/58000 [02:18<8:25:30,  1.90it/s] 


Episode slut – belöning: 0.0


Training DQN:   0%|          | 274/58000 [02:52<8:19:29,  1.93it/s] 


Episode slut – belöning: 0.0


Training DQN:   1%|          | 335/58000 [03:31<11:40:40,  1.37it/s]


Episode slut – belöning: 2.0


Training DQN:   1%|          | 407/58000 [04:14<6:08:31,  2.60it/s] 


Episode slut – belöning: 3.0


Training DQN:   1%|          | 470/58000 [04:35<5:01:00,  3.19it/s]


Episode slut – belöning: 1.0


Training DQN:   1%|          | 538/58000 [04:59<4:31:28,  3.53it/s]


Episode slut – belöning: 3.0


Training DQN:   1%|          | 614/58000 [05:24<4:21:10,  3.66it/s]


Episode slut – belöning: 3.0


Training DQN:   1%|          | 673/58000 [05:44<4:32:15,  3.51it/s]


Episode slut – belöning: 1.0


Training DQN:   1%|          | 714/58000 [05:57<4:26:31,  3.58it/s]


Episode slut – belöning: 0.0


Training DQN:   1%|▏         | 765/58000 [06:14<4:47:34,  3.32it/s]


Episode slut – belöning: 1.0


Training DQN:   1%|▏         | 851/58000 [06:43<5:53:31,  2.69it/s]


Episode slut – belöning: 5.0


Training DQN:   2%|▏         | 907/58000 [07:02<4:33:45,  3.48it/s]


Episode slut – belöning: 1.0


Training DQN:   2%|▏         | 951/58000 [07:16<4:22:18,  3.62it/s]


Episode slut – belöning: 1.0


Training DQN:   2%|▏         | 1011/58000 [07:36<4:23:55,  3.60it/s]


Episode slut – belöning: 1.0


Training DQN:   2%|▏         | 1062/58000 [07:52<4:39:09,  3.40it/s]


Episode slut – belöning: 2.0


Training DQN:   2%|▏         | 1117/58000 [08:10<4:19:15,  3.66it/s]


Episode slut – belöning: 1.0


Training DQN:   2%|▏         | 1186/58000 [08:32<5:12:37,  3.03it/s]


Episode slut – belöning: 3.0


Training DQN:   2%|▏         | 1240/58000 [08:50<4:52:08,  3.24it/s]


Episode slut – belöning: 1.0


Training DQN:   2%|▏         | 1298/58000 [09:09<4:27:16,  3.54it/s]


Episode slut – belöning: 1.0


Training DQN:   2%|▏         | 1343/58000 [09:24<4:15:13,  3.70it/s]


Episode slut – belöning: 0.0


Training DQN:   2%|▏         | 1392/58000 [09:40<4:25:04,  3.56it/s]


Episode slut – belöning: 1.0


Training DQN:   2%|▏         | 1442/58000 [09:56<4:21:22,  3.61it/s]


Episode slut – belöning: 0.0


Training DQN:   3%|▎         | 1512/58000 [10:19<4:27:01,  3.53it/s]


Episode slut – belöning: 3.0


Training DQN:   3%|▎         | 1559/58000 [10:35<4:24:53,  3.55it/s]


Episode slut – belöning: 1.0


Training DQN:   3%|▎         | 1592/58000 [10:45<4:28:24,  3.50it/s]


Episode slut – belöning: 0.0


Training DQN:   3%|▎         | 1639/58000 [11:01<4:25:38,  3.54it/s]


Episode slut – belöning: 1.0


Training DQN:   3%|▎         | 1709/58000 [11:24<5:01:44,  3.11it/s]


Episode slut – belöning: 3.0


Training DQN:   3%|▎         | 1760/58000 [11:40<4:24:15,  3.55it/s]


Episode slut – belöning: 1.0


Training DQN:   3%|▎         | 1819/58000 [12:00<5:14:43,  2.98it/s]


Episode slut – belöning: 2.0


Training DQN:   3%|▎         | 1915/58000 [12:31<4:19:54,  3.60it/s]


Episode slut – belöning: 4.0


Training DQN:   3%|▎         | 2003/58000 [13:00<4:31:20,  3.44it/s]


Episode slut – belöning: 4.0


Training DQN:   4%|▎         | 2066/58000 [13:21<4:14:09,  3.67it/s]


Episode slut – belöning: 1.0


Training DQN:   4%|▎         | 2112/58000 [13:36<4:15:04,  3.65it/s]


Episode slut – belöning: 1.0


Training DQN:   4%|▎         | 2172/58000 [13:55<4:23:55,  3.53it/s]


Episode slut – belöning: 1.0


Training DQN:   4%|▍         | 2221/58000 [14:11<4:24:49,  3.51it/s]


Episode slut – belöning: 1.0


Training DQN:   4%|▍         | 2297/58000 [14:37<4:25:07,  3.50it/s]


Episode slut – belöning: 1.0


Training DQN:   4%|▍         | 2357/58000 [14:56<4:26:00,  3.49it/s]


Episode slut – belöning: 2.0


Training DQN:   4%|▍         | 2438/58000 [15:23<4:17:27,  3.60it/s]


Episode slut – belöning: 3.0


Training DQN:   4%|▍         | 2495/58000 [15:42<4:58:07,  3.10it/s]


Episode slut – belöning: 2.0


Training DQN:   4%|▍         | 2537/58000 [15:56<4:22:13,  3.53it/s]


Episode slut – belöning: 1.0


Training DQN:   4%|▍         | 2588/58000 [16:12<4:23:27,  3.51it/s]


Episode slut – belöning: 1.0


Training DQN:   5%|▍         | 2664/58000 [16:37<4:12:40,  3.65it/s]


Episode slut – belöning: 3.0


Training DQN:   5%|▍         | 2709/58000 [16:52<4:51:15,  3.16it/s]


Episode slut – belöning: 1.0


Training DQN:   5%|▍         | 2791/58000 [17:19<4:14:02,  3.62it/s]


Episode slut – belöning: 2.0


Training DQN:   5%|▍         | 2842/58000 [17:35<4:16:10,  3.59it/s]


Episode slut – belöning: 2.0


Training DQN:   5%|▌         | 2946/58000 [18:10<5:11:47,  2.94it/s]


Episode slut – belöning: 7.0


Training DQN:   5%|▌         | 2998/58000 [18:27<4:15:29,  3.59it/s]


Episode slut – belöning: 0.0


Training DQN:   5%|▌         | 3053/58000 [18:45<4:46:57,  3.19it/s]


Episode slut – belöning: 1.0


Training DQN:   5%|▌         | 3155/58000 [19:18<4:17:23,  3.55it/s]


Episode slut – belöning: 3.0


Training DQN:   6%|▌         | 3246/58000 [19:49<4:11:41,  3.63it/s]


Episode slut – belöning: 4.0


Training DQN:   6%|▌         | 3334/58000 [20:18<4:16:09,  3.56it/s]


Episode slut – belöning: 4.0


Training DQN:   6%|▌         | 3390/58000 [20:36<4:12:10,  3.61it/s]


Episode slut – belöning: 1.0


Training DQN:   6%|▌         | 3449/58000 [20:55<4:46:29,  3.17it/s]


Episode slut – belöning: 2.0


Training DQN:   6%|▌         | 3506/58000 [21:14<4:21:08,  3.48it/s]


Episode slut – belöning: 1.0


Training DQN:   6%|▌         | 3578/58000 [21:38<4:10:20,  3.62it/s]


Episode slut – belöning: 2.0


Training DQN:   6%|▌         | 3617/58000 [21:51<4:10:35,  3.62it/s]


Episode slut – belöning: 0.0


Training DQN:   6%|▋         | 3674/58000 [22:09<4:09:25,  3.63it/s]


Episode slut – belöning: 0.0


Training DQN:   6%|▋         | 3735/58000 [22:30<5:08:08,  2.94it/s]


Episode slut – belöning: 2.0


Training DQN:   7%|▋         | 3781/58000 [22:45<4:10:27,  3.61it/s]


Episode slut – belöning: 0.0


Training DQN:   7%|▋         | 3863/58000 [23:12<4:14:09,  3.55it/s]


Episode slut – belöning: 3.0


Training DQN:   7%|▋         | 3922/58000 [23:31<4:08:03,  3.63it/s]


Episode slut – belöning: 0.0


Training DQN:   7%|▋         | 3957/58000 [23:43<4:13:22,  3.55it/s]


Episode slut – belöning: 0.0


Training DQN:   7%|▋         | 4008/58000 [24:00<5:03:48,  2.96it/s]


Episode slut – belöning: 1.0


Training DQN:   7%|▋         | 4062/58000 [24:17<4:12:22,  3.56it/s]


Episode slut – belöning: 1.0


Training DQN:   7%|▋         | 4126/58000 [24:38<4:05:45,  3.65it/s]


Episode slut – belöning: 2.0


Training DQN:   7%|▋         | 4171/58000 [24:53<4:09:49,  3.59it/s]


Episode slut – belöning: 1.0


Training DQN:   7%|▋         | 4229/58000 [25:12<4:15:17,  3.51it/s]


Episode slut – belöning: 1.0


Training DQN:   7%|▋         | 4294/58000 [25:34<4:52:33,  3.06it/s]


Episode slut – belöning: 2.0


Training DQN:   8%|▊         | 4365/58000 [25:58<4:45:53,  3.13it/s]


Episode slut – belöning: 2.0


Training DQN:   8%|▊         | 4427/58000 [26:18<4:13:56,  3.52it/s]


Episode slut – belöning: 3.0


Training DQN:   8%|▊         | 4498/58000 [26:42<5:20:44,  2.78it/s]


Episode slut – belöning: 2.0


Training DQN:   8%|▊         | 4560/58000 [27:03<4:11:26,  3.54it/s]


Episode slut – belöning: 1.0


Training DQN:   8%|▊         | 4607/58000 [27:19<4:21:04,  3.41it/s]


Episode slut – belöning: 0.0


Training DQN:   8%|▊         | 4693/58000 [27:49<5:05:01,  2.91it/s]


Episode slut – belöning: 8.0


Training DQN:   8%|▊         | 4747/58000 [28:07<4:07:29,  3.59it/s]


Episode slut – belöning: 1.0


Training DQN:   8%|▊         | 4823/58000 [28:32<4:09:29,  3.55it/s]


Episode slut – belöning: 2.0


Training DQN:   8%|▊         | 4897/58000 [28:57<4:12:22,  3.51it/s]


Episode slut – belöning: 3.0


Training DQN:   9%|▊         | 4941/58000 [29:11<4:11:36,  3.51it/s]


Episode slut – belöning: 0.0


Training DQN:   9%|▊         | 5009/58000 [29:34<4:07:50,  3.56it/s]


Episode slut – belöning: 1.0


Training DQN:   9%|▉         | 5075/58000 [29:56<4:32:45,  3.23it/s]


Episode slut – belöning: 2.0


Training DQN:   9%|▉         | 5157/58000 [30:23<4:49:22,  3.04it/s]


Episode slut – belöning: 5.0


Training DQN:   9%|▉         | 5227/58000 [30:47<4:04:58,  3.59it/s]


Episode slut – belöning: 3.0


Training DQN:   9%|▉         | 5292/58000 [31:08<4:08:30,  3.54it/s]


Episode slut – belöning: 3.0


Training DQN:   9%|▉         | 5338/58000 [31:23<4:11:08,  3.49it/s]


Episode slut – belöning: 0.0


Training DQN:   9%|▉         | 5387/58000 [31:39<4:05:23,  3.57it/s]


Episode slut – belöning: 1.0


Training DQN:   9%|▉         | 5466/58000 [32:05<4:51:39,  3.00it/s]


Episode slut – belöning: 4.0


Training DQN:  10%|▉         | 5519/58000 [32:23<4:08:20,  3.52it/s]


Episode slut – belöning: 1.0


Training DQN:  10%|▉         | 5576/58000 [32:42<4:06:42,  3.54it/s]


Episode slut – belöning: 2.0


Training DQN:  10%|▉         | 5613/58000 [32:54<3:58:29,  3.66it/s]


Episode slut – belöning: 0.0


Training DQN:  10%|▉         | 5671/58000 [33:13<4:38:09,  3.14it/s]


Episode slut – belöning: 2.0


Training DQN:  10%|▉         | 5795/58000 [33:56<4:02:57,  3.58it/s]


Episode slut – belöning: 8.0


Training DQN:  10%|█         | 5860/58000 [34:18<4:16:06,  3.39it/s]


Episode slut – belöning: 2.0


Training DQN:  10%|█         | 5911/58000 [34:36<5:05:52,  2.84it/s]


Episode slut – belöning: 2.0


Training DQN:  10%|█         | 5968/58000 [34:56<4:00:21,  3.61it/s]


Episode slut – belöning: 1.0


Training DQN:  10%|█         | 6034/58000 [35:18<4:29:46,  3.21it/s]


Episode slut – belöning: 3.0


Training DQN:  11%|█         | 6098/58000 [35:39<4:01:17,  3.58it/s]


Episode slut – belöning: 1.0


Training DQN:  11%|█         | 6227/58000 [36:22<4:02:58,  3.55it/s]


Episode slut – belöning: 11.0


Training DQN:  11%|█         | 6326/58000 [36:54<3:58:42,  3.61it/s]


Episode slut – belöning: 4.0


Training DQN:  11%|█         | 6376/58000 [37:11<3:54:25,  3.67it/s]


Episode slut – belöning: 1.0


Training DQN:  11%|█         | 6446/58000 [37:34<3:57:39,  3.62it/s]


Episode slut – belöning: 3.0


Training DQN:  11%|█▏        | 6530/58000 [38:01<4:27:14,  3.21it/s]


Episode slut – belöning: 5.0


Training DQN:  11%|█▏        | 6572/58000 [38:15<4:17:24,  3.33it/s]


Episode slut – belöning: 1.0


Training DQN:  11%|█▏        | 6632/58000 [38:35<4:48:12,  2.97it/s]


Episode slut – belöning: 2.0


Training DQN:  12%|█▏        | 6702/58000 [38:58<3:53:47,  3.66it/s]


Episode slut – belöning: 3.0


Training DQN:  12%|█▏        | 6846/58000 [39:45<4:44:52,  2.99it/s]


Episode slut – belöning: 5.0


Training DQN:  12%|█▏        | 6882/58000 [39:57<3:58:13,  3.58it/s]


Episode slut – belöning: 0.0


Training DQN:  12%|█▏        | 6942/58000 [40:17<4:23:07,  3.23it/s]


Episode slut – belöning: 2.0


Training DQN:  12%|█▏        | 7012/58000 [40:40<4:13:17,  3.36it/s]


Episode slut – belöning: 4.0


Training DQN:  12%|█▏        | 7060/58000 [40:57<4:47:32,  2.95it/s]


Episode slut – belöning: 2.0


Training DQN:  12%|█▏        | 7098/58000 [41:10<3:54:46,  3.61it/s]


Episode slut – belöning: 0.0


Training DQN:  12%|█▏        | 7163/58000 [41:33<4:26:24,  3.18it/s]


Episode slut – belöning: 3.0


Training DQN:  13%|█▎        | 7254/58000 [42:04<4:35:36,  3.07it/s]


Episode slut – belöning: 8.0


Training DQN:  13%|█▎        | 7305/58000 [42:21<4:40:52,  3.01it/s]


Episode slut – belöning: 1.0


Training DQN:  13%|█▎        | 7357/58000 [42:38<4:30:34,  3.12it/s]


Episode slut – belöning: 2.0


Training DQN:  13%|█▎        | 7458/58000 [43:12<3:52:03,  3.63it/s]


Episode slut – belöning: 7.0


Training DQN:  13%|█▎        | 7533/58000 [43:36<4:08:14,  3.39it/s]


Episode slut – belöning: 4.0


Training DQN:  13%|█▎        | 7613/58000 [44:03<4:23:29,  3.19it/s]


Episode slut – belöning: 4.0


Training DQN:  13%|█▎        | 7675/58000 [44:24<4:27:24,  3.14it/s]


Episode slut – belöning: 3.0


Training DQN:  13%|█▎        | 7751/58000 [44:50<4:14:53,  3.29it/s]


Episode slut – belöning: 8.0


Training DQN:  13%|█▎        | 7808/58000 [45:10<3:55:53,  3.55it/s]


Episode slut – belöning: 2.0


Training DQN:  14%|█▎        | 7905/58000 [45:42<4:35:40,  3.03it/s]


Episode slut – belöning: 8.0


Training DQN:  14%|█▍        | 7999/58000 [46:15<3:56:40,  3.52it/s]


Episode slut – belöning: 5.0


Training DQN:  14%|█▍        | 8068/58000 [46:39<4:26:01,  3.13it/s]


Episode slut – belöning: 4.0


Training DQN:  14%|█▍        | 8158/58000 [47:10<4:20:18,  3.19it/s]


Episode slut – belöning: 6.0


Training DQN:  14%|█▍        | 8205/58000 [47:26<3:53:44,  3.55it/s]


Episode slut – belöning: 1.0


Training DQN:  14%|█▍        | 8274/58000 [47:49<3:57:56,  3.48it/s]


Episode slut – belöning: 3.0


Training DQN:  14%|█▍        | 8368/58000 [48:20<4:01:11,  3.43it/s]


Episode slut – belöning: 5.0


Training DQN:  15%|█▍        | 8459/58000 [48:50<3:51:30,  3.57it/s]


Episode slut – belöning: 9.0


Training DQN:  15%|█▍        | 8542/58000 [49:19<4:22:57,  3.13it/s]


Episode slut – belöning: 5.0


Training DQN:  15%|█▍        | 8596/58000 [49:37<4:36:39,  2.98it/s]


Episode slut – belöning: 3.0


Training DQN:  15%|█▍        | 8643/58000 [49:53<4:03:11,  3.38it/s]


Episode slut – belöning: 1.0


Training DQN:  15%|█▍        | 8695/58000 [50:10<4:19:08,  3.17it/s]


Episode slut – belöning: 2.0


Training DQN:  15%|█▌        | 8746/58000 [50:28<3:47:50,  3.60it/s]


Episode slut – belöning: 2.0


Training DQN:  15%|█▌        | 8839/58000 [50:59<3:47:14,  3.61it/s]


Episode slut – belöning: 9.0


Training DQN:  15%|█▌        | 8923/58000 [51:27<3:54:13,  3.49it/s]


Episode slut – belöning: 5.0


Training DQN:  15%|█▌        | 8978/58000 [51:45<3:53:01,  3.51it/s]


Episode slut – belöning: 2.0


Training DQN:  16%|█▌        | 9047/58000 [52:08<3:51:29,  3.52it/s]


Episode slut – belöning: 4.0


Training DQN:  16%|█▌        | 9110/58000 [52:29<4:32:46,  2.99it/s]


Episode slut – belöning: 3.0


Training DQN:  16%|█▌        | 9161/58000 [52:46<3:49:49,  3.54it/s]


Episode slut – belöning: 2.0


Training DQN:  16%|█▌        | 9204/58000 [53:00<3:47:50,  3.57it/s]


Episode slut – belöning: 1.0


Training DQN:  16%|█▌        | 9281/58000 [53:25<3:53:14,  3.48it/s]


Episode slut – belöning: 4.0


Training DQN:  16%|█▌        | 9344/58000 [53:46<3:55:04,  3.45it/s]


Episode slut – belöning: 3.0


Training DQN:  16%|█▌        | 9418/58000 [54:11<3:49:15,  3.53it/s]


Episode slut – belöning: 4.0


Training DQN:  16%|█▋        | 9490/58000 [54:35<4:18:41,  3.13it/s]


Episode slut – belöning: 3.0


Training DQN:  16%|█▋        | 9561/58000 [54:59<3:56:17,  3.42it/s]


Episode slut – belöning: 4.0


Training DQN:  17%|█▋        | 9637/58000 [55:24<4:00:53,  3.35it/s]


Episode slut – belöning: 3.0


Training DQN:  17%|█▋        | 9698/58000 [55:44<3:41:18,  3.64it/s]


Episode slut – belöning: 3.0


Training DQN:  17%|█▋        | 9745/58000 [55:59<3:44:59,  3.57it/s]


Episode slut – belöning: 1.0


Training DQN:  17%|█▋        | 9814/58000 [56:22<4:33:19,  2.94it/s]


Episode slut – belöning: 4.0


Training DQN:  17%|█▋        | 9872/58000 [56:42<4:33:04,  2.94it/s]


Episode slut – belöning: 3.0


Training DQN:  17%|█▋        | 9932/58000 [57:02<3:44:21,  3.57it/s]


Episode slut – belöning: 2.0


Training DQN:  17%|█▋        | 9973/58000 [57:16<4:03:46,  3.28it/s]


Episode slut – belöning: 1.0


Training DQN:  17%|█▋        | 10057/58000 [57:44<3:45:39,  3.54it/s]


Episode slut – belöning: 4.0


Training DQN:  17%|█▋        | 10125/58000 [58:07<4:14:11,  3.14it/s]


Episode slut – belöning: 3.0


Training DQN:  18%|█▊        | 10192/58000 [58:30<3:57:19,  3.36it/s]


Episode slut – belöning: 4.0


Training DQN:  18%|█▊        | 10246/58000 [58:48<3:46:37,  3.51it/s]


Episode slut – belöning: 2.0


Training DQN:  18%|█▊        | 10292/58000 [59:03<4:36:31,  2.88it/s]


Episode slut – belöning: 2.0


Training DQN:  18%|█▊        | 10324/58000 [59:14<3:58:02,  3.34it/s]


Episode slut – belöning: 0.0


Training DQN:  18%|█▊        | 10392/58000 [59:37<4:31:32,  2.92it/s]


Episode slut – belöning: 4.0


Training DQN:  18%|█▊        | 10487/58000 [1:00:09<3:57:27,  3.33it/s]


Episode slut – belöning: 6.0


Training DQN:  18%|█▊        | 10543/58000 [1:00:28<3:47:06,  3.48it/s]


Episode slut – belöning: 2.0


Training DQN:  18%|█▊        | 10599/58000 [1:00:46<3:46:11,  3.49it/s]


Episode slut – belöning: 2.0


Training DQN:  18%|█▊        | 10650/58000 [1:01:04<3:41:40,  3.56it/s]


Episode slut – belöning: 1.0


Training DQN:  18%|█▊        | 10701/58000 [1:01:21<3:39:19,  3.59it/s]


Episode slut – belöning: 2.0


Training DQN:  19%|█▊        | 10770/58000 [1:01:44<4:01:36,  3.26it/s]


Episode slut – belöning: 3.0


Training DQN:  19%|█▊        | 10817/58000 [1:02:00<4:14:42,  3.09it/s]


Episode slut – belöning: 2.0


Training DQN:  19%|█▉        | 10909/58000 [1:02:31<3:49:21,  3.42it/s]


Episode slut – belöning: 5.0


Training DQN:  19%|█▉        | 10962/58000 [1:02:49<3:43:09,  3.51it/s]


Episode slut – belöning: 2.0


Training DQN:  19%|█▉        | 11012/58000 [1:03:05<3:45:03,  3.48it/s]


Episode slut – belöning: 1.0


Training DQN:  19%|█▉        | 11057/58000 [1:03:20<3:57:42,  3.29it/s]


Episode slut – belöning: 1.0


Training DQN:  19%|█▉        | 11117/58000 [1:03:41<3:52:43,  3.36it/s]


Episode slut – belöning: 3.0


Training DQN:  19%|█▉        | 11209/58000 [1:04:13<4:09:28,  3.13it/s]


Episode slut – belöning: 6.0


Training DQN:  19%|█▉        | 11260/58000 [1:04:30<3:41:02,  3.52it/s]


Episode slut – belöning: 2.0


Training DQN:  20%|█▉        | 11358/58000 [1:05:03<4:08:08,  3.13it/s]


Episode slut – belöning: 7.0


Training DQN:  20%|█▉        | 11396/58000 [1:05:16<3:57:51,  3.27it/s]


Episode slut – belöning: 1.0


Training DQN:  20%|█▉        | 11439/58000 [1:05:30<3:44:25,  3.46it/s]


Episode slut – belöning: 1.0


Training DQN:  20%|█▉        | 11535/58000 [1:06:02<3:53:37,  3.31it/s]


Episode slut – belöning: 6.0


Training DQN:  20%|██        | 11600/58000 [1:06:24<3:36:46,  3.57it/s]


Episode slut – belöning: 3.0


Training DQN:  20%|██        | 11683/58000 [1:06:52<4:07:51,  3.11it/s]


Episode slut – belöning: 5.0


Training DQN:  20%|██        | 11733/58000 [1:07:09<3:56:22,  3.26it/s]


Episode slut – belöning: 2.0


Training DQN:  20%|██        | 11829/58000 [1:07:42<4:20:24,  2.96it/s]


Episode slut – belöning: 5.0


Training DQN:  20%|██        | 11881/58000 [1:07:59<3:36:33,  3.55it/s]


Episode slut – belöning: 2.0


Training DQN:  21%|██        | 11951/58000 [1:08:22<3:37:22,  3.53it/s]


Episode slut – belöning: 4.0


Training DQN:  21%|██        | 12044/58000 [1:08:54<4:19:55,  2.95it/s]


Episode slut – belöning: 6.0


Training DQN:  21%|██        | 12118/58000 [1:09:18<3:37:16,  3.52it/s]


Episode slut – belöning: 4.0


Training DQN:  21%|██        | 12212/58000 [1:09:49<3:46:45,  3.37it/s]


Episode slut – belöning: 9.0


Training DQN:  21%|██        | 12281/58000 [1:10:12<3:51:57,  3.29it/s]


Episode slut – belöning: 4.0


Training DQN:  21%|██▏       | 12347/58000 [1:10:35<3:58:15,  3.19it/s]


Episode slut – belöning: 3.0


Training DQN:  21%|██▏       | 12435/58000 [1:11:04<3:46:49,  3.35it/s]


Episode slut – belöning: 6.0


Training DQN:  22%|██▏       | 12483/58000 [1:11:21<3:33:17,  3.56it/s]


Episode slut – belöning: 1.0


Training DQN:  22%|██▏       | 12542/58000 [1:11:40<4:34:22,  2.76it/s]


Episode slut – belöning: 3.0


Training DQN:  22%|██▏       | 12632/58000 [1:12:10<3:41:35,  3.41it/s]


Episode slut – belöning: 6.0


Training DQN:  22%|██▏       | 12718/58000 [1:12:38<4:14:38,  2.96it/s]


Episode slut – belöning: 6.0


Training DQN:  22%|██▏       | 12823/58000 [1:13:13<3:27:10,  3.63it/s]


Episode slut – belöning: 3.0


Training DQN:  22%|██▏       | 12895/58000 [1:13:36<3:48:00,  3.30it/s]


Episode slut – belöning: 4.0


Training DQN:  22%|██▏       | 12990/58000 [1:14:06<3:25:40,  3.65it/s]


Episode slut – belöning: 6.0


Training DQN:  23%|██▎       | 13052/58000 [1:14:27<4:00:36,  3.11it/s]


Episode slut – belöning: 3.0


Training DQN:  23%|██▎       | 13130/58000 [1:14:51<3:24:55,  3.65it/s]


Episode slut – belöning: 4.0


Training DQN:  23%|██▎       | 13179/58000 [1:15:07<3:21:11,  3.71it/s]


Episode slut – belöning: 2.0


Training DQN:  23%|██▎       | 13266/58000 [1:15:35<3:35:26,  3.46it/s]


Episode slut – belöning: 6.0


Training DQN:  23%|██▎       | 13328/58000 [1:15:55<4:28:37,  2.77it/s]


Episode slut – belöning: 4.0


Training DQN:  23%|██▎       | 13383/58000 [1:16:13<3:18:36,  3.74it/s]


Episode slut – belöning: 2.0


Training DQN:  23%|██▎       | 13460/58000 [1:16:39<3:37:28,  3.41it/s]


Episode slut – belöning: 5.0


Training DQN:  23%|██▎       | 13522/58000 [1:16:59<3:16:47,  3.77it/s]


Episode slut – belöning: 3.0


Training DQN:  23%|██▎       | 13572/58000 [1:17:15<3:57:23,  3.12it/s]


Episode slut – belöning: 2.0


Training DQN:  24%|██▎       | 13668/58000 [1:17:46<3:27:09,  3.57it/s]


Episode slut – belöning: 6.0


Training DQN:  24%|██▎       | 13752/58000 [1:18:13<3:11:10,  3.86it/s]


Episode slut – belöning: 5.0


Training DQN:  24%|██▍       | 13844/58000 [1:18:42<3:15:25,  3.77it/s]


Episode slut – belöning: 9.0


Training DQN:  24%|██▍       | 13938/58000 [1:19:12<3:15:07,  3.76it/s]


Episode slut – belöning: 6.0


Training DQN:  24%|██▍       | 13993/58000 [1:19:30<3:54:55,  3.12it/s]


Episode slut – belöning: 2.0


Training DQN:  24%|██▍       | 14033/58000 [1:19:43<3:40:11,  3.33it/s]


Episode slut – belöning: 1.0


Training DQN:  24%|██▍       | 14115/58000 [1:20:09<3:45:07,  3.25it/s]


Episode slut – belöning: 5.0


Training DQN:  24%|██▍       | 14204/58000 [1:20:37<3:43:17,  3.27it/s]


Episode slut – belöning: 6.0


Training DQN:  25%|██▍       | 14252/58000 [1:20:53<3:41:39,  3.29it/s]


Episode slut – belöning: 2.0


Training DQN:  25%|██▍       | 14301/58000 [1:21:08<3:37:54,  3.34it/s]


Episode slut – belöning: 2.0


Training DQN:  25%|██▍       | 14356/58000 [1:21:26<3:12:17,  3.78it/s]


Episode slut – belöning: 2.0


Training DQN:  25%|██▍       | 14429/58000 [1:21:50<3:33:20,  3.40it/s]


Episode slut – belöning: 4.0


Training DQN:  25%|██▍       | 14499/58000 [1:22:13<4:12:05,  2.88it/s]


Episode slut – belöning: 4.0


Training DQN:  25%|██▌       | 14558/58000 [1:22:33<3:40:18,  3.29it/s]


Episode slut – belöning: 3.0


Training DQN:  25%|██▌       | 14654/58000 [1:23:04<3:17:58,  3.65it/s]


Episode slut – belöning: 6.0


Training DQN:  25%|██▌       | 14706/58000 [1:23:20<3:30:06,  3.43it/s]


Episode slut – belöning: 2.0


Training DQN:  26%|██▌       | 14798/58000 [1:23:50<3:11:41,  3.76it/s]


Episode slut – belöning: 6.0


Training DQN:  26%|██▌       | 14839/58000 [1:24:03<3:10:39,  3.77it/s]


Episode slut – belöning: 1.0


Training DQN:  26%|██▌       | 14893/58000 [1:24:21<3:45:21,  3.19it/s]


Episode slut – belöning: 3.0


Training DQN:  26%|██▌       | 14989/58000 [1:24:52<3:13:29,  3.70it/s]


Episode slut – belöning: 6.0


Training DQN:  26%|██▌       | 15055/58000 [1:25:13<3:11:07,  3.74it/s]


Episode slut – belöning: 2.0


Training DQN:  26%|██▌       | 15152/58000 [1:25:44<3:15:22,  3.66it/s]


Episode slut – belöning: 6.0


Training DQN:  26%|██▋       | 15242/58000 [1:26:13<3:10:39,  3.74it/s]


Episode slut – belöning: 5.0


Training DQN:  26%|██▋       | 15299/58000 [1:26:31<3:38:36,  3.26it/s]


Episode slut – belöning: 3.0


Training DQN:  27%|██▋       | 15416/58000 [1:27:08<3:11:29,  3.71it/s]


Episode slut – belöning: 8.0


Training DQN:  27%|██▋       | 15493/58000 [1:27:33<3:19:01,  3.56it/s]


Episode slut – belöning: 4.0


Training DQN:  27%|██▋       | 15563/58000 [1:27:55<3:32:16,  3.33it/s]


Episode slut – belöning: 4.0


Training DQN:  27%|██▋       | 15609/58000 [1:28:10<3:48:47,  3.09it/s]


Episode slut – belöning: 2.0


Training DQN:  27%|██▋       | 15674/58000 [1:28:31<3:51:57,  3.04it/s]


Episode slut – belöning: 4.0


Training DQN:  27%|██▋       | 15746/58000 [1:28:54<3:03:42,  3.83it/s]


Episode slut – belöning: 8.0


Training DQN:  27%|██▋       | 15796/58000 [1:29:10<3:31:34,  3.32it/s]


Episode slut – belöning: 2.0


Training DQN:  27%|██▋       | 15842/58000 [1:29:24<3:04:27,  3.81it/s]


Episode slut – belöning: 2.0


Training DQN:  27%|██▋       | 15920/58000 [1:29:49<3:07:28,  3.74it/s]


Episode slut – belöning: 5.0


Training DQN:  28%|██▊       | 15977/58000 [1:30:07<3:05:24,  3.78it/s]


Episode slut – belöning: 2.0


Training DQN:  28%|██▊       | 16047/58000 [1:30:29<3:46:33,  3.09it/s]


Episode slut – belöning: 4.0


Training DQN:  28%|██▊       | 16093/58000 [1:30:44<3:04:15,  3.79it/s]


Episode slut – belöning: 2.0


Training DQN:  28%|██▊       | 16155/58000 [1:31:03<3:44:55,  3.10it/s]


Episode slut – belöning: 4.0


Training DQN:  28%|██▊       | 16249/58000 [1:31:33<3:29:13,  3.33it/s]


Episode slut – belöning: 7.0


Training DQN:  28%|██▊       | 16305/58000 [1:31:51<3:17:17,  3.52it/s]


Episode slut – belöning: 2.0


Training DQN:  28%|██▊       | 16386/58000 [1:32:17<3:05:19,  3.74it/s]


Episode slut – belöning: 5.0


Training DQN:  28%|██▊       | 16465/58000 [1:32:42<3:49:47,  3.01it/s]


Episode slut – belöning: 4.0


Training DQN:  29%|██▊       | 16553/58000 [1:33:10<3:01:49,  3.80it/s]


Episode slut – belöning: 6.0


Training DQN:  29%|██▊       | 16633/58000 [1:33:35<3:18:58,  3.47it/s]


Episode slut – belöning: 6.0


Training DQN:  29%|██▉       | 16683/58000 [1:33:51<3:05:28,  3.71it/s]


Episode slut – belöning: 2.0


Training DQN:  29%|██▉       | 16764/58000 [1:34:17<3:02:44,  3.76it/s]


Episode slut – belöning: 5.0


Training DQN:  29%|██▉       | 16859/58000 [1:34:47<3:37:27,  3.15it/s]


Episode slut – belöning: 7.0


Training DQN:  29%|██▉       | 16897/58000 [1:34:59<3:25:06,  3.34it/s]


Episode slut – belöning: 1.0


Training DQN:  29%|██▉       | 16956/58000 [1:35:18<3:24:14,  3.35it/s]


Episode slut – belöning: 3.0


Training DQN:  29%|██▉       | 17016/58000 [1:35:37<3:01:12,  3.77it/s]


Episode slut – belöning: 3.0


Training DQN:  30%|██▉       | 17129/58000 [1:36:13<3:10:12,  3.58it/s]


Episode slut – belöning: 7.0


Training DQN:  30%|██▉       | 17202/58000 [1:36:36<3:06:57,  3.64it/s]


Episode slut – belöning: 4.0


Training DQN:  30%|██▉       | 17264/58000 [1:36:56<3:22:47,  3.35it/s]


Episode slut – belöning: 3.0


Training DQN:  30%|██▉       | 17326/58000 [1:37:16<3:30:06,  3.23it/s]


Episode slut – belöning: 3.0


Training DQN:  30%|██▉       | 17373/58000 [1:37:31<3:03:05,  3.70it/s]


Episode slut – belöning: 2.0


Training DQN:  30%|███       | 17460/58000 [1:37:58<2:58:43,  3.78it/s]


Episode slut – belöning: 5.0


Training DQN:  30%|███       | 17559/58000 [1:38:30<4:07:14,  2.73it/s]


Episode slut – belöning: 6.0


Training DQN:  30%|███       | 17634/58000 [1:38:54<3:24:22,  3.29it/s]


Episode slut – belöning: 4.0


Training DQN:  31%|███       | 17732/58000 [1:39:25<3:41:02,  3.04it/s]


Episode slut – belöning: 7.0


Training DQN:  31%|███       | 17805/58000 [1:39:49<3:23:18,  3.30it/s]


Episode slut – belöning: 5.0


Training DQN:  31%|███       | 17890/58000 [1:40:16<3:36:00,  3.09it/s]


Episode slut – belöning: 4.0


Training DQN:  31%|███       | 17949/58000 [1:40:35<2:56:24,  3.78it/s]


Episode slut – belöning: 3.0


Training DQN:  31%|███       | 18014/58000 [1:40:55<3:37:31,  3.06it/s]


Episode slut – belöning: 4.0


Training DQN:  31%|███       | 18108/58000 [1:41:25<2:55:55,  3.78it/s]


Episode slut – belöning: 5.0


Training DQN:  31%|███▏      | 18156/58000 [1:41:40<3:01:27,  3.66it/s]


Episode slut – belöning: 2.0


Training DQN:  31%|███▏      | 18230/58000 [1:42:04<2:59:25,  3.69it/s]


Episode slut – belöning: 5.0


Training DQN:  32%|███▏      | 18297/58000 [1:42:25<2:52:55,  3.83it/s]


Episode slut – belöning: 3.0


Training DQN:  32%|███▏      | 18368/58000 [1:42:48<2:57:05,  3.73it/s]


Episode slut – belöning: 4.0


Training DQN:  32%|███▏      | 18449/58000 [1:43:14<3:17:34,  3.34it/s]


Episode slut – belöning: 5.0


Training DQN:  32%|███▏      | 18512/58000 [1:43:34<2:56:40,  3.73it/s]


Episode slut – belöning: 3.0


Training DQN:  32%|███▏      | 18590/58000 [1:43:58<2:53:41,  3.78it/s]


Episode slut – belöning: 4.0


Training DQN:  32%|███▏      | 18639/58000 [1:44:14<2:50:57,  3.84it/s]


Episode slut – belöning: 2.0


Training DQN:  32%|███▏      | 18747/58000 [1:44:49<3:31:02,  3.10it/s]


Episode slut – belöning: 8.0


Training DQN:  32%|███▏      | 18819/58000 [1:45:11<3:16:36,  3.32it/s]


Episode slut – belöning: 3.0


Training DQN:  33%|███▎      | 18926/58000 [1:45:45<2:50:20,  3.82it/s]


Episode slut – belöning: 8.0


Training DQN:  33%|███▎      | 19047/58000 [1:46:24<2:53:02,  3.75it/s]


Episode slut – belöning: 9.0


Training DQN:  33%|███▎      | 19140/58000 [1:46:54<3:01:15,  3.57it/s]


Episode slut – belöning: 6.0


Training DQN:  33%|███▎      | 19243/58000 [1:47:27<2:51:53,  3.76it/s]


Episode slut – belöning: 7.0


Training DQN:  33%|███▎      | 19313/58000 [1:47:49<3:05:12,  3.48it/s]


Episode slut – belöning: 4.0


Training DQN:  34%|███▎      | 19452/58000 [1:48:33<3:14:28,  3.30it/s]


Episode slut – belöning: 10.0


Training DQN:  34%|███▎      | 19499/58000 [1:48:48<2:47:36,  3.83it/s]


Episode slut – belöning: 2.0


Training DQN:  34%|███▎      | 19550/58000 [1:49:05<3:03:45,  3.49it/s]


Episode slut – belöning: 2.0


Training DQN:  34%|███▍      | 19667/58000 [1:49:42<3:28:34,  3.06it/s]


Episode slut – belöning: 8.0


Training DQN:  34%|███▍      | 19737/58000 [1:50:05<3:17:44,  3.23it/s]


Episode slut – belöning: 4.0


Training DQN:  34%|███▍      | 19792/58000 [1:50:22<3:19:08,  3.20it/s]


Episode slut – belöning: 3.0


Training DQN:  34%|███▍      | 19850/58000 [1:50:41<3:00:25,  3.52it/s]


Episode slut – belöning: 3.0


Training DQN:  34%|███▍      | 19901/58000 [1:50:57<2:51:11,  3.71it/s]


Episode slut – belöning: 2.0


Training DQN:  34%|███▍      | 19946/58000 [1:51:11<2:51:07,  3.71it/s]


Episode slut – belöning: 1.0


Training DQN:  35%|███▍      | 20024/58000 [1:51:36<2:47:56,  3.77it/s]


Episode slut – belöning: 4.0


Training DQN:  35%|███▍      | 20156/58000 [1:52:18<2:53:35,  3.63it/s]


Episode slut – belöning: 14.0


Training DQN:  35%|███▍      | 20239/58000 [1:52:44<2:56:32,  3.57it/s]


Episode slut – belöning: 5.0


Training DQN:  35%|███▌      | 20342/58000 [1:53:17<2:56:25,  3.56it/s]


Episode slut – belöning: 7.0


Training DQN:  35%|███▌      | 20421/58000 [1:53:42<2:45:52,  3.78it/s]


Episode slut – belöning: 5.0


Training DQN:  35%|███▌      | 20510/58000 [1:54:10<2:45:19,  3.78it/s]


Episode slut – belöning: 6.0


Training DQN:  36%|███▌      | 20612/58000 [1:54:43<3:08:03,  3.31it/s]


Episode slut – belöning: 9.0


Training DQN:  36%|███▌      | 20694/58000 [1:55:09<3:09:56,  3.27it/s]


Episode slut – belöning: 6.0


Training DQN:  36%|███▌      | 20776/58000 [1:55:35<2:50:31,  3.64it/s]


Episode slut – belöning: 4.0


Training DQN:  36%|███▌      | 20893/58000 [1:56:13<2:46:54,  3.71it/s]


Episode slut – belöning: 5.0


Training DQN:  36%|███▌      | 20955/58000 [1:56:32<3:10:49,  3.24it/s]


Episode slut – belöning: 3.0


Training DQN:  36%|███▋      | 21051/58000 [1:57:03<2:46:55,  3.69it/s]


Episode slut – belöning: 7.0


Training DQN:  36%|███▋      | 21121/58000 [1:57:26<3:37:53,  2.82it/s]


Episode slut – belöning: 4.0


Training DQN:  37%|███▋      | 21186/58000 [1:57:46<3:06:30,  3.29it/s]


Episode slut – belöning: 4.0


Training DQN:  37%|███▋      | 21281/58000 [1:58:16<2:53:08,  3.53it/s]


Episode slut – belöning: 6.0


Training DQN:  37%|███▋      | 21384/58000 [1:58:49<2:46:01,  3.68it/s]


Episode slut – belöning: 7.0


Training DQN:  37%|███▋      | 21476/58000 [1:59:19<2:52:15,  3.53it/s]


Episode slut – belöning: 6.0


Training DQN:  37%|███▋      | 21600/58000 [1:59:58<3:00:45,  3.36it/s]


Episode slut – belöning: 10.0


Training DQN:  37%|███▋      | 21666/58000 [2:00:19<2:40:38,  3.77it/s]


Episode slut – belöning: 4.0


Training DQN:  37%|███▋      | 21749/58000 [2:00:46<2:40:52,  3.76it/s]


Episode slut – belöning: 4.0


Training DQN:  38%|███▊      | 21848/58000 [2:01:17<2:46:32,  3.62it/s]


Episode slut – belöning: 7.0


Training DQN:  38%|███▊      | 21942/58000 [2:01:47<3:03:33,  3.27it/s]


Episode slut – belöning: 6.0


Training DQN:  38%|███▊      | 22004/58000 [2:02:07<2:42:37,  3.69it/s]


Episode slut – belöning: 3.0


Training DQN:  38%|███▊      | 22090/58000 [2:02:34<3:01:03,  3.31it/s]


Episode slut – belöning: 6.0


Training DQN:  38%|███▊      | 22164/58000 [2:02:58<2:41:36,  3.70it/s]


Episode slut – belöning: 4.0


Training DQN:  38%|███▊      | 22229/58000 [2:03:18<2:38:01,  3.77it/s]


Episode slut – belöning: 3.0


Training DQN:  38%|███▊      | 22286/58000 [2:03:36<3:04:36,  3.22it/s]


Episode slut – belöning: 3.0


Training DQN:  39%|███▊      | 22395/58000 [2:04:11<2:47:38,  3.54it/s]


Episode slut – belöning: 8.0


Training DQN:  39%|███▊      | 22465/58000 [2:04:34<3:00:05,  3.29it/s]


Episode slut – belöning: 4.0


Training DQN:  39%|███▉      | 22557/58000 [2:05:03<2:40:18,  3.68it/s]


Episode slut – belöning: 6.0


Training DQN:  39%|███▉      | 22676/58000 [2:05:41<2:48:06,  3.50it/s]


Episode slut – belöning: 11.0


Training DQN:  39%|███▉      | 22788/58000 [2:06:17<2:43:47,  3.58it/s]


Episode slut – belöning: 8.0


Training DQN:  39%|███▉      | 22860/58000 [2:06:40<2:41:45,  3.62it/s]


Episode slut – belöning: 4.0


Training DQN:  40%|███▉      | 22968/58000 [2:07:14<2:34:11,  3.79it/s]


Episode slut – belöning: 6.0


Training DQN:  40%|███▉      | 23050/58000 [2:07:40<2:59:15,  3.25it/s]


Episode slut – belöning: 6.0


Training DQN:  40%|███▉      | 23109/58000 [2:07:59<2:35:46,  3.73it/s]


Episode slut – belöning: 3.0


Training DQN:  40%|███▉      | 23195/58000 [2:08:26<2:46:29,  3.48it/s]


Episode slut – belöning: 6.0


Training DQN:  40%|████      | 23293/58000 [2:08:58<3:08:45,  3.06it/s]


Episode slut – belöning: 7.0


Training DQN:  40%|████      | 23405/58000 [2:09:33<2:41:32,  3.57it/s]


Episode slut – belöning: 10.0


Training DQN:  41%|████      | 23495/58000 [2:10:02<2:46:03,  3.46it/s]


Episode slut – belöning: 6.0


Training DQN:  41%|████      | 23679/58000 [2:11:00<2:32:55,  3.74it/s]


Episode slut – belöning: 10.0


Training DQN:  41%|████      | 23782/58000 [2:11:33<2:55:14,  3.25it/s]


Episode slut – belöning: 7.0


Training DQN:  41%|████      | 23905/58000 [2:12:13<2:53:38,  3.27it/s]


Episode slut – belöning: 12.0


Training DQN:  41%|████▏     | 23999/58000 [2:12:43<3:03:15,  3.09it/s]


Episode slut – belöning: 6.0


Training DQN:  42%|████▏     | 24077/58000 [2:13:08<2:33:05,  3.69it/s]


Episode slut – belöning: 5.0


Training DQN:  42%|████▏     | 24162/58000 [2:13:35<2:30:18,  3.75it/s]


Episode slut – belöning: 5.0


Training DQN:  42%|████▏     | 24241/58000 [2:14:00<2:42:19,  3.47it/s]


Episode slut – belöning: 5.0


Training DQN:  42%|████▏     | 24336/58000 [2:14:30<3:05:20,  3.03it/s]


Episode slut – belöning: 6.0


Training DQN:  42%|████▏     | 24465/58000 [2:15:11<2:38:21,  3.53it/s]


Episode slut – belöning: 11.0


Training DQN:  42%|████▏     | 24511/58000 [2:15:26<2:47:22,  3.33it/s]


Episode slut – belöning: 2.0


Training DQN:  42%|████▏     | 24619/58000 [2:16:00<2:49:03,  3.29it/s]


Episode slut – belöning: 8.0


Training DQN:  43%|████▎     | 24745/58000 [2:16:41<3:03:09,  3.03it/s]


Episode slut – belöning: 6.0


Training DQN:  43%|████▎     | 24890/58000 [2:17:27<2:34:21,  3.58it/s]


Episode slut – belöning: 11.0


Training DQN:  43%|████▎     | 25015/58000 [2:18:06<2:31:43,  3.62it/s]


Episode slut – belöning: 9.0


Training DQN:  43%|████▎     | 25086/58000 [2:18:29<2:34:56,  3.54it/s]


Episode slut – belöning: 4.0


Training DQN:  43%|████▎     | 25193/58000 [2:19:03<2:55:43,  3.11it/s]


Episode slut – belöning: 8.0


Training DQN:  44%|████▎     | 25300/58000 [2:19:37<2:24:59,  3.76it/s]


Episode slut – belöning: 7.0


Training DQN:  44%|████▍     | 25416/58000 [2:20:15<2:43:41,  3.32it/s]


Episode slut – belöning: 12.0


Training DQN:  44%|████▍     | 25501/58000 [2:20:42<2:33:04,  3.54it/s]


Episode slut – belöning: 5.0


Training DQN:  44%|████▍     | 25619/58000 [2:21:19<2:32:09,  3.55it/s]


Episode slut – belöning: 9.0


Training DQN:  44%|████▍     | 25701/58000 [2:21:45<2:49:19,  3.18it/s]


Episode slut – belöning: 5.0


Training DQN:  44%|████▍     | 25798/58000 [2:22:16<2:46:10,  3.23it/s]


Episode slut – belöning: 6.0


Training DQN:  45%|████▍     | 25914/58000 [2:22:53<2:36:36,  3.41it/s]


Episode slut – belöning: 8.0


Training DQN:  45%|████▍     | 25969/58000 [2:23:11<2:20:44,  3.79it/s]


Episode slut – belöning: 3.0


Training DQN:  45%|████▍     | 26087/58000 [2:23:48<2:25:53,  3.65it/s]


Episode slut – belöning: 9.0


Training DQN:  45%|████▌     | 26242/58000 [2:24:38<2:55:35,  3.01it/s]


Episode slut – belöning: 20.0


Training DQN:  45%|████▌     | 26344/58000 [2:25:10<2:53:54,  3.03it/s]


Episode slut – belöning: 7.0


Training DQN:  46%|████▌     | 26426/58000 [2:25:37<2:53:22,  3.04it/s]


Episode slut – belöning: 6.0


Training DQN:  46%|████▌     | 26530/58000 [2:26:14<2:52:20,  3.04it/s]


Episode slut – belöning: 8.0


Training DQN:  46%|████▌     | 26660/58000 [2:26:58<2:20:29,  3.72it/s]


Episode slut – belöning: 10.0


Training DQN:  46%|████▌     | 26747/58000 [2:27:28<2:55:49,  2.96it/s]


Episode slut – belöning: 5.0


Training DQN:  46%|████▋     | 26836/58000 [2:27:57<2:22:25,  3.65it/s]


Episode slut – belöning: 5.0


Training DQN:  46%|████▋     | 26935/58000 [2:28:31<2:52:56,  2.99it/s]


Episode slut – belöning: 7.0


Training DQN:  47%|████▋     | 27050/58000 [2:29:10<2:56:05,  2.93it/s]


Episode slut – belöning: 9.0


Training DQN:  47%|████▋     | 27141/58000 [2:29:41<3:00:13,  2.85it/s]


Episode slut – belöning: 6.0


Training DQN:  47%|████▋     | 27212/58000 [2:30:04<2:39:01,  3.23it/s]


Episode slut – belöning: 4.0


Training DQN:  47%|████▋     | 27332/58000 [2:30:49<2:23:05,  3.57it/s]


Episode slut – belöning: 12.0


Training DQN:  47%|████▋     | 27443/58000 [2:31:25<2:41:33,  3.15it/s]


Episode slut – belöning: 8.0


Training DQN:  47%|████▋     | 27546/58000 [2:32:02<2:50:55,  2.97it/s]


Episode slut – belöning: 11.0


Training DQN:  48%|████▊     | 27680/58000 [2:32:47<2:22:46,  3.54it/s]


Episode slut – belöning: 14.0


Training DQN:  48%|████▊     | 27889/58000 [2:33:57<2:48:02,  2.99it/s]


Episode slut – belöning: 9.0


Training DQN:  48%|████▊     | 27958/58000 [2:34:20<2:39:46,  3.13it/s]


Episode slut – belöning: 4.0


Training DQN:  48%|████▊     | 28062/58000 [2:34:55<3:01:41,  2.75it/s]


Episode slut – belöning: 7.0


Training DQN:  49%|████▊     | 28136/58000 [2:35:21<2:23:34,  3.47it/s]


Episode slut – belöning: 5.0


Training DQN:  49%|████▊     | 28227/58000 [2:35:51<2:33:41,  3.23it/s]


Episode slut – belöning: 6.0


Training DQN:  49%|████▉     | 28298/58000 [2:36:15<2:13:51,  3.70it/s]


Episode slut – belöning: 4.0


Training DQN:  49%|████▉     | 28408/58000 [2:36:52<2:24:40,  3.41it/s]


Episode slut – belöning: 9.0


Training DQN:  49%|████▉     | 28513/58000 [2:37:28<2:47:24,  2.94it/s]


Episode slut – belöning: 7.0


Training DQN:  49%|████▉     | 28631/58000 [2:38:08<2:38:18,  3.09it/s]


Episode slut – belöning: 9.0


Training DQN:  49%|████▉     | 28704/58000 [2:38:33<3:00:32,  2.70it/s]


Episode slut – belöning: 5.0


Training DQN:  50%|████▉     | 28810/58000 [2:39:10<2:41:54,  3.00it/s]


Episode slut – belöning: 7.0


Training DQN:  50%|████▉     | 28891/58000 [2:39:38<2:41:23,  3.01it/s]


Episode slut – belöning: 6.0


Training DQN:  50%|████▉     | 28984/58000 [2:40:11<2:33:12,  3.16it/s]


Episode slut – belöning: 5.0


Training DQN:  50%|█████     | 29105/58000 [2:40:51<2:15:48,  3.55it/s]


Episode slut – belöning: 9.0


Training DQN:  51%|█████     | 29354/58000 [2:42:14<2:26:57,  3.25it/s]


Episode slut – belöning: 6.0


Training DQN:  51%|█████     | 29416/58000 [2:42:35<2:16:46,  3.48it/s]


Episode slut – belöning: 4.0


Training DQN:  51%|█████     | 29482/58000 [2:42:56<2:11:40,  3.61it/s]


Episode slut – belöning: 4.0


Training DQN:  51%|█████     | 29564/58000 [2:43:23<2:11:37,  3.60it/s]


Episode slut – belöning: 9.0


Training DQN:  51%|█████     | 29634/58000 [2:43:46<2:24:36,  3.27it/s]


Episode slut – belöning: 5.0


Training DQN:  51%|█████     | 29717/58000 [2:44:13<2:15:31,  3.48it/s]


Episode slut – belöning: 9.0


Training DQN:  51%|█████▏    | 29857/58000 [2:45:02<2:40:53,  2.92it/s]


Episode slut – belöning: 14.0


Training DQN:  52%|█████▏    | 29963/58000 [2:45:39<2:17:21,  3.40it/s]


Episode slut – belöning: 7.0


Training DQN:  52%|█████▏    | 30365/58000 [2:47:54<2:20:21,  3.28it/s]


Episode slut – belöning: 11.0


Training DQN:  53%|█████▎    | 30467/58000 [2:48:29<2:16:30,  3.36it/s]


Episode slut – belöning: 8.0


Training DQN:  53%|█████▎    | 30578/58000 [2:49:06<2:06:43,  3.61it/s]


Episode slut – belöning: 9.0


Training DQN:  53%|█████▎    | 30640/58000 [2:49:26<2:11:18,  3.47it/s]


Episode slut – belöning: 4.0


Training DQN:  54%|█████▎    | 31148/58000 [2:52:25<2:40:45,  2.78it/s]


Episode slut – belöning: 5.0


Training DQN:  54%|█████▍    | 31254/58000 [2:53:04<2:10:57,  3.40it/s]


Episode slut – belöning: 8.0


Training DQN:  54%|█████▍    | 31341/58000 [2:53:32<2:05:10,  3.55it/s]


Episode slut – belöning: 6.0


Training DQN:  54%|█████▍    | 31454/58000 [2:54:11<2:02:51,  3.60it/s]


Episode slut – belöning: 10.0


Training DQN:  54%|█████▍    | 31483/58000 [2:54:21<2:37:56,  2.80it/s]