In [71]:
import random

class Node():
    def __init__(self, left, right, is_leaf = False, idx = None):
        self.left = left
        self.right = right
        self.is_leaf = is_leaf
        self.idx = idx

        if not self.is_leaf:
            self.value = left.value + right.value

        self.parent = None

        if left is not None:
            left.parent = self
        if right is not None:
            right.parent = self

    @classmethod
    def create_leaf(cls, value, idx):
        leaf = cls(None, None, True, idx)
        leaf.value = value
        return leaf


class SumTree():
    def __init__(self, weights: list):
        nodes = [Node.create_leaf(v, i) for i,v in enumerate(weights)]
        self.leaf_nodes = nodes
        while len(nodes) > 1:
            inodes = iter(nodes)
            nodes = [Node(*pair) for pair in zip(inodes, inodes)]

        self.top_node = nodes[0]
        
    def retrieve(self, value: float, node: Node):
        if node.is_leaf:
            return node
        if node.left.value >= value:
            return self.retrieve(value, node.left)
        else:
            return self.retrieve(value - node.left.value, node.right)

    def update(self, idx, new_value: float):
        node = self.leaf_nodes[idx]
        change = new_value - node.value
        node.value = new_value
        self.propagate_changes(change, node.parent)

    def propagate_changes(self, change:float, node: Node):
        node.value += change
        if node.parent is not None:
            self.propagate_changes(change, node.parent)

    def draw_idx(self):
        u = random.uniform(0, self.top_node.value)
        return self.retrieve(u, self.top_node).idx


In [4]:
sumTree = SumTree([1,4,2,3])
sumTree.top_node.value

10

In [5]:
sumTree.update(3, 10)

In [6]:
sumTree.top_node.value

17

In [7]:
x = []
for i in range(1000000):
    x.append(sumTree.draw_idx())

sum([1 for i in x if i == 3]) / sum([1 for j in x if j == 0])

9.984864185367014

In [35]:
sumTree.draw_idx()

3

[1,
 3,
 1,
 1,
 3,
 2,
 3,
 3,
 3,
 3,
 3,
 0,
 3,
 3,
 3,
 3,
 2,
 1,
 1,
 3,
 0,
 3,
 3,
 3,
 3,
 1,
 3,
 3,
 3,
 0,
 3,
 3]

In [139]:
from typing import Tuple, List, Union
from collections import namedtuple, deque
import numpy as np

Experience = namedtuple("Experience", 
                            field_names = ["state", "action", "reward", "done", "next_state"])

class ReplayMemory:
    """
    Original Replay Memory by Lin. Used for vanilla DQN, no prioritized Replay or bootstrapping with n>1.
    Used to store and sample experiences
    """
    def __init__(self, capacity: int, epsilon = 0.01, alpha = 0.6, beta = 0.4) -> None:
        """
        Args: 
            capacity: size of buffer
        """
        self.buffer = deque(maxlen=capacity)
        self.sumTree = SumTree([0 for x in range(capacity)])
        self.epsilon = epsilon
        self.alpha = alpha
        self.beta = beta
        self.first_append = True

    def __len__(self):
        return len(self.buffer)

    def append(self, sample: Experience, tderror) -> None:
        """
        Append sample
        Args:
            sample: A sample of an experience to store. Experience is a tuple(state, action, reward, done, next_state)
        """
        self.buffer.append(sample)
        priority = self.get_priority(tderror)
        #print(priority)
        self.sumTree.update(len(self.buffer), priority)


    def get_priority(self, tderror):            
        pi = abs(tderror) + self.epsilon
        
        if self.first_append:
            Pi = (pi ** self.alpha) / pi
            self.first_append = False
        else:
            Pi = (pi ** self.alpha) / self.sumTree.top_node.value
        print(Pi)
        print(self.sumTree.top_node.value)
        print("-----")
        return Pi

    def sample(self, batch_size: int = 1) -> Tuple:
        """
        Return batch of buffer, randomly (uniformely).
        Args: 
            batch_size: size of batch
        """
        idxs = []
        for i in range(batch_size):
            idxs.append(self.sumTree.draw_idx())
        #[idxs.append(self.sumTree.draw_idx()) for _ in range(batch_size)]
        
        states, actions, rewards, dones, next_states = \
            zip(*[self.buffer[idx] for idx in idxs])

        #TODO IS_WEIGHTS

        return np.array(states), np.array(actions), \
            np.array(rewards, dtype=np.float32), np.array(dones, dtype=bool), \
            np.array(next_states)

# NOW LETS TEST IT

In [37]:
import numpy as np
import os
os.environ.setdefault('PATH', '')
from collections import deque
import gym
from gym import spaces


USE_PIL = True
if USE_PIL:
    # you should use pillow-simd, as it is faster than stardand Pillow
    from PIL import Image
else:
    import cv2
    cv2.ocl.setUseOpenCL(False)


class TimeLimit(gym.Wrapper):
    def __init__(self, env, max_episode_steps=None):
        super(TimeLimit, self).__init__(env)
        self._max_episode_steps = max_episode_steps
        self._elapsed_steps = 0

    def step(self, ac):
        observation, reward, done, info = self.env.step(ac)
        self._elapsed_steps += 1
        if self._elapsed_steps >= self._max_episode_steps:
            done = True
            info['TimeLimit.truncated'] = True
        return observation, reward, done, info

    def reset(self, **kwargs):
        self._elapsed_steps = 0
        return self.env.reset(**kwargs)

class ClipActionsWrapper(gym.Wrapper):
    def step(self, action):
        import numpy as np
        action = np.nan_to_num(action)
        action = np.clip(action, self.action_space.low, self.action_space.high)
        return self.env.step(action)

    def reset(self, **kwargs):
        return self.env.reset(**kwargs)


class NoopResetEnv(gym.Wrapper):
    def __init__(self, env, noop_max=30):
        """Sample initial states by taking random number of no-ops on reset.
        No-op is assumed to be action 0.
        """
        gym.Wrapper.__init__(self, env)
        self.noop_max = noop_max
        self.override_num_noops = None
        self.noop_action = 0
        assert env.unwrapped.get_action_meanings()[0] == 'NOOP'

    def reset(self, **kwargs):
        """ Do no-op action for a number of steps in [1, noop_max]."""
        self.env.reset(**kwargs)
        if self.override_num_noops is not None:
            noops = self.override_num_noops
        else:
            noops = self.unwrapped.np_random.randint(1, self.noop_max + 1) #pylint: disable=E1101
        assert noops > 0
        obs = None
        for _ in range(noops):
            obs, _, done, _ = self.env.step(self.noop_action)
            if done:
                obs = self.env.reset(**kwargs)
        return obs

    def step(self, ac):
        return self.env.step(ac)

class FireResetEnv(gym.Wrapper):
    def __init__(self, env):
        """Take action on reset for environments that are fixed until firing."""
        gym.Wrapper.__init__(self, env)
        assert env.unwrapped.get_action_meanings()[1] == 'FIRE'
        assert len(env.unwrapped.get_action_meanings()) >= 3

    def reset(self, **kwargs):
        self.env.reset(**kwargs)
        obs, _, done, _ = self.env.step(1)
        if done:
            self.env.reset(**kwargs)
        obs, _, done, _ = self.env.step(2)
        if done:
            self.env.reset(**kwargs)
        return obs

    def step(self, ac):
        return self.env.step(ac)

class EpisodicLifeEnv(gym.Wrapper):
    def __init__(self, env):
        """Make end-of-life == end-of-episode, but only reset on true game over.
        Done by DeepMind for the DQN and co. since it helps value estimation.
        """
        gym.Wrapper.__init__(self, env)
        self.lives = 0
        self.was_real_done  = True

    def step(self, action):
        obs, reward, done, info = self.env.step(action)
        self.was_real_done = done
        # check current lives, make loss of life terminal,
        # then update lives to handle bonus lives
        lives = self.env.unwrapped.ale.lives()
        if lives < self.lives and lives > 0:
            # for Qbert sometimes we stay in lives == 0 condition for a few frames
            # so it's important to keep lives > 0, so that we only reset once
            # the environment advertises done.
            done = True
        self.lives = lives
        return obs, reward, done, info

    def reset(self, **kwargs):
        """Reset only when lives are exhausted.
        This way all states are still reachable even though lives are episodic,
        and the learner need not know about any of this behind-the-scenes.
        """
        if self.was_real_done:
            obs = self.env.reset(**kwargs)
        else:
            # no-op step to advance from terminal/lost life state
            obs, _, _, _ = self.env.step(0)
        self.lives = self.env.unwrapped.ale.lives()
        return obs

class MaxAndSkipEnv(gym.Wrapper):
    def __init__(self, env, skip=4):
        """Return only every `skip`-th frame"""
        gym.Wrapper.__init__(self, env)
        # most recent raw observations (for max pooling across time steps)
        self._obs_buffer = np.zeros((2,)+env.observation_space.shape, dtype=np.uint8)
        self._skip       = skip

    def step(self, action):
        """Repeat action, sum reward, and max over last observations."""
        total_reward = 0.0
        done = None
        for i in range(self._skip):
            obs, reward, done, info = self.env.step(action)
            if i == self._skip - 2: self._obs_buffer[0] = obs
            if i == self._skip - 1: self._obs_buffer[1] = obs
            total_reward += reward
            if done:
                break
        # Note that the observation on the done=True frame
        # doesn't matter
        max_frame = self._obs_buffer.max(axis=0)

        return max_frame, total_reward, done, info

    def reset(self, **kwargs):
        return self.env.reset(**kwargs)

class ClipRewardEnv(gym.RewardWrapper):
    def __init__(self, env):
        gym.RewardWrapper.__init__(self, env)

    def reward(self, reward):
        """Bin reward to {+1, 0, -1} by its sign."""
        return np.sign(reward)


class WarpFrame(gym.ObservationWrapper):
    def __init__(self, env, width=84, height=84, grayscale=True, dict_space_key=None):
        """
        Warp frames to 84x84 as done in the Nature paper and later work.
        If the environment uses dictionary observations, `dict_space_key` can be specified which indicates which
        observation should be warped.
        """
        super().__init__(env)
        self._width = width
        self._height = height
        self._grayscale = grayscale
        self._key = dict_space_key
        if self._grayscale:
            num_colors = 1
        else:
            num_colors = 3

        new_space = gym.spaces.Box(
            low=0,
            high=255,
            shape=(self._height, self._width, num_colors),
            dtype=np.uint8,
        )
        if self._key is None:
            original_space = self.observation_space
            self.observation_space = new_space
        else:
            original_space = self.observation_space.spaces[self._key]
            self.observation_space.spaces[self._key] = new_space
        assert original_space.dtype == np.uint8 and len(original_space.shape) == 3

    def observation(self, obs):
        if self._key is None:
            frame = obs
        else:
            frame = obs[self._key]
        if USE_PIL:
            frame = Image.fromarray(frame)
            if self._grayscale:
                frame = frame.convert("L")
            frame = frame.resize((self._width, self._height))
            frame = np.array(frame)
        else:
            if self._grayscale:
                frame = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
            frame = cv2.resize(
                frame, (self._width, self._height),
                interpolation=cv2.INTER_AREA
            )
        if self._grayscale:
            frame = np.expand_dims(frame, -1)

        if self._key is None:
            obs = frame
        else:
            obs = obs.copy()
            obs[self._key] = frame
        return obs

class LazyFrames(object):
    def __init__(self, frames):
        """This object ensures that common frames between the observations are only stored once.
        It exists purely to optimize memory usage which can be huge for DQN's 1M frames replay
        buffers.
        This object should only be converted to numpy array before being passed to the model.
        You'd not believe how complex the previous solution was."""
        self._frames = frames
        self._out = None

    def _force(self):
        if self._out is None:
            self._out = np.concatenate(self._frames, axis=0)
            self._frames = None
        return self._out

    def __array__(self, dtype=None):
        out = self._force()
        if dtype is not None:
            out = out.astype(dtype)
        return out

    def __len__(self):
        return len(self._force())

    def __getitem__(self, i):
        return self._force()[i]

    def count(self):
        frames = self._force()
        return frames.shape[1:frames.ndim]

    def frame(self, i):
        return self._force()[i, ...]


class FrameStack(gym.Wrapper):
    def __init__(self, env, k):
        """Stack k last frames.
        Returns lazy array, which is much more memory efficient.
        See Also
        --------
        baselines.common.atari_wrappers.LazyFrames
        """
        gym.Wrapper.__init__(self, env)
        self.k = k
        self.frames = deque([], maxlen=k)
        shp = env.observation_space.shape
        self.observation_space = spaces.Box(low=0, high=255, shape=(shp[0]*k, shp[1], shp[2]), dtype=env.observation_space.dtype)

    def reset(self):
        ob = self.env.reset()
        for _ in range(self.k):
            self.frames.append(ob)
        return self._get_ob()

    def step(self, action):
        ob, reward, done, info = self.env.step(action)
        self.frames.append(ob)
        return self._get_ob(), reward, done, info

    def _get_ob(self):
        assert len(self.frames) == self.k
        return LazyFrames(list(self.frames))

class ScaledFloatFrame(gym.ObservationWrapper):
    def __init__(self, env):
        gym.ObservationWrapper.__init__(self, env)
        self.observation_space = gym.spaces.Box(low=0, high=1, shape=env.observation_space.shape, dtype=np.float32)

    def observation(self, observation):
        # careful! This undoes the memory optimization, use
        # with smaller replay buffers only.
        return np.array(observation).astype(np.float32) / 255.0

class SkipEnv(gym.Wrapper):
    def __init__(self, env, skip=4):
        """Return only every `skip`-th frame"""
        gym.Wrapper.__init__(self, env)
        self._skip       = skip

    def step(self, action):
        """Repeat action, sum reward, and max over last observations."""
        total_reward = 0.0
        done = None
        for i in range(self._skip):
            obs, reward, done, info = self.env.step(action)
            total_reward += reward
            if done:
                break
        return obs, total_reward, done, info

    def reset(self, **kwargs):
        return self.env.reset(**kwargs)


def make_atari(env_id, max_episode_steps=None,
               skip_noop=False, skip_maxskip=False):
    env = gym.make(env_id)
    assert 'NoFrameskip' in env.spec.id
    if not skip_noop:
        env = NoopResetEnv(env, noop_max=30)
    if not skip_maxskip:
        env = MaxAndSkipEnv(env, skip=4)
    else:
        env = SkipEnv(env, skip=4)
    if max_episode_steps is not None:
        env = TimeLimit(env, max_episode_steps=max_episode_steps)
    return env


class ImageToPyTorch(gym.ObservationWrapper):
    """
    Change image shape to CWH
    """
    def __init__(self, env):
        super(ImageToPyTorch, self).__init__(env)
        old_shape = self.observation_space.shape
        new_shape = (old_shape[-1], old_shape[0], old_shape[1])
        self.observation_space = gym.spaces.Box(
            low=0.0, high=1.0, shape=new_shape, dtype=np.uint8)

    def observation(self, observation):
        return np.swapaxes(observation, 2, 0)


def wrap_deepmind(env, episode_life=True, clip_rewards=True,
                  frame_stack=False, scale=False, pytorch_img=False,
                  frame_stack_count=4, skip_firereset=False):
    """Configure environment for DeepMind-style Atari.
    """
    if episode_life:
        env = EpisodicLifeEnv(env)
    if 'FIRE' in env.unwrapped.get_action_meanings():
        if not skip_firereset:
            env = FireResetEnv(env)
    env = WarpFrame(env)
    if pytorch_img:
        env = ImageToPyTorch(env)
    if scale:
        env = ScaledFloatFrame(env)
    if clip_rewards:
        env = ClipRewardEnv(env)
    if frame_stack:
        env = FrameStack(env, frame_stack_count)
    return env

In [38]:
import torch
import torch.nn as nn
import numpy as np

class DQN(nn.Module):
    """
    Neural Network, choosing actions
    """
    def __init__(self, n_in, n_out):
        super(DQN, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(n_in[0], 32, kernel_size=8, stride=4),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1),
            nn.ReLU()
        )

        conv_out_size = self._get_conv_out(n_in)

        self.fc = nn.Sequential(
            nn.Linear(conv_out_size, 512),
            nn.ReLU(),
            nn.Linear(512, n_out)
        )

    def _get_conv_out(self, shape):
        o = self.conv(torch.zeros(1, *shape))
        return int(np.prod(o.size()))

    def forward(self, x):
        #x = x.float() #!!! not sure. I need this, because the images are now stored as uint8 and not float32 anymore
        conv_out = self.conv(x).view(x.size()[0], -1)
        return self.fc(conv_out)



In [132]:
import numpy as np
import torch
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter
import time
from datetime import datetime
import os
import resource


class PER_Agent():
    """
        Vanilla DQN Agent from Mnih2013, Mnih2015
    """
    def __init__(self, env, env_name, config):
        #Hyperparameters
        self.hp_replay_memory_capacity = config['hp_replay_memory_capacity']
        self.hp_gamma = config['hp_gamma']
        self.hp_epsilon_start = config['hp_epsilon_start']
        self.hp_epsilon_end = config['hp_epsilon_end']
        self.hp_epsilon_decay_last_frame = config['hp_epsilon_decay_last_frame']
        self.hp_learning_rate = config['hp_learning_rate']
        self.hp_replay_memory_start_after = config['hp_replay_memory_start_after']
        self.hp_batch_size = config['hp_batch_size']
        self.hp_target_update_after = config['hp_target_update_after']
        self.hp_update_frequency = config['hp_update_frequency']

        #Training loop: Episode length differs in each game, so timesteps / frames are better! (bellemare 2017)
        self.nr_of_total_frames = config['nr_of_total_frames'] 
        self.nr_of_evaluation_frames = config['nr_of_evaluation_frames']
        self.nr_of_frames_before_evaluation = config['nr_of_frames_before_evaluation']
        
        #Evaluation variables
        self.timesteps_overall = -1
        self.timesteps_after_last_episode = 0
        self.train_obtained_returns = []
        self.train_avg_returns = []
        self.eval_obtained_returns = []
        self.eval_counter = 0
        self.eval_epsilon = 0.05
        self.eval_state_samples = []

        self.env = env
        self.env_name = env_name
           
        self.replay_memory = ReplayMemory(self.hp_replay_memory_capacity)
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.dtype = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor
        self.training_start_timestamp = datetime.now(tz=None).strftime("%Y-%m-%d_%H-%M-%S")

        #Network
        self.policy_net = DuelingDQN(self.env.observation_space.shape, self.env.action_space.n).to(self.device)
        self.target_net = DuelingDQN(self.env.observation_space.shape, self.env.action_space.n).to(self.device)
        self._update_target_net()

        self._initialize_evaluation_state_samples_for_max_pred_q_vals(100)


    def _update_target_net(self):
        self.target_net.load_state_dict(self.policy_net.state_dict())


    def _reset(self):
        """
            reset environment and reset the obtained return
        """
        self.state = self.env.reset()
        self.last_obtained_return = 0.0

    #DDQN! need to tidy up a bit TODO
    def _calc_loss(self, batch):
        """
            Calculate L1-Loss for given batch.
        """
        states, actions, rewards, dones, next_states = batch
    
        states_v = torch.from_numpy(states).to(self.device)
        next_states_v = torch.from_numpy(next_states).to(self.device)
        actions_v = torch.from_numpy(actions).to(self.device)
        rewards_v = torch.from_numpy(rewards).to(self.device)
        done_mask = torch.BoolTensor(dones).to(self.device)
        
        state_action_values = self.policy_net(states_v).gather(1, actions_v.unsqueeze(-1)).squeeze(-1)
        
        with torch.no_grad():
            next_actions = self.policy_net(next_states_v).max(1)[1]
            next_state_values = self.target_net(next_states_v).gather(1, next_actions.unsqueeze(-1)).squeeze(-1)
            next_state_values[done_mask] = 0.0
        next_state_values = next_state_values.detach()
        expected_state_action_values = (next_state_values * self.hp_gamma) + rewards_v

        return F.smooth_l1_loss(state_action_values, expected_state_action_values)

    def _get_epsilon(self):
        """
            Get current value for epsilon
        """
        return max(self.hp_epsilon_end, self.hp_epsilon_start - self.timesteps_overall / self.hp_epsilon_decay_last_frame)


    def _select_action(self, eval):
        """
            Select action
        """
        epsilon = self._get_epsilon()
        if eval:
            epsilon = self.eval_epsilon

        with torch.no_grad():
            state = torch.tensor(np.array([self.state], copy=False)).to(self.device)
            q_vals = self.policy_net(state)

        if np.random.random() < epsilon:
            action = self.env.action_space.sample()
            with torch.no_grad():
                action_ = torch.tensor([action]).to(self.device)
                q_val = q_vals.gather(1, action_.unsqueeze(-1)).squeeze(-1)
        else:
            with torch.no_grad():
                q_val, action = torch.max(q_vals, dim=1)
                action = int(action.item())
                del state

        #TODO TDERROR, 1 pass more through target-net :( The next_state is only known afterwards! 
        return action, q_val #qvals


    def _play_step(self, eval=False):
        """
            Play one step and return if episode ended
        """
        action, state_action_values = self._select_action(eval)
        
        next_state, reward, done, _ = self.env.step(action)

        self.last_obtained_return += reward

        if not eval:
            #calculate q_vals of maxa for next state
            #calculate tderror TODO
            with torch.no_grad():
                next_state_ = torch.tensor(np.array([next_state], copy=False)).to(self.device)
                next_state_values = self.target_net(next_state_).max(1)[0]
                td_error = state_action_values - reward + (self.hp_gamma * next_state_values)
                #print(td_error)
            exp = Experience(self.state, action, reward, done, next_state)
            self.replay_memory.append(exp, td_error.item())

        self.state = next_state
        del next_state
        return done


    def train_agent(self):
        """
            Train the Agent. 
        """
        writer = SummaryWriter(comment="_"+self.env_name)
        optimizer = optim.Adam(self.policy_net.parameters(), lr=self.hp_learning_rate)
        print ("Start Training on %s" % self.device)

        episode = 0
        while True:
            episode += 1
            self._reset()
            done = False
            ts_episode_started = time.time()

            while(not done):
                self.timesteps_overall += 1
                done = self._play_step()
                
                if len(self.replay_memory) < self.hp_replay_memory_start_after:
                    continue
                    

                #learn
                if self.timesteps_overall % self.hp_update_frequency == 0:
                    optimizer.zero_grad()
                    batch = self.replay_memory.sample(self.hp_batch_size)
                    loss = self._calc_loss(batch)
                    loss.backward()
                    #gradient clipping, like dueling DQN proposes
                    clipping_value = 10 
                    torch.nn.utils.clip_grad_norm_(self.policy_net.parameters(), clipping_value)
                    optimizer.step()

                if self.timesteps_overall % self.hp_target_update_after == 0:
                    self._update_target_net()


            speed = (self.timesteps_overall - self.timesteps_after_last_episode) / (time.time() - ts_episode_started)         
            ram_usage = int(resource.getrusage(resource.RUSAGE_SELF).ru_maxrss) / 1024 / 1024

            self.timesteps_after_last_episode = self.timesteps_overall
            self.train_obtained_returns.append(self.last_obtained_return)
            self.train_avg_returns.append(np.mean(self.train_obtained_returns[-100:]))

            self._write_tensorboard(writer, speed, ram_usage)

            print("Episode %d completed, timesteps played: %d, return: %d, speed %f, epsilon %f" 
                % (episode, self.timesteps_overall, self.last_obtained_return, speed, self._get_epsilon()))
            print("Mean return of last 100 games: %f" % self.train_avg_returns[-1])
            print("Pytorch memory usage: %2f (gb)" % ram_usage)
            print("Size of Replay Memory: %d" % len(self.replay_memory))
            
            #evaluate every nr_of_frames_before_evaluation
            if self.timesteps_overall >= self.nr_of_frames_before_evaluation * (self.eval_counter+1):
                self._evaluate(writer)

            if self.timesteps_overall >= self.nr_of_total_frames:
                break

        writer.close()


    def _initialize_evaluation_state_samples_for_max_pred_q_vals(self, n):
        """
            Get samples for evaluation by sampling n states from random agent
        """
        self.env.reset()
        for i in range(n):
            with torch.no_grad():
                action = self.env.action_space.sample()
                next_state, reward, done, _ =  self.env.step(action)
                self.eval_state_samples.append(next_state)
                del next_state


    def _get_max_pred_q_vals(self):
        """
            Get max q-values for each state in evaluation_state_samples
        """
        with torch.no_grad():
            max_q_vals = []
            for state in self.eval_state_samples:
                state = torch.from_numpy(np.array([state], copy=False)).to(self.device)#.type(self.dtype)
                q_vals = self.policy_net(state)
                max_q_vals.append(q_vals.max().item())
            return np.mean(max_q_vals)


    def _save_model_snapshot(self, score):
        """
            Save model. For each Evaluation Metric, always save the best.
        """ 
        if not os.path.isdir("models/DQN/" + self.env_name + "/" +  self.training_start_timestamp):
            os.mkdir("models/DQN/%s/%s" % (self.env_name, self.training_start_timestamp))
        torch.save(self.policy_net.state_dict(), "models/DQN/%s/%s/snapshot_%d_score_%d.dat" 
                                % (self.env_name, self.training_start_timestamp, self.eval_counter, score))


    def _evaluate(self, writer):
        """
            Pause Training and evaluate agent by running environment like training, 
            but without actually training the model and decreased exploration.
        """
        _eval_iteration_returns = []
        _eval_iteration_max_q_vals = []

        print("***************** Start Evaluation *****************")
        evaluation_episode = 0
        evaluation_frames = 0
        while True:
            evaluation_episode += 1
            self._reset()
            done = False
            while(not done):
                evaluation_frames += 1
                done = self._play_step(eval=True) 

            _eval_iteration_returns.append(self.last_obtained_return)
            _eval_iteration_max_q_vals.append(self._get_max_pred_q_vals())
            print("Evaluation episode %d ended with return %d" %(evaluation_episode, self.last_obtained_return))      

            if evaluation_frames > self.nr_of_evaluation_frames:
                break

        _score_avg_returns = np.mean(_eval_iteration_returns)
        _score_max_q = np.mean(_eval_iteration_max_q_vals)

        print("Avg evaluation score: %f" % _score_avg_returns)
        print("***************** End Evaluation *****************")

        self.eval_counter += 1
        self._save_model_snapshot(_score_avg_returns)
        self.eval_obtained_returns.append(_score_avg_returns)

        writer.add_scalar('Evaluation/AvgTotalReturnPerEpisode', _score_avg_returns, self.eval_counter)
        writer.add_scalar('Evaluation/MaxPredQVals', _score_max_q, self.eval_counter)

    def _write_tensorboard(self, writer, speed, ram_usage):
        writer.add_scalar('Training/AvgTotalReturn', self.train_avg_returns[-1], self.timesteps_overall)
        writer.add_scalar('Training/ObtainedReturns', self.last_obtained_return, self.timesteps_overall)
        writer.add_scalar('Training/MaxPredQVals', self._get_max_pred_q_vals(), self.timesteps_overall)
        writer.add_scalar('Parameter/Epsilon', self._get_epsilon(), self.timesteps_overall)
        writer.add_scalar('Parameter/Speed', speed, self.timesteps_overall)
        writer.add_scalar('Parameter/MemoryUsage', ram_usage, self.timesteps_overall)
        writer.add_scalar('Parameter/ReplayMemorySize', len(self.replay_memory), self.timesteps_overall)

   



In [133]:
class DuelingDQN(nn.Module):
    def __init__(self, n_in, n_out):
        super(DuelingDQN, self).__init__()

        self.conv = nn.Sequential(
            nn.Conv2d(n_in[0], 32, kernel_size=8, stride=4),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1),
            nn.ReLU()
        )

        conv_out_size = self._get_conv_out(n_in)

        self.value_stream = nn.Sequential(
            nn.Linear(conv_out_size, 512),
            nn.ReLU(),
            nn.Linear(512, 1)
        )

        self.advantage_stream = nn.Sequential(
            nn.Linear(conv_out_size, 512),
            nn.ReLU(),
            nn.Linear(512, n_out)
        )

        self.conv.apply(self.init_weights)
        self.value_stream.apply(self.init_weights)
        self.advantage_stream.apply(self.init_weights)

        self.dtype = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor #here

    def _get_conv_out(self, shape):
        o = self.conv(torch.zeros(1, *shape))
        return int(np.prod(o.size()))

    def forward(self, x):
        x = x.type(self.dtype) #!!! not sure. I need this, because the images are now stored as uint8 and not float32 anymore
        conv_out = self.conv(x).view(x.size()[0], -1)
        values = self.value_stream(conv_out)
        advantages = self.advantage_stream(conv_out)
        q_vals = values + (advantages - advantages.mean())

        return q_vals

    def init_weights(self, m):
        if type(m) == nn.Linear:
            torch.nn.init.xavier_uniform_(m.weight)
            m.bias.data.fill_(0.01)

In [140]:
#env_name = 'PongNoFrameskip-v4'
import json  
env_name = 'PongNoFrameskip-v4'
env = make_atari(env_name, skip_noop=False, skip_maxskip=True)
env = wrap_deepmind(env, pytorch_img=True, frame_stack=False)
config = json.loads('{\
    "test_config": \
        {\
            "hp_replay_memory_capacity": 100000,\
            "hp_gamma": 0.99,\
            "hp_epsilon_start": 1.0,\
            "hp_epsilon_end": 0.1,\
            "hp_epsilon_decay_last_frame": 100000,\
            "hp_learning_rate": 0.00001,\
            "hp_replay_memory_start_after": 10000,\
            "hp_batch_size":32,\
            "hp_target_update_after": 10000,\
            "nr_of_total_frames":  25000000,\
            "nr_of_evaluation_frames": 10000,\
            "nr_of_frames_before_evaluation": 50000,\
            "hp_update_frequency": 4\
        },\
    "config_mnih2013":\
        {\
            "hp_replay_memory_capacity": 1000000, \
            "hp_gamma": 0.99,\
            "hp_epsilon_start": 1.0,\
            "hp_epsilon_end": 0.1,\
            "hp_epsilon_decay_last_frame": 1e6,\
            "hp_learning_rate": 0.00001,\
            "hp_replay_memory_start_after": 50000,\
            "hp_batch_size":32,\
            "hp_target_update_after": 40000,\
            "nr_of_total_frames":  10e6,\
            "nr_of_evaluation_frames": 10e3,\
            "nr_of_frames_before_evaluation": 50e3,\
            "hp_update_frequency": 4\
        },\
    "config_mnih2015":\
        {\
            "hp_replay_memory_capacity": 1000000, \
            "hp_gamma": 0.99,\
            "hp_epsilon_start": 1.0,\
            "hp_epsilon_end": 0.1,\
            "hp_epsilon_decay_last_frame": 1e6,\
            "hp_learning_rate": 0.00001,\
            "hp_replay_memory_start_after": 50000,\
            "hp_batch_size":32,\
            "hp_target_update_after": 10000,\
            "nr_of_total_frames":  50e6,\
            "nr_of_evaluation_frames": 520e3,\
            "nr_of_frames_before_evaluation": 50e3,\
            "hp_update_frequency": 4\
        }\
}')

dqn = PER_Agent(env, env_name, config['test_config'])
dqn.train_agent()

Start Training on cuda
0.31808150778174005
0
-----
7.831204289189859
0.31808150778174005
-----
0.6893852388674514
8.149285796971599
-----
0.2859036435119703
8.83867103583905
-----
0.5610784079869724
9.12457467935102
-----
0.4836863297714358
9.685653087337993
-----
0.5722539581112602
10.169339417109429
-----
0.44370504036477665
10.741593375220688
-----
0.5311151552026265
11.185298415585464
-----
0.49874618346113536
11.716413570788092
-----
0.42130060043273265
12.215159754249227
-----
0.3355760084232688
12.63646035468196
-----
0.41233072361424034
12.972036363105229
-----
0.34123425157959264
13.384367086719468
-----
0.4122208014159609
13.725601338299061
-----
0.3143489086045308
14.137822139715022
-----
0.30906830208946995
14.452171048319553
-----
0.3022898364154292
14.761239350409022
-----
0.3070163396510295
15.063529186824452
-----
0.3030469150636286
15.370545526475482
-----
0.2888295489006427
15.67359244153911
-----
0.16812095810086708
15.962421990439752
-----
0.3546644077443775
16.1305

0.05923296569071286
43.80320651609708
-----
0.05989149318240915
43.86243948178779
-----
0.12874342403516809
43.9223309749702
-----
0.12894594221371772
44.05107439900537
-----
0.10684881029712813
44.18002034121909
-----
0.11158361742220538
44.28686915151622
-----
0.12903186174639994
44.39845276893843
-----
0.06105950199823853
44.527484630684825
-----
0.06126845147209275
44.58854413268306
-----
0.10649693942896589
44.64981258415516
-----
0.10501788763488946
44.75630952358412
-----
0.05622987668931128
44.86132741121901
-----
0.11341154398141026
44.917557287908316
-----
0.09748987924485912
45.03096883188972
-----
0.056801834504994805
45.128458711134584
-----
0.05900781033467038
45.18526054563958
-----
0.10297736066665167
45.24426835597425
-----
0.0561826568973926
45.3472457166409
-----
0.10023072902961781
45.40342837353829
-----
0.09446841518762926
45.50365910256791
-----
0.12262504707744595
45.598127517755536
-----
0.09979268406060357
45.720752564832985
-----
0.11108379020783166
45.820545

0.07189354985216716
61.7733081053994
-----
0.07649201852375938
61.84520165525157
-----
0.07470693135849789
61.92169367377533
-----
0.0363578290800822
61.99640060513383
-----
0.09187164962949758
62.03275843421391
-----
0.038060711644920454
62.12463008384341
-----
0.07350243784978301
62.162690795488324
-----
0.07271974473209286
62.236193233338106
-----
0.07457210182790323
62.3089129780702
-----
0.09172784214494926
62.3834850798981
-----
0.08158255677573634
62.47521292204305
-----
0.04094733720695875
62.556795478818785
-----
0.0812981751994426
62.59774281602574
-----
0.07055184955994719
62.679040991225186
-----
0.08197883289670307
62.749592840785134
-----
0.07500262035612122
62.831571673681836
-----
0.07541782058124406
62.90657429403796
-----
0.07960311513694973
62.9819921146192
-----
0.06870443321689619
63.06159522975615
-----
0.08001917933819297
63.130299662973044
-----
0.09240769162970254
63.21031884231124
-----
0.09030309598930605
63.30272653394094
-----
0.0724216273272991
63.39302962

0.07695780400455332
76.06404640781116
-----
0.032270305963141774
76.14100421181571
-----
0.06010041343627819
76.17327451777885
-----
0.06667943648521726
76.23337493121512
-----
0.06565492830003823
76.30005436770034
-----
0.07528104174319167
76.36570929600038
-----
0.06122930002686562
76.44099033774357
-----
0.06156747264666737
76.50221963777044
-----
0.0642055142755617
76.56378711041711
-----
0.0609183445478158
76.62799262469267
-----
0.06285929292487628
76.68891096924048
-----
0.03446118098626925
76.75177026216535
-----
0.05945659672589626
76.78623144315162
-----
0.05997360111591018
76.84568803987752
-----
0.05840782698588979
76.90566164099343
-----
0.06676392717487936
76.96406946797931
-----
0.06179835816865783
77.03083339515419
-----
0.06046094243951361
77.09263175332285
-----
0.06730379877525272
77.15309269576237
-----
0.0766082976145042
77.22039649453762
-----
0.06662224412914158
77.29700479215212
-----
0.06702296338906372
77.36362703628126
-----
0.059232993406844334
77.4306499996

0.05391092489833675
86.81203076129573
-----
0.054032396655415864
86.86594168619406
-----
0.053631463193485024
86.91997408284948
-----
0.0326776382955432
86.97360554604296
-----
0.05301913865591659
87.0062831843385
-----
0.053257523272462526
87.05930232299441
-----
0.0576001325002875
87.11255984626688
-----
0.06674434229988634
87.17015997876716
-----
0.05250732371721714
87.23690432106704
-----
0.055055397130483665
87.28941164478427
-----
0.02988743966909704
87.34446704191475
-----
0.060171207514836005
87.37435448158385
-----
0.05365680595135636
87.43452568909869
-----
0.051711099222036795
87.48818249505005
-----
0.06599513294373341
87.53989359427209
-----
0.057486374588455916
87.60588872721583
-----
0.052961681244054135
87.66337510180428
-----
0.05780005421980708
87.71633678304833
-----
0.05761355865109327
87.77413683726814
-----
0.054422018194390055
87.83175039591924
-----
0.057490894646379076
87.88617241411363
-----
0.05970066673518012
87.94366330876001
-----
0.0640922805593888
88.003

0.052069685533333604
97.25201248871684
-----
0.05920264958412898
97.30408217425018
-----
0.048330751600897295
97.3632848238343
-----
0.04876207945557001
97.4116155754352
-----
0.04749815357437984
97.46037765489078
-----
0.02596635645093108
97.50787580846516
-----
0.059530315751874015
97.5338421649161
-----
0.05967195761456336
97.59337248066797
-----
0.052067018768358224
97.65304443828254
-----
0.048975495421486055
97.70511145705089
-----
0.04812440924144849
97.75408695247238
-----
0.04769422376677447
97.80221136171383
-----
0.04768949935981655
97.8499055854806
-----
0.04513895971578975
97.89759508484042
-----
0.04395879076968993
97.9427340445562
-----
0.04775508666186665
97.9866928353259
-----
0.04847456236419097
98.03444792198776
-----
0.046555286795863175
98.08292248435195
-----
0.029271050571438112
98.12947777114782
-----
0.04907185410320238
98.15874882171926
-----
0.05318994728283047
98.20782067582246
-----
0.048422794474467706
98.26101062310529
-----
0.047837346223897244
98.309433

0.0258596154949772
107.13392866812688
-----
0.05367056288826354
107.15978828362186
-----
0.055398907163191596
107.21345884651012
-----
0.04270503899252685
107.26885775367332
-----
0.04248829347522752
107.31156279266584
-----
0.044053276088383504
107.35405108614107
-----
0.04908570401745415
107.39810436222945
-----
0.05430461624780474
107.4471900662469
-----
0.053242634082903265
107.50149468249471
-----
0.044511666022339344
107.55473731657762
-----
0.0452527005098237
107.59924898259996
-----
0.045067180206781854
107.64450168310978
-----
0.05425463192122453
107.68956886331657
-----
0.04441435897577083
107.74382349523779
-----
0.043854722843907706
107.78823785421356
-----
0.04513429444024831
107.83209257705747
-----
0.054669780435024073
107.87722687149771
-----
0.02747212531712731
107.93189665193273
-----
0.04950994856319168
107.95936877724986
-----
0.05459990403745862
108.00887872581306
-----
0.04446719910154962
108.06347862985052
-----
0.05258078823764585
108.10794582895207
-----
0.0480

0.0444826843710566
116.0411240726597
-----
0.04144170745900295
116.08560675703076
-----
0.039253077733432666
116.12704846448976
-----
0.0408810119943556
116.1663015422232
-----
0.04144219045534972
116.20718255421755
-----
0.023442784163079752
116.24862474467291
-----
0.04509606027220158
116.27206752883599
-----
0.03938934205437276
116.31716358910819
-----
0.041894890773843355
116.35655293116257
-----
0.0254734312245965
116.39844782193641
-----
0.051178325547917715
116.42392125316101
-----
0.02470801506805398
116.47509957870894
-----
0.026526270936795618
116.49980759377699
-----
0.04461076828642661
116.52633386471379
-----
0.038734231634337986
116.57094463300022
-----
0.04109014069805088
116.60967886463456
-----
0.04962423015506776
116.6507690053326
-----
0.04015613204896868
116.70039323548767
-----
0.03861167178254562
116.74054936753663
-----
0.04065298903723066
116.77916103931918
-----
0.04434432724133877
116.8198140283564
-----
0.04883070644980854
116.86415835559774
-----
0.043848353

0.04191473686054136
124.35657032713033
-----
0.04728868537363959
124.39848506399088
-----
0.03958158365202716
124.44577374936452
-----
0.03831892089721886
124.48535533301654
-----
0.04205876170326386
124.52367425391377
-----
0.038811630916093506
124.56573301561703
-----
0.04161913462159372
124.60454464653311
-----
0.04169098349579787
124.64616378115471
-----
0.03891798115724286
124.68785476465051
-----
0.04229731340179938
124.72677274580775
-----
0.038464072337593316
124.76907005920955
-----
0.02470042371384118
124.80753413154714
-----
0.04077840907003009
124.83223455526098
-----
0.04149278603561021
124.87301296433101
-----
0.038371572820184344
124.91450575036662
-----
0.047124659840810844
124.9528773231868
-----
0.035776815884745135
125.0000019830276
-----
0.04618902945220048
125.03577879891235
-----
0.047138143346180376
125.08196782836454
-----
0.02402872222126178
125.12910597171071
-----
0.038199959987768976
125.15313469393197
-----
0.037653624473761374
125.19133465391974
-----
0.04

0.0353663259827317
132.37362978815534
-----
0.022278800962632895
132.40899611413806
-----
0.03746858546089828
132.4312749151007
-----
0.036930432051919225
132.46874350056157
-----
0.03814314635354136
132.5056739326135
-----
0.022534441419153443
132.54381707896704
-----
0.036348120227629135
132.56635152038618
-----
0.03505425336264061
132.60269964061382
-----
0.04004246986674823
132.63775389397645
-----
0.04460957177574607
132.6777963638432
-----
0.037127289941212664
132.72240593561895
-----
0.021671572238707637
132.75953322556018
-----
0.02251880480693979
132.7812047977989
-----
0.044910354100501454
132.80372360260583
-----
0.03630826488798281
132.84863395670632
-----
0.03862761407167037
132.88494222159432
-----
0.035953706437492426
132.92356983566597
-----
0.03887883719484972
132.95952354210345
-----
0.034473840366936424
132.9984023792983
-----
0.03639091977320106
133.03287621966524
-----
0.03704177544399707
133.06926713943844
-----
0.0348945755335187
133.10630891488245
-----
0.038592

0.018207798065243606
139.0094585195527
-----
0.039971376520180436
139.02766631761796
-----
0.03238588121664608
139.06763769413814
-----
0.03782488280915786
139.10002357535478
-----
0.03539618198311595
139.13784845816394
-----
0.03259171494235878
139.17324464014706
-----
0.041646014946754954
139.20583635508942
-----
0.030783977404698775
139.24748237003618
-----
0.030901332516589858
139.27826634744088
-----
0.03616446417908144
139.30916767995745
-----
0.037025037404270135
139.34533214413653
-----
0.03667638218317288
139.3823571815408
-----
0.01868278126561511
139.41903356372399
-----
0.04127139660720678
139.4377163449896
-----
0.041129199155814894
139.47898774159682
-----
0.0412372638280231
139.52011694075264
-----
0.041199090627073616
139.56135420458065
-----
0.038251983091508616
139.6025532952077
-----
0.03684578866049266
139.6408052782992
-----
0.03687996132412284
139.6776510669597
-----
0.03317437211635275
139.71453102828383
-----
0.03661962254425006
139.74770540040018
-----
0.041539

0.037060713192722104
146.13597981535452
-----
0.033012866240943954
146.17304052854723
-----
0.04020574072746742
146.20605339478817
-----
0.01943733997714964
146.24625913551563
-----
0.03556482070197942
146.26569647549277
-----
0.033226947330999825
146.30126129619475
-----
0.030657041088759374
146.33448824352575
-----
0.01884226617582204
146.36514528461453
-----
0.03357289540938202
146.38398755079035
-----
0.03222212411284594
146.41756044619973
-----
0.03904626233409954
146.44978257031258
-----
0.0392251486196342
146.48882883264667
-----
0.03201113641124494
146.5280539812663
-----
0.040545550134948524
146.56006511767757
-----
0.03278091931553788
146.6006106678125
-----
0.03985064146035284
146.63339158712805
-----
0.03541558659284165
146.6732422285884
-----
0.032786389610942886
146.70865781518125
-----
0.031077575401343364
146.7414442047922
-----
0.033490914475891316
146.77252178019356
-----
0.033361867440807186
146.80601269466945
-----
0.02022306528283189
146.83937456211027
-----
0.0402

0.0164909887342532
153.0391958050629
-----
0.03143674112641249
153.05568679379715
-----
0.018392576895397507
153.08712353492356
-----
0.017985924346039753
153.10551611181896
-----
0.017912039441070917
153.123502036165
-----
0.0375077579610937
153.14141407560606
-----
0.02954315688034997
153.17892183356716
-----
0.033312747979403194
153.2084649904475
-----
0.016642723338806613
153.24177773842692
-----
0.029317290405990563
153.25842046176572
-----
0.034712258808228834
153.28773775217172
-----
0.01802424466786016
153.32245001097996
-----
0.03814368774513198
153.34047425564782
-----
0.0313739400947442
153.37861794339295
-----
0.037976365661834086
153.4099918834877
-----
0.030248550001571277
153.44796824914954
-----
0.020550843621760847
153.4782167991511
-----
0.03184923306776449
153.49876764277286
-----
0.031028644978001174
153.53061687584062
-----
0.03775465946767189
153.56164552081862
-----
0.01807942669193891
153.5994001802863
-----
0.038012537391195235
153.61747960697824
-----
0.017757

0.035239417967575876
159.45990090570623
-----
0.03017182837810219
159.4951403236738
-----
0.036049493530076214
159.52531215205192
-----
0.02943246996232153
159.561361645582
-----
0.029118486018148114
159.59079411554433
-----
0.032130386055077016
159.61991260156248
-----
0.027647122825845964
159.65204298761756
-----
0.030366621902120198
159.6796901104434
-----
0.03515371224194279
159.71005673234552
-----
0.03576397472191544
159.74521044458746
-----
0.03645612590014102
159.78097441930936
-----
0.02875186005249435
159.8174305452095
-----
0.03657009304003863
159.846182405262
-----
0.028536716650243297
159.88275249830204
-----
0.0296966975056338
159.9112892149523
-----
0.016800935201721733
159.94098591245793
-----
0.032893738920542995
159.95778684765966
-----
0.017009213276368278
159.9906805865802
-----
0.02827477362949676
160.00768979985656
-----
0.014475679885598402
160.03596457348607
-----
0.015690734128271083
160.05044025337168
-----
0.03609294745243877
160.06613098749995
-----
0.032155

0.02839578699311735
165.6835339278234
-----
0.029451243353147246
165.7119297148165
-----
0.016927951785478894
165.74138095816966
-----
0.02864372523027816
165.75830890995513
-----
0.031586653537519445
165.7869526351854
-----
0.02851508084255807
165.81853928872292
-----
0.01669899959182178
165.84705436956548
-----
0.035702039032001795
165.8637533691573
-----
0.030277505878329537
165.8994554081893
-----
0.017615891001960277
165.92973291406764
-----
0.0367149597900429
165.9473488050696
-----
0.01711405011812104
165.98406376485963
-----
0.027646739806820603
166.00117781497775
-----
0.03554805033446244
166.02882455478456
-----
0.028129233971894096
166.06437260511902
-----
0.027650406051025114
166.09250183909091
-----
0.027651700152914106
166.12015224514195
-----
0.028933838859105512
166.14780394529487
-----
0.03178161272699078
166.17673778415397
-----
0.029088402700614267
166.20851939688097
-----
0.02876365380356001
166.23760779958158
-----
0.0288373281772968
166.26637145338515
-----
0.0308

0.030085523789189182
171.095106109843
-----
0.014101713437106709
171.12519163363217
-----
0.02633181732019468
171.13929334706927
-----
0.03355650357046433
171.16562516438947
-----
0.027710055868670525
171.19918166795992
-----
0.02541633428882793
171.2268917238286
-----
0.027436166466471342
171.25230805811742
-----
0.013289174527602164
171.27974422458388
-----
0.01354176624969714
171.2930333991115
-----
0.02538514771717337
171.30657516536118
-----
0.01275904864891746
171.33196031307835
-----
0.0284722426232526
171.34471936172727
-----
0.013146663035126198
171.37319160435052
-----
0.033461833778831926
171.38633826738564
-----
0.015024234831018805
171.41980010116447
-----
0.033234137692546205
171.4348243359955
-----
0.025104171088534608
171.46805847368805
-----
0.03249501714814704
171.4931626447766
-----
0.025477256290001127
171.52565766192473
-----
0.03321786795420792
171.55113491821473
-----
0.0270151448427428
171.58435278616895
-----
0.025717808984848423
171.6113679310117
-----
0.02751

0.013821051701360098
176.93465943750084
-----
0.028986045548297763
176.9484804892022
-----
0.025863147562133027
176.9774665347505
-----
0.028597232482621176
177.00332968231262
-----
0.03270794058349097
177.03192691479524
-----
0.026194276088135602
177.06463485537873
-----
0.03277156086464167
177.09082913146688
-----
0.026407648841346488
177.1236006923315
-----
0.0258948124873152
177.15000834117285
-----
0.024780579119423937
177.17590315366016
-----
0.026297699582523728
177.2006837327796
-----
0.02870403477368288
177.22698143236212
-----
0.026697788552784034
177.2556854671358
-----
0.028205163406401133
177.2823832556886
-----
0.028376405992866508
177.31058841909498
-----
0.025178669860096564
177.33896482508786
-----
0.03246436420080082
177.36414349494794
-----
0.014206303013645267
177.39660785914873
-----
0.0258353114329095
177.41081416216238
-----
0.026452092870731067
177.43664947359528
-----
0.026481937773819304
177.463101566466
-----
0.02771652366106596
177.48958350423982
-----
0.029

0.013894895125161615
182.60679344440422
-----
0.0159424587877206
182.62068833952938
-----
0.028381102319812557
182.6366307983171
-----
0.028296552513676368
182.6650119006369
-----
0.026742136395848534
182.69330845315056
-----
0.02568013787578534
182.72005058954642
-----
0.02630962445158873
182.7457307274222
-----
0.0158482747012697
182.77204035187378
-----
0.01583771791926645
182.78788862657504
-----
0.015836345773775184
182.8037263444943
-----
0.025393618304216298
182.81956269026807
-----
0.03218799545182082
182.84495630857228
-----
0.025134166070664325
182.87714430402409
-----
0.026657644594892428
182.90227847009476
-----
0.026604845632693122
182.92893611468966
-----
0.028851540244118214
182.95554096032237
-----
0.03251147853074547
182.98439250056649
-----
0.024837501777156826
183.01690397909724
-----
0.02828234518567173
183.0417414808744
-----
0.014995432107300085
183.07002382606007
-----
0.02574766704400094
183.08501925816736
-----
0.03160284163950409
183.11076692521135
-----
0.029

0.025728836815971094
188.1380217464397
-----
0.024623645324409218
188.16375058325568
-----
0.03149553022663566
188.18837422858007
-----
0.015181172771670024
188.2198697588067
-----
0.01517994840880252
188.23505093157837
-----
0.02461233346495326
188.25023087998719
-----
0.031481065274865036
188.27484321345213
-----
0.0275725866845443
188.306324278727
-----
0.03087637358513161
188.33389686541156
-----
0.024983446276577698
188.3647732389967
-----
0.025090250511053732
188.38975668527326
-----
0.030862123704096207
188.4148469357843
-----
0.031423801156374616
188.4457090594884
-----
0.025803571768293992
188.47713286064476
-----
0.014686754346696805
188.50293643241307
-----
0.025527521482861405
188.51762318675975
-----
0.024787476516876305
188.54315070824262
-----
0.014363565742694321
188.5679381847595
-----
0.026382679719399352
188.58230175050218
-----
0.025109731626768682
188.6086844302216
-----
0.03095701270946437
188.63379416184836
-----
0.014866489437570038
188.6647511745578
-----
0.031

0.025636676589675837
193.60281660183458
-----
0.027910798485015785
193.62845327842425
-----
0.031217964282006842
193.65636407690926
-----
0.02486342115375648
193.68758204119126
-----
0.03052659548373458
193.71244546234502
-----
0.023759776555355067
193.74297205782875
-----
0.02323167508707725
193.7667318343841
-----
0.030178127020843236
193.7899635094712
-----
0.029924939077291293
193.82014163649202
-----
0.02400325046242586
193.85006657556931
-----
0.02408821322773038
193.87406982603173
-----
0.030327179191428338
193.89815803925947
-----
0.02418382106292846
193.9284852184509
-----
0.023501039300185447
193.95266903951384
-----
0.024476821347403106
193.97617007881402
-----
0.026352875130464304
194.00064690016143
-----
0.026004812517865284
194.0269997752919
-----
0.023785043769355363
194.05300458780977
-----
0.01280261058574251
194.07678963157912
-----
0.024515014307181154
194.08959224216485
-----
0.026720315671421517
194.11410725647204
-----
0.013737431081453949
194.14082757214345
-----

0.025382661362722797
198.29304983366194
-----
0.022597420866905624
198.31843249502467
-----
0.02830138057119121
198.34102991589157
-----
0.023168482606413625
198.36933129646275
-----
0.028540179167443346
198.39249977906917
-----
0.024035777876297845
198.4210399582366
-----
0.026458550428802727
198.4450757361129
-----
0.02629806464794213
198.4715342865417
-----
0.028928687675682435
198.49783235118963
-----
0.026002286061888664
198.52676103886532
-----
0.028536633307423585
198.5527633249272
-----
0.02858113442825787
198.58129995823464
-----
0.02399187768102137
198.6098810926629
-----
0.012740096074335534
198.63387297034393
-----
0.029022254274168484
198.64661306641827
-----
0.024152558868889975
198.67563532069244
-----
0.02351457465970183
198.6997878795613
-----
0.029300123898445565
198.72330245422103
-----
0.023608435077860083
198.75260257811948
-----
0.009692918923987654
198.77621101319733
-----
0.022366580010367863
198.7859039321213
-----
0.023729676564457205
198.80827051213166
-----


0.02375294131081447
203.5757058226198
-----
0.02849793051003123
203.5994587639306
-----
0.022221286863033696
203.62795669444066
-----
0.024720371174443846
203.6501779813037
-----
0.02494333240406969
203.67489835247815
-----
0.021601461620533106
203.69984168488222
-----
0.02539119715169111
203.72144314650274
-----
0.022737336783199186
203.74683434365443
-----
0.023501572097873624
203.76957168043762
-----
0.0216543767300523
203.7930732525355
-----
0.021815792716901174
203.81472762926555
-----
0.022166076744468473
203.83654342198244
-----
0.02776512346713711
203.8587094987269
-----
0.028734503216152556
203.88647462219404
-----
0.02823262976491309
203.9152091254102
-----
0.02216034415493012
203.94344175517512
-----
0.022547217670797884
203.96560209933006
-----
0.02267159552184248
203.98814931700085
-----
0.028628848847768062
204.0108209125227
-----
0.02249477592982183
204.03944976137046
-----
0.022723849664502218
204.0619445373003
-----
0.0132709628212173
204.0846683869648
-----
0.01327009

0.02222393932827027
208.41263811537073
-----
0.024255738720609175
208.43486205469898
-----
0.022006394123798415
208.45911779341958
-----
0.021236093262354892
208.48112418754337
-----
0.02332005054820814
208.50236028080573
-----
0.012444783774995768
208.52568033135395
-----
0.027875742531394083
208.53812511512896
-----
0.012115236533191581
208.56600085766036
-----
0.02209074118231087
208.57811609419355
-----
0.027381992140331317
208.60020683537587
-----
0.027405216131102535
208.6275888275162
-----
0.022483341885381355
208.65499404364732
-----
0.027358074124752073
208.6774773855327
-----
0.011452454858356295
208.70483545965746
-----
0.02742056592792648
208.71628791451582
-----
0.027175375861535327
208.74370848044376
-----
0.024555143935394232
208.7708838563053
-----
0.027924350529571873
208.7954390002407
-----
0.0249475257250638
208.82336335077025
-----
0.025144075460524048
208.8483108764953
-----
0.022387413454798905
208.87345495195584
-----
0.022066993984806237
208.89584236541063
-----

0.02247655405856452
213.23801950808874
-----
0.02233058099788572
213.2604960621473
-----
0.02733990716346909
213.28282664314517
-----
0.02053615538892304
213.31016655030865
-----
0.02680652333365088
213.33070270569758
-----
0.02169896278885737
213.35750922903122
-----
0.011782293590933753
213.37920819182008
-----
0.026872203224541386
213.390990485411
-----
0.027064101873770986
213.41786268863555
-----
0.022342438264415888
213.44492679050933
-----
0.021714648302440527
213.46726922877374
-----
0.022068825197449488
213.48898387707618
-----
0.0213007824266996
213.51105270227362
-----
0.02433460372816684
213.5323534847003
-----
0.022390294643420645
213.55668808842847
-----
0.025010740106923248
213.5790783830719
-----
0.023512777913961454
213.60408912317882
-----
0.02093256873561155
213.6276019010928
-----
0.026185379212402188
213.6485344698284
-----
0.009639934660151743
213.6747198490408
-----
0.020704828607991832
213.68435978370096
-----
0.021765360309625587
213.70506461230895
-----
0.0222

0.023513522122634148
218.00576847326371
-----
0.022360405371667048
218.02928199538636
-----
0.0274215529866802
218.05164240075803
-----
0.021335891223457065
218.0790639537447
-----
0.026931516980491694
218.10039984496817
-----
0.021072638179843683
218.12733136194865
-----
0.02109011459135051
218.1484040001285
-----
0.012780810610273897
218.16949411471984
-----
0.023895890258380695
218.18227492533012
-----
0.02074746436780968
218.2061708155885
-----
0.022323780010750038
218.22691827995632
-----
0.024367706214831
218.24924205996706
-----
0.027724049055283553
218.2736097661819
-----
0.021702219608457318
218.30133381523717
-----
0.026280302229447543
218.32303603484564
-----
0.021285295617337798
218.3493163370751
-----
0.02599474802130954
218.37060163269243
-----
0.02165007759034974
218.39659638071373
-----
0.021927112611281205
218.41824645830408
-----
0.011964599281915459
218.44017357091536
-----
0.021593447460201594
218.4521381701973
-----
0.02672329225413971
218.47373161765748
-----
0.01

221.8903890305691
-----
0.020094591276781374
221.9017514254846
-----
0.02136626529805007
221.92184601676138
-----
0.02091161358220189
221.94321228205942
-----
0.021037809614492663
221.96412389564162
-----
0.025382860937504265
221.9851617052561
-----
0.025880810260946883
222.0105445661936
-----
0.020017881170957867
222.03642537645456
-----
0.02082803618239569
222.05644325762552
-----
0.02104988953287079
222.0772712938079
-----
0.02077038190936364
222.09832118334077
-----
0.022992239631027558
222.11909156525013
-----
0.02324844885279884
222.14208380488117
-----
0.026250589313516304
222.16533225373396
-----
0.025941389205827486
222.19158284304748
-----
0.020832095970728423
222.2175242322533
-----
0.023335553144403564
222.23835632822403
-----
0.02122536882798414
222.26169188136842
-----
0.019513620440453273
222.2829172501964
-----
0.02017066556868657
222.30243087063684
-----
0.023099890950780022
222.32260153620553
-----
0.011689922545615954
222.3457014271563
-----
0.022867147157074438
222.

0.009486707851026408
226.34926535555778
-----
0.02061901306473502
226.35875206340882
-----
0.020715602230726427
226.37937107647355
-----
0.02103482485461489
226.40008667870427
-----
0.021521418990701573
226.42112150355888
-----
0.02571567061994365
226.44264292254957
-----
0.025545312304948917
226.4683585931695
-----
0.025282814605072926
226.49390390547447
-----
0.02031600428049469
226.51918672007955
-----
0.020600065363907197
226.53950272436003
-----
0.025885565027703363
226.56010278972394
-----
0.02257792274514944
226.58598835475163
-----
0.0196637236777431
226.60856627749678
-----
0.02031971968648737
226.62823000117453
-----
0.02559421548582321
226.648549720861
-----
0.02650234288435665
226.67414393634684
-----
0.025604048172721366
226.7006462792312
-----
0.02276145523989249
226.7262503274039
-----
0.020900035378187746
226.7490117826438
-----
0.025542445940218397
226.769911818022
-----
0.01121697161988226
226.79545426396223
-----
0.022333297911986235
226.80667123558212
-----
0.025307

230.86370898082572
-----
0.01890822150782333
230.888376247573
-----
0.02476657653070306
230.90728446908082
-----
0.020319202460290627
230.93205104561153
-----
0.02208167690768811
230.95237024807182
-----
0.02025465019608977
230.9744519249795
-----
0.01925116277696802
230.99470657517557
-----
0.019981636955541105
231.01395773795255
-----
0.019515215465510077
231.0339393749081
-----
0.019346214618533024
231.0534545903736
-----
0.024845112632513464
231.07280080499214
-----
0.02225486816903111
231.09764591762465
-----
0.02067590532914905
231.11990078579367
-----
0.020318384548082432
231.14057669112282
-----
0.010565698079294744
231.1608950756709
-----
0.020078508451727633
231.1714607737502
-----
0.02208242143651056
231.19153928220192
-----
0.019694635236753293
231.21362170363844
-----
0.024585344400045776
231.2333163388752
-----
0.019177140331670218
231.25790168327524
-----
0.01143189550655158
231.2770788236069
-----
0.01941632931282489
231.28851071911345
-----
0.011111179112065623
231.307

0.0196830586777194
235.28083776614451
-----
0.011174052104093066
235.30052082482223
-----
0.020462348032182137
235.3116948769263
-----
0.020108920906767972
235.33215722495848
-----
0.019825584377889297
235.35226614586526
-----
0.019897709776930006
235.37209173024314
-----
0.021689124509199305
235.39198944002007
-----
0.018536194677378888
235.41367856452928
-----
0.020675806136558738
235.43221475920666
-----
0.021289344865855025
235.4528905653432
-----
0.024783019193485353
235.47417991020907
-----
0.019212888205205784
235.49896292940255
-----
0.02495766149966526
235.51817581760776
-----
0.02160389543359153
235.5431334791074
-----
0.024638686334610253
235.564737374541
-----
0.01921209836143022
235.58937606087562
-----
0.010930976722780272
235.60858815923706
-----
0.024591556386922855
235.61951913595985
-----
0.011267381276245953
235.64411069234677
-----
0.019485716690266497
235.655378073623
-----
0.019988241121297913
235.67486379031328
-----
0.02102947101140597
235.69485203143458
-----
0

0.019439373036548614
239.17187512309025
-----
0.020394623124191935
239.1913144961268
-----
0.021584590217328985
239.211709119251
-----
0.011817078583204427
239.2332937094683
-----
0.022730261651884503
239.2451107880515
-----
0.024669617427738738
239.26784104970338
-----
0.019542922717631837
239.29251066713113
-----
0.019070863840729226
239.31205358984874
-----
0.021790650426917725
239.33112445368948
-----
0.019801353909924022
239.3529151041164
-----
0.022274793765506007
239.3727164580263
-----
0.020386672622493423
239.39499125179182
-----
0.025032297038562586
239.41537792441432
-----
0.01950783925407699
239.44041022145288
-----
0.019614639332064896
239.45991806070697
-----
0.023888435461307388
239.47953270003904
-----
0.01133820395801821
239.50342113550036
-----
0.01963403437985481
239.51475933945838
-----
0.023895639961676162
239.53439337383824
-----
0.02023495079908094
239.5582890137999
-----
0.020734335426554572
239.578523964599
-----
0.020219124007280032
239.59925830002555
-----
0.

0.023883786947022884
243.46640120654402
-----
0.01929468383811351
243.49028499349103
-----
0.018898728506379473
243.50957967732916
-----
0.023996750850721987
243.52847840583553
-----
0.011071250745967853
243.55247515668626
-----
0.021400624265876238
243.56354640743223
-----
0.024095444319283472
243.5849470316981
-----
0.019624701788353807
243.60904247601738
-----
0.011699424058652277
243.62866717780574
-----
0.018839622309339474
243.6403666018644
-----
0.02127597205070859
243.65920622417374
-----
0.02132812291706188
243.68048219622446
-----
0.019644147227787127
243.70181031914152
-----
0.01988282322078179
243.7214544663693
-----
0.02405302774102957
243.74133728959006
-----
0.011586119087318724
243.76539031733108
-----
0.019638090157525965
243.7769764364184
-----
0.0241547350727158
243.79661452657592
-----
0.024152342121417186
243.82076926164865
-----
0.02380711911910783
243.84492160377008
-----
0.019246762090623534
243.8687287228892
-----
0.019231538698819207
243.88797548497982
-----
0

0.019315420040361672
247.58129182733754
-----
0.019534830752314422
247.6006072473779
-----
0.023054694846389955
247.62014207813021
-----
0.018904713719642453
247.6431967729766
-----
0.019546815367337638
247.66210148669626
-----
0.02347485555529565
247.6816483020636
-----
0.01926476826077263
247.70512315761889
-----
0.019387396825475606
247.72438792587965
-----
0.01866743878376925
247.74377532270512
-----
0.01067327477060298
247.7624427614889
-----
0.019125382884344847
247.7731160362595
-----
0.018860142354769724
247.79224141914383
-----
0.018822629974279685
247.81110156149862
-----
0.010093904187365648
247.82992419147288
-----
0.023157158618300012
247.84001809566024
-----
0.022148600364560605
247.86317525427853
-----
0.020896350107223377
247.8853238546431
-----
0.023537093026095867
247.90622020475033
-----
0.01900736173144287
247.92975729777643
-----
0.019251705393074636
247.94876465950787
-----
0.018137976347498114
247.96801636490096
-----
0.023268910517315087
247.98615434124847
-----

251.55483626677926
-----
0.018537098283984577
251.57401586439823
-----
0.023652451786994468
251.59255296268222
-----
0.01853399009055588
251.61620541446922
-----
0.01918285405674411
251.63473940455978
-----
0.019181391803001113
251.65392225861652
-----
0.023644881565026046
251.6731036504195
-----
0.023594829250737005
251.69674853198453
-----
0.02360063800442363
251.72034336123525
-----
0.018537422672601556
251.74394399923966
-----
0.023596161309891395
251.76248142191227
-----
0.010674705361630862
251.78607758322215
-----
0.018892043094120684
251.79675228858378
-----
0.018165452533563536
251.8156443316779
-----
0.018885372408439283
251.83380978421147
-----
0.02277395214550845
251.85269515661992
-----
0.018342372124555528
251.87546910876543
-----
0.02025256142538263
251.89381148088998
-----
0.019566644840921996
251.91406404231537
-----
0.02388144387281966
251.9336306871563
-----
0.018806197118860148
251.95751213102912
-----
0.0191715976367202
251.97631832814798
-----
0.018985371703001307

0.018271866913513908
255.24701484545113
-----
0.018596605554797686
255.26528671236466
-----
0.010830325229961643
255.28388331791945
-----
0.010442127361392271
255.2947136431494
-----
0.01808645269835551
255.3051557705108
-----
0.00928641054281187
255.32324222320915
-----
0.009348166742021949
255.33252863375196
-----
0.022196037018951537
255.341876800494
-----
0.01769415458731148
255.36407283751294
-----
0.01795949486940536
255.38176699210024
-----
0.009361487646993431
255.39972648696966
-----
0.018251781304455237
255.40908797461665
-----
0.018962237849113515
255.4273397559211
-----
0.022465243284793303
255.44630199377022
-----
0.022173505471050174
255.468767237055
-----
0.02204298966857673
255.49094074252605
-----
0.02241964393697212
255.5129837321946
-----
0.02059868272228925
255.5354033761316
-----
0.018191723575908005
255.5560020588539
-----
0.01859548526529482
255.5741937824298
-----
0.01841248096105899
255.5927892676951
-----
0.009469802413375267
255.61120174865616
-----
0.0220355

0.022473715339951517
259.0859909351998
-----
0.017479102367292644
259.10846465053976
-----
0.02199615417299281
259.12594375290706
-----
0.018129054909406722
259.14793990708006
-----
0.01776137446326957
259.1660689619895
-----
0.0185669885606402
259.18383033645273
-----
0.022164977197404154
259.20239732501335
-----
0.01787967793012959
259.22456230221076
-----
0.017233062702637594
259.2424419801409
-----
0.016871422638598203
259.2596750428435
-----
0.017699427284591444
259.2765464654821
-----
0.010913399325535672
259.2942458927667
-----
0.018272984957091207
259.30515929209224
-----
0.018195699715156274
259.32343227704933
-----
0.020116563855191148
259.34162797676447
-----
0.0205235683930926
259.36174454061967
-----
0.02038292711412297
259.3822681090128
-----
0.018066317836887183
259.4026510361269
-----
0.022128219657827002
259.42071735396377
-----
0.018407069013636278
259.4428455736216
-----
0.01771519132483765
259.4612526426352
-----
0.01977116461539785
259.47896783396004
-----
0.017620

0.019435978130559993
263.02521077126215
-----
0.017499964356465565
263.0446467493927
-----
0.009895543815553576
263.0621467137492
-----
0.02191253190883347
263.07204225756476
-----
0.017175019405597805
263.09395478947357
-----
0.01685960309274125
263.1111298088792
-----
0.017002491762846586
263.12798941197195
-----
0.022077591079749655
263.1449919037348
-----
0.022221875398009606
263.16706949481454
-----
0.022364965227051076
263.1892913702126
-----
0.010763279523982388
263.2116563354396
-----
0.022863965177654044
263.2224196149636
-----
0.022470965504926322
263.24528358014123
-----
0.02190397525510839
263.26775454564614
-----
0.021271867047251303
263.28965852090124
-----
0.020623644581737648
263.3109303879485
-----
0.0177465532625625
263.3315540325302
-----
0.00916704063382498
263.3493005857928
-----
0.016857092011118586
263.3584676264266
-----
0.009969031922262594
263.3753247184377
-----
0.019212852173465345
263.38529375035995
-----
0.019372905693637747
263.4045066025334
-----
0.01933

0.019415203724263346
266.9153478109556
-----
0.02226281201441066
266.93476301467985
-----
0.022343501169618953
266.95702582669423
-----
0.022224161494806785
266.9793693278639
-----
0.01954087000933196
267.0015934893587
-----
0.010577808608923633
267.02113435936803
-----
0.01715411134437027
267.03171216797693
-----
0.017974865409832577
267.0488662793213
-----
0.02201922073568796
267.06684114473114
-----
0.017206582939826087
267.0888603654668
-----
0.021950713401882625
267.10606694840664
-----
0.017218681112618185
267.12801766180854
-----
0.018204384432444033
267.1452363429212
-----
0.010484268186455275
267.16344072735365
-----
0.009897153251566431
267.1739249955401
-----
0.01088374289072076
267.1838221487917
-----
0.02159323935387915
267.1947058916824
-----
0.017734035185836144
267.2162991310363
-----
0.01800217408391808
267.2340331662221
-----
0.02168551449125953
267.252035340306
-----
0.01747131593657203
267.27372085479726
-----
0.01763162939126628
267.29119217073384
-----
0.017996638

0.01717900469292554
270.6396173311454
-----
0.01738134474674456
270.65679633583835
-----
0.021406455630700736
270.6741776805851
-----
0.010050101950382553
270.6955841362158
-----
0.01763831840060347
270.70563423816617
-----
0.017951683961866402
270.72327255656677
-----
0.017159421850066418
270.74122424052865
-----
0.017748253363200364
270.7583836623787
-----
0.018107201747350317
270.7761319157419
-----
0.019063352337816523
270.79423911748927
-----
0.010296875525170406
270.8133024698271
-----
0.02191120238583066
270.82359934535225
-----
0.021857767901386063
270.84551054773806
-----
0.017008065462289185
270.86736831563945
-----
0.019380421538054606
270.88437638110173
-----
0.009796521912536415
270.9037568026398
-----
0.01747100467178368
270.91355332455237
-----
0.009699505450248213
270.93102432922416
-----
0.020768078822141038
270.94072383467443
-----
0.01618804237050881
270.96149191349656
-----
0.021060918736418775
270.9776799558671
-----
0.010588127274107103
270.99874087460347
-----
0.

0.020452074514001436
274.0130854922193
-----
0.009178080391453129
274.03353756673334
-----
0.017300104861442112
274.0427156471248
-----
0.017853809199116822
274.0600157519862
-----
0.016583731194401607
274.0778695611853
-----
0.021175802799592976
274.0944532923797
-----
0.016487026985361412
274.11562909517926
-----
0.01652423473317398
274.13211612216463
-----
0.009804978802611546
274.14864035689783
-----
0.021295123111256354
274.15844533570043
-----
0.016053863070904034
274.1797404588117
-----
0.0171798021100926
274.1957943218826
-----
0.01922392874885915
274.2129741239927
-----
0.009976370101095741
274.2321980527416
-----
0.01880705340228099
274.24217442284265
-----
0.016092745373230993
274.2609814762449
-----
0.009183729014631426
274.27707422161814
-----
0.018827941849902698
274.2862579506328
-----
0.020577340791784596
274.3050858924827
-----
0.01623733727535764
274.32566323327444
-----
0.009907188466146475
274.3419005705498
-----
0.019311004505388217
274.35180775901597
-----
0.01901

277.56641924542345
-----
0.018581104288775525
277.5871423183983
-----
0.01661708976188812
277.6057234226871
-----
0.01896046307806689
277.622340512449
-----
0.01661550109363942
277.6413009755271
-----
0.009753768805717938
277.65791647662076
-----
0.00931943102544231
277.66767024542645
-----
0.020397944776182352
277.6769896764519
-----
0.00903394881759254
277.6973876212281
-----
0.021217355587169483
277.7064215700457
-----
0.016495977011733255
277.72763892563285
-----
0.017401986541608263
277.7441349026446
-----
0.016709818721765222
277.7615368891862
-----
0.021365227744404004
277.77824670790795
-----
0.01046115305196229
277.79961193565236
-----
0.021246869050294638
277.8100730887043
-----
0.009787900372892767
277.8313199577546
-----
0.010010069031492071
277.8411078581275
-----
0.010013736200728457
277.85111792715895
-----
0.016623825555018082
277.8611316633597
-----
0.010124473199722903
277.8777554889147
-----
0.010013517911940243
277.8878799621144
-----
0.017400892134538482
277.897893

0.016660373832955946
281.2354229809629
-----
0.008868593843568535
281.25208335479584
-----
0.01595698681125192
281.2609519486394
-----
0.02090488223173995
281.27690893545065
-----
0.016310414876627207
281.29781381768237
-----
0.018720572932625173
281.314124232559
-----
0.01643070198976947
281.3328448054916
-----
0.020907689816670336
281.34927550748137
-----
0.018381710844081512
281.37018319729805
-----
0.017082286955251606
281.38856490814214
-----
0.020606834474660884
281.4056471950974
-----
0.010140967193907386
281.4262540295721
-----
0.02108554468761591
281.436394996766
-----
0.010597272074907339
281.4574805414536
-----
0.018451304354496708
281.4680778135285
-----
0.017038454095471035
281.486529117883
-----
0.01016032520626119
281.50356757197846
-----
0.010104167350844167
281.5137278971847
-----
0.02088094853454616
281.52383206453555
-----
0.02077095406181946
281.5447130130701
-----
0.016988025706409324
281.56548396713197
-----
0.02045562616024812
281.58247199283835
-----
0.020720410

0.01681904594654019
284.7000864761568
-----
0.009498332250565433
284.71690552210333
-----
0.02054217372636776
284.7264038543539
-----
0.020500175821470103
284.7469460280803
-----
0.01849068480876259
284.7674462039018
-----
0.01697218501940925
284.7859368887105
-----
0.016852477205916244
284.80290907372995
-----
0.009237472357940566
284.8197615509359
-----
0.017692671511673592
284.8289990232938
-----
0.017904987910651572
284.8466916948055
-----
0.018714418590324383
284.8645966827161
-----
0.01691250628908337
284.88331110130645
-----
0.020597711000421338
284.90022360759554
-----
0.016653432330294194
284.920821318596
-----
0.016166422649256287
284.9374747509263
-----
0.01605305578202879
284.95364117357553
-----
0.009515532833782956
284.9696942293576
-----
0.018218338805714416
284.9792097621914
-----
0.01692232629261867
284.99742810099707
-----
0.02063455628529247
285.0143504272897
-----
0.020661503944640724
285.034984983575
-----
0.020700770630712868
285.05564648751965
-----
0.01827937881

Episode 9 completed, timesteps played: 8849, return: -20, speed 519.590914, epsilon 0.911510
Mean return of last 100 games: -19.666667
Pytorch memory usage: 3.651093 (gb)
Size of Replay Memory: 8850
0.020221302187740677
287.78281242221135
-----
0.02013420856564122
287.8030337243991
-----
0.020063014595064328
287.82316793296474
-----
0.016676477153220265
287.8432309475598
-----
0.009855831193191118
287.859907424713
-----
0.016609691009418732
287.8697632559062
-----
0.016427399073117487
287.8863729469156
-----
0.015454823305536323
287.90280034598874
-----
0.01805526252623429
287.9182551692943
-----
0.016773302917264202
287.9363104318205
-----
0.01632577157306285
287.9530837347378
-----
0.01792142813183943
287.96940950631085
-----
0.015963684036941306
287.9873309344427
-----
0.015562120145310014
288.00329461847963
-----
0.019256375654305744
288.01885673862495
-----
0.015933345036365055
288.03811311427927
-----
0.019508005447937498
288.05404645931566
-----
0.018180769902995168
288.07355446

0.019941445250266417
291.2054476398525
-----
0.019769151565599226
291.22538908510273
-----
0.01945636773105864
291.2451582366683
-----
0.016405962832091395
291.26461460439936
-----
0.016005355545169925
291.28102056723145
-----
0.01996653294923205
291.29702592277664
-----
0.016257074129269324
291.3169924557259
-----
0.015595728181281738
291.33324952985515
-----
0.016370959834853316
291.3488452580364
-----
0.01663898865208743
291.36521621787125
-----
0.01559219672939213
291.3818552065233
-----
0.016455587534910502
291.3974474032527
-----
0.009398037254190405
291.4139029907876
-----
0.017771430453475547
291.4233010280418
-----
0.019838096156870957
291.4410724584953
-----
0.02004444840311639
291.4609105546522
-----
0.0163305876905913
291.48095500305527
-----
0.019966749566017936
291.4972855907459
-----
0.01610578821103446
291.5172523403119
-----
0.016170911525242963
291.53335812852293
-----
0.01612839007072637
291.5495290400482
-----
0.017389736050513308
291.5656574301189
-----
0.019477799

0.015206875047539615
294.6603974116452
-----
0.00869231495346859
294.67560428669276
-----
0.015860210477061834
294.6842966016462
-----
0.019643011861611784
294.7001568121233
-----
0.016366463442634732
294.7197998239849
-----
0.01988917290344042
294.7361662874275
-----
0.016347218380711705
294.75605546033097
-----
0.01772598960003273
294.77240267871167
-----
0.01764332563108988
294.7901286683117
-----
0.01773951081652416
294.80777199394277
-----
0.020102049855269744
294.8255115047593
-----
0.01628432289038937
294.84561355461454
-----
0.016283423556925445
294.86189787750493
-----
0.015680540714669164
294.87818130106183
-----
0.01765860349976794
294.89386184177647
-----
0.01607418875477675
294.9115204452762
-----
0.016226834906217995
294.927594634031
-----
0.016278900673442235
294.9438214689372
-----
0.019912456573921714
294.96010036961064
-----
0.016281603113252597
294.9800128261846
-----
0.009141984971502506
294.99629442929785
-----
0.015738714561396062
295.00543641426935
-----
0.015768

0.01967286589337334
298.1462973572481
-----
0.017297052387073117
298.16597022314147
-----
0.016114286336790913
298.1832672755285
-----
0.01974636831920283
298.1993815618653
-----
0.01978524141036392
298.2191279301845
-----
0.019820809669923126
298.2389131715949
-----
0.019821918269602053
298.2587339812648
-----
0.016198927481699658
298.2785558995344
-----
0.01974005484707167
298.2947548270161
-----
0.00922848889396668
298.3144948818632
-----
0.019815174828695385
298.32372337075714
-----
0.015455529433221604
298.34353854558583
-----
0.01973580464411451
298.35899407501904
-----
0.01606780054837544
298.3787298796632
-----
0.01588452618045785
298.3947976802116
-----
0.016101767461772803
298.41068220639204
-----
0.017167712550655904
298.4267839738538
-----
0.009371723377868436
298.44395168640443
-----
0.015677591912257142
298.4533234097823
-----
0.017740205299087913
298.46900100169455
-----
0.020129412413629626
298.48674120699366
-----
0.016081203090143596
298.50687061940727
-----
0.0160837

Episode 10 completed, timesteps played: 9689, return: -20, speed 525.518825, epsilon 0.903110
Mean return of last 100 games: -19.700000
Pytorch memory usage: 3.651093 (gb)
Size of Replay Memory: 9690
0.019298095012067838
301.5501380207927
-----
0.016014477445882993
301.56943611580476
-----
0.015008909213400156
301.58545059325064
-----
0.015998029739290594
301.60045950246405
-----
0.01688048506887916
301.61645753220336
-----
0.015616268408530679
301.63333801727225
-----
0.014912697672971292
301.64895428568076
-----
0.017263336964873702
301.6638669833537
-----
0.01933613566329562
301.68113032031863
-----
0.009094733617394211
301.70046645598194
-----
0.016184284611221362
301.70956118959936
-----
0.014954688837730172
301.72574547421056
-----
0.016618125373057468
301.7407001630483
-----
0.008114991644540635
301.75731828842135
-----
0.018988308771535588
301.7654332800659
-----
0.015718029744588094
301.78442158883746
-----
0.014769289111204422
301.800139618582
-----
0.008608312106129381
301.8

0.019611145813099935
304.68761757038357
-----
0.017318733392604337
304.70722871619665
-----
0.018649110828908716
304.72454744958924
-----
0.007064282495035435
304.74319656041814
-----
0.017642031625672554
304.7502608429132
-----
0.015268337512462087
304.76790287453883
-----
0.015175749811380526
304.7831712120513
-----
0.014590264536421699
304.79834696186265
-----
0.008493946593751788
304.8129372263991
-----
0.016680471547527876
304.82143117299285
-----
0.014551894027302602
304.8381116445404
-----
0.014363707504704046
304.8526635385677
-----
0.015297803325916631
304.8670272460724
-----
0.018677748648110185
304.8823250493983
-----
0.01487776862350931
304.90100279804636
-----
0.008560990358676706
304.9158805666699
-----
0.014253687884864433
304.92444155702856
-----
0.015003557680151259
304.93869524491345
-----
0.015553936667638452
304.9536988025936
-----
0.016040257071138698
304.9692527392612
-----
0.019082976867369466
304.98529299633236
-----
0.00842922480497119
305.0043759731997
-----
0

308.2471437112824
-----
0.006932710466631178
308.2572374399821
-----
0.00933773768634498
308.2641701504487
-----
0.008218541910108198
308.27350788813504
-----
0.010268760352346687
308.2817264300451
-----
0.008579695637939486
308.2919951903975
-----
0.007960403249597518
308.3005748860354
-----
0.01047562988881298
308.308535289285
-----
0.011012772611095819
308.31901091917376
-----
0.009837330503759269
308.33002369178485
-----
0.011799966598102896
308.33986102228863
-----
0.010631318949905016
308.35166098888675
-----
0.011118473608190398
308.36229230783664
-----
0.009836796526914621
308.3734107814448
-----
0.011642811487987088
308.38324757797176
-----
0.012012215555842096
308.39489038945976
-----
0.010516027698056013
308.4069026050156
-----
0.010876795301604536
308.41741863271363
-----
0.011883195432829771
308.42829542801525
-----
0.012290314115931907
308.44017862344805
-----
0.010848593124650033
308.452468937564
-----
0.01227239382864495
308.46331753068864
-----
0.012259905098581613
308

0.013639917085461485
311.5223287052791
-----
0.013563899099440384
311.5359686223646
-----
0.01284718092068403
311.549532521464
-----
0.01291320951733632
311.5623797023847
-----
0.012803338672995187
311.57529291190207
-----
0.012889878559836932
311.58809625057506
-----
0.012724314955166391
311.6009861291349
-----
0.0124587159136783
311.6137104440901
-----
0.012080268798772569
311.62616916000377
-----
0.012201646486132176
311.63824942880257
-----
0.01280395367385307
311.6504510752887
-----
0.012269524517201839
311.66325502896257
-----
0.011358050330281413
311.67552455347976
-----
0.013047230930991445
311.68688260381003
-----
0.011929003641215966
311.699929834741
-----
0.012763565179633273
311.7118588383822
-----
0.011630135628902132
311.72462240356185
-----
0.012064354619191631
311.73625253919073
-----
0.012082648647174447
311.74831689380994
-----
0.011455291576527407
311.7603995424571
-----
0.013053477478868019
311.7718548340336
-----
0.011605379160436935
311.7849083115125
-----
0.01307

0.017370810929351232
314.7841357999086
-----
0.017544726681677125
314.801506610838
-----
0.017543748921808315
314.8190513375196
-----
0.01993931514146316
314.83659508644143
-----
0.017394028475431856
314.8565344015829
-----
0.017513306918053265
314.87392843005836
-----
0.017058519547154496
314.8914417369764
-----
0.0175460896985385
314.90850025652355
-----
0.019567619863978078
314.9260463462221
-----
0.018538627026009488
314.9456139660861
-----
0.0209395523348545
314.96415259311215
-----
0.018746390879773447
314.985092145447
-----
0.020944622813618096
315.0038385363268
-----
0.018610234593686145
315.0247831591404
-----
0.017960361976202772
315.04339339373405
-----
0.01885250748088575
315.06135375571023
-----
0.019265724803615002
315.0802062631911
-----
0.021219771301224915
315.0994719879947
-----
0.019059714171580363
315.12069175929594
-----
0.019613012128866073
315.1397514734675
-----
0.02119246656485339
315.15936448559637
-----
0.01862482685448634
315.1805569521612
-----
0.0191107521

0.019581440759579277
317.9682928145299
-----
0.018742976704534644
317.98787425528946
-----
0.019801804558419938
318.00661723199397
-----
0.01889741479567519
318.0264190365524
-----
0.01916518264015906
318.04531645134807
-----
0.02041099704355616
318.06448163398824
-----
0.01969948766485359
318.0848926310318
-----
0.020038228667453216
318.10459211869664
-----
0.01894097827272969
318.1246303473641
-----
0.019050492234569533
318.1435713256368
-----
0.019599241746430095
318.16262181787135
-----
0.018955036586618
318.1822210596178
-----
0.019303730952034002
318.2011760962044
-----
0.01925937982443625
318.22047982715645
-----
0.019056102717183575
318.2397392069809
-----
0.019430789628656006
318.2587953096981
-----
0.01968523424145667
318.27822609932673
-----
0.019383605936480527
318.2979113335682
-----
0.01889555170000818
318.3172949395047
-----
0.01966020924154038
318.33619049120466
-----
0.01959935994392115
318.3558507004462
-----
0.01966044247709143
318.3754500603901
-----
0.0194585780601

IndexError: deque index out of range

In [141]:
[l.value for l in dqn.replay_memory.sumTree.leaf_nodes]

[0,
 0.31808150778174005,
 7.831204289189859,
 0.6893852388674514,
 0.2859036435119703,
 0.5610784079869724,
 0.4836863297714358,
 0.5722539581112602,
 0.44370504036477665,
 0.5311151552026265,
 0.49874618346113536,
 0.42130060043273265,
 0.3355760084232688,
 0.41233072361424034,
 0.34123425157959264,
 0.4122208014159609,
 0.3143489086045308,
 0.30906830208946995,
 0.3022898364154292,
 0.3070163396510295,
 0.3030469150636286,
 0.2888295489006427,
 0.16812095810086708,
 0.3546644077443775,
 0.34569800843088977,
 0.28327161253077904,
 0.3007194498365242,
 0.2953991371960584,
 0.2698004606917925,
 0.14248948422278238,
 0.14502244385952365,
 0.14250957260767946,
 0.25615873442126735,
 0.23089691103722318,
 0.2625206517382457,
 0.23687484950004534,
 0.2390872541672533,
 0.23479198999300052,
 0.22170047829308398,
 0.12011414107959087,
 0.2486975737916927,
 0.22944785935990997,
 0.2277681122991334,
 0.250922444471039,
 0.25180481308226754,
 0.24613967631019426,
 0.23753091566360351,
 0.193178

In [198]:
x = []
for i in range(10000000):
    x.append(dqn.replay_memory.sumTree.draw_idx())
sum([1 for i in x if i == 2]) / sum([1 for j in x if j == 1])
#26, passt! (schau drueber was richtig)

KeyboardInterrupt: 

In [196]:
len([l.value for l in dqn.replay_memory.sumTree.leaf_nodes])

100000

In [127]:
dqn.replay_memory.sumTree.top_node.value

tensor([inf], device='cuda:0')

In [130]:
dqn.replay_memory.sumTree.top_node.value.item()

inf

In [111]:
q_vals = torch.tensor([[ 9.4305,  5.4226,  8.4617,  6.7611,  8.7324, 17.9572]])
action_ = torch.tensor([env.action_space.sample()])
q_vals.gather(1, action_.unsqueeze(-1)).squeeze(-1)

tensor([6.7611])

In [108]:
action

tensor([5])

In [109]:
action_

tensor(5)