In [53]:
"""A queue for storing previous experiences to sample from."""
import numpy as np


class ReplayQueue(object):
    """A queue for storing previous experiences to sample from."""

    def __init__(self,
        size: int=250000,
        image_size: tuple=(84, 84),
        agent_history_length: int=4
    ) -> None:
        """
        Initialize a new replay buffer with a given size.

        Args:
            size: the maximum number of experiences to store
            image_size: the size of the images to store
            agent_history_length

        Returns:
            None

        """
        # verify size
        if not isinstance(size, int):
            raise TypeError('size must be of type int')
        if size < 1:
            raise ValueError('size must be at least 1')
        # assign size to self
        self._size = size
        self._buffer_size = agent_history_length + size
        self.agent_history_length = agent_history_length
        # setup the queues
        self.s = np.zeros((*image_size, self._buffer_size)).astype('uint8')
        self.a = np.zeros(self._buffer_size).astype('uint8')
        self.r = np.zeros(self._buffer_size).astype('int8')
        self.d = np.zeros(self._buffer_size).astype(bool)
        # setup variables for the index and top
        self.index = 0
        self.top = 0

    def __repr__(self) -> str:
        """Return an executable string representation of self."""
        return '{}(size={})'.format(self.__class__.__name__, self.size)

    def __len__(self) -> int:
        """Return the number of items in the queue."""
        return self.top

    @property
    def size(self) -> int:
        """Return the size of the queue."""
        return self._size

    @property
    def num_bytes(self) -> int:
        """Return the number of byte this object consumes."""
        from sys import getsizeof
        s = getsizeof(self.s)
        a = getsizeof(self.a)
        r = getsizeof(self.r)
        d = getsizeof(self.d)

        return s + a + r + d

    def push(self, s, a, r, d) -> None:
        """
        Push a new experience onto the queue.

        Args:
            s: the current state
            a: the action to get from state to next state
            r: the reward as a result of the action
            d: a flag indicating if the episode (game) has ended

        Returns:
            None

        """
        # ensure types are the smallest possible before storing in the queue
        s = s.astype('uint8')
        a = int(a)
        r = int(r)
        # push the variables onto the queue
        self.s[:, :, self.index] = s.astype('uint8')
        self.a[self.index] = int(a)
        self.r[self.index] = int(r)
        self.d[self.index] = d
        # increment the index
        if self.index == self.size - 1:
            self.index = 0
        else:
            self.index += 1
        # increment the top pointer
        if self.top < self.size:
            self.top += 1

    def current(self) -> tuple:
        """Pop an item off the queue and return it."""
        s = self.s[:, :, self.index - self.agent_history_length:self.index]
        a = self.a[self.index - self.agent_history_length:self.index]
        r = self.r[self.index - self.agent_history_length:self.index]
        d = self.d[self.index - self.agent_history_length:self.index]
        s2 = self.s[:, :, self.index - self.agent_history_length + 1:self.index + 1]
        return s, a, r, d, s2

    def sample(self, size: int=32):
        """
        Return a random sample of items from the queue.

        Args:
            size: the number of items to sample and return

        Returns:
            A random sample from the queue sampled uniformly

        """
        # generate and index of items to sample
        index = np.random.randint(0, len(self), size)
        # extract the items for this batch
        s = self.s[index - self.agent_history_length:index]
        a = self.a[index - self.agent_history_length:index]
        r = self.r[index - self.agent_history_length:index]
        d = self.d[index - self.agent_history_length:index]
        s2 = self.s2[index - self.agent_history_length + 1:index + 1]

        return s, a, r, d, s2


In [54]:
def random_state() -> tuple:
    """Return an arbitrary randomized state"""
    s = np.random.randint(0, 256, (84, 84))
    a = np.random.randint(6)
    r = np.random.randint(2) - 1
    d = bool(np.random.randint(1))
    return s, a, r, d

In [55]:
q = ReplayQueue(1000)

for _ in range(4):
    q.push(*random_state())

In [56]:
for item in q.current():
    print(item.shape)

(84, 84, 4)
(4,)
(4,)
(4,)
(84, 84, 4)


In [57]:
s, a, r, d, s2 = q.current()

# Sample

In [59]:
q = ReplayQueue(1000)

for _ in range(100):
    q.push(*random_state())

In [60]:
q.sample()

TypeError: only integer scalar arrays can be converted to a scalar index