In [1]:
import gym
import cv2
import numpy as np

In [2]:
env = gym.make('SpaceInvadersDeterministic-v4')

In [3]:
type(env)

gym.wrappers.time_limit.TimeLimit

# Gym

## Space

In [4]:
env.observation_space

Box(210, 160, 3)

In [5]:
env.action_space.n

6

In [6]:
[env.action_space.sample() for _ in range(10)]

[4, 5, 0, 3, 3, 3, 1, 3, 5, 2]

## Game Environment Setup

In [7]:
env.reset()
env.render()

True

## Step

In [8]:
next_state, reward, done, info = env.step(action=env.action_space.sample())
env.render()

True

In [9]:
def downsample(
    frame: np.ndarray,
    x: int=8,
    y: int=14,
    image_size: tuple=(84, 84)
) -> np.ndarray:
    """
    Down-sample the given frame from RGB to B&W with a reduced size.

    Args:
        frame: the frame to down-sample
        x: the number of x pixels to crop
        y: the number of y pixels to crop

    Returns:
        a down-sample B&W frame

    """
    # convert the frame from RGB to gray scale
    frame = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
    # crop the image
    frame = frame[2*y:frame.shape[0] - y, x:frame.shape[1] - x]

    # zero out specific colors
    # 142 is the generic gray color
    frame[frame == 142] = 0

    # resize the frame to the expected shape
    frame = cv2.resize(frame, image_size)

    return frame

In [10]:
down = downsample(next_state)[:, :, np.newaxis]
down = np.repeat(down, 4, axis=2).astype('uint8')

In [11]:
down.shape

(84, 84, 4)

In [12]:
import tensorflow as tf
from keras.models import Model
from keras.layers import Input
from keras.optimizers import RMSprop
from keras.layers import Lambda


def build_deep_mind_model(
    image_size: tuple=(84, 84),
    num_frames: int=4,
    num_actions: int=6,
    loss=tf.losses.huber_loss,
    optimizer=RMSprop(lr=0.00025, rho=0.95, epsilon=0.01)
) -> Model:
    """
    Build and return the Deep Mind model for the given domain parameters.

    Notes:
        Color Space: this CNN expects single channel images (B&W)

    Args:
        input_shape: the shape of the image states for the model
                     Atari games are (192, 160), but DeepMind reduced the
                     size to (84, 84) to reduce computational load
        num_frames: the number of frames being stacked together
                    DeepMind uses 4 frames in their original implementation
        num_actions: the output shape for the model, this represents the
                     number of discrete actions available to a game
        loss: the loss metric to use at the end of the network
        optimizer: the optimizer for reducing error from batches

    Returns:
        a blank DeepMind CNN for image classification in a reinforcement agent

    """
    # build the CNN using the functional API
    cnn_input = Input((*image_size, num_frames), name='cnn')
    cnn = Lambda(lambda x: x / 255.0)(cnn_input)
    cnn = Lambda(lambda x: x * 255.0)(cnn)

    # build the model
    model = Model(input=cnn_input, output=cnn)
    # compile the model with the default loss and optimization technique
    model.compile(loss=loss, optimizer=optimizer)

    return model

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [13]:
model = build_deep_mind_model()



In [14]:
pred = model.predict(down[np.newaxis, :, :, :])

In [15]:
np.array_equal(down, pred)

False

In [16]:
(down - pred).sum()

0.0