In [1]:
import warnings
warnings.filterwarnings('ignore')
import gym_super_mario_bros
from nes_py.wrappers import JoypadSpace
from gym_super_mario_bros.actions import SIMPLE_MOVEMENT #Importing simple controls

In [2]:
#Creating the raw 'SuperMarioBros-v0' environment (no wrappers applied yet) to inspect its default spaces
env = gym_super_mario_bros.make('SuperMarioBros-v0')

In [3]:
#Action meanings exposed by the environment (this only lists them; no action is sampled)
acts = env.get_action_meanings()
print(acts)

#Shape of the observation space
shp = env.observation_space.shape
print(shp)


['NOOP']
(240, 256, 3)


### Preparing wrappers for preprocessing the environment

In [4]:
#Necessary imports
import numpy as np
import gym
import collections 

In [5]:
#Gym's wrappers to transform the input
#Taken from: https://console.paperspace.com/ml-showcase/notebook/rcrd0w769nip72j?file=mario_notebook.ipynb

class MaxAndSkipEnv(gym.Wrapper):
    """Repeat every action for up to `skip` frames and return only the last result.

    The rewards of the repeated frames are summed, and the returned observation
    is the element-wise maximum over the most recent raw frames held in a
    two-slot buffer.
    """

    def __init__(self, env=None, skip=4):
        """Wrap `env`.

        Args:
            env: environment to wrap.
            skip: number of times each action is repeated per `step` call;
                must be at least 1.

        Raises:
            ValueError: if `skip` is smaller than 1.
        """
        # With skip < 1 the loop in step() would never call the inner env,
        # leaving `info` unbound; fail early with a clear message instead.
        if skip < 1:
            raise ValueError("skip must be >= 1, got {!r}".format(skip))
        super(MaxAndSkipEnv, self).__init__(env)
        # most recent raw observations (for max pooling across time steps)
        self._obs_buffer = collections.deque(maxlen=2)
        self._skip = skip

    def step(self, action):
        """Apply `action` up to `skip` times, stopping early when the episode ends.

        Returns:
            (max_frame, total_reward, done, info) where `max_frame` is the
            element-wise max over the buffered frames, `total_reward` is the
            summed reward, and `done` / `info` come from the last inner step.
        """
        total_reward = 0.0
        for _ in range(self._skip):
            obs, reward, done, info = self.env.step(action)
            self._obs_buffer.append(obs)
            total_reward += reward
            if done:
                break
        # The buffer is not cleared between calls, so it may still hold the
        # last frame of the previous step when the loop exits after one frame.
        max_frame = np.max(np.stack(self._obs_buffer), axis=0)
        return max_frame, total_reward, done, info

    def reset(self):
        """Clear past frame buffer and init to first obs"""
        self._obs_buffer.clear()
        obs = self.env.reset()
        self._obs_buffer.append(obs)
        return obs


class ImageToPyTorch(gym.ObservationWrapper):
    """Move the channel axis of each observation from position 2 to position 0.

    The advertised observation space is rebuilt with the permuted shape and
    declared as a float32 Box bounded by 0.0 and 1.0.
    """

    def __init__(self, env):
        super().__init__(env)
        src_shape = self.observation_space.shape
        # last axis first, followed by the first two axes of the source shape
        channels_first = (src_shape[-1], src_shape[0], src_shape[1])
        self.observation_space = gym.spaces.Box(
            low=0.0,
            high=1.0,
            shape=channels_first,
            dtype=np.float32,
        )

    def observation(self, observation):
        """Return `observation` with axis 2 moved to the front."""
        return np.moveaxis(observation, source=2, destination=0)


class ScaledFloatFrame(gym.ObservationWrapper):
    """Convert each observation to float32 and divide it by 255.0."""

    def observation(self, obs):
        """Return a new float32 array holding `obs / 255.0`."""
        frame = np.array(obs)
        as_float = frame.astype(np.float32)
        return as_float / 255.0


class BufferWrapper(gym.ObservationWrapper):
    """Keep a rolling window of recent observations stacked along axis 0.

    The window is ordered oldest first: every new observation is written to
    the last slot after the existing content has been shifted one slot
    towards index 0.
    """

    def __init__(self, env, n_steps, dtype=np.float32):
        """Wrap `env`.

        Args:
            env: environment to wrap.
            n_steps: repetition factor applied along axis 0 to the wrapped
                space's `low` / `high` bounds to build the stacked space.
            dtype: dtype of the internal buffer and of the stacked space.
        """
        super(BufferWrapper, self).__init__(env)
        self.dtype = dtype
        old_space = env.observation_space
        self.observation_space = gym.spaces.Box(
            old_space.low.repeat(n_steps, axis=0),
            old_space.high.repeat(n_steps, axis=0),
            dtype=dtype,
        )

    def reset(self):
        """Zero the window, reset the wrapped env and return the first stacked observation.

        The buffer is created here, so `reset` must be called before `step`.
        """
        self.buffer = np.zeros_like(self.observation_space.low, dtype=self.dtype)
        return self.observation(self.env.reset())

    def observation(self, observation):
        """Push `observation` into the window and return a snapshot of it."""
        self.buffer[:-1] = self.buffer[1:]
        self.buffer[-1] = observation
        # Return a copy: the internal buffer is shifted in place on every call,
        # so handing it out directly would silently rewrite observations the
        # caller kept from earlier steps (e.g. stored state / next_state pairs).
        return self.buffer.copy()


## Preprocessing the environment

In [6]:
from gym.wrappers import ResizeObservation, GrayScaleObservation

In [7]:
#Re-creating the environment so the wrappers below are applied to a fresh instance
env = gym_super_mario_bros.make('SuperMarioBros-v0')

def wrap_env(env):
    """Apply the preprocessing wrappers to `env` and return the wrapped environment.

    Wrappers are applied innermost first, in the order listed below; the
    joypad wrapper with the SIMPLE_MOVEMENT action set ends up outermost.
    """
    pipeline = (
        MaxAndSkipEnv,                                        # action repeat + frame max
        lambda e: GrayScaleObservation(e, keep_dim=True),     # grayscale, channel axis kept
        lambda e: ResizeObservation(e, 84),                   # resize the observation
        ImageToPyTorch,                                       # channel axis first
        lambda e: BufferWrapper(e, 4),                        # buffer of 4 frames
        ScaledFloatFrame,                                     # scale the frame by 1/255
        lambda e: JoypadSpace(e, SIMPLE_MOVEMENT),            # simple controls
    )
    wrapped = env
    for apply_wrapper in pipeline:
        wrapped = apply_wrapper(wrapped)
    return wrapped

In [8]:
#Applying the preprocessing wrappers.
#NOTE: env is rebound from itself, so re-running this cell on its own wraps the
#already-wrapped env a second time; re-run the cell that creates env first.
env = wrap_env(env)

#Action meanings after wrapping (this only lists them; no action is sampled)
acts = env.get_action_meanings()
print(acts)

#Observation space shape after wrapping
shp = env.observation_space.shape
print(shp)

['NOOP', 'right', 'right A', 'right B', 'right A B', 'A', 'left']
(4, 84, 84)


In [9]:
#Number of discrete actions in the wrapped environment
print('Actions availables in the game: ', env.action_space.n)
#First dimension of the wrapped observation shape (shp is later passed to make_DQN as input_shape)
print(shp[0])

7


## Neural Network Architecture

In [11]:
import torch, torchvision
import torch.nn as nn
from torchsummary import summary


In [21]:
#Defining the neural network with the atari architecture
def make_DQN(input_shape, n_actions, summary_=True, device=None):
    """Build the convolutional Q-network and move it to `device`.

    Args:
        input_shape: observation shape as (channels, height, width).
        n_actions: number of outputs of the final linear layer.
        summary_: when True, print a torchsummary layer table for `input_shape`.
        device: target device (anything `torch.device` accepts). Defaults to
            CUDA when it is available and to the CPU otherwise.

    Returns:
        An `nn.Sequential` with three Conv2d+ReLU stages, a Flatten, and two
        Linear layers (512 hidden units, then `n_actions` outputs).
    """
    if device is None:
        # Fall back to the CPU instead of failing on machines without CUDA.
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    else:
        device = torch.device(device)
    input_shape = tuple(input_shape)

    conv_layers = [
        nn.Conv2d(input_shape[0], 32, kernel_size=8, stride=4),
        nn.ReLU(),
        nn.Conv2d(32, 64, kernel_size=4, stride=2),
        nn.ReLU(),
        nn.Conv2d(64, 64, kernel_size=3, stride=1),
        nn.ReLU(),
    ]
    # Derive the flattened feature count from a dummy forward pass rather than
    # hard-coding it, so spatial sizes other than 84x84 work too
    # (a (4, 84, 84) input still yields 3136).
    with torch.no_grad():
        n_flat = nn.Sequential(*conv_layers)(torch.zeros(1, *input_shape)).numel()

    net = nn.Sequential(
        *conv_layers,
        nn.Flatten(),
        nn.Linear(n_flat, 512),
        nn.ReLU(),
        nn.Linear(512, n_actions),
    )
    if summary_:
        # Summarise for the shape this network was built for (not a global env),
        # on the CPU before the move so the table prints for any target device.
        summary(net, input_shape, device="cpu")  #Printing the network
    net.to(device)
    return net

In [24]:
#Building the DQN for the wrapped observation shape and action count (prints the layer summary by default)
net = make_DQN(shp, env.action_space.n)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 20, 20]           8,224
              ReLU-2           [-1, 32, 20, 20]               0
            Conv2d-3             [-1, 64, 9, 9]          32,832
              ReLU-4             [-1, 64, 9, 9]               0
            Conv2d-5             [-1, 64, 7, 7]          36,928
              ReLU-6             [-1, 64, 7, 7]               0
           Flatten-7                 [-1, 3136]               0
            Linear-8                  [-1, 512]       1,606,144
              ReLU-9                  [-1, 512]               0
           Linear-10                    [-1, 7]           3,591
Total params: 1,687,719
Trainable params: 1,687,719
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.11
Forward/backward pass size (MB): 0.35
Params size (MB): 6.44
Estimat

## Experience Replay and Target Network

### Experience replay