In [56]:
from nes_py.wrappers import JoypadSpace
import gym_super_mario_bros
from gym_super_mario_bros.actions import SIMPLE_MOVEMENT
import numpy as np
import pandas as pd
import time
import torch 
from torch import nn
from torchvision import transforms as T

In [57]:
# Build the World 2, Stage 2 environment (default version, single-life Mario)
# and restrict the controller to the SIMPLE_MOVEMENT action set in one step.
env = JoypadSpace(
    gym_super_mario_bros.make('SuperMarioBros-2-2-v0'),
    SIMPLE_MOVEMENT,
)

In [58]:
# Grab the observation and action spaces of the wrapped environment.
# NOTE: despite the names, these are gym space objects, not integer counts.
N_states, N_actions = env.observation_space, env.action_space

print('The state space has the following shape :', N_states)
print('The action space has the following shape :', N_actions)

The state space has the following shape : Box(0, 255, (240, 256, 3), uint8)
The action space has the following shape : Discrete(7)


In [59]:
# A few comments
# We are applying Q Learning initially
# The state space is huge : each state is a 240 x 256 x 3 array, with each entry an integer in the range [0,255]
# Thus, the total number of possible states is : 256 ^ (240 x 256 x 3)   (256 choices for each of the 184,320 entries)

# For tabular Q Learning, the Q table needs one row per state, so it must stay small enough to store and update
# For this reason we will attempt to pre-process the states returned by the environment
# Pre-processing can considerably reduce the total number of possible states, thus reducing the Q table size



# PRE-PROCESSING STATE SPACE


# GRAY SCALING

def grayscale(state_from_env):
    """Convert an RGB frame from the environment into a grayscale tensor.

    Parameters
    ----------
    state_from_env : numpy array
        Frame in H x W x 3 layout, as returned by the environment.

    Returns
    -------
    torch.Tensor
        Float tensor in 1 x H x W layout holding the grayscale frame.
    """
    # Reorder the axes to channel-first (3 x H x W), the layout the torchvision
    # transform works on. The array is copied before the tensor is built, as in
    # the original code (presumably to avoid stride issues with env frames - confirm).
    channel_first_frame = np.transpose(state_from_env, (2, 0, 1)).copy()
    frame_tensor = torch.tensor(channel_first_frame, dtype = torch.float)

    # Collapse the three colour channels into a single luminance channel.
    to_gray = T.Grayscale(num_output_channels = 1)
    return to_gray(frame_tensor)

# (tracking function input and output)
# grayscale input = numpy array (H x W x 3) 
# grayscale output = torch tensor, grayscaled version of RGB input, 1 x H x W, torch float


# DOWNSAMPLING GRAYSCALE IMAGE

# (to reduce the size of Q table, the grayscale image outputted will now be downsampled)

# Grayscaled input will be resized into a 9 x 10 matrix, using the default settings in torchvision

def reduce_size(grayscale_input, size = (9, 10)):
    """Downsample a grayscale frame with torchvision's ``Resize`` transform.

    Parameters
    ----------
    grayscale_input : torch.Tensor
        Expected to be the 1 x H x W grayscale tensor produced by ``grayscale``.
    size : tuple of int, optional
        Target (height, width), passed unchanged to ``T.Resize``. Defaults to
        (9, 10), the resolution that was previously hard-coded, so existing
        calls ``reduce_size(x)`` behave exactly as before.

    Returns
    -------
    torch.Tensor
        The resized frame; 1 x 9 x 10 when the default ``size`` is used.
    """
    # Interpolation mode and antialiasing are left at the torchvision defaults.
    resize_transform = T.Resize(size)
    return resize_transform(grayscale_input)
    
# (tracking function input and output)
# reduce_size input expected = 1 x H x W torch tensor, the grayscaled version of the state output via environment
# reduce_size output = 1 x 9 x 10 torch tensor, reduced size version of the grayscale


# Ensuring discrete states for Q table creation

# The output from the above two functions need not be integral. To remedy this, the reduce_size output will be rounded to 
# the nearest integer

# NOTE : After rounding, each entry of the final 1 x 9 x 10 tensor/array is an integer in the range [0,255] (256 possible values)



# ROUNDING THE REDUCE_SIZE OUTPUT TO MAKE STATE MATRIX ENTRIES INTEGRAL

# Realized here that tabular Q Learning is not feasible (even a 9 x 10 state still has 256 ^ 90 possible values), so DQN will be implemented instead