# Environment Setup

In [None]:
#!pip install opencv-python gym gym-retro

In [1]:
# Import retro to load ROM
import retro
# Import environment base class for a wrapper 
from gym import Env, ActionWrapper
# Import the space shapes for the environment
from gym.spaces import Box, Discrete
# Import numpy to calculate frame delta 
import numpy as np
# Import opencv for grayscaling
import cv2

In [2]:
# Import the ROM files found in the current directory into Gym Retro
!python -m retro.import .

Importing SonicTheHedgehog2-Genesis
Imported 1 games


In [3]:
class ActionDiscretizer(ActionWrapper):
    """Action wrapper that exposes a small discrete set of button combinations.

    Every discrete action index corresponds to one boolean array of length 12
    (one slot per controller button) that is handed to the wrapped environment.
    """

    def __init__(self, env):
        super().__init__(env)

        # Position of each button inside the 12-slot boolean action array
        button_order = ("B", "A", "MODE", "START", "UP", "DOWN", "LEFT", "RIGHT", "C", "Y", "X", "Z")
        slot_of = {name: slot for slot, name in enumerate(button_order)}

        # Button combinations offered to the agent, in discrete-index order
        combos = (
            ("LEFT",),
            ("RIGHT",),
            ("LEFT", "DOWN"),
            ("RIGHT", "DOWN"),
            ("DOWN",),
            ("DOWN", "B"),
            ("B",),
        )

        self._actions = []
        for combo in combos:
            pressed = np.zeros(12, dtype=bool)
            pressed[[slot_of[name] for name in combo]] = True
            self._actions.append(pressed)

        self.action_space = Discrete(len(self._actions))

    def action(self, a):
        """Return a fresh copy of the button array for discrete action ``a``."""
        chosen = self._actions[a]
        return chosen.copy()

In [4]:
# Creating custom environment 
class Sonic(Env):
    """Gym environment for Sonic the Hedgehog 2 (Genesis) with a shaped reward.

    Observations are grayscale frames of shape (100, 100, 1) and dtype uint8.
    The reward returned by ``step`` replaces the emulator's own reward with a
    weighted mix of forward progress, collected rings, score gained,
    level-end bonus and lives gained/lost.
    """

    # Save-state name for every level index accepted by ``level``
    LEVELS = {
        0: 'EmeraldHillZone.Act1',
        1: 'EmeraldHillZone.Act2',
        2: 'ChemicalPlantZone.Act1',
        3: 'ChemicalPlantZone.Act2',
        4: 'AquaticRuinZone.Act1',
        5: 'AquaticRuinZone.Act2',
        6: 'CasinoNightZone.Act1',
        7: 'CasinoNightZone.Act2',
        8: 'HillTopZone.Act1',
        9: 'HillTopZone.Act2',
        10: 'MysticCaveZone.Act1',
        11: 'MysticCaveZone.Act2',
        12: 'OilOceanZone.Act1',
        13: 'OilOceanZone.Act2',
        14: 'MetropolisZone.Act1',
        15: 'MetropolisZone.Act2',
        16: 'MetropolisZone.Act3',
    }

    # Side length (pixels) of the square frame handed to the agent
    FRAME_SIZE = 100
    # Number of consecutive steps without a new farthest x before the penalty applies
    # (the original note on this threshold says "30sec")
    STALL_LIMIT = 2000

    # Reward weights
    MOVE_WEIGHT = 0.009
    RING_WEIGHT = 14
    SCORE_WEIGHT = 10
    LIFE_WEIGHT = 10

    # Values the trackers are reset to at the start of an episode.
    # NOTE(review): presumably the starting x position and life count of the
    # default save state - confirm they also hold for states loaded via level().
    START_X = 96
    START_LIVES = 3

    def __init__(self):
        super().__init__()
        # Specify observation space
        self.observation_space = Box(
            low=0,
            high=255,
            shape=(self.FRAME_SIZE, self.FRAME_SIZE, 1),
            dtype=np.uint8,
        )
        # Start up an instance of the game
        self.game = retro.make('SonicTheHedgehog2-Genesis')
        # Declare the action space so the environment is a complete gym.Env
        # even when it is used without an action wrapper.
        self.action_space = self.game.action_space

    def level(self, num):
        """Select the level to play.

        Args:
            num: Level index from 0 to 16 (see ``LEVELS``).

        Raises:
            KeyError: If ``num`` is not a known level index.
        """
        # NOTE(review): assumed to take effect on the next reset() - confirm
        # against the gym-retro load_state documentation.
        self.game.load_state(self.LEVELS[num])

    def step(self, action):
        """Advance the game by one step and compute the shaped reward.

        Args:
            action: Action forwarded unchanged to the underlying retro env.

        Returns:
            Tuple ``(obs, reward, done, info)`` where ``obs`` is the
            preprocessed frame, ``reward`` is the shaped reward and ``done`` /
            ``info`` come straight from the underlying environment.
        """
        # The emulator's own reward is discarded in favour of the shaped one
        obs, _, done, info = self.game.step(action)
        obs = self.preprocess(obs)

        # ***Reward shaping***
        x = info["x"]
        lives = info["lives"]
        rings = info["rings"]
        score = info["score"]
        level_end_bonus = info["level_end_bonus"]

        if x > self.x_farthest:
            # New farthest position reached: reward it and clear the stall counter
            moving_reward = 1
            self.x_farthest = x
            self.i = 0
        else:
            # No new ground covered on this step
            moving_reward = 0
            self.i += 1

        # Penalise the agent once it has stalled for too long
        if self.i > self.STALL_LIMIT:
            moving_reward = -1

        # Per-step changes; the lives delta is negative when a life is lost
        lives_delta = lives - self.lives
        self.lives = lives
        rings_delta = rings - self.rings
        self.rings = rings
        score_delta = score - self.kils
        self.kils = score

        reward = (
            moving_reward * self.MOVE_WEIGHT
            + rings_delta * self.RING_WEIGHT
            + score_delta * self.SCORE_WEIGHT
            + level_end_bonus
            + lives_delta * self.LIFE_WEIGHT
        )

        return obs, reward, done, info

    def reset(self):
        """Reset the reward trackers and the game; return the first frame."""
        # Trackers used by step() to turn absolute values into per-step deltas
        self.x_farthest = self.START_X
        self.lives = self.START_LIVES
        self.rings = 0
        self.kils = 0  # last seen value of info["score"] (attribute name kept as is)
        self.i = 0     # consecutive steps without reaching a new farthest x

        # Return the first frame
        obs = self.game.reset()
        return self.preprocess(obs)

    def preprocess(self, observation):
        """Convert a colour frame to a (FRAME_SIZE, FRAME_SIZE, 1) grayscale array."""
        # Gym Retro returns frames in RGB channel order, so convert from RGB;
        # the BGR code would swap the red and blue luminance weights.
        gray = cv2.cvtColor(observation, cv2.COLOR_RGB2GRAY)
        # Resize
        resized = cv2.resize(
            gray, (self.FRAME_SIZE, self.FRAME_SIZE), interpolation=cv2.INTER_CUBIC
        )
        # Add the channels axis
        return np.reshape(resized, (self.FRAME_SIZE, self.FRAME_SIZE, 1))

    def render(self, *args, **kwargs):
        """Render through the underlying game, forwarding arguments and result.

        Forwarding matters for calls such as ``render(mode='rgb_array')``,
        whose caller expects the frame to be returned.
        """
        return self.game.render(*args, **kwargs)

    def close(self):
        """Shut down the underlying game instance."""
        self.game.close()

# Testing Environment

In [None]:
"""
# Game with random actions
# (disabled: this whole cell is a string; remove the surrounding triple quotes to run it)

# Importing sleep to slow down game
from time import sleep

# Creating environment
env = Sonic()
env = ActionDiscretizer(env)
env.level(1)

obs = env.reset()
result = 0

for step_count in range(1, 1_001):
    action = env.action_space.sample()
    obs, reward, done, info = env.step(action)
    result += reward
    env.render()
    sleep(0.001)
    # Report the running total every 50 steps (each reward is counted once)
    if step_count % 50 == 0:
        print(result)
    # Start a new episode instead of stepping a finished one
    if done:
        obs = env.reset()
"""

In [None]:
"""
# Frame-stacking preview
# (disabled: this whole cell is a string; remove the surrounding triple quotes to run it)
# Uses `env` and `result` created by the random-actions cell.
print("\nshape of screen = ", env.observation_space.sample().shape)
print("total score = ", result)

from stable_baselines3.common.vec_env import DummyVecEnv, VecFrameStack
import matplotlib.pyplot as plt

# Keep `env` untouched so re-running this cell does not wrap an already wrapped env
stacked_env = VecFrameStack(DummyVecEnv([lambda: env]), 4, channels_order='last')
obs = stacked_env.reset()

plt.figure(figsize=(16,9))

# Skipping some first black frames
for _ in range(6):
    obs, reward, done, info = stacked_env.step([stacked_env.action_space.sample()])
# Displaying the stacked frames as movement
n_frames = obs.shape[3]
for i in range(n_frames):
    obs, reward, done, info = stacked_env.step([stacked_env.action_space.sample()])
    plt.subplot(1, n_frames, i+1)
    plt.imshow(obs[0][:,:,i])
plt.show();
"""