In [None]:
# Pygame installation
!pip install pygame

In [2]:
import pygame as pg
from pygame import image as img

pygame 2.1.0 (SDL 2.0.16, Python 3.9.7)
Hello from the pygame community. https://www.pygame.org/contribute.html


In [3]:
# Gym Imports
import gym
from gym import Env
from gym.spaces import Discrete, Box, Dict, Tuple, MultiBinary, MultiDiscrete # different types of spaces

# Helpers
import numpy as np
import random
import os

# Stable baselines
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy

# Time module to make program halt for presentation purposes
import time

In [None]:
# Function definitions for use
def load_image(file):
    """Load the image at `file` and return it converted for alpha blitting.

    Args:
        file: path (or file object) handed straight to ``pg.image.load``.

    Returns:
        The loaded surface after ``convert_alpha()``.

    Raises:
        SystemExit: if ``pg.image.load`` raises ``pg.error``; the message names
            the file and includes ``pg.get_error()``.
    """
    try:
        loaded = pg.image.load(file)
    except pg.error:
        message = 'Could not load image "%s" %s' % (file, pg.get_error())
        raise SystemExit(message)
    else:
        # convert_alpha keeps the per-pixel transparency of .png files
        return loaded.convert_alpha()

In [None]:
# Start pygame and open a window before the sprite classes below are defined:
# their class bodies call load_image(), which ends in convert_alpha().
# NOTE(review): convert_alpha() presumably needs a display mode to be set first — confirm.
pg.init()
win = pg.display.set_mode((0,0))
pg.display.set_caption("Custom Environment - Guess Path")

class Robot(pg.sprite.Sprite):
    """Sprite for the agent; the image shown reflects the last movement direction."""
    # List index doubles as the action id: 0 right, 1 left, 2 up, 3 down.
    images = list(map(load_image, (
        "assets/sprites/robo_R.png",
        "assets/sprites/robo_L.png",
        "assets/sprites/robo_U.png",
        "assets/sprites/robo_D.png",
    )))

    def __init__(self, pos):
        """Create the robot facing right, centred on the pixel position `pos`.

        `self.containers` must have been assigned on the class beforehand; it
        is passed to the Sprite constructor as the group(s) to join.
        """
        super().__init__(self.containers)
        self.image = self.images[0]
        self.setPosition(pos)

    def update(self, action: int):
        """Switch to the image for `action`; negative actions leave it unchanged."""
        if not (action >= 0):
            return
        self.image = self.images[action]

    def setPosition(self, newPos):
        """Replace the rect so that the current image is centred on `newPos`."""
        self.rect = frame = self.image.get_rect()
        # newPos is the desired centre; shift by half the image size to get the corner.
        frame.x = newPos[0] - frame.width / 2.0
        frame.y = newPos[1] - frame.height / 2.0

class Space(pg.sprite.Sprite):
    """Sprite for one board tile: normal, plus, minus, start or goal."""
    # List index doubles as the space type id used by setType().
    images = list(map(load_image, (
        "assets/sprites/normal.png",
        "assets/sprites/plus.png",
        "assets/sprites/minus.png",
        "assets/sprites/start.png",
        "assets/sprites/goal.png",
    )))

    def __init__(self, pos, jumpDistance):
        """Create a tile centred on `pos`, typed by the sign of `jumpDistance`.

        Zero gives a neutral tile (type 0), a positive value a forward-pushing
        tile (type 1) and anything else a backward-pushing tile (type 2).
        `self.containers` must have been assigned on the class beforehand.
        """
        super().__init__(self.containers)

        if jumpDistance == 0:
            kind = 0  # neutral movement - does not push Robot
        elif jumpDistance > 0:
            kind = 1  # positive movement - pushes robot forward
        else:
            kind = 2  # negative movement - pushes robot backward
        self.setType(kind)

        self.rect = tile = self.image.get_rect()
        # pos is the tile centre; shift by half the image size to get the corner.
        tile.x = pos[0] - tile.width / 2.0
        tile.y = pos[1] - tile.height / 2.0

    def setType(self, spaceType):
        """Set the tile's image and `spaceType` to the given index into `images`."""
        self.image = self.images[spaceType]
        self.spaceType = spaceType

# Initialize Game Groups
# NOTE(review): the name `all` shadows Python's builtin all() for every later cell.
all = pg.sprite.RenderUpdates()
    
# Space.__init__ and Robot.__init__ pass `self.containers` to the Sprite
# constructor, so both classes need this attribute before any instance is made.
Space.containers = all
Robot.containers = all

# Board does not require rendering
        
class Board():
    """Board that sets up all spaces and tracks the player's grid position.

    The grid always has three rows. `maxRows` is the number of tiles in each
    of those rows (i.e. the number of columns, despite its name).

    Attributes:
        spaces: nested list indexed as ``spaces[row][col]``.
        maxRows: tiles per row, never less than 1.
        playerPos: player position as ``[col, row]``.
        goal: goal tile as ``[row, col]`` - note that this is the opposite
            order from ``playerPos``.
    """
    def __init__(self, maxRows, pos):
        """Create the tiles and mark the start and goal tiles.

        Args:
            maxRows: tiles per row; values below 1 are raised to 1.
            pos: ``(x, y)`` pixel centre of the top-left tile. Tiles are laid
                out from there on a 64-pixel pitch.
        """
        self.spaces = [[], [], []]

        # cap minimum value to max rows in case of emergency
        self.maxRows = max(maxRows, 1)

        self.playerPos = [0, 1]
        self.goal = [1, self.maxRows - 1]

        # Thanks to Jack Malone for help with this loop
        for row, tiles in enumerate(self.spaces):
            for col in range(self.maxRows):
                tiles.append(Space(((64 * col) + pos[0], (64 * row) + pos[1]), 0))

        # Must be called through self: a bare setStartEnd() is an undefined
        # name here and made every Board construction fail with NameError.
        self.setStartEnd()

    def setStartEnd(self):
        """Give the start tile (row 1, column 0) type 3 and the goal tile type 4.

        With a single column the goal coincides with the start tile and the
        goal type, applied last, is the one that remains.
        """
        self.spaces[1][0].setType(3)
        self.spaces[self.goal[0]][self.goal[1]].setType(4)

    def update(self, robot: "Robot", action: int):
        """Move the player one tile for `action` and reposition `robot`.

        Actions are 0 right, 1 left, 2 up, 3 down. A move that would leave the
        board is ignored. Negative actions do nothing at all; any other
        non-negative action leaves the player in place but still re-applies
        the robot's position.
        """
        if action >= 0:
            if action == 0:  # right
                if self.playerPos[0] < self.maxRows - 1:
                    self.playerPos[0] += 1
            elif action == 1:  # left
                if self.playerPos[0] > 0:
                    self.playerPos[0] -= 1
            elif action == 2:  # up
                if self.playerPos[1] > 0:
                    self.playerPos[1] -= 1
            elif action == 3:  # down
                if self.playerPos[1] < 2:
                    self.playerPos[1] += 1
            # now that the robot has moved, update its position
            # NOTE(review): the 96-pixel origin is hard-coded and does not
            # follow the `pos` given to __init__ - confirm this is intended.
            robot.setPosition(((self.playerPos[0] * 64) + 96, (self.playerPos[1] * 64) + 96))

# Shut pygame down now that the classes are defined; the PathEnv cell opens
# its own window with pg.display.set_mode().
pg.quit()

In [None]:
class PathEnv(Env):
    """Gym environment in which the robot walks the board toward the goal tile.

    The window, background, board, robot sprite and clock are created once,
    when this class body runs, and are shared by every instance.

    Actions (``Discrete(4)``) use the same numbering as ``Robot.images`` and
    ``Board.update``: 0 right, 1 left, 2 up, 3 down.

    Observations are dicts matching ``observation_space``; ``determineState``
    describes the individual entries. ``step`` and ``reset`` follow the classic
    gym API (``reset`` returns the observation, ``step`` returns a 4-tuple).
    """
    win = pg.display.set_mode((1600,800))
    pg.display.set_caption("Custom Environment - Guess Path RL")
    bg = load_image('assets/sprites/background.png')
    board = Board(6, (100,100))
    # (Re)mark the start and goal tiles; setStartEnd only retags two tiles,
    # so repeating it is harmless.
    board.setStartEnd()
    # Copy of the starting grid position so reset() can restore it
    # (Board.update mutates playerPos in place).
    start_pos = list(board.playerPos)
    robo = Robot(((board.playerPos[0] * 64) + 96, 
                  (board.playerPos[1] * 64) + 96))
    # Robot.update and Board.update both ignore negative actions.
    action = -1
    clock = pg.time.Clock()

    # Wall-clock budget per episode, in milliseconds (the unit of Clock.get_time()).
    EPISODE_BUDGET_MS = 600

    def __init__(self):
        """Define the action/observation spaces and the initial state."""
        # Actions: 0 - Right, 1 - Left, 2 - Up, 3 - Down
        self.action_space = Discrete(4)

        # Largest index that can occur on either axis of the grid.
        top = max(self.board.maxRows, len(self.board.spaces)) - 1
        self.observation_space = Dict({
            "playerPos": Box(low=0, high=top, shape=(2,), dtype=np.int64),
            "goalPos": Box(low=0, high=top, shape=(2,), dtype=np.int64),
            # Neighbour entries use -1 for "no tile in that direction".
            "left": Box(low=-1, high=top, shape=(1,), dtype=np.int64),
            "up": Box(low=-1, high=top, shape=(1,), dtype=np.int64),
            "right": Box(low=-1, high=top, shape=(1,), dtype=np.int64),
            "down": Box(low=-1, high=top, shape=(1,), dtype=np.int64),
        })

        self.state = self.determineState()
        self.alloted_length = self.EPISODE_BUDGET_MS

    def step(self, action):
        """Apply `action`, advance the clock and return the transition.

        Returns:
            ``(state, reward, done, info)``. ``done`` becomes True once the
            episode's time budget is used up; ``info`` is always empty.
        """
        # Limit the loop to 30 steps per second so the movement can be watched.
        self.clock.tick(30)

        # Update Environment elements
        self.robo.update(action)
        self.board.update(self.robo, action)

        # With elements updated, determine state, reward etc
        self.state = self.determineState()

        # Reduce alloted length to use environment by time moved via pygame clock
        self.alloted_length -= self.clock.get_time()

        reward = self.calculateReward()

        # Check if environment is done
        done = self.alloted_length <= 0

        # Needed during the return
        info = {}

        # Return step information
        return self.state, reward, done, info

    def render(self, mode="human"):
        """Draw the background, every tile and the robot, then refresh the window.

        `mode` is accepted for compatibility with the gym ``render`` signature
        and is not otherwise used.
        """
        # Draw on whatever window is currently open; fall back to the surface
        # created with the class if no display mode is reported.
        surface = pg.display.get_surface()
        if surface is None:
            surface = self.win

        surface.blit(self.bg, (0,0))
        for row in self.board.spaces:
            for space in row:  # position all spaces to the correct top left position
                surface.blit(space.image, (space.rect.x, space.rect.y))

        surface.blit(self.robo.image, (self.robo.rect.x, self.robo.rect.y))
        pg.display.update()

    def reset(self):
        """Put the robot back on the start tile and restart the time budget.

        Returns:
            The observation for the restored starting position.
        """
        self.board.playerPos = list(self.start_pos)
        # Same image and pixel formula the class body uses when creating the robot.
        self.robo.image = self.robo.images[0]
        self.robo.setPosition(((self.board.playerPos[0] * 64) + 96,
                               (self.board.playerPos[1] * 64) + 96))

        # Reset starting state
        self.state = self.determineState()
        # Reset alloted time to interact
        self.alloted_length = self.EPISODE_BUDGET_MS
        return self.state

    def calculateReward(self):
        """Return +1 while the robot stands on the goal tile, otherwise -1.

        NOTE(review): placeholder shaping that keeps the +1/-1 scheme this
        class used before; replace once the intended reward is decided.
        """
        col, row = self.board.playerPos
        goal_row, goal_col = self.board.goal  # Board stores the goal as [row, col]
        return 1 if (col, row) == (goal_col, goal_row) else -1

    def determineState(self):
        """Build the observation dict for the board's current position.

        Entries (all integer numpy arrays):
            playerPos: ``[col, row]`` of the robot.
            goalPos: ``[col, row]`` of the goal. ``Board.goal`` is stored as
                ``[row, col]``, so it is swapped here to match ``playerPos``.
            left / right: column index of the tile on that side, or -1 when
                the robot is on that edge of the board.
            up / down: row index of the tile on that side, or -1 when the
                robot is on that edge of the board.

        NOTE(review): earlier comments spoke of reporting the *type* of each
        neighbouring space; the code has only ever computed neighbour indices,
        and that is what is kept here.
        """
        col, row = self.board.playerPos
        goal_row, goal_col = self.board.goal
        last_col = self.board.maxRows - 1
        last_row = len(self.board.spaces) - 1

        # we need to determine if it is possible to move in all 4 directions
        # as the AI may be at the edge of a board
        left = col - 1 if col > 0 else -1
        up = row - 1 if row > 0 else -1
        right = col + 1 if col < last_col else -1
        down = row + 1 if row < last_row else -1

        return {
            "playerPos": np.array([col, row], dtype=np.int64),
            "goalPos": np.array([goal_col, goal_row], dtype=np.int64),
            "left": np.array([left], dtype=np.int64),
            "up": np.array([up], dtype=np.int64),
            "right": np.array([right], dtype=np.int64),
            "down": np.array([down], dtype=np.int64),
        }
        

In [None]:
# Single environment instance; the random-action test cell below drives it.
env = PathEnv()

In [None]:
# Run this cell to test that the Environment works properly.
# Actions are sampled at random from the action space; no model is used here.

pg.display.init()
win = pg.display.set_mode((1600,800))
pg.display.set_caption("Custom Environment - Guess Path RL")

episodes = 5
try:
    for episode in range(1, episodes+1):
        obs = env.reset()
        done = False
        score = 0

        while not done:
            # Empty the event queue each frame; the events themselves are not used.
            pg.event.get()
            env.render()
            action = env.action_space.sample()
            obs, reward, done, info = env.step(action)
            score += reward
        print('Episode:{} Score:{}'.format(episode, score))
finally:
    # Release the environment and close the window even if an episode raised,
    # so a failed run does not leave a dead pygame window behind.
    env.close()
    pg.display.quit()