In [4]:
#imports 

from gym import Env
from gym import spaces
import random
import numpy as np
from IPython.display import clear_output
import os

In [5]:
# Global constants

# Game board values: what a single cell of the flattened grid can hold.
NOTHING, PLAYER, WIN, LOSE = 0, 1, 2, 3

# Action values: the integer codes accepted by the environment's step().
UP, DOWN, LEFT, RIGHT = 0, 1, 2, 3

In [None]:
# helper function
def clear_screen():
    """Clear the notebook cell output and the hosting terminal.

    ``clear_output`` wipes the output area of the current notebook cell.
    The shell command additionally clears the terminal when the code is run
    from a console; the command name depends on the operating system
    (``cls`` on Windows, ``clear`` elsewhere).
    """
    clear_output()
    # "cls" only exists on Windows; on POSIX shells the equivalent is "clear".
    os.system("cls" if os.name == "nt" else "clear")

# prints out the environment state in a visually appealing way

def pretty_print(state_array, cumulative_reward):
    """Clear the display, then print the reward and the board as a 6x6 grid.

    Args:
        state_array: Indexable sequence holding the 36 cell values of the
            flattened board, row after row.
        cumulative_reward: Value shown on the "Cumulative Reward" header line.
    """
    clear_screen()
    print(f'Cumulative Reward: {cumulative_reward}')
    print()
    for row in range(6):
        offset = row * 6
        # Each cell is right-aligned in a 4-character column.
        line = "".join('{:4}'.format(state_array[offset + col]) for col in range(6))
        print(line)


In [6]:
class BasicEnv(Env):
    """Grid world on a flattened 6x6 board with one WIN and one LOSE cell.

    The observation is a length-36 ``np.int16`` array whose entries are
    NOTHING, PLAYER, WIN or LOSE. Cell ``i`` sits at row ``i // 6`` and
    column ``i % 6``. Actions are UP, DOWN, LEFT and RIGHT; a move that would
    leave the grid keeps the player in place.

    Rewards: -0.01 for every non-terminal step, +1.0 for reaching WIN and
    -1.0 for reaching LOSE. Either terminal cell ends the episode.

    NOTE(review): this class derives from ``gym.Env``. The ``check_env`` cell
    in this notebook records an AssertionError asking for ``gymnasium.Env``,
    so using it with Stable-Baselines3 will require a migration.
    """

    # Board geometry. pretty_print() assumes the same 6x6 layout.
    GRID_SIZE = 6
    N_CELLS = GRID_SIZE * GRID_SIZE

    def __init__(self):
        # observation space (valid range for each entry of the state)
        self.observation_space = spaces.Box(NOTHING, LOSE, [self.N_CELLS], dtype=np.int16)

        # spaces.Discrete(4) is a shortcut for defining actions 0-3
        self.action_space = spaces.Discrete(4)

        # Sets cumulative_reward, player/win/lose positions and state.
        self.reset()

    def _new_board(self):
        """Pick distinct player, win and lose cells and return a fresh board."""
        self.player_position = random.randrange(0, self.N_CELLS)
        self.win_position = random.randrange(0, self.N_CELLS)
        self.lose_position = random.randrange(0, self.N_CELLS)

        # Re-draw until the three special cells do not overlap each other.
        while self.win_position == self.player_position:
            self.win_position = random.randrange(0, self.N_CELLS)

        while self.lose_position in (self.win_position, self.player_position):
            self.lose_position = random.randrange(0, self.N_CELLS)

        board = np.full(self.N_CELLS, NOTHING, dtype=np.int16)
        board[self.player_position] = PLAYER
        board[self.win_position] = WIN
        board[self.lose_position] = LOSE
        return board

    def step(self, action):
        """Apply one action and return ``(state, reward, done, info)``.

        Args:
            action: One of UP, DOWN, LEFT, RIGHT.

        Returns:
            A tuple of the board array, the reward for this step, whether the
            episode ended, and an (empty) info dict.

        Raises:
            ValueError: If ``action`` is not one of the four action codes.
        """
        # placeholder for debugging information
        info = {}

        # default values for done, reward and the player position
        done = False
        reward = -0.01
        previous_position = self.player_position
        column = self.player_position % self.GRID_SIZE

        # Move the agent, keeping it on the grid.
        if action == UP:
            if self.player_position - self.GRID_SIZE >= 0:
                self.player_position -= self.GRID_SIZE
        elif action == DOWN:
            if self.player_position + self.GRID_SIZE < self.N_CELLS:
                self.player_position += self.GRID_SIZE
        elif action == LEFT:
            if column != 0:
                self.player_position -= 1
        elif action == RIGHT:
            # The right edge is the LAST column; testing against column 0
            # would block moves from the left edge and let the player wrap
            # onto the next row (or past the end of the array).
            if column != self.GRID_SIZE - 1:
                self.player_position += 1
        else:
            raise ValueError("Invalid action")

        # Check win or lose condition and set the reward.
        message = None
        landed_on = self.state[self.player_position]
        if landed_on == WIN:
            reward, done, message = 1.0, True, 'WIN !!!'
        elif landed_on == LOSE:
            reward, done, message = -1.0, True, "Lose :B"

        # Account for the reward exactly once per step.
        self.cumulative_reward += reward

        if done:
            # Display the outcome; the board is left as it was so the
            # WIN/LOSE marker is not overwritten.
            clear_screen()
            print(f'Cumulative Reward: {self.cumulative_reward}')
            print(message)
        else:
            # Update the environment state.
            self.state[previous_position] = NOTHING
            self.state[self.player_position] = PLAYER

        return self.state, reward, done, info

    def reset(self):
        """Start a new episode with a random layout and return the board."""
        self.cumulative_reward = 0
        self.state = self._new_board()
        return self.state

    def render(self):
        """Print the cumulative reward and the current board."""
        pretty_print(self.state, self.cumulative_reward)

In [14]:
# Run one episode with a random policy.
env = BasicEnv()

# visualize the current state of the environment
env.render()

# the first move is fixed rather than asked from the user
action = DOWN

# step() provides the new state, the reward, the done flag and debug info
state, reward, done, info = env.step(action)

# keep repeating until the game is over
while not done:
    env.render()
    # Sample from all four actions: randrange(0, 3) excludes its stop value,
    # so RIGHT (3) would never be chosen.
    action = random.choice((UP, DOWN, LEFT, RIGHT))
    state, reward, done, info = env.step(action)

TypeError: cannot unpack non-iterable NoneType object

In [13]:
# Validate BasicEnv with the Stable-Baselines3 environment checker.
# NOTE(review): the output recorded for this cell is an AssertionError saying
# the environment must inherit from gymnasium.Env, while BasicEnv derives from
# the Env imported from `gym` at the top of the notebook — so this check fails
# as written.
from stable_baselines3.common.env_checker import check_env
env = BasicEnv()
check_env(env)

AssertionError: Your environment must inherit from the gymnasium.Env class cf. https://gymnasium.farama.org/api/env/