<a href="https://colab.research.google.com/github/SJinji/reinforcement_project/blob/environment/RL_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Frozen Lake with Reinforcement Learning**
# BY Mengyu LIANG, Nhat Mai NGUYEN, Jinji SHEN

# **Building Environment**

In [6]:
import numpy as np
from copy import deepcopy
import enum

In [8]:
# the four moves available to the agent, as an enumeration
class Action(enum.Enum):
    """Closed set of grid moves.

    Members are declared in the order Left, Right, Up, Down and carry the
    integer values 0, 1, 2, 3 respectively, so ``Action(index)`` converts an
    integer action index into the matching member.
    """

    Left, Right, Up, Down = range(4)

# define the environment class
class Environment:
    """Deterministic Frozen Lake grid world.

    The map is a 2-D grid of single-character tiles:

    * ``'S'`` - start tile (safe, reward 0)
    * ``'F'`` - frozen tile (safe, reward 0)
    * ``'H'`` - hole: ends the episode with reward 0
    * ``'G'`` - goal: ends the episode with reward 1

    States are the cell indices in row-major order (0 is the top-left cell),
    so state ``s`` lies at row ``s // n_cols`` and column ``s % n_cols``.
    """

    def __init__(self):
        """Build the 4x4 map, the action/state spaces and the border-state lists."""
        # initialize the map, which is a 4x4 grid of characters
        self.map = np.array([['S', 'F', 'F', 'F'],
                             ['F', 'H', 'F', 'H'],
                             ['F', 'F', 'F', 'H'],
                             ['H', 'F', 'F', 'G']])
        n_rows, n_cols = self.map.shape

        # define the action space as an array of possible actions
        self.action_space = np.array([Action.Left, Action.Right, Action.Up, Action.Down])
        # define the state space as one integer per map cell (0 to 15 here)
        self.state_space = list(range(self.map.size))

        # States on the border of the grid, where the corresponding move would
        # leave the map. Derived from the map shape so that they stay correct
        # if the map above is edited.
        self.no_left_states = [row * n_cols for row in range(n_rows)]
        self.no_right_states = [row * n_cols + n_cols - 1 for row in range(n_rows)]
        self.no_up_states = list(range(n_cols))
        self.no_down_states = list(range((n_rows - 1) * n_cols, n_rows * n_cols))

        # initialize the current state to be the top-left corner of the map
        self.current_state = 0

    def get_action_space(self):
        """Return the array of ``Action`` members."""
        return self.action_space

    def get_state_space(self):
        """Return the list of all state indices."""
        return self.state_space

    def get_random_action(self):
        """Return one ``Action`` member drawn uniformly from the action space."""
        return np.random.choice(self.action_space)

    def step(self, action_index):
        """Apply one move and return ``(state, reward, done)``.

        Args:
            action_index: value accepted by ``Action(...)``, i.e. an integer
                0-3 or an ``Action`` member.

        Returns:
            A tuple ``(current_state, reward, done)``. A move that would leave
            the map keeps the agent in place and yields ``(state, 0, False)``.
            Landing on the goal yields reward 1 and ``done=True``; landing on
            a hole yields reward 0 and ``done=True``.

        Raises:
            ValueError: if ``action_index`` does not correspond to an ``Action``.
        """
        # convert the action index to an Action enumeration
        action = Action(action_index)

        # a move off the map is a no-op: same state, no reward, episode goes on
        if self.invalid_action(action):
            return self.current_state, 0, False

        # moving vertically changes the row-major index by one full row
        n_cols = self.map.shape[1]
        if action == Action.Left:
            self.current_state -= 1
        elif action == Action.Right:
            self.current_state += 1
        elif action == Action.Up:
            self.current_state -= n_cols
        else:
            self.current_state += n_cols

        # look up the tile the agent has just moved onto
        row, column = self.get_indices_of_current_state()
        letter = self.map[row][column]

        # safe tile: the episode continues
        if letter == 'S' or letter == 'F':
            return self.current_state, 0, False
        # goal: reward of 1 and the episode is over
        elif letter == 'G':
            return self.current_state, 1, True
        # hole: no reward and the episode is over. This must be its own
        # branch; nested under the goal branch it is unreachable and the
        # method would return None when the agent falls into a hole.
        else:
            return self.current_state, 0, True

    def invalid_action(self, action):
        """Return True if ``action`` would move the agent off the map."""
        blocked_states = {
            Action.Left: self.no_left_states,
            Action.Right: self.no_right_states,
            Action.Up: self.no_up_states,
            Action.Down: self.no_down_states,
        }
        # anything that is not one of the four moves is never reported invalid
        return self.current_state in blocked_states.get(action, ())

    def get_indices_of_current_state(self):
        """Return the ``(row, column)`` map indices of the current state.

        Relies on states being numbered in row-major order and on
        ``current_state`` lying inside the state space.
        """
        return divmod(self.current_state, self.map.shape[1])

    def reset(self):
        """Move the agent back to the start state (0) and return that state."""
        self.current_state = 0
        return self.current_state

    def print_current_state(self):
        """Print the map with the agent's current cell shown as ``'X'``."""
        # work on a copy so the real map is left untouched
        temp_map = deepcopy(self.map)
        row, column = self.get_indices_of_current_state()
        temp_map[row][column] = 'X'

        # one line per map row, cells separated by spaces, then a blank line
        for r in temp_map:
            print(*r)
        print()