<a href="https://colab.research.google.com/github/S-EGK/Intelligent-Path-Planning/blob/main/Obstacle%20Avoidance/RL%20based%20Obstacle%20Avoidance.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import random

# Global Variables
# Grid dimensions of the board (number of rows and columns).
BOARD_ROWS = 10
BOARD_COLS = 10
# Goal cell as (row, col); State.giveReward returns 1 there and the episode ends.
WIN_STATE = (9,9)
# Cell (row, col) used as the default position of a freshly created State.
START = (0,0)
# Copied into State.determine; nxtPosition only handles the True case.
DETERMINISTIC = True

In [None]:
class State:
    """Position of the agent on the obstacle grid.

    The board is a ``BOARD_ROWS`` x ``BOARD_COLS`` array of zeros in which
    obstacle cells hold ``-1``. ``state`` is the agent's current ``(row, col)``
    cell and ``isEnd`` becomes True once ``isEndFunc`` sees the goal cell.
    """

    # Cells the agent can never enter, as (row, col). Single source of truth
    # for both the board contents and the move-legality check.
    OBSTACLES = (
        (1, 5), (1, 7), (3, 2), (3, 4), (3, 8),
        (4, 2), (4, 8), (6, 3), (6, 5), (8, 7),
    )

    # (row, col) offset applied by each action.
    _MOVES = {"up": (-1, 0), "down": (1, 0), "left": (0, -1), "right": (0, 1)}

    def __init__(self, state=START):
        """Build the board and place the agent on ``state`` (a (row, col) tuple)."""
        self.board = np.zeros([BOARD_ROWS, BOARD_COLS])
        for cell in self.OBSTACLES:
            self.board[cell] = -1
        self.state = state
        self.isEnd = False
        self.determine = DETERMINISTIC

    def giveReward(self):
        """Return 1 when the agent stands on ``WIN_STATE``, otherwise 0."""
        return 1 if self.state == WIN_STATE else 0

    def isEndFunc(self):
        """Set ``isEnd`` to True when the current cell is ``WIN_STATE``."""
        if self.state == WIN_STATE:
            self.isEnd = True

    def nxtPosition(self, action):
        """Return the cell reached by taking ``action`` from the current cell.

        ``action`` is one of "up", "down", "left", "right"; any other value is
        treated as "right". The current cell is returned unchanged when the
        move would leave the board or land on an obstacle. This method does
        not modify ``self.state``.

        Raises:
            NotImplementedError: if ``self.determine`` is False, because only
                deterministic moves are implemented (previously this case
                silently returned None).
        """
        if not self.determine:
            raise NotImplementedError(
                "non-deterministic transitions are not implemented"
            )

        d_row, d_col = self._MOVES.get(action, self._MOVES["right"])
        row, col = self.state[0] + d_row, self.state[1] + d_col

        in_bounds = 0 <= row < BOARD_ROWS and 0 <= col < BOARD_COLS
        # Obstacles are marked with -1 on the board.
        if in_bounds and self.board[row, col] != -1:
            return (row, col)
        return self.state

In [None]:
class Agent:
    """Learns a value for every grid cell by replaying episodes on the board.

    Each episode walks from a fresh ``State()`` until ``isEnd`` is set, picking
    actions with an epsilon-greedy rule, and then propagates the terminal
    reward backwards through the cells visited during that episode.
    """

    def __init__(self):
        """Create the environment and the initial value table."""
        self.states = []  # cells entered during the current episode, in order
        self.actions = ["up", "down", "left", "right"]
        self.State = State()
        self.lr = 0.2  # step size used in the backward value update
        self.exp_rate = 0.5  # probability of picking a random action

        # Every cell starts with value 0 ...
        self.state_values = {
            (i, j): 0 for i in range(BOARD_ROWS) for j in range(BOARD_COLS)
        }
        # ... except obstacle cells (marked -1 on the board), which are pinned
        # to -1. Reading them from the board avoids a second hard-coded list.
        for row, col in np.argwhere(self.State.board == -1):
            self.state_values[(int(row), int(col))] = -1

    def chooseAction(self):
        """Pick the next action with an epsilon-greedy rule.

        With probability ``exp_rate`` a uniformly random action is returned;
        otherwise the action whose resulting cell has the highest stored value
        is returned. On ties the action listed later in ``self.actions`` wins.
        """
        if np.random.uniform(0, 1) <= self.exp_rate:
            return np.random.choice(self.actions)

        # Start below any possible value so an action is always selected,
        # even if every neighbouring value were negative.
        best_action = ""
        best_value = float("-inf")
        for candidate in self.actions:
            value = self.state_values[self.State.nxtPosition(candidate)]
            if value >= best_value:
                best_action = candidate
                best_value = value
        return best_action

    def takeAction(self, action):
        """Return a new ``State`` positioned where ``action`` leads."""
        position = self.State.nxtPosition(action)
        return State(state=position)

    def reset(self):
        """Clear the episode trace and put the agent back on a fresh ``State()``."""
        self.states = []
        self.State = State()

    def play(self, rounds=10):
        """Run ``rounds`` complete episodes, updating ``state_values`` after each."""
        finished = 0
        while finished < rounds:
            if self.State.isEnd:
                reward = self.State.giveReward()
                # Explicitly store the terminal reward for the end cell.
                self.state_values[self.State.state] = reward
                # Walk the trace backwards; each cell moves towards the value
                # just computed for the cell visited after it.
                for s in reversed(self.states):
                    reward = self.state_values[s] + self.lr * (reward - self.state_values[s])
                    self.state_values[s] = round(reward, 3)
                self.reset()
                finished += 1
            else:
                action = self.chooseAction()
                # Move once, then record the cell actually reached (equal to
                # the current cell when the move was blocked).
                self.State = self.takeAction(action)
                self.states.append(self.State.state)
                self.State.isEndFunc()

    def showValues(self):
        """Print the value table as a bordered grid; returns None."""
        separator = '-------------------------------------------------------------------------------------------'
        for i in range(BOARD_ROWS):
            print(separator)
            cells = ''.join(
                str(self.state_values[(i, j)]).ljust(6) + ' | '
                for j in range(BOARD_COLS)
            )
            print('| ' + cells)
        print(separator)

In [None]:
if __name__ == "__main__":
    # Train for 100 episodes, then display the learned value table.
    ag = Agent()
    ag.play(100)
    # showValues() prints the table itself and returns None, so wrapping it in
    # print() would emit a stray "None" line after the table.
    ag.showValues()

-------------------------------------------------------------------------------------------
| 0.213  | 0.256  | 0.305  | 0.337  | 0.295  | 0.226  | 0.212  | 0.242  | 0.357  | 0.486  | 
-------------------------------------------------------------------------------------------
| 0.057  | 0.206  | 0.301  | 0.383  | 0.43   | -1     | 0.509  | -1     | 0.668  | 0.721  | 
-------------------------------------------------------------------------------------------
| 0.0    | 0.017  | 0.139  | 0.381  | 0.485  | 0.514  | 0.599  | 0.666  | 0.762  | 0.796  | 
-------------------------------------------------------------------------------------------
| 0.001  | 0.004  | -1     | 0.165  | -1     | 0.452  | 0.547  | 0.584  | -1     | 0.845  | 
-------------------------------------------------------------------------------------------
| 0      | 0      | -1     | 0.012  | 0      | 0.132  | 0.119  | 0.166  | -1     | 0.853  | 
---------------------------------------------------------------------------