In [35]:
import numpy as np
import pandas as pd
import random
import matplotlib.pyplot as plt

In [36]:


from curses import flash


class Continuous_gridWorld:
    def __init__(self, blocked_center):
        self.yaxis = 100
        self.xaxis = 100
        self.blocked_center = blocked_center
        self.initial_state = np.array(
            [(3, 3), (3, 4), (3, 5), (4, 3), (4, 4), (4, 5), (5, 3), (5, 4), (5, 5)])
        self.state = self.initial_state

    def create_blocked(self):
        self.blocked_list = []
        for i in self.blocked_center:
            for j in range(i[0]-6, i[0] + 5):
                for k in range(i[1] - 6, i[1] + 5):
                    if k < self.xaxis or j < self.yaxis or k >= 0 or j >= 0:
                        if k not in self.blocked_list:
                            self.blocked_list.append((j, k))
        return self.blocked_list

    def show_world(self):
        grid = np.zeros([self.xaxis, self.yaxis])
        for i in self.blocked_list:
            grid[i[0]][i[1]] = 1
        return grid

    def cost_movement(self, state):
        for i in state:
            if i in self.blocked_list:
                cost = 1000
            elif i in self.terminal_state:
                cost = 0
            else:
                cost = 1
        return cost

    def transition_function(self, action):
        if action == 'L':
            nxtState = [(s[0] - 10, s[1])
                        for s in self.state if s[0] - 10 >= 0]
        elif action == 'R':
            nxtState = [(s[0] + 10, s[1])
                        for s in self.state if s[0] + 10 <= self.xaxis - 1]
        elif action == 'U':
            nxtState = [(s[0], s[1] + 10)
                        for s in self.state if s[1] + 10 <= self.yaxis - 1]
        else:
            nxtState = [(s[0], s[1] - 10)
                        for s in self.state if s[1] - 10 >= 0]

        for s in self.state:
            if (s[0] >= 0) and (s[0] <= self.xaxis - 1):
                if (s[1] >= 0) and (s[1] <= self.yaxis - 1):
                    return nxtState
        return self.state

    def terminal_state_create(self):
        self.terminal_state = []
        for i in range(20):
            for j in range(20):
                self.terminal_state.append([i, j])
        return self.terminal_state

    def episode_break(self, state):
        
        for i in self.terminal_state:
            if state == i:
                return True
        for i in self.blocked_list:
            if state == i:
                return True
        return False

class Agent:
    def __init__(self):
        # world = Continuous_gridWorld()
        self.actions = ['L', 'R', 'U', 'D']

        self.qtable = []
        for q in range(0, 10):
            self.qtable.append([])
            for p in range(0, 10):
                self.qtable[q].append([])
                for a in range(4):
                    self.qtable[q][p].append(0)

    def action_selection(self, epsilon, state):
        if random.random <= epsilon:
            action = random.randrange(self.actions)
        else:
            action_index = np.argmin(self.qtable[state[0]//10][state[1]//10])
            action = self.actions[action_index]
        return action

    def q_value_update(self, currentState, nxtState, action, cost):
        min_nxtState = np.argmin(self.qtable[nxtState[0]//10][nxtState[1]//10])
        self.qtable[currentState[0]//10][currentState[1]//10][action] += self.alpha * (
            cost + self.gamma * min_nxtState - self.qtable[currentState[0]//10][currentState[1]//10][action])

def play(world, Agent, alpha=0.9, gamma=0.2, episode=500, epsilon = 0.9):
    steps_per_episode = []
    cost_per_episode = []                      #used for storing the total cost of each episode for plotting
    epsilon_per_episode = []
    grid_per_episode = []
    action_per_episode = []
    qvalue_per_episode = []
    state_per_episode = []
    world.terminal_state_create()
    for z in range(episode):
        cumulative_cost = 0
        steps = 0
        grid_per_step = []
            # grid_wrld = self.world.grid_wrld()

        action_per_step = []
        qvalue_per_step = []
        cost_per_step = []
        state_per_step = []
        while world.episode_break(world.state[4]) == False:
            lastState = world.state

            state_per_step.append(lastState)

            action = Agent.action_selection(epsilon, lastState)

            currentState = world.transition_function(lastState)

            cost = world.cost_movement(currentState)

            x = Agent.q_value_update(lastState, currentState, action, cost)

            cumulative_cost += cost
            steps += 1
            # grid_per_step.append(grid)
            # action_index_per_step.append(action_index)
            qvalue_per_step.append(x)
            cost_per_step.append(cost)
        
        if world.episode_break() == True:
            world.state = world.initial_state
        
        if z % 50 == 0:
            epsilon *= 0.45

        steps_per_episode.append(steps)
        cost_per_episode.append(cumulative_cost)
        epsilon_per_episode.append(epsilon)
        # grid_per_episode.append(grid_per_step)
        action_per_episode.append(action_per_step)
        qvalue_per_episode.append(qvalue_per_step)
        state_per_episode.append(state_per_step)

    return cost_per_episode, state_per_episode, epsilon_per_episode, action_per_episode, qvalue_per_episode, state_per_episode




In [37]:
world = Continuous_gridWorld([(50, 50), (60, 50), (70, 50), (90, 50), (60,65), (60, 75), (60, 85), (60, 95)])
Agentx = Agent()


cost_per_episode, steps_per_episode, epsilon_per_episode, action_index_per_episode, qvalue_per_episode, state_per_episode = play(world, Agentx)


# using matplotlib for plotting episode in x-axis and cost in y-axis
fig1 = plt.figure("Figure1")
plt.plot(cost_per_episode)
plt.ylim(0, 150)
plt.title("Cost per Episode")
fig2 = plt.figure("Figure2")
plt.plot(steps_per_episode)
plt.ylim(0, 150)
plt.title("Steps per Episode")
fig3 = plt.figure("Figure3")
plt.plot(epsilon_per_episode)
plt.title("Epsilon per Episode")

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

In [26]:
x = [1, 2, 3]
y = [2, 9, 1]

for i in x:
    if i in y:
        print('hi')

hi
hi
