In [1]:
from random import *
from gridworld import gridworld_agent
import numpy as np

In [2]:
def select_action(action_values, action_size, state, eps):
    eps = 0.995*eps # GLIE
    eps_val = random()
    if eps_val > eps:
        return np.argmax(action_values[state[0], state[1], :]), eps
    else:
        return randint(0, action_size-1), eps

In [3]:
# INITIALISING GRID
rows = 7
cols = 10
winds = [0, 0, 0, 1, 1, 1, 2, 2, 1, 0]
start = [3, 0]
goal = [3, 7]
action_mode = "king"
env = gridworld_agent(rows, cols, winds, start, goal, action_mode)

In [4]:
# Q-Table
action_values = np.zeros([env.rows,env.cols,env.action_size])

In [16]:
max_episodes = 1000
gamma = 0.9
lr = 0.1
max_steps = 100
epsilon = 0.7 # For Epsilon-Greedy exploration

In [17]:
# Training
for i in range(max_episodes):
    print("Episode: {}".format(i), end=' ')
    state = env.reset()
    steps = 0
    # Epsilon-Greedy Policy
    action, epsilon = select_action(action_values, env.action_size, state, epsilon)
    tot_err = 0
    done = False

    while steps < max_steps and not done:
        steps += 1
        # Taking Action and Observing reward
        new_state, reward, done = env.step(action)
        new_action, epsilon = select_action(action_values, env.action_size, new_state, epsilon)
        
        # Modifying State-action values according to Bellman Equation
        err = reward + gamma*action_values[new_state[0], new_state[1], new_action] - action_values[state[0], state[1], action]
        action_values[state[0], state[1], action] += lr*err
        state = new_state
        action = new_action
        tot_err+=err
        
    print("Total Error: {}".format(tot_err))

Episode: 0 Total Error: -55.97207873365251
Episode: 1 Total Error: 17.973165401878767
Episode: 2 Total Error: -8.744953734537603
Episode: 3 Total Error: 12.744065186300944
Episode: 4 Total Error: -10.830423282241043
Episode: 5 Total Error: 7.310212296906073
Episode: 6 Total Error: 30.038381178455296
Episode: 7 Total Error: 18.074547038855528
Episode: 8 Total Error: 23.016466358449534
Episode: 9 Total Error: 22.0769468904293
Episode: 10 Total Error: 29.391068966194908
Episode: 11 Total Error: 23.991251451932165
Episode: 12 Total Error: 27.10480101690058
Episode: 13 Total Error: 30.376865352280312
Episode: 14 Total Error: 23.34672114628606
Episode: 15 Total Error: 22.37309289053571
Episode: 16 Total Error: 21.87252309203584
Episode: 17 Total Error: 20.536467799263974
Episode: 18 Total Error: 15.496353034416796
Episode: 19 Total Error: 25.39924619186307
Episode: 20 Total Error: 19.880959429634643
Episode: 21 Total Error: 20.57524021554964
Episode: 22 Total Error: 1.5365417193549789
Episod

Episode: 457 Total Error: 6.645358041623695e-09
Episode: 458 Total Error: 6.126704477082967e-09
Episode: 459 Total Error: 5.648239209676831e-09
Episode: 460 Total Error: 5.206871378504729e-09
Episode: 461 Total Error: 4.799733943627871e-09
Episode: 462 Total Error: 4.424222765919694e-09
Episode: 463 Total Error: 4.077861603946076e-09
Episode: 464 Total Error: 3.758433564371444e-09
Episode: 465 Total Error: 3.463853204266343e-09
Episode: 466 Total Error: 3.1921985055305413e-09
Episode: 467 Total Error: 2.9417073221793544e-09
Episode: 468 Total Error: 2.7107205369247822e-09
Episode: 469 Total Error: 2.497753115449086e-09
Episode: 470 Total Error: 2.301405288562819e-09
Episode: 471 Total Error: 2.120383868486897e-09
Episode: 472 Total Error: 1.9535129069936374e-09
Episode: 473 Total Error: 1.7996804047015758e-09
Episode: 474 Total Error: 1.6578951544943266e-09
Episode: 475 Total Error: 1.5271908182512561e-09
Episode: 476 Total Error: 1.4067360609715251e-09
Episode: 477 Total Error: 1.2957

Episode: 930 Total Error: 5.080380560684716e-13
Episode: 931 Total Error: 5.080380560684716e-13
Episode: 932 Total Error: 5.080380560684716e-13
Episode: 933 Total Error: 5.080380560684716e-13
Episode: 934 Total Error: 5.080380560684716e-13
Episode: 935 Total Error: 5.080380560684716e-13
Episode: 936 Total Error: 5.080380560684716e-13
Episode: 937 Total Error: 5.080380560684716e-13
Episode: 938 Total Error: 5.080380560684716e-13
Episode: 939 Total Error: 5.080380560684716e-13
Episode: 940 Total Error: 5.080380560684716e-13
Episode: 941 Total Error: 5.080380560684716e-13
Episode: 942 Total Error: 5.080380560684716e-13
Episode: 943 Total Error: 5.080380560684716e-13
Episode: 944 Total Error: 5.080380560684716e-13
Episode: 945 Total Error: 5.080380560684716e-13
Episode: 946 Total Error: 5.080380560684716e-13
Episode: 947 Total Error: 5.080380560684716e-13
Episode: 948 Total Error: 5.080380560684716e-13
Episode: 949 Total Error: 5.080380560684716e-13
Episode: 950 Total Error: 5.080380560684

In [18]:
# Checking Values
# print(action_values)

In [20]:
# Playing using generated policy
steps = 0
env.reset()
print("Step {}".format(steps))
env.printenv()
done = False

while steps < max_steps and not done:
    steps += 1
    action, epsilon = select_action(action_values, env.action_size, state, epsilon)
    new_state, reward, done = env.step(action)
    state = new_state
    print("=====================")
    print("Step {}".format(steps))
    env.printenv()

if done:
    print("=====================")
    print("Goal Reached !!")
    print("=====================")

Step 0
---------------------
| | | | | | | | | | |
---------------------
| | | | | | | | | | |
---------------------
| | | | | | | | | | |
---------------------
|*| | | | | | |g| | |
---------------------
| | | | | | | | | | |
---------------------
| | | | | | | | | | |
---------------------
| | | | | | | | | | |
---------------------
 0 0 0 1 1 1 2 2 1 0
Step 1
---------------------
| | | | | | | | | | |
---------------------
| | | | | | | | | | |
---------------------
|*| | | | | | | | | |
---------------------
| | | | | | | |g| | |
---------------------
| | | | | | | | | | |
---------------------
| | | | | | | | | | |
---------------------
| | | | | | | | | | |
---------------------
 0 0 0 1 1 1 2 2 1 0
Step 2
---------------------
| | | | | | | | | | |
---------------------
| | | | | | | | | | |
---------------------
| | | | | | | | | | |
---------------------
| |*| | | | | |g| | |
---------------------
| | | | | | | | | | |
---------------------
| | | | | | | | | | |
-------------