In [1]:
from random import *
from gridworld import gridworld_agent
import numpy as np

In [2]:
def select_action(action_values, action_size, state, eps):
    eps = 0.995*eps # GLIE
    eps_val = random()
    if eps_val > eps:
        return np.argmax(action_values[state[0], state[1], :]), eps
    else:
        return randint(0, action_size-1), eps

In [3]:
# INITIALISING GRID
rows = 7
cols = 10
winds = [0, 0, 0, 1, 1, 1, 2, 2, 1, 0]
start = [3, 0]
goal = [3, 7]
action_mode = "king"
env = gridworld_agent(rows, cols, winds, start, goal, action_mode)

In [5]:
# Q-Table
action_values = np.zeros([env.rows,env.cols,env.action_size])

In [6]:
max_episodes = 1000
gamma = 0.9
lr = 0.1
max_steps = 100
epsilon = 0.7 # For Epsilon-Greedy exploration

In [8]:
# Training
for i in range(max_episodes):
    print("Episode: {}".format(i), end=' ')
    state = env.reset()
    steps = 0
    tot_err = 0
    done = False

    while steps < max_steps and not done:
        steps += 1
        # Taking Action and Observing reward
        action, epsilon = select_action(action_values, env.action_size, state, epsilon)
        new_state, reward, done = env.step(action)
        
        # Modifying Q-values according to Bellman Equation
        err = reward + gamma*np.max(action_values[new_state[0], new_state[1], :]) - action_values[state[0], state[1], action]
        action_values[state[0], state[1], action] += lr*err
        state = new_state
        tot_err+=err
        
    print("Total Error: {}".format(tot_err))

Episode: 0 Total Error: -97.91100000000004
Episode: 1 Total Error: -97.17659000000002
Episode: 2 Total Error: -97.26932628999997
Episode: 3 Total Error: -98.02993961989999
Episode: 4 Total Error: -97.022905778855
Episode: 5 Total Error: -98.49721865789994
Episode: 6 Total Error: -97.31756394051668
Episode: 7 Total Error: -96.56473538076504
Episode: 8 Total Error: -96.08065710232609
Episode: 9 Total Error: -95.34949564050427
Episode: 10 Total Error: -95.97912934988163
Episode: 11 Total Error: -95.16832589318712
Episode: 12 Total Error: -97.09879663040893
Episode: 13 Total Error: -94.29943513031762
Episode: 14 Total Error: -94.09440293643485
Episode: 15 Total Error: -92.87687998897879
Episode: 16 Total Error: -93.65690478767146
Episode: 17 Total Error: -93.86088403938983
Episode: 18 Total Error: -92.34354017792178
Episode: 19 Total Error: -92.80106099373788
Episode: 20 Total Error: -93.27957027436669
Episode: 21 Total Error: -93.82217333751868
Episode: 22 Total Error: -91.29553341087235


Episode: 336 Total Error: 0.04873907430213009
Episode: 337 Total Error: 0.045866509858694116
Episode: 338 Total Error: 0.0431565968542138
Episode: 339 Total Error: 0.04060057342389101
Episode: 340 Total Error: 0.038190120053659626
Episode: 341 Total Error: 0.0359173391782015
Episode: 342 Total Error: 0.03377473559555355
Episode: 343 Total Error: 0.03175519767397361
Episode: 344 Total Error: 0.029851979325943745
Episode: 345 Total Error: 0.028058682725905726
Episode: 346 Total Error: 0.0263692417470196
Episode: 347 Total Error: 0.024777906093980562
Episode: 348 Total Error: 0.023279226108339657
Episode: 349 Total Error: 0.021868038223271213
Episode: 350 Total Error: 0.02053945104581345
Episode: 351 Total Error: 0.01928883204394083
Episode: 352 Total Error: 0.018111794817365023
Episode: 353 Total Error: 0.0170041869304427
Episode: 354 Total Error: 0.015962078286868575
Episode: 355 Total Error: 0.014981750025775398
Episode: 356 Total Error: 0.014059683919626309
Episode: 357 Total Error: 0

Episode: 696 Total Error: 5.186961971048731e-13
Episode: 697 Total Error: 5.151434834260726e-13
Episode: 698 Total Error: 5.115907697472721e-13
Episode: 699 Total Error: 5.080380560684716e-13
Episode: 700 Total Error: 5.080380560684716e-13
Episode: 701 Total Error: 5.080380560684716e-13
Episode: 702 Total Error: 5.080380560684716e-13
Episode: 703 Total Error: 5.080380560684716e-13
Episode: 704 Total Error: 5.080380560684716e-13
Episode: 705 Total Error: 5.080380560684716e-13
Episode: 706 Total Error: 5.080380560684716e-13
Episode: 707 Total Error: 5.080380560684716e-13
Episode: 708 Total Error: 5.080380560684716e-13
Episode: 709 Total Error: 5.080380560684716e-13
Episode: 710 Total Error: 5.080380560684716e-13
Episode: 711 Total Error: 5.080380560684716e-13
Episode: 712 Total Error: 5.080380560684716e-13
Episode: 713 Total Error: 5.080380560684716e-13
Episode: 714 Total Error: 5.080380560684716e-13
Episode: 715 Total Error: 5.080380560684716e-13
Episode: 716 Total Error: 5.080380560684

In [9]:
# Checking Values
# print(action_values)

In [11]:
# Playing using generated policy
steps = 0
env.reset()
print("Step {}".format(steps))
env.printenv()
done = False

while steps < max_steps and not done:
    steps += 1
    action, epsilon = select_action(action_values, env.action_size, state, epsilon)
    new_state, reward, done = env.step(action)
    state = new_state
    print("=====================")
    print("Step {}".format(steps))
    env.printenv()

if done:
    print("=====================")
    print("Goal Reached !!")
    print("=====================")

Step 0
---------------------
| | | | | | | | | | |
---------------------
| | | | | | | | | | |
---------------------
| | | | | | | | | | |
---------------------
|*| | | | | | |g| | |
---------------------
| | | | | | | | | | |
---------------------
| | | | | | | | | | |
---------------------
| | | | | | | | | | |
---------------------
 0 0 0 1 1 1 2 2 1 0
Step 1
---------------------
| | | | | | | | | | |
---------------------
| | | | | | | | | | |
---------------------
| | | | | | | | | | |
---------------------
|*| | | | | | |g| | |
---------------------
| | | | | | | | | | |
---------------------
| | | | | | | | | | |
---------------------
| | | | | | | | | | |
---------------------
 0 0 0 1 1 1 2 2 1 0
Step 2
---------------------
| | | | | | | | | | |
---------------------
| | | | | | | | | | |
---------------------
| | | | | | | | | | |
---------------------
| | | | | | | |g| | |
---------------------
| |*| | | | | | | | |
---------------------
| | | | | | | | | | |
-------------