In [1]:
from random import *
from gridworld import gridworld_agent
import numpy as np

In [2]:
def select_action(action_values, action_size, state, eps):
    eps = 0.995*eps # GLIE
    eps_val = random()
    if eps_val > eps:
        return np.argmax(action_values[state[0], state[1], :]), eps
    else:
        return randint(0, action_size-1), eps

In [3]:
# INITIALISING GRID
rows = 7
cols = 10
winds = [0, 0, 0, 1, 1, 1, 2, 2, 1, 0]
start = [3, 0]
goal = [3, 7]
action_mode = "king"
env = gridworld_agent(rows, cols, winds, start, goal, action_mode)

In [4]:
# Q-Table and Eligibility Values for backward view SARSA
action_values = np.zeros([env.rows,env.cols,env.action_size])
eligibility_trace = np.zeros([env.rows,env.cols,env.action_size])

In [5]:
max_episodes = 1000
gamma = 0.9
lr = 0.1
max_steps = 100
epsilon = 0.7 # For Epsilon-Greedy exploration
lmbd = 0.8

In [6]:
for i in range(max_episodes):
    print("Episode: {}".format(i), end=' ')
    state = env.reset()
    # Reinitialise Eligibility Trace for this episode
    eligibility_trace[:, :, :] = 0
    
    steps = 0
    # Epsilon-Greedy Policy
    action, epsilon = select_action(action_values, env.action_size, state, epsilon)
    tot_err = 0
    done = False

    while steps < max_steps and not done:
        steps += 1
        # Taking Action and Observing reward
        new_state, reward, done = env.step(action)
        new_action, epsilon = select_action(action_values, env.action_size, new_state, epsilon)
        
        # Modifying State-action values according to Bellman Equation
        err = reward + gamma*action_values[new_state[0], new_state[1], new_action] - action_values[state[0], state[1], action]
        
        # Updating Eligibility
        eligibility_trace[state[0], state[1], action] += 1
        
        # Update all action-state values based on their eligibility traces
        for i in range(env.rows):
            for j in range(env.cols):
                for k in range(env.action_size):
                    action_values[i, j, k] += lr*err*eligibility_trace[i, j, k]
                    # Update Eligibility Trace
                    eligibility_trace[i, j, k] *= gamma*lmbd
        state = new_state
        action = new_action
        tot_err+=err
        
    print("Total Error: {}".format(tot_err))

Episode: 0 Total Error: -98.09423396337915
Episode: 1 Total Error: -96.61185760693115
Episode: 2 Total Error: -92.32604315605579
Episode: 3 Total Error: -91.66661933256623
Episode: 4 Total Error: -92.89535543469162
Episode: 5 Total Error: -89.50761785311579
Episode: 6 Total Error: -88.76444883265589
Episode: 7 Total Error: -83.6434879134675
Episode: 8 Total Error: -87.7023059398965
Episode: 9 Total Error: -87.93799185618937
Episode: 10 Total Error: -94.41062606179577
Episode: 11 Total Error: -86.97099629240537
Episode: 12 Total Error: -81.3607172188195
Episode: 13 Total Error: -83.42542022626928
Episode: 14 Total Error: -78.82883466827151
Episode: 15 Total Error: -85.33879557088925
Episode: 16 Total Error: -73.43820387808714
Episode: 17 Total Error: -74.72431867754445
Episode: 18 Total Error: -78.35581718236128
Episode: 19 Total Error: -80.18639787522955
Episode: 20 Total Error: -73.28800070400807
Episode: 21 Total Error: -77.07673954411803
Episode: 22 Total Error: -64.89097422825651
E

Episode: 187 Total Error: 1.5675789284583956
Episode: 188 Total Error: 1.472170100796749
Episode: 189 Total Error: 1.3822731791826648
Episode: 190 Total Error: 1.2975917659203056
Episode: 191 Total Error: 1.217843766381126
Episode: 192 Total Error: 1.1427608204533328
Episode: 193 Total Error: 1.0720877473013068
Episode: 194 Total Error: 1.0055820039104546
Episode: 195 Total Error: 0.9430131578037404
Episode: 196 Total Error: 0.8841623742338491
Episode: 197 Total Error: 0.8288219180787131
Episode: 198 Total Error: 0.7767946705986297
Episode: 199 Total Error: 0.7278936611480837
Episode: 200 Total Error: 0.6819416138778784
Episode: 201 Total Error: 0.6387705094094116
Episode: 202 Total Error: 0.5982211614149584
Episode: 203 Total Error: 0.5601428079943744
Episode: 204 Total Error: 0.5243927176997829
Episode: 205 Total Error: 0.4908358100253132
Episode: 206 Total Error: 0.45934429014801026
Episode: 207 Total Error: 0.4297972976794604
Episode: 208 Total Error: 0.4020805691639886
Episode: 20

Episode: 364 Total Error: 2.959829259907565e-06
Episode: 365 Total Error: 2.726962559762569e-06
Episode: 366 Total Error: 2.5122789217846275e-06
Episode: 367 Total Error: 2.3143701177730236e-06
Episode: 368 Total Error: 2.1319361316329832e-06
Episode: 369 Total Error: 1.9637766612845553e-06
Episode: 370 Total Error: 1.8087838640212794e-06
Episode: 371 Total Error: 1.6659349100223153e-06
Episode: 372 Total Error: 1.5342859249756202e-06
Episode: 373 Total Error: 1.4129656271677504e-06
Episode: 374 Total Error: 1.3011699913079156e-06
Episode: 375 Total Error: 1.1981570544605802e-06
Episode: 376 Total Error: 1.1032421554091343e-06
Episode: 377 Total Error: 1.0157935470544999e-06
Episode: 378 Total Error: 9.352283747432466e-07
Episode: 379 Total Error: 8.610088855220965e-07
Episode: 380 Total Error: 7.926389855583693e-07
Episode: 381 Total Error: 7.296611244100859e-07
Episode: 382 Total Error: 6.716531579797902e-07
Episode: 383 Total Error: 6.182257372699951e-07
Episode: 384 Total Error: 5.

Episode: 559 Total Error: 3.943512183468556e-13
Episode: 560 Total Error: 3.801403636316536e-13
Episode: 561 Total Error: 3.659295089164516e-13
Episode: 562 Total Error: 3.481659405224491e-13
Episode: 563 Total Error: 3.410605131648481e-13
Episode: 564 Total Error: 3.339550858072471e-13
Episode: 565 Total Error: 3.268496584496461e-13
Episode: 566 Total Error: 3.197442310920451e-13
Episode: 567 Total Error: 3.126388037344441e-13
Episode: 568 Total Error: 3.055333763768431e-13
Episode: 569 Total Error: 2.984279490192421e-13
Episode: 570 Total Error: 2.877698079828406e-13
Episode: 571 Total Error: 2.8421709430404007e-13
Episode: 572 Total Error: 2.8066438062523957e-13
Episode: 573 Total Error: 2.7711166694643907e-13
Episode: 574 Total Error: 2.7355895326763857e-13
Episode: 575 Total Error: 2.7000623958883807e-13
Episode: 576 Total Error: 2.6645352591003757e-13
Episode: 577 Total Error: 2.6290081223123707e-13
Episode: 578 Total Error: 2.5934809855243657e-13
Episode: 579 Total Error: 2.5579

Episode: 743 Total Error: 2.4868995751603507e-13
Episode: 744 Total Error: 2.4868995751603507e-13
Episode: 745 Total Error: 2.4868995751603507e-13
Episode: 746 Total Error: 2.4868995751603507e-13
Episode: 747 Total Error: 2.4868995751603507e-13
Episode: 748 Total Error: 2.4868995751603507e-13
Episode: 749 Total Error: 2.4868995751603507e-13
Episode: 750 Total Error: 2.4868995751603507e-13
Episode: 751 Total Error: 2.4868995751603507e-13
Episode: 752 Total Error: 2.4868995751603507e-13
Episode: 753 Total Error: 2.4868995751603507e-13
Episode: 754 Total Error: 2.4868995751603507e-13
Episode: 755 Total Error: 2.4868995751603507e-13
Episode: 756 Total Error: 2.4868995751603507e-13
Episode: 757 Total Error: 2.4868995751603507e-13
Episode: 758 Total Error: 2.4868995751603507e-13
Episode: 759 Total Error: 2.4868995751603507e-13
Episode: 760 Total Error: 2.4868995751603507e-13
Episode: 761 Total Error: 2.4868995751603507e-13
Episode: 762 Total Error: 2.4868995751603507e-13
Episode: 763 Total E

Episode: 926 Total Error: 2.4868995751603507e-13
Episode: 927 Total Error: 2.4868995751603507e-13
Episode: 928 Total Error: 2.4868995751603507e-13
Episode: 929 Total Error: 2.4868995751603507e-13
Episode: 930 Total Error: 2.4868995751603507e-13
Episode: 931 Total Error: 2.4868995751603507e-13
Episode: 932 Total Error: 2.4868995751603507e-13
Episode: 933 Total Error: 2.4868995751603507e-13
Episode: 934 Total Error: 2.4868995751603507e-13
Episode: 935 Total Error: 2.4868995751603507e-13
Episode: 936 Total Error: 2.4868995751603507e-13
Episode: 937 Total Error: 2.4868995751603507e-13
Episode: 938 Total Error: 2.4868995751603507e-13
Episode: 939 Total Error: 2.4868995751603507e-13
Episode: 940 Total Error: 2.4868995751603507e-13
Episode: 941 Total Error: 2.4868995751603507e-13
Episode: 942 Total Error: 2.4868995751603507e-13
Episode: 943 Total Error: 2.4868995751603507e-13
Episode: 944 Total Error: 2.4868995751603507e-13
Episode: 945 Total Error: 2.4868995751603507e-13
Episode: 946 Total E

In [7]:
# Checking Values
# print(action_values)

In [9]:
# Playing using generated policy
steps = 0
env.reset()
print("Step {}".format(steps))
env.printenv()
done = False

while steps < max_steps and not done:
    steps += 1
    action, epsilon = select_action(action_values, env.action_size, state, 0)
    new_state, reward, done = env.step(action)
    state = new_state
    print("=====================")
    print("Step {}".format(steps))
    env.printenv()

if done:
    print("=====================")
    print("Goal Reached !!")
    print("=====================")

Step 0
---------------------
| | | | | | | | | | |
---------------------
| | | | | | | | | | |
---------------------
| | | | | | | | | | |
---------------------
|*| | | | | | |g| | |
---------------------
| | | | | | | | | | |
---------------------
| | | | | | | | | | |
---------------------
| | | | | | | | | | |
---------------------
 0 0 0 1 1 1 2 2 1 0
Step 1
---------------------
| | | | | | | | | | |
---------------------
| | | | | | | | | | |
---------------------
|*| | | | | | | | | |
---------------------
| | | | | | | |g| | |
---------------------
| | | | | | | | | | |
---------------------
| | | | | | | | | | |
---------------------
| | | | | | | | | | |
---------------------
 0 0 0 1 1 1 2 2 1 0
Step 2
---------------------
| | | | | | | | | | |
---------------------
| | | | | | | | | | |
---------------------
| | | | | | | | | | |
---------------------
| |*| | | | | |g| | |
---------------------
| | | | | | | | | | |
---------------------
| | | | | | | | | | |
-------------