In [7]:
import gymnasium as gym
import numpy as np
import random
import pandas as pd
import time

In [3]:
# Set up the custom maze environment: a 5-row by 4-column FrozenLake grid.
# Tile letters follow the FrozenLake convention (S start, F frozen, H hole, G goal).
desc = [
    "SFFF",
    "FHHH",
    "FFFF",
    "HFHF",
    "FFGF",
]

# NOTE(review): "5x4" is not one of FrozenLake's built-in map names; it appears
# to be unused because an explicit `desc` is supplied -- confirm.
# NOTE(review): render_mode="human" presumably draws every step in a window,
# which would slow down long training runs -- confirm this is intended.
env = gym.make(
    "FrozenLake-v1",
    desc=desc,
    map_name="5x4",
    is_slippery=False,
    render_mode="human",
)
observation, info = env.reset()

In [9]:
def calculate_percentage_change(old_q_table, new_q_table):
    """
    Calculate the percentage change in Q-values between two Q-tables.

    The result is the mean absolute element-wise difference between the two
    tables, divided by the mean of the old table, times 100.

    Note on sign: the denominator is the *signed* mean of the old table, so the
    result is negative whenever that mean is negative (e.g. a table dominated
    by penalties). Callers that need a non-negative magnitude should take the
    absolute value of the result.

    Parameters:
        old_q_table (array-like): The old Q-table.
        new_q_table (array-like): The new Q-table, same shape as the old one.

    Returns:
        float: The percentage change in Q-values, or 100.0 when the mean of
        the old table is exactly zero (no meaningful baseline to divide by).
    """
    # Convert up front so plain nested lists work and integer/unsigned inputs
    # cannot wrap around in the subtraction below.
    old_values = np.asarray(old_q_table, dtype=float)
    new_values = np.asarray(new_q_table, dtype=float)

    avg_change = np.mean(np.abs(new_values - old_values))
    old_avg = np.mean(old_values)

    if old_avg == 0:
        # Sentinel for "no baseline"; returned as a float to match the documented type.
        return 100.0
    return float((avg_change / old_avg) * 100)

In [33]:
def _epsilon_greedy_action(q_table, state, epsilon, action_space):
    """Return a random action with probability `epsilon`, else the greedy action for `state`."""
    if np.random.rand() < epsilon:
        return action_space.sample()  # Random action
    return np.argmax(q_table[state])  # Greedy action


def runTrial(gamma=0.9, alpha=0.1, epsilon=0.7, num_episodes=1000,
             environment=None, verbose=True):
    """
    Train a tabular SARSA agent and return what it learned.

    Each episode follows an epsilon-greedy policy. Epsilon is multiplied by
    0.90 once per episode after episode index 20 while it is above 0.01.
    Rewards are shaped: a terminal step with reward 0 and a step that leaves
    the agent in the same state are both penalised with -1.

    Training stops early when an episode reached the goal (shaped reward 1)
    and the magnitude of the episode's percentage change in the Q-table is at
    most 0.1.

    Parameters:
        gamma (float): Discount factor.
        alpha (float): Learning rate.
        epsilon (float): Initial epsilon-greedy exploration probability.
        num_episodes (int): Maximum number of episodes to run.
        environment: Gymnasium-style environment with discrete observation
            and action spaces. Defaults to the notebook-level `env`.
        verbose (bool): When True, print progress, the Q-table on every goal
            hit, and the greedy policy on convergence.

    Returns:
        tuple: `(Q, statistics)` where `Q` is the learned
        `(num_states, num_actions)` numpy array and `statistics` is a
        DataFrame with one row per episode and the columns "Time" (seconds
        since training started), "Episode" (1-based), "Q-Difference"
        (magnitude of the percentage change in Q over the episode) and
        "GotGift" (whether the goal was reached).
    """
    if environment is None:
        environment = env  # fall back to the environment created earlier in the notebook

    # Initialize Q-values
    num_states = environment.observation_space.n
    num_actions = environment.action_space.n
    Q = np.zeros((num_states, num_actions))

    # Collect plain records and build the DataFrame once at the end instead of
    # growing it row by row.
    records = []

    # SARSA algorithm
    start_time = time.time()
    for episode in range(num_episodes):
        state = environment.reset()[0]
        got_gift = False
        q_before = Q.copy()  # snapshot used to measure this episode's change

        # Choose the first action using the epsilon-greedy policy
        action = _epsilon_greedy_action(Q, state, epsilon, environment.action_space)

        # Decay exploration after a warm-up period
        if episode > 20 and epsilon > 0.01:
            epsilon *= 0.90

        done = False
        while not done:
            # Take action and observe next state and reward
            next_state, reward, terminated, truncated, _ = environment.step(action)
            # An episode ends on a terminal state OR when the step limit
            # truncates it; ignoring `truncated` keeps stepping a finished episode.
            done = terminated or truncated

            if terminated and reward == 0:
                reward = -1  # episode terminated without reaching the goal
            elif next_state == state:
                reward = -1  # the move did not change the state

            # Choose next action using epsilon-greedy policy
            next_action = _epsilon_greedy_action(
                Q, next_state, epsilon, environment.action_space
            )

            # SARSA update; a terminal state has no future return to bootstrap from.
            future_value = 0.0 if terminated else Q[next_state, next_action]
            Q[state, action] += alpha * (reward + gamma * future_value - Q[state, action])

            if reward == 1:
                got_gift = True
                if verbose:
                    print("Hit gift! At Episode: " + str(episode + 1))
                    print(Q)
                    print(abs(calculate_percentage_change(q_before, Q)))

            # Move to next state and action
            state = next_state
            action = next_action

        # calculate_percentage_change is signed by the mean of the old table,
        # so compare magnitudes; a raw negated value would satisfy "<= 0.1"
        # trivially whenever the helper returns a positive number.
        change = abs(calculate_percentage_change(q_before, Q))
        elapsed_time = time.time() - start_time

        records.append({
            "Time": elapsed_time,
            "Episode": episode + 1,
            "Q-Difference": change,
            "GotGift": got_gift,
        })

        if got_gift and change <= 0.1:
            if verbose:
                print(f"Converged on episode {episode + 1}")
                print("Optimal policy:")
                print(np.argmax(Q, axis=1))
            break

    statistics = pd.DataFrame(
        records, columns=["Time", "Episode", "Q-Difference", "GotGift"]
    )
    return Q, statistics



In [18]:
# Optimal policy: for every state, the index of the highest-valued action in Q.
# NOTE(review): this cell reads the module-level names `Q`, `episode`,
# `completion_num`, `into_wall` and `into_lake`. None of them is assigned at
# module level in the visible cells (`Q` and `episode` are local variables of
# runTrial()), and the recorded output of this cell is a NameError on `Q`.
# These values must be defined at module level before this cell can run.
optimal_policy = np.argmax(Q, axis=1)

print("Optimal policy:")
print(optimal_policy)

# Print how many episodes ran and how often the agent finished the maze,
# hit a wall, or fell into the lake.
print("# of episode completed:" + str(episode + 1))
print("# of times elf completed maze: " + str(completion_num))
print("# of times elf hit the wall: " + str(into_wall))
print("# of times elf went into the lake: " + str(into_lake))

NameError: name 'Q' is not defined

In [None]:
# Run one SARSA training trial using runTrial()'s built-in hyperparameters.
runTrial()

Hit gift! At Episode: 42
[[-0.94299292 -0.09057305 -0.15375981 -0.97281179]
 [-0.15831845 -0.468559   -0.14525614 -0.54440617]
 [-0.12035527 -0.61257951 -0.13182598 -0.6524402 ]
 [-0.09324993 -0.468559   -0.36153332 -0.36286044]
 [-0.48012017 -0.02105484 -0.468559   -0.10369607]
 [ 0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.        ]
 [-0.1        -0.3439     -0.02621263 -0.02011005]
 [-0.009      -0.0081      0.         -0.3439    ]
 [-0.0171     -0.19        0.          0.        ]
 [-0.009       0.          0.          0.        ]
 [ 0.          0.          0.          0.        ]
 [-0.19        0.          0.          0.        ]
 [ 0.          0.          0.          0.        ]
 [-0.1         0.          0.          0.        ]
 [-0.1         0.          0.         -0.1       ]
 [ 0.         -0.1         0.          0.        ]
 [ 0.          0.          0.          0.        ]
 [ 0.1

Hit gift! At Episode: 50
[[-9.55005749e-01 -4.74046660e-02 -1.53759808e-01 -9.72811786e-01]
 [-1.58318451e-01 -4.68559000e-01 -1.45256139e-01 -5.44406168e-01]
 [-1.20355265e-01 -6.12579511e-01 -1.31825981e-01 -6.52440199e-01]
 [-9.32499342e-02 -4.68559000e-01 -3.61533321e-01 -3.62860440e-01]
 [-4.80120171e-01 -1.57425791e-02 -5.21703100e-01 -1.03696071e-01]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00]
 [-1.00000000e-01 -3.43900000e-01 -1.25235941e-02 -5.53475852e-02]
 [-9.00000000e-03 -8.10000000e-03  2.83205565e-04 -3.43900000e-01]
 [-1.71000000e-02 -1.90000000e-01  3.66275115e-03  0.00000000e+00]
 [-9.00000000e-03  3.08543499e-02  0.00000000e+00  0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00]
 [-1.90000000e-01  0.00000000e+00  0.00000000e+00  0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  0.

Hit gift! At Episode: 59
[[-0.95500575 -0.02972217 -0.15375981 -0.97281179]
 [-0.15831845 -0.468559   -0.14525614 -0.54440617]
 [-0.12035527 -0.61257951 -0.13182598 -0.6524402 ]
 [-0.09324993 -0.468559   -0.36153332 -0.36286044]
 [-0.48012017 -0.02205076 -0.5217031  -0.10369607]
 [ 0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.        ]
 [-0.1        -0.3439     -0.01771614 -0.05150007]
 [-0.009      -0.0081      0.01115632 -0.40951   ]
 [-0.0171     -0.19        0.0498681   0.        ]
 [-0.009       0.17070803  0.          0.        ]
 [ 0.          0.          0.          0.        ]
 [-0.19        0.          0.          0.        ]
 [ 0.          0.          0.          0.        ]
 [-0.1         0.43674495  0.          0.        ]
 [-0.1         0.          0.         -0.1       ]
 [ 0.         -0.1         0.          0.        ]
 [ 0.          0.          0.          0.        ]
 [ 0.8

Hit gift! At Episode: 67
[[-0.95500575 -0.02205756 -0.15375981 -0.97281179]
 [-0.15831845 -0.468559   -0.14525614 -0.54440617]
 [-0.12035527 -0.61257951 -0.13182598 -0.6524402 ]
 [-0.09324993 -0.468559   -0.36153332 -0.36286044]
 [-0.48012017 -0.01241641 -0.5217031  -0.10369607]
 [ 0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.        ]
 [-0.1        -0.3439      0.00786774 -0.05150007]
 [-0.009      -0.0081      0.05581763 -0.40951   ]
 [-0.0171     -0.19        0.15619717  0.        ]
 [-0.009       0.35293828  0.          0.        ]
 [ 0.          0.          0.          0.        ]
 [-0.19        0.          0.          0.        ]
 [ 0.          0.          0.          0.        ]
 [-0.1         0.63677074  0.          0.        ]
 [-0.1         0.          0.         -0.1       ]
 [ 0.         -0.1         0.          0.        ]
 [ 0.          0.          0.          0.        ]
 [ 0.9

Hit gift! At Episode: 75
[[-0.95500575 -0.01091335 -0.15375981 -0.97281179]
 [-0.15831845 -0.468559   -0.14525614 -0.54440617]
 [-0.12035527 -0.61257951 -0.13182598 -0.6524402 ]
 [-0.09324993 -0.468559   -0.36153332 -0.36286044]
 [-0.48012017  0.00908612 -0.5217031  -0.10369607]
 [ 0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.        ]
 [-0.1        -0.3439      0.05209791 -0.05150007]
 [-0.009      -0.0081      0.13876403 -0.40951   ]
 [-0.0171     -0.19        0.291377    0.        ]
 [-0.009       0.51298635  0.          0.        ]
 [ 0.          0.          0.          0.        ]
 [-0.19        0.          0.          0.        ]
 [ 0.          0.          0.          0.        ]
 [-0.1         0.75921896  0.          0.        ]
 [-0.1         0.          0.         -0.1       ]
 [ 0.         -0.1         0.          0.        ]
 [ 0.          0.          0.          0.        ]
 [ 0.9

Hit gift! At Episode: 83
[[-0.95500575  0.0090668  -0.15375981 -0.97634099]
 [-0.15831845 -0.468559   -0.14525614 -0.54440617]
 [-0.12035527 -0.61257951 -0.13182598 -0.6524402 ]
 [-0.09324993 -0.468559   -0.36153332 -0.36286044]
 [-0.48012017  0.04734257 -0.5217031  -0.10369607]
 [ 0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.        ]
 [-0.1        -0.3439      0.12023351 -0.05150007]
 [-0.009      -0.0081      0.24340149 -0.40951   ]
 [-0.0171     -0.19        0.42053775  0.        ]
 [-0.009       0.62952542  0.          0.        ]
 [ 0.          0.          0.          0.        ]
 [-0.19        0.          0.          0.        ]
 [ 0.          0.          0.          0.        ]
 [-0.1         0.82757367  0.          0.        ]
 [-0.1         0.          0.         -0.1       ]
 [ 0.         -0.1         0.          0.        ]
 [ 0.          0.          0.          0.        ]
 [ 0.9

Hit gift! At Episode: 91
[[-0.95500575  0.0419491  -0.15375981 -0.97634099]
 [-0.15831845 -0.468559   -0.14525614 -0.54440617]
 [-0.12035527 -0.61257951 -0.13182598 -0.6524402 ]
 [-0.09324993 -0.468559   -0.36153332 -0.36286044]
 [-0.48012017  0.10275508 -0.5217031  -0.10369607]
 [ 0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.        ]
 [-0.1        -0.3439      0.19958669 -0.05150007]
 [-0.009      -0.0081      0.33028045 -0.40951   ]
 [-0.0171     -0.19        0.49431905  0.        ]
 [-0.009       0.70558833 -0.04230509  0.        ]
 [ 0.          0.          0.          0.        ]
 [-0.19        0.          0.          0.        ]
 [ 0.          0.          0.          0.        ]
 [-0.1         0.86373269  0.          0.        ]
 [-0.1         0.          0.         -0.1       ]
 [ 0.         -0.1         0.          0.        ]
 [ 0.          0.          0.          0.        ]
 [ 0.9

Hit gift! At Episode: 99
[[-0.95500575  0.08713237 -0.15375981 -0.97634099]
 [-0.15831845 -0.468559   -0.14525614 -0.54440617]
 [-0.12035527 -0.61257951 -0.13182598 -0.6524402 ]
 [-0.09324993 -0.468559   -0.36153332 -0.36286044]
 [-0.48012017  0.16703693 -0.5217031  -0.10369607]
 [ 0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.        ]
 [-0.1        -0.3439      0.27910116 -0.05150007]
 [-0.009      -0.0081      0.42154914 -0.40951   ]
 [-0.0171     -0.19        0.5874962   0.        ]
 [-0.009       0.75179779 -0.04230509  0.        ]
 [ 0.          0.          0.          0.        ]
 [-0.19        0.          0.          0.        ]
 [ 0.          0.          0.          0.        ]
 [-0.1         0.88219697  0.          0.        ]
 [-0.1         0.          0.         -0.1       ]
 [ 0.         -0.1         0.          0.        ]
 [ 0.          0.          0.          0.        ]
 [ 0.9