In [46]:
import gymnasium as gym
from gymnasium import spaces
import pandas as pd
import random as rand
import numpy as np
from gymnasium.envs.toy_text.frozen_lake import generate_random_map

actions = {0: 'Left', 2: 'Right', 3: 'Up', 1: 'Down'}

gamma = .8
alpha = .2
episodes = 1000

qtable = pd.DataFrame(
    [[0.0 for _ in range(4)] for _ in range(12)],
    columns=['Left', 'Right', 'Up', 'Down']
)

# Bellman Equation: (1-alpha) * q(s, a) + alpha(R + gamma(max(q(s', a'))))
def calculateQScore(currCell, direction, reward, futureCell):
    # Get the Q-value for the current state and action
    current_q = qtable.iloc[currCell][direction]

    # Get the maximum Q-value for the next state (future state)
    future_q_max = qtable.iloc[futureCell].max()

    # Apply the Bellman equation
    return (1 - alpha) * current_q + alpha * (reward + gamma * future_q_max)

s is start
f is empty
h is lake
g is goal

In [54]:
maze=["FFFG", "SHFH", "FFFF"]

env = gym.make('FrozenLake-v1', desc=maze, is_slippery=False, render_mode="human")
initial_state = env.reset()
env.render()

In [57]:
env.close()

In [48]:
currCell = 5
for episode in range(episodes):
    while True:
        action = rand.randint(0,3)
        new_state, reward, done, _, _ = env.step(action)

        if new_state == 3: # Got to the goal!
            reward = 10
        elif done: # died...
            reward = -10 
        else: # Empty
            reward = .1

        print(calculateQScore(currCell, actions[action], reward, new_state))


        qtable.loc[currCell, actions[action]] = calculateQScore(currCell, actions[action], reward, new_state)

        currCell=new_state

        if done:
            env.reset()
            currCell = 5
            break
    
    print(f"Progress: {episode}/{episodes}")

0.020000000000000004
0.020000000000000004
0.023200000000000002
0.03971200000000001
0.020000000000000004
0.020000000000000004
0.020000000000000004
0.023200000000000002
-1.9968000000000001
Progress: 0/1000
-1.9968000000000001
Progress: 1/1000
0.023712000000000004
0.023712000000000004
0.042763520000000006
0.026842163200000005
0.039200000000000006
-3.59364608
Progress: 2/1000
0.04581176320000001
0.04491392000000001
0.020000000000000004
0.026353920000000003
0.05895582720000001
0.06536406835200001
0.04021662720000001
0.05051606835200001
0.029432932352000004
-1.992670117888
Progress: 3/1000
0.04543293235200001
-3.5868062121984003
Progress: 4/1000
0.06710766149632001
0.06466906693632
0.05193198149632001
0.072003836133376
0.08912368268804097
0.05456000000000001
-4.864179638160589
Progress: 5/1000
0.02808257093632001
0.02808257093632001
0.06984578703360002
-4.858707743919309
Progress: 6/1000
0.05364138267443202
0.05364138267443202
0.03117532592537601
0.056115586665676814
0.07606779525791746
0.09

In [56]:
env.reset()
currCell = 5

actions = {
    'Left' : 0,
    'Right': 2,
    'Up': 3,
    'Down': 1
}

while True:
    action_name = qtable.iloc[currCell].idxmax()
    action_value = actions[action_name]

    currCell, _, done, truncated, info = env.step(action_value)

    env.render()

    if done:
        break
    