In [1]:
import numpy as np
import gymnasium as gym
import random
from tqdm.notebook import tqdm
import cv2 as cv



In [2]:
def epsilonGreedyPolicy(epsilonStart, epsilondEnd, epsilonDecay, episode, maxEpisodes):
    """Decide whether the agent should take an exploratory action this step.

    The exploration threshold moves exponentially from ``epsilonStart``
    (at ``episode == 0``) towards ``epsilondEnd`` as ``episode / maxEpisodes``
    grows; ``epsilonDecay`` scales how quickly that happens.

    Parameters
    ----------
    epsilonStart : float
        Threshold used at episode 0.
    epsilondEnd : float
        Value the threshold approaches as training progresses. (The spelling
        is kept as-is because it is part of the function signature.)
    epsilonDecay : float
        Multiplier inside the exponential; larger means faster decay.
    episode : int
        Index of the current episode.
    maxEpisodes : int
        Total number of episodes, used to normalise ``episode``.

    Returns
    -------
    bool
        ``True`` to explore (the uniform draw did not exceed the threshold),
        ``False`` to exploit.
    """
    draw = random.random()
    progress = episode / maxEpisodes
    cutoff = epsilondEnd + (epsilonStart - epsilondEnd) * np.exp(-epsilonDecay * progress)
    # Explore unless the draw lands strictly above the current cutoff.
    return not (draw > cutoff)

In [3]:
# Fix the random streams used during training so that a fresh
# Restart & Run All reproduces the same run.
randomSeed = 42
random.seed(randomSeed)  # feeds the random.random() draw in epsilonGreedyPolicy

# 4x4 FrozenLake with slipping disabled; "rgb_array" makes render() return image frames.
environment = gym.make("FrozenLake-v1", map_name="4x4", is_slippery=False, render_mode="rgb_array")
environment.action_space.seed(randomSeed)  # feeds environment.action_space.sample()

nStates = environment.observation_space.n
nActions = environment.action_space.n

# One row per state, one column per action; every estimate starts at zero.
QTable = np.zeros((nStates, nActions))


def greedyPolicy(state):
    """Return the index of the highest-valued action for ``state`` in the current ``QTable``."""
    return np.argmax(QTable[state])

In [4]:
# --- Exploration schedule (arguments to epsilonGreedyPolicy) ---
epsilonStart = 0.99
epsilonEnd = 0.01
# With a decay of 10 the exploration threshold is roughly 0.14 after 20% of the
# training episodes and roughly 0.017 after 50% (author's note: works okay for
# getting close to the end value around 20% of max episodes).
epsilonDecay = 10

# --- Q-learning update ---
learningRate = 0.5
discountGamma = 0.98

# --- Run lengths ---
trainingEpisodes = 100_000
evaluationEpisodes = 100  # not referenced by any cell visible in this notebook

In [5]:
for episode in tqdm(range(trainingEpisodes)):
    # Each episode begins from the observation returned by a fresh reset.
    state, _ = environment.reset()

    while True:
        # Pick a random action when the schedule says to explore, otherwise
        # follow the current greedy choice from the Q-table.
        explore = epsilonGreedyPolicy(epsilonStart, epsilonEnd, epsilonDecay, episode, trainingEpisodes)
        action = environment.action_space.sample() if explore else greedyPolicy(state)

        nextState, reward, terminated, truncated, _ = environment.step(action)

        # Q-learning update: move Q(state, action) towards
        # reward + discountGamma * max_a Q(nextState, a) by a step of learningRate.
        tdTarget = reward + discountGamma * np.max(QTable[nextState])
        QTable[state, action] += learningRate * (tdTarget - QTable[state, action])

        # The episode is over once the environment reports either flag.
        if terminated or truncated:
            break

        state = nextState

  0%|          | 0/100000 [00:00<?, ?it/s]

In [6]:
# Show the learned action values: one row per state, one column per action.
QTable

array([[0.88584238, 0.9039208 , 0.9039208 , 0.88584238],
       [0.88584238, 0.        , 0.92236816, 0.9039208 ],
       [0.9039208 , 0.941192  , 0.9039208 , 0.92236816],
       [0.92236816, 0.        , 0.9039208 , 0.9039208 ],
       [0.9039208 , 0.92236816, 0.        , 0.88584238],
       [0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.9604    , 0.        , 0.92236816],
       [0.        , 0.        , 0.        , 0.        ],
       [0.92236816, 0.        , 0.941192  , 0.9039208 ],
       [0.92236816, 0.9604    , 0.9604    , 0.        ],
       [0.941192  , 0.98      , 0.        , 0.941192  ],
       [0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.9604    , 0.98      , 0.941192  ],
       [0.9604    , 0.98      , 1.        , 0.9604    ],
       [0.        , 0.        , 0.        , 0.        ]])

In [7]:
def returnFrames(environment):
    """Play one episode with ``greedyPolicy`` and collect every rendered frame.

    The environment is reset, its initial frame is captured, and then one
    frame is captured after each step until the environment reports
    ``terminated`` or ``truncated``.

    Parameters
    ----------
    environment
        Object exposing ``reset()``, ``step(action)`` and ``render()`` in the
        way they are used below.

    Returns
    -------
    list
        Whatever ``environment.render()`` returned, in order: the initial
        frame first, then one entry per step taken.
    """
    observation, _ = environment.reset()
    frames = [environment.render()]

    finished = False
    while not finished:
        observation, _, terminated, truncated, _ = environment.step(greedyPolicy(observation))
        frames.append(environment.render())
        finished = terminated or truncated

    return frames

In [8]:
def saveVideo(images, outputDirectory, fps):
    """Encode a sequence of RGB frames into a video file.

    Parameters
    ----------
    images : sequence
        Frames in RGB channel order, each exposing a three-element ``.shape``
        read as ``(height, width, channels)``. The first frame determines the
        video size; later frames are presumably the same size (not checked
        here).
    outputDirectory : str
        Despite the name, this is the path of the output *file*
        (for example ``"preview.mp4"``).
    fps : float
        Frame rate passed to the writer.

    Raises
    ------
    RuntimeError
        If the video writer could not be opened for ``outputDirectory``.
        Previously this case went unnoticed and no usable video was produced.
    """
    height, width, _ = images[0].shape
    writer = cv.VideoWriter(outputDirectory, cv.VideoWriter_fourcc(*'H264'), fps, (width, height))

    # VideoWriter does not raise when the codec or path is unusable; it just
    # reports "not opened" and silently ignores every write. Fail loudly instead.
    if not writer.isOpened():
        writer.release()
        raise RuntimeError(
            f"Could not open video writer for {outputDirectory!r} with the 'H264' codec; "
            "check that the path is writable and that this OpenCV build provides an H264 encoder."
        )

    try:
        for image in images:
            # The writer expects BGR channel order, the frames arrive as RGB.
            writer.write(cv.cvtColor(image, cv.COLOR_RGB2BGR))
    finally:
        # Always release so the file is closed even if a frame fails to convert.
        writer.release()

In [9]:
# Roll out one greedy episode and write its frames to preview.mp4 at 5 frames per second.
previewFrames = returnFrames(environment)
saveVideo(previewFrames, "preview.mp4", fps=5)

  from pkg_resources import resource_stream, resource_exists
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
  declare_namespace(pkg)
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
  declare_namespace(pkg)
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
  declare_namespace(pkg)
