In [23]:
from IPython.display import clear_output
from time import sleep

def print_frames(frames):
    for i, frame in enumerate(frames):
        clear_output(wait=True)
        print(frame['frame'])
        print(f"Timestep: {i + 1}")
        print(f"State: {frame['state']}")
        print(f"Action: {frame['action']}")      
        print(f"Reward: {frame['reward']}")
        sleep(.1)

print_frames(frames)

In [27]:
import gym
import numpy as np
import random
import time

env = gym.make("FrozenLake-v0")
frames = []

'''Learning the frozen lake'''

epsilon = .9
total_episodes = 10001
max_steps = 100

lr_rate = 0.81
gamma = .96

Q = np.zeros([env.observation_space.n, env.action_space.n])

def choose_action(state, learning):
    action = 0
    if learning and random.uniform(0,1) < epsilon:
        action = env.action_space.sample()
    else:
        action = np.argmax(Q[state])
    return action

def learn(state, next_state, reward, action):
    predict = Q[state, action]
    target = reward + gamma * np.max(Q[next_state])
    Q[state, action] = Q[state, action] + lr_rate * (target - predict)
    
for episode in range(total_episodes):
    state = env.reset()
    t = 0
    done = False
    
    while not done:
        action = choose_action(state, True)
        state2, reward, done, info = env.step(action)
        
        learn(state, state2, reward, action)
        state = state2
        t+=1
        
    if episode % 100 == 0:
        clear_output(wait=True)
        print(f"episode {episode} complete")
print(Q)

episode 10000 complete
[[6.30575985e-01 6.63503203e-01 6.60829274e-01 6.61261260e-01]
 [1.25177427e-01 5.35183708e-01 5.94037235e-01 6.74649031e-01]
 [6.98955817e-01 6.25263238e-01 7.30143696e-01 6.05568236e-01]
 [9.84614639e-02 5.82533775e-01 1.14326940e-01 5.85477673e-01]
 [5.99072760e-01 6.01926206e-01 5.65440921e-01 5.64110654e-01]
 [0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00]
 [7.33474318e-01 1.89064234e-04 7.66485219e-01 6.93710720e-01]
 [0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00]
 [6.59259327e-01 6.09837872e-01 5.89354293e-01 7.03762629e-01]
 [2.61200125e-02 7.25014500e-01 7.77699866e-01 1.43389941e-01]
 [8.10833858e-01 7.49051642e-01 7.62704948e-01 7.62986104e-01]
 [0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00]
 [0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00]
 [1.54072615e-01 9.05746780e-01 7.80279087e-01 8.20951448e-01]
 [8.86674965e-01 9.47579227e-01 7.89202813e-01 8.29219325e-01]
 [0.00000000e+00 0.00000000e+00 

In [30]:
'''Evaluate the Frozen lake rearning rate'''

frames = []

state = env.reset()
done = False

for i in range(10):
    done = False
    while not done:
        action = choose_action(state, False)
        state2, reward, done, info = env.step(action)
        state = state2
        frames.append({
            "frame": env.render(mode='ansi'),
            'state': state,
            'action': action,
            'reward': reward,
        })

print_frames(frames)

  (Right)
SFFF
F[41mH[0mFH
FFFH
HFFG

Timestep: 11
State: 5
Action: 2
Reward: 0.0
