In [None]:
from IPython.display import clear_output
from time import sleep

def print_frames(frames, delay=.1):
    """Replay recorded frames in the cell output, one after another.

    Args:
        frames: Iterable of dicts; each dict must provide the keys
            'frame', 'state', 'action' and 'reward'.
        delay: Seconds to pause after each frame. Defaults to 0.1, which
            matches the pause this function has always used.
    """
    for i, frame in enumerate(frames):
        # Clear the previous frame so the replay animates in place.
        clear_output(wait=True)
        print(frame['frame'])
        print(f"Timestep: {i + 1}")  # 1-based counter for display
        print(f"State: {frame['state']}")
        print(f"Action: {frame['action']}")
        print(f"Reward: {frame['reward']}")
        sleep(delay)

# Replay whatever frames have been recorded so far. `frames` is assigned by
# cells further down, so on a fresh kernel it does not exist yet; fall back to
# an empty list instead of raising NameError.
print_frames(globals().get("frames", []))

In [None]:
import gym
import numpy as np
import random
import time

# Environment the agent interacts with, and the buffer of recorded frames.
env = gym.make("FrozenLake-v0")
frames = []

'''Learning the frozen lake'''

# Exploration: while learning, a random action is taken with this probability.
epsilon = 0.9

# Episode budget: number of training episodes and intended per-episode step cap.
total_episodes = 10001
max_steps = 100

# Update rule: step size and discount factor applied in learn().
lr_rate = 0.81
gamma = 0.96

# Q-table with one row per state and one column per action, all zeros.
Q = np.zeros((env.observation_space.n, env.action_space.n))

def choose_action(state, learning):
    """Select an action for `state` using the global Q-table.

    Args:
        state: Index of the current state (row of `Q`).
        learning: When true, a random action is sampled from the environment
            with probability `epsilon`; otherwise the choice is always greedy.

    Returns:
        A sampled action when exploring, else the argmax of `Q[state]`.
    """
    # The random draw only happens while learning (short-circuit `and`).
    explore = learning and random.uniform(0, 1) < epsilon
    if explore:
        return env.action_space.sample()
    return np.argmax(Q[state])

def learn(state, next_state, reward, action):
    """Apply one Q-learning update to the global table `Q`, in place.

    Args:
        state: State index the action was taken from.
        next_state: State index reached after the action.
        reward: Reward received for the transition.
        action: Index of the action that was taken.
    """
    current = Q[state, action]
    best_next = np.max(Q[next_state])
    # Gap between the bootstrapped target and the current estimate.
    td_error = (reward + gamma * best_next) - current
    Q[state, action] += lr_rate * td_error
    
# Train the Q-table: run total_episodes episodes, choosing actions in learning
# (exploring) mode and updating Q after every transition.
for episode in range(total_episodes):
    state = env.reset()
    t = 0  # steps taken in the current episode
    done = False

    # An episode ends when the environment reports `done` or once max_steps
    # steps have been taken, so the configured step cap is actually enforced.
    while not done and t < max_steps:
        action = choose_action(state, True)
        state2, reward, done, info = env.step(action)

        learn(state, state2, reward, action)
        state = state2
        t += 1

    # Progress report every 100 episodes, overwriting the previous message.
    if episode % 100 == 0:
        clear_output(wait=True)
        print(f"episode {episode} complete")
print(Q)

In [None]:
'''Evaluate the learned Frozen Lake policy'''

# Start from an empty recording so only this evaluation run is replayed.
frames = []

# Play 10 episodes with exploration switched off and record every step.
for i in range(10):
    # Reset at the start of EVERY episode: stepping an environment that has
    # already reported done, from the previous episode's final state, is not
    # a valid rollout.
    state = env.reset()
    done = False
    while not done:
        action = choose_action(state, False)
        state2, reward, done, info = env.step(action)
        state = state2
        # Snapshot taken after the step, so 'state' is the state reached.
        frames.append({
            "frame": env.render(mode='ansi'),
            'state': state,
            'action': action,
            'reward': reward,
        })

print_frames(frames)

# Deep Q-Learning (DQL) Implementation

In [2]:
import gym
import numpy as np
import random
import tensorflow as tf
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
# Create a FrozenLake-v0 environment for this section. This rebinds the global
# `env`, which the tabular Q-learning cells above also assign.
env = gym.make('FrozenLake-v0')

In [4]:
# Reset TensorFlow's default graph before anything new is built.
# NOTE(review): `tf.reset_default_graph` is the TF1-style top-level name; on
# TensorFlow 2.x it is exposed as `tf.compat.v1.reset_default_graph` — confirm
# which TensorFlow version this notebook targets.
tf.reset_default_graph()