# Frozen Lake

The following tutorial has been used to implement Q-Learning:
https://www.kaggle.com/sarjit07/reinforcement-learning-using-q-table-frozenlake

## Q-Learning

In [None]:
#!pip install torch===1.4.0 torchvision===0.5.0 -f https://download.pytorch.org/whl/torch_stable.html

In [None]:
import gym
import time
import torch
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import clear_output

In [None]:
# Register a non-slippery variant of FrozenLake (4x4 map) under its own id
from gym.envs.registration import register

register(
    id="FrozenLakeNotSlippery-v0",
    entry_point="gym.envs.toy_text:FrozenLakeEnv",
    kwargs=dict(map_name="4x4", is_slippery=False),
)

# Create the environment from the id registered above and render it
env = gym.make("FrozenLakeNotSlippery-v0")
env.render()

In [None]:
# Sizes of the environment's observation (state) and action spaces
number_of_states, number_of_actions = env.observation_space.n, env.action_space.n

print("States = ", number_of_states)
print("Actions = ", number_of_actions)

# Number of training episodes, plus per-episode records of the step count
# and the final reward (appended to by the training loop)
num_episodes = 1000
steps_total, rewards_total = [], []


In [None]:
# Discount factor: in the training update it multiplies the best Q-value of
# the next state. With 0 only the immediate reward would count; values close
# to 1 give future rewards almost as much weight as the immediate one.
gamma = 0.95

# Learning rate (step size), intended to control how far a Q-value moves
# towards a newly computed estimate: 0 would leave the old value unchanged,
# 1 would replace it entirely with the new estimate.
# NOTE(review): confirm that the training update actually uses this value.
learning_rate = 0.9

In [None]:
# Epsilon-greedy exploration schedule.
# egreedy is the probability of taking a random action; starting at 0.7 means
# roughly 70% of the early actions are random, to explore the environment.
# During training epsilon is multiplied by egreedy_decay after each action
# for as long as it is still above egreedy_final.
egreedy = 0.7
egreedy_final = 0.1
egreedy_decay = 0.999

In [None]:
# Q-table filled with zeros: one row per state, one column per action.
Q = torch.zeros(number_of_states, number_of_actions)
# Bare expression so the notebook displays the freshly initialised table
Q


In [None]:
# Train the agent with tabular Q-learning and an epsilon-greedy policy.
# Every episode runs until the environment reports `done`; the number of
# steps and the final reward of each episode are appended to steps_total
# and rewards_total.

for i_episode in range(num_episodes):

    # reset the environment to obtain the starting state
    state = env.reset()
    step = 0

    while True:
        # increment the timestep
        step += 1

        # Epsilon-greedy choice: with probability `egreedy` take a random
        # action, otherwise act greedily on the current Q-values.
        if torch.rand(1)[0] > egreedy:
            # Tiny random noise breaks ties between equal Q-values, so an
            # untrained (all-zero) row does not always yield action 0.
            noisy_values = Q[state] + torch.rand(number_of_actions) / 1000
            action = torch.argmax(noisy_values).item()
        else:
            action = env.action_space.sample()

        # decay epsilon after every action until it reaches its floor
        if egreedy > egreedy_final:
            egreedy *= egreedy_decay

        # execute the chosen action
        new_state, reward, done, info = env.step(action)

        # Q-learning update: move the old estimate towards the bootstrapped
        # target (reward + discounted best next value) by `learning_rate`,
        # instead of overwriting it outright.
        td_target = reward + gamma * torch.max(Q[new_state])
        Q[state, action] = (
            (1 - learning_rate) * Q[state, action] + learning_rate * td_target
        )

        # set the next state
        state = new_state

        # Uncomment to watch the agent move:
        # env.render()
        # time.sleep(0.4)

        if done:
            # record the length and the final reward of this episode
            steps_total.append(step)
            rewards_total.append(reward)

            # report progress every 10th episode, replacing the previous output
            if i_episode % 10 == 0:
                clear_output(wait=True)
                print('Episode: {} Reward: {} Steps Taken: {}'.format(i_episode,reward, step))
            break

print("Training finished.\n")

In [None]:
# Print the learned Q-table followed by summary statistics of the training run.
print(Q)

# Each entry of rewards_total is the final reward of one episode, so the sum
# is the value reported as the number of successfully finished episodes.
print("Episodes finished successfully: {0}".format(sum(rewards_total)))
# The multiplication by 100 must happen inside format(); applied to the
# formatted string it would repeat the whole sentence 100 times.
print("Percent of episodes finished successfully: {0}".format(sum(rewards_total) / num_episodes * 100))

print("Average number of steps: %.2f" % (sum(steps_total) / num_episodes))


In [None]:
# Plot the number of steps taken in each training episode.
# x and y are passed by keyword: recent seaborn releases accept only `data`
# positionally, so the positional form raises a TypeError there.
sns.lineplot(x=list(range(len(steps_total))), y=steps_total)
plt.xlabel("Episode")
plt.ylabel("Steps")
plt.title("FrozenLake-Problem")
plt.show()