# Reinforcement Learning Example: Tabular Q-Learning on FrozenLake

In [2]:
import numpy as np
import gym
import random
import time
from IPython.display import clear_output

In [3]:
# Build the "FrozenLake-v0" environment through gym's registry. The cells below
# read its action/observation space sizes and step through it during training.
env = gym.make("FrozenLake-v0")

In [4]:
# Size of the tabular problem, read directly from the environment's spaces.
state_space_size = env.observation_space.n
action_space_size = env.action_space.n

# Q-table: one row per state, one column per action, every estimate starting at 0.
q_table = np.zeros(shape=(state_space_size, action_space_size))

print(q_table)

[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]


In [16]:
# --- Training schedule -------------------------------------------------------
num_episodes = 10000          # number of episodes the training loop runs
max_steps_per_episode = 100   # upper bound on steps taken inside one episode

# --- Q-learning update -------------------------------------------------------
learning_rate = 0.1           # weight given to the new estimate in each update
discount_rate = 0.99          # factor applied to the best next-state Q-value

# --- Epsilon-greedy exploration ----------------------------------------------
# The training loop picks a random action with probability `exploration_rate`
# and decays that probability exponentially from max towards min per episode.
max_exploration_rate = 1
min_exploration_rate = 0.01
exploration_decay_rate = 0.001
exploration_rate = max_exploration_rate   # training starts fully exploratory

In [17]:
# Train the agent with tabular Q-learning and an epsilon-greedy behaviour policy.
# Reads `env`, `q_table` and the hyper-parameters defined in earlier cells,
# updates `q_table` in place, and records the total reward of every episode.
report_interval = 1000  # number of episodes summarised per line of the report

# Restart the epsilon schedule so that re-running this cell does not begin its
# first episode with the already-decayed rate left over from a previous run.
exploration_rate = max_exploration_rate

rewards_all_episodes = []

for episode in range(num_episodes):
    state = env.reset()

    done = False
    rewards_current_episode = 0

    for step in range(max_steps_per_episode):
        # Epsilon-greedy choice: exploit the current Q-values when the random
        # draw exceeds the exploration rate, otherwise sample a random action.
        exploration_rate_threshold = random.uniform(0, 1)
        if exploration_rate_threshold > exploration_rate:
            action = np.argmax(q_table[state, :])
        else:
            action = env.action_space.sample()

        new_state, reward, done, info = env.step(action)

        # Q-learning update: blend the old estimate with the observed reward
        # plus the discounted best Q-value of the state we landed in.
        learned_value = reward + discount_rate * np.max(q_table[new_state, :])
        q_table[state, action] = (
            q_table[state, action] * (1 - learning_rate)
            + learning_rate * learned_value
        )

        state = new_state
        rewards_current_episode += reward

        if done:
            break

    # Exponentially decay epsilon from its maximum towards its minimum.
    exploration_rate = min_exploration_rate + (
        max_exploration_rate - min_exploration_rate
    ) * np.exp(-exploration_decay_rate * episode)

    rewards_all_episodes.append(rewards_current_episode)

# Average reward per block of `report_interval` episodes. Slicing (rather than
# np.split with a float section count) also copes with a `num_episodes` that is
# not a multiple of the interval: the last block is simply shorter.
rewards_array = np.array(rewards_all_episodes)
rewards_per_thousand_episodes = [
    rewards_array[start:start + report_interval]
    for start in range(0, len(rewards_array), report_interval)
]

count = 0
for r in rewards_per_thousand_episodes:
    count += len(r)
    # ndarray.mean avoids the float noise of summing pre-scaled elements one by one.
    print(count, ": ", str(r.mean()))

print('********** Q-Table *********')
print(q_table)

1000 :  0.05500000000000004
2000 :  0.20100000000000015
3000 :  0.3970000000000003
4000 :  0.5740000000000004
5000 :  0.6330000000000005
6000 :  0.6370000000000005
7000 :  0.6930000000000005
8000 :  0.6720000000000005
9000 :  0.6790000000000005
10000 :  0.6690000000000005
********** Q-Table *********
[[0.55671891 0.50341981 0.51826799 0.50338286]
 [0.30367464 0.30448326 0.29258121 0.49124698]
 [0.41522852 0.4068427  0.39427538 0.45633171]
 [0.34890752 0.35285997 0.32887003 0.44064188]
 [0.57742486 0.31062218 0.41261394 0.34326743]
 [0.         0.         0.         0.        ]
 [0.17122527 0.12748703 0.37760975 0.18337703]
 [0.         0.         0.         0.        ]
 [0.43619048 0.53471954 0.42883777 0.60215536]
 [0.35191957 0.64411046 0.37197694 0.46416193]
 [0.64641152 0.34435699 0.36482366 0.34035241]
 [0.         0.         0.         0.        ]
 [0.         0.         0.         0.        ]
 [0.47996503 0.59668532 0.71003398 0.4828913 ]
 [0.75191428 0.89747425 0.7392755  0.715