In [24]:
from IPython.display import HTML
HTML('<iframe width="560" height="315" src="https://www.youtube.com/embed/q2ZOEFAaaI0?showinfo=0" frameborder="0" allow="autoplay; encrypted-media" allowfullscreen></iframe>')

In [25]:
import numpy as np
import gym
import random

In [26]:
env = gym.make("FrozenLake-v0")

In [27]:
action_size = env.action_space.n
state_size = env.observation_space.n

In [28]:
qtable = np.zeros((state_size, action_size))
print(qtable)

[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]


In [29]:
total_episodes = 15000        # Total episodes
learning_rate = 0.8           # Learning rate
max_steps = 99                # Max steps per episode
gamma = 0.95                  # Discounting rate

# Exploration parameters
epsilon = 1.0                 # Exploration rate
max_epsilon = 1.0             # Exploration probability at start
min_epsilon = 0.01            # Minimum exploration probability 
decay_rate = 0.005             # Exponential decay rate for exploration prob

In [31]:
# List of rewards
rewards = []

# 2 For life or until learning is stopped
for episode in range(total_episodes):
    # Reset the environment
    state = env.reset()
    step = 0
    done = False
    total_rewards = 0
    
    for step in range(max_steps):
        # 3. Choose an action a in the current world state (s)
        ## First we randomize a number
        exp_exp_tradeoff = random.uniform(0, 1)
        
        ## If this number > greater than epsilon --> exploitation (taking the biggest Q value for this state)
        if exp_exp_tradeoff > epsilon:
            action = np.argmax(qtable[state,:])

        # Else doing a random choice --> exploration
        else:
            action = env.action_space.sample()

        # Take the action (a) and observe the outcome state(s') and reward (r)
        new_state, reward, done, info = env.step(action)
        if reward>0 :
            print(episode,step,state,action,reward,new_state)

        # Update Q(s,a):= Q(s,a) + lr [R(s,a) + gamma * max Q(s',a') - Q(s,a)]
        # qtable[new_state,:] : all the actions we can take from new state
        qtable[state, action] = qtable[state, action] + learning_rate * (reward + gamma * np.max(qtable[new_state, :]) - qtable[state, action])
        
        total_rewards += reward
        
        # Our new state is state
        state = new_state
        
        # If done (if we're dead) : finish episode
        if done == True: 
            break
        
    # Reduce epsilon (because we need less and less exploration)
    epsilon = min_epsilon + (max_epsilon - min_epsilon)*np.exp(-decay_rate*episode) 
    rewards.append(total_rewards)

print ("Score over time: " +  str(sum(rewards)/total_episodes))
print(qtable)

0 47 14 1 1.0 15
142 8 14 1 1.0 15
148 12 14 1 1.0 15
188 19 14 1 1.0 15
257 9 14 1 1.0 15
270 16 14 1 1.0 15
327 16 14 2 1.0 15
329 27 14 2 1.0 15
337 40 14 3 1.0 15
355 27 14 3 1.0 15
361 21 14 3 1.0 15
369 22 14 2 1.0 15
400 7 14 1 1.0 15
402 9 14 1 1.0 15
408 21 14 1 1.0 15
419 9 14 1 1.0 15
423 10 14 1 1.0 15
424 20 14 1 1.0 15
428 29 14 1 1.0 15
431 22 14 1 1.0 15
441 17 14 2 1.0 15
443 10 14 2 1.0 15
446 43 14 1 1.0 15
451 25 14 1 1.0 15
458 13 14 1 1.0 15
462 30 14 1 1.0 15
468 18 14 1 1.0 15
470 26 14 1 1.0 15
476 17 14 1 1.0 15
477 6 14 1 1.0 15
479 20 14 1 1.0 15
480 52 14 1 1.0 15
487 15 14 1 1.0 15
490 41 14 2 1.0 15
504 7 14 2 1.0 15
506 58 14 2 1.0 15
508 13 14 1 1.0 15
520 18 14 1 1.0 15
526 38 14 1 1.0 15
528 14 14 1 1.0 15
541 29 14 2 1.0 15
545 12 14 2 1.0 15
547 12 14 2 1.0 15
548 10 14 2 1.0 15
555 16 14 2 1.0 15
558 20 14 2 1.0 15
568 11 14 2 1.0 15
574 16 14 2 1.0 15
588 29 14 1 1.0 15
593 5 14 1 1.0 15
596 25 14 1 1.0 15
613 33 14 1 1.0 15
619 10 14 1 1.0 15
623

1549 18 14 1 1.0 15
1551 39 14 1 1.0 15
1553 5 14 1 1.0 15
1554 29 14 1 1.0 15
1558 7 14 1 1.0 15
1560 18 14 1 1.0 15
1566 87 14 1 1.0 15
1567 40 14 1 1.0 15
1568 40 14 1 1.0 15
1569 21 14 1 1.0 15
1571 39 14 1 1.0 15
1572 24 14 1 1.0 15
1575 82 14 1 1.0 15
1576 31 14 1 1.0 15
1577 20 14 1 1.0 15
1578 20 14 1 1.0 15
1580 16 14 1 1.0 15
1581 11 14 1 1.0 15
1583 18 14 1 1.0 15
1584 36 14 1 1.0 15
1587 44 14 1 1.0 15
1589 18 14 1 1.0 15
1590 46 14 1 1.0 15
1592 37 14 1 1.0 15
1593 12 14 1 1.0 15
1596 18 14 1 1.0 15
1597 38 14 1 1.0 15
1598 59 14 3 1.0 15
1599 16 14 3 1.0 15
1600 67 14 3 1.0 15
1602 33 14 1 1.0 15
1603 97 14 1 1.0 15
1604 12 14 1 1.0 15
1605 23 14 1 1.0 15
1607 60 14 1 1.0 15
1609 12 14 1 1.0 15
1611 15 14 1 1.0 15
1612 44 14 1 1.0 15
1613 69 14 1 1.0 15
1615 42 14 1 1.0 15
1617 31 14 1 1.0 15
1618 12 14 1 1.0 15
1619 25 14 1 1.0 15
1621 39 14 1 1.0 15
1623 67 14 1 1.0 15
1626 82 14 1 1.0 15
1630 48 14 1 1.0 15
1631 69 14 1 1.0 15
1633 24 14 1 1.0 15
1635 30 14 1 1.0 15
16

2416 59 14 1 1.0 15
2417 21 14 1 1.0 15
2418 34 14 1 1.0 15
2419 51 14 1 1.0 15
2421 32 14 1 1.0 15
2422 73 14 1 1.0 15
2425 41 14 1 1.0 15
2428 13 14 1 1.0 15
2429 49 14 1 1.0 15
2432 38 14 1 1.0 15
2433 10 14 1 1.0 15
2434 18 14 1 1.0 15
2435 43 14 1 1.0 15
2436 23 14 1 1.0 15
2437 20 14 1 1.0 15
2441 50 14 1 1.0 15
2447 22 14 1 1.0 15
2448 10 14 1 1.0 15
2449 27 14 1 1.0 15
2450 10 14 1 1.0 15
2451 33 14 1 1.0 15
2456 5 14 1 1.0 15
2459 13 14 3 1.0 15
2461 22 14 3 1.0 15
2462 26 14 3 1.0 15
2463 38 14 3 1.0 15
2466 17 14 3 1.0 15
2467 73 14 3 1.0 15
2468 10 14 3 1.0 15
2469 23 14 3 1.0 15
2471 23 14 3 1.0 15
2473 14 14 1 1.0 15
2475 13 14 1 1.0 15
2477 54 14 1 1.0 15
2478 17 14 1 1.0 15
2482 10 14 1 1.0 15
2483 26 14 1 1.0 15
2486 85 14 1 1.0 15
2490 29 14 1 1.0 15
2491 11 14 1 1.0 15
2494 9 14 1 1.0 15
2495 11 14 1 1.0 15
2497 97 14 1 1.0 15
2502 23 14 1 1.0 15
2503 32 14 1 1.0 15
2504 48 14 1 1.0 15
2506 60 14 1 1.0 15
2508 16 14 1 1.0 15
2511 23 14 1 1.0 15
2515 8 14 1 1.0 15
251

3291 29 14 1 1.0 15
3294 81 14 2 1.0 15
3298 9 14 3 1.0 15
3299 20 14 1 1.0 15
3300 20 14 1 1.0 15
3301 73 14 1 1.0 15
3303 16 14 1 1.0 15
3306 58 14 1 1.0 15
3307 12 14 1 1.0 15
3308 22 14 1 1.0 15
3310 10 14 1 1.0 15
3311 24 14 1 1.0 15
3315 37 14 1 1.0 15
3317 7 14 1 1.0 15
3318 55 14 1 1.0 15
3319 36 14 1 1.0 15
3320 31 14 1 1.0 15
3324 9 14 1 1.0 15
3327 11 14 1 1.0 15
3328 33 14 1 1.0 15
3329 42 14 1 1.0 15
3330 36 14 1 1.0 15
3331 88 14 1 1.0 15
3332 27 14 1 1.0 15
3334 17 14 1 1.0 15
3336 58 14 1 1.0 15
3337 14 14 1 1.0 15
3338 16 14 1 1.0 15
3339 63 14 1 1.0 15
3341 33 14 1 1.0 15
3342 25 14 1 1.0 15
3345 37 14 1 1.0 15
3346 46 14 1 1.0 15
3350 7 14 1 1.0 15
3352 47 14 1 1.0 15
3353 22 14 1 1.0 15
3354 40 14 1 1.0 15
3357 8 14 1 1.0 15
3359 15 14 1 1.0 15
3361 86 14 1 1.0 15
3362 13 14 1 1.0 15
3363 20 14 1 1.0 15
3365 34 14 1 1.0 15
3367 9 14 1 1.0 15
3369 50 14 1 1.0 15
3370 30 14 1 1.0 15
3373 56 14 1 1.0 15
3374 11 14 1 1.0 15
3375 20 14 1 1.0 15
3376 36 14 1 1.0 15
3378 8

4122 30 14 3 1.0 15
4124 63 14 3 1.0 15
4125 33 14 3 1.0 15
4126 14 14 3 1.0 15
4127 43 14 3 1.0 15
4128 7 14 3 1.0 15
4130 16 14 3 1.0 15
4131 20 14 3 1.0 15
4136 55 14 3 1.0 15
4138 31 14 3 1.0 15
4141 20 14 3 1.0 15
4150 18 14 3 1.0 15
4151 29 14 3 1.0 15
4152 41 14 3 1.0 15
4155 15 14 3 1.0 15
4156 24 14 3 1.0 15
4158 26 14 3 1.0 15
4159 6 14 3 1.0 15
4161 20 14 3 1.0 15
4162 74 14 3 1.0 15
4166 27 14 1 1.0 15
4169 9 14 1 1.0 15
4170 25 14 1 1.0 15
4171 9 14 1 1.0 15
4172 45 14 1 1.0 15
4175 74 14 1 1.0 15
4180 17 14 3 1.0 15
4185 18 14 3 1.0 15
4186 23 14 3 1.0 15
4188 42 14 3 1.0 15
4189 25 14 3 1.0 15
4190 46 14 3 1.0 15
4192 14 14 3 1.0 15
4193 8 14 3 1.0 15
4196 25 14 1 1.0 15
4198 14 14 1 1.0 15
4203 48 14 1 1.0 15
4205 30 14 1 1.0 15
4206 8 14 1 1.0 15
4207 9 14 1 1.0 15
4208 43 14 1 1.0 15
4209 96 14 1 1.0 15
4211 41 14 1 1.0 15
4212 12 14 1 1.0 15
4213 43 14 1 1.0 15
4214 89 14 1 1.0 15
4215 51 14 1 1.0 15
4216 29 14 1 1.0 15
4220 15 14 1 1.0 15
4223 69 14 3 1.0 15
4224 20

5010 32 14 1 1.0 15
5011 20 14 1 1.0 15
5012 15 14 1 1.0 15
5013 23 14 1 1.0 15
5014 58 14 1 1.0 15
5015 82 14 1 1.0 15
5017 40 14 1 1.0 15
5018 5 14 1 1.0 15
5019 30 14 1 1.0 15
5020 21 14 1 1.0 15
5022 16 14 1 1.0 15
5023 48 14 1 1.0 15
5026 16 14 1 1.0 15
5028 61 14 1 1.0 15
5029 42 14 1 1.0 15
5030 18 14 1 1.0 15
5035 13 14 1 1.0 15
5037 44 14 1 1.0 15
5038 19 14 1 1.0 15
5039 74 14 1 1.0 15
5040 13 14 1 1.0 15
5042 17 14 1 1.0 15
5043 9 14 1 1.0 15
5045 51 14 1 1.0 15
5047 62 14 1 1.0 15
5048 50 14 1 1.0 15
5049 55 14 1 1.0 15
5051 36 14 1 1.0 15
5052 45 14 1 1.0 15
5055 24 14 1 1.0 15
5056 84 14 1 1.0 15
5057 29 14 1 1.0 15
5058 15 14 1 1.0 15
5060 86 14 1 1.0 15
5061 56 14 1 1.0 15
5062 23 14 1 1.0 15
5063 9 14 1 1.0 15
5064 45 14 1 1.0 15
5065 24 14 1 1.0 15
5066 10 14 1 1.0 15
5068 20 14 1 1.0 15
5069 11 14 1 1.0 15
5070 24 14 1 1.0 15
5071 22 14 1 1.0 15
5072 54 14 1 1.0 15
5073 29 14 1 1.0 15
5075 39 14 1 1.0 15
5076 59 14 1 1.0 15
5077 40 14 1 1.0 15
5079 33 14 1 1.0 15
508

5867 40 14 1 1.0 15
5868 35 14 1 1.0 15
5870 47 14 1 1.0 15
5872 47 14 1 1.0 15
5875 70 14 1 1.0 15
5877 35 14 1 1.0 15
5882 56 14 1 1.0 15
5884 16 14 1 1.0 15
5885 40 14 1 1.0 15
5893 49 14 1 1.0 15
5897 22 14 1 1.0 15
5900 21 14 1 1.0 15
5901 50 14 1 1.0 15
5902 10 14 1 1.0 15
5903 80 14 1 1.0 15
5905 9 14 1 1.0 15
5907 29 14 1 1.0 15
5908 12 14 1 1.0 15
5909 30 14 1 1.0 15
5910 15 14 1 1.0 15
5911 24 14 1 1.0 15
5912 9 14 1 1.0 15
5913 21 14 1 1.0 15
5914 60 14 1 1.0 15
5915 71 14 1 1.0 15
5918 21 14 1 1.0 15
5919 11 14 1 1.0 15
5920 12 14 1 1.0 15
5921 30 14 1 1.0 15
5922 23 14 1 1.0 15
5923 20 14 1 1.0 15
5924 78 14 1 1.0 15
5926 77 14 1 1.0 15
5927 58 14 1 1.0 15
5929 79 14 1 1.0 15
5932 35 14 1 1.0 15
5933 28 14 1 1.0 15
5934 24 14 1 1.0 15
5936 68 14 1 1.0 15
5937 11 14 1 1.0 15
5938 22 14 1 1.0 15
5939 43 14 1 1.0 15
5940 34 14 1 1.0 15
5942 17 14 1 1.0 15
5943 27 14 1 1.0 15
5944 20 14 1 1.0 15
5945 86 14 1 1.0 15
5946 70 14 1 1.0 15
5947 11 14 1 1.0 15
5949 15 14 1 1.0 15
59

6710 10 14 1 1.0 15
6712 41 14 1 1.0 15
6713 30 14 1 1.0 15
6714 48 14 1 1.0 15
6716 40 14 1 1.0 15
6717 19 14 1 1.0 15
6718 18 14 1 1.0 15
6719 33 14 1 1.0 15
6721 55 14 1 1.0 15
6722 43 14 1 1.0 15
6723 54 14 1 1.0 15
6725 28 14 1 1.0 15
6726 39 14 1 1.0 15
6728 37 14 1 1.0 15
6732 18 14 1 1.0 15
6734 26 14 1 1.0 15
6735 51 14 1 1.0 15
6736 14 14 1 1.0 15
6737 33 14 1 1.0 15
6738 26 14 1 1.0 15
6739 34 14 1 1.0 15
6740 29 14 1 1.0 15
6741 7 14 1 1.0 15
6742 38 14 1 1.0 15
6743 42 14 1 1.0 15
6744 11 14 1 1.0 15
6747 14 14 1 1.0 15
6748 72 14 1 1.0 15
6749 54 14 1 1.0 15
6751 9 14 1 1.0 15
6752 70 14 1 1.0 15
6754 23 14 1 1.0 15
6755 11 14 1 1.0 15
6758 15 14 1 1.0 15
6759 20 14 1 1.0 15
6762 5 14 1 1.0 15
6773 18 14 1 1.0 15
6775 26 14 1 1.0 15
6776 27 14 1 1.0 15
6777 25 14 1 1.0 15
6779 37 14 1 1.0 15
6780 54 14 1 1.0 15
6781 54 14 1 1.0 15
6785 17 14 3 1.0 15
6787 21 14 3 1.0 15
6789 93 14 1 1.0 15
6792 13 14 1 1.0 15
6793 78 14 1 1.0 15
6794 27 14 1 1.0 15
6795 15 14 1 1.0 15
679

7628 76 14 1 1.0 15
7629 91 14 1 1.0 15
7632 77 14 1 1.0 15
7633 25 14 1 1.0 15
7634 21 14 1 1.0 15
7638 33 14 1 1.0 15
7640 84 14 1 1.0 15
7641 16 14 1 1.0 15
7642 19 14 1 1.0 15
7644 11 14 1 1.0 15
7645 33 14 1 1.0 15
7648 14 14 1 1.0 15
7649 19 14 1 1.0 15
7650 59 14 1 1.0 15
7653 84 14 1 1.0 15
7654 66 14 1 1.0 15
7656 18 14 1 1.0 15
7657 76 14 1 1.0 15
7658 37 14 1 1.0 15
7660 41 14 1 1.0 15
7662 77 14 1 1.0 15
7663 15 14 1 1.0 15
7666 42 14 1 1.0 15
7667 8 14 1 1.0 15
7669 32 14 1 1.0 15
7670 26 14 1 1.0 15
7673 47 14 1 1.0 15
7674 7 14 1 1.0 15
7676 57 14 1 1.0 15
7677 15 14 1 1.0 15
7678 20 14 1 1.0 15
7679 10 14 1 1.0 15
7680 78 14 1 1.0 15
7681 15 14 1 1.0 15
7682 36 14 2 1.0 15
7684 13 14 2 1.0 15
7686 10 14 1 1.0 15
7688 44 14 1 1.0 15
7689 34 14 1 1.0 15
7691 14 14 1 1.0 15
7696 13 14 1 1.0 15
7697 13 14 1 1.0 15
7700 40 14 1 1.0 15
7702 13 14 1 1.0 15
7705 28 14 1 1.0 15
7706 14 14 1 1.0 15
7709 11 14 1 1.0 15
7710 60 14 1 1.0 15
7712 11 14 1 1.0 15
7713 15 14 1 1.0 15
77

8428 51 14 1 1.0 15
8430 27 14 1 1.0 15
8431 36 14 1 1.0 15
8432 25 14 1 1.0 15
8433 21 14 1 1.0 15
8434 30 14 1 1.0 15
8435 18 14 1 1.0 15
8437 10 14 1 1.0 15
8439 13 14 1 1.0 15
8440 27 14 1 1.0 15
8441 18 14 1 1.0 15
8442 30 14 1 1.0 15
8443 14 14 1 1.0 15
8444 37 14 1 1.0 15
8445 46 14 1 1.0 15
8446 12 14 1 1.0 15
8447 19 14 1 1.0 15
8448 8 14 1 1.0 15
8452 42 14 1 1.0 15
8457 43 14 1 1.0 15
8460 51 14 1 1.0 15
8461 9 14 1 1.0 15
8462 22 14 1 1.0 15
8463 81 14 1 1.0 15
8464 45 14 1 1.0 15
8466 41 14 1 1.0 15
8470 73 14 1 1.0 15
8474 16 14 1 1.0 15
8482 75 14 1 1.0 15
8483 24 14 1 1.0 15
8484 12 14 1 1.0 15
8485 56 14 1 1.0 15
8487 36 14 1 1.0 15
8488 18 14 1 1.0 15
8490 14 14 1 1.0 15
8491 38 14 1 1.0 15
8495 26 14 1 1.0 15
8497 35 14 1 1.0 15
8498 44 14 1 1.0 15
8499 14 14 1 1.0 15
8501 27 14 1 1.0 15
8502 47 14 1 1.0 15
8503 34 14 1 1.0 15
8504 20 14 1 1.0 15
8506 11 14 1 1.0 15
8507 65 14 1 1.0 15
8509 7 14 1 1.0 15
8512 43 14 1 1.0 15
8513 30 14 1 1.0 15
8514 13 14 1 1.0 15
851

9244 46 14 1 1.0 15
9253 7 14 1 1.0 15
9254 41 14 1 1.0 15
9255 15 14 1 1.0 15
9258 10 14 1 1.0 15
9259 31 14 1 1.0 15
9260 47 14 1 1.0 15
9262 27 14 1 1.0 15
9264 24 14 1 1.0 15
9265 25 14 1 1.0 15
9266 58 14 1 1.0 15
9267 10 14 1 1.0 15
9270 29 14 1 1.0 15
9271 14 14 1 1.0 15
9273 49 14 1 1.0 15
9277 9 14 2 1.0 15
9278 29 14 2 1.0 15
9280 19 14 2 1.0 15
9282 7 14 2 1.0 15
9283 78 14 2 1.0 15
9284 62 14 2 1.0 15
9285 12 14 2 1.0 15
9286 12 14 2 1.0 15
9288 48 14 2 1.0 15
9290 51 14 2 1.0 15
9292 28 14 2 1.0 15
9294 13 14 2 1.0 15
9298 32 14 2 1.0 15
9301 59 14 2 1.0 15
9302 24 14 2 1.0 15
9303 89 14 2 1.0 15
9306 47 14 2 1.0 15
9310 9 14 2 1.0 15
9311 33 14 2 1.0 15
9313 36 14 2 1.0 15
9317 16 14 2 1.0 15
9322 58 14 2 1.0 15
9325 22 14 2 1.0 15
9327 76 14 2 1.0 15
9328 30 14 1 1.0 15
9329 8 14 1 1.0 15
9336 17 14 1 1.0 15
9339 62 14 1 1.0 15
9340 11 14 1 1.0 15
9341 19 14 1 1.0 15
9342 28 14 1 1.0 15
9346 8 14 1 1.0 15
9348 70 14 1 1.0 15
9352 39 14 1 1.0 15
9356 20 14 1 1.0 15
9357 9

10042 58 14 1 1.0 15
10043 11 14 1 1.0 15
10044 32 14 1 1.0 15
10045 25 14 1 1.0 15
10047 38 14 1 1.0 15
10048 33 14 1 1.0 15
10049 16 14 1 1.0 15
10050 57 14 1 1.0 15
10051 17 14 1 1.0 15
10053 34 14 1 1.0 15
10055 42 14 1 1.0 15
10056 88 14 1 1.0 15
10059 20 14 1 1.0 15
10062 19 14 1 1.0 15
10068 31 14 1 1.0 15
10069 17 14 1 1.0 15
10071 50 14 1 1.0 15
10075 33 14 1 1.0 15
10076 11 14 1 1.0 15
10079 91 14 1 1.0 15
10080 11 14 1 1.0 15
10081 31 14 1 1.0 15
10082 10 14 1 1.0 15
10089 10 14 1 1.0 15
10091 32 14 1 1.0 15
10092 82 14 3 1.0 15
10093 23 14 3 1.0 15
10097 25 14 3 1.0 15
10098 20 14 3 1.0 15
10101 12 14 3 1.0 15
10102 27 14 3 1.0 15
10104 13 14 3 1.0 15
10105 49 14 3 1.0 15
10107 31 14 3 1.0 15
10110 18 14 3 1.0 15
10111 8 14 3 1.0 15
10114 36 14 1 1.0 15
10116 42 14 1 1.0 15
10117 9 14 1 1.0 15
10118 13 14 1 1.0 15
10121 62 14 1 1.0 15
10122 40 14 1 1.0 15
10123 66 14 1 1.0 15
10130 23 14 1 1.0 15
10131 28 14 1 1.0 15
10132 25 14 1 1.0 15
10134 63 14 1 1.0 15
10136 45 14 1 1

10796 71 14 1 1.0 15
10797 17 14 1 1.0 15
10799 18 14 1 1.0 15
10800 10 14 1 1.0 15
10803 12 14 1 1.0 15
10804 83 14 1 1.0 15
10806 20 14 1 1.0 15
10807 22 14 1 1.0 15
10809 19 14 1 1.0 15
10811 17 14 1 1.0 15
10812 23 14 1 1.0 15
10815 58 14 1 1.0 15
10816 6 14 1 1.0 15
10818 18 14 1 1.0 15
10819 37 14 1 1.0 15
10823 59 14 1 1.0 15
10824 40 14 1 1.0 15
10826 60 14 1 1.0 15
10827 22 14 1 1.0 15
10828 11 14 1 1.0 15
10829 32 14 1 1.0 15
10831 17 14 1 1.0 15
10835 12 14 1 1.0 15
10836 15 14 1 1.0 15
10838 25 14 1 1.0 15
10840 45 14 1 1.0 15
10841 73 14 1 1.0 15
10842 36 14 1 1.0 15
10844 6 14 1 1.0 15
10849 8 14 1 1.0 15
10850 6 14 1 1.0 15
10853 35 14 1 1.0 15
10854 44 14 1 1.0 15
10857 71 14 1 1.0 15
10859 16 14 1 1.0 15
10860 50 14 1 1.0 15
10861 29 14 1 1.0 15
10865 92 14 1 1.0 15
10866 11 14 1 1.0 15
10870 18 14 1 1.0 15
10872 26 14 1 1.0 15
10875 19 14 1 1.0 15
10876 10 14 1 1.0 15
10877 22 14 1 1.0 15
10881 50 14 1 1.0 15
10882 34 14 1 1.0 15
10883 25 14 1 1.0 15
10885 46 14 1 1.0

11631 6 14 1 1.0 15
11633 44 14 1 1.0 15
11634 13 14 1 1.0 15
11635 31 14 1 1.0 15
11638 15 14 1 1.0 15
11639 21 14 1 1.0 15
11640 55 14 1 1.0 15
11644 20 14 1 1.0 15
11645 8 14 1 1.0 15
11646 17 14 1 1.0 15
11647 78 14 1 1.0 15
11648 61 14 1 1.0 15
11654 31 14 1 1.0 15
11655 28 14 1 1.0 15
11657 85 14 1 1.0 15
11659 65 14 1 1.0 15
11660 27 14 1 1.0 15
11661 26 14 1 1.0 15
11665 88 14 1 1.0 15
11667 21 14 1 1.0 15
11672 16 14 1 1.0 15
11677 41 14 1 1.0 15
11679 49 14 1 1.0 15
11681 38 14 1 1.0 15
11686 39 14 1 1.0 15
11687 26 14 1 1.0 15
11688 29 14 1 1.0 15
11689 10 14 1 1.0 15
11691 63 14 1 1.0 15
11692 74 14 1 1.0 15
11694 26 14 1 1.0 15
11700 77 14 1 1.0 15
11701 14 14 1 1.0 15
11702 28 14 1 1.0 15
11703 8 14 1 1.0 15
11704 14 14 1 1.0 15
11705 41 14 1 1.0 15
11706 56 14 1 1.0 15
11707 23 14 1 1.0 15
11708 14 14 1 1.0 15
11709 17 14 1 1.0 15
11710 28 14 1 1.0 15
11713 62 14 1 1.0 15
11715 18 14 1 1.0 15
11716 34 14 1 1.0 15
11718 40 14 1 1.0 15
11719 7 14 1 1.0 15
11722 24 14 1 1.0

12525 41 14 2 1.0 15
12526 11 14 2 1.0 15
12528 56 14 2 1.0 15
12529 82 14 2 1.0 15
12530 68 14 2 1.0 15
12532 74 14 3 1.0 15
12533 26 14 3 1.0 15
12534 26 14 3 1.0 15
12536 32 14 3 1.0 15
12540 60 14 3 1.0 15
12543 59 14 3 1.0 15
12546 14 14 3 1.0 15
12552 71 14 2 1.0 15
12556 8 14 3 1.0 15
12560 32 14 3 1.0 15
12562 87 14 3 1.0 15
12567 15 14 3 1.0 15
12570 23 14 3 1.0 15
12573 9 14 3 1.0 15
12574 29 14 3 1.0 15
12575 22 14 3 1.0 15
12577 67 14 3 1.0 15
12578 21 14 3 1.0 15
12579 62 14 3 1.0 15
12583 77 14 3 1.0 15
12584 21 14 3 1.0 15
12586 20 14 3 1.0 15
12587 12 14 3 1.0 15
12590 25 14 3 1.0 15
12591 22 14 3 1.0 15
12592 15 14 3 1.0 15
12593 49 14 3 1.0 15
12595 13 14 3 1.0 15
12599 7 14 3 1.0 15
12606 54 14 3 1.0 15
12609 50 14 3 1.0 15
12611 10 14 3 1.0 15
12620 31 14 3 1.0 15
12623 91 14 3 1.0 15
12626 17 14 3 1.0 15
12628 24 14 3 1.0 15
12629 28 14 3 1.0 15
12630 45 14 3 1.0 15
12632 23 14 3 1.0 15
12633 64 14 3 1.0 15
12635 20 14 3 1.0 15
12637 51 14 3 1.0 15
12638 25 14 3 1.

13387 59 14 1 1.0 15
13390 32 14 1 1.0 15
13396 13 14 1 1.0 15
13397 22 14 1 1.0 15
13398 31 14 1 1.0 15
13399 81 14 1 1.0 15
13400 13 14 1 1.0 15
13402 10 14 1 1.0 15
13403 34 14 1 1.0 15
13404 37 14 1 1.0 15
13405 21 14 1 1.0 15
13406 31 14 1 1.0 15
13407 12 14 1 1.0 15
13408 11 14 2 1.0 15
13410 17 14 1 1.0 15
13411 39 14 2 1.0 15
13412 37 14 2 1.0 15
13414 38 14 1 1.0 15
13416 10 14 1 1.0 15
13420 30 14 2 1.0 15
13421 38 14 2 1.0 15
13425 85 14 1 1.0 15
13429 51 14 1 1.0 15
13431 18 14 1 1.0 15
13432 10 14 1 1.0 15
13433 47 14 1 1.0 15
13435 45 14 1 1.0 15
13436 71 14 1 1.0 15
13437 30 14 1 1.0 15
13438 47 14 1 1.0 15
13439 11 14 1 1.0 15
13440 9 14 1 1.0 15
13441 12 14 1 1.0 15
13442 48 14 1 1.0 15
13443 86 14 1 1.0 15
13444 39 14 1 1.0 15
13446 35 14 1 1.0 15
13447 7 14 1 1.0 15
13448 42 14 1 1.0 15
13449 33 14 1 1.0 15
13450 30 14 1 1.0 15
13451 70 14 1 1.0 15
13453 36 14 1 1.0 15
13455 28 14 1 1.0 15
13456 11 14 1 1.0 15
13457 41 14 1 1.0 15
13458 57 14 1 1.0 15
13459 20 14 1 1

14263 54 14 1 1.0 15
14269 14 14 1 1.0 15
14270 6 14 1 1.0 15
14272 10 14 1 1.0 15
14273 10 14 1 1.0 15
14274 19 14 1 1.0 15
14276 56 14 1 1.0 15
14278 28 14 1 1.0 15
14280 30 14 1 1.0 15
14281 16 14 1 1.0 15
14282 15 14 1 1.0 15
14285 28 14 1 1.0 15
14289 23 14 1 1.0 15
14290 58 14 1 1.0 15
14299 34 14 1 1.0 15
14302 82 14 1 1.0 15
14303 45 14 1 1.0 15
14304 83 14 1 1.0 15
14305 24 14 1 1.0 15
14307 21 14 1 1.0 15
14308 43 14 1 1.0 15
14311 11 14 1 1.0 15
14317 18 14 1 1.0 15
14319 25 14 1 1.0 15
14320 45 14 1 1.0 15
14321 33 14 1 1.0 15
14322 30 14 1 1.0 15
14323 19 14 1 1.0 15
14324 58 14 1 1.0 15
14326 13 14 1 1.0 15
14328 18 14 1 1.0 15
14330 24 14 1 1.0 15
14331 25 14 1 1.0 15
14332 50 14 1 1.0 15
14333 9 14 1 1.0 15
14335 15 14 1 1.0 15
14336 61 14 1 1.0 15
14338 9 14 1 1.0 15
14339 10 14 1 1.0 15
14343 25 14 1 1.0 15
14344 14 14 1 1.0 15
14345 8 14 1 1.0 15
14348 9 14 1 1.0 15
14349 18 14 1 1.0 15
14350 43 14 1 1.0 15
14354 8 14 1 1.0 15
14356 7 14 1 1.0 15
14357 9 14 1 1.0 15


In [32]:
env.reset()

for episode in range(5):
    state = env.reset()
    step = 0
    done = False
    print("****************************************************")
    print("EPISODE ", episode)

    for step in range(max_steps):
        
        # Take the action (index) that have the maximum expected future reward given that state
        action = np.argmax(qtable[state,:])
        
        new_state, reward, done, info = env.step(action)
        
        if done:
            # Here, we decide to only print the last state (to see if our agent is on the goal or fall into an hole)
            env.render()
            
            # We print the number of step it took.
            print("Number of steps", step)
            break
        state = new_state
env.close()

****************************************************
EPISODE  0
  (Right)
SFFF
FHF[41mH[0m
FFFH
HFFG
Number of steps 24
****************************************************
EPISODE  1
  (Down)
SFFF
FHFH
FFFH
HFF[41mG[0m
Number of steps 27
****************************************************
EPISODE  2
  (Down)
SFFF
FHFH
FFFH
HFF[41mG[0m
Number of steps 20
****************************************************
EPISODE  3
  (Right)
SFFF
FHF[41mH[0m
FFFH
HFFG
Number of steps 47
****************************************************
EPISODE  4
  (Down)
SFFF
FHFH
FFFH
HFF[41mG[0m
Number of steps 22


In [34]:
qtable

array([[2.56185525e-02, 2.66457186e-02, 9.40343766e-02, 2.63538848e-02],
       [5.94664110e-03, 8.27665611e-03, 8.36175719e-03, 2.59648271e-02],
       [2.11751466e-02, 7.64366645e-03, 9.67464991e-03, 1.04062839e-02],
       [2.35423052e-03, 2.76469129e-03, 6.54793082e-03, 1.02724579e-02],
       [2.23548450e-02, 1.22320138e-02, 6.75474280e-03, 5.94636040e-03],
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [8.57007045e-06, 6.66115156e-06, 1.54312064e-05, 1.15290623e-05],
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [1.69439827e-02, 4.47571392e-03, 2.58552161e-04, 5.96402288e-01],
       [5.70220004e-03, 5.34195429e-01, 1.66328861e-02, 9.87136181e-03],
       [1.04786237e-01, 5.78571088e-03, 1.07931863e-03, 3.08247428e-03],
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [1.07956879e-01, 1.57064803e-02, 8.73818284e

In [35]:
rewards

[1.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 1.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 1.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0