### Loading the environment

https://gym.openai.com/envs/Taxi-v2/

In [41]:
import gym
# Create the Taxi-v2 environment; the `env` object is reused by the cells below.
env = gym.make('Taxi-v2')

### Initialization and rendering

In [42]:
# Start a fresh episode, keep the state returned by reset(), and draw the grid.
initial_state = env.reset()
env.render()

+---------+
|[34;1mR[0m: | : :G|
| : : : : |
| : : : : |
| | : | :[43m [0m|
|[35mY[0m| : |B: |
+---------+



### Q-learning Hyperparameters

In [43]:
# Learning rate: weight given to the new estimate in the Q-value update.
alpha = 0.1
# Discount factor: scales the best Q-value of the next state in the update.
gamma = 0.6
# Threshold for action selection in q_learning_update: a random action is
# taken when a uniform draw from 0 to 1 exceeds this value, otherwise the
# greedy action is used.
epsilon = 0.8

### Initializing the Q-table

In [56]:
import numpy as np
from collections import defaultdict

# The Q-table has one row per state and one column per action, and every
# entry starts at zero.
q_table = np.zeros(shape=(env.observation_space.n, env.action_space.n))

### Updating the Q-values after every time step

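![https://s31.postimg.cc/tmim26kqx/q_learning_equation.png](https://s31.postimg.cc/tmim26kqx/q_learning_equation.png?dl=1)

The same update rule in LaTeX, as implemented in the code below:

$$Q(s_t, a_t) \leftarrow (1 - \alpha)\, Q(s_t, a_t) + \alpha \left( r_t + \gamma \max_{a} Q(s_{t+1}, a) \right)$$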

In [52]:
def q_learning_update(q_table, env, state, epsilon):
    """
    Take one step in `env` from `state` and apply the Q-learning update to
    the visited (state, action) entry of `q_table`, which is modified in place.

    Action choice: a number is drawn uniformly from 0 to 1. If it is greater
    than `epsilon`, a random action is sampled from the action space;
    otherwise `select_optimal_action` chooses the action.

    The learning rate `alpha` and discount factor `gamma` are read from the
    module-level variables of the same names.

    Returns the tuple (next_state, reward, done) reported by `env.step`.
    """
    # NOTE(review): with this comparison `epsilon` is the chance of the greedy
    # branch, which is the reverse of the usual epsilon-greedy convention.
    explore = random.uniform(0, 1) > epsilon
    if explore:
        action = env.action_space.sample()
    else:
        action = select_optimal_action(q_table, state)

    next_state, reward, done, _ = env.step(action)

    # Read the current estimate and the best next-state value before writing,
    # since next_state may be the same row as state.
    current_estimate = q_table[state][action]
    best_next_value = np.max(q_table[next_state])

    # Blend the old estimate with the one-step bootstrapped target.
    td_target = reward + gamma * best_next_value
    q_table[state][action] = (1 - alpha) * current_estimate + alpha * td_target

    return next_state, reward, done

In [53]:
import random

def select_optimal_action(q_table, state):
    """
    Given a state, select the action from the action space having the
    highest Q-value in the q_table.

    If every Q-value for the state is still zero, there is nothing to rank,
    so an action index is drawn uniformly at random from the table's columns.

    Parameters:
        q_table: 2-D array indexed as q_table[state][action].
        state: row index of the state to act in.

    Returns:
        The index of the chosen action.
    """
    action_values = q_table[state]

    # Test for "all entries are zero" directly. Checking that the sum is zero
    # would also match rows whose non-zero values happen to cancel out, and
    # would then discard what has been learned for that state.
    if not np.any(action_values):
        return random.randint(0, q_table.shape[1] - 1)

    return np.argmax(action_values)

In [54]:
def train_agent(q_table, env, num_episodes, epsilon, log_every=1):
    """
    Train the agent by running `num_episodes` episodes of Q-learning.

    Each episode resets `env` and calls `q_learning_update` until it reports
    `done`. `q_table` is updated in place by those calls.

    Parameters:
        q_table: Q-value table passed through to `q_learning_update`.
        env: environment providing `reset()` (and stepped by
            `q_learning_update`).
        num_episodes: number of episodes to run.
        epsilon: action-selection threshold forwarded to `q_learning_update`.
        log_every: print the per-episode summary every `log_every` episodes.
            The default of 1 prints every episode; 0 or None turns the
            per-episode output off.

    Returns:
        The same `q_table` object that was passed in.
    """
    for episode in range(1, num_episodes + 1):
        state = env.reset()

        steps, num_penalties, total_reward = 0, 0, 0
        done = False

        while not done:
            state, reward, done = q_learning_update(q_table, env, state, epsilon)
            total_reward += reward

            # A reward of -10 is counted as a penalty (presumably the Taxi
            # reward for an illegal pickup/drop-off -- confirm against the
            # environment documentation).
            if reward == -10:
                num_penalties += 1

            steps += 1

        if log_every and episode % log_every == 0:
            print("\nTraining episode {}".format(episode))
            print("Time steps: {}, Penalties: {}, Reward: {}".format(steps,
                                                                     num_penalties,
                                                                     total_reward))

    print("Training finished.\n")
    return q_table

### Let's train

In [57]:
# Run 5000 training episodes. train_agent updates q_table in place and
# returns the same object, so the reassignment keeps the name bound to it.
q_table = train_agent(q_table, env, 5000, epsilon)


Training episode 1
Time steps: 200, Penalties: 46, Reward: -614

Training episode 2
Time steps: 200, Penalties: 23, Reward: -407

Training episode 3
Time steps: 200, Penalties: 45, Reward: -605

Training episode 4
Time steps: 200, Penalties: 43, Reward: -587

Training episode 5
Time steps: 200, Penalties: 39, Reward: -551

Training episode 6
Time steps: 200, Penalties: 38, Reward: -542

Training episode 7
Time steps: 200, Penalties: 50, Reward: -650

Training episode 8
Time steps: 200, Penalties: 41, Reward: -569

Training episode 9
Time steps: 200, Penalties: 39, Reward: -551

Training episode 10
Time steps: 200, Penalties: 51, Reward: -659

Training episode 11
Time steps: 200, Penalties: 41, Reward: -569

Training episode 12
Time steps: 200, Penalties: 45, Reward: -605

Training episode 13
Time steps: 200, Penalties: 11, Reward: -299

Training episode 14
Time steps: 200, Penalties: 40, Reward: -560

Training episode 15
Time steps: 142, Penalties: 14, Reward: -247

Training episode 1


Training episode 140
Time steps: 200, Penalties: 16, Reward: -344

Training episode 141
Time steps: 200, Penalties: 10, Reward: -290

Training episode 142
Time steps: 149, Penalties: 13, Reward: -245

Training episode 143
Time steps: 200, Penalties: 12, Reward: -308

Training episode 144
Time steps: 200, Penalties: 17, Reward: -353

Training episode 145
Time steps: 156, Penalties: 4, Reward: -171

Training episode 146
Time steps: 200, Penalties: 12, Reward: -308

Training episode 147
Time steps: 42, Penalties: 4, Reward: -57

Training episode 148
Time steps: 47, Penalties: 1, Reward: -35

Training episode 149
Time steps: 200, Penalties: 12, Reward: -308

Training episode 150
Time steps: 200, Penalties: 11, Reward: -299

Training episode 151
Time steps: 92, Penalties: 4, Reward: -107

Training episode 152
Time steps: 200, Penalties: 14, Reward: -326

Training episode 153
Time steps: 200, Penalties: 19, Reward: -371

Training episode 154
Time steps: 120, Penalties: 7, Reward: -162

Trai


Training episode 274
Time steps: 89, Penalties: 5, Reward: -113

Training episode 275
Time steps: 46, Penalties: 2, Reward: -43

Training episode 276
Time steps: 40, Penalties: 4, Reward: -55

Training episode 277
Time steps: 30, Penalties: 2, Reward: -27

Training episode 278
Time steps: 200, Penalties: 10, Reward: -290

Training episode 279
Time steps: 200, Penalties: 14, Reward: -326

Training episode 280
Time steps: 119, Penalties: 3, Reward: -125

Training episode 281
Time steps: 200, Penalties: 10, Reward: -290

Training episode 282
Time steps: 96, Penalties: 5, Reward: -120

Training episode 283
Time steps: 176, Penalties: 12, Reward: -263

Training episode 284
Time steps: 13, Penalties: 0, Reward: 8

Training episode 285
Time steps: 82, Penalties: 7, Reward: -124

Training episode 286
Time steps: 200, Penalties: 19, Reward: -371

Training episode 287
Time steps: 137, Penalties: 10, Reward: -206

Training episode 288
Time steps: 200, Penalties: 17, Reward: -332

Training episod


Training episode 431
Time steps: 130, Penalties: 10, Reward: -199

Training episode 432
Time steps: 51, Penalties: 2, Reward: -48

Training episode 433
Time steps: 175, Penalties: 11, Reward: -253

Training episode 434
Time steps: 74, Penalties: 5, Reward: -98

Training episode 435
Time steps: 142, Penalties: 8, Reward: -193

Training episode 436
Time steps: 25, Penalties: 2, Reward: -22

Training episode 437
Time steps: 75, Penalties: 3, Reward: -81

Training episode 438
Time steps: 11, Penalties: 1, Reward: 1

Training episode 439
Time steps: 167, Penalties: 5, Reward: -191

Training episode 440
Time steps: 13, Penalties: 0, Reward: 8

Training episode 441
Time steps: 155, Penalties: 11, Reward: -233

Training episode 442
Time steps: 159, Penalties: 10, Reward: -228

Training episode 443
Time steps: 134, Penalties: 13, Reward: -230

Training episode 444
Time steps: 115, Penalties: 7, Reward: -157

Training episode 445
Time steps: 42, Penalties: 5, Reward: -66

Training episode 446
T

Training episode 602
Time steps: 25, Penalties: 1, Reward: -13

Training episode 603
Time steps: 140, Penalties: 7, Reward: -182

Training episode 604
Time steps: 64, Penalties: 6, Reward: -97

Training episode 605
Time steps: 92, Penalties: 14, Reward: -197

Training episode 606
Time steps: 31, Penalties: 2, Reward: -28

Training episode 607
Time steps: 60, Penalties: 4, Reward: -75

Training episode 608
Time steps: 78, Penalties: 3, Reward: -84

Training episode 609
Time steps: 31, Penalties: 2, Reward: -28

Training episode 610
Time steps: 41, Penalties: 4, Reward: -56

Training episode 611
Time steps: 165, Penalties: 10, Reward: -234

Training episode 612
Time steps: 54, Penalties: 4, Reward: -69

Training episode 613
Time steps: 177, Penalties: 13, Reward: -273

Training episode 614
Time steps: 47, Penalties: 2, Reward: -44

Training episode 615
Time steps: 64, Penalties: 8, Reward: -115

Training episode 616
Time steps: 38, Penalties: 2, Reward: -35

Training episode 617
Time ste

Training episode 768
Time steps: 38, Penalties: 1, Reward: -26

Training episode 769
Time steps: 82, Penalties: 5, Reward: -106

Training episode 770
Time steps: 19, Penalties: 1, Reward: -7

Training episode 771
Time steps: 44, Penalties: 2, Reward: -41

Training episode 772
Time steps: 45, Penalties: 1, Reward: -33

Training episode 773
Time steps: 32, Penalties: 0, Reward: -11

Training episode 774
Time steps: 43, Penalties: 2, Reward: -40

Training episode 775
Time steps: 9, Penalties: 0, Reward: 12

Training episode 776
Time steps: 87, Penalties: 11, Reward: -165

Training episode 777
Time steps: 64, Penalties: 2, Reward: -61

Training episode 778
Time steps: 13, Penalties: 1, Reward: -1

Training episode 779
Time steps: 20, Penalties: 1, Reward: -8

Training episode 780
Time steps: 109, Penalties: 6, Reward: -142

Training episode 781
Time steps: 42, Penalties: 0, Reward: -21

Training episode 782
Time steps: 39, Penalties: 5, Reward: -63

Training episode 783
Time steps: 23, Pen

Training episode 972
Time steps: 17, Penalties: 0, Reward: 4

Training episode 973
Time steps: 75, Penalties: 9, Reward: -135

Training episode 974
Time steps: 35, Penalties: 4, Reward: -50

Training episode 975
Time steps: 9, Penalties: 0, Reward: 12

Training episode 976
Time steps: 32, Penalties: 3, Reward: -38

Training episode 977
Time steps: 30, Penalties: 0, Reward: -9

Training episode 978
Time steps: 26, Penalties: 1, Reward: -14

Training episode 979
Time steps: 25, Penalties: 1, Reward: -13

Training episode 980
Time steps: 15, Penalties: 1, Reward: -3

Training episode 981
Time steps: 59, Penalties: 6, Reward: -92

Training episode 982
Time steps: 81, Penalties: 7, Reward: -123

Training episode 983
Time steps: 85, Penalties: 7, Reward: -127

Training episode 984
Time steps: 57, Penalties: 5, Reward: -81

Training episode 985
Time steps: 19, Penalties: 1, Reward: -7

Training episode 986
Time steps: 136, Penalties: 8, Reward: -187

Training episode 987
Time steps: 32, Penal


Training episode 1231
Time steps: 15, Penalties: 3, Reward: -21

Training episode 1232
Time steps: 13, Penalties: 1, Reward: -1

Training episode 1233
Time steps: 45, Penalties: 4, Reward: -60

Training episode 1234
Time steps: 30, Penalties: 2, Reward: -27

Training episode 1235
Time steps: 109, Penalties: 11, Reward: -187

Training episode 1236
Time steps: 40, Penalties: 2, Reward: -37

Training episode 1237
Time steps: 25, Penalties: 0, Reward: -4

Training episode 1238
Time steps: 47, Penalties: 5, Reward: -71

Training episode 1239
Time steps: 77, Penalties: 8, Reward: -128

Training episode 1240
Time steps: 9, Penalties: 0, Reward: 12

Training episode 1241
Time steps: 17, Penalties: 1, Reward: -5

Training episode 1242
Time steps: 15, Penalties: 2, Reward: -12

Training episode 1243
Time steps: 51, Penalties: 3, Reward: -57

Training episode 1244
Time steps: 68, Penalties: 5, Reward: -92

Training episode 1245
Time steps: 56, Penalties: 6, Reward: -89

Training episode 1246
Tim

Training episode 1386
Time steps: 11, Penalties: 1, Reward: 1

Training episode 1387
Time steps: 15, Penalties: 1, Reward: -3

Training episode 1388
Time steps: 19, Penalties: 2, Reward: -16

Training episode 1389
Time steps: 27, Penalties: 0, Reward: -6

Training episode 1390
Time steps: 14, Penalties: 0, Reward: 7

Training episode 1391
Time steps: 22, Penalties: 0, Reward: -1

Training episode 1392
Time steps: 65, Penalties: 3, Reward: -71

Training episode 1393
Time steps: 27, Penalties: 1, Reward: -15

Training episode 1394
Time steps: 15, Penalties: 0, Reward: 6

Training episode 1395
Time steps: 14, Penalties: 0, Reward: 7

Training episode 1396
Time steps: 11, Penalties: 1, Reward: 1

Training episode 1397
Time steps: 36, Penalties: 3, Reward: -42

Training episode 1398
Time steps: 13, Penalties: 0, Reward: 8

Training episode 1399
Time steps: 10, Penalties: 0, Reward: 11

Training episode 1400
Time steps: 19, Penalties: 1, Reward: -7

Training episode 1401
Time steps: 38, Pena

Training episode 1564
Time steps: 69, Penalties: 9, Reward: -129

Training episode 1565
Time steps: 15, Penalties: 2, Reward: -12

Training episode 1566
Time steps: 31, Penalties: 1, Reward: -19

Training episode 1567
Time steps: 11, Penalties: 0, Reward: 10

Training episode 1568
Time steps: 26, Penalties: 3, Reward: -32

Training episode 1569
Time steps: 22, Penalties: 1, Reward: -10

Training episode 1570
Time steps: 38, Penalties: 0, Reward: -17

Training episode 1571
Time steps: 63, Penalties: 1, Reward: -51

Training episode 1572
Time steps: 19, Penalties: 0, Reward: 2

Training episode 1573
Time steps: 17, Penalties: 1, Reward: -5

Training episode 1574
Time steps: 45, Penalties: 5, Reward: -69

Training episode 1575
Time steps: 16, Penalties: 1, Reward: -4

Training episode 1576
Time steps: 10, Penalties: 0, Reward: 11

Training episode 1577
Time steps: 28, Penalties: 2, Reward: -25

Training episode 1578
Time steps: 34, Penalties: 4, Reward: -49

Training episode 1579
Time ste

Training episode 1730
Time steps: 16, Penalties: 0, Reward: 5

Training episode 1731
Time steps: 16, Penalties: 2, Reward: -13

Training episode 1732
Time steps: 13, Penalties: 0, Reward: 8

Training episode 1733
Time steps: 17, Penalties: 0, Reward: 4

Training episode 1734
Time steps: 39, Penalties: 7, Reward: -81

Training episode 1735
Time steps: 13, Penalties: 2, Reward: -10

Training episode 1736
Time steps: 21, Penalties: 2, Reward: -18

Training episode 1737
Time steps: 21, Penalties: 2, Reward: -18

Training episode 1738
Time steps: 22, Penalties: 2, Reward: -19

Training episode 1739
Time steps: 24, Penalties: 1, Reward: -12

Training episode 1740
Time steps: 13, Penalties: 2, Reward: -10

Training episode 1741
Time steps: 18, Penalties: 2, Reward: -15

Training episode 1742
Time steps: 16, Penalties: 0, Reward: 5

Training episode 1743
Time steps: 22, Penalties: 0, Reward: -1

Training episode 1744
Time steps: 16, Penalties: 0, Reward: 5

Training episode 1745
Time steps: 47

Training episode 1964
Time steps: 18, Penalties: 2, Reward: -15

Training episode 1965
Time steps: 7, Penalties: 0, Reward: 14

Training episode 1966
Time steps: 55, Penalties: 6, Reward: -88

Training episode 1967
Time steps: 11, Penalties: 0, Reward: 10

Training episode 1968
Time steps: 19, Penalties: 2, Reward: -16

Training episode 1969
Time steps: 17, Penalties: 0, Reward: 4

Training episode 1970
Time steps: 17, Penalties: 0, Reward: 4

Training episode 1971
Time steps: 24, Penalties: 2, Reward: -21

Training episode 1972
Time steps: 18, Penalties: 1, Reward: -6

Training episode 1973
Time steps: 12, Penalties: 0, Reward: 9

Training episode 1974
Time steps: 13, Penalties: 2, Reward: -10

Training episode 1975
Time steps: 19, Penalties: 1, Reward: -7

Training episode 1976
Time steps: 13, Penalties: 1, Reward: -1

Training episode 1977
Time steps: 11, Penalties: 1, Reward: 1

Training episode 1978
Time steps: 11, Penalties: 0, Reward: 10

Training episode 1979
Time steps: 23, Pe

Training episode 2155
Time steps: 21, Penalties: 2, Reward: -18

Training episode 2156
Time steps: 13, Penalties: 1, Reward: -1

Training episode 2157
Time steps: 14, Penalties: 1, Reward: -2

Training episode 2158
Time steps: 15, Penalties: 2, Reward: -12

Training episode 2159
Time steps: 10, Penalties: 0, Reward: 11

Training episode 2160
Time steps: 19, Penalties: 0, Reward: 2

Training episode 2161
Time steps: 19, Penalties: 1, Reward: -7

Training episode 2162
Time steps: 19, Penalties: 2, Reward: -16

Training episode 2163
Time steps: 14, Penalties: 0, Reward: 7

Training episode 2164
Time steps: 20, Penalties: 2, Reward: -17

Training episode 2165
Time steps: 10, Penalties: 0, Reward: 11

Training episode 2166
Time steps: 16, Penalties: 1, Reward: -4

Training episode 2167
Time steps: 15, Penalties: 0, Reward: 6

Training episode 2168
Time steps: 31, Penalties: 1, Reward: -19

Training episode 2169
Time steps: 16, Penalties: 0, Reward: 5

Training episode 2170
Time steps: 14, P

Training episode 2394
Time steps: 19, Penalties: 1, Reward: -7

Training episode 2395
Time steps: 18, Penalties: 0, Reward: 3

Training episode 2396
Time steps: 27, Penalties: 0, Reward: -6

Training episode 2397
Time steps: 21, Penalties: 0, Reward: 0

Training episode 2398
Time steps: 15, Penalties: 0, Reward: 6

Training episode 2399
Time steps: 17, Penalties: 1, Reward: -5

Training episode 2400
Time steps: 8, Penalties: 0, Reward: 13

Training episode 2401
Time steps: 16, Penalties: 1, Reward: -4

Training episode 2402
Time steps: 16, Penalties: 0, Reward: 5

Training episode 2403
Time steps: 18, Penalties: 2, Reward: -15

Training episode 2404
Time steps: 17, Penalties: 1, Reward: -5

Training episode 2405
Time steps: 37, Penalties: 2, Reward: -34

Training episode 2406
Time steps: 12, Penalties: 0, Reward: 9

Training episode 2407
Time steps: 19, Penalties: 0, Reward: 2

Training episode 2408
Time steps: 21, Penalties: 1, Reward: -9

Training episode 2409
Time steps: 18, Penalti

Training episode 2641
Time steps: 23, Penalties: 2, Reward: -20

Training episode 2642
Time steps: 19, Penalties: 0, Reward: 2

Training episode 2643
Time steps: 21, Penalties: 1, Reward: -9

Training episode 2644
Time steps: 35, Penalties: 1, Reward: -23

Training episode 2645
Time steps: 15, Penalties: 1, Reward: -3

Training episode 2646
Time steps: 19, Penalties: 2, Reward: -16

Training episode 2647
Time steps: 17, Penalties: 0, Reward: 4

Training episode 2648
Time steps: 17, Penalties: 1, Reward: -5

Training episode 2649
Time steps: 18, Penalties: 3, Reward: -24

Training episode 2650
Time steps: 12, Penalties: 2, Reward: -9

Training episode 2651
Time steps: 18, Penalties: 2, Reward: -15

Training episode 2652
Time steps: 16, Penalties: 1, Reward: -4

Training episode 2653
Time steps: 24, Penalties: 4, Reward: -39

Training episode 2654
Time steps: 19, Penalties: 1, Reward: -7

Training episode 2655
Time steps: 33, Penalties: 2, Reward: -30

Training episode 2656
Time steps: 2

Training episode 2868
Time steps: 17, Penalties: 1, Reward: -5

Training episode 2869
Time steps: 15, Penalties: 0, Reward: 6

Training episode 2870
Time steps: 21, Penalties: 0, Reward: 0

Training episode 2871
Time steps: 23, Penalties: 2, Reward: -20

Training episode 2872
Time steps: 17, Penalties: 2, Reward: -14

Training episode 2873
Time steps: 9, Penalties: 0, Reward: 12

Training episode 2874
Time steps: 15, Penalties: 1, Reward: -3

Training episode 2875
Time steps: 23, Penalties: 0, Reward: -2

Training episode 2876
Time steps: 15, Penalties: 1, Reward: -3

Training episode 2877
Time steps: 12, Penalties: 0, Reward: 9

Training episode 2878
Time steps: 29, Penalties: 5, Reward: -53

Training episode 2879
Time steps: 33, Penalties: 1, Reward: -21

Training episode 2880
Time steps: 16, Penalties: 2, Reward: -13

Training episode 2881
Time steps: 21, Penalties: 1, Reward: -9

Training episode 2882
Time steps: 38, Penalties: 0, Reward: -17

Training episode 2883
Time steps: 17, 

Training episode 3097
Time steps: 28, Penalties: 1, Reward: -16

Training episode 3098
Time steps: 28, Penalties: 1, Reward: -16

Training episode 3099
Time steps: 10, Penalties: 0, Reward: 11

Training episode 3100
Time steps: 7, Penalties: 0, Reward: 14

Training episode 3101
Time steps: 16, Penalties: 1, Reward: -4

Training episode 3102
Time steps: 12, Penalties: 1, Reward: 0

Training episode 3103
Time steps: 16, Penalties: 1, Reward: -4

Training episode 3104
Time steps: 21, Penalties: 0, Reward: 0

Training episode 3105
Time steps: 14, Penalties: 0, Reward: 7

Training episode 3106
Time steps: 12, Penalties: 0, Reward: 9

Training episode 3107
Time steps: 13, Penalties: 0, Reward: 8

Training episode 3108
Time steps: 25, Penalties: 4, Reward: -40

Training episode 3109
Time steps: 14, Penalties: 0, Reward: 7

Training episode 3110
Time steps: 11, Penalties: 0, Reward: 10

Training episode 3111
Time steps: 12, Penalties: 0, Reward: 9

Training episode 3112
Time steps: 22, Penalti

Training episode 3330
Time steps: 20, Penalties: 2, Reward: -17

Training episode 3331
Time steps: 25, Penalties: 1, Reward: -13

Training episode 3332
Time steps: 17, Penalties: 0, Reward: 4

Training episode 3333
Time steps: 12, Penalties: 0, Reward: 9

Training episode 3334
Time steps: 22, Penalties: 2, Reward: -19

Training episode 3335
Time steps: 14, Penalties: 2, Reward: -11

Training episode 3336
Time steps: 19, Penalties: 1, Reward: -7

Training episode 3337
Time steps: 10, Penalties: 0, Reward: 11

Training episode 3338
Time steps: 12, Penalties: 0, Reward: 9

Training episode 3339
Time steps: 14, Penalties: 0, Reward: 7

Training episode 3340
Time steps: 14, Penalties: 1, Reward: -2

Training episode 3341
Time steps: 18, Penalties: 2, Reward: -15

Training episode 3342
Time steps: 26, Penalties: 0, Reward: -5

Training episode 3343
Time steps: 19, Penalties: 2, Reward: -16

Training episode 3344
Time steps: 12, Penalties: 0, Reward: 9

Training episode 3345
Time steps: 15, P

Training episode 3545
Time steps: 22, Penalties: 1, Reward: -10

Training episode 3546
Time steps: 13, Penalties: 0, Reward: 8

Training episode 3547
Time steps: 19, Penalties: 1, Reward: -7

Training episode 3548
Time steps: 14, Penalties: 0, Reward: 7

Training episode 3549
Time steps: 20, Penalties: 0, Reward: 1

Training episode 3550
Time steps: 9, Penalties: 0, Reward: 12

Training episode 3551
Time steps: 14, Penalties: 0, Reward: 7

Training episode 3552
Time steps: 13, Penalties: 2, Reward: -10

Training episode 3553
Time steps: 21, Penalties: 2, Reward: -18

Training episode 3554
Time steps: 23, Penalties: 2, Reward: -20

Training episode 3555
Time steps: 24, Penalties: 3, Reward: -30

Training episode 3556
Time steps: 24, Penalties: 3, Reward: -30

Training episode 3557
Time steps: 18, Penalties: 2, Reward: -15

Training episode 3558
Time steps: 24, Penalties: 1, Reward: -12

Training episode 3559
Time steps: 14, Penalties: 2, Reward: -11

Training episode 3560
Time steps: 13


Training episode 3728
Time steps: 20, Penalties: 2, Reward: -17

Training episode 3729
Time steps: 17, Penalties: 2, Reward: -14

Training episode 3730
Time steps: 18, Penalties: 3, Reward: -24

Training episode 3731
Time steps: 7, Penalties: 0, Reward: 14

Training episode 3732
Time steps: 15, Penalties: 0, Reward: 6

Training episode 3733
Time steps: 14, Penalties: 1, Reward: -2

Training episode 3734
Time steps: 16, Penalties: 1, Reward: -4

Training episode 3735
Time steps: 17, Penalties: 0, Reward: 4

Training episode 3736
Time steps: 12, Penalties: 0, Reward: 9

Training episode 3737
Time steps: 11, Penalties: 0, Reward: 10

Training episode 3738
Time steps: 16, Penalties: 2, Reward: -13

Training episode 3739
Time steps: 12, Penalties: 1, Reward: 0

Training episode 3740
Time steps: 12, Penalties: 0, Reward: 9

Training episode 3741
Time steps: 25, Penalties: 2, Reward: -22

Training episode 3742
Time steps: 15, Penalties: 3, Reward: -21

Training episode 3743
Time steps: 16, P

Training episode 3856
Time steps: 19, Penalties: 1, Reward: -7

Training episode 3857
Time steps: 20, Penalties: 3, Reward: -26

Training episode 3858
Time steps: 15, Penalties: 0, Reward: 6

Training episode 3859
Time steps: 9, Penalties: 1, Reward: 3

Training episode 3860
Time steps: 23, Penalties: 0, Reward: -2

Training episode 3861
Time steps: 27, Penalties: 3, Reward: -33

Training episode 3862
Time steps: 16, Penalties: 1, Reward: -4

Training episode 3863
Time steps: 23, Penalties: 2, Reward: -20

Training episode 3864
Time steps: 24, Penalties: 2, Reward: -21

Training episode 3865
Time steps: 19, Penalties: 2, Reward: -16

Training episode 3866
Time steps: 19, Penalties: 1, Reward: -7

Training episode 3867
Time steps: 8, Penalties: 1, Reward: 4

Training episode 3868
Time steps: 23, Penalties: 5, Reward: -47

Training episode 3869
Time steps: 26, Penalties: 2, Reward: -23

Training episode 3870
Time steps: 14, Penalties: 0, Reward: 7

Training episode 3871
Time steps: 23, P


Training episode 4063
Time steps: 18, Penalties: 0, Reward: 3

Training episode 4064
Time steps: 18, Penalties: 0, Reward: 3

Training episode 4065
Time steps: 13, Penalties: 1, Reward: -1

Training episode 4066
Time steps: 24, Penalties: 1, Reward: -12

Training episode 4067
Time steps: 20, Penalties: 1, Reward: -8

Training episode 4068
Time steps: 11, Penalties: 1, Reward: 1

Training episode 4069
Time steps: 15, Penalties: 0, Reward: 6

Training episode 4070
Time steps: 20, Penalties: 3, Reward: -26

Training episode 4071
Time steps: 25, Penalties: 2, Reward: -22

Training episode 4072
Time steps: 13, Penalties: 0, Reward: 8

Training episode 4073
Time steps: 14, Penalties: 2, Reward: -11

Training episode 4074
Time steps: 18, Penalties: 0, Reward: 3

Training episode 4075
Time steps: 18, Penalties: 0, Reward: 3

Training episode 4076
Time steps: 20, Penalties: 2, Reward: -17

Training episode 4077
Time steps: 12, Penalties: 2, Reward: -9

Training episode 4078
Time steps: 25, Pen


Training episode 4294
Time steps: 18, Penalties: 0, Reward: 3

Training episode 4295
Time steps: 12, Penalties: 1, Reward: 0

Training episode 4296
Time steps: 20, Penalties: 0, Reward: 1

Training episode 4297
Time steps: 24, Penalties: 1, Reward: -12

Training episode 4298
Time steps: 21, Penalties: 2, Reward: -18

Training episode 4299
Time steps: 23, Penalties: 2, Reward: -20

Training episode 4300
Time steps: 23, Penalties: 1, Reward: -11

Training episode 4301
Time steps: 36, Penalties: 4, Reward: -51

Training episode 4302
Time steps: 12, Penalties: 0, Reward: 9

Training episode 4303
Time steps: 22, Penalties: 2, Reward: -19

Training episode 4304
Time steps: 16, Penalties: 1, Reward: -4

Training episode 4305
Time steps: 10, Penalties: 0, Reward: 11

Training episode 4306
Time steps: 10, Penalties: 0, Reward: 11

Training episode 4307
Time steps: 22, Penalties: 1, Reward: -10

Training episode 4308
Time steps: 39, Penalties: 4, Reward: -54

Training episode 4309
Time steps: 2


Training episode 4564
Time steps: 24, Penalties: 2, Reward: -21

Training episode 4565
Time steps: 22, Penalties: 4, Reward: -37

Training episode 4566
Time steps: 17, Penalties: 0, Reward: 4

Training episode 4567
Time steps: 17, Penalties: 1, Reward: -5

Training episode 4568
Time steps: 13, Penalties: 0, Reward: 8

Training episode 4569
Time steps: 15, Penalties: 2, Reward: -12

Training episode 4570
Time steps: 9, Penalties: 1, Reward: 3

Training episode 4571
Time steps: 16, Penalties: 0, Reward: 5

Training episode 4572
Time steps: 18, Penalties: 1, Reward: -6

Training episode 4573
Time steps: 15, Penalties: 1, Reward: -3

Training episode 4574
Time steps: 13, Penalties: 0, Reward: 8

Training episode 4575
Time steps: 10, Penalties: 1, Reward: 2

Training episode 4576
Time steps: 14, Penalties: 0, Reward: 7

Training episode 4577
Time steps: 17, Penalties: 2, Reward: -14

Training episode 4578
Time steps: 13, Penalties: 1, Reward: -1

Training episode 4579
Time steps: 20, Penal

Training episode 4819
Time steps: 18, Penalties: 2, Reward: -15

Training episode 4820
Time steps: 13, Penalties: 1, Reward: -1

Training episode 4821
Time steps: 16, Penalties: 2, Reward: -13

Training episode 4822
Time steps: 10, Penalties: 2, Reward: -7

Training episode 4823
Time steps: 11, Penalties: 1, Reward: 1

Training episode 4824
Time steps: 14, Penalties: 0, Reward: 7

Training episode 4825
Time steps: 16, Penalties: 0, Reward: 5

Training episode 4826
Time steps: 17, Penalties: 0, Reward: 4

Training episode 4827
Time steps: 11, Penalties: 2, Reward: -8

Training episode 4828
Time steps: 14, Penalties: 3, Reward: -20

Training episode 4829
Time steps: 15, Penalties: 0, Reward: 6

Training episode 4830
Time steps: 22, Penalties: 2, Reward: -19

Training episode 4831
Time steps: 13, Penalties: 1, Reward: -1

Training episode 4832
Time steps: 16, Penalties: 1, Reward: -4

Training episode 4833
Time steps: 12, Penalties: 1, Reward: 0

Training episode 4834
Time steps: 15, Pena

Training episode 4950
Time steps: 13, Penalties: 1, Reward: -1

Training episode 4951
Time steps: 17, Penalties: 0, Reward: 4

Training episode 4952
Time steps: 20, Penalties: 2, Reward: -17

Training episode 4953
Time steps: 16, Penalties: 0, Reward: 5

Training episode 4954
Time steps: 33, Penalties: 5, Reward: -57

Training episode 4955
Time steps: 16, Penalties: 1, Reward: -4

Training episode 4956
Time steps: 11, Penalties: 0, Reward: 10

Training episode 4957
Time steps: 18, Penalties: 0, Reward: 3

Training episode 4958
Time steps: 23, Penalties: 2, Reward: -20

Training episode 4959
Time steps: 11, Penalties: 0, Reward: 10

Training episode 4960
Time steps: 12, Penalties: 0, Reward: 9

Training episode 4961
Time steps: 16, Penalties: 1, Reward: -4

Training episode 4962
Time steps: 16, Penalties: 0, Reward: 5

Training episode 4963
Time steps: 16, Penalties: 0, Reward: 5

Training episode 4964
Time steps: 19, Penalties: 1, Reward: -7

Training episode 4965
Time steps: 20, Penal

In [58]:
# Learned Q-values for state 333, one entry per action.
q_table[333]

array([-2.3470143 , -2.31153855, -2.3289116 , -2.35769712, -7.26776826,
       -7.19915075])

In [59]:
# Start a new episode, show the index of the state it begins in, and draw the grid.
initial_state = env.reset()
print(initial_state)
env.render()

62
+---------+
|[34;1mR[0m: | :[43m [0m:G|
| : : : : |
| : : : : |
| | : | : |
|[35mY[0m| : |B: |
+---------+



In [61]:
# Learned Q-values for state 62, the initial state printed in the recorded run above.
q_table[62]

array([-2.19096845, -2.27907431, -2.27141765, -2.25364375, -6.12575537,
       -6.18658448])