### Import Libraries:

In [1]:
import numpy as np
import random
from IPython.display import clear_output
import gym

import warnings
warnings.filterwarnings('ignore')

### Create environment:

In [2]:
# Build the Taxi-v3 environment. `.env` is taken from the object returned by
# gym.make -- presumably to drop gym's outer wrapper (e.g. a step limit);
# TODO confirm against the installed gym version.
enviroment = gym.make('Taxi-v3').env

# Put the environment into a starting state and draw it.
enviroment.reset()
enviroment.render()

# Both spaces expose their size as `.n`; these sizes fix the Q-table's shape.
state_count = enviroment.observation_space.n
action_count = enviroment.action_space.n
print('Number of states: {}'.format(state_count))
print('Number of actions: {}'.format(action_count))

+---------+
|R: | : :[34;1mG[0m|
| : | : : |
| : : :[43m [0m: |
| | : | : |
|[35mY[0m| : |B: |
+---------+

Number of states: 500
Number of actions: 6


### Define Hyperparameters:

In [3]:
# Q-learning hyperparameters.
alpha = 0.1    # learning rate: weight of the new estimate in the Q-update
gamma = 0.6    # discount factor applied to the best next-state value
epsilon = 0.1  # probability of choosing a random (exploratory) action

# One row per state, one column per action; every estimate starts at zero.
table_shape = (enviroment.observation_space.n, enviroment.action_space.n)
q_table = np.zeros(table_shape)

## Q-Learning Algorithm:

In [4]:
num_of_episodes = 100000

# Tabular Q-learning with an epsilon-greedy behaviour policy: each episode
# starts from a fresh reset and runs until the environment reports termination.
for episode in range(num_of_episodes):
    state = enviroment.reset()
    reward, terminated = 0, False

    while not terminated:
        # With probability epsilon take a random action, otherwise take the
        # action with the highest current Q-value for this state.
        explore = random.uniform(0, 1) < epsilon
        action = enviroment.action_space.sample() if explore else np.argmax(q_table[state])

        # Advance the environment by one step.
        next_state, reward, terminated, info = enviroment.step(action)

        # Blend the previous estimate with the bootstrapped target
        # (reward plus discounted best value of the next state).
        old_estimate = q_table[state, action]
        best_next = np.max(q_table[next_state])
        q_table[state, action] = (1 - alpha) * old_estimate + alpha * (reward + gamma * best_next)

        state = next_state

    # Progress report: every 100 episodes, replace the previous cell output
    # with the episode counter and the environment's current rendering.
    episodes_done = episode + 1
    if episodes_done % 100 == 0:
        clear_output(wait=True)
        print('Episode: {}'.format(episodes_done))
        enviroment.render()

banner = '******************************'
print(banner)
print('Training is done! \n')
print(banner)

Episode: 100000
+---------+
|[35m[34;1m[43mR[0m[0m[0m: | : :G|
| : | : : |
| : : : : |
| | : | : |
|Y| : |B: |
+---------+
  (Dropoff)
******************************
Training is done! 

******************************
