In [1]:
import gymnasium as gym
import numpy as np

## Ex1 : Exploration de l'Environnement FrozenLake

In [4]:
env = gym.make("FrozenLake-v1", is_slippery=True )
env.reset() 

(0, {'prob': 1})

In [6]:
print(f"Espace d'états : {env.observation_space}")

Espace d'états : Discrete(16)


In [8]:
print(f"Espace d'actions : {env.action_space}")

Espace d'actions : Discrete(4)


In [10]:
for _ in range(10):
    action = env.action_space.sample() 
    observation, reward, done, _, _ = env.step(action) 
    print(f"Action : {action}, Observation : {observation}, Reward : {reward}")
    if done:
        env.reset()
env.close()

Action : 1, Observation : 1, Reward : 0.0
Action : 1, Observation : 2, Reward : 0.0
Action : 2, Observation : 6, Reward : 0.0
Action : 0, Observation : 10, Reward : 0.0
Action : 2, Observation : 6, Reward : 0.0
Action : 0, Observation : 5, Reward : 0.0
Action : 2, Observation : 1, Reward : 0.0
Action : 1, Observation : 2, Reward : 0.0
Action : 2, Observation : 6, Reward : 0.0
Action : 1, Observation : 10, Reward : 0.0


## Ex2 : Implémentation et Initialisation de la Q-Table

In [25]:
num_states = env.observation_space.n
num_actions = env.action_space.n
q_table = np.zeros((num_states, num_actions))

print("\nQ-Table initialisée :")
print(q_table)



Q-Table initialisée :
[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]


## Ex3 : Implémentation du Q-Learning avec Mise à Jour

In [36]:
# Paramètres
alpha = 0.1   
gamma = 0.99     
epsilon = 1.0    # Exploration initiale
epsilon_decay = 0.995  
num_episodes = 5000  

for episode in range(num_episodes):
    state = env.reset()[0]
    done = False
    
    while not done:
        # Choix de l'action (exploration vs exploitation)
        if np.random.rand() < epsilon:
            action = env.action_space.sample()  # Exploration
        else:
            action = np.argmax(q_table[state, :])  # Exploitation
        
        new_state, reward, done, _, _ = env.step(action)

        # MAJ de la Q-Table 
        q_table[state, action] = q_table[state, action] + alpha * (reward + gamma * np.max(q_table[new_state, :]) - q_table[state, action])
        
        state = new_state  # MAJ d'état

    # Réduction de epsilon pour moins d'exploration au fil du temps
    epsilon = max(0.01, epsilon * epsilon_decay)

print("\nQ-Table après apprentissage : \n")
print(q_table)




Q-Table après apprentissage : 

[[0.49739674 0.48944025 0.4898178  0.48469667]
 [0.32559709 0.34802785 0.30290304 0.46017407]
 [0.39900433 0.26374148 0.28310021 0.29910152]
 [0.09008194 0.11455173 0.04618486 0.09253383]
 [0.51081404 0.3310545  0.40905429 0.38222129]
 [0.         0.         0.         0.        ]
 [0.35070756 0.13143948 0.16299476 0.0791155 ]
 [0.         0.         0.         0.        ]
 [0.42151183 0.39962833 0.32766961 0.54333262]
 [0.36934971 0.56699971 0.33657123 0.32554302]
 [0.53639672 0.3213424  0.4653868  0.23329603]
 [0.         0.         0.         0.        ]
 [0.         0.         0.         0.        ]
 [0.50958051 0.48809908 0.635766   0.33697742]
 [0.73088285 0.77721387 0.7450151  0.73556533]
 [0.         0.         0.         0.        ]]


## Ex4 : Évaluation des Performances de l'Agent

In [49]:
num_test_episodes = 100
successes = 0

for _ in range(num_test_episodes):
    state = env.reset()[0]
    done = False

    while not done:
        action = np.argmax(q_table[state, :])  
        new_state, reward, done, _, _ = env.step(action)
        state = new_state

    if reward > 0:  
        successes += 1

print(f"Taux de réussite : {successes}/{num_test_episodes} ({successes/num_test_episodes*100:.2f}%)")


Taux de réussite : 81/100 (81.00%)
