In [2]:
import gymnasium as gym
import numpy as np
import matplotlib.pyplot as plt
import pickle
from gym import spaces

Gym has been unmaintained since 2022 and does not support NumPy 2.0 amongst other critical functionality.
Please upgrade to Gymnasium, the maintained drop-in replacement of Gym, or contact the authors of your software and request that they upgrade.
Users of this version of Gym should be able to simply replace 'import gym' with 'import gymnasium as gym' in the vast majority of cases.
See the migration guide at https://gymnasium.farama.org/introduction/migration_guide/ for additional information.


In [8]:

class CustomFrozenLakeWrapper(gym.Wrapper):
    """Replace the wrapped environment's rewards with a shaped scheme.

    Rewards produced by ``step``:
      * ``reward_win``  when the episode terminates on ``win_state``;
      * ``reward_lose`` when the episode terminates on any other state;
      * ``reward_step`` for every step that does not terminate the episode.

    Attributes:
        win_state: Observation index treated as the goal (63).
        lose_state: Kept for backward compatibility only; ``step`` no longer
            consults it because every non-goal terminal state is a loss.
        reward_win / reward_lose / reward_step: The shaped reward values.
    """

    def __init__(self, env):
        # gym.Wrapper.__init__ already stores ``env`` as ``self.env``.
        super().__init__(env)
        self.win_state = 63   # Goal (bottom right)
        self.lose_state = 7   # Hole at top right (1,8); informational only
        self.reward_win = +10
        self.reward_lose = -10
        self.reward_step = -1

    def step(self, action):
        """Step the wrapped environment and substitute the shaped reward.

        Returns the same 5-tuple as the wrapped environment, with only the
        reward element replaced.
        """
        obs, reward, terminated, truncated, info = self.env.step(action)
        if terminated:
            # Previously only ``lose_state`` was penalised, so terminating on
            # any other non-goal state kept the wrapped environment's reward,
            # which could be better than the per-step cost.
            if obs == self.win_state:
                reward = self.reward_win
            else:
                reward = self.reward_lose
        else:
            reward = self.reward_step
        return obs, reward, terminated, truncated, info

    def reset(self, **kwargs):
        """Forward ``reset`` and all keyword arguments to the wrapped environment."""
        return self.env.reset(**kwargs)

# --- Q-LEARNING FUNCTION ---
def run(episodes, is_training=True, render=False, seed=None):
    """Train or evaluate a tabular Q-learning agent on the built-in 8x8 FrozenLake map.

    Args:
        episodes: Number of episodes to play.
        is_training: When True, start from an all-zero Q-table, explore
            epsilon-greedily, update the table after every step and save it to
            ``custom_frozenlake.pkl``. When False, load the table from that
            file and always take the greedy action.
        render: When True, the environment is created with
            ``render_mode='human'``.
        seed: Optional integer seed for the exploration RNG, the action-space
            sampler and the first environment reset. ``None`` (default) leaves
            everything unseeded.

    Returns:
        A NumPy array of length ``episodes`` holding 1.0 for every episode whose
        final reward equals the wrapper's ``reward_win`` and 0.0 otherwise.
    """
    # Create the environment and apply the reward-shaping wrapper.
    env = gym.make('FrozenLake-v1', map_name="8x8", is_slippery=False, render_mode='human' if render else None)
    env = CustomFrozenLakeWrapper(env)

    # try/finally guarantees the environment (and any render window) is closed
    # even if loading the Q-table fails or the run is interrupted.
    try:
        if is_training:
            q = np.zeros((env.observation_space.n, env.action_space.n))  # 64 x 4
        else:
            # NOTE: pickle.load can execute arbitrary code; only load a table
            # that this notebook wrote itself.
            with open('custom_frozenlake.pkl', 'rb') as f:
                q = pickle.load(f)

        learning_rate_a = 0.01
        discount_factor_g = 0.9
        epsilon = 0.1
        epsilon_decay_rate = 0.0001
        rng = np.random.default_rng(seed)
        if seed is not None:
            env.action_space.seed(seed)

        rewards_per_episode = np.zeros(episodes)

        for i in range(episodes):
            # Seed only the first reset; later resets continue the same stream.
            state, _ = env.reset(seed=seed if i == 0 else None)
            terminated = False
            truncated = False

            while not terminated and not truncated:
                if is_training and rng.random() < epsilon:
                    action = env.action_space.sample()
                else:
                    action = np.argmax(q[state, :])

                new_state, reward, terminated, truncated, _ = env.step(action)

                if is_training:
                    # One-step Q-learning update.
                    q[state, action] = q[state, action] + learning_rate_a * (
                        reward + discount_factor_g * np.max(q[new_state, :]) - q[state, action]
                    )

                state = new_state

            # Linear epsilon decay, clamped at 0; once exploration stops the
            # learning rate is lowered as well.
            epsilon = max(epsilon - epsilon_decay_rate, 0)
            if epsilon == 0:
                learning_rate_a = 0.0001

            # An episode counts as a win when its last reward is the win reward.
            if reward == env.reward_win:
                rewards_per_episode[i] = 1
    finally:
        env.close()

    if is_training:
        with open("custom_frozenlake.pkl", "wb") as f:
            pickle.dump(q, f)

    return rewards_per_episode

# --- EXECUTION ---
if __name__ == '__main__':
    # For a full training run without rendering, call instead:
    #   run(15000, is_training=True, render=False)

    # Short training run (10 episodes) with the 'human' renderer enabled.
    run(episodes=10, is_training=True, render=True)

In [None]:
# --- CUSTOM MAP ---
# 8x8 grid, one text row per grid row.
# Row 1: start at (1,1) and a hole at (1,8). Row 8: goal at (8,8).
# Splitting on whitespace yields a list of eight 8-character strings.
custom_map = """
SFFFFFFH
FFFFFFFF
FFFFFFFF
FFFFFFFF
FFFFFFFF
FFFFFFFF
FFFFFFFF
FFFFFFFG
""".split()

# --- WRAPPER FOR CUSTOM REWARDS ---
class CustomFrozenLakeWrapper(gym.Wrapper):
    """Swap the wrapped environment's reward for a shaped one.

    A terminating step whose original reward is 1 yields ``reward_win``; any
    other terminating step yields ``reward_lose``; every non-terminating step
    yields ``reward_step``.
    """

    def __init__(self, env):
        super().__init__(env)
        self.reward_step = -1
        self.reward_lose = -10
        self.reward_win = 10

    def step(self, action):
        """Step the wrapped environment and return its 5-tuple with the shaped reward."""
        observation, base_reward, terminated, truncated, info = self.env.step(action)
        if not terminated:
            shaped_reward = self.reward_step
        elif base_reward == 1:
            shaped_reward = self.reward_win
        else:
            shaped_reward = self.reward_lose
        return observation, shaped_reward, terminated, truncated, info

# --- Q-LEARNING ---
def run(episodes, is_training=True, render=False, seed=None):
    """Train or evaluate a tabular Q-learning agent on the ``custom_map`` FrozenLake grid.

    Args:
        episodes: Number of episodes to play.
        is_training: When True, start from an all-zero Q-table, explore
            epsilon-greedily, update the table after every step and save it to
            ``custom_frozenlake.pkl``. When False, load the table from that
            file and always take the greedy action.
        render: When True, the environment is created with
            ``render_mode='human'``.
        seed: Optional integer seed for the exploration RNG, the action-space
            sampler and the first environment reset. ``None`` (default) leaves
            everything unseeded.

    Returns:
        A NumPy array of length ``episodes`` holding 1.0 for every episode whose
        final reward equals the wrapper's ``reward_win`` and 0.0 otherwise.
    """
    env = gym.make('FrozenLake-v1', desc=custom_map, is_slippery=False, render_mode='human' if render else None)
    env = CustomFrozenLakeWrapper(env)

    # try/finally guarantees the environment (and any render window) is closed
    # even if loading the Q-table fails or the run is interrupted.
    try:
        if is_training:
            q = np.zeros((env.observation_space.n, env.action_space.n))
        else:
            # NOTE: pickle.load can execute arbitrary code; only load a table
            # that this notebook wrote itself.
            with open('custom_frozenlake.pkl', 'rb') as f:
                q = pickle.load(f)

        learning_rate_a = 0.01
        discount_factor_g = 0.9
        epsilon = 0.1
        epsilon_decay_rate = 0.0001
        rng = np.random.default_rng(seed)
        if seed is not None:
            env.action_space.seed(seed)

        rewards_per_episode = np.zeros(episodes)

        for i in range(episodes):
            # Seed only the first reset; later resets continue the same stream.
            state, _ = env.reset(seed=seed if i == 0 else None)
            terminated = False
            truncated = False

            while not terminated and not truncated:
                if is_training and rng.random() < epsilon:
                    action = env.action_space.sample()
                else:
                    action = np.argmax(q[state, :])

                new_state, reward, terminated, truncated, _ = env.step(action)

                if is_training:
                    # One-step Q-learning update.
                    q[state, action] = q[state, action] + learning_rate_a * (
                        reward + discount_factor_g * np.max(q[new_state, :]) - q[state, action]
                    )

                state = new_state

            # Linear epsilon decay, clamped at 0; once exploration stops the
            # learning rate is lowered as well.
            epsilon = max(epsilon - epsilon_decay_rate, 0)
            if epsilon == 0:
                learning_rate_a = 0.0001

            # An episode counts as a win when its last reward is the win reward.
            if reward == env.reward_win:
                rewards_per_episode[i] = 1
    finally:
        env.close()

    if is_training:
        with open("custom_frozenlake.pkl", "wb") as f:
            pickle.dump(q, f)

    return rewards_per_episode

# --- EXECUTION ---
if __name__ == '__main__':
    # Single training episode with the 'human' renderer enabled.
    run(episodes=1, is_training=True, render=True)