### 1. Importations

In [2]:
import gymnasium as gym            # Pour l'environnement
import numpy as np                 # Pour les calculs
import matplotlib.pyplot as plt    # Pour les graphes
from collections import deque      # Pour le memory buffer
import random                      # Pour le epsilon greedy
import torch                       # Pour les reseaux de neuronnes et fonction de retropropagation
from torch import nn               #
import torch.nn.functional as F    #
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env
from minigrid.wrappers import FlatObsWrapper  # Importer le wrapper pour aplatir les observations
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
from minigrid.wrappers import ImgObsWrapper
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy
import os

pygame 2.6.1 (SDL 2.28.4, Python 3.12.7)
Hello from the pygame community. https://www.pygame.org/contribute.html


### 2. Wrappers

In [3]:
class MinigridFeaturesExtractor(BaseFeaturesExtractor):
    def __init__(self, observation_space: gym.Space, features_dim: int = 512, normalized_image: bool = False) -> None:
        super().__init__(observation_space, features_dim)
        n_input_channels = observation_space.shape[0]
        self.cnn = nn.Sequential(
            nn.Conv2d(n_input_channels, 16, (2, 2)),
            nn.ReLU(),
            nn.Conv2d(16, 32, (2, 2)),
            nn.ReLU(),
            nn.Conv2d(32, 64, (2, 2)),
            nn.ReLU(),
            nn.Flatten(),
        )

        # Compute shape by doing one forward pass
        with torch.no_grad():
            n_flatten = self.cnn(torch.as_tensor(observation_space.sample()[None]).float()).shape[1]

        self.linear = nn.Sequential(nn.Linear(n_flatten, features_dim), nn.ReLU())

    def forward(self, observations: torch.Tensor) -> torch.Tensor:
        return self.linear(self.cnn(observations))

### 3. Environnement + Vectorize Env  

In [3]:
policy_kwargs = dict(
    features_extractor_class=MinigridFeaturesExtractor,
    features_extractor_kwargs=dict(features_dim=128),
)

In [4]:
# Environnement et configuration du modèle
env = gym.make("MiniGrid-Empty-16x16-v0", render_mode="human")
env.reset()
env = ImgObsWrapper(env)

In [5]:
env = DummyVecEnv([ lambda:env])

### 4. Train Model + Save training

In [None]:
log_path = os.path.join('Minigrid','Training', 'Logs')

In [None]:
model = PPO("CnnPolicy", env, policy_kwargs=policy_kwargs, verbose=1, tensorboard_log=log_path)
model.learn(total_timesteps=20000,tb_log_name="PPO")

### 4BIS. ReTrain

In [None]:
PPO_path = os.path.join('Minigrid','Training', 'Saved_Models', 'PPO_Minigrid_model')

In [None]:
env = gym.make("MiniGrid-Empty-16x16-v0", render_mode="rgb_array")
env = ImgObsWrapper(env)

In [None]:
env = DummyVecEnv([ lambda:env])

In [None]:
model=PPO.load(PPO_path, env=env)

In [None]:
model.learn(total_timesteps=10000000,tb_log_name="PPO")

### 5. Save Model

In [None]:
PPO_path = os.path.join('Minigrid','Training', 'Saved_Models', 'PPO_Minigrid_model')

In [None]:
model.save(PPO_path)

In [None]:
del model

### 6. Test Model

In [4]:
PPO_path = os.path.join('Minigrid','Training', 'Saved_Models', 'PPO_Minigrid_model')

In [5]:
env = gym.make("MiniGrid-Empty-16x16-v0", render_mode="human")
env = ImgObsWrapper(env)

In [6]:
model=PPO.load(PPO_path, env=env)

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.


In [7]:
evaluate_policy(model, env, n_eval_episodes=10)



(np.float64(0.976269543170929), np.float64(0.0))

### 7. Tensorboard

In [8]:
training_log_path=os.path.join('Minigrid','Training', 'Logs','PPO_3')

In [9]:
training_log_path

'Minigrid/Training/Logs/PPO_3'

In [None]:
!tensorboard --logdir={training_log_path}

TensorFlow installation not found - running with reduced feature set.

NOTE: Using experimental fast data loading logic. To disable, pass
    "--load_fast=false" and report issues on GitHub. More details:
    https://github.com/tensorflow/tensorboard/issues/4784

Serving TensorBoard on localhost; to expose to the network, use a proxy or pass --bind_all
TensorBoard 2.18.0 at http://localhost:6006/ (Press CTRL+C to quit)
