# Deep Q Learning pour Breakout d'Atari: **play script**
 

## 1. Imports et bibliothèques

* **gymnasium** : Création et gestion de l'environnement Atari
* **tensorflow.keras**: Construction du réseau de neurones
* **pygame** : Gestion de l'affichage du jeu
* **rl** : Implémentation de l'agent DQN et de ses composants


In [1]:
import gymnasium as gym
from gymnasium.wrappers import AtariPreprocessing
from tensorflow.keras.layers import Dense, Flatten, Conv2D, Permute
import time
import pygame
from rl.agents.dqn import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import GreedyQPolicy
from rl.util import *
from rl.core import Processor
from rl.callbacks import Callback



## 2. Configuration de l'environnement

On réutilise les classes et fonctions créées dans le script `train.py`.

In [2]:
# Adapter between the Gymnasium API and the legacy 4-tuple Gym API.
class CompatibilityWrapper(gym.Wrapper):
    """Expose a wrapped environment through the legacy Gym call signatures.

    ``step`` returns ``(observation, reward, done, info)`` instead of the
    five-element Gymnasium tuple, and ``reset`` returns only the observation.
    """

    def step(self, action):
        """Step the wrapped env, folding ``terminated``/``truncated`` into ``done``."""
        obs, reward, terminated, truncated, info = self.env.step(action)
        # The episode is over as soon as either flag is set.
        return obs, reward, terminated or truncated, info

    def reset(self, **kwargs):
        """Reset the wrapped env and return the observation, discarding ``info``."""
        obs, _info = self.env.reset(**kwargs)
        return obs

# Factory for the preprocessed Atari environment.
def create_atari_environment(env_name):
    """Create the Atari environment ``env_name`` wrapped for the DQN agent.

    The environment is created with ``render_mode='rgb_array'``, wrapped in
    ``AtariPreprocessing`` (84-pixel grayscale screens, up to 30 no-op
    actions) and finally in ``CompatibilityWrapper`` so that it exposes the
    legacy ``step``/``reset`` signatures.

    Args:
        env_name: Environment id passed to ``gym.make``.

    Returns:
        The wrapped environment.
    """
    preprocessing_options = dict(
        screen_size=84,
        grayscale_obs=True,
        # NOTE(review): presumably 1 because the underlying environment
        # already skips frames on its own -- confirm against train.py.
        frame_skip=1,
        noop_max=30,
    )
    raw_env = gym.make(env_name, render_mode='rgb_array')
    preprocessed_env = AtariPreprocessing(raw_env, **preprocessing_options)
    return CompatibilityWrapper(preprocessed_env)

## 3. Construction du modèle CNN


In [3]:
def build_model(window_length, shape, actions):
    """Build the convolutional Q-network.

    Args:
        window_length: Number of stacked frames in one network input.
        shape: Tuple giving the shape of a single frame.
        actions: Number of output units (one Q-value per action).

    Returns:
        An uncompiled ``Sequential`` model whose input has shape
        ``(window_length,) + shape``.
    """
    # (filters, kernel size, stride) of the three convolutional layers.
    conv_specs = ((32, 8, 4), (64, 4, 2), (64, 3, 1))

    # Move the frame axis from position 1 to the last position so that the
    # stacked frames are fed to the convolutions as channels.
    layers = [Permute((2, 3, 1), input_shape=(window_length,) + shape)]
    layers.extend(
        Conv2D(filters, (kernel, kernel), strides=(stride, stride), activation='relu')
        for filters, kernel, stride in conv_specs
    )
    layers.append(Flatten())
    layers.append(Dense(512, activation='relu'))
    # Linear output: one unbounded value per action.
    layers.append(Dense(actions, activation='linear'))

    # NOTE(review): ``Sequential`` is not imported explicitly in this file; it
    # presumably comes from ``from rl.util import *`` -- confirm.
    return Sequential(layers)

## 4.  Définition du Processeur Atari

In [4]:
class AtariProcessor(Processor):
    """Processor converting observations, state batches and rewards.

    NOTE(review): ``np`` is not imported explicitly in this file; it
    presumably comes from ``from rl.util import *`` -- confirm.
    """

    def process_observation(self, observation):
        """Return the observation as a ``uint8`` array.

        When a tuple is received, only its first element is used.
        """
        frame = observation[0] if isinstance(observation, tuple) else observation
        return np.array(frame).astype('uint8')

    def process_state_batch(self, batch):
        """Return ``batch`` cast to ``float32`` and divided by 255."""
        return batch.astype('float32') / 255.

    def process_reward(self, reward):
        """Return ``reward`` clipped to the range [-1, 1]."""
        return np.clip(reward, -1., 1.)

## 5. Création d'un Callback Pygame

Ce callback gère l'affichage du jeu avec Pygame :

1. Initialise la fenêtre Pygame
2. Affiche chaque frame du jeu après chaque action
3. Gère les événements Pygame (comme la fermeture de la fenêtre)
4. Ajoute un délai entre les épisodes

In [5]:
class PygameCallback(Callback):
    """Display the environment's rendered frames in a Pygame window.

    After every agent action the current frame is fetched with
    ``env.render()``, scaled to the window size and drawn. If the user closes
    the window, rendering stops but the run itself continues: the environment
    is deliberately NOT closed here because the agent is still stepping it;
    closing the environment stays the responsibility of the caller.

    Args:
        env: Environment whose ``render()`` returns the frame to display.
        delay: Seconds to sleep after each displayed frame.
        window_size: ``(width, height)`` of the Pygame window in pixels.
    """

    def __init__(self, env, delay=0.02, window_size=(420, 320)):
        super().__init__()
        self.env = env
        self.delay = delay
        self.window_size = tuple(window_size)
        # Becomes False once the user closes the window; every hook then
        # turns into a no-op instead of drawing on a destroyed display.
        self._window_open = True
        pygame.init()
        self.screen = pygame.display.set_mode(self.window_size)
        pygame.display.set_caption("Atari Breakout - DQN Agent")

    def _close_window(self):
        """Shut Pygame down and remember that the window is gone."""
        self._window_open = False
        pygame.quit()

    def on_action_end(self, action, logs=None):
        """Draw the current frame and process pending window events."""
        if not self._window_open:
            return

        frame = self.env.render()
        # Axes 0 and 1 are swapped because Pygame surfaces are indexed
        # (x, y); the frame is presumably (height, width, channels).
        surf = pygame.surfarray.make_surface(frame.swapaxes(0, 1))
        surf = pygame.transform.scale(surf, self.window_size)
        self.screen.blit(surf, (0, 0))
        pygame.display.flip()

        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                self._close_window()
                # Pygame is shut down: stop touching it for this call.
                return

        time.sleep(self.delay)

    def on_episode_end(self, episode, logs=None):
        """Pause for one second between episodes while the window is open."""
        if self._window_open:
            pygame.time.wait(1000)

## 6. Programme principal

In [6]:
if __name__ == "__main__":
    # Number of test episodes; used both for the run and for the report so
    # the two can never disagree.
    nb_test_episodes = 5

    # 1. CREATE ENV
    env = create_atari_environment('ALE/Breakout-v5')

    # Everything after the environment creation runs under try/finally so the
    # environment and Pygame are released even if loading or testing fails.
    try:
        nb_actions = env.action_space.n

        # 2. BUILD MODEL
        window_length = 4
        input_shape = (84, 84)
        model = build_model(window_length, input_shape, nb_actions)

        # 3. LOAD TRAINED WEIGHTS
        model.load_weights('policy.h5')

        # 4. CONFIGURE AGENT
        # NOTE(review): the training-only settings below (memory limit,
        # warm-up, target update, train interval) presumably mirror train.py
        # and should not influence dqn.test -- confirm.
        memory = SequentialMemory(limit=1000000, window_length=window_length)
        processor = AtariProcessor()
        policy = GreedyQPolicy()

        dqn = DQNAgent(model=model,
                       nb_actions=nb_actions,
                       policy=policy,
                       memory=memory,
                       processor=processor,
                       nb_steps_warmup=50000,
                       gamma=.99,
                       target_model_update=10000,
                       train_interval=4,
                       delta_clip=1.)
        dqn.compile(optimizer='adam', metrics=['mae'])

        # 5. TEST AGENT
        # visualize=False: display is handled by the Pygame callback instead.
        pygame_callback = PygameCallback(env, delay=0.02)
        scores = dqn.test(env,
                          nb_episodes=nb_test_episodes,
                          visualize=False,
                          callbacks=[pygame_callback])

        # 6. DISPLAY RESULT
        print(f'Average score over {nb_test_episodes} test episodes:',
              np.mean(scores.history['episode_reward']))
    finally:
        # 7. CLOSE ENV AND PYGAME
        env.close()
        pygame.quit()

Testing for 5 episodes ...


  updates=self.state_updates,
  logger.warn(


Episode 1: reward: 28.000, steps: 1071
Episode 2: reward: 32.000, steps: 1212
Episode 3: reward: 41.000, steps: 1516
Episode 4: reward: 22.000, steps: 907
Episode 5: reward: 20.000, steps: 832
Average score over 5 test episodes: 28.6
