# Import libraries and variables

In [1]:
# !pip install gymnasium[atari, accept-rom-license] stable-baselines3
# !pip install sb3-contrib
 
import os
# Start a virtual X display so environments can render without a physical
# screen. Xvfb only exists on Unix-like systems, so the step is skipped when
# the binary is not on PATH (e.g. on Windows) instead of running a failing
# shell command and pointing DISPLAY at a server that was never started.
import shutil
import subprocess

XVFB_DISPLAY = ":1"
if shutil.which("Xvfb") is not None:
    # Launch without a shell; the handle is kept so the server is not orphaned
    # from the kernel's point of view.
    xvfb_process = subprocess.Popen(
        ["Xvfb", XVFB_DISPLAY, "-screen", "0", "1024x768x24"]
    )
    os.environ['DISPLAY'] = XVFB_DISPLAY

import gymnasium as gym
from stable_baselines3.common.evaluation import evaluate_policy
import ale_py
# Register the environments provided by ale_py with Gymnasium so that
# "ALE/..." ids (such as the ENV_ID used below) can be created with gym.make.
gym.register_envs(ale_py)
from stable_baselines3 import DQN,PPO,A2C
from sb3_contrib.qrdqn import QRDQN

from models_scripts.qrdqn import mainQRDQN
from models_scripts.dqn import mainDQN
from models_scripts.a2c import mainA2C
from models_scripts.dqn_optuna import mainDQN_Optuna
from models_scripts.ppo import mainPPO

from utils.utils import record_video,show_videos

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Gymnasium id of the Atari game used throughout the notebook.
ENV_ID = "ALE/MsPacman-v5"
# Single environment instance that is passed to every model loaded below.
env = gym.make(ENV_ID)


# QR-DQN (Quantile Regression)

## Model train

In [4]:
# Run the QR-DQN training entry point imported from models_scripts.qrdqn.
# NOTE(review): the recorded output of this cell is a MemoryError while
# allocating an 18.8 GiB uint8 array of shape (200000, 1, 3, 210, 160);
# the buffer size / observation preprocessing inside models_scripts.qrdqn
# presumably needs to be reduced before this can run on the same machine.
mainQRDQN()

MemoryError: Unable to allocate 18.8 GiB for an array with shape (200000, 1, 3, 210, 160) and data type uint8

## Medir performance del modelo con evaluate policy

In [4]:
# Load the saved QR-DQN checkpoint and attach the shared MsPacman env to it.
model = QRDQN.load("../models/qrdqn/qrdqn_msPacman.zip", env=env)

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.




In [9]:
# Score the loaded QR-DQN agent over 10 episodes on the env attached to the
# model, then report the mean and standard deviation of the episode reward.
mean_reward, std_reward = evaluate_policy(
    model=model,
    env=model.get_env(),
    n_eval_episodes=10,
)
print(f"mean_reward: {mean_reward} +/- {std_reward}")

mean_reward: 218.0 +/- 4.0


## Ver ejecución

In [None]:
# Roll out the loaded QR-DQN agent in ENV_ID and save the recording under the
# "qrdqn-mspacman" prefix; video_length presumably caps the recorded steps
# (see utils.utils.record_video).
record_video(ENV_ID, model, video_length=4000, prefix="qrdqn-mspacman")

Saving video to c:\MBD_Repos\Pacman-ReinforcedLearning\videos\qrdqn-mspacman-step-0-to-step-1000.mp4
MoviePy - Building video c:\MBD_Repos\Pacman-ReinforcedLearning\videos\qrdqn-mspacman-step-0-to-step-1000.mp4.
MoviePy - Writing video c:\MBD_Repos\Pacman-ReinforcedLearning\videos\qrdqn-mspacman-step-0-to-step-1000.mp4



                                                                           

MoviePy - Done !
MoviePy - video ready c:\MBD_Repos\Pacman-ReinforcedLearning\videos\qrdqn-mspacman-step-0-to-step-1000.mp4




In [6]:
# Display the recorded QR-DQN rollout. The prefix now matches the one given to
# record_video ("qrdqn-mspacman") exactly; the previous "qrdqn-msPacman"
# differed in case, which presumably only matched the saved files on
# case-insensitive file systems.
show_videos("../videos", prefix="qrdqn-mspacman")

# Deep Q-Network (DQN)

## Model train

In [8]:
# Run the DQN training entry point imported from models_scripts.dqn.
# Presumably this produces the checkpoint loaded in the evaluation section
# (../models/dqn/dqn_msPacman.zip) — confirm in models_scripts.dqn.
mainDQN()

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.




----------------------------------
| rollout/            |          |
|    ep_len_mean      | 465      |
|    ep_rew_mean      | 208      |
|    exploration_rate | 0.985    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 1308     |
|    time_elapsed     | 1        |
|    total_timesteps  | 1860     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 455      |
|    ep_rew_mean      | 202      |
|    exploration_rate | 0.97     |
| time/               |          |
|    episodes         | 8        |
|    fps              | 1310     |
|    time_elapsed     | 2        |
|    total_timesteps  | 3638     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 441      |
|    ep_rew_mean      | 196      |
|    exploration_rate | 0.956    |
| time/               |          |
|    episodes       

## Medir performance del modelo con evaluate policy

In [9]:
# Load the saved DQN checkpoint and attach the shared MsPacman env to it.
model = DQN.load("../models/dqn/dqn_msPacman.zip", env=env)

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.




In [10]:
# Score the loaded DQN agent over 10 episodes on the env attached to the
# model, then report the mean and standard deviation of the episode reward.
mean_reward, std_reward = evaluate_policy(
    model=model,
    env=model.get_env(),
    n_eval_episodes=10,
)
print(f"mean_reward: {mean_reward} +/- {std_reward}")

mean_reward: 1929.0 +/- 460.46606823956097


## Ver ejecución

In [13]:
# Roll out the loaded DQN agent in ENV_ID and save the recording under the
# "dqn-mspacman" prefix; video_length presumably caps the recorded steps
# (see utils.utils.record_video).
record_video(ENV_ID, model, video_length=4000, prefix="dqn-mspacman")

Saving video to c:\MBD_Repos\Pacman-ReinforcedLearning\videos\dqn-mspacman-step-0-to-step-4000.mp4
MoviePy - Building video c:\MBD_Repos\Pacman-ReinforcedLearning\videos\dqn-mspacman-step-0-to-step-4000.mp4.
MoviePy - Writing video c:\MBD_Repos\Pacman-ReinforcedLearning\videos\dqn-mspacman-step-0-to-step-4000.mp4



                                                                            

MoviePy - Done !
MoviePy - video ready c:\MBD_Repos\Pacman-ReinforcedLearning\videos\dqn-mspacman-step-0-to-step-4000.mp4




In [15]:
# Display the recorded DQN rollout. The prefix now matches the one given to
# record_video ("dqn-mspacman") exactly; the previous "dqn-msPacman" differed
# in case, which presumably only matched the saved files on case-insensitive
# file systems.
show_videos("../videos", prefix="dqn-mspacman")

# DQN with Optuna (hyperparameter tuning)

## Model train

In [None]:
# Run the Optuna-tuned DQN entry point imported from models_scripts.dqn_optuna.
# Presumably this produces the checkpoint loaded in the evaluation section
# (../models/dqn_optuna/dqn_optuna_msPacman.zip) — confirm in that module.
mainDQN_Optuna()

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.




## Medir performance del modelo con evaluate policy

In [None]:
# Load the saved Optuna-tuned DQN checkpoint and attach the shared MsPacman env to it.
model = DQN.load("../models/dqn_optuna/dqn_optuna_msPacman.zip", env=env)

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.




In [None]:
# Score the loaded Optuna-tuned DQN agent over 10 episodes on the env attached
# to the model, then report the mean and standard deviation of the episode reward.
mean_reward, std_reward = evaluate_policy(
    model=model,
    env=model.get_env(),
    n_eval_episodes=10,
)
print(f"mean_reward: {mean_reward} +/- {std_reward}")

mean_reward: 218.0 +/- 4.0


## Ver ejecución

In [None]:
# Roll out the loaded Optuna-tuned DQN agent in ENV_ID and save the recording
# under the "dqn-optuna-mspacman" prefix; video_length presumably caps the
# recorded steps (see utils.utils.record_video).
record_video(ENV_ID, model, video_length=4000, prefix="dqn-optuna-mspacman")

Saving video to c:\MBD_Repos\Pacman-ReinforcedLearning\videos\qrdqn-mspacman-step-0-to-step-1000.mp4
MoviePy - Building video c:\MBD_Repos\Pacman-ReinforcedLearning\videos\qrdqn-mspacman-step-0-to-step-1000.mp4.
MoviePy - Writing video c:\MBD_Repos\Pacman-ReinforcedLearning\videos\qrdqn-mspacman-step-0-to-step-1000.mp4



                                                                           

MoviePy - Done !
MoviePy - video ready c:\MBD_Repos\Pacman-ReinforcedLearning\videos\qrdqn-mspacman-step-0-to-step-1000.mp4




In [None]:
# Display the recorded rollout; the prefix is the same string that was passed
# to record_video for this model.
show_videos("../videos", prefix="dqn-optuna-mspacman")

# Policy Gradient methods (PPO)

## Model train

In [3]:
# Run the PPO training entry point imported from models_scripts.ppo.
# Presumably this produces the checkpoint loaded in the evaluation section
# (../models/ppo/ppo_msPacman.zip) — confirm in models_scripts.ppo.
mainPPO()

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 503      |
|    ep_rew_mean     | 290      |
| time/              |          |
|    fps             | 248      |
|    iterations      | 1        |
|    time_elapsed    | 4        |
|    total_timesteps | 1024     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 532         |
|    ep_rew_mean          | 277         |
| time/                   |             |
|    fps                  | 212         |
|    iterations           | 2           |
|    time_elapsed         | 9           |
|    total_timesteps      | 2048        |
| train/                  |             |
|    approx_kl            | 0.004963094 |
|    clip_fraction        | 0.16        |
|    clip_range           | 0.1         |
|    entropy_loss         | -2.19       |
|    explained_variance   | 0.00134     |
|    learning_rate        | 0.

## Medir performance del modelo con evaluate policy

In [16]:
# Load the saved PPO checkpoint and attach the shared MsPacman env to it.
model = PPO.load("../models/ppo/ppo_msPacman.zip", env=env)

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.


In [17]:
# Score the loaded PPO agent over 10 episodes on the env attached to the
# model, then report the mean and standard deviation of the episode reward.
mean_reward, std_reward = evaluate_policy(
    model=model,
    env=model.get_env(),
    n_eval_episodes=10,
)
print(f"mean_reward: {mean_reward} +/- {std_reward}")

mean_reward: 170.0 +/- 0.0


## Ver ejecución

In [18]:
# Roll out the loaded PPO agent in ENV_ID and save the recording under the
# "ppo-mspacman" prefix; video_length presumably caps the recorded steps
# (see utils.utils.record_video).
record_video(ENV_ID, model, video_length=4000, prefix="ppo-mspacman")

Saving video to c:\MBD_Repos\Pacman-ReinforcedLearning\videos\ppo-mspacman-step-0-to-step-4000.mp4
MoviePy - Building video c:\MBD_Repos\Pacman-ReinforcedLearning\videos\ppo-mspacman-step-0-to-step-4000.mp4.
MoviePy - Writing video c:\MBD_Repos\Pacman-ReinforcedLearning\videos\ppo-mspacman-step-0-to-step-4000.mp4



                                                                            

MoviePy - Done !
MoviePy - video ready c:\MBD_Repos\Pacman-ReinforcedLearning\videos\ppo-mspacman-step-0-to-step-4000.mp4




In [19]:
# Display the recorded PPO rollout. The prefix now matches the one given to
# record_video ("ppo-mspacman") exactly; the previous "ppo-msPacman" differed
# in case, which presumably only matched the saved files on case-insensitive
# file systems.
show_videos("../videos", prefix="ppo-mspacman")

# Advantage Actor-Critic (A2C)

## Model train

In [None]:
# Run the A2C training entry point imported from models_scripts.a2c.
# Presumably this produces the checkpoint loaded in the evaluation section
# (../models/a2c/a2c_msPacman.zip) — confirm in models_scripts.a2c.
mainA2C()

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.




## Medir performance del modelo con evaluate policy

In [None]:
# Load the saved A2C checkpoint and attach the shared MsPacman env to it.
model = A2C.load("../models/a2c/a2c_msPacman.zip", env=env)

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.




In [None]:
# Score the loaded A2C agent over 10 episodes on the env attached to the
# model, then report the mean and standard deviation of the episode reward.
mean_reward, std_reward = evaluate_policy(
    model=model,
    env=model.get_env(),
    n_eval_episodes=10,
)
print(f"mean_reward: {mean_reward} +/- {std_reward}")

mean_reward: 218.0 +/- 4.0


## Ver ejecución

In [None]:
# Roll out the loaded A2C agent in ENV_ID and save the recording under the
# "a2c-mspacman" prefix; video_length presumably caps the recorded steps
# (see utils.utils.record_video).
record_video(ENV_ID, model, video_length=4000, prefix="a2c-mspacman")

Saving video to c:\MBD_Repos\Pacman-ReinforcedLearning\videos\qrdqn-mspacman-step-0-to-step-1000.mp4
MoviePy - Building video c:\MBD_Repos\Pacman-ReinforcedLearning\videos\qrdqn-mspacman-step-0-to-step-1000.mp4.
MoviePy - Writing video c:\MBD_Repos\Pacman-ReinforcedLearning\videos\qrdqn-mspacman-step-0-to-step-1000.mp4



                                                                           

MoviePy - Done !
MoviePy - video ready c:\MBD_Repos\Pacman-ReinforcedLearning\videos\qrdqn-mspacman-step-0-to-step-1000.mp4




In [None]:
# Display the recorded A2C rollout. The prefix now matches the one given to
# record_video ("a2c-mspacman") exactly; the previous "a2c-msPacman" differed
# in case, which presumably only matched the saved files on case-insensitive
# file systems.
show_videos("../videos", prefix="a2c-mspacman")