In [1]:
# Imports básicos y versiones
import sys
import os
import torch
import gymnasium as gym
import numpy as np
from src.dqn import DQN
from src.replay import ReplayBuffer
from src.agent import DQNAgent
from src.utils import preprocess_frame, stack_frames
import ale_py
import shimmy



# Report the interpreter and library versions this notebook ran against.
# gymnasium's version is read with getattr so that a missing __version__
# attribute prints 'n/a' instead of raising.
versions = (
    ('Python', sys.version.split()[0]),
    ('torch', torch.__version__),
    ('gymnasium', getattr(gym, '__version__', 'n/a')),
    ('numpy', np.__version__),
)
for label, version in versions:
    print(label, version)

Python 3.10.0
torch 2.9.1+cpu
gymnasium 1.2.2
numpy 2.2.6


In [2]:
# Test: forward pass through the DQN network.
# Builds the model for a (4, 84, 84) input and 6 actions, feeds it one random
# sample, and checks that the output has one entry per action.
input_shape = (4, 84, 84)
n_actions = 6
model = DQN(input_shape, n_actions)
x = torch.rand(1, *input_shape)
# Inference-only smoke test: no gradients are needed, so skip autograd tracking.
with torch.no_grad():
    out = model(x)
print('DQN output shape:', out.shape)
# Fail loudly instead of relying on someone eyeballing the printed shape.
assert out.shape == (1, n_actions), f'unexpected DQN output shape: {tuple(out.shape)}'

DQN output shape: torch.Size([1, 6])


In [3]:
# Test: ReplayBuffer push and sample.
# Pushes 64 transitions built from an all-zero (4, 84, 84) float32 array, then
# draws a batch of 32 and checks the buffer length and the sampled shapes.
# NOTE(review): the push arguments look like (state, action, reward,
# next_state, done) — confirm against src/replay.py.
buf = ReplayBuffer(capacity=1000, device='cpu')
state = np.zeros((4, 84, 84), dtype=np.float32)
n_transitions = 64
batch_size = 32
for i in range(n_transitions):
    buf.push(state, i % n_actions, float(i), state, 0.0)
print('Buffer length (expected 64):', len(buf))
assert len(buf) == n_transitions, f'expected {n_transitions} transitions, got {len(buf)}'

s, a, r, s2, d = buf.sample(batch_size)
print('Sample shapes:', s.shape, a.shape, r.shape, s2.shape, d.shape)
# The two array-valued fields keep the pushed array's shape behind a batch axis;
# the three scalar-valued fields come back as 1-D batches.
assert s.shape == s2.shape == (batch_size, *state.shape), 'unexpected sampled state shapes'
assert a.shape == r.shape == d.shape == (batch_size,), 'unexpected sampled scalar-field shapes'

Buffer length (expected 64): 64
Sample shapes: torch.Size([32, 4, 84, 84]) torch.Size([32]) torch.Size([32]) torch.Size([32, 4, 84, 84]) torch.Size([32])


In [4]:
# Test: preprocess_frame and stack_frames on a dummy frame.
# numpy is already imported as np in the first cell, so it is not re-imported here.
from collections import deque

# Random uint8 image of shape (210, 160, 3) standing in for a raw RGB frame.
dummy = (np.random.rand(210, 160, 3) * 255).astype(np.uint8)
p = preprocess_frame(dummy)
frames = deque(maxlen=4)
# NOTE(review): the third argument is presumably a "new episode" flag that
# fills the empty deque with copies of the frame — confirm in src/utils.py.
stack = stack_frames(frames, p, True)
print('Processed frame shape (84,84):', p.shape)
print('Stack shape (4,84,84):', stack.shape)
# Enforce the shapes the messages above advertise.
assert p.shape == (84, 84), f'unexpected processed frame shape: {p.shape}'
assert stack.shape == (4, 84, 84), f'unexpected stack shape: {stack.shape}'

Processed frame shape (84,84): (84, 84)
Stack shape (4,84,84): (4, 84, 84)


In [5]:
# Test: DQNAgent select_action and train_step (after filling the memory).
agent = DQNAgent((4,84,84), n_actions, device='cpu')
act = agent.select_action(stack, training=True)
print('Muestra de acción (train mode):', act)

# Fill the agent's memory with 200 repeated transitions so train_step has
# something to draw from.
for step in range(200):
    agent.memory.push(stack, step % n_actions, 0.0, stack, 0.0)

# Call train_step up to 10 times, stopping at the first non-None result.
# A final None is tolerated and simply printed.
loss = None
attempts_left = 10
while loss is None and attempts_left > 0:
    loss = agent.train_step()
    attempts_left -= 1
print('Train step result (None ok):', loss)

Muestra de acción (train mode): 1
Train step result (None ok): 0.007989153265953064


In [10]:
# Test: save and load of the agent.
# Saves the agent to a temporary checkpoint, loads it into a fresh DQNAgent,
# and prints the loaded agent's epsilon. os is already imported in the first
# cell, so it is not re-imported here.
tmp_path = 'tmp_test_model.pth'
try:
    agent.save(tmp_path, extra={'note':'test'})
    agent2 = DQNAgent((4,84,84), n_actions, device='cpu')
    agent2.load(tmp_path, map_location='cpu')
    print('Loaded agent epsilon:', agent2.epsilon)
finally:
    # Always remove the checkpoint, even if save/load raised, so a failed run
    # does not leave a stray file in the working directory.
    if os.path.exists(tmp_path):
        os.remove(tmp_path)
print('Temp model saved and removed successfully')

Loaded agent epsilon: 1.0
Temp model saved and removed successfully


In [11]:
# Best-effort attempt to create the ALE/Galaxian-v5 environment (needs ale-py,
# shimmy and the ROMs). Any failure is reported on stdout instead of raised,
# so the rest of the notebook still runs.
try:
    env = gym.make('ALE/Galaxian-v5', render_mode='rgb_array')
    print('ALE env creado. Action space:', env.action_space)
    env.close()
except Exception as err:
    # One print call, message and error on separate lines.
    print(
        'No se pudo crear ALE/Galaxian-v5. Asegúrate de tener ale-py, shimmy y ROMs instaladas. Error:',
        err,
        sep='\n',
    )

ALE env creado. Action space: Discrete(6)


## Instrucciones para ejecutar localmente
Si quieres ejecutar el notebook desde PowerShell: 
1) Instala dependencias (si no lo hiciste):
```powershell
pip install -r requirements.txt
```
2) Abre el notebook con Jupyter o ejecútalo desde la línea de comandos:
```powershell
jupyter notebook test_rl_galaxian.ipynb
# o para ejecutar todas las celdas sin abrir UI:
jupyter nbconvert --to notebook --execute test_rl_galaxian.ipynb --output executed_test_rl_galaxian.ipynb
```
Nota: las pruebas que usan `ALE/Galaxian-v5` requieren que `ale-py`, `shimmy` y las ROMs de Atari estén instalados; si no lo están, la celda no falla, sino que imprime un mensaje con el error.