<a href="https://colab.research.google.com/github/LuisIrigoyen/trabajo_final_RL/blob/main/proyecto_final_APR_V08_en_colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# ATTENTION: change `drive_root` if the project lives in a different Drive folder.
# `mount` is the mount point for Google Drive; `drive_root` is the working
# directory inside it.
mount='/content/gdrive'
drive_root = mount + "/My Drive/08_MIAR/actividades/proyecto practico"

# Detect whether the notebook runs inside Google Colab. Only a failed import
# means "not in Colab"; any other error should surface instead of being hidden.
try:
  from google.colab import drive
  IN_COLAB=True
except ImportError:
  IN_COLAB=False

In [2]:
# Switch to the directory on the Google Drive that you want to use
import os

if IN_COLAB:
  print("We're running Colab")

  # Mount the Google Drive at mount
  print("Colab: mounting Google drive on ", mount)
  drive.mount(mount)

  # Create drive_root if it doesn't exist (set the flag to False to skip)
  create_drive_root = True
  if create_drive_root:
    print("\nColab: making sure ", drive_root, " exists.")
    os.makedirs(drive_root, exist_ok=True)

  # Change to the directory. os.chdir is plain Python, so the path (which
  # contains spaces) needs no shell/magic expansion.
  print("\nColab: Changing directory to ", drive_root)
  os.chdir(drive_root)

# Verify we're in the correct working directory. The value is printed
# explicitly because a bare expression in the middle of a cell is not displayed.
print(os.getcwd())
print("Archivos en el directorio: ")
print(os.listdir())

We're running Colab
Colab: mounting Google drive on  /content/gdrive
Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).

Colab: making sure  /content/gdrive/My Drive/08_MIAR/actividades/proyecto practico  exists.

Colab: Changing directory to  /content/gdrive/My Drive/08_MIAR/actividades/proyecto practico
/content/gdrive/My Drive/08_MIAR/actividades/proyecto practico
Archivos en el directorio: 
['dqn_models', 'checkpoints', 'dqn_05_weights.h5f.data-00000-of-00001', 'dqn_05_weights.h5f.index', 'dqn_06_weights.h5f.index', 'dqn_06_weights.h5f.data-00000-of-00001', 'dqn_07_weights.h5f.index', 'checkpoint', 'dqn_07_weights.h5f.data-00000-of-00001']


In [3]:
# Install the project dependencies into the kernel's environment.
# Two different package sets are used depending on where the notebook runs.
# NOTE(review): the Colab branch installs keras-rl2 (pinned) while the local
# branch installs keras-rl3 (unpinned) plus a standalone Keras — confirm that
# both sets are intended and mutually compatible.
if IN_COLAB:
  # Colab: old gym API, Atari ROM bindings from a fork, keras-rl2, TensorFlow 2.12.
  %pip install gym==0.17.3
  %pip install git+https://github.com/Kojoley/atari-py.git
  %pip install keras-rl2==1.0.5
  %pip install tensorflow==2.12  #2.8
else:
  # Local run: same gym / atari-py, plus extra pinned packages.
  %pip install gym==0.17.3
  %pip install git+https://github.com/Kojoley/atari-py.git
  %pip install pyglet==1.5.0
  %pip install h5py==3.1.0
  %pip install Pillow==9.5.0
  %pip install keras-rl3
  %pip install Keras==2.2.4
  %pip install tensorflow==2.10.1
  %pip install torch==2.0.1
  %pip install agents==1.4.0

Collecting git+https://github.com/Kojoley/atari-py.git
  Cloning https://github.com/Kojoley/atari-py.git to /tmp/pip-req-build-zoumysri
  Running command git clone --filter=blob:none --quiet https://github.com/Kojoley/atari-py.git /tmp/pip-req-build-zoumysri
  Resolved https://github.com/Kojoley/atari-py.git to commit 86a1e05c0a95e9e6233c3a413521fdb34ca8a089
  Preparing metadata (setup.py) ... [?25l[?25hdone


In [4]:
from __future__ import division

from PIL import Image
import numpy as np
import gym

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Flatten, Convolution2D, Permute, BatchNormalization
#from tensorflow.keras.optimizers import Adam
import tensorflow.keras.backend as K

from rl.agents.dqn import DQNAgent
from rl.policy import LinearAnnealedPolicy, BoltzmannQPolicy, EpsGreedyQPolicy
from rl.memory import SequentialMemory
from rl.core import Processor
from rl.callbacks import FileLogger, ModelIntervalCheckpoint

In [5]:
# Create the Atari environment first; everything below depends on it.
env_name = 'SpaceInvaders-v0'
env = gym.make(env_name)

# Size of a preprocessed frame and number of consecutive frames per state.
INPUT_SHAPE = (84, 84)
WINDOW_LENGTH = 4

# Size of the discrete action space exposed by the environment.
nb_actions = env.action_space.n

# Fixed seeds for NumPy and for the environment.
np.random.seed(123)
env.seed(123)

# Inspect the raw observation: declared space shape vs. an actual first frame.
height, width, channels = env.observation_space.shape
obs = env.reset()
print(height, width, channels)
print("Forma de la observación:", obs.shape)  # expected: (height, width, channels)

# Last expression of the cell: list the meaning of each action index.
env.unwrapped.get_action_meanings()

210 160 3
Forma de la observación: (210, 160, 3)


['NOOP', 'FIRE', 'RIGHT', 'LEFT', 'RIGHTFIRE', 'LEFTFIRE']

In [6]:
class AtariProcessor(Processor):
    """Preprocessing hooks applied to observations, state batches and rewards.

    Observations are resized to ``INPUT_SHAPE`` and converted to 8-bit
    grayscale; state batches get a trailing channel axis and are scaled to
    [0, 1]; rewards are clipped to [-1, 1].
    """

    def process_observation(self, observation):
        """Convert one raw frame into a ``uint8`` grayscale image of ``INPUT_SHAPE``.

        Args:
            observation: array with 3 dimensions (height, width, channel).

        Returns:
            Array of shape ``INPUT_SHAPE`` and dtype ``uint8``.
        """
        assert observation.ndim == 3  # (height, width, channel)
        img = Image.fromarray(observation)
        # 'L' is PIL's 8-bit grayscale mode.
        # NOTE(review): PIL's resize takes (width, height) while the shape check
        # below is (height, width); this only matches because INPUT_SHAPE is square.
        img = img.resize(INPUT_SHAPE).convert('L')
        processed_observation = np.array(img)
        assert processed_observation.shape == INPUT_SHAPE
        # Stored as uint8; conversion to float happens per batch in
        # process_state_batch.
        return processed_observation.astype('uint8')

    def process_state_batch(self, batch):
        """Add the channel axis if missing and scale pixel values to [0, 1].

        Args:
            batch: array of stacked frames; a 4-D input
                (batch_size, window_length, h, w) gets a trailing channel axis.

        Returns:
            ``float32`` array with values divided by 255.
        """
        # Add the channel dimension (axis=-1) only when it is not present yet.
        if batch.ndim == 4:                         # (batch_size, window_length, h, w)
            batch = np.expand_dims(batch, axis=-1)  # (batch_size, window_length, h, w, 1)
        # Normalise unconditionally: previously this happened only inside the
        # branch above, so any non-4-D batch hit an unbound local variable.
        return batch.astype('float32') / 255.

    def process_reward(self, reward):
        """Clip the reward to the range [-1, 1]."""
        return np.clip(reward, -1., 1.)

## 1. EJFM Implementación de la red neuronal

In [7]:
# Enable these options BEFORE creating the model
import tensorflow as tf

# Ask TensorFlow to allocate GPU memory on demand instead of all at once.
# The setting is applied to every visible GPU because TensorFlow requires
# memory growth to be configured identically across GPUs.
physical_devices = tf.config.list_physical_devices('GPU')
for gpu_device in physical_devices:
    tf.config.experimental.set_memory_growth(gpu_device, True)

# XLA acceleration (optimised compilation). Enabled once, regardless of
# whether a GPU is present.
tf.config.optimizer.set_jit(True)

In [8]:
def build_model(height, width, channels, actions):
    """Build the convolutional Q-network.

    Args:
        height: height of one preprocessed frame.
        width: width of one preprocessed frame.
        channels: number of channels of one preprocessed frame.
        actions: number of discrete actions (size of the output layer).

    Returns:
        An uncompiled ``Sequential`` model whose input shape is
        ``(WINDOW_LENGTH, height, width, channels)`` and whose output has one
        linear unit per action.
    """
    model = Sequential()

    # The input shape is derived from the arguments instead of being
    # hard-coded; with (84, 84, 1) and WINDOW_LENGTH = 4 it is (4, 84, 84, 1).
    model.add(Convolution2D(16, (3, 3), strides=2, padding='same',
                            input_shape=(WINDOW_LENGTH, height, width, channels)))
    model.add(Activation('relu'))

    model.add(Convolution2D(32, (3, 3), strides=2, padding='same'))
    model.add(Activation('relu'))

    model.add(Convolution2D(64, (3, 3), strides=2, padding='same'))
    model.add(Activation('relu'))

    model.add(Flatten())

    # NOTE(review): 516 units looks like a typo for 512; left unchanged so
    # that previously saved weights keep loading.
    model.add(Dense(516))
    model.add(Activation('relu'))

    # Output layer: one linear value per action
    model.add(Dense(actions, activation='linear'))

    return model

# Instantiate the network for 84x84 single-channel frames and print its layers.
model = build_model(height=84, width=84, channels=1, actions=nb_actions)
model.summary()


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 4, 42, 42, 16)     160       
                                                                 
 activation (Activation)     (None, 4, 42, 42, 16)     0         
                                                                 
 conv2d_1 (Conv2D)           (None, 4, 21, 21, 32)     4640      
                                                                 
 activation_1 (Activation)   (None, 4, 21, 21, 32)     0         
                                                                 
 conv2d_2 (Conv2D)           (None, 4, 11, 11, 64)     18496     
                                                                 
 activation_2 (Activation)   (None, 4, 11, 11, 64)     0         
                                                                 
 flatten (Flatten)           (None, 30976)             0

## 2. EJFM Implementación de la solución DQN

In [9]:
from rl.agents import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy
from keras.optimizers import Adam

def build_agent(model, actions):
    """Create the DQN agent (double DQN with an averaging dueling head).

    Args:
        model: Keras model used as the Q-network.
        actions: number of discrete actions the agent can choose from.

    Returns:
        An uncompiled ``DQNAgent``.
    """
    # Epsilon-greedy exploration, annealed linearly from 1.0 to 0.2 over
    # 200000 steps; 0.1 is used when testing.
    policy = LinearAnnealedPolicy(
        EpsGreedyQPolicy(),
        attr='eps',
        value_max=1.0,
        value_min=0.2,
        value_test=0.1,
        nb_steps=200000)

    # The replay memory window uses the shared WINDOW_LENGTH constant so it
    # cannot drift from the model's input shape.
    memory = SequentialMemory(limit=200000, window_length=WINDOW_LENGTH)

    dqn = DQNAgent(model=model,
                   memory=memory,
                   policy=policy,
                   enable_double_dqn=True,
                   enable_dueling_network=True,
                   dueling_type='avg',
                   # Use the function argument; the global nb_actions was
                   # read here before, silently ignoring `actions`.
                   nb_actions=actions,
                   batch_size=32,
                   target_model_update=10000,
                   processor=AtariProcessor(),
                   nb_steps_warmup=20000
                  )
    return dqn

# Build the agent around the Q-network and compile it with Adam.
dqn = build_agent(model=model, actions=nb_actions)
dqn.compile(optimizer=Adam(learning_rate=0.0005, epsilon=1e-4))

In [10]:
from rl.callbacks import ModelIntervalCheckpoint
import os

# Make sure the target folder for intermediate weights exists.
os.makedirs("checkpoints", exist_ok=True)

# Save the model weights every 10,000 steps; the step count is substituted
# into the `{step}` placeholder of the file name.
checkpoint_callback = ModelIntervalCheckpoint(
    verbose=1,
    interval=10000,
    filepath='checkpoints/dqn_v08_weights_{step}.h5f',
)

In [None]:
from datetime import datetime

# Record the start so the total duration can be reported; the H:M:S stamps
# alone are ambiguous when a run crosses midnight.
start_time = datetime.now()
print(f"⏱ Tiempo de inicio: {start_time.strftime('%H:%M:%S')}")

# Train for 200000 steps, writing intermediate weights via the checkpoint callback.
dqn.fit(env, nb_steps=200000, visualize=False, verbose=2, callbacks=[checkpoint_callback])

end_time = datetime.now()
print(f"⏱ Tiempo de fin: {end_time.strftime('%H:%M:%S')}")
print(f"⏱ Duración total: {end_time - start_time}")

# Save the final weights. overwrite=True avoids an interactive confirmation
# prompt if weights from an earlier run exist, which would stall an
# unattended notebook after training has finished.
dqn.save_weights('dqn_08_weights.h5f', overwrite=True)

⏱ Tiempo de inicio: 14:34:34
Training for 200000 steps ...


  updates=self.state_updates,


    553/200000: episode: 1, duration: 3.255s, episode steps: 553, steps per second: 170, episode reward:  6.000, mean reward:  0.011 [ 0.000,  1.000], mean action: 2.508 [0.000, 5.000],  loss: --, mean_q: --, mean_eps: --
   1255/200000: episode: 2, duration: 3.921s, episode steps: 702, steps per second: 179, episode reward: 15.000, mean reward:  0.021 [ 0.000,  1.000], mean action: 2.389 [0.000, 5.000],  loss: --, mean_q: --, mean_eps: --
   1877/200000: episode: 3, duration: 3.550s, episode steps: 622, steps per second: 175, episode reward:  6.000, mean reward:  0.010 [ 0.000,  1.000], mean action: 2.423 [0.000, 5.000],  loss: --, mean_q: --, mean_eps: --
   2445/200000: episode: 4, duration: 3.159s, episode steps: 568, steps per second: 180, episode reward:  7.000, mean reward:  0.012 [ 0.000,  1.000], mean action: 2.366 [0.000, 5.000],  loss: --, mean_q: --, mean_eps: --
   3032/200000: episode: 5, duration: 3.270s, episode steps: 587, steps per second: 179, episode reward:  1.000,

  updates=self.state_updates,


  20001/200000: episode: 31, duration: 5.316s, episode steps: 657, steps per second: 124, episode reward:  9.000, mean reward:  0.014 [ 0.000,  1.000], mean action: 2.458 [0.000, 5.000],  loss: --, mean_q: --, mean_eps: --
  20624/200000: episode: 32, duration: 93.150s, episode steps: 623, steps per second:   7, episode reward:  6.000, mean reward:  0.010 [ 0.000,  1.000], mean action: 2.478 [0.000, 5.000],  loss: 0.006396, mean_q: 0.025591, mean_eps: 0.918752
  21009/200000: episode: 33, duration: 57.350s, episode steps: 385, steps per second:   7, episode reward:  5.000, mean reward:  0.013 [ 0.000,  1.000], mean action: 2.551 [0.000, 5.000],  loss: 0.006563, mean_q: 0.029520, mean_eps: 0.916736
  21537/200000: episode: 34, duration: 79.230s, episode steps: 528, steps per second:   7, episode reward:  8.000, mean reward:  0.015 [ 0.000,  1.000], mean action: 2.377 [0.000, 5.000],  loss: 0.006385, mean_q: 0.028865, mean_eps: 0.914910
  22214/200000: episode: 35, duration: 101.327s, ep

In [None]:
# Testing part to calculate the mean reward.
# The file name is a plain literal: it has no placeholders, so the previous
# `.format(env_name)` call had no effect.
weights_filename = 'dqn_08_weights.h5f'
dqn.load_weights(weights_filename)
dqn.test(env, nb_episodes=10, visualize=False)

In [None]:
from google.colab import drive
import shutil
import glob
import os

# Destination folder on Google Drive. Drive is not mounted in this cell; the
# path below is under /content/gdrive, so it must already be mounted there.
DRIVE_OUTPUT_DIR = '/content/gdrive/MyDrive/Colab_Outputs'


def _copy_matching_files(pattern, dest_dir):
    """Copy every file matching the glob `pattern` into `dest_dir`.

    Args:
        pattern: glob pattern, relative to the current working directory.
        dest_dir: existing directory that receives the copies.

    Returns:
        List of source paths that were copied (empty if nothing matched).
    """
    copied = []
    for archivo in glob.glob(pattern):
        shutil.copy(archivo, os.path.join(dest_dir, os.path.basename(archivo)))
        print(f"¡Archivo {archivo} copiado a Drive!")
        copied.append(archivo)
    if not copied:
        # Make an empty match visible instead of silently reporting success.
        print(f"⚠️ Ningún archivo coincide con '{pattern}'")
    return copied


# 1. Create the destination directory on Drive if it does not exist
os.makedirs(DRIVE_OUTPUT_DIR, exist_ok=True)

# 2. Copy the final weight files
archivos = _copy_matching_files('dqn_08_weights.h5f.*', DRIVE_OUTPUT_DIR)

# 3. Copy the intermediate checkpoints
archivos += _copy_matching_files('checkpoints/dqn_v08_weights*', DRIVE_OUTPUT_DIR)

# Only claim success when something was actually copied.
if archivos:
    print("✅ ¡Todos los archivos copiados!")