<a href="https://colab.research.google.com/github/eflores5000/APR/blob/main/proyecto_final_APR_V01_en_colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## 1.2. Localizar entorno de trabajo: Google colab o local

In [2]:
# ATENCIÓN!! Modificar ruta relativa a la práctica si es distinta (drive_root)
mount='/content/gdrive'
drive_root = mount + "/My Drive/08_MIAR/actividades/proyecto practico"

try:
  from google.colab import drive
  IN_COLAB=True
except:
  IN_COLAB=False


## 1.3. Montar carpeta de datos local (solo Colab)

In [2]:
# Switch to the directory on the Google Drive that you want to use
import os
if IN_COLAB:
  print("We're running Colab")

  if IN_COLAB:
    # Mount the Google Drive at mount
    print("Colab: mounting Google drive on ", mount)

    drive.mount(mount)

    # Create drive_root if it doesn't exist
    create_drive_root = True
    if create_drive_root:
      print("\nColab: making sure ", drive_root, " exists.")
      os.makedirs(drive_root, exist_ok=True)

    # Change to the directory
    print("\nColab: Changing directory to ", drive_root)
    %cd $drive_root
# Verify we're in the correct working directory
%pwd
print("Archivos en el directorio: ")
print(os.listdir())

We're running Colab
Colab: mounting Google drive on  /content/gdrive
Mounted at /content/gdrive

Colab: making sure  /content/gdrive/My Drive/08_MIAR/actividades/proyecto practico  exists.

Colab: Changing directory to  /content/gdrive/My Drive/08_MIAR/actividades/proyecto practico
/content/gdrive/My Drive/08_MIAR/actividades/proyecto practico
Archivos en el directorio: 
['dqn_models']


## 1.4. Instalar librerías necesarias

In [3]:
if IN_COLAB:
  %pip install gym==0.17.3
  %pip install git+https://github.com/Kojoley/atari-py.git
  %pip install keras-rl2==1.0.5
  %pip install tensorflow==2.12  #2.8
else:
  %pip install gym==0.17.3
  %pip install git+https://github.com/Kojoley/atari-py.git
  %pip install pyglet==1.5.0
  %pip install h5py==3.1.0
  %pip install Pillow==9.5.0
  %pip install keras-rl2==1.0.5
  %pip install Keras==2.2.4
  %pip install tensorflow==2.5.3
  %pip install torch==2.0.1
  %pip install agents==1.4.0

Collecting git+https://github.com/Kojoley/atari-py.git
  Cloning https://github.com/Kojoley/atari-py.git to /tmp/pip-req-build-ist0yhvu
  Running command git clone --filter=blob:none --quiet https://github.com/Kojoley/atari-py.git /tmp/pip-req-build-ist0yhvu
  Resolved https://github.com/Kojoley/atari-py.git to commit 86a1e05c0a95e9e6233c3a413521fdb34ca8a089
  Preparing metadata (setup.py) ... [?25l[?25hdone


## **PARTE 3**. Desarrollo y preguntas
Importar librerías

In [25]:
from __future__ import division

from PIL import Image
import numpy as np
import gym

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Flatten, Convolution2D, Permute
#from tensorflow.keras.optimizers import Adam
import tensorflow.keras.backend as K

from rl.agents.dqn import DQNAgent
from rl.policy import LinearAnnealedPolicy, BoltzmannQPolicy, EpsGreedyQPolicy
from rl.memory import SequentialMemory
from rl.core import Processor
from rl.callbacks import FileLogger, ModelIntervalCheckpoint

#### Configuración base

In [32]:
INPUT_SHAPE = (84, 84)
WINDOW_LENGTH = 4

env_name = 'SpaceInvaders-v0'
env = gym.make(env_name)

np.random.seed(123)
env.seed(123)
nb_actions = env.action_space.n

#--- nuevo -- ejfm
height, width, channels = env.observation_space.shape
obs = env.reset()
print(height, width, channels)
print("Forma de la observación:", obs.shape)  # Debe ser (height, width, channels)
env.unwrapped.get_action_meanings()

210 160 3
Forma de la observación: (210, 160, 3)


['NOOP', 'FIRE', 'RIGHT', 'LEFT', 'RIGHTFIRE', 'LEFTFIRE']

In [43]:
class AtariProcessor(Processor):
    def process_observation(self, observation):
        assert observation.ndim == 3  # (height, width, channel)
        img = Image.fromarray(observation)
        img = img.resize(INPUT_SHAPE).convert('L')
        processed_observation = np.array(img)
        assert processed_observation.shape == INPUT_SHAPE
        return processed_observation.astype('uint8')

    def process_state_batch(self, batch):

        # ejfm -- modificado
        # Agrega la dimensión de canales (axis=-1) si no está presente
        if batch.ndim == 4:                         # Forma: (batch_size, window_length, h, w)
            batch = np.expand_dims(batch, axis=-1)  # Nueva forma: (batch_size, window_length, h, w, 1)
            processed_batch = batch.astype('float32') / 255.
        return processed_batch

    def process_reward(self, reward):
        return np.clip(reward, -1., 1.)

## 1. EJFM Implementación de la red neuronal

In [52]:
def build_model(height, width, channels, actions):
    model = Sequential()
    model.add(Convolution2D(32, (8,8), strides=(4,4), activation='relu', input_shape=(3, 84, 84, 1)))
    model.add(Convolution2D(64, (4,4), strides=(2,2), activation='relu'))
    model.add(Convolution2D(64, (3,3), activation='relu'))
    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dense(256, activation='relu'))
    model.add(Dense(nb_actions, activation='linear'))
    return model

In [58]:
model = build_model(84, 84, 1, nb_actions)
model.summary()

## 2. EJFM Implementación de la solución DQN

In [60]:
from rl.agents import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy

In [61]:
def build_agent(model, actions):
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1., value_min=.1, value_test=.2, nb_steps=10000)
    memory = SequentialMemory(limit=1000, window_length=3)
    dqn = DQNAgent(model=model, memory=memory, policy=policy,
                  enable_dueling_network=True, dueling_type='avg',
                   nb_actions=nb_actions, nb_steps_warmup=1000,
                   processor=AtariProcessor()
                  )
    return dqn

In [62]:
from keras.optimizers import Adam  # ¡Usar keras, NO tf.keras!

dqn = build_agent(model, nb_actions)
#dqn.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4), loss='mse')
#dqn.compile(Adam(learning_rate=1e-4))
dqn.compile(Adam(lr=1e-4))

  super().__init__(name, **kwargs)


In [63]:
dqn.fit(env, nb_steps=10000, visualize=False, verbose=2)

Training for 10000 steps ...


  updates=self.state_updates,


  424/10000: episode: 1, duration: 4.282s, episode steps: 424, steps per second:  99, episode reward:  4.000, mean reward:  0.009 [ 0.000,  1.000], mean action: 2.460 [0.000, 5.000],  loss: --, mean_q: --, mean_eps: --


  updates=self.state_updates,


 1252/10000: episode: 2, duration: 86.471s, episode steps: 828, steps per second:  10, episode reward: 10.000, mean reward:  0.012 [ 0.000,  1.000], mean action: 2.483 [0.000, 5.000],  loss: 0.006499, mean_q: 0.058046, mean_eps: 0.898660
 2277/10000: episode: 3, duration: 315.683s, episode steps: 1025, steps per second:   3, episode reward: 15.000, mean reward:  0.015 [ 0.000,  1.000], mean action: 2.417 [0.000, 5.000],  loss: 0.004590, mean_q: 0.078690, mean_eps: 0.841240
 2908/10000: episode: 4, duration: 191.471s, episode steps: 631, steps per second:   3, episode reward: 10.000, mean reward:  0.016 [ 0.000,  1.000], mean action: 2.393 [0.000, 5.000],  loss: 0.004924, mean_q: 0.114718, mean_eps: 0.766720
 3315/10000: episode: 5, duration: 124.817s, episode steps: 407, steps per second:   3, episode reward:  5.000, mean reward:  0.012 [ 0.000,  1.000], mean action: 2.565 [0.000, 5.000],  loss: 0.003500, mean_q: 0.098987, mean_eps: 0.720010
 4284/10000: episode: 6, duration: 299.307s,

<keras.callbacks.History at 0x7fcfc7633090>

In [66]:
dqn.save_weights('dqn_01_weights.h5f')

In [67]:
# Testing part to calculate the mean reward
#weights_filename = 'dqn_{}_weights.h5f'.format(env_name)
weights_filename = 'dqn_01_weights.h5f'.format(env_name)
dqn.load_weights(weights_filename)
dqn.test(env, nb_episodes=10, visualize=False)

Testing for 10 episodes ...
Episode 1: reward: 3.000, steps: 515
Episode 2: reward: 8.000, steps: 686
Episode 3: reward: 5.000, steps: 518
Episode 4: reward: 8.000, steps: 639
Episode 5: reward: 19.000, steps: 1025
Episode 6: reward: 9.000, steps: 649
Episode 7: reward: 12.000, steps: 817
Episode 8: reward: 7.000, steps: 698
Episode 9: reward: 5.000, steps: 384
Episode 10: reward: 19.000, steps: 1135


<keras.callbacks.History at 0x7fcfc6fa5850>