<a href="https://colab.research.google.com/github/LuisIrigoyen/trabajo_final_RL/blob/pruebas_CarlosKong/proyecto_final_APR_V01_en_colab_ck_725000.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# MONTAJE EN GOOGLE DRIVE
mount='/content/gdrive'
drive_root = mount + "/My Drive/08_MIAR/actividades/proyecto practico"

try:
  from google.colab import drive
  IN_COLAB=True
except:
  IN_COLAB=False

import os
if IN_COLAB:
  print("Montando Google Drive en", mount)
  drive.mount(mount)
  os.makedirs(drive_root, exist_ok=True)
  %cd $drive_root
%pwd

# DEPENDENCY INSTALLATION
# Each package is installed with the %pip magic, in the order listed below.
# gym, keras-rl2 and tensorflow are pinned to exact versions.
%pip install gym==0.17.3
# atari-py is installed from the Kojoley GitHub repository instead of PyPI.
%pip install git+https://github.com/Kojoley/atari-py.git
%pip install keras-rl2==1.0.5
%pip install tensorflow==2.12
%pip install opencv-python

# IMPORTACIONES
import numpy as np
import gym
import cv2
#import tensorflow as tf
from PIL import Image

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Flatten, Convolution2D, Permute
from rl.agents.dqn import DQNAgent
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy
from rl.memory import SequentialMemory
from rl.core import Processor
from rl.callbacks import FileLogger, ModelIntervalCheckpoint

In [None]:
# ENVIRONMENT
# Create the Atari environment and read the number of actions it exposes;
# `nb_actions` sizes the network output layer and configures the agent.
env = gym.make('SpaceInvaders-v0')  # Change if desired: 'Breakout-v0'
nb_actions = env.action_space.n

# PREPROCESSING
class AtariProcessor(Processor):
    """Observation, batch and reward preprocessing handed to the agent.

    Frames are converted to 84x84 grayscale ``uint8`` images, state batches
    are cast to ``float32`` and divided by 255, and rewards are clipped to
    the interval [-1, 1].
    """

    def process_observation(self, observation):
        """Return ``observation`` as an 84x84 grayscale ``uint8`` image.

        The colour conversion uses ``cv2.COLOR_RGB2GRAY``, so the input is
        assumed to be an RGB frame.
        """
        gray_frame = cv2.cvtColor(observation, cv2.COLOR_RGB2GRAY)
        return cv2.resize(gray_frame, (84, 84)).astype('uint8')

    def process_state_batch(self, batch):
        """Cast ``batch`` to ``float32`` and divide it by 255."""
        as_float = batch.astype('float32')
        return as_float / 255.0

    def process_reward(self, reward):
        """Clip ``reward`` to the range [-1, 1]."""
        lower, upper = -1., 1.
        return np.clip(reward, lower, upper)

In [None]:
# DQN MODEL
# The input is declared as (4, 84, 84) and permuted to (84, 84, 4) before the
# three convolutional layers; the final Dense layer has one linear output per
# action.
model = Sequential([
    Permute((2, 3, 1), input_shape=(4, 84, 84)),
    Convolution2D(32, (8, 8), strides=(4, 4), activation='relu'),
    Convolution2D(64, (4, 4), strides=(2, 2), activation='relu'),
    Convolution2D(64, (3, 3), strides=(1, 1), activation='relu'),
    Flatten(),
    Dense(512, activation='relu'),
    Dense(nb_actions, activation='linear'),
])

# MEMORY AND POLICY
# Replay memory limited to 100000 entries with a window of 4 observations,
# matching the first dimension of the model input shape.
memory = SequentialMemory(window_length=4, limit=100000)

# Epsilon-greedy policy whose `eps` attribute is annealed from 1.0 to 0.1
# over 200000 steps; 0.05 is the value configured for testing.
policy = LinearAnnealedPolicy(
    EpsGreedyQPolicy(),
    attr='eps',
    value_max=1.0,
    value_min=0.1,
    value_test=0.05,
    nb_steps=200000,
)

In [None]:
# DQN AGENT
# Double DQN with an 'avg' dueling head; the first 50000 steps are warm-up
# and the target model is updated every 10000 steps.
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory,
               nb_steps_warmup=50000, enable_double_dqn=True,
               enable_dueling_network=True, dueling_type='avg',
               target_model_update=10000, policy=policy,
               processor=AtariProcessor())

# Import Adam from `tensorflow.keras`, like the other Keras imports in this
# notebook, and pass the rate as `learning_rate`. The old `lr` keyword is
# deprecated; with the pinned TensorFlow 2.12 the optimizer warns about it
# and (per the Keras optimizer source) does not apply the value, which would
# leave the default learning rate in place instead of 0.00025.
from tensorflow.keras.optimizers import Adam
dqn.compile(Adam(learning_rate=0.00025), metrics=['mae'])

# CHECKPOINTS AND LOGGING
# `{step}` is a placeholder filled in by the checkpoint callback.
checkpoint_path = drive_root + '/dqn_weights_{step}.h5f'
weights_filename = drive_root + '/dqn_final_weights.h5f'

# Save weights every 25000 steps and write the training log every 10000.
callbacks = [
    ModelIntervalCheckpoint(checkpoint_path, interval=25000),
    FileLogger(drive_root + '/dqn_log.json', interval=10000)
]

In [None]:
# RESUME FROM A SAVED CHECKPOINT
# Step count of the checkpoint to resume from. The weight file name is derived
# from this value so the loaded file and the step bookkeeping cannot drift
# apart when the notebook is re-run from a later checkpoint.
starting_step = 100000

# Steps still to train to reach the overall budget of 200000 steps.
remaining_steps = 200000 - starting_step

# The path is relative, so it resolves against the current working directory.
# NOTE(review): only the weights are restored here. The replay memory starts
# empty, and the annealed epsilon presumably restarts from its maximum when
# `fit` is called again; confirm against keras-rl before relying on resumed
# runs being equivalent to one uninterrupted run.
dqn.load_weights(f"dqn_weights_{starting_step}.h5f")


# Custom callback that continues checkpoint numbering across resumed runs
class OffsetModelCheckpoint(ModelIntervalCheckpoint):
    """Interval checkpoint whose file names continue from ``offset``.

    Weights are saved every ``interval`` steps of the current run, exactly as
    the parent callback does, but the ``{step}`` placeholder in ``filepath``
    is filled with ``steps_in_this_run + offset``. A run resumed at step
    100000 therefore writes ``..._125000``, ``..._150000`` and so on instead
    of overwriting the earlier ``..._25000`` files.

    The previous implementation assigned ``self.step`` before delegating to
    the parent. The parent formats the file name from its own internal
    counter (see the keras-rl ``ModelIntervalCheckpoint`` source), so the
    offset never reached the file name.

    Args:
        filepath: Weight file path template containing ``{step}``.
        interval: Number of steps between two checkpoints.
        offset: Number of steps already trained before this run.
    """

    def __init__(self, filepath, interval, offset):
        super().__init__(filepath, interval)
        # Stored explicitly so on_step_end does not depend on parent internals.
        self.filepath = filepath
        self.interval = interval
        self.offset = offset
        # Steps observed by this callback during the current run.
        self._steps_seen = 0

    def on_step_end(self, step, logs=None):
        """Save the weights every ``interval`` steps under an offset name.

        ``step`` is accepted for interface compatibility but not used: the
        callback keeps its own running count of the steps it has seen.
        """
        # Avoid a shared mutable default argument.
        logs = {} if logs is None else logs

        self._steps_seen += 1
        if self._steps_seen % self.interval != 0:
            return

        global_step = self._steps_seen + self.offset
        target_path = self.filepath.format(step=global_step, **logs)
        self.model.save_weights(target_path, overwrite=True)

# Continue training for the remaining steps, starting from `starting_step`.
# The log goes to its own file, and the checkpoint callback receives the
# starting step as its offset.
resume_logger = FileLogger("dqn_log_continuacion.json", interval=10000)
resume_checkpoint = OffsetModelCheckpoint(
    "dqn_weights_{step}.h5f",
    interval=25000,
    offset=starting_step,
)

dqn.fit(
    env,
    nb_steps=remaining_steps,
    visualize=False,
    verbose=2,
    callbacks=[resume_logger, resume_checkpoint],
)