## Código original

In [None]:
# 📌 INSTALAR DEPENDENCIAS EN GOOGLE COLAB
!sudo apt-get update --fix-missing
!sudo apt-get install -y xvfb ffmpeg
!pip install -U gym
!pip install pygame
!pip install keras
!pip install tensorflow
!pip install pyvirtualdisplay

# 📌 IMPORTAR LIBRERÍAS NECESARIAS
import numpy as np
import random
import cv2
import base64
from collections import deque
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import Adam
from keras.callbacks import TensorBoard
from IPython.display import HTML
import gym
from pyvirtualdisplay import Display

# 🎥 Enable rendering in Colab: start a pyvirtualdisplay virtual display
# (400x300, not visible) before any environment is rendered.
display = Display(visible=0, size=(400, 300))
display.start()
print("¡Virtual Display iniciado correctamente!")

# 📌 Record metrics for TensorBoard: Keras callback that logs under ./logs.
tensorboard_callback = TensorBoard(log_dir="./logs")

# 🔥 DEFINICIÓN DEL AGENTE DQL
class DQLAgent():
    """Deep Q-learning agent for an environment with a discrete action space.

    A small fully connected Keras network maps a state vector to one Q-value
    per action. Transitions are stored in a bounded replay memory and the
    network is fitted on random minibatches drawn from that memory.
    """

    def __init__(self, env, callbacks=None):
        """Read the problem dimensions from *env* and build the Q-network.

        Args:
            env: Environment exposing ``observation_space.shape`` and a
                discrete ``action_space`` providing ``n`` and ``sample()``.
            callbacks: Optional list of Keras callbacks handed to
                ``model.fit``. When omitted, the module-level
                ``tensorboard_callback`` is used.
        """
        self.state_size = env.observation_space.shape[0]
        self.action_size = env.action_space.n
        # Keep the action space on the agent so act() does not silently rely
        # on a module-level variable that happens to be called ``env``.
        self.action_space = env.action_space
        self.callbacks = [tensorboard_callback] if callbacks is None else callbacks
        self.gamma = 0.95            # discount factor for future rewards
        self.learning_rate = 0.001   # Adam learning rate
        self.epsilon = 1.0           # current exploration probability
        self.epsilon_decay = 0.995   # multiplicative decay applied per call
        self.epsilon_min = 0.01      # decay stops once epsilon is at/below this
        self.memory = deque(maxlen=1000)  # replay buffer; oldest entries drop out
        self.model = self.build_model()

    def build_model(self):
        """Build and compile the Q-network (state -> one Q-value per action)."""
        model = Sequential()
        model.add(Dense(48, input_dim=self.state_size, activation='tanh'))
        model.add(Dense(self.action_size, activation='linear'))
        model.compile(loss='mse', optimizer=Adam(learning_rate=self.learning_rate))
        return model

    def remember(self, state, action, reward, next_state, done):
        """Append one transition tuple to the replay memory."""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Choose an action epsilon-greedily.

        Args:
            state: Network input of shape ``(1, state_size)``.

        Returns:
            A random action sampled from the action space with probability
            ``epsilon``, otherwise the index of the largest predicted Q-value.
        """
        if random.uniform(0, 1) <= self.epsilon:
            return self.action_space.sample()
        q_values = self.model.predict(state, verbose=0)
        return np.argmax(q_values[0])

    def replay(self, batch_size):
        """Fit the Q-network on one random minibatch of stored transitions.

        Does nothing until the memory holds at least ``batch_size``
        transitions. The whole minibatch is evaluated with two ``predict``
        calls and trained with a single ``fit`` call (one gradient step on
        the batch) instead of three Keras calls per sample.
        """
        if len(self.memory) < batch_size:
            return
        minibatch = random.sample(self.memory, batch_size)

        states = np.vstack([t[0] for t in minibatch])
        actions = np.array([t[1] for t in minibatch], dtype=int)
        rewards = np.array([t[2] for t in minibatch], dtype=float)
        next_states = np.vstack([t[3] for t in minibatch])
        dones = np.array([t[4] for t in minibatch], dtype=bool)

        # Bellman target: the reward alone on terminal transitions, otherwise
        # reward plus the discounted best Q-value of the next state.
        best_next = np.amax(self.model.predict(next_states, verbose=0), axis=1)
        targets = np.where(dones, rewards, rewards + self.gamma * best_next)

        # Start from the current predictions so that only the Q-value of the
        # action actually taken contributes to the loss.
        train_target = np.array(self.model.predict(states, verbose=0))
        train_target[np.arange(len(minibatch)), actions] = targets
        self.model.fit(states, train_target, batch_size=len(minibatch),
                       epochs=1, verbose=0, callbacks=self.callbacks)

    def adaptiveEGreedy(self):
        """Decay epsilon by ``epsilon_decay`` while it is above ``epsilon_min``."""
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

# 📌 Create the environment; rgb_array rendering lets frames be captured later.
env = gym.make('CartPole-v1', render_mode="rgb_array")

# 📌 Train the agent
if __name__ == "__main__":
    agent = DQLAgent(env)
    batch_size = 16
    episodes = 10

    for e in range(episodes):
        # reset() may return (observation, info); keep only the observation.
        state = env.reset()
        if isinstance(state, tuple):
            state = state[0]
        state = np.reshape(state, [1, agent.state_size])
        steps = 0

        while True:
            action = agent.act(state)
            # step() returns (obs, reward, terminated, truncated, info).
            next_state, reward, terminated, truncated, _ = env.step(action)
            next_state = np.reshape(next_state, [1, agent.state_size])
            # Only a real termination is stored as "done": a time-limit
            # truncation is not a terminal state, so its target should still
            # bootstrap from the next state.
            agent.remember(state, action, reward, next_state, terminated)
            agent.replay(batch_size)
            agent.adaptiveEGreedy()
            state = next_state

            # The episode ends on either signal; ignoring `truncated` would
            # keep stepping an environment that has already hit its limit.
            if terminated or truncated:
                print(f'Episode: {e}, Time: {steps}')
                break
            steps += 1

        # Periodic checkpoint every 5 episodes.
        if e % 5 == 0:
            agent.model.save('cartpole_dql.keras')

    print("Entrenamiento finalizado. Guardando modelo...")
    agent.model.save('cartpole_dql_final.keras')

# 🎥 GRABAR VIDEO DEL AGENTE
def record_video(env, agent, video_path="cartpole_video.mp4", frames=500):
    """Run the agent greedily for one episode and write the frames to an MP4.

    Args:
        env: Environment whose ``render()`` returns RGB frames. It is reset
            at the start and closed before this function returns.
        agent: Object exposing ``model.predict``; the action with the largest
            predicted value is taken at every step.
        video_path: Output file path.
        frames: Maximum number of frames to record.
    """
    obs = env.reset()
    if isinstance(obs, tuple):
        obs = obs[0]
    obs = np.reshape(obs, [1, -1])

    frame_shape = (600, 400)  # passed both to VideoWriter and to cv2.resize
    out = cv2.VideoWriter(video_path, cv2.VideoWriter_fourcc(*'mp4v'), 30, frame_shape)
    render_failed = False

    try:
        for _ in range(frames):
            frame = env.render()
            if frame is None:
                print("⚠️ Error: El frame renderizado es None.")
                render_failed = True
                break

            # The frame is converted RGB -> BGR before being written.
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            frame = cv2.resize(frame, frame_shape)
            out.write(frame)

            action = np.argmax(agent.model.predict(obs, verbose=0))
            obs, _, terminated, truncated, _ = env.step(action)
            obs = np.reshape(obs, [1, -1])

            # Stop on termination or on the environment's time limit.
            if terminated or truncated:
                break
    finally:
        # Release the writer and the environment even if a step fails, so
        # the output file is finalised and no handles are leaked.
        out.release()
        env.close()

    # Do not claim success when rendering produced no frame.
    if not render_failed:
        print("🎥 Video guardado correctamente en", video_path)

# 📌 LLAMAR A LA FUNCIÓN PARA GRABAR EL VIDEO
record_video(env, agent, "cartpole_video.mp4")

# 📌 CONVERTIR Y MOSTRAR EL VIDEO EN GOOGLE COLAB
!ffmpeg -i cartpole_video.mp4 -vcodec libx264 cartpole_video_fixed.mp4

from IPython.display import HTML
import base64

def display_video(video_path):
    """Return an inline HTML5 video player for the MP4 file at *video_path*.

    The whole file is read and embedded in the page as a base64 ``data:``
    URL, so the returned ``HTML`` object does not depend on the file staying
    on disk.
    """
    # The context manager closes the file handle as soon as it has been read.
    with open(video_path, "rb") as video_file:
        encoded = base64.b64encode(video_file.read()).decode()
    video_url = f"data:video/mp4;base64,{encoded}"
    return HTML(f'<video width="600" height="400" controls><source src="{video_url}" type="video/mp4"></video>')

# 📌 Show the re-encoded video in Colab: the HTML object returned here is the
# cell's last expression.
display_video("cartpole_video_fixed.mp4")


Get:1 http://security.ubuntu.com/ubuntu jammy-security InRelease [129 kB]
Get:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease [1,581 B]
Hit:3 http://archive.ubuntu.com/ubuntu jammy InRelease
Get:4 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease [3,632 B]
Hit:5 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Hit:6 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease
Hit:7 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease
Get:8 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [128 kB]
Get:9 http://security.ubuntu.com/ubuntu jammy-security/restricted amd64 Packages [3,688 kB]
Get:10 http://archive.ubuntu.com/ubuntu jammy-backports InRelease [127 kB]
Get:11 https://r2u.stat.illinois.edu/ubuntu jammy InRelease [6,555 B]
Get:12 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  Packages [1,321 kB]
Get:13 http://security.ubuntu.com/ub

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  if not isinstance(terminated, (bool, np.bool8)):


Episode: 0, Time: 44
Episode: 1, Time: 33
Episode: 2, Time: 14
Episode: 3, Time: 19
Episode: 4, Time: 10
Episode: 5, Time: 11
Episode: 6, Time: 11
Episode: 7, Time: 11
Episode: 8, Time: 7
Episode: 9, Time: 8
Entrenamiento finalizado. Guardando modelo...
🎥 Video guardado correctamente en cartpole_video.mp4
ffmpeg version 4.4.2-0ubuntu0.22.04.1 Copyright (c) 2000-2021 the FFmpeg developers
  built with gcc 11 (Ubuntu 11.2.0-19ubuntu1)
  configuration: --prefix=/usr --extra-version=0ubuntu0.22.04.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenj

# Mountain Car (MountainCarContinuous-v0)

In [1]:
# 📌 INSTALAR DEPENDENCIAS EN GOOGLE COLAB
!sudo apt-get update --fix-missing
!sudo apt-get install -y xvfb ffmpeg
!pip install -U gym
!pip install pygame
!pip install keras
!pip install tensorflow
!pip install pyvirtualdisplay

# 📌 IMPORTAR LIBRERÍAS NECESARIAS
import numpy as np
import random
import cv2
import base64
from collections import deque
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
from keras.callbacks import TensorBoard
from IPython.display import HTML
import gym
from pyvirtualdisplay import Display

0% [Working]            Get:1 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease [3,632 B]
0% [Waiting for headers] [Waiting for headers] [1 InRelease 0 B/3,632 B 0%] [Co0% [Waiting for headers] [Waiting for headers] [Connected to r2u.stat.illinois.                                                                               Get:2 http://security.ubuntu.com/ubuntu jammy-security InRelease [129 kB]
Get:3 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease [1,581 B]
Hit:4 http://archive.ubuntu.com/ubuntu jammy InRelease
Get:5 https://r2u.stat.illinois.edu/ubuntu jammy InRelease [6,555 B]
Get:6 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [128 kB]
Get:7 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  Packages [1,369 kB]
Hit:8 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Hit:9 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease
Get:10 http:/

In [None]:
# 🎥 Enable rendering in Colab: start a pyvirtualdisplay virtual display
# (400x300, not visible) before any environment is rendered.
display = Display(visible=0, size=(400, 300))
display.start()
print("¡Virtual Display iniciado correctamente!")

# 📌 Record metrics for TensorBoard: Keras callback that logs under ./logs.
tensorboard_callback = TensorBoard(log_dir="./logs")

# 🔥 DEFINICIÓN DEL AGENTE DQL
class DQLAgent():
    """Deep Q-learning agent for a 1-D continuous action space.

    Q-learning needs a finite set of actions to take a maximum over, so the
    continuous range ``[low, high]`` is discretised into ``n_actions`` evenly
    spaced values. The network outputs one unbounded (linear) Q-value per
    discrete action; a single ``tanh`` output cannot serve as both the action
    and its Q-value, nor represent returns outside ``[-1, 1]``.
    """

    def __init__(self, env, n_actions=11, callbacks=None):
        """Read the problem dimensions from *env* and build the Q-network.

        Args:
            env: Environment exposing ``observation_space.shape`` and an
                ``action_space`` with array-like ``low`` / ``high`` bounds.
            n_actions: Number of evenly spaced discrete actions (>= 2).
            callbacks: Optional list of Keras callbacks handed to
                ``model.fit``. When omitted, the module-level
                ``tensorboard_callback`` is used.

        Raises:
            ValueError: If ``n_actions`` is smaller than 2.
        """
        if n_actions < 2:
            raise ValueError("n_actions must be at least 2")
        self.state_size = env.observation_space.shape[0]
        # Candidate actions, from the lower to the upper bound (inclusive).
        self.action_values = np.linspace(
            env.action_space.low[0], env.action_space.high[0], n_actions
        ).astype(np.float32)
        self.action_size = n_actions
        self.callbacks = [tensorboard_callback] if callbacks is None else callbacks
        self.gamma = 0.95            # discount factor for future rewards
        self.learning_rate = 0.001   # Adam learning rate
        self.epsilon = 1.0           # current exploration probability
        self.epsilon_decay = 0.995   # multiplicative decay applied per call
        self.epsilon_min = 0.01      # decay stops once epsilon is at/below this
        self.memory = deque(maxlen=1000)  # replay buffer; oldest entries drop out
        self.model = self.build_model()

    def build_model(self):
        """Build and compile the Q-network (state -> one Q-value per action)."""
        model = Sequential()
        model.add(Dense(48, input_dim=self.state_size, activation='tanh'))
        # Linear output: Q-values are unbounded estimates of return.
        model.add(Dense(self.action_size, activation='linear'))
        model.compile(loss='mse', optimizer=Adam(learning_rate=self.learning_rate))
        return model

    def remember(self, state, action, reward, next_state, done):
        """Append one transition tuple to the replay memory."""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Choose an action epsilon-greedily.

        Args:
            state: Network input of shape ``(1, state_size)``.

        Returns:
            A float32 array of shape ``(1,)`` holding one of the discrete
            action values: a random one with probability ``epsilon``,
            otherwise the one with the largest predicted Q-value.
        """
        if random.uniform(0, 1) <= self.epsilon:
            index = random.randrange(self.action_size)
        else:
            index = int(np.argmax(self.model.predict(state, verbose=0)[0]))
        return np.array([self.action_values[index]], dtype=np.float32)

    def replay(self, batch_size):
        """Fit the Q-network on one random minibatch of stored transitions.

        Does nothing until the memory holds at least ``batch_size``
        transitions. The minibatch is evaluated with two ``predict`` calls
        and trained with a single ``fit`` call (one gradient step).
        """
        if len(self.memory) < batch_size:
            return
        minibatch = random.sample(self.memory, batch_size)

        states = np.vstack([t[0] for t in minibatch])
        taken = np.array([np.ravel(t[1])[0] for t in minibatch], dtype=np.float32)
        rewards = np.array([t[2] for t in minibatch], dtype=float)
        next_states = np.vstack([t[3] for t in minibatch])
        dones = np.array([t[4] for t in minibatch], dtype=bool)

        # Map every stored action value back to the nearest discrete action.
        action_idx = np.abs(self.action_values[None, :] - taken[:, None]).argmin(axis=1)

        # Bellman target: the reward alone on terminal transitions, otherwise
        # reward plus the discounted best Q-value of the next state.
        best_next = np.amax(self.model.predict(next_states, verbose=0), axis=1)
        targets = np.where(dones, rewards, rewards + self.gamma * best_next)

        # Start from the current predictions so that only the Q-value of the
        # action actually taken contributes to the loss.
        train_target = np.array(self.model.predict(states, verbose=0))
        train_target[np.arange(len(minibatch)), action_idx] = targets
        self.model.fit(states, train_target, batch_size=len(minibatch),
                       epochs=1, verbose=0, callbacks=self.callbacks)

    def adaptiveEGreedy(self):
        """Decay epsilon by ``epsilon_decay`` while it is above ``epsilon_min``."""
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

# 📌 Create the environment. render_mode="rgb_array" is required for
# env.render() to return frames when the video is recorded afterwards.
env = gym.make('MountainCarContinuous-v0', render_mode="rgb_array")

# 📌 Train the agent
if __name__ == "__main__":
    agent = DQLAgent(env)
    batch_size = 16
    episodes = 10

    for e in range(episodes):
        # reset() may return (observation, info); keep only the observation.
        state = env.reset()
        if isinstance(state, tuple):
            state = state[0]
        state = np.reshape(state, [1, agent.state_size])
        steps = 0

        while True:
            action = agent.act(state)
            # step() returns (obs, reward, terminated, truncated, info).
            next_state, reward, terminated, truncated, _ = env.step(action)
            next_state = np.reshape(next_state, [1, agent.state_size])
            # Only a real termination is stored as "done": a time-limit
            # truncation is not a terminal state, so its target should still
            # bootstrap from the next state.
            agent.remember(state, action, reward, next_state, terminated)
            agent.replay(batch_size)
            agent.adaptiveEGreedy()
            state = next_state

            # This environment only terminates when the goal is reached, so
            # the time-limit `truncated` flag must also end the episode;
            # otherwise an untrained agent keeps this loop running indefinitely.
            if terminated or truncated:
                print(f'Episode: {e}, Time: {steps}')
                break
            steps += 1

        # Periodic checkpoint every 5 episodes.
        if e % 5 == 0:
            agent.model.save('mountaincar_dql.keras')

    print("Entrenamiento finalizado. Guardando modelo...")
    agent.model.save('mountaincar_dql_final.keras')

# 🎥 GRABAR VIDEO DEL AGENTE
def record_video(env, agent, video_path="mountaincar_video.mp4", frames=500):
    """Run the agent for one episode and write the rendered frames to an MP4.

    Args:
        env: Environment whose ``render()`` returns RGB frames. It is reset
            at the start and closed before this function returns.
        agent: Object whose ``act(obs)`` returns the action passed to
            ``env.step``.
        video_path: Output file path.
        frames: Maximum number of frames to record.
    """
    obs = env.reset()
    if isinstance(obs, tuple):
        obs = obs[0]
    obs = np.reshape(obs, [1, -1])

    frame_shape = (600, 400)  # passed both to VideoWriter and to cv2.resize
    out = cv2.VideoWriter(video_path, cv2.VideoWriter_fourcc(*'mp4v'), 30, frame_shape)
    render_failed = False

    try:
        for _ in range(frames):
            frame = env.render()
            if frame is None:
                print("⚠️ Error: El frame renderizado es None.")
                render_failed = True
                break

            # The frame is converted RGB -> BGR before being written.
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            frame = cv2.resize(frame, frame_shape)
            out.write(frame)

            # The agent's action is forwarded to the environment as-is
            # (no argmax here).
            action = agent.act(obs)
            obs, _, terminated, truncated, _ = env.step(action)
            obs = np.reshape(obs, [1, -1])

            # Stop on termination or on the environment's time limit.
            if terminated or truncated:
                break
    finally:
        # Release the writer and the environment even if a step fails, so
        # the output file is finalised and no handles are leaked.
        out.release()
        env.close()

    # Do not claim success when rendering produced no frame.
    if not render_failed:
        print("🎥 Video guardado correctamente en", video_path)

# 📌 LLAMAR A LA FUNCIÓN PARA GRABAR EL VIDEO
record_video(env, agent, "mountaincar_video.mp4")

# 📌 CONVERTIR Y MOSTRAR EL VIDEO EN GOOGLE COLAB
!ffmpeg -i mountaincar_video.mp4 -vcodec libx264 mountaincar_video_fixed.mp4

def display_video(video_path):
    """Return an inline HTML5 video player for the MP4 file at *video_path*.

    The whole file is read and embedded in the page as a base64 ``data:``
    URL, so the returned ``HTML`` object does not depend on the file staying
    on disk.
    """
    # The context manager closes the file handle as soon as it has been read.
    with open(video_path, "rb") as video_file:
        encoded = base64.b64encode(video_file.read()).decode()
    video_url = f"data:video/mp4;base64,{encoded}"
    return HTML(f'<video width="600" height="400" controls><source src="{video_url}" type="video/mp4"></video>')

# 📌 Show the re-encoded video in Colab: the HTML object returned here is the
# cell's last expression.
display_video("mountaincar_video_fixed.mp4")


¡Virtual Display iniciado correctamente!


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  if not isinstance(terminated, (bool, np.bool8)):
