In [None]:
# RL - DQN con Keras y Gymnasium
################################
# Este cuaderno Jupyter presenta una implementación del algoritmo Deep Q-Network (DQN) aplicado a entornos de la librería Gymnasium.
# Utilizando la librería Keras para construir y entrenar una red neuronal, exploramos cómo un agente aprende de las recompensas del entorno.
# Creado por: [@MrCabss69]
# Fecha de creación: Thu Mar 11 2024


In [None]:
import random
from collections import deque
import gymnasium as gym
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam

In [None]:
# Configuration constants
GAMMA = 0.99  # discount applied to the max next-state Q-value when train_model builds targets
LEARNING_RATE = 0.001  # learning rate handed to the Adam optimizer in create_model

In [None]:
class ExperienceReplayBuffer:
    """Bounded FIFO store of experiences for reinforcement learning.

    Backed by a ``deque`` with ``maxlen=capacity``: once full, appending a
    new experience silently discards the oldest one.
    """

    def __init__(self, capacity=1000):
        # deque(maxlen=...) handles eviction of the oldest entry on overflow.
        self.buffer = deque(maxlen=capacity)

    def __len__(self):
        """Return the number of experiences currently stored."""
        return len(self.buffer)

    def add(self, experience):
        """Append a single experience to the buffer."""
        self.buffer.append(experience)

    def sample(self, batch_size):
        """Return a random sample (without replacement) of stored experiences.

        The requested size is clamped to the number of stored items, so asking
        for more than is available returns every stored experience once.
        """
        available = len(self.buffer)
        count = batch_size if batch_size < available else available
        return random.sample(self.buffer, count)

In [None]:
def create_model(input_shape, num_actions):
    """Build and compile the feed-forward Q-network for the DQN agent.

    Args:
        input_shape: Shape tuple passed as ``input_shape`` to the first
            Dense layer.
        num_actions: Number of units in the linear output layer (one
            Q-value per action).

    Returns:
        A compiled ``Sequential`` model with hidden Dense layers of 512, 256
        and 128 ReLU units, trained with Adam (``LEARNING_RATE``) on MSE loss.
    """
    network = Sequential()
    network.add(Dense(512, activation='relu', input_shape=input_shape))
    for units in (256, 128):
        network.add(Dense(units, activation='relu'))
    # Linear head: Q-values are unbounded regression targets.
    network.add(Dense(num_actions, activation='linear'))
    network.compile(loss='mse', optimizer=Adam(learning_rate=LEARNING_RATE))
    return network

def epsilon_greedy(state, model, epsilon=0.1):
    """Select an action using the epsilon-greedy policy.

    Args:
        state: A single observation as a NumPy array without a batch axis;
            a leading batch axis is added before it is passed to the model.
        model: Object exposing ``output_shape`` (whose last entry is the
            number of actions) and ``predict(batch, verbose=...)`` returning
            one row of Q-values per input row.
        epsilon: Probability of choosing a uniformly random action instead of
            the greedy one.

    Returns:
        int: Index of the chosen action.
    """
    if np.random.rand() < epsilon:
        # Explore: uniform random action index.
        return int(np.random.randint(model.output_shape[-1]))
    # Exploit: verbose=0 stops Keras from printing a progress bar for every
    # single environment step, which otherwise floods the notebook output.
    q_values = model.predict(state[np.newaxis], verbose=0)
    return int(np.argmax(q_values[0]))

In [None]:
def train_model(model, target_model, buffer, env, episodes=1000, batch_size=64, update_freq=10):
    """Train the DQN agent's online network by interacting with ``env``.

    Actions are chosen with ``epsilon_greedy`` (at its default epsilon); each
    transition is stored in ``buffer`` and, once the buffer holds at least
    ``batch_size`` items, one gradient step is taken per environment step on
    a randomly sampled batch.

    Args:
        model: Online Q-network; updated in place via ``fit``.
        target_model: Network used to compute bootstrap targets; its weights
            are overwritten with ``model``'s every ``update_freq`` episodes.
        buffer: Replay buffer providing ``add``, ``sample`` and ``len``.
        env: Gymnasium-style environment (``reset`` returns ``(obs, info)``,
            ``step`` returns a 5-tuple).
        episodes: Number of episodes to run.
        batch_size: Minimum buffer size before training and size of each
            sampled batch.
        update_freq: Number of episodes between target-network syncs.

    Returns:
        None. Prints the score of each episode.
    """
    for episode in range(episodes):
        # Sync the target network once every `update_freq` episodes. Doing it
        # at episode level (rather than inside the step loop) keeps the target
        # fixed between syncs; episode 0 also aligns it with the online net.
        if episode % update_freq == 0:
            target_model.set_weights(model.get_weights())

        state, _ = env.reset()
        done = False
        score = 0

        while not done:
            action = epsilon_greedy(state, model)
            next_state, reward, terminated, truncated, _ = env.step(action)
            done = terminated or truncated
            score += reward
            # Store `terminated`, not `done`: a truncated episode (time limit)
            # did not reach a terminal state, so its next state must still be
            # bootstrapped from when computing the target.
            buffer.add((state, action, reward, next_state, terminated))
            state = next_state

            if len(buffer) >= batch_size:
                samples = buffer.sample(batch_size)
                states, actions, rewards, next_states, terminals = map(np.array, zip(*samples))

                # Bellman target: r + GAMMA * max_a' Q_target(s', a'),
                # with the bootstrap term dropped for terminal transitions.
                max_next_q = np.amax(target_model.predict(next_states, verbose=0), axis=1)
                max_next_q[terminals.astype(bool)] = 0
                targets = rewards + GAMMA * max_next_q

                # Only the taken action's Q-value is moved toward the target;
                # the other outputs keep their current predictions (zero loss).
                q_values = model.predict(states, verbose=0)
                q_values[np.arange(len(samples)), actions] = targets
                model.fit(states, q_values, epochs=1, verbose=0)

        print(f"Episode: {episode + 1}/{episodes}, Score: {score}")

In [None]:
# Environment and the dimensions the networks are built from.
env = gym.make('CartPole-v1')
state_shape = env.observation_space.shape
num_actions = env.action_space.n

# Online and target networks share the same architecture; the replay buffer
# holds at most 1000 experiences.
input_shape = (state_shape[0],)
model = create_model(input_shape, num_actions)
target_model = create_model(input_shape, num_actions)
buffer = ExperienceReplayBuffer(capacity=1000)

In [None]:
# Run training with train_model's defaults (1000 episodes, batch size 64, update_freq 10).
train_model(model, target_model, buffer, env)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns


def plot_weights(model, layer_index, annot=True, title="Weights Visualization"):
    """Show a heatmap of the first weight array of one layer.

    Args:
        model: Model exposing ``layers``; the selected layer's
            ``get_weights()[0]`` is what gets plotted.
        layer_index: Index into ``model.layers``.
        annot: Forwarded to ``sns.heatmap``; when true each cell is annotated
            with its value formatted to two decimals.
        title: Title drawn above the heatmap.
    """
    layer = model.layers[layer_index]
    weight_matrix = layer.get_weights()[0]
    _, ax = plt.subplots(figsize=(10, 10))
    sns.heatmap(weight_matrix, annot=annot, fmt=".2f", cmap='viridis', ax=ax)
    ax.set_title(title)
    plt.show()

In [None]:
# Visualize the weights of the layer at index 0 of `model`.
plot_weights(model,0)

In [None]:
# Save the model to disk.
# NOTE(review): the path is absolute (directly under the filesystem root) —
# confirm this is intended rather than a project-relative "trained/" directory.
model.save('/trained/dqn_1k.h5')