In [None]:
!pip install ale

Collecting ale
  Downloading Ale-0.8.4.tar.gz (53 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/53.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m53.4/53.4 kB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: ale
  Building wheel for ale (setup.py) ... [?25l[?25hdone
  Created wheel for ale: filename=Ale-0.8.4-py3-none-any.whl size=70154 sha256=a51d2573ec74dc782a672ef2a958de43cbd30499006aeaf715c9279e39769410
  Stored in directory: /root/.cache/pip/wheels/2d/01/eb/61d0ee426a7f13c4d898c01b266ab2fbecf9ba0cb87e53df21
Successfully built ale
Installing collected packages: ale
Successfully installed ale-0.8.4


In [None]:
# Install shimmy, which is imported by the training cell of this notebook.
!pip install shimmy

Collecting shimmy
  Downloading Shimmy-2.0.0-py3-none-any.whl.metadata (3.5 kB)
Downloading Shimmy-2.0.0-py3-none-any.whl (30 kB)
Installing collected packages: shimmy
Successfully installed shimmy-2.0.0


# Preguntas del laboratorio


**Objetivos y mecánicas del juego:**


Básicamente, Galaxian es un juego en el que una nave debe dispararles a otras naves para ganar puntos. El objetivo del juego es destruir todas las naves enemigas posibles a lo largo de 3 vidas. Las naves enemigas disparan y también pueden hacer ataques suicidas, por así decirlo. El éxito se mide con el puntaje.

**Definición del estado del entorno:**

El entorno ALE/Galaxian-v5 puede generar tres tipos de observación:

1. RGB: Representa el entorno con imágenes a color de tamaño (210, 160, 3) píxeles.
2. Grayscale: Versión en escala de grises de las imágenes RGB, con tamaño (210, 160).
3. RAM: Estado comprimido de la memoria del Atari, representado como un vector de 128 valores enteros (0–255).





**Acciones disponibles**

El espacio de acciones es discreto y tiene 6 acciones:

1. NOOP: No realiza ninguna acción.
2. FIRE: Dispara un proyectil hacia arriba.
3. RIGHT: Mueve la nave hacia la derecha.
4. LEFT: Mueve la nave hacia la izquierda.
5. RIGHTFIRE: Mueve a la derecha y dispara simultáneamente.
6. LEFTFIRE: Mueve a la izquierda y dispara simultáneamente.



In [None]:
import gymnasium as gym
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from collections import deque
import random
from datetime import datetime
import cv2
import os
import ale_py
import shimmy
import matplotlib.pyplot as plt


class DQN(nn.Module):
    """Convolutional Q-network: three conv layers followed by two linear layers.

    Args:
        input_shape: shape of one observation without the batch axis; its
            first entry is used as the number of input channels.
        n_actions: size of the output layer (one Q-value per action).
    """

    def __init__(self, input_shape, n_actions):
        super().__init__()

        in_channels = input_shape[0]
        conv_layers = [
            nn.Conv2d(in_channels, 32, kernel_size=8, stride=4),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1),
            nn.ReLU(),
        ]
        # Attribute names `conv` / `fc` define the state_dict keys; keep them stable.
        self.conv = nn.Sequential(*conv_layers)

        # The linear head needs the flattened size of the conv output.
        flat_features = self._get_conv_output(input_shape)

        self.fc = nn.Sequential(
            nn.Linear(flat_features, 512),
            nn.ReLU(),
            nn.Linear(512, n_actions),
        )

    def _get_conv_output(self, shape):
        """Return the number of elements the conv stack yields for one zero input of ``shape``."""
        with torch.no_grad():
            probe = torch.zeros((1,) + tuple(shape))
            return int(self.conv(probe).numel())

    def forward(self, x):
        """Run the conv stack, flatten per sample, and return the linear head's output."""
        features = self.conv(x)
        flattened = features.view(features.size(0), -1)
        return self.fc(flattened)


class ReplayBuffer:
    """Fixed-capacity FIFO store of (state, action, reward, next_state, done) transitions.

    Args:
        capacity: maximum number of transitions kept; older ones are evicted first.
        device: device the sampled tensors are moved to.
    """

    def __init__(self, capacity=50000, device="cpu"):
        self.buffer = deque(maxlen=capacity)
        self.device = device

    def push(self, state, action, reward, next_state, done):
        """Append one transition, evicting the oldest one when the buffer is full."""
        # A deque created with `maxlen` already drops its oldest (leftmost) item
        # on append. The previous explicit `pop()` removed the *newest* item
        # instead, so a full buffer never refreshed its contents.
        self.buffer.append((state, action, reward, next_state, done))

    def sample(self, batch_size=32):
        """Draw ``batch_size`` distinct transitions uniformly at random.

        Returns:
            Tuple ``(states, actions, rewards, next_states, dones)`` of tensors on
            ``self.device``; actions are int64, all others float32.

        Raises:
            ValueError: if ``batch_size`` exceeds the number of stored transitions
                (raised by ``random.sample``).
        """
        transitions = random.sample(self.buffer, batch_size)
        batch_state, batch_action, batch_reward, batch_next_state, batch_done = zip(*transitions)

        # Stack the per-transition arrays into one ndarray before building the tensor.
        batch_state = torch.FloatTensor(np.array(batch_state)).to(self.device)
        batch_action = torch.LongTensor(batch_action).to(self.device)
        batch_reward = torch.FloatTensor(batch_reward).to(self.device)
        batch_next_state = torch.FloatTensor(np.array(batch_next_state)).to(self.device)
        batch_done = torch.FloatTensor(batch_done).to(self.device)

        return batch_state, batch_action, batch_reward, batch_next_state, batch_done

    def __len__(self):
        """Number of transitions currently stored."""
        return len(self.buffer)


class DQNAgent:
    """DQN agent with an epsilon-greedy policy, a target network and a replay buffer.

    Args:
        state_shape: observation shape passed to both networks.
        n_actions: number of discrete actions.
        learning_rate: Adam learning rate for the policy network.
        gamma: discount factor used in the TD target.
        epsilon_start: initial exploration probability.
        epsilon_end: lower bound for epsilon.
        epsilon_decay: multiplicative factor applied by ``decay_epsilon``.
        replay_buffer_capacity: maximum number of stored transitions.
    """

    def __init__(self, state_shape, n_actions, learning_rate=0.00025, gamma=0.99,
                 epsilon_start=1.0, epsilon_end=0.01, epsilon_decay=0.995, replay_buffer_capacity=100000):
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.n_actions = n_actions
        self.gamma = gamma
        self.epsilon = epsilon_start
        self.epsilon_end = epsilon_end
        self.epsilon_decay = epsilon_decay

        # The target network starts as a copy of the policy network and is only
        # refreshed through update_target_network().
        self.policy_net = DQN(state_shape, n_actions).to(self.device)
        self.target_net = DQN(state_shape, n_actions).to(self.device)
        self.target_net.load_state_dict(self.policy_net.state_dict())
        self.target_net.eval()

        self.optimizer = optim.Adam(self.policy_net.parameters(), lr=learning_rate)
        self.memory = ReplayBuffer(replay_buffer_capacity, device=self.device)

        # Counts train_step() calls so that only every 4th one runs an update.
        self.train_counter = 0

    def select_action(self, state, training=True):
        """Return an action index: random with probability epsilon when training, else greedy."""
        if training and random.random() < self.epsilon:
            return random.randrange(self.n_actions)

        with torch.no_grad():
            state_tensor = torch.FloatTensor(state).unsqueeze(0).to(self.device)
            q_values = self.policy_net(state_tensor)
            return q_values.max(1)[1].item()

    def train_step(self, batch_size=32):
        """Run one optimisation step on a sampled batch.

        Returns:
            The loss as a float, or ``None`` when the buffer holds fewer than
            ``batch_size`` transitions or this call is not the 4th since the
            last update.
        """
        if len(self.memory) < batch_size:
            return None

        # Only train on every 4th call.
        self.train_counter += 1
        if self.train_counter % 4 != 0:
            return None

        batch_state, batch_action, batch_reward, batch_next_state, batch_done = self.memory.sample(batch_size)

        # Clip rewards to [-1, 1].
        batch_reward = torch.clamp(batch_reward, -1, 1)

        # squeeze(1) removes only the gather axis. A bare squeeze() would also
        # drop the batch axis when batch_size == 1 and mismatch target_q's shape.
        current_q = self.policy_net(batch_state).gather(1, batch_action.unsqueeze(1)).squeeze(1)

        with torch.no_grad():
            next_q = self.target_net(batch_next_state).max(1)[0]
            # Terminal transitions (done == 1) do not bootstrap from next_q.
            target_q = batch_reward + (1 - batch_done) * self.gamma * next_q

        loss = nn.SmoothL1Loss()(current_q, target_q)

        self.optimizer.zero_grad()
        loss.backward()
        # Clip the gradient norm to 10.0 before the optimizer step.
        torch.nn.utils.clip_grad_norm_(self.policy_net.parameters(), 10.0)
        self.optimizer.step()

        return loss.item()

    def update_target_network(self):
        """Copy the policy network weights into the target network."""
        self.target_net.load_state_dict(self.policy_net.state_dict())

    def decay_epsilon(self):
        """Multiply epsilon by ``epsilon_decay``, never going below ``epsilon_end``."""
        self.epsilon = max(self.epsilon_end, self.epsilon * self.epsilon_decay)

    def save(self, filepath):
        """Write both networks, the optimizer state and epsilon to ``filepath``."""
        torch.save({
            'policy_net_state_dict': self.policy_net.state_dict(),
            'target_net_state_dict': self.target_net.state_dict(),
            'optimizer_state_dict': self.optimizer.state_dict(),
            'epsilon': self.epsilon
        }, filepath)

    def load(self, filepath):
        """Restore networks, optimizer state and epsilon from a checkpoint file."""
        # SECURITY: weights_only=False unpickles arbitrary objects; only load
        # checkpoint files from a trusted source.
        checkpoint = torch.load(filepath, map_location=self.device, weights_only=False)
        self.policy_net.load_state_dict(checkpoint['policy_net_state_dict'])
        self.target_net.load_state_dict(checkpoint['target_net_state_dict'])
        self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        self.epsilon = checkpoint['epsilon']


def preprocess_frame(frame):
    """Turn an RGB frame into an 84x84 float32 grayscale image divided by 255."""
    grayscale = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
    downsampled = cv2.resize(grayscale, (84, 84), interpolation=cv2.INTER_AREA)
    return downsampled.astype(np.float32) / 255.0


def stack_frames(frames, new_frame, is_new_episode):
    """Add ``new_frame`` to ``frames`` (in place) and return the frames stacked on axis 0.

    On a new episode the container is emptied and filled with four copies of
    ``new_frame``; otherwise the frame is appended once.
    """
    copies = 1
    if is_new_episode:
        frames.clear()
        copies = 4
    frames.extend(new_frame for _ in range(copies))
    return np.stack(frames, axis=0)


def plot_training_metrics(rewards, losses, durations, email):
    """Plot per-episode reward, loss and duration and save the figure as a PNG.

    The file is named ``training_metrics_<prefix>.png`` where ``<prefix>`` is
    the part of ``email`` before the ``@``. Each panel also shows a 10-episode
    moving average once at least 10 values are available. The loss panel is
    left empty when ``losses`` is empty.
    """
    email_prefix = email.split("@")[0]

    fig, axes = plt.subplots(3, 1, figsize=(12, 10))
    fig.suptitle('Métricas de Entrenamiento DQN - Galaxian', fontsize=16, fontweight='bold')

    def draw_panel(ax, series, raw_color, raw_width, ylabel, title, avg_color):
        # Raw per-episode curve plus axis decoration.
        ax.plot(series, color=raw_color, linewidth=raw_width, alpha=0.7)
        ax.set_xlabel('Episodio', fontsize=11)
        ax.set_ylabel(ylabel, fontsize=11)
        ax.set_title(title, fontsize=12, fontweight='bold')
        ax.grid(True, alpha=0.3)

        # 10-episode moving average; 'valid' mode yields len(series) - 9 points,
        # aligned with episodes 9 .. len(series) - 1.
        if len(series) >= 10:
            window_mean = np.convolve(series, np.ones(10)/10, mode='valid')
            ax.plot(range(9, len(series)), window_mean, color=avg_color,
                    linewidth=2, label='Media Móvil (10 episodios)')
            ax.legend()

    # Rewards panel
    draw_panel(axes[0], rewards, '#2E86AB', 1.5,
               'Recompensa Total', 'Recompensa por Episodio', '#A23B72')

    # Loss panel (skipped entirely when there are no losses)
    if losses:
        draw_panel(axes[1], losses, '#F18F01', 1,
                   'Pérdida Promedio', 'Pérdida de la Red Neuronal', '#C73E1D')

    # Episode-duration panel
    draw_panel(axes[2], durations, '#6A994E', 1.5,
               'Duración (pasos)', 'Duración del Episodio', '#386641')

    plt.tight_layout()

    # Save the figure to disk, then close it.
    plot_filename = f'training_metrics_{email_prefix}.png'
    plt.savefig(plot_filename, dpi=150, bbox_inches='tight')
    print(f"Gráfica guardada: {plot_filename}")
    plt.close()


def record_episode(policy, email="estudiante@uvg.edu.gt", output_dir="videos"):
    """Play one greedy episode of ALE/Galaxian-v5 with ``policy`` and save it as an MP4.

    Args:
        policy: object exposing ``select_action(state_stack, training=False)``.
        email: its part before ``@`` is used as the file-name prefix.
        output_dir: directory for the video; created if missing.

    Returns:
        The target video path ``<output_dir>/<prefix>_<timestamp>_<score>.mp4``.
        The path is returned even if no video could be written.
    """
    os.makedirs(output_dir, exist_ok=True)

    env = gym.make('ALE/Galaxian-v5', render_mode='rgb_array')
    total_reward = 0
    frames = []

    # Close the environment even if the rollout raises.
    try:
        state, info = env.reset()
        done = False
        truncated = False

        stacked_frames = deque(maxlen=4)
        processed_frame = preprocess_frame(state)
        state_stack = stack_frames(stacked_frames, processed_frame, True)

        print("Iniciando grabación del episodio...")

        while not (done or truncated):
            # Keep the rendered RGB frame for the video before acting.
            frames.append(env.render())

            action = policy.select_action(state_stack, training=False)

            next_state, reward, done, truncated, info = env.step(action)
            total_reward += reward

            processed_frame = preprocess_frame(next_state)
            state_stack = stack_frames(stacked_frames, processed_frame, False)
    finally:
        env.close()

    # The score is part of the file name, so the name is only known after the episode.
    timestamp = datetime.now().strftime("%Y%m%d%H%M")
    email_prefix = email.split('@')[0]
    score = int(total_reward)
    filename = f"{email_prefix}_{timestamp}_{score}.mp4"
    filepath = os.path.join(output_dir, filename)

    print(f"Guardando video: {filepath}")
    print(f"Puntuación obtenida: {score}")
    print(f"Total de frames: {len(frames)}")

    if len(frames) > 0:
        height, width = frames[0].shape[:2]
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(filepath, fourcc, 30.0, (width, height))

        # Check that the writer opened before reporting success.
        if not out.isOpened():
            out.release()
            print(f"No se pudo abrir el archivo de video para escritura: {filepath}")
            return filepath

        try:
            for frame in frames:
                # The frames were rendered as RGB; convert to BGR for the writer.
                frame_bgr = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
                out.write(frame_bgr)
        finally:
            out.release()
        print(f"Video guardado exitosamente: {filepath}")
    else:
        print("No frames to save for the video.")

    return filepath


def train_agent(episodes=15, email="estudiante@uvg.edu.gt", checkpoint_path=None,
                replay_buffer_capacity=100000):
    """Train a DQN agent on ALE/Galaxian-v5, optionally resuming from a checkpoint.

    After training, the function writes ``final_model_<prefix>.pth``, saves the
    metric plots via ``plot_training_metrics`` and records one episode via
    ``record_episode`` (``<prefix>`` is the part of ``email`` before ``@``).

    Args:
        episodes: number of episodes to run in this call.
        email: used to derive the output file-name prefix.
        checkpoint_path: checkpoint to resume from; ignored when ``None`` or
            when the file does not exist.
        replay_buffer_capacity: maximum number of stored transitions. Each
            stored state stack is a 4x84x84 float32 array (about 110 KiB), so
            a full buffer at the default 100000 needs roughly 11 GB of RAM.
            Lower this on memory-constrained machines.

    Returns:
        The trained ``DQNAgent``.
    """
    env = gym.make('ALE/Galaxian-v5')

    # Close the environment even when training is interrupted by an error.
    try:
        state_shape = (4, 84, 84)
        n_actions = env.action_space.n

        agent = DQNAgent(state_shape, n_actions,
                         replay_buffer_capacity=replay_buffer_capacity)

        stacked_frames = deque(maxlen=4)
        update_target_frequency = 10000  # environment steps between target-network syncs
        steps = 0
        episode_start = 0

        # Per-episode metrics
        episode_rewards = []
        episode_losses = []
        episode_durations = []

        if checkpoint_path and os.path.exists(checkpoint_path):
            print(f"Cargando checkpoint desde: {checkpoint_path}")
            # SECURITY: weights_only=False unpickles arbitrary objects; only
            # load checkpoint files from a trusted source.
            checkpoint = torch.load(checkpoint_path, map_location=agent.device, weights_only=False)

            agent.policy_net.load_state_dict(checkpoint['policy_net_state_dict'])
            agent.target_net.load_state_dict(checkpoint['target_net_state_dict'])
            agent.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
            agent.epsilon = checkpoint['epsilon']
            episode_start = checkpoint.get('episode', 0)
            steps = checkpoint.get('steps', 0)

            # Restore earlier metrics when the checkpoint contains them.
            episode_rewards = list(checkpoint.get('episode_rewards', []))
            episode_losses = list(checkpoint.get('episode_losses', []))
            episode_durations = list(checkpoint.get('episode_durations', []))

            # The replay buffer is not restored: the agent keeps the fresh,
            # empty buffer it was created with. Drop the loaded dict so that a
            # large 'replay_buffer' entry in an older checkpoint can be freed.
            del checkpoint

            print(f"Checkpoint cargado. Continuando desde episodio {episode_start}")
            print(f"Epsilon actual: {agent.epsilon:.3f}")

        print(f"\nIniciando entrenamiento por {episodes} episodios...")
        print(f"Dispositivo: {agent.device}")
        print(f"Acciones disponibles: {n_actions}")

        for episode in range(episode_start, episode_start + episodes):
            state, _ = env.reset()
            processed_frame = preprocess_frame(state)
            state_stack = stack_frames(stacked_frames, processed_frame, True)

            episode_reward = 0
            episode_loss_sum = 0
            episode_loss_count = 0
            episode_duration = 0
            done = False
            truncated = False

            while not (done or truncated):
                action = agent.select_action(state_stack, training=True)
                next_state, reward, done, truncated, _ = env.step(action)

                processed_frame = preprocess_frame(next_state)
                next_state_stack = stack_frames(stacked_frames, processed_frame, False)

                # Store the reward clipped to [-1, 1]; the raw reward is still
                # used for the reported episode score below.
                clipped_reward = np.clip(reward, -1, 1)

                agent.memory.push(state_stack, action, clipped_reward, next_state_stack, done)

                # train_step returns None when it skipped the update.
                loss = agent.train_step()
                if loss is not None:
                    episode_loss_sum += loss
                    episode_loss_count += 1

                episode_reward += reward
                episode_duration += 1
                state_stack = next_state_stack
                steps += 1

                if steps % update_target_frequency == 0:
                    agent.update_target_network()
                    print(f"  >> Target network actualizada en paso {steps}")

            # Epsilon decays once per episode.
            agent.decay_epsilon()

            episode_rewards.append(episode_reward)
            episode_durations.append(episode_duration)

            avg_loss = episode_loss_sum / episode_loss_count if episode_loss_count > 0 else 0
            episode_losses.append(avg_loss)

            print(f"Episodio {episode + 1}/{episode_start + episodes} | "
                  f"Recompensa: {episode_reward:.2f} | "
                  f"Duración: {episode_duration} pasos | "
                  f"Pérdida: {avg_loss:.4f} | "
                  f"Epsilon: {agent.epsilon:.3f} | "
                  f"Buffer: {len(agent.memory)}")
    finally:
        env.close()

    # Save the final model.
    email_prefix = email.split("@")[0]
    final_model_path = f'final_model_{email_prefix}.pth'

    # The metric lists are stored so a resumed run can continue the curves.
    # The replay buffer is deliberately not stored: it is never restored on
    # resume and would make the file as large as the buffer itself.
    torch.save({
        'policy_net_state_dict': agent.policy_net.state_dict(),
        'target_net_state_dict': agent.target_net.state_dict(),
        'optimizer_state_dict': agent.optimizer.state_dict(),
        'epsilon': agent.epsilon,
        'episode': episode_start + episodes,
        'steps': steps,
        'episode_rewards': [float(r) for r in episode_rewards],
        'episode_losses': [float(l) for l in episode_losses],
        'episode_durations': [int(d) for d in episode_durations],
    }, final_model_path)

    print(f"\nModelo final guardado: {final_model_path}")

    # Plot the metrics.
    print("\nGenerando gráficas de métricas...")
    plot_training_metrics(episode_rewards, episode_losses, episode_durations, email)

    # Record a video with the trained agent.
    print("\nGrabando video del modelo entrenado...")
    record_episode(agent, email)

    print("\n=== Entrenamiento completado ===")
    print(f"Episodios totales: {episode_start + episodes}")
    # Guard against an empty history (e.g. episodes=0 without prior metrics).
    if episode_rewards:
        print(f"Recompensa final: {episode_rewards[-1]:.2f}")
        print(f"Recompensa promedio: {np.mean(episode_rewards):.2f}")
    print(f"Epsilon final: {agent.epsilon:.3f}")

    return agent


## ENTRENAMIENTO INICIAL (100 EPISODIOS)

In [2]:
# Run configuration for the from-scratch training.
EMAIL = "estudiante@uvg.edu.gt"
EPISODES = 100  # number of episodes to train

print("=" * 70, "ENTRENAMIENTO INICIAL - DQN Galaxian", "=" * 70, sep="\n")

# checkpoint_path=None starts training from scratch (no checkpoint is loaded).
agent = train_agent(episodes=EPISODES, email=EMAIL, checkpoint_path=None)

print("\n¡Entrenamiento inicial completado!")
print("Modelo guardado como: final_model_{}.pth".format(EMAIL.split('@')[0]))
print("Gráficas guardadas como: training_metrics_{}.png".format(EMAIL.split('@')[0]))


ENTRENAMIENTO INICIAL - DQN Galaxian

Iniciando entrenamiento por 100 episodios...
Dispositivo: cpu
Acciones disponibles: 6
Episodio 1/100 | Recompensa: 660.00 | Duración: 511 pasos | Pérdida: 0.0163 | Epsilon: 0.995 | Buffer: 511
Episodio 2/100 | Recompensa: 730.00 | Duración: 497 pasos | Pérdida: 0.0131 | Epsilon: 0.990 | Buffer: 1008
Episodio 3/100 | Recompensa: 150.00 | Duración: 283 pasos | Pérdida: 0.0133 | Epsilon: 0.985 | Buffer: 1291
Episodio 4/100 | Recompensa: 660.00 | Duración: 447 pasos | Pérdida: 0.0106 | Epsilon: 0.980 | Buffer: 1738
Episodio 5/100 | Recompensa: 730.00 | Duración: 683 pasos | Pérdida: 0.0110 | Epsilon: 0.975 | Buffer: 2421
Episodio 6/100 | Recompensa: 510.00 | Duración: 323 pasos | Pérdida: 0.0101 | Epsilon: 0.970 | Buffer: 2744
Episodio 7/100 | Recompensa: 1030.00 | Duración: 625 pasos | Pérdida: 0.0133 | Epsilon: 0.966 | Buffer: 3369
Episodio 8/100 | Recompensa: 980.00 | Duración: 841 pasos | Pérdida: 0.0118 | Epsilon: 0.961 | Buffer: 4210
Episodio 9/1

MemoryError: Unable to allocate 3.45 MiB for an array with shape (32, 4, 84, 84) and data type float32

## REENTRENAR DESDE MODELO GUARDADO

In [None]:

EMAIL = "ang23010@uvg.edu.gt"
email_prefix = EMAIL.split("@")[0]

# Number of extra episodes to run on top of the saved model.
ADDITIONAL_EPISODES = 50

# NOTE: despite its name, this points at the final_model_<prefix>.pth file.
best_model_path = f'final_model_{email_prefix}.pth'
checkpoint_final_path = f'checkpoint_final_{email_prefix}.pth'


def _format_metric(value, spec):
    """Format numbers with ``spec``; pass anything else (e.g. 'N/A') through as text."""
    return format(value, spec) if isinstance(value, (int, float)) else str(value)


print("=" * 60)
print("CONTINUANDO ENTRENAMIENTO")
print("=" * 60)

if os.path.exists(best_model_path):
    print(f"Modelo encontrado: {best_model_path}")

    # SECURITY: weights_only=False unpickles arbitrary objects; only load
    # checkpoint files from a trusted source.
    checkpoint = torch.load(best_model_path, map_location='cpu', weights_only=False)
    print(f"  - Episodio previo: {checkpoint.get('episode', 'N/A')}")
    print(f"  - Mejor score: {_format_metric(checkpoint.get('best_score', 'N/A'), '.2f')}")
    # A missing 'epsilon' key used to crash here: 'N/A' cannot be formatted with ':.4f'.
    print(f"  - Epsilon: {_format_metric(checkpoint.get('epsilon', 'N/A'), '.4f')}")
    print()

    # The checkpoint was only needed for the summary above; release it before
    # training so its contents do not stay in memory for the whole run.
    del checkpoint

    print(f"Continuando por {ADDITIONAL_EPISODES} episodios adicionales...")
    agent = train_agent(
        episodes=ADDITIONAL_EPISODES,
        email=EMAIL,
        checkpoint_path=best_model_path
    )
else:
    print(f"No se encontró el modelo: {best_model_path}")
    print("Ejecutar primero CELDA 2 para entrenar el modelo inicial")

CONTINUANDO ENTRENAMIENTO
Modelo encontrado: final_model_ang23010.pth
  - Episodio previo: 75
  - Mejor score: N/A
  - Epsilon: 0.6866

Continuando por 15 episodios adicionales...
Cargando checkpoint desde: final_model_ang23010.pth
Checkpoint cargado. Continuando desde episodio 75
Epsilon actual: 0.687

Iniciando entrenamiento por 50 episodios...
Dispositivo: cuda
Acciones disponibles: 6
Episodio 76/125 | Recompensa: 490.00 | Duración: 469 pasos | Pérdida: 0.0073 | Epsilon: 0.683 | Buffer: 469
Episodio 77/125 | Recompensa: 1260.00 | Duración: 1137 pasos | Pérdida: 0.0084 | Epsilon: 0.680 | Buffer: 1606
Episodio 78/125 | Recompensa: 670.00 | Duración: 347 pasos | Pérdida: 0.0055 | Epsilon: 0.676 | Buffer: 1953
Episodio 79/125 | Recompensa: 930.00 | Duración: 741 pasos | Pérdida: 0.0083 | Epsilon: 0.673 | Buffer: 2694
Episodio 80/125 | Recompensa: 330.00 | Duración: 321 pasos | Pérdida: 0.0075 | Epsilon: 0.670 | Buffer: 3015
Episodio 81/125 | Recompensa: 980.00 | Duración: 953 pasos | Pé

## Grabar episodio del modelo guardado

In [None]:

EMAIL = "ang23010@uvg.edu.gt"
email_prefix = EMAIL.split("@")[0]
best_model_path = f'best_model_{email_prefix}.pth'

print("=" * 60, "REPLICANDO MEJOR EPISODIO CON MODELO ENTRENADO", "=" * 60, sep="\n")

if not os.path.exists(best_model_path):
    print(f"No se encontró el modelo: {best_model_path}")
else:
    # Build an agent with the same network dimensions used for training, then
    # restore its weights from the saved file.
    state_shape = (4, 84, 84)
    n_actions = 6

    agent = DQNAgent(state_shape, n_actions)
    agent.load(best_model_path)

    print(f"Cargado el mejor modelo desde: {best_model_path}")

    # Play one episode with the loaded agent and write it to a video file.
    record_episode(agent, email=EMAIL)
    print("Video del mejor episodio grabado exitosamente")

REPLICANDO MEJOR EPISODIO CON MODELO ENTRENADO
Cargado el mejor modelo desde: best_model_ang23010.pth
Iniciando grabación del episodio...
Guardando video: videos/ang23010_202510300330_990.mp4
Puntuación obtenida: 990
Total de frames: 1027
Video guardado exitosamente: videos/ang23010_202510300330_990.mp4
✓ Video del mejor episodio grabado exitosamente


In [None]:
# Mount Google Drive at /content/drive (uses the google.colab helper module).
from google.colab import drive
drive.mount('/content/drive')

  return datetime.utcnow().replace(tzinfo=utc)


Mounted at /content/drive


In [None]:
import os
import fnmatch

def find_notebook_path(drive_path, notebook_name):
    """Return the path of the first file under ``drive_path`` matching ``notebook_name``.

    ``notebook_name`` is matched against file names with ``fnmatch`` rules, so
    shell-style wildcards are allowed. Returns ``None`` when nothing matches.
    """
    for current_dir, _subdirs, filenames in os.walk(drive_path):
        matches = fnmatch.filter(filenames, notebook_name)
        if matches:
            return os.path.join(current_dir, matches[0])
    return None

# Locate this notebook inside the mounted Drive folder and export it to HTML.
notebook_name = "LAB_10_RL.ipynb"
drive_path = '/content/drive/MyDrive'
notebook_path = find_notebook_path(drive_path, notebook_name)

# NOTE(review): find_notebook_path returns None when no file matches, in which
# case the literal text "None" is passed to nbconvert below.
!jupyter nbconvert --to html "{notebook_path}"


[NbConvertApp] Converting notebook /content/drive/MyDrive/LAB_10_RL.ipynb to html
[NbConvertApp] Writing 365373 bytes to /content/drive/MyDrive/LAB_10_RL.html


In [None]:
# List the contents of the Drive root folder.
!ls /content/drive/MyDrive/

'Colab Notebooks'			  LAB_10_RL.ipynb   Untitled0.ipynb
'Copia de Fireball Stats'$'\n''.gsheet'   new_model.ipynb
