# Deep Q-Network Training with CUDA

## Environment Setup

In [None]:
# Shell escape: install the third-party packages this notebook imports
# (torch, gymnasium, numpy, matplotlib) using pip.
!pip install torch gymnasium numpy matplotlib

In [None]:
import torch

# Report the installed PyTorch build first, then the GPU details, which are
# only queried when CUDA is reported as available.
print(f"PyTorch version: {torch.__version__}")

has_cuda = torch.cuda.is_available()
print(f"CUDA available: {has_cuda}")

if has_cuda:
    print(f"CUDA version: {torch.version.cuda}")
    # Index 0 selects the first visible CUDA device.
    print(f"GPU device: {torch.cuda.get_device_name(0)}")

## Clone Repository

In [None]:
# Shell escape: clone the lab repository into the current working directory.
!git clone https://github.com/ChristianPE1/Labs-Robotica-EPCC.git
# IPython magic: change the notebook's working directory to the lab-5-webots
# folder of the clone. Presumably this is where config, dqn_agent, utils,
# train.py and visualize.py live, so the imports and relative paths used in
# later cells resolve from here -- confirm against the repository layout.
%cd Labs-Robotica-EPCC/lab-5-webots

## Import Modules

In [None]:
import gymnasium as gym
import numpy as np
import matplotlib.pyplot as plt

import config
from dqn_agent import DQNAgent
from utils import get_device_info, compute_statistics

## Configuration

In [None]:
# Echo the hyperparameters read from the imported config module, one per
# line, under a single heading.
print(
    "DQN Configuration:",
    f"  Environment: {config.ENV_NAME}",
    f"  Episodes: {config.NUM_EPISODES}",
    f"  Learning rate: {config.LEARNING_RATE}",
    f"  Gamma: {config.GAMMA}",
    f"  Batch size: {config.BATCH_SIZE}",
    f"  Memory size: {config.MEMORY_SIZE}",
    f"  Hidden layers: {config.HIDDEN_LAYERS}",
    f"  Device: {config.DEVICE}",
    sep="\n",
)

## Training

In [None]:
# Shell escape: run train.py with the shell's python, as a separate process.
# NOTE(review): presumably this is what writes checkpoints/dqn_final.pt and
# metrics/training_metrics.pkl, which later cells read -- confirm in train.py.
!python train.py

## Visualization

In [None]:
# Shell escape: run visualize.py with the shell's python, as a separate process.
# NOTE(review): presumably this generates the PNG files under plots/ that a
# later cell displays -- confirm in visualize.py.
!python visualize.py

## Load and Display Results

In [None]:
from utils import load_metrics

# Load the stored metrics and pull out the two per-episode series that the
# summary below is computed from.
metrics = load_metrics('metrics/training_metrics.pkl')
episode_rewards, episode_lengths = (
    metrics[series_key] for series_key in ('episode_rewards', 'episode_lengths')
)

# Build the whole report first, then emit it in one write.
summary_lines = [
    "Training Statistics:",
    f"  Total episodes: {len(episode_rewards)}",
    f"  Average reward: {np.mean(episode_rewards):.2f}",
    f"  Max reward: {np.max(episode_rewards):.2f}",
    f"  Average episode length: {np.mean(episode_lengths):.1f}",
    f"  Max episode length: {np.max(episode_lengths)}",
]
print("\n".join(summary_lines))

## Display Generated Plots

In [None]:
from IPython.display import Image, display

# Each entry pairs the heading to print with the PNG under plots/ to render
# beneath it; the leading "\n" on later headings leaves a blank line between
# consecutive plots.
plot_sections = (
    ("Reward Curve:", 'plots/reward_curve.png'),
    ("\nEpisode Length:", 'plots/episode_length.png'),
    ("\nLoss Curve:", 'plots/loss_curve.png'),
    ("\nSuccess Rate:", 'plots/success_rate.png'),
)

for heading, png_path in plot_sections:
    print(heading)
    display(Image(png_path))

## Test Trained Agent

In [None]:
# NOTE(review): load_checkpoint is imported but not used in this cell; the
# checkpoint is read directly with torch.load below.
from utils import load_checkpoint

# Evaluate the trained agent for a fixed number of episodes and report the
# per-episode and average reward/length.

# Create environment
env = gym.make(config.ENV_NAME, render_mode='rgb_array')
try:
    state_dim = env.observation_space.shape[0]
    action_dim = env.action_space.n

    # Initialize agent
    agent = DQNAgent(state_dim, action_dim, config, config.DEVICE)

    # Load trained model. map_location remaps the stored tensors onto the
    # configured device, so a checkpoint saved on a CUDA machine still loads
    # on a CPU-only runtime instead of raising while deserializing.
    # Assumes config.DEVICE is a torch.device or device string (it is passed
    # to DQNAgent as the device above) -- TODO confirm.
    checkpoint = torch.load('checkpoints/dqn_final.pt', map_location=config.DEVICE)
    agent.load_state_dict(checkpoint['agent_state'])

    # Test for 10 episodes
    test_rewards = []
    test_lengths = []

    for episode in range(10):
        state, _ = env.reset()
        episode_reward = 0
        episode_length = 0
        done = False
        truncated = False

        # An episode ends when either flag returned by env.step is set.
        while not (done or truncated):
            # training=False presumably disables exploration -- confirm in DQNAgent.
            action = agent.select_action(state, training=False)
            state, reward, done, truncated, _ = env.step(action)
            episode_reward += reward
            episode_length += 1

        test_rewards.append(episode_reward)
        test_lengths.append(episode_length)
        print(f"Test Episode {episode + 1}: Reward = {episode_reward:.2f}, Length = {episode_length}")

    print(f"\nTest Statistics:")
    print(f"  Average reward: {np.mean(test_rewards):.2f}")
    print(f"  Average length: {np.mean(test_lengths):.1f}")
finally:
    # Release the environment even when loading or the rollout raises.
    env.close()

## Download Results

In [None]:
from google.colab import files

# Result files handed to Colab's files.download, in order: the final
# checkpoint, the metrics pickle, then the four plot images.
result_paths = (
    'checkpoints/dqn_final.pt',
    'metrics/training_metrics.pkl',
    'plots/reward_curve.png',
    'plots/episode_length.png',
    'plots/loss_curve.png',
    'plots/success_rate.png',
)

for result_path in result_paths:
    files.download(result_path)