# Show training results from a checkpoint

In [1]:
import sys
# Make the parent directory importable (the entry is relative, so it resolves
# against the kernel's working directory). Presumably this is where the
# project packages imported below (environment, utils, agent, training) live.
sys.path.append("..")

In [2]:
import nest_asyncio
# Patch asyncio so an event loop can be re-entered from inside the loop that
# Jupyter already runs.
# NOTE(review): presumably needed by the simulator client used by the
# environment classes — confirm before removing.
nest_asyncio.apply()

In [3]:
from tqdm import trange

In [4]:
# all envs

from environment.qlearning.obstacle_avoidance_env import ObstacleAvoidanceEnv
from environment.qlearning.phototaxis_env import PhototaxisEnv
from environment.qlearning.exploration_env import ExplorationEnv
from utils.reader import get_yaml_path, read_file

In [5]:
from agent.qagent import QAgent
from training.qlearning import QLearning

In [6]:
import pygame
import numpy as np

## Connect to Simulator

In [7]:
# Host and port of the simulator server this notebook connects to.
server_address = "localhost:50051" # adjust the port as needed
# Name handed to the environment together with the address; presumably it
# identifies this client to the simulator.
client_name = "RLClient"
# ExplorationEnv pairs with the exploration configuration and checkpoint used
# later in this notebook; ObstacleAvoidanceEnv and PhototaxisEnv are imported
# above as drop-in alternatives.
env = ExplorationEnv(server_address, client_name) # adjust the env as needed
# Open the connection; a "Connected to <address>" log line is emitted on success.
env.connect_to_client()

2025-11-04 10:56:20,571 — INFO — ✓ Connected to localhost:50051



## Load Configuration - Original Complex Reward

In [8]:
# Resolve the location of the exploration configuration file from its path
# components, then read it into `config` (passed to env.init() afterwards).
config_path = get_yaml_path("resources", "configurations", "exploration.yml") # adjust the configuration as needed
config = read_file(config_path)

# Uncomment to inspect the loaded configuration:
# print(config)

In [9]:
# Initialise the connected environment with the loaded configuration.
# The cell output shows a 2-tuple such as (True, ''); presumably
# (success flag, error message) — confirm against the environment class.
env.init(config)

2025-11-04 10:56:20,656 — INFO — ✓ Initialization successful


(True, '')

## Create Agent

In [10]:
# Single Q-learning agent bound to the environment created above.
agent = QAgent(env)
# Identifier under which this agent appears in the per-agent dictionaries
# (observations, rewards, terminated/truncated flags) used by run_episodes.
# NOTE(review): presumably must match the agent id declared in the
# configuration file — confirm.
agentId = "00000000-0000-0000-0000-000000000001"
# Registry of agent id -> agent; run_episodes reads this module-level dict.
agents = { agentId: agent }

## Training Loop



In [11]:
def run_episodes(
    episode_count,
    episode_max_steps,
    render=False,
    fps=60,
    checkpoint_interval=None,
    checkpoint_path="scripts/resources/generated/exploration/",
    load_checkpoint=None,
    start_episode=0
):
    """Run episodes on the notebook-level ``env`` with the notebook-level ``agents``.

    The ``render`` flag doubles as the train/evaluate switch:

    * ``render=False`` (training): actions are chosen with
      ``epsilon_greedy=True``, the Q-table is updated after every step and
      epsilon is decayed after every completed episode.
    * ``render=True`` (evaluation): actions are chosen with
      ``epsilon_greedy=False``, the agents are left untouched (no Q-table
      update, no epsilon decay) and every step is drawn in a pygame window.

    Keyboard controls while rendering: UP/DOWN change the FPS cap in steps of
    10 (clamped to 10-240), SPACE pauses/resumes, S writes a manual
    checkpoint, ESC/Q or closing the window stops the run.

    This function reads the module-level names ``env``, ``agents`` and
    ``agentId``; episode termination and the on-screen reward follow
    ``agentId`` only.

    Args:
        episode_count: Number of episodes to run.
        episode_max_steps: Step limit per episode.
        render: Evaluate with a pygame window instead of training.
        fps: Initial frame-rate cap of the render loop.
        checkpoint_interval: Save every N episodes; a falsy value disables
            periodic and final saves.
        checkpoint_path: Path prefix for saved files. Suffixes ``_ep<N>``,
            ``_manual_ep<N>`` and ``_final`` are appended to it verbatim.
        load_checkpoint: If given, every agent loads this path before running.
        start_episode: Offset added to the loop index for episode numbering
            (file names, overlay, epsilon decay).

    Returns:
        None.
    """
    import os

    def _ensure_checkpoint_dir():
        """Create the directory part of ``checkpoint_path`` if it is missing."""
        os.makedirs(os.path.dirname(checkpoint_path) or ".", exist_ok=True)

    def _save_agents(suffix, message):
        """Save every agent to ``checkpoint_path + suffix``, printing ``message`` per agent."""
        # Created on demand so the manual save (S key) also works when
        # periodic checkpointing is disabled.
        _ensure_checkpoint_dir()
        for agent_obj in agents.values():
            agent_obj.save(f"{checkpoint_path}{suffix}")
            print(message)

    def _draw_frame(frame_surface, episode_number, steps_done, episode_reward):
        """Blit the frame (if any) plus the info overlay and flip the display."""
        if frame_surface is not None:
            screen.blit(frame_surface, (0, 0))

        if info_font is not None:
            info_texts = [
                f"Episode: {episode_number + 1}/{start_episode + episode_count}",
                f"Step: {steps_done}/{episode_max_steps}",
                f"Reward: {episode_reward:.2f}",
                f"Epsilon: {agents[agentId].epsilon:.4f}",
                f"FPS: {current_fps} (↑/↓ to adjust)",
                f"{'PAUSED' if paused else 'SPACE: Pause'}"
            ]
            color = (255, 255, 0) if paused else (255, 255, 255)
            y_offset = 10
            for text in info_texts:
                text_surface = info_font.render(text, True, color, (0, 0, 0))
                screen.blit(text_surface, (10, y_offset))
                y_offset += 25

        pygame.display.flip()

    # Load existing agent if specified
    if load_checkpoint:
        for agent_obj in agents.values():
            agent_obj.load(load_checkpoint)
            print(f"Loaded agent from {load_checkpoint}")

    # Fail early on an unusable checkpoint location when checkpointing is on
    if checkpoint_interval:
        _ensure_checkpoint_dir()

    running = True
    paused = False
    current_fps = fps
    screen = clock = info_font = None
    last_frame = None  # most recent rendered surface, re-shown while paused

    # Initialize pygame for rendering
    if render:
        pygame.init()
        screen = pygame.display.set_mode((800, 600))
        pygame.display.set_caption(f"Q-Learning - FPS: {current_fps}")
        clock = pygame.time.Clock()

        # The overlay is best-effort: without a usable font the frames are
        # still shown. Catch Exception (not a bare except) so that
        # KeyboardInterrupt/SystemExit are not swallowed.
        try:
            info_font = pygame.font.Font(None, 20)
        except Exception:
            info_font = None

    try:
        for ep_idx in trange(episode_count, desc="Training", unit="ep"):
            actual_episode = start_episode + ep_idx
            obs, _ = env.reset()
            done = False
            total_reward = {agentId: 0}
            step_count = 0

            while not done and step_count < episode_max_steps:
                # Handle pygame events
                if render:
                    for event in pygame.event.get():
                        if event.type == pygame.QUIT:
                            running = False
                        elif event.type == pygame.KEYDOWN:
                            if event.key in (pygame.K_ESCAPE, pygame.K_q):
                                running = False
                            elif event.key == pygame.K_SPACE:
                                paused = not paused
                            elif event.key == pygame.K_UP:
                                current_fps = min(240, current_fps + 10)
                                pygame.display.set_caption(f"Q-Learning - FPS: {current_fps}")
                            elif event.key == pygame.K_DOWN:
                                current_fps = max(10, current_fps - 10)
                                pygame.display.set_caption(f"Q-Learning - FPS: {current_fps}")
                            elif event.key == pygame.K_s:
                                _save_agents(
                                    f"_manual_ep{actual_episode}",
                                    f"\n[Manual Save] Episode {actual_episode}",
                                )

                if not running:
                    break

                # While paused no step is taken; redraw the last frame so the
                # PAUSED indicator is actually shown, then poll events again.
                if paused and render:
                    _draw_frame(last_frame, actual_episode, step_count, total_reward[agentId])
                    pygame.time.wait(100)
                    continue

                # Choose and execute actions
                actions = {
                    k: agents[k].choose_action(v, epsilon_greedy=not render)
                    for k, v in obs.items()
                }
                next_obs, rewards, terminateds, truncateds, _ = env.step(actions)

                done = terminateds[agentId] or truncateds[agentId]

                # Update Q-table (only during training)
                if not render:
                    for k in next_obs.keys():
                        agents[k].update_q(obs[k], actions[k], rewards[k], next_obs[k], done)
                        # .get() keeps this working for agents other than agentId
                        total_reward[k] = total_reward.get(k, 0) + rewards[k]
                else:
                    # Track reward even during rendering
                    total_reward[agentId] += rewards[agentId]

                obs = next_obs

                # Render visualization
                if render:
                    rgb_array = env.render()
                    # Swap the first two axes before handing the array to pygame.surfarray
                    last_frame = pygame.surfarray.make_surface(np.transpose(rgb_array, (1, 0, 2)))
                    # step_count is incremented below, so +1 = steps completed
                    _draw_frame(last_frame, actual_episode, step_count + 1, total_reward[agentId])
                    clock.tick(current_fps)

                step_count += 1

            if not running:
                print("\nTraining interrupted by user")
                break

            # Decay epsilon after a training episode. Skipped when rendering:
            # nothing is learned there, so evaluation must not alter the agent.
            if not render:
                for agent_obj in agents.values():
                    agent_obj.decay_epsilon(actual_episode)

            # Save checkpoint at intervals
            if checkpoint_interval and (ep_idx + 1) % checkpoint_interval == 0:
                _save_agents(
                    f"_ep{actual_episode + 1}",
                    f"\n[Checkpoint] Saved at episode {actual_episode + 1}",
                )

    finally:
        # Cleanup pygame
        if render:
            pygame.quit()

        # Final save if checkpointing was enabled and the user did not quit.
        # Because this sits in `finally`, it also runs when an exception
        # escapes the loop, so partial progress is kept.
        if checkpoint_interval and running:
            _save_agents("_final", "\n[Final Save] Training complete")

## Evaluate the Trained Agent

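**Keyboard controls (only active when `render=True`):**
- `↑/↓`: Increase/decrease the FPS cap in steps of 10 (clamped to 10–240)
- `SPACE`: Pause/Resume
- `S`: Manual checkpoint save (written to `<checkpoint_path>_manual_ep<episode>`)
- `ESC/Q` or closing the window: Stop the run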

In [14]:
# Evaluate with rendering (use ↑/↓ to adjust speed, SPACE to pause)
# The checkpoint path is given without a file extension; the log output of
# this cell shows the agent loader reading "<path>.npz".
# NOTE(review): get_yaml_path is reused here only to build the path — the
# target is a saved agent checkpoint, not a YAML file.
path = get_yaml_path("src", "scripts", "resources", "generated", "exploration", "checkpoints_ep119")
# One episode of at most 10000 steps. With render=True, run_episodes picks
# actions with epsilon_greedy=False and does not update the Q-table.
run_episodes(1, 10000, render=True, load_checkpoint=path)

2025-11-04 11:44:12,522 — INFO — Agent loaded from C:\Users\HP\Desktop\UNIBO\LaureaMagistrale\1_anno\Paradigmi di Programmazione e Sviluppo (PPS)\Esame\PPS-22-srs\python\src\scripts\resources\generated\exploration\checkpoints_ep119.npz
2025-11-04 11:44:12,523 — INFO —   Q-table shape: (200, 3)
2025-11-04 11:44:12,524 — INFO —   Current epsilon: 0.4431
2025-11-04 11:44:12,525 — INFO —   Total episodes trained: 119
Loaded agent from C:\Users\HP\Desktop\UNIBO\LaureaMagistrale\1_anno\Paradigmi di Programmazione e Sviluppo (PPS)\Esame\PPS-22-srs\python\src\scripts\resources\generated\exploration\checkpoints_ep119


Training:   0%|          | 0/1 [00:51<?, ?ep/s]


Training interrupted by user



