# Risk-Aware RL for Optical Network Provisioning
## CVaR-MaskablePPO Training with Google Drive Integration

**Key Features:**
- ✅ **Saves all results to Google Drive** (survives runtime disconnects)
- ✅ **Automatic baseline comparison** (MaskablePPO vs CVaR-MaskablePPO)
- ✅ **Real-time visualization** of training progress
- ✅ **Comprehensive evaluation** and analysis
- ✅ **Checkpoint management** for resuming training

**Training Plan:**
1. **Baseline:** MaskablePPO (no CVaR) - 500K steps
2. **CVaR Agent:** CVaR-MaskablePPO - 500K steps
3. **Comparison:** Side-by-side performance analysis

**All results saved to:** `/content/drive/MyDrive/Colab Notebooks/Risk_aware_RL/`

## 1. Mount Google Drive and Setup

In [None]:
# Mount Google Drive so everything written under DRIVE_ROOT outlives the runtime
import os
from google.colab import drive

drive.mount('/content/drive')

# Project directory on Drive; created on first run, reused afterwards
DRIVE_ROOT = '/content/drive/MyDrive/Colab Notebooks/Risk_aware_RL'
os.makedirs(DRIVE_ROOT, exist_ok=True)

print("✓ Google Drive mounted")
print("✓ Project directory: " + DRIVE_ROOT)
print("\nAll training results will be saved to Google Drive!")
print("Your files will persist even if runtime disconnects.")

Mounted at /content/drive
✓ Google Drive mounted
✓ Project directory: /content/drive/MyDrive/Colab Notebooks/Risk_aware_RL

All training results will be saved to Google Drive!
Your files will persist even if runtime disconnects.


In [None]:
# Core imports
import sys
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import json
from datetime import datetime
import warnings
import shutil
warnings.filterwarnings('ignore')

# Plot styling applied to every figure created later in the notebook
sns.set_style('whitegrid')
plt.rcParams.update({'figure.figsize': (15, 8), 'font.size': 11})

# Detect Colab by probing for its runtime package. Only ImportError means
# "not in Colab"; any other failure should surface instead of being hidden.
try:
    import google.colab
    IN_COLAB = True
    print("✓ Running in Google Colab")
except ImportError:
    IN_COLAB = False
    print("⚠ Not in Colab - files will save locally")

# Put /content at the front of the import search path (once) so packages
# placed there can be imported by later cells
project_root = Path('/content')
if sys.path.count(str(project_root)) == 0:
    sys.path.insert(0, str(project_root))

print("✓ Basic imports loaded")

✓ Running in Google Colab
✓ Basic imports loaded


## 2. Check GPU and Install Dependencies

In [None]:
# Check GPU
import torch

# Known accelerators, tested in this order against the CUDA device name.
# The first marker contained in the name selects the two lines to print.
_GPU_PROFILES = (
    ('T4', "\n🎯 Perfect! T4 is ideal for this project",
     "   Expected training time: ~1.5-2 hours per agent"),
    ('K80', "\n⚠️  K80 is older but will work",
     "   Expected training time: ~3-4 hours per agent"),
    ('P100', "\n✅ P100 is great!",
     "   Expected training time: ~1-1.5 hours per agent"),
    ('V100', "\n🎉 V100 is excellent!",
     "   Expected training time: ~45-60 min per agent"),
    ('A100', "\n🚀 A100 is top-tier!",
     "   Expected training time: ~20-30 min per agent"),
    ('L4', "\n🎉 L4 is excellent value!",
     "   Expected training time: ~30-45 min per agent"),
)

_rule = "=" * 70
print(_rule)
print("GPU CHECK")
print(_rule)

if not torch.cuda.is_available():
    # CPU fallback: training still works, only slower
    print("❌ No GPU available - using CPU")
    print("   Expected training time: ~8-12 hours per agent")
    print("\n💡 Enable GPU: Runtime → Change runtime type → GPU")
    DEVICE = 'cpu'
else:
    gpu_name = torch.cuda.get_device_name(0)
    # total_memory is reported in bytes; shown in GB (1e9 bytes)
    gpu_memory = torch.cuda.get_device_properties(0).total_memory / 1e9

    print(f"✓ GPU Available: {gpu_name}")
    print(f"✓ GPU Memory: {gpu_memory:.1f} GB")

    for _marker, _headline, _estimate in _GPU_PROFILES:
        if _marker in gpu_name:
            print(_headline)
            print(_estimate)
            break

    DEVICE = 'cuda'

print(_rule)

GPU CHECK
✓ GPU Available: Tesla T4
✓ GPU Memory: 15.6 GB

🎯 Perfect! T4 is ideal for this project
   Expected training time: ~1.5-2 hours per agent


In [None]:
# Upload your project files
from google.colab import files
import zipfile

# Section banner for the project-files step
print("=" * 70, "UPLOAD PROJECT FILES", "=" * 70, sep="\n")
# print("\nPlease upload your project ZIP file containing:")
# print("  - configs/")
# print("  - envs/")
# print("  - models/")
# print("  - utils/")
# print("  - config_files/")
# print("\n" + "="*70 + "\n")

# uploaded = files.upload()

# # Extract uploaded files
# for filename in uploaded.keys():
#     if filename.endswith('.zip'):
#         print(f"\nExtracting {filename}...")
#         with zipfile.ZipFile(filename, 'r') as zip_ref:
#             zip_ref.extractall('/content')
#         print(f"✓ Extracted to /content/")

ZIP_PATH = "/content/drive/MyDrive/Colab Notebooks/Risk_aware_RL/Risk_aware_RL_0211.zip"  # if stored in Drive

# 2) Where to extract
PROJECT_DIR = "/content/Risk_aware_RL_Provisioning"

import importlib
import os
import pathlib
import shutil
import zipfile

# Validate the archive BEFORE touching the previous extraction, so a missing
# or corrupt ZIP never leaves the runtime without a usable project directory.
if not os.path.isfile(ZIP_PATH):
    raise FileNotFoundError(f"Project archive not found: {ZIP_PATH}")
if not zipfile.is_zipfile(ZIP_PATH):
    raise zipfile.BadZipFile(f"Project archive is not a valid ZIP file: {ZIP_PATH}")

# Start from a clean directory so files removed from the archive do not linger
if os.path.exists(PROJECT_DIR):
    shutil.rmtree(PROJECT_DIR)

with zipfile.ZipFile(ZIP_PATH, "r") as z:
    z.extractall(PROJECT_DIR)

# Keep exactly one entry for the project at the FRONT of sys.path; re-running
# the cell must not stack duplicate entries.
while PROJECT_DIR in sys.path:
    sys.path.remove(PROJECT_DIR)
sys.path.insert(0, PROJECT_DIR)

# The directory was (re)created while the interpreter is running, so drop any
# cached directory listings held by the import system.
importlib.invalidate_caches()

# # Verify required directories exist
# required_dirs = ['configs', 'envs', 'models', 'utils', 'config_files']
# missing = [d for d in required_dirs if not os.path.exists(f'/content/{d}')]

# if missing:
#     print(f"\n⚠️  Missing directories: {missing}")
#     print("Please ensure your ZIP contains all required folders.")
# else:
#     print("\n✓ All required directories present!")

UPLOAD PROJECT FILES


In [None]:
# Install dependencies (if needed)
# The `!` prefix is IPython/Colab shell syntax, so this cell only runs inside a
# notebook. It installs the packages listed in the extracted project's
# requirements.txt; `-q` keeps pip's output short.
print("Installing dependencies...\n")

!pip -q install -r /content/Risk_aware_RL_Provisioning/requirements.txt

print("\n✓ Dependencies installed")

Installing dependencies...

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m188.0/188.0 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m93.2/93.2 kB[0m [31m11.0 MB/s[0m eta [36m0:00:00[0m
[?25h
✓ Dependencies installed


## 3. Import Project Modules

In [None]:
# RL imports
from stable_baselines3 import PPO
from stable_baselines3.common.callbacks import EvalCallback, CheckpointCallback, CallbackList, BaseCallback
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv

# Optional dependency: sb3_contrib provides MaskablePPO and the ActionMasker
# wrapper. MASKABLE_AVAILABLE records whether both could be imported.
try:
    from sb3_contrib import MaskablePPO
    from sb3_contrib.common.wrappers import ActionMasker
except ImportError:
    MASKABLE_AVAILABLE = False
    print("⚠️  MaskablePPO not available")
else:
    MASKABLE_AVAILABLE = True
    print("✓ MaskablePPO available")

# Project imports
from configs.config import get_default_config
from utils.topology import load_topology, compute_ksp, add_link_ids_to_paths
from utils.qot import get_default_modulations, SimpleQoTProvider, GSNRQoTProvider
from utils.criticality import compute_link_criticality_betweenness
from utils.traffic import TrafficGenerator
from envs.risk_aware_env import RiskAwareProvisioningEnv
from envs.state_encoder import StateEncoder, EncoderConfig
from utils.qot import slots_needed as qot_slots_needed

# Optional project module: the CVaR variant of MaskablePPO.
# TRUE_CVAR_AVAILABLE records whether it could be imported.
try:
    from models.cvar_maskable_ppo import CVaRMaskablePPO
except ImportError:
    TRUE_CVAR_AVAILABLE = False
    print("⚠️  TRUE CVaR-MaskablePPO not available")
else:
    TRUE_CVAR_AVAILABLE = True
    print("✓ TRUE CVaR-MaskablePPO available")

print("\n✓ All modules imported successfully")

Gym has been unmaintained since 2022 and does not support NumPy 2.0 amongst other critical functionality.
Please upgrade to Gymnasium, the maintained drop-in replacement of Gym, or contact the authors of your software and request that they upgrade.
See the migration guide at https://gymnasium.farama.org/introduction/migration_guide/ for additional information.


✓ MaskablePPO available


  return datetime.utcnow().replace(tzinfo=utc)


Install with: pip install torch-geometric
✓ TRUE CVaR-MaskablePPO available

✓ All modules imported successfully


## 4. Configuration

In [None]:
# Load configuration
from configs.config import print_config, save_config
# Start from the project's defaults and print them; later cells override
# individual fields through config['network'], config['traffic'],
# config['env'] and config['training'].
config = get_default_config()
print_config(config)

CONFIGURATION

NETWORK:
----------------------------------------------------------------------
  topology_name........................... US24
  topology_path........................... config_files/topo_us24_txOnly.xlsx
  k_paths................................. 5
  bands................................... ['C', 'L', 'S']
  slots_per_band.......................... 400
  slot_bandwidth_ghz...................... 12.500000
  guard_band_slots........................ 1
  gsnr_data_path.......................... None
  gsnr_channel_spacing_ghz................ 50.000000

TRAFFIC:
----------------------------------------------------------------------
  mean_service_holding_time............... 900.000000
  mean_service_inter_arrival_time......... 1.000000
  bit_rates............................... [100, 200, 400]
  bit_rate_probabilities.................. list

ENV:
----------------------------------------------------------------------
  episode_length.......................... 1000
  use_mask

In [None]:
from logging import DEBUG
# topology setting: both data files are derived from the topology name
topology_name = 'US24'
config['network'].topology_name = topology_name
config['network'].topology_path = f"{PROJECT_DIR}/config_files/topo_{topology_name.lower()}_txOnly.xlsx"
config['network'].gsnr_data_path = f"{PROJECT_DIR}/config_files/{topology_name.lower()}_roadm_all_pairs_ksp_gsnr.pkl"

# traffic setting
TRAFFIC_LOAD_ERLANGS = 900  # Target traffic load
config['traffic'].mean_service_holding_time = 10
# Load = (1/inter_arrival) × holding_time
# So: inter_arrival = holding_time / load
config['traffic'].mean_service_inter_arrival_time = (
    config['traffic'].mean_service_holding_time / TRAFFIC_LOAD_ERLANGS
)
config['traffic'].bit_rate_probabilities = [0.5, 0.3, 0.2]

# Bit-rate weighted rewards, normalised by the smallest offered bit rate
config['env'].reward_scheme = 'bitrate_weighted'
config['env'].bitrate_normalization = min(config['traffic'].bit_rates)

# Training parameters
config['training'].total_timesteps = 500_000
config['env'].episode_length = 1000
config['training'].n_steps = 1000
config['training'].batch_size = 250
config['training'].n_epochs = 4
config['training'].device = DEVICE
config['training'].seed = 42

# CVaR parameters (for CVaR agent only)
CVAR_ALPHA = 0.1
CVAR_WEIGHT = 0.5

# Experiment directory in Google Drive.
# Set RESUME_EXPERIMENT to an existing folder name to continue that run (its
# checkpoints are picked up by the training cells); set it to None to start a
# fresh, timestamped experiment.
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
RESUME_EXPERIMENT = 'experiment_20260211_061750'

EXPERIMENT_DIR = os.path.join(DRIVE_ROOT, RESUME_EXPERIMENT or f"experiment_{timestamp}")
BASELINE_DIR = os.path.join(EXPERIMENT_DIR, "baseline_maskable_ppo")
CVAR_DIR = os.path.join(EXPERIMENT_DIR, "cvar_maskable_ppo")
COMPARISON_DIR = os.path.join(EXPERIMENT_DIR, "comparison")

os.makedirs(BASELINE_DIR, exist_ok=True)
os.makedirs(CVAR_DIR, exist_ok=True)
os.makedirs(COMPARISON_DIR, exist_ok=True)

print("="*70)
print("EXPERIMENT CONFIGURATION")
print("="*70)
print_config(config)

save_config(config, filepath=os.path.join(EXPERIMENT_DIR, 'experiment_config.json'))

print("\n✓ Configuration saved to Drive")

EXPERIMENT CONFIGURATION
CONFIGURATION

NETWORK:
----------------------------------------------------------------------
  topology_name........................... US24
  topology_path........................... /content/Risk_aware_RL_Provisioning/config_files/topo_us24_txOnly.xlsx
  k_paths................................. 5
  bands................................... ['C', 'L', 'S']
  slots_per_band.......................... 400
  slot_bandwidth_ghz...................... 12.500000
  guard_band_slots........................ 1
  gsnr_data_path.......................... /content/Risk_aware_RL_Provisioning/config_files/us24_roadm_all_pairs_ksp_gsnr.pkl
  gsnr_channel_spacing_ghz................ 50.000000

TRAFFIC:
----------------------------------------------------------------------
  mean_service_holding_time............... 10
  mean_service_inter_arrival_time......... 0.011111
  bit_rates............................... [100, 200, 400]
  bit_rate_probabilities.................. [0.5, 0.3

## 5. Prepare Environment Data

In [None]:
print("Preparing environment data...\n")

# Shorthand for the network section; the same object is read throughout
net_cfg = config['network']

# Step 1: topology graph
print("[1/5] Loading topology...")
topology = load_topology(net_cfg.topology_path, k_paths=net_cfg.k_paths)
print(f"  ✓ {topology.number_of_nodes()} nodes, {topology.number_of_edges()} edges")

# Step 2: candidate paths per source-destination pair, annotated with link ids
print("\n[2/5] Computing K-shortest paths...")
ksp_dict = compute_ksp(
    G=topology,
    k=net_cfg.k_paths,
    gsnr_data_path=net_cfg.gsnr_data_path,
)
add_link_ids_to_paths(topology, ksp_dict)
total_paths = sum(map(len, ksp_dict.values()))
print(f"  ✓ {total_paths} paths for {len(ksp_dict)} SD pairs")

# Step 3: QoT provider - GSNR-based when the data file is configured and
# present on disk, otherwise the length-based fallback
print("\n[3/5] Setting up QoT provider...")
modulations = get_default_modulations()

if not (net_cfg.gsnr_data_path and os.path.exists(net_cfg.gsnr_data_path)):
    print("  Using simple length-based QoT")
    path_lengths = {
        path.path_id: path.length
        for paths in ksp_dict.values()
        for path in paths
    }
    qot_provider = SimpleQoTProvider(modulations, path_lengths)
else:
    print("  Using GSNR-based QoT")
    qot_provider = GSNRQoTProvider(
        gsnr_data_path=net_cfg.gsnr_data_path,
        modulations=modulations,
        ksp_dict=ksp_dict,
        channel_spacing_ghz=net_cfg.gsnr_channel_spacing_ghz,
        slot_bandwidth_ghz=net_cfg.slot_bandwidth_ghz,
    )
print("  ✓ QoT provider ready")

# Step 4: per-link criticality scores
print("\n[4/5] Computing link criticality...")
edge_criticality = compute_link_criticality_betweenness(topology)
print(f"  ✓ Criticality range: [{edge_criticality.min():.3f}, {edge_criticality.max():.3f}]")

# Step 5: encoder configuration; bands are passed as indices 0..len(bands)-1
print("\n[5/5] Creating state encoder...")
node_count = topology.number_of_nodes()
encoder_config = EncoderConfig(
    num_nodes=node_count,
    bands=[*range(len(net_cfg.bands))],
    K=net_cfg.k_paths,
    H_max=node_count,
    num_mods=len(modulations),
    delta_norm_db=10.0,
    highrisk_q=config['env'].highrisk_quantile,
)

def slots_needed_fn(bitrate, modulation, band):
    """Return the slot count `qot_slots_needed` computes for a request.

    Args:
        bitrate: Requested bit rate, forwarded unchanged.
        modulation: Object whose `spectral_efficiency` attribute is forwarded.
        band: Accepted as part of the callback signature but not used here.

    Returns:
        Whatever `qot_slots_needed` returns for the bit rate, spectral
        efficiency, and the configured slot bandwidth and guard-band slots.
    """
    network = config['network']
    efficiency = modulation.spectral_efficiency
    return qot_slots_needed(
        bitrate,
        efficiency,
        network.slot_bandwidth_ghz,
        network.guard_band_slots,
    )

# Assemble the state encoder from the pieces prepared in the steps above
encoder = StateEncoder(
    slots_needed_fn=slots_needed_fn,
    qot_provider=qot_provider,
    edge_criticality=edge_criticality,
    num_links=topology.number_of_edges(),
    cfg=encoder_config,
)
print("  ✓ Observation dimension: {}".format(encoder.obs_dim()))

print("\n✓ Environment data preparation complete!")

Preparing environment data...

[1/5] Loading topology...
  Loaded topology: 24 nodes, 86 directed edges (43 physical links)
  ✓ 24 nodes, 86 edges

[2/5] Computing K-shortest paths...
Extracting K-shortest paths from GSNR data...


  return datetime.utcnow().replace(tzinfo=utc)


  ✓ Extracted 2760 paths for 552 SD pairs
  ✓ 2760 paths for 552 SD pairs

[3/5] Setting up QoT provider...
  Using GSNR-based QoT
  ✓ QoT provider ready

[4/5] Computing link criticality...
  ✓ Criticality range: [0.050, 1.000]

[5/5] Creating state encoder...
  ✓ Observation dimension: 1475

✓ Environment data preparation complete!


## 6. Create Environment Factory

In [None]:
from gymnasium.wrappers import RecordEpisodeStatistics
def make_env(seed=42, use_masking=True, log_file=None):
    """Build one provisioning environment, optionally wrapped for masking.

    Args:
        seed: Passed to both the traffic generator and the environment.
        use_masking: Forwarded as `use_action_masking`; when true and
            sb3_contrib is available, the env is wrapped in `ActionMasker`.
        log_file: Forwarded to the environment unchanged.

    Returns:
        The `RiskAwareProvisioningEnv`, wrapped in `ActionMasker` when
        masking applies.
    """
    net_cfg = config['network']
    traffic_cfg = config['traffic']
    env_cfg = config['env']

    traffic_gen = TrafficGenerator(
        nodes=list(topology.nodes()),
        mean_holding_time=traffic_cfg.mean_service_holding_time,
        mean_inter_arrival=traffic_cfg.mean_service_inter_arrival_time,
        bit_rates=traffic_cfg.bit_rates,
        bit_rate_probs=traffic_cfg.bit_rate_probabilities,
        seed=seed,
    )

    env = RiskAwareProvisioningEnv(
        topology=topology,
        ksp_dict=ksp_dict,
        qot_provider=qot_provider,
        edge_criticality=edge_criticality,
        encoder=encoder,
        bands=list(range(len(net_cfg.bands))),
        slots_per_band=net_cfg.slots_per_band,
        slot_bandwidth_ghz=net_cfg.slot_bandwidth_ghz,
        guard_slots=net_cfg.guard_band_slots,
        K=net_cfg.k_paths,
        episode_length=env_cfg.episode_length,
        traffic_generator=traffic_gen,
        use_action_masking=use_masking,
        reward_scheme=env_cfg.reward_scheme,
        seed=seed,
        log_file=log_file,
    )

    # Episode-statistics wrappers are currently disabled; kept for reference:
    # env = Monitor(env)
    # env = RecordEpisodeStatistics(env)  # ← This handles new API correctly

    if not (use_masking and MASKABLE_AVAILABLE):
        return env

    # ActionMasker reads the mask from the innermost environment
    return ActionMasker(env, lambda wrapped: wrapped.unwrapped.action_masks())

print("✓ Environment factory created")

✓ Environment factory created


## 7. Custom Callback to Save to Drive

In [None]:
class DriveCheckpointCallback(BaseCallback):
    """Periodically save the model being trained into `save_path`.

    In this notebook `save_path` points at Google Drive, so the checkpoints
    remain available after a runtime disconnect.

    Args:
        save_freq: Save whenever the callback's call count is a multiple of this.
        save_path: Directory for checkpoints; created if missing.
        name_prefix: Leading part of each checkpoint file name.
        verbose: When above 0, print a line for every checkpoint written.
    """

    def __init__(self, save_freq, save_path, name_prefix='model', verbose=1):
        super().__init__(verbose)
        self.save_freq, self.save_path, self.name_prefix = save_freq, save_path, name_prefix
        os.makedirs(save_path, exist_ok=True)

    def _on_step(self):
        """Save a checkpoint on every `save_freq`-th call; always return True."""
        if self.n_calls % self.save_freq != 0:
            return True

        # File name carries the model's total timestep count, not the call count
        checkpoint_name = f"{self.name_prefix}_{self.num_timesteps}_steps"
        model_path = os.path.join(self.save_path, checkpoint_name)
        self.model.save(model_path)
        if self.verbose > 0:
            print(f"✓ Checkpoint saved to Drive: {model_path}")
        return True

print("✓ Drive checkpoint callback created")

✓ Drive checkpoint callback created


## 8. Train Baseline Agent (MaskablePPO - No CVaR)

In [None]:
# import time

# print("="*70)
# print("TRAINING BASELINE AGENT (MaskablePPO - No CVaR)")
# print("="*70)
# print(f"\nThis is the baseline for comparison.")
# print(f"Standard PPO with action masking, but NO CVaR optimization.\n")
# print("="*70)

# # Create environments
# baseline_log = os.path.join(BASELINE_DIR, 'service_allocation.log')
# baseline_train_env = DummyVecEnv([lambda: make_env(config['training'].seed, use_masking=True, log_file=baseline_log)])
# baseline_eval_env = DummyVecEnv([lambda: make_env(config['training'].seed + 1000, use_masking=True)])

# # PPO configuration (WITH EXPLORATION for better learning)
# ppo_kwargs = {
#     'learning_rate': config['training'].learning_rate,
#     'n_steps': config['training'].n_steps,
#     'batch_size': config['training'].batch_size,
#     'n_epochs': config['training'].n_epochs,
#     'gamma': config['training'].gamma,
#     'gae_lambda': config['training'].gae_lambda,
#     'clip_range': config['training'].clip_range,
#     'ent_coef': config['training'].ent_coef,
#     'vf_coef': config['training'].vf_coef,
#     'verbose': 1,
#     'device': DEVICE,
#     'seed': config['training'].seed,
#     'tensorboard_log': BASELINE_DIR
# }

# print("\n💡 Exploration enabled (ent_coef=0.01) for better learning!")

# # Create baseline agent
# if MASKABLE_AVAILABLE:
#     baseline_agent = MaskablePPO(
#         policy='MlpPolicy',
#         env=baseline_train_env,
#         **ppo_kwargs
#     )
#     print("✓ Created MaskablePPO baseline agent")
# else:
#     baseline_agent = PPO(
#         policy='MlpPolicy',
#         env=baseline_train_env,
#         **ppo_kwargs
#     )
#     print("✓ Created PPO baseline agent (MaskablePPO not available)")

# # Setup callbacks (save to Drive!)
# baseline_eval_callback = EvalCallback(
#     baseline_eval_env,
#     best_model_save_path=os.path.join(BASELINE_DIR, 'best_model'),
#     log_path=os.path.join(BASELINE_DIR, 'eval'),
#     eval_freq=config['training'].eval_freq,
#     n_eval_episodes=config['training'].n_eval_episodes,
#     deterministic=True,
#     verbose=1
# )

# baseline_checkpoint_callback = DriveCheckpointCallback(
#     save_freq=config['training'].save_freq,
#     save_path=os.path.join(BASELINE_DIR, 'checkpoints'),
#     name_prefix='baseline',
#     verbose=1
# )

# baseline_callbacks = CallbackList([baseline_eval_callback, baseline_checkpoint_callback])

# # Train baseline
# print(f"\nStarting baseline training...")
# print(f"Expected: Reward increasing from ~250 → ~550")
# print(f"Results will be saved to: {BASELINE_DIR}\n")

# baseline_start = time.time()

# try:
#     baseline_agent.learn(
#         total_timesteps=config['training'].total_timesteps,
#         callback=baseline_callbacks,
#         progress_bar=True
#     )
#     baseline_time = time.time() - baseline_start

#     # Save final model to Drive
#     baseline_final_path = os.path.join(BASELINE_DIR, 'final_model')
#     baseline_agent.save(baseline_final_path)

#     print("\n" + "="*70)
#     print("BASELINE TRAINING COMPLETE!")
#     print("="*70)
#     print(f"Time: {baseline_time/3600:.2f} hours")
#     print(f"Final model saved to Drive: {baseline_final_path}")
#     print("="*70)

#     # Save training info
#     # baseline_info = {
#     #     'agent_type': 'MaskablePPO (Baseline)',
#     #     'total_timesteps': TOTAL_TIMESTEPS,
#     #     'training_time_hours': baseline_time / 3600,
#     #     'device': DEVICE,
#     #     # 'exploration_enabled': True,
#     #     # 'ent_coef': 0.01
#     # }
#     # with open(os.path.join(BASELINE_DIR, 'training_info.json'), 'w') as f:
#     #     json.dump(baseline_info, f, indent=2)

# except KeyboardInterrupt:
#     baseline_time = time.time() - baseline_start
#     print("\nBaseline training interrupted. Progress saved to Drive.")

# # Close environments
# baseline_train_env.close()
# baseline_eval_env.close()

In [None]:
import time
import os
import glob
import re

print("="*70)
print("RESUMING BASELINE AGENT TRAINING (MaskablePPO - No CVaR)")
print("="*70)

# ============================================
# STEP 1: FIND LATEST CHECKPOINT
# ============================================
checkpoint_dir = os.path.join(BASELINE_DIR, 'checkpoints')
checkpoint_pattern = os.path.join(checkpoint_dir, 'baseline_*_steps.zip')

# Checkpoint names look like "baseline_46000_steps.zip"
_CHECKPOINT_NAME = re.compile(r'baseline_(\d+)_steps\.zip')


def extract_steps(cp):
    """Return the step count encoded in a checkpoint path, or 0 if absent."""
    match = _CHECKPOINT_NAME.fullmatch(os.path.basename(cp))
    return int(match.group(1)) if match else 0


# Ignore files that match the glob but carry no numeric step count, so a stray
# file can never be selected as the "latest" checkpoint with 0 steps.
checkpoints = [
    cp for cp in glob.glob(checkpoint_pattern)
    if _CHECKPOINT_NAME.fullmatch(os.path.basename(cp))
]

if checkpoints:
    # Sort by step number to get the latest
    checkpoints.sort(key=extract_steps)
    latest_checkpoint = checkpoints[-1]
    checkpoint_steps = extract_steps(latest_checkpoint)

    print(f"\n✅ Found checkpoint: {os.path.basename(latest_checkpoint)}")
    print(f"   Steps completed: {checkpoint_steps:,}")
    print(f"   Progress: {checkpoint_steps/config['training'].total_timesteps*100:.1f}%")

    RESUME_FROM_CHECKPOINT = True
    CHECKPOINT_PATH = latest_checkpoint
    STARTING_STEPS = checkpoint_steps

else:
    print(f"\n⚠️  No checkpoints found in {checkpoint_dir}")
    print(f"   Starting training from scratch")

    RESUME_FROM_CHECKPOINT = False
    CHECKPOINT_PATH = None
    STARTING_STEPS = 0

# Remaining timesteps, never negative: a checkpoint at or beyond the target
# simply means there is nothing left to train.
REMAINING_TIMESTEPS = max(0, config['training'].total_timesteps - STARTING_STEPS)

print(f"\n{'='*70}")
print(f"Training Plan:")
print(f"  Starting from:  {STARTING_STEPS:,} steps")
print(f"  Training for:   {REMAINING_TIMESTEPS:,} more steps")
print(f"  Target total:   {config['training'].total_timesteps:,} steps")
print(f"{'='*70}\n")

# ============================================
# STEP 2: CREATE ENVIRONMENTS
# ============================================
baseline_log = os.path.join(BASELINE_DIR, 'service_allocation.log')


def _baseline_train_env_fn():
    """Factory for the training env (writes to the allocation log)."""
    return make_env(config['training'].seed, use_masking=True, log_file=baseline_log)


def _baseline_eval_env_fn():
    """Factory for the evaluation env, seeded 1000 above the training seed."""
    return make_env(config['training'].seed + 1000, use_masking=True)


baseline_train_env = DummyVecEnv([_baseline_train_env_fn])
baseline_eval_env = DummyVecEnv([_baseline_eval_env_fn])

print("✓ Environments created")

# ============================================
# STEP 3: LOAD OR CREATE AGENT
# ============================================

# Pick the algorithm once; MaskablePPO is only referenced when it was imported
_agent_cls = MaskablePPO if MASKABLE_AVAILABLE else PPO
_agent_label = 'MaskablePPO' if MASKABLE_AVAILABLE else 'PPO'

if not RESUME_FROM_CHECKPOINT:
    # Create new agent from scratch
    ppo_kwargs = dict(
        learning_rate=config['training'].learning_rate,
        n_steps=config['training'].n_steps,
        batch_size=config['training'].batch_size,
        n_epochs=config['training'].n_epochs,
        gamma=config['training'].gamma,
        gae_lambda=config['training'].gae_lambda,
        clip_range=config['training'].clip_range,
        ent_coef=config['training'].ent_coef,
        vf_coef=config['training'].vf_coef,
        verbose=1,
        device=DEVICE,
        seed=config['training'].seed,
        tensorboard_log=BASELINE_DIR,
    )

    baseline_agent = _agent_cls(
        policy='MlpPolicy',
        env=baseline_train_env,
        **ppo_kwargs
    )
    print(f"✓ Created new {_agent_label} baseline agent")

else:
    # Load from checkpoint
    print(f"\nLoading checkpoint: {os.path.basename(CHECKPOINT_PATH)}")

    baseline_agent = _agent_cls.load(
        CHECKPOINT_PATH,
        env=baseline_train_env,
        device=DEVICE,
        # Override the stored schedules with the currently configured values
        custom_objects={
            'learning_rate': config['training'].learning_rate,
            'clip_range': config['training'].clip_range,
        },
    )
    print(f"✓ {_agent_label} checkpoint loaded")

    # The model's own counter wins over the number parsed from the file name
    actual_timesteps = baseline_agent.num_timesteps
    if actual_timesteps != STARTING_STEPS:
        print(f"⚠️  Model timesteps ({actual_timesteps:,}) differ from expected ({STARTING_STEPS:,})")
        print(f"   Using model's timestep count: {actual_timesteps:,}")
        STARTING_STEPS = actual_timesteps
        REMAINING_TIMESTEPS = config['training'].total_timesteps - actual_timesteps

# ============================================
# STEP 4: SETUP CALLBACKS
# ============================================
# A MaskablePPO agent must be evaluated WITH action masks. The plain
# EvalCallback calls predict() without masks, so its evaluation episodes (and
# the "best model" it selects) would not reflect how the agent is trained.
# sb3_contrib ships a drop-in replacement that passes the masks through.
if MASKABLE_AVAILABLE:
    from sb3_contrib.common.maskable.callbacks import MaskableEvalCallback as _EvalCallbackCls
else:
    _EvalCallbackCls = EvalCallback

# Periodic evaluation on the separate eval env; best model and logs go to Drive
baseline_eval_callback = _EvalCallbackCls(
    baseline_eval_env,
    best_model_save_path=os.path.join(BASELINE_DIR, 'best_model'),
    log_path=os.path.join(BASELINE_DIR, 'eval'),
    eval_freq=config['training'].eval_freq,
    n_eval_episodes=config['training'].n_eval_episodes,
    deterministic=True,
    verbose=1
)

# Periodic checkpoints; the 'baseline' prefix is what the resume step globs for
baseline_checkpoint_callback = DriveCheckpointCallback(
    save_freq=config['training'].save_freq,
    save_path=os.path.join(BASELINE_DIR, 'checkpoints'),
    name_prefix='baseline',
    verbose=1
)

baseline_callbacks = CallbackList([baseline_eval_callback, baseline_checkpoint_callback])

print("✓ Callbacks configured")

# ============================================
# STEP 5: TRAIN/RESUME TRAINING
# ============================================
if RESUME_FROM_CHECKPOINT:
    print(f"\n{'='*70}")
    print(f"🚀 RESUMING TRAINING FROM STEP {STARTING_STEPS:,}")
    print(f"{'='*70}")
else:
    print(f"\n{'='*70}")
    print(f"🚀 STARTING NEW TRAINING")
    print(f"{'='*70}")

print(f"Standard PPO with action masking, but NO CVaR optimization.")
print(f"Expected: Reward increasing from ~250 → ~550")
print(f"Results saving to: {BASELINE_DIR}")
print(f"\nTraining for {max(0, REMAINING_TIMESTEPS):,} timesteps...")
print(f"{'='*70}\n")

baseline_start = time.time()
baseline_final_path = os.path.join(BASELINE_DIR, 'final_model')
_training_info_path = os.path.join(BASELINE_DIR, 'training_info.json')

# When the latest checkpoint already reached the target, re-running this cell
# must not overwrite the finished run's model and training record with an
# empty (0-hour) session.
_has_work = REMAINING_TIMESTEPS > 0

try:
    if _has_work:
        baseline_agent.learn(
            total_timesteps=REMAINING_TIMESTEPS,
            callback=baseline_callbacks,
            reset_num_timesteps=False,  # ⚠️ CRITICAL: Don't reset the step counter!
            tb_log_name="PPO",           # Use same tensorboard log name
            progress_bar=True
        )
    else:
        print("ℹ️  Target timesteps already reached - skipping training")
    baseline_time = time.time() - baseline_start

    # Save final model to Drive (model.save appends ".zip" to the path).
    # Skipped only when nothing was trained AND a final model already exists.
    if _has_work or not os.path.exists(baseline_final_path + '.zip'):
        baseline_agent.save(baseline_final_path)

    print("\n" + "="*70)
    print("✅ BASELINE TRAINING COMPLETE!")
    print("="*70)
    print(f"Total timesteps: {baseline_agent.num_timesteps:,}")
    print(f"Session time: {baseline_time/3600:.2f} hours")
    if RESUME_FROM_CHECKPOINT:
        print(f"(Resumed from step {STARTING_STEPS:,})")
    print(f"Final model saved to: {baseline_final_path}")
    print("="*70)

    # Save training info, unless this session did no training and a record
    # from the real training session is already on Drive
    if _has_work or not os.path.exists(_training_info_path):
        baseline_info = {
            'agent_type': 'MaskablePPO (Baseline)',
            'total_timesteps': baseline_agent.num_timesteps,
            'session_training_time_hours': baseline_time / 3600,
            'resumed_from_checkpoint': RESUME_FROM_CHECKPOINT,
            'starting_steps': STARTING_STEPS if RESUME_FROM_CHECKPOINT else 0,
            'device': str(DEVICE),
        }
        with open(_training_info_path, 'w') as f:
            json.dump(baseline_info, f, indent=2)

        print(f"✓ Training info saved")
    else:
        print("✓ Existing training info kept (no training in this session)")

except KeyboardInterrupt:
    # Manual stop: report progress; checkpoints written so far stay on Drive
    baseline_time = time.time() - baseline_start
    print(f"\n⚠️  Training interrupted at step {baseline_agent.num_timesteps:,}")
    print(f"   Session time: {baseline_time/3600:.2f} hours")
    print(f"   Latest checkpoint saved to Drive: {checkpoint_dir}")
    print(f"   You can resume from this point by re-running this cell")

except Exception as e:
    # Report where training stopped, then re-raise so the traceback is shown
    baseline_time = time.time() - baseline_start
    print(f"\n❌ Training crashed with error:")
    print(f"   {type(e).__name__}: {str(e)}")
    print(f"\n   Current step: {baseline_agent.num_timesteps:,}")
    print(f"   Session time: {baseline_time/3600:.2f} hours")
    print(f"   Latest checkpoint saved to: {checkpoint_dir}")
    print(f"\n💡 Fix the error and re-run this cell to resume training")
    raise

finally:
    # Always close environments
    baseline_train_env.close()
    baseline_eval_env.close()
    print("\n✓ Environments closed")

RESUMING BASELINE AGENT TRAINING (MaskablePPO - No CVaR)

✅ Found checkpoint: baseline_500000_steps.zip
   Steps completed: 500,000
   Progress: 100.0%

Training Plan:
  Starting from:  500,000 steps
  Training for:   0 more steps
  Target total:   500,000 steps

✓ Environments created

Loading checkpoint: baseline_500000_steps.zip
✓ MaskablePPO checkpoint loaded
✓ Callbacks configured

🚀 RESUMING TRAINING FROM STEP 500,000
Standard PPO with action masking, but NO CVaR optimization.
Expected: Reward increasing from ~250 → ~550
Results saving to: /content/drive/MyDrive/Colab Notebooks/Risk_aware_RL/experiment_20260211_061750/baseline_maskable_ppo

Training for 0 timesteps...

Logging to /content/drive/MyDrive/Colab Notebooks/Risk_aware_RL/experiment_20260211_061750/baseline_maskable_ppo/PPO_0


Output()

  return datetime.utcnow().replace(tzinfo=utc)



✅ BASELINE TRAINING COMPLETE!
Total timesteps: 500,000
Session time: 0.00 hours
(Resumed from step 500,000)
Final model saved to: /content/drive/MyDrive/Colab Notebooks/Risk_aware_RL/experiment_20260211_061750/baseline_maskable_ppo/final_model
✓ Training info saved

✓ Environments closed


In [None]:
# # print("\n" + "="*70)
# # print("Testing CVaR Episode Tracking")
# # print("="*70)

# print("\n" + "="*70)
# print("Testing CVaR Episode Tracking (BYPASSING MONITOR)")
# print("="*70)

# # Create environment DIRECTLY (skip make_env to bypass Monitor)
# traffic_gen = TrafficGenerator(
#     nodes=list(topology.nodes()),
#     mean_holding_time=config['traffic'].mean_service_holding_time,
#     mean_inter_arrival=config['traffic'].mean_service_inter_arrival_time,
#     bit_rates=config['traffic'].bit_rates,
#     bit_rate_probs=config['traffic'].bit_rate_probabilities,
#     seed=42
# )

# test_env = RiskAwareProvisioningEnv(
#     topology=topology,
#     ksp_dict=ksp_dict,
#     qot_provider=qot_provider,
#     edge_criticality=edge_criticality,
#     encoder=encoder,
#     bands=list(range(len(config['network'].bands))),
#     slots_per_band=config['network'].slots_per_band,
#     slot_bandwidth_ghz=config['network'].slot_bandwidth_ghz,
#     guard_slots=config['network'].guard_band_slots,
#     K=config['network'].k_paths,
#     episode_length=config['env'].episode_length,
#     traffic_generator=traffic_gen,
#     use_action_masking=True,
#     reward_scheme=config['env'].reward_scheme,
#     seed=42
# )

# # Add ActionMasker only (no Monitor!)
# if MASKABLE_AVAILABLE:
#     test_env = ActionMasker(test_env, lambda e: e.unwrapped.action_masks())


# # Run for 2 episodes
# episode_count = 0
# episode_returns = []

# obs, info = test_env.reset()
# done = False
# episode_reward = 0
# step_count = 0

# for _ in range(2500):  # Enough for 2 episodes of 1000 steps each
#     action = test_env.action_space.sample()
#     obs, reward, terminated, truncated, info = test_env.step(action)

#     episode_reward += reward
#     step_count += 1

#     done = terminated or truncated

#     if done:
#         episode_count += 1
#         print(f"\n✓ Episode {episode_count} completed:")
#         print(f"  Steps: {step_count}")
#         print(f"  Return: {episode_reward:.1f}")

#         # Check if 'episode' key exists in info
#         if 'episode' in info:
#             print(f"  Info['episode']: {info['episode']}")
#             episode_returns.append(info['episode']['r'])
#         else:
#             print(f"  ⚠️ WARNING: 'episode' key missing from info!")

#         # Reset
#         obs, info = test_env.reset()
#         episode_reward = 0
#         step_count = 0

#         if episode_count >= 2:
#             break

# test_env.close()

# print(f"\n{'='*70}")
# print(f"Test Summary:")
# print(f"  Episodes completed: {episode_count}")
# print(f"  Returns collected: {len(episode_returns)}")
# if len(episode_returns) > 0:
#     print(f"  Mean return: {np.mean(episode_returns):.1f}")
#     print(f"  Std return:  {np.std(episode_returns):.1f}")
# print(f"{'='*70}\n")


Testing CVaR Episode Tracking (BYPASSING MONITOR)

✓ Episode 1 completed:
  Steps: 1000
  Return: 109.0
  Info['episode']: {'r': 108.9999999999998, 'l': 1000, 't': 11.102136780751236}

✓ Episode 2 completed:
  Steps: 1000
  Return: 115.9
  Info['episode']: {'r': 115.90000000000015, 'l': 1000, 't': 10.963980044368542}

Test Summary:
  Episodes completed: 2
  Returns collected: 2
  Mean return: 112.4
  Std return:  3.5



In [None]:
# Smoke test: build a small CVaR-MaskablePPO agent and run a short training
# burst to confirm the training loop does not hang.
print("\n" + "="*70)
print("Testing Simplified CVaR Model")
print("="*70)

# Create test environment and keep a handle on the vectorized wrapper so it
# can be closed explicitly once the test is over.
test_env = make_env(seed=42, use_masking=True)
test_vec_env = DummyVecEnv([lambda: test_env])

try:
    # Create CVaR agent.
    # device='auto' selects the GPU when one is available and falls back to
    # the CPU otherwise, so the cell also runs on a CPU-only runtime.
    test_agent = CVaRMaskablePPO(
        policy='MlpPolicy',
        env=test_vec_env,
        alpha=0.1,
        cvar_weight=0.5,
        verbose=1,
        learning_rate=3e-4,
        n_steps=2048,
        batch_size=64,
        device='auto',
    )

    print("\n✓ CVaR agent created successfully")
    print("\nRunning 5000 training steps...")

    # Train for just 5000 steps.
    # NOTE(review): rollouts are collected in chunks of n_steps=2048, so the
    # run presumably stops at the first multiple of 2048 at or above 5000.
    test_agent.learn(total_timesteps=5000, progress_bar=True)

    print("\n✅ Test complete - no freezing!")
finally:
    # Always release the environment, even if construction or training fails.
    test_vec_env.close()

Output()


Testing Simplified CVaR Model
Using cuda device

✓ CVaR agent created successfully

Running 5000 training steps...
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 164      |
| time/              |          |
|    fps             | 19       |
|    iterations      | 1        |
|    time_elapsed    | 105      |
|    total_timesteps | 2048     |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 167          |
| time/                   |              |
|    fps                  | 19           |
|    iterations           | 2            |
|    time_elapsed         | 210          |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0126158055 |
|    clip_fraction        | 0.138        |
|    clip_range           | 


✅ Test complete - no freezing!


## 9. Train CVaR Agent (CVaR-MaskablePPO)

In [None]:
import time

# Train the CVaR-MaskablePPO agent and save the final model under CVAR_DIR.
#
# Reads from earlier cells: config, CVAR_DIR, DEVICE, make_env,
# TRUE_CVAR_AVAILABLE, MASKABLE_AVAILABLE, CVaRMaskablePPO, DummyVecEnv,
# EvalCallback, DriveCheckpointCallback, CallbackList.

# Use the exact values that are handed to the agent below, so the banner can
# never disagree with the hyperparameters that are actually trained with.
cvar_alpha = config['training'].cvar_alpha
cvar_weight = config['training'].cvar_weight

print("="*70)
print("TRAINING CVAR AGENT (CVaR-MaskablePPO)")
print("="*70)
print(f"\nThis agent uses TRUE CVaR optimization.")
print(f"Focus on worst {cvar_alpha*100:.0f}% of episodes.")
print(f"CVaR weight: {cvar_weight} (balance between mean and worst-case)\n")
print("="*70)

# Fail fast, before any environment is created, so nothing is left open when
# the CVaR implementation is missing.
if not (TRUE_CVAR_AVAILABLE and MASKABLE_AVAILABLE):
    print("❌ CVaR-MaskablePPO not available!")
    print("   Make sure cvar_maskable_ppo.py is in models/ directory")
    raise ImportError("CVaRMaskablePPO not available")

# Create environments (the evaluation env uses a seed offset of 1000).
cvar_log = os.path.join(CVAR_DIR, 'service_allocation.log')
cvar_train_env = DummyVecEnv([lambda: make_env(config['training'].seed, use_masking=True, log_file=cvar_log)])
cvar_eval_env = DummyVecEnv([lambda: make_env(config['training'].seed + 1000, use_masking=True)])

try:
    ppo_kwargs = {
        'learning_rate': config['training'].learning_rate,
        'n_steps': config['training'].n_steps,
        'batch_size': config['training'].batch_size,
        'n_epochs': config['training'].n_epochs,
        'gamma': config['training'].gamma,
        'gae_lambda': config['training'].gae_lambda,
        'clip_range': config['training'].clip_range,
        'ent_coef': config['training'].ent_coef,
        'vf_coef': config['training'].vf_coef,
        'verbose': 1,
        'device': DEVICE,
        'seed': config['training'].seed,
        # Log TensorBoard data next to the other CVaR artefacts. This was
        # previously BASELINE_DIR and patched on the agent after creation.
        'tensorboard_log': CVAR_DIR,
    }

    # Create CVaR agent
    cvar_agent = CVaRMaskablePPO(
        policy='MlpPolicy',
        env=cvar_train_env,
        alpha=cvar_alpha,
        cvar_weight=cvar_weight,
        **ppo_kwargs
    )
    print("✓ Created TRUE CVaR-MaskablePPO agent")

    # Setup callbacks
    # NOTE(review): in the recorded run eval/mean_reward (~10) is far below
    # rollout/ep_rew_mean (~171). If EvalCallback here is the plain
    # stable_baselines3 callback rather than sb3_contrib's
    # MaskableEvalCallback, evaluation may be running without action masks.
    # Confirm which class is imported.
    cvar_eval_callback = EvalCallback(
        cvar_eval_env,
        best_model_save_path=os.path.join(CVAR_DIR, 'best_model'),
        log_path=os.path.join(CVAR_DIR, 'eval'),
        eval_freq=config['training'].eval_freq,
        n_eval_episodes=config['training'].n_eval_episodes,
        deterministic=True,
        verbose=1
    )

    cvar_checkpoint_callback = DriveCheckpointCallback(
        save_freq=config['training'].save_freq,
        save_path=os.path.join(CVAR_DIR, 'checkpoints'),
        name_prefix='cvar',
        verbose=1
    )

    cvar_callbacks = CallbackList([cvar_eval_callback, cvar_checkpoint_callback])

    # Train CVaR agent
    print(f"\nStarting CVaR training...")
    print(f"Results will be saved to: {CVAR_DIR}\n")

    cvar_start = time.time()

    try:
        cvar_agent.learn(
            total_timesteps=config['training'].total_timesteps,
            callback=cvar_callbacks,
            progress_bar=True
        )
        cvar_time = time.time() - cvar_start

        # Save final model to Drive
        cvar_final_path = os.path.join(CVAR_DIR, 'final_model')
        cvar_agent.save(cvar_final_path)

        print("\n" + "="*70)
        print("CVAR TRAINING COMPLETE!")
        print("="*70)
        print(f"Time: {cvar_time/3600:.2f} hours")
        print(f"Final model saved to Drive: {cvar_final_path}")
        print("="*70)

        # # Save training info
        # cvar_info = {
        #     'agent_type': 'CVaRMaskablePPO',
        #     'total_timesteps': config['training'].total_timesteps,
        #     'training_time_hours': cvar_time / 3600,
        #     'cvar_alpha': CVAR_ALPHA,
        #     'cvar_weight': CVAR_WEIGHT,
        #     'device': DEVICE
        # }
        # with open(os.path.join(CVAR_DIR, 'training_info.json'), 'w') as f:
        #     json.dump(cvar_info, f, indent=2)

    except KeyboardInterrupt:
        # A manual stop is not an error: record the elapsed time and fall
        # through to the cleanup below. No model is saved on this path.
        cvar_time = time.time() - cvar_start
        print("\nCVaR training interrupted. Progress saved to Drive.")

finally:
    # Close environments on every exit path (success, interrupt, or error).
    cvar_train_env.close()
    cvar_eval_env.close()

TRAINING CVAR AGENT (CVaR-MaskablePPO)

This agent uses TRUE CVaR optimization.
Focus on worst 10% of episodes.
CVaR weight: 0.5 (balance between mean and worst-case)

Using cuda device
✓ Created TRUE CVaR-MaskablePPO agent

Starting CVaR training...
Results will be saved to: /content/drive/MyDrive/Colab Notebooks/Risk_aware_RL/experiment_20260211_061750/cvar_maskable_ppo

Logging to /content/drive/MyDrive/Colab Notebooks/Risk_aware_RL/experiment_20260211_061750/cvar_maskable_ppo/MaskablePPO_2


Output()

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 160      |
| time/              |          |
|    fps             | 20       |
|    iterations      | 1        |
|    time_elapsed    | 48       |
|    total_timesteps | 1000     |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 162          |
| time/                   |              |
|    fps                  | 20           |
|    iterations           | 2            |
|    time_elapsed         | 97           |
|    total_timesteps      | 2000         |
| train/                  |              |
|    approx_kl            | 0.0023256452 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    cvar_tail_segments   | 1            |
|    cvar_var_threshold   | 160          |
|    entropy_lo

------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 1e+03        |
|    mean_reward          | 86.8         |
| time/                   |              |
|    total_timesteps      | 5000         |
| train/                  |              |
|    approx_kl            | 0.0016236918 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    cvar_tail_segments   | 1            |
|    cvar_var_threshold   | 173          |
|    entropy_loss         | -2.48        |
|    explained_variance   | -0.0265      |
|    learning_rate        | 0.0003       |
|    loss                 | 0.992        |
|    n_updates            | 16           |
|    policy_gradient_loss | -0.00319     |
|    value_loss           | 3.09         |
------------------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 168      |
| time/              |          |
|    fps             | 7        |
|    iterations      | 5        |
|    time_elapsed    | 698      |
|    total_timesteps | 5000     |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 170          |
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 6            |
|    time_elapsed         | 745          |
|    total_timesteps      | 6000         |
| train/                  |              |
|    approx_kl            | 0.0019085408 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    cvar_tail_segments   | 1            |
|    cvar_var_threshold   | 168          |
|    entropy_lo

------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 1e+03        |
|    mean_reward          | 157          |
| time/                   |              |
|    total_timesteps      | 10000        |
| train/                  |              |
|    approx_kl            | 0.0033517838 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    cvar_tail_segments   | 1            |
|    cvar_var_threshold   | 175          |
|    entropy_loss         | -2.45        |
|    explained_variance   | -0.000806    |
|    learning_rate        | 0.0003       |
|    loss                 | 0.989        |
|    n_updates            | 36           |
|    policy_gradient_loss | -0.00188     |
|    value_loss           | 2.39         |
------------------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 171      |
| time/              |          |
|    fps             | 7        |
|    iterations      | 10       |
|    time_elapsed    | 1392     |
|    total_timesteps | 10000    |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 171          |
| time/                   |              |
|    fps                  | 7            |
|    iterations           | 11           |
|    time_elapsed         | 1440         |
|    total_timesteps      | 11000        |
| train/                  |              |
|    approx_kl            | 0.0039177705 |
|    clip_fraction        | 0.00125      |
|    clip_range           | 0.2          |
|    cvar_tail_segments   | 1            |
|    cvar_var_threshold   | 172          |
|    entropy_lo

------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 1e+03        |
|    mean_reward          | 103          |
| time/                   |              |
|    total_timesteps      | 15000        |
| train/                  |              |
|    approx_kl            | 0.0011969463 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    cvar_tail_segments   | 1            |
|    cvar_var_threshold   | 178          |
|    entropy_loss         | -2.42        |
|    explained_variance   | -3.42e-05    |
|    learning_rate        | 0.0003       |
|    loss                 | 0.901        |
|    n_updates            | 56           |
|    policy_gradient_loss | -0.000635    |
|    value_loss           | 2.04         |
------------------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 170      |
| ti

------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 1e+03        |
|    mean_reward          | 137          |
| time/                   |              |
|    total_timesteps      | 20000        |
| train/                  |              |
|    approx_kl            | 0.0067111948 |
|    clip_fraction        | 0.0163       |
|    clip_range           | 0.2          |
|    cvar_tail_segments   | 1            |
|    cvar_var_threshold   | 174          |
|    entropy_loss         | -2.41        |
|    explained_variance   | 4.77e-07     |
|    learning_rate        | 0.0003       |
|    loss                 | 1.08         |
|    n_updates            | 76           |
|    policy_gradient_loss | -0.00698     |
|    value_loss           | 2.01         |
------------------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 169      |
| time/              |          |
|    fps             | 7        |
|    iterations      | 20       |
|    time_elapsed    | 2790     |
|    total_timesteps | 20000    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 169         |
| time/                   |             |
|    fps                  | 7           |
|    iterations           | 21          |
|    time_elapsed         | 2838        |
|    total_timesteps      | 21000       |
| train/                  |             |
|    approx_kl            | 0.011528218 |
|    clip_fraction        | 0.0925      |
|    clip_range           | 0.2         |
|    cvar_tail_segments   | 1           |
|    cvar_var_threshold   | 163         |
|    entropy_loss         | -2

----------------------------------------
| eval/                   |            |
|    mean_ep_length       | 1e+03      |
|    mean_reward          | 133        |
| time/                   |            |
|    total_timesteps      | 25000      |
| train/                  |            |
|    approx_kl            | 0.00392885 |
|    clip_fraction        | 0.0085     |
|    clip_range           | 0.2        |
|    cvar_tail_segments   | 1          |
|    cvar_var_threshold   | 177        |
|    entropy_loss         | -2.38      |
|    explained_variance   | 6.2e-06    |
|    learning_rate        | 0.0003     |
|    loss                 | 0.857      |
|    n_updates            | 96         |
|    policy_gradient_loss | -0.0023    |
|    value_loss           | 1.87       |
----------------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 170      |
| time/              |          |
|    fps  

-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 1e+03       |
|    mean_reward          | 11.4        |
| time/                   |             |
|    total_timesteps      | 30000       |
| train/                  |             |
|    approx_kl            | 0.017343253 |
|    clip_fraction        | 0.124       |
|    clip_range           | 0.2         |
|    cvar_tail_segments   | 1           |
|    cvar_var_threshold   | 176         |
|    entropy_loss         | -2.38       |
|    explained_variance   | 8.52e-06    |
|    learning_rate        | 0.0003      |
|    loss                 | 1.29        |
|    n_updates            | 116         |
|    policy_gradient_loss | -0.00888    |
|    value_loss           | 2.3         |
-----------------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 171      |
| time/              |          |
|    fps             | 7        |
|    iterations      | 30       |
|    time_elapsed    | 4195     |
|    total_timesteps | 30000    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 171         |
| time/                   |             |
|    fps                  | 7           |
|    iterations           | 31          |
|    time_elapsed         | 4243        |
|    total_timesteps      | 31000       |
| train/                  |             |
|    approx_kl            | 0.011378443 |
|    clip_fraction        | 0.0468      |
|    clip_range           | 0.2         |
|    cvar_tail_segments   | 1           |
|    cvar_var_threshold   | 171         |
|    entropy_loss         | -2

-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 1e+03       |
|    mean_reward          | 7.66        |
| time/                   |             |
|    total_timesteps      | 35000       |
| train/                  |             |
|    approx_kl            | 0.004060293 |
|    clip_fraction        | 0           |
|    clip_range           | 0.2         |
|    cvar_tail_segments   | 1           |
|    cvar_var_threshold   | 175         |
|    entropy_loss         | -2.33       |
|    explained_variance   | 0.0424      |
|    learning_rate        | 0.0003      |
|    loss                 | 0.592       |
|    n_updates            | 136         |
|    policy_gradient_loss | -0.00438    |
|    value_loss           | 2.14        |
-----------------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 171      |
| time/              |  

-------------------------------------------
| eval/                   |               |
|    mean_ep_length       | 1e+03         |
|    mean_reward          | 13.2          |
| time/                   |               |
|    total_timesteps      | 40000         |
| train/                  |               |
|    approx_kl            | 0.00012865967 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    cvar_tail_segments   | 1             |
|    cvar_var_threshold   | 182           |
|    entropy_loss         | -2.31         |
|    explained_variance   | 0.441         |
|    learning_rate        | 0.0003        |
|    loss                 | 1.42          |
|    n_updates            | 156           |
|    policy_gradient_loss | -0.000316     |
|    value_loss           | 1.79          |
-------------------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 171      |
| time/              |          |
|    fps             | 7        |
|    iterations      | 40       |
|    time_elapsed    | 5610     |
|    total_timesteps | 40000    |
---------------------------------
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 171           |
| time/                   |               |
|    fps                  | 7             |
|    iterations           | 41            |
|    time_elapsed         | 5657          |
|    total_timesteps      | 41000         |
| train/                  |               |
|    approx_kl            | 0.00034657685 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    cvar_tail_segments   | 1             |
|    cvar_var_threshold   | 175           |


-------------------------------------------
| eval/                   |               |
|    mean_ep_length       | 1e+03         |
|    mean_reward          | 10.5          |
| time/                   |               |
|    total_timesteps      | 45000         |
| train/                  |               |
|    approx_kl            | 0.00017242177 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    cvar_tail_segments   | 1             |
|    cvar_var_threshold   | 164           |
|    entropy_loss         | -2.3          |
|    explained_variance   | 0.585         |
|    learning_rate        | 0.0003        |
|    loss                 | 0.379         |
|    n_updates            | 176           |
|    policy_gradient_loss | -9.21e-06     |
|    value_loss           | 1.57          |
-------------------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean  

------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 1e+03        |
|    mean_reward          | 10.7         |
| time/                   |              |
|    total_timesteps      | 50000        |
| train/                  |              |
|    approx_kl            | 0.0006297531 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    cvar_tail_segments   | 1            |
|    cvar_var_threshold   | 173          |
|    entropy_loss         | -2.3         |
|    explained_variance   | 0.777        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.421        |
|    n_updates            | 196          |
|    policy_gradient_loss | -0.001       |
|    value_loss           | 1.34         |
------------------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 171      |
| time/              |          |
|    fps             | 7        |
|    iterations      | 50       |
|    time_elapsed    | 7014     |
|    total_timesteps | 50000    |
---------------------------------
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 171           |
| time/                   |               |
|    fps                  | 7             |
|    iterations           | 51            |
|    time_elapsed         | 7062          |
|    total_timesteps      | 51000         |
| train/                  |               |
|    approx_kl            | 0.00051648804 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    cvar_tail_segments   | 1             |
|    cvar_var_threshold   | 166           |


-------------------------------------------
| eval/                   |               |
|    mean_ep_length       | 1e+03         |
|    mean_reward          | 8.52          |
| time/                   |               |
|    total_timesteps      | 55000         |
| train/                  |               |
|    approx_kl            | 0.00037811685 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    cvar_tail_segments   | 1             |
|    cvar_var_threshold   | 181           |
|    entropy_loss         | -2.3          |
|    explained_variance   | 0.85          |
|    learning_rate        | 0.0003        |
|    loss                 | 0.408         |
|    n_updates            | 216           |
|    policy_gradient_loss | -0.00134      |
|    value_loss           | 1.07          |
-------------------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean  

-------------------------------------------
| eval/                   |               |
|    mean_ep_length       | 1e+03         |
|    mean_reward          | 8.18          |
| time/                   |               |
|    total_timesteps      | 60000         |
| train/                  |               |
|    approx_kl            | 0.00021618849 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    cvar_tail_segments   | 1             |
|    cvar_var_threshold   | 173           |
|    entropy_loss         | -2.3          |
|    explained_variance   | 0.846         |
|    learning_rate        | 0.0003        |
|    loss                 | 0.493         |
|    n_updates            | 236           |
|    policy_gradient_loss | -0.000942     |
|    value_loss           | 0.944         |
-------------------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 171      |
| time/              |          |
|    fps             | 7        |
|    iterations      | 60       |
|    time_elapsed    | 8426     |
|    total_timesteps | 60000    |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 171          |
| time/                   |              |
|    fps                  | 7            |
|    iterations           | 61           |
|    time_elapsed         | 8474         |
|    total_timesteps      | 61000        |
| train/                  |              |
|    approx_kl            | 6.557638e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    cvar_tail_segments   | 1            |
|    cvar_var_threshold   | 178          |
|    entropy_lo

-------------------------------------------
| eval/                   |               |
|    mean_ep_length       | 1e+03         |
|    mean_reward          | 13.2          |
| time/                   |               |
|    total_timesteps      | 65000         |
| train/                  |               |
|    approx_kl            | 6.0561422e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    cvar_tail_segments   | 1             |
|    cvar_var_threshold   | 173           |
|    entropy_loss         | -2.32         |
|    explained_variance   | 0.864         |
|    learning_rate        | 0.0003        |
|    loss                 | 0.463         |
|    n_updates            | 256           |
|    policy_gradient_loss | -0.000254     |
|    value_loss           | 0.884         |
-------------------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean  

-------------------------------------------
| eval/                   |               |
|    mean_ep_length       | 1e+03         |
|    mean_reward          | 4.93          |
| time/                   |               |
|    total_timesteps      | 70000         |
| train/                  |               |
|    approx_kl            | 0.00014672555 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    cvar_tail_segments   | 1             |
|    cvar_var_threshold   | 178           |
|    entropy_loss         | -2.32         |
|    explained_variance   | 0.961         |
|    learning_rate        | 0.0003        |
|    loss                 | 0.405         |
|    n_updates            | 276           |
|    policy_gradient_loss | -0.000239     |
|    value_loss           | 0.575         |
-------------------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 171      |
| time/              |          |
|    fps             | 7        |
|    iterations      | 70       |
|    time_elapsed    | 9837     |
|    total_timesteps | 70000    |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 172          |
| time/                   |              |
|    fps                  | 7            |
|    iterations           | 71           |
|    time_elapsed         | 9884         |
|    total_timesteps      | 71000        |
| train/                  |              |
|    approx_kl            | 4.150301e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    cvar_tail_segments   | 1            |
|    cvar_var_threshold   | 172          |
|    entropy_lo

-------------------------------------------
| eval/                   |               |
|    mean_ep_length       | 1e+03         |
|    mean_reward          | 8.67          |
| time/                   |               |
|    total_timesteps      | 75000         |
| train/                  |               |
|    approx_kl            | 0.00014840245 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    cvar_tail_segments   | 1             |
|    cvar_var_threshold   | 175           |
|    entropy_loss         | -2.31         |
|    explained_variance   | 0.96          |
|    learning_rate        | 0.0003        |
|    loss                 | 0.253         |
|    n_updates            | 296           |
|    policy_gradient_loss | -0.000305     |
|    value_loss           | 0.562         |
-------------------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean  

-------------------------------------------
| eval/                   |               |
|    mean_ep_length       | 1e+03         |
|    mean_reward          | 11.7          |
| time/                   |               |
|    total_timesteps      | 80000         |
| train/                  |               |
|    approx_kl            | 0.00017811623 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    cvar_tail_segments   | 1             |
|    cvar_var_threshold   | 159           |
|    entropy_loss         | -2.31         |
|    explained_variance   | 0.975         |
|    learning_rate        | 0.0003        |
|    loss                 | 0.125         |
|    n_updates            | 316           |
|    policy_gradient_loss | -0.000449     |
|    value_loss           | 0.384         |
-------------------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 171      |
| time/              |          |
|    fps             | 7        |
|    iterations      | 80       |
|    time_elapsed    | 11238    |
|    total_timesteps | 80000    |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 171          |
| time/                   |              |
|    fps                  | 7            |
|    iterations           | 81           |
|    time_elapsed         | 11285        |
|    total_timesteps      | 81000        |
| train/                  |              |
|    approx_kl            | 8.981372e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    cvar_tail_segments   | 1            |
|    cvar_var_threshold   | 172          |
|    entropy_lo

-------------------------------------------
| eval/                   |               |
|    mean_ep_length       | 1e+03         |
|    mean_reward          | 8.99          |
| time/                   |               |
|    total_timesteps      | 85000         |
| train/                  |               |
|    approx_kl            | 2.7698756e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    cvar_tail_segments   | 1             |
|    cvar_var_threshold   | 157           |
|    entropy_loss         | -2.32         |
|    explained_variance   | 0.956         |
|    learning_rate        | 0.0003        |
|    loss                 | 0.237         |
|    n_updates            | 336           |
|    policy_gradient_loss | -0.000157     |
|    value_loss           | 0.571         |
-------------------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean  

------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 1e+03        |
|    mean_reward          | 6.95         |
| time/                   |              |
|    total_timesteps      | 90000        |
| train/                  |              |
|    approx_kl            | 4.295969e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    cvar_tail_segments   | 1            |
|    cvar_var_threshold   | 170          |
|    entropy_loss         | -2.33        |
|    explained_variance   | 0.98         |
|    learning_rate        | 0.0003       |
|    loss                 | 0.109        |
|    n_updates            | 356          |
|    policy_gradient_loss | -0.000442    |
|    value_loss           | 0.37         |
------------------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 171      |
| time/              |          |
|    fps             | 7        |
|    iterations      | 90       |
|    time_elapsed    | 12636    |
|    total_timesteps | 90000    |
---------------------------------
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 171           |
| time/                   |               |
|    fps                  | 7             |
|    iterations           | 91            |
|    time_elapsed         | 12682         |
|    total_timesteps      | 91000         |
| train/                  |               |
|    approx_kl            | 2.7439477e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    cvar_tail_segments   | 1             |
|    cvar_var_threshold   | 170           |


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 1e+03       |
|    mean_reward          | 13.9        |
| time/                   |             |
|    total_timesteps      | 95000       |
| train/                  |             |
|    approx_kl            | 6.12948e-05 |
|    clip_fraction        | 0           |
|    clip_range           | 0.2         |
|    cvar_tail_segments   | 1           |
|    cvar_var_threshold   | 171         |
|    entropy_loss         | -2.33       |
|    explained_variance   | 0.938       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.333       |
|    n_updates            | 376         |
|    policy_gradient_loss | -0.00023    |
|    value_loss           | 0.685       |
-----------------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 171      |
| time/              |  

------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 1e+03        |
|    mean_reward          | 13.3         |
| time/                   |              |
|    total_timesteps      | 100000       |
| train/                  |              |
|    approx_kl            | 0.0001038403 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    cvar_tail_segments   | 1            |
|    cvar_var_threshold   | 160          |
|    entropy_loss         | -2.32        |
|    explained_variance   | 0.962        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.282        |
|    n_updates            | 396          |
|    policy_gradient_loss | -0.000897    |
|    value_loss           | 0.561        |
------------------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 171      |
| time/              |          |
|    fps             | 7        |
|    iterations      | 100      |
|    time_elapsed    | 14032    |
|    total_timesteps | 100000   |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 171          |
| time/                   |              |
|    fps                  | 7            |
|    iterations           | 101          |
|    time_elapsed         | 14079        |
|    total_timesteps      | 101000       |
| train/                  |              |
|    approx_kl            | 9.626479e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    cvar_tail_segments   | 1            |
|    cvar_var_threshold   | 172          |
|    entropy_lo

------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 1e+03        |
|    mean_reward          | 9.89         |
| time/                   |              |
|    total_timesteps      | 105000       |
| train/                  |              |
|    approx_kl            | 4.218507e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    cvar_tail_segments   | 1            |
|    cvar_var_threshold   | 178          |
|    entropy_loss         | -2.33        |
|    explained_variance   | 0.962        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.213        |
|    n_updates            | 416          |
|    policy_gradient_loss | -0.000308    |
|    value_loss           | 0.523        |
------------------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 171      |
| ti

-------------------------------------------
| eval/                   |               |
|    mean_ep_length       | 1e+03         |
|    mean_reward          | 10.3          |
| time/                   |               |
|    total_timesteps      | 110000        |
| train/                  |               |
|    approx_kl            | 2.0922185e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    cvar_tail_segments   | 1             |
|    cvar_var_threshold   | 166           |
|    entropy_loss         | -2.31         |
|    explained_variance   | 0.979         |
|    learning_rate        | 0.0003        |
|    loss                 | 0.226         |
|    n_updates            | 436           |
|    policy_gradient_loss | -0.000103     |
|    value_loss           | 0.412         |
-------------------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 171      |
| time/              |          |
|    fps             | 7        |
|    iterations      | 110      |
|    time_elapsed    | 15442    |
|    total_timesteps | 110000   |
---------------------------------
--------------------------------------------
| rollout/                |                |
|    ep_len_mean          | 1e+03          |
|    ep_rew_mean          | 171            |
| time/                   |                |
|    fps                  | 7              |
|    iterations           | 111            |
|    time_elapsed         | 15490          |
|    total_timesteps      | 111000         |
| train/                  |                |
|    approx_kl            | 1.27455005e-05 |
|    clip_fraction        | 0              |
|    clip_range           | 0.2            |
|    cvar_tail_segments   | 1              |
|    cvar_var_threshold   | 17

-------------------------------------------
| eval/                   |               |
|    mean_ep_length       | 1e+03         |
|    mean_reward          | 8.02          |
| time/                   |               |
|    total_timesteps      | 115000        |
| train/                  |               |
|    approx_kl            | 1.9045594e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    cvar_tail_segments   | 1             |
|    cvar_var_threshold   | 176           |
|    entropy_loss         | -2.32         |
|    explained_variance   | 0.851         |
|    learning_rate        | 0.0003        |
|    loss                 | 0.679         |
|    n_updates            | 456           |
|    policy_gradient_loss | -0.000138     |
|    value_loss           | 0.974         |
-------------------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean  

-------------------------------------------
| eval/                   |               |
|    mean_ep_length       | 1e+03         |
|    mean_reward          | 10.3          |
| time/                   |               |
|    total_timesteps      | 120000        |
| train/                  |               |
|    approx_kl            | 6.6648965e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    cvar_tail_segments   | 1             |
|    cvar_var_threshold   | 173           |
|    entropy_loss         | -2.33         |
|    explained_variance   | 0.978         |
|    learning_rate        | 0.0003        |
|    loss                 | 0.208         |
|    n_updates            | 476           |
|    policy_gradient_loss | -0.000358     |
|    value_loss           | 0.358         |
-------------------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 172      |
| time/              |          |
|    fps             | 7        |
|    iterations      | 120      |
|    time_elapsed    | 16840    |
|    total_timesteps | 120000   |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 172         |
| time/                   |             |
|    fps                  | 7           |
|    iterations           | 121         |
|    time_elapsed         | 16887       |
|    total_timesteps      | 121000      |
| train/                  |             |
|    approx_kl            | 8.19012e-05 |
|    clip_fraction        | 0           |
|    clip_range           | 0.2         |
|    cvar_tail_segments   | 1           |
|    cvar_var_threshold   | 172         |
|    entropy_loss         | -2

-------------------------------------------
| eval/                   |               |
|    mean_ep_length       | 1e+03         |
|    mean_reward          | 10            |
| time/                   |               |
|    total_timesteps      | 125000        |
| train/                  |               |
|    approx_kl            | 3.8092316e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    cvar_tail_segments   | 1             |
|    cvar_var_threshold   | 178           |
|    entropy_loss         | -2.31         |
|    explained_variance   | 0.977         |
|    learning_rate        | 0.0003        |
|    loss                 | 0.39          |
|    n_updates            | 496           |
|    policy_gradient_loss | 3.77e-05      |
|    value_loss           | 0.543         |
-------------------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean  

-------------------------------------------
| eval/                   |               |
|    mean_ep_length       | 1e+03         |
|    mean_reward          | 10.9          |
| time/                   |               |
|    total_timesteps      | 130000        |
| train/                  |               |
|    approx_kl            | 0.00015820969 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    cvar_tail_segments   | 1             |
|    cvar_var_threshold   | 177           |
|    entropy_loss         | -2.32         |
|    explained_variance   | 0.981         |
|    learning_rate        | 0.0003        |
|    loss                 | 0.204         |
|    n_updates            | 516           |
|    policy_gradient_loss | -0.000303     |
|    value_loss           | 0.363         |
-------------------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 172      |
| time/              |          |
|    fps             | 7        |
|    iterations      | 130      |
|    time_elapsed    | 18246    |
|    total_timesteps | 130000   |
---------------------------------
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 172           |
| time/                   |               |
|    fps                  | 7             |
|    iterations           | 131           |
|    time_elapsed         | 18293         |
|    total_timesteps      | 131000        |
| train/                  |               |
|    approx_kl            | 0.00013169379 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    cvar_tail_segments   | 1             |
|    cvar_var_threshold   | 169           |


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 1e+03        |
|    mean_reward          | 10.5         |
| time/                   |              |
|    total_timesteps      | 135000       |
| train/                  |              |
|    approx_kl            | 0.0002945176 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    cvar_tail_segments   | 1            |
|    cvar_var_threshold   | 172          |
|    entropy_loss         | -2.32        |
|    explained_variance   | 0.974        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.203        |
|    n_updates            | 536          |
|    policy_gradient_loss | -0.0005      |
|    value_loss           | 0.433        |
------------------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 172      |
| ti

------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 1e+03        |
|    mean_reward          | 9.05         |
| time/                   |              |
|    total_timesteps      | 140000       |
| train/                  |              |
|    approx_kl            | 6.162912e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    cvar_tail_segments   | 1            |
|    cvar_var_threshold   | 168          |
|    entropy_loss         | -2.32        |
|    explained_variance   | 0.981        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.154        |
|    n_updates            | 556          |
|    policy_gradient_loss | -0.000311    |
|    value_loss           | 0.348        |
------------------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 172      |
| time/              |          |
|    fps             | 7        |
|    iterations      | 140      |
|    time_elapsed    | 19637    |
|    total_timesteps | 140000   |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 172          |
| time/                   |              |
|    fps                  | 7            |
|    iterations           | 141          |
|    time_elapsed         | 19684        |
|    total_timesteps      | 141000       |
| train/                  |              |
|    approx_kl            | 0.0001040414 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    cvar_tail_segments   | 1            |
|    cvar_var_threshold   | 171          |
|    entropy_lo

------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 1e+03        |
|    mean_reward          | 11           |
| time/                   |              |
|    total_timesteps      | 145000       |
| train/                  |              |
|    approx_kl            | 0.0002167743 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    cvar_tail_segments   | 1            |
|    cvar_var_threshold   | 172          |
|    entropy_loss         | -2.32        |
|    explained_variance   | 0.984        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.144        |
|    n_updates            | 576          |
|    policy_gradient_loss | -0.000804    |
|    value_loss           | 0.444        |
------------------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 172      |
| ti

-------------------------------------------
| eval/                   |               |
|    mean_ep_length       | 1e+03         |
|    mean_reward          | 12.8          |
| time/                   |               |
|    total_timesteps      | 150000        |
| train/                  |               |
|    approx_kl            | 0.00013133985 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    cvar_tail_segments   | 1             |
|    cvar_var_threshold   | 184           |
|    entropy_loss         | -2.3          |
|    explained_variance   | 0.975         |
|    learning_rate        | 0.0003        |
|    loss                 | 0.136         |
|    n_updates            | 596           |
|    policy_gradient_loss | -0.000656     |
|    value_loss           | 0.394         |
-------------------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 172      |
| time/              |          |
|    fps             | 7        |
|    iterations      | 150      |
|    time_elapsed    | 21027    |
|    total_timesteps | 150000   |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 172          |
| time/                   |              |
|    fps                  | 7            |
|    iterations           | 151          |
|    time_elapsed         | 21074        |
|    total_timesteps      | 151000       |
| train/                  |              |
|    approx_kl            | 7.329107e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    cvar_tail_segments   | 1            |
|    cvar_var_threshold   | 177          |
|    entropy_lo

-------------------------------------------
| eval/                   |               |
|    mean_ep_length       | 1e+03         |
|    mean_reward          | 6.19          |
| time/                   |               |
|    total_timesteps      | 155000        |
| train/                  |               |
|    approx_kl            | 0.00031260666 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    cvar_tail_segments   | 1             |
|    cvar_var_threshold   | 170           |
|    entropy_loss         | -2.22         |
|    explained_variance   | 0.981         |
|    learning_rate        | 0.0003        |
|    loss                 | 0.18          |
|    n_updates            | 616           |
|    policy_gradient_loss | -0.00103      |
|    value_loss           | 0.49          |
-------------------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean  

-------------------------------------------
| eval/                   |               |
|    mean_ep_length       | 1e+03         |
|    mean_reward          | 131           |
| time/                   |               |
|    total_timesteps      | 160000        |
| train/                  |               |
|    approx_kl            | 0.00010312027 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    cvar_tail_segments   | 1             |
|    cvar_var_threshold   | 173           |
|    entropy_loss         | -2.22         |
|    explained_variance   | 0.989         |
|    learning_rate        | 0.0003        |
|    loss                 | 0.102         |
|    n_updates            | 636           |
|    policy_gradient_loss | -0.000648     |
|    value_loss           | 0.318         |
-------------------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 172      |
| time/              |          |
|    fps             | 7        |
|    iterations      | 160      |
|    time_elapsed    | 22421    |
|    total_timesteps | 160000   |
---------------------------------
--------------------------------------------
| rollout/                |                |
|    ep_len_mean          | 1e+03          |
|    ep_rew_mean          | 172            |
| time/                   |                |
|    fps                  | 7              |
|    iterations           | 161            |
|    time_elapsed         | 22469          |
|    total_timesteps      | 161000         |
| train/                  |                |
|    approx_kl            | 0.000104509716 |
|    clip_fraction        | 0              |
|    clip_range           | 0.2            |
|    cvar_tail_segments   | 1              |
|    cvar_var_threshold   | 17

-------------------------------------------
| eval/                   |               |
|    mean_ep_length       | 1e+03         |
|    mean_reward          | 11.1          |
| time/                   |               |
|    total_timesteps      | 165000        |
| train/                  |               |
|    approx_kl            | 0.00037925955 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    cvar_tail_segments   | 1             |
|    cvar_var_threshold   | 164           |
|    entropy_loss         | -2.21         |
|    explained_variance   | 0.976         |
|    learning_rate        | 0.0003        |
|    loss                 | 0.246         |
|    n_updates            | 656           |
|    policy_gradient_loss | -0.000945     |
|    value_loss           | 0.406         |
-------------------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean  

------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 1e+03        |
|    mean_reward          | 9.22         |
| time/                   |              |
|    total_timesteps      | 170000       |
| train/                  |              |
|    approx_kl            | 0.0007376314 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    cvar_tail_segments   | 1            |
|    cvar_var_threshold   | 169          |
|    entropy_loss         | -2.2         |
|    explained_variance   | 0.974        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.13         |
|    n_updates            | 676          |
|    policy_gradient_loss | -0.000935    |
|    value_loss           | 0.397        |
------------------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 172      |
| time/              |          |
|    fps             | 7        |
|    iterations      | 170      |
|    time_elapsed    | 23842    |
|    total_timesteps | 170000   |
---------------------------------
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 172           |
| time/                   |               |
|    fps                  | 7             |
|    iterations           | 171           |
|    time_elapsed         | 23890         |
|    total_timesteps      | 171000        |
| train/                  |               |
|    approx_kl            | 0.00028596105 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    cvar_tail_segments   | 1             |
|    cvar_var_threshold   | 170           |


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 1e+03        |
|    mean_reward          | 9.14         |
| time/                   |              |
|    total_timesteps      | 175000       |
| train/                  |              |
|    approx_kl            | 0.0009444192 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    cvar_tail_segments   | 1            |
|    cvar_var_threshold   | 166          |
|    entropy_loss         | -2.14        |
|    explained_variance   | 0.982        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.0888       |
|    n_updates            | 696          |
|    policy_gradient_loss | -0.00153     |
|    value_loss           | 0.292        |
------------------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 172      |
| ti

-------------------------------------------
| eval/                   |               |
|    mean_ep_length       | 1e+03         |
|    mean_reward          | 11.1          |
| time/                   |               |
|    total_timesteps      | 180000        |
| train/                  |               |
|    approx_kl            | 0.00020137132 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    cvar_tail_segments   | 1             |
|    cvar_var_threshold   | 176           |
|    entropy_loss         | -2.1          |
|    explained_variance   | 0.972         |
|    learning_rate        | 0.0003        |
|    loss                 | 0.228         |
|    n_updates            | 716           |
|    policy_gradient_loss | 0.000149      |
|    value_loss           | 0.461         |
-------------------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 173      |
| time/              |          |
|    fps             | 7        |
|    iterations      | 180      |
|    time_elapsed    | 25254    |
|    total_timesteps | 180000   |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 173          |
| time/                   |              |
|    fps                  | 7            |
|    iterations           | 181          |
|    time_elapsed         | 25301        |
|    total_timesteps      | 181000       |
| train/                  |              |
|    approx_kl            | 0.0002790276 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    cvar_tail_segments   | 1            |
|    cvar_var_threshold   | 174          |
|    entropy_lo

------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 1e+03        |
|    mean_reward          | 12           |
| time/                   |              |
|    total_timesteps      | 185000       |
| train/                  |              |
|    approx_kl            | 0.0005431593 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    cvar_tail_segments   | 1            |
|    cvar_var_threshold   | 172          |
|    entropy_loss         | -2.07        |
|    explained_variance   | 0.96         |
|    learning_rate        | 0.0003       |
|    loss                 | 0.382        |
|    n_updates            | 736          |
|    policy_gradient_loss | -0.00195     |
|    value_loss           | 0.575        |
------------------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 172      |
| ti

-------------------------------------------
| eval/                   |               |
|    mean_ep_length       | 1e+03         |
|    mean_reward          | 8.92          |
| time/                   |               |
|    total_timesteps      | 190000        |
| train/                  |               |
|    approx_kl            | 0.00035174406 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    cvar_tail_segments   | 1             |
|    cvar_var_threshold   | 176           |
|    entropy_loss         | -2.03         |
|    explained_variance   | 0.964         |
|    learning_rate        | 0.0003        |
|    loss                 | 0.166         |
|    n_updates            | 756           |
|    policy_gradient_loss | -0.00101      |
|    value_loss           | 0.411         |
-------------------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 173      |
| time/              |          |
|    fps             | 7        |
|    iterations      | 190      |
|    time_elapsed    | 26669    |
|    total_timesteps | 190000   |
---------------------------------
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 173           |
| time/                   |               |
|    fps                  | 7             |
|    iterations           | 191           |
|    time_elapsed         | 26717         |
|    total_timesteps      | 191000        |
| train/                  |               |
|    approx_kl            | 0.00024254125 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    cvar_tail_segments   | 1             |
|    cvar_var_threshold   | 176           |


-------------------------------------------
| eval/                   |               |
|    mean_ep_length       | 1e+03         |
|    mean_reward          | 9.88          |
| time/                   |               |
|    total_timesteps      | 195000        |
| train/                  |               |
|    approx_kl            | 0.00016201449 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    cvar_tail_segments   | 1             |
|    cvar_var_threshold   | 182           |
|    entropy_loss         | -2.07         |
|    explained_variance   | 0.986         |
|    learning_rate        | 0.0003        |
|    loss                 | 0.116         |
|    n_updates            | 776           |
|    policy_gradient_loss | 0.000129      |
|    value_loss           | 0.293         |
-------------------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean  

-------------------------------------------
| eval/                   |               |
|    mean_ep_length       | 1e+03         |
|    mean_reward          | 12.8          |
| time/                   |               |
|    total_timesteps      | 200000        |
| train/                  |               |
|    approx_kl            | 0.00048497465 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    cvar_tail_segments   | 1             |
|    cvar_var_threshold   | 178           |
|    entropy_loss         | -2.09         |
|    explained_variance   | 0.977         |
|    learning_rate        | 0.0003        |
|    loss                 | 0.176         |
|    n_updates            | 796           |
|    policy_gradient_loss | -0.000946     |
|    value_loss           | 0.334         |
-------------------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 173      |
| time/              |          |
|    fps             | 7        |
|    iterations      | 200      |
|    time_elapsed    | 28069    |
|    total_timesteps | 200000   |
---------------------------------
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 173           |
| time/                   |               |
|    fps                  | 7             |
|    iterations           | 201           |
|    time_elapsed         | 28116         |
|    total_timesteps      | 201000        |
| train/                  |               |
|    approx_kl            | 0.00035909307 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    cvar_tail_segments   | 1             |
|    cvar_var_threshold   | 183           |


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 1e+03        |
|    mean_reward          | 9.63         |
| time/                   |              |
|    total_timesteps      | 205000       |
| train/                  |              |
|    approx_kl            | 7.929111e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    cvar_tail_segments   | 1            |
|    cvar_var_threshold   | 179          |
|    entropy_loss         | -2           |
|    explained_variance   | 0.873        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.609        |
|    n_updates            | 816          |
|    policy_gradient_loss | -0.000532    |
|    value_loss           | 0.888        |
------------------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 174      |
| ti

------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 1e+03        |
|    mean_reward          | 10.6         |
| time/                   |              |
|    total_timesteps      | 210000       |
| train/                  |              |
|    approx_kl            | 0.0002875131 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    cvar_tail_segments   | 1            |
|    cvar_var_threshold   | 179          |
|    entropy_loss         | -2.01        |
|    explained_variance   | 0.981        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.125        |
|    n_updates            | 836          |
|    policy_gradient_loss | -8.74e-05    |
|    value_loss           | 0.379        |
------------------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 174      |
| time/              |          |
|    fps             | 7        |
|    iterations      | 210      |
|    time_elapsed    | 29470    |
|    total_timesteps | 210000   |
---------------------------------
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 174           |
| time/                   |               |
|    fps                  | 7             |
|    iterations           | 211           |
|    time_elapsed         | 29516         |
|    total_timesteps      | 211000        |
| train/                  |               |
|    approx_kl            | 0.00041787192 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    cvar_tail_segments   | 1             |
|    cvar_var_threshold   | 182           |


-------------------------------------------
| eval/                   |               |
|    mean_ep_length       | 1e+03         |
|    mean_reward          | 130           |
| time/                   |               |
|    total_timesteps      | 215000        |
| train/                  |               |
|    approx_kl            | 0.00069168495 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    cvar_tail_segments   | 1             |
|    cvar_var_threshold   | 182           |
|    entropy_loss         | -1.95         |
|    explained_variance   | 0.978         |
|    learning_rate        | 0.0003        |
|    loss                 | 0.131         |
|    n_updates            | 856           |
|    policy_gradient_loss | -0.00201      |
|    value_loss           | 0.32          |
-------------------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean  

------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 1e+03        |
|    mean_reward          | 130          |
| time/                   |              |
|    total_timesteps      | 220000       |
| train/                  |              |
|    approx_kl            | 0.0010177991 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    cvar_tail_segments   | 1            |
|    cvar_var_threshold   | 173          |
|    entropy_loss         | -1.92        |
|    explained_variance   | 0.99         |
|    learning_rate        | 0.0003       |
|    loss                 | 0.113        |
|    n_updates            | 876          |
|    policy_gradient_loss | -0.00082     |
|    value_loss           | 0.243        |
------------------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 175      |
| time/              |          |
|    fps             | 7        |
|    iterations      | 220      |
|    time_elapsed    | 30854    |
|    total_timesteps | 220000   |
---------------------------------
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 175           |
| time/                   |               |
|    fps                  | 7             |
|    iterations           | 221           |
|    time_elapsed         | 30902         |
|    total_timesteps      | 221000        |
| train/                  |               |
|    approx_kl            | 0.00023885795 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    cvar_tail_segments   | 1             |
|    cvar_var_threshold   | 178           |


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 1e+03        |
|    mean_reward          | 130          |
| time/                   |              |
|    total_timesteps      | 225000       |
| train/                  |              |
|    approx_kl            | 0.0025181216 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    cvar_tail_segments   | 1            |
|    cvar_var_threshold   | 175          |
|    entropy_loss         | -1.77        |
|    explained_variance   | 0.983        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.106        |
|    n_updates            | 896          |
|    policy_gradient_loss | -0.00285     |
|    value_loss           | 0.338        |
------------------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 175      |
| ti

------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 1e+03        |
|    mean_reward          | 134          |
| time/                   |              |
|    total_timesteps      | 230000       |
| train/                  |              |
|    approx_kl            | 0.0013127036 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    cvar_tail_segments   | 1            |
|    cvar_var_threshold   | 170          |
|    entropy_loss         | -1.71        |
|    explained_variance   | 0.986        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.124        |
|    n_updates            | 916          |
|    policy_gradient_loss | -0.00123     |
|    value_loss           | 0.262        |
------------------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 175      |
| time/              |          |
|    fps             | 7        |
|    iterations      | 230      |
|    time_elapsed    | 32238    |
|    total_timesteps | 230000   |
---------------------------------
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 175           |
| time/                   |               |
|    fps                  | 7             |
|    iterations           | 231           |
|    time_elapsed         | 32285         |
|    total_timesteps      | 231000        |
| train/                  |               |
|    approx_kl            | 0.00093785167 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    cvar_tail_segments   | 1             |
|    cvar_var_threshold   | 179           |


-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 1e+03       |
|    mean_reward          | 129         |
| time/                   |             |
|    total_timesteps      | 235000      |
| train/                  |             |
|    approx_kl            | 0.001483263 |
|    clip_fraction        | 0           |
|    clip_range           | 0.2         |
|    cvar_tail_segments   | 1           |
|    cvar_var_threshold   | 170         |
|    entropy_loss         | -1.8        |
|    explained_variance   | 0.987       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0797      |
|    n_updates            | 936         |
|    policy_gradient_loss | -0.00107    |
|    value_loss           | 0.287       |
-----------------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 175      |
| time/              |  

------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 1e+03        |
|    mean_reward          | 133          |
| time/                   |              |
|    total_timesteps      | 240000       |
| train/                  |              |
|    approx_kl            | 0.0004947222 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    cvar_tail_segments   | 1            |
|    cvar_var_threshold   | 171          |
|    entropy_loss         | -1.67        |
|    explained_variance   | 0.977        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.193        |
|    n_updates            | 956          |
|    policy_gradient_loss | -0.0013      |
|    value_loss           | 0.315        |
------------------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 175      |
| time/              |          |
|    fps             | 7        |
|    iterations      | 240      |
|    time_elapsed    | 33630    |
|    total_timesteps | 240000   |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1e+03      |
|    ep_rew_mean          | 175        |
| time/                   |            |
|    fps                  | 7          |
|    iterations           | 241        |
|    time_elapsed         | 33678      |
|    total_timesteps      | 241000     |
| train/                  |            |
|    approx_kl            | 0.00493258 |
|    clip_fraction        | 0.0285     |
|    clip_range           | 0.2        |
|    cvar_tail_segments   | 1          |
|    cvar_var_threshold   | 170        |
|    entropy_loss         | -1.65      |
|   

-------------------------------------------
| eval/                   |               |
|    mean_ep_length       | 1e+03         |
|    mean_reward          | 136           |
| time/                   |               |
|    total_timesteps      | 245000        |
| train/                  |               |
|    approx_kl            | 0.00075958256 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    cvar_tail_segments   | 1             |
|    cvar_var_threshold   | 181           |
|    entropy_loss         | -1.59         |
|    explained_variance   | 0.988         |
|    learning_rate        | 0.0003        |
|    loss                 | 0.139         |
|    n_updates            | 976           |
|    policy_gradient_loss | 0.000337      |
|    value_loss           | 0.301         |
-------------------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean  

-------------------------------------------
| eval/                   |               |
|    mean_ep_length       | 1e+03         |
|    mean_reward          | 131           |
| time/                   |               |
|    total_timesteps      | 250000        |
| train/                  |               |
|    approx_kl            | 0.00016894493 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    cvar_tail_segments   | 1             |
|    cvar_var_threshold   | 176           |
|    entropy_loss         | -1.51         |
|    explained_variance   | 0.98          |
|    learning_rate        | 0.0003        |
|    loss                 | 0.154         |
|    n_updates            | 996           |
|    policy_gradient_loss | -0.000462     |
|    value_loss           | 0.452         |
-------------------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 175      |
| time/              |          |
|    fps             | 7        |
|    iterations      | 250      |
|    time_elapsed    | 35021    |
|    total_timesteps | 250000   |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 175          |
| time/                   |              |
|    fps                  | 7            |
|    iterations           | 251          |
|    time_elapsed         | 35068        |
|    total_timesteps      | 251000       |
| train/                  |              |
|    approx_kl            | 7.623488e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    cvar_tail_segments   | 1            |
|    cvar_var_threshold   | 170          |
|    entropy_lo

-------------------------------------------
| eval/                   |               |
|    mean_ep_length       | 1e+03         |
|    mean_reward          | 134           |
| time/                   |               |
|    total_timesteps      | 255000        |
| train/                  |               |
|    approx_kl            | 0.00071955053 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    cvar_tail_segments   | 1             |
|    cvar_var_threshold   | 174           |
|    entropy_loss         | -1.53         |
|    explained_variance   | 0.963         |
|    learning_rate        | 0.0003        |
|    loss                 | 0.355         |
|    n_updates            | 1016          |
|    policy_gradient_loss | -0.0011       |
|    value_loss           | 0.453         |
-------------------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean  

------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 1e+03        |
|    mean_reward          | 136          |
| time/                   |              |
|    total_timesteps      | 260000       |
| train/                  |              |
|    approx_kl            | 0.0009781454 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    cvar_tail_segments   | 1            |
|    cvar_var_threshold   | 175          |
|    entropy_loss         | -1.62        |
|    explained_variance   | 0.972        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.163        |
|    n_updates            | 1036         |
|    policy_gradient_loss | -0.00153     |
|    value_loss           | 0.429        |
------------------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 175      |
| time/              |          |
|    fps             | 7        |
|    iterations      | 260      |
|    time_elapsed    | 36404    |
|    total_timesteps | 260000   |
---------------------------------
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 175           |
| time/                   |               |
|    fps                  | 7             |
|    iterations           | 261           |
|    time_elapsed         | 36452         |
|    total_timesteps      | 261000        |
| train/                  |               |
|    approx_kl            | 0.00092554523 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    cvar_tail_segments   | 1             |
|    cvar_var_threshold   | 171           |


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 1e+03        |
|    mean_reward          | 133          |
| time/                   |              |
|    total_timesteps      | 265000       |
| train/                  |              |
|    approx_kl            | 0.0009876452 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    cvar_tail_segments   | 1            |
|    cvar_var_threshold   | 180          |
|    entropy_loss         | -1.59        |
|    explained_variance   | 0.981        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.123        |
|    n_updates            | 1056         |
|    policy_gradient_loss | -0.00147     |
|    value_loss           | 0.422        |
------------------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 175      |
| ti

------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 1e+03        |
|    mean_reward          | 132          |
| time/                   |              |
|    total_timesteps      | 270000       |
| train/                  |              |
|    approx_kl            | 0.0013331939 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    cvar_tail_segments   | 1            |
|    cvar_var_threshold   | 164          |
|    entropy_loss         | -1.52        |
|    explained_variance   | 0.972        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.154        |
|    n_updates            | 1076         |
|    policy_gradient_loss | -0.00174     |
|    value_loss           | 0.445        |
------------------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 175      |
| time/              |          |
|    fps             | 7        |
|    iterations      | 270      |
|    time_elapsed    | 37792    |
|    total_timesteps | 270000   |
---------------------------------
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 175           |
| time/                   |               |
|    fps                  | 7             |
|    iterations           | 271           |
|    time_elapsed         | 37840         |
|    total_timesteps      | 271000        |
| train/                  |               |
|    approx_kl            | 0.00047926657 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    cvar_tail_segments   | 1             |
|    cvar_var_threshold   | 174           |


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 1e+03        |
|    mean_reward          | 136          |
| time/                   |              |
|    total_timesteps      | 275000       |
| train/                  |              |
|    approx_kl            | 0.0006104554 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    cvar_tail_segments   | 1            |
|    cvar_var_threshold   | 172          |
|    entropy_loss         | -1.39        |
|    explained_variance   | 0.903        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.355        |
|    n_updates            | 1096         |
|    policy_gradient_loss | -0.000571    |
|    value_loss           | 0.681        |
------------------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 175      |
| ti

------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 1e+03        |
|    mean_reward          | 131          |
| time/                   |              |
|    total_timesteps      | 280000       |
| train/                  |              |
|    approx_kl            | 0.0043998715 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    cvar_tail_segments   | 1            |
|    cvar_var_threshold   | 167          |
|    entropy_loss         | -1.33        |
|    explained_variance   | 0.977        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.321        |
|    n_updates            | 1116         |
|    policy_gradient_loss | -0.00172     |
|    value_loss           | 0.447        |
------------------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 175      |
| time/              |          |
|    fps             | 7        |
|    iterations      | 280      |
|    time_elapsed    | 39178    |
|    total_timesteps | 280000   |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 175          |
| time/                   |              |
|    fps                  | 7            |
|    iterations           | 281          |
|    time_elapsed         | 39226        |
|    total_timesteps      | 281000       |
| train/                  |              |
|    approx_kl            | 0.0014934744 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    cvar_tail_segments   | 1            |
|    cvar_var_threshold   | 174          |
|    entropy_lo

-------------------------------------------
| eval/                   |               |
|    mean_ep_length       | 1e+03         |
|    mean_reward          | 133           |
| time/                   |               |
|    total_timesteps      | 285000        |
| train/                  |               |
|    approx_kl            | 0.00087628665 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    cvar_tail_segments   | 1             |
|    cvar_var_threshold   | 158           |
|    entropy_loss         | -1.03         |
|    explained_variance   | 0.891         |
|    learning_rate        | 0.0003        |
|    loss                 | 0.438         |
|    n_updates            | 1136          |
|    policy_gradient_loss | -0.000197     |
|    value_loss           | 0.942         |
-------------------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean  

-------------------------------------------
| eval/                   |               |
|    mean_ep_length       | 1e+03         |
|    mean_reward          | 133           |
| time/                   |               |
|    total_timesteps      | 290000        |
| train/                  |               |
|    approx_kl            | 0.00043821658 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    cvar_tail_segments   | 1             |
|    cvar_var_threshold   | 152           |
|    entropy_loss         | -1.14         |
|    explained_variance   | 0.969         |
|    learning_rate        | 0.0003        |
|    loss                 | 0.193         |
|    n_updates            | 1156          |
|    policy_gradient_loss | -0.000771     |
|    value_loss           | 0.445         |
-------------------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 174      |
| time/              |          |
|    fps             | 7        |
|    iterations      | 290      |
|    time_elapsed    | 40569    |
|    total_timesteps | 290000   |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 174          |
| time/                   |              |
|    fps                  | 7            |
|    iterations           | 291          |
|    time_elapsed         | 40616        |
|    total_timesteps      | 291000       |
| train/                  |              |
|    approx_kl            | 9.337545e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    cvar_tail_segments   | 1            |
|    cvar_var_threshold   | 160          |
|    entropy_lo

------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 1e+03        |
|    mean_reward          | 135          |
| time/                   |              |
|    total_timesteps      | 295000       |
| train/                  |              |
|    approx_kl            | 0.0007279189 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    cvar_tail_segments   | 1            |
|    cvar_var_threshold   | 165          |
|    entropy_loss         | -1.17        |
|    explained_variance   | 0.983        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.106        |
|    n_updates            | 1176         |
|    policy_gradient_loss | -0.000438    |
|    value_loss           | 0.237        |
------------------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 173      |
| ti

-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 1e+03       |
|    mean_reward          | 135         |
| time/                   |             |
|    total_timesteps      | 300000      |
| train/                  |             |
|    approx_kl            | 0.004125924 |
|    clip_fraction        | 0           |
|    clip_range           | 0.2         |
|    cvar_tail_segments   | 1           |
|    cvar_var_threshold   | 170         |
|    entropy_loss         | -1.18       |
|    explained_variance   | 0.982       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.18        |
|    n_updates            | 1196        |
|    policy_gradient_loss | -0.00374    |
|    value_loss           | 0.543       |
-----------------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 173      |
| time/              |          |
|    fps             | 7        |
|    iterations      | 300      |
|    time_elapsed    | 41960    |
|    total_timesteps | 300000   |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 173          |
| time/                   |              |
|    fps                  | 7            |
|    iterations           | 301          |
|    time_elapsed         | 42008        |
|    total_timesteps      | 301000       |
| train/                  |              |
|    approx_kl            | 0.0072898925 |
|    clip_fraction        | 0.031        |
|    clip_range           | 0.2          |
|    cvar_tail_segments   | 1            |
|    cvar_var_threshold   | 161          |
|    entropy_lo

------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 1e+03        |
|    mean_reward          | 133          |
| time/                   |              |
|    total_timesteps      | 305000       |
| train/                  |              |
|    approx_kl            | 0.0018624875 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    cvar_tail_segments   | 1            |
|    cvar_var_threshold   | 161          |
|    entropy_loss         | -1.05        |
|    explained_variance   | 0.951        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.298        |
|    n_updates            | 1216         |
|    policy_gradient_loss | -0.00134     |
|    value_loss           | 0.662        |
------------------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 172      |
| ti

-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 1e+03       |
|    mean_reward          | 136         |
| time/                   |             |
|    total_timesteps      | 310000      |
| train/                  |             |
|    approx_kl            | 0.002162012 |
|    clip_fraction        | 0           |
|    clip_range           | 0.2         |
|    cvar_tail_segments   | 1           |
|    cvar_var_threshold   | 174         |
|    entropy_loss         | -1.27       |
|    explained_variance   | 0.97        |
|    learning_rate        | 0.0003      |
|    loss                 | 0.172       |
|    n_updates            | 1236        |
|    policy_gradient_loss | 0.000293    |
|    value_loss           | 0.553       |
-----------------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 172      |
| time/              |          |
|    fps             | 7        |
|    iterations      | 310      |
|    time_elapsed    | 43363    |
|    total_timesteps | 310000   |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 172          |
| time/                   |              |
|    fps                  | 7            |
|    iterations           | 311          |
|    time_elapsed         | 43411        |
|    total_timesteps      | 311000       |
| train/                  |              |
|    approx_kl            | 0.0003124609 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    cvar_tail_segments   | 1            |
|    cvar_var_threshold   | 175          |
|    entropy_lo

------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 1e+03        |
|    mean_reward          | 130          |
| time/                   |              |
|    total_timesteps      | 315000       |
| train/                  |              |
|    approx_kl            | 0.0022332903 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    cvar_tail_segments   | 1            |
|    cvar_var_threshold   | 170          |
|    entropy_loss         | -1.1         |
|    explained_variance   | 0.974        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.277        |
|    n_updates            | 1256         |
|    policy_gradient_loss | -0.00118     |
|    value_loss           | 0.409        |
------------------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 171      |
| ti

------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 1e+03        |
|    mean_reward          | 135          |
| time/                   |              |
|    total_timesteps      | 320000       |
| train/                  |              |
|    approx_kl            | 0.0007313251 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    cvar_tail_segments   | 1            |
|    cvar_var_threshold   | 159          |
|    entropy_loss         | -0.904       |
|    explained_variance   | 0.94         |
|    learning_rate        | 0.0003       |
|    loss                 | 0.26         |
|    n_updates            | 1276         |
|    policy_gradient_loss | -0.00116     |
|    value_loss           | 0.544        |
------------------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 170      |
| time/              |          |
|    fps             | 7        |
|    iterations      | 320      |
|    time_elapsed    | 44763    |
|    total_timesteps | 320000   |
---------------------------------
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 170           |
| time/                   |               |
|    fps                  | 7             |
|    iterations           | 321           |
|    time_elapsed         | 44811         |
|    total_timesteps      | 321000        |
| train/                  |               |
|    approx_kl            | 0.00017164211 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    cvar_tail_segments   | 1             |
|    cvar_var_threshold   | 157           |


-------------------------------------------
| eval/                   |               |
|    mean_ep_length       | 1e+03         |
|    mean_reward          | 137           |
| time/                   |               |
|    total_timesteps      | 325000        |
| train/                  |               |
|    approx_kl            | 0.00023479422 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    cvar_tail_segments   | 1             |
|    cvar_var_threshold   | 146           |
|    entropy_loss         | -0.797        |
|    explained_variance   | 0.973         |
|    learning_rate        | 0.0003        |
|    loss                 | 0.11          |
|    n_updates            | 1296          |
|    policy_gradient_loss | -0.000491     |
|    value_loss           | 0.62          |
-------------------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean  

-------------------------------------------
| eval/                   |               |
|    mean_ep_length       | 1e+03         |
|    mean_reward          | 132           |
| time/                   |               |
|    total_timesteps      | 330000        |
| train/                  |               |
|    approx_kl            | 4.7659087e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    cvar_tail_segments   | 1             |
|    cvar_var_threshold   | 141           |
|    entropy_loss         | -0.559        |
|    explained_variance   | 0.972         |
|    learning_rate        | 0.0003        |
|    loss                 | 0.231         |
|    n_updates            | 1316          |
|    policy_gradient_loss | -4.22e-05     |
|    value_loss           | 0.52          |
-------------------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 167      |
| time/              |          |
|    fps             | 7        |
|    iterations      | 330      |
|    time_elapsed    | 46157    |
|    total_timesteps | 330000   |
---------------------------------
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 167           |
| time/                   |               |
|    fps                  | 7             |
|    iterations           | 331           |
|    time_elapsed         | 46205         |
|    total_timesteps      | 331000        |
| train/                  |               |
|    approx_kl            | 3.3311695e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    cvar_tail_segments   | 1             |
|    cvar_var_threshold   | 150           |


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 1e+03        |
|    mean_reward          | 135          |
| time/                   |              |
|    total_timesteps      | 335000       |
| train/                  |              |
|    approx_kl            | 9.648573e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    cvar_tail_segments   | 1            |
|    cvar_var_threshold   | 132          |
|    entropy_loss         | -0.565       |
|    explained_variance   | 0.978        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.188        |
|    n_updates            | 1336         |
|    policy_gradient_loss | -0.000238    |
|    value_loss           | 0.488        |
------------------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 166      |
| ti

-------------------------------------------
| eval/                   |               |
|    mean_ep_length       | 1e+03         |
|    mean_reward          | 130           |
| time/                   |               |
|    total_timesteps      | 340000        |
| train/                  |               |
|    approx_kl            | 0.00012288969 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    cvar_tail_segments   | 1             |
|    cvar_var_threshold   | 142           |
|    entropy_loss         | -0.572        |
|    explained_variance   | 0.978         |
|    learning_rate        | 0.0003        |
|    loss                 | 0.23          |
|    n_updates            | 1356          |
|    policy_gradient_loss | -0.00038      |
|    value_loss           | 0.415         |
-------------------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 164      |
| time/              |          |
|    fps             | 7        |
|    iterations      | 340      |
|    time_elapsed    | 47549    |
|    total_timesteps | 340000   |
---------------------------------
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 164           |
| time/                   |               |
|    fps                  | 7             |
|    iterations           | 341           |
|    time_elapsed         | 47596         |
|    total_timesteps      | 341000        |
| train/                  |               |
|    approx_kl            | 1.6985461e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    cvar_tail_segments   | 1             |
|    cvar_var_threshold   | 144           |


-------------------------------------------
| eval/                   |               |
|    mean_ep_length       | 1e+03         |
|    mean_reward          | 131           |
| time/                   |               |
|    total_timesteps      | 345000        |
| train/                  |               |
|    approx_kl            | 0.00041068989 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    cvar_tail_segments   | 1             |
|    cvar_var_threshold   | 150           |
|    entropy_loss         | -0.706        |
|    explained_variance   | 0.981         |
|    learning_rate        | 0.0003        |
|    loss                 | 0.176         |
|    n_updates            | 1376          |
|    policy_gradient_loss | -0.000741     |
|    value_loss           | 0.436         |
-------------------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean  

-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 1e+03       |
|    mean_reward          | 135         |
| time/                   |             |
|    total_timesteps      | 350000      |
| train/                  |             |
|    approx_kl            | 0.005618737 |
|    clip_fraction        | 0.0348      |
|    clip_range           | 0.2         |
|    cvar_tail_segments   | 1           |
|    cvar_var_threshold   | 150         |
|    entropy_loss         | -0.862      |
|    explained_variance   | 0.983       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.209       |
|    n_updates            | 1396        |
|    policy_gradient_loss | -0.00406    |
|    value_loss           | 0.323       |
-----------------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 162      |
| time/              |          |
|    fps             | 7        |
|    iterations      | 350      |
|    time_elapsed    | 48954    |
|    total_timesteps | 350000   |
---------------------------------
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 162           |
| time/                   |               |
|    fps                  | 7             |
|    iterations           | 351           |
|    time_elapsed         | 49002         |
|    total_timesteps      | 351000        |
| train/                  |               |
|    approx_kl            | 0.00052014843 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    cvar_tail_segments   | 1             |
|    cvar_var_threshold   | 161           |


-------------------------------------------
| eval/                   |               |
|    mean_ep_length       | 1e+03         |
|    mean_reward          | 131           |
| time/                   |               |
|    total_timesteps      | 355000        |
| train/                  |               |
|    approx_kl            | 5.8098765e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    cvar_tail_segments   | 1             |
|    cvar_var_threshold   | 176           |
|    entropy_loss         | -1.02         |
|    explained_variance   | 0.975         |
|    learning_rate        | 0.0003        |
|    loss                 | 0.115         |
|    n_updates            | 1416          |
|    policy_gradient_loss | -0.000363     |
|    value_loss           | 0.443         |
-------------------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean  

------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 1e+03        |
|    mean_reward          | 130          |
| time/                   |              |
|    total_timesteps      | 360000       |
| train/                  |              |
|    approx_kl            | 0.0009845087 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    cvar_tail_segments   | 1            |
|    cvar_var_threshold   | 173          |
|    entropy_loss         | -1.16        |
|    explained_variance   | 0.981        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.135        |
|    n_updates            | 1436         |
|    policy_gradient_loss | -0.000497    |
|    value_loss           | 0.395        |
------------------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 161      |
| time/              |          |
|    fps             | 7        |
|    iterations      | 360      |
|    time_elapsed    | 50339    |
|    total_timesteps | 360000   |
---------------------------------
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 161           |
| time/                   |               |
|    fps                  | 7             |
|    iterations           | 361           |
|    time_elapsed         | 50386         |
|    total_timesteps      | 361000        |
| train/                  |               |
|    approx_kl            | 0.00048321608 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    cvar_tail_segments   | 1             |
|    cvar_var_threshold   | 168           |


-------------------------------------------
| eval/                   |               |
|    mean_ep_length       | 1e+03         |
|    mean_reward          | 133           |
| time/                   |               |
|    total_timesteps      | 365000        |
| train/                  |               |
|    approx_kl            | 0.00060406554 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    cvar_tail_segments   | 1             |
|    cvar_var_threshold   | 161           |
|    entropy_loss         | -1.13         |
|    explained_variance   | 0.979         |
|    learning_rate        | 0.0003        |
|    loss                 | 0.174         |
|    n_updates            | 1456          |
|    policy_gradient_loss | -0.000646     |
|    value_loss           | 0.424         |
-------------------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean  

-------------------------------------------
| eval/                   |               |
|    mean_ep_length       | 1e+03         |
|    mean_reward          | 134           |
| time/                   |               |
|    total_timesteps      | 370000        |
| train/                  |               |
|    approx_kl            | 0.00024370305 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    cvar_tail_segments   | 1             |
|    cvar_var_threshold   | 152           |
|    entropy_loss         | -0.939        |
|    explained_variance   | 0.977         |
|    learning_rate        | 0.0003        |
|    loss                 | 0.195         |
|    n_updates            | 1476          |
|    policy_gradient_loss | -3.87e-05     |
|    value_loss           | 0.399         |
-------------------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 160      |
| time/              |          |
|    fps             | 7        |
|    iterations      | 370      |
|    time_elapsed    | 51726    |
|    total_timesteps | 370000   |
---------------------------------
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 160           |
| time/                   |               |
|    fps                  | 7             |
|    iterations           | 371           |
|    time_elapsed         | 51774         |
|    total_timesteps      | 371000        |
| train/                  |               |
|    approx_kl            | 0.00029212586 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    cvar_tail_segments   | 1             |
|    cvar_var_threshold   | 157           |


-------------------------------------------
| eval/                   |               |
|    mean_ep_length       | 1e+03         |
|    mean_reward          | 130           |
| time/                   |               |
|    total_timesteps      | 375000        |
| train/                  |               |
|    approx_kl            | 0.00028301982 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    cvar_tail_segments   | 1             |
|    cvar_var_threshold   | 153           |
|    entropy_loss         | -0.785        |
|    explained_variance   | 0.913         |
|    learning_rate        | 0.0003        |
|    loss                 | 0.494         |
|    n_updates            | 1496          |
|    policy_gradient_loss | -0.000365     |
|    value_loss           | 1.04          |
-------------------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean  

-------------------------------------------
| eval/                   |               |
|    mean_ep_length       | 1e+03         |
|    mean_reward          | 129           |
| time/                   |               |
|    total_timesteps      | 380000        |
| train/                  |               |
|    approx_kl            | 0.00067349005 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    cvar_tail_segments   | 1             |
|    cvar_var_threshold   | 169           |
|    entropy_loss         | -0.806        |
|    explained_variance   | 0.97          |
|    learning_rate        | 0.0003        |
|    loss                 | 0.315         |
|    n_updates            | 1516          |
|    policy_gradient_loss | 0.000226      |
|    value_loss           | 0.536         |
-------------------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 159      |
| time/              |          |
|    fps             | 7        |
|    iterations      | 380      |
|    time_elapsed    | 53101    |
|    total_timesteps | 380000   |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 158          |
| time/                   |              |
|    fps                  | 7            |
|    iterations           | 381          |
|    time_elapsed         | 53148        |
|    total_timesteps      | 381000       |
| train/                  |              |
|    approx_kl            | 7.728565e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    cvar_tail_segments   | 1            |
|    cvar_var_threshold   | 149          |
|    entropy_lo

--------------------------------------------
| eval/                   |                |
|    mean_ep_length       | 1e+03          |
|    mean_reward          | 133            |
| time/                   |                |
|    total_timesteps      | 385000         |
| train/                  |                |
|    approx_kl            | 1.39189815e-05 |
|    clip_fraction        | 0              |
|    clip_range           | 0.2            |
|    cvar_tail_segments   | 1              |
|    cvar_var_threshold   | 152            |
|    entropy_loss         | -0.793         |
|    explained_variance   | 0.94           |
|    learning_rate        | 0.0003         |
|    loss                 | 0.245          |
|    n_updates            | 1536           |
|    policy_gradient_loss | -3.1e-05       |
|    value_loss           | 0.668          |
--------------------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    

-------------------------------------------
| eval/                   |               |
|    mean_ep_length       | 1e+03         |
|    mean_reward          | 136           |
| time/                   |               |
|    total_timesteps      | 390000        |
| train/                  |               |
|    approx_kl            | 0.00014188047 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    cvar_tail_segments   | 1             |
|    cvar_var_threshold   | 163           |
|    entropy_loss         | -0.942        |
|    explained_variance   | 0.973         |
|    learning_rate        | 0.0003        |
|    loss                 | 0.212         |
|    n_updates            | 1556          |
|    policy_gradient_loss | 3.81e-06      |
|    value_loss           | 0.519         |
-------------------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 158      |
| time/              |          |
|    fps             | 7        |
|    iterations      | 390      |
|    time_elapsed    | 54485    |
|    total_timesteps | 390000   |
---------------------------------
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 158           |
| time/                   |               |
|    fps                  | 7             |
|    iterations           | 391           |
|    time_elapsed         | 54532         |
|    total_timesteps      | 391000        |
| train/                  |               |
|    approx_kl            | 3.9172024e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    cvar_tail_segments   | 1             |
|    cvar_var_threshold   | 158           |


-------------------------------------------
| eval/                   |               |
|    mean_ep_length       | 1e+03         |
|    mean_reward          | 130           |
| time/                   |               |
|    total_timesteps      | 395000        |
| train/                  |               |
|    approx_kl            | 0.00034967402 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    cvar_tail_segments   | 1             |
|    cvar_var_threshold   | 159           |
|    entropy_loss         | -0.996        |
|    explained_variance   | 0.979         |
|    learning_rate        | 0.0003        |
|    loss                 | 0.182         |
|    n_updates            | 1576          |
|    policy_gradient_loss | -0.000869     |
|    value_loss           | 0.394         |
-------------------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean  

------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 1e+03        |
|    mean_reward          | 132          |
| time/                   |              |
|    total_timesteps      | 400000       |
| train/                  |              |
|    approx_kl            | 0.0013959305 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    cvar_tail_segments   | 1            |
|    cvar_var_threshold   | 163          |
|    entropy_loss         | -0.904       |
|    explained_variance   | 0.962        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.266        |
|    n_updates            | 1596         |
|    policy_gradient_loss | -0.00169     |
|    value_loss           | 0.48         |
------------------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 157      |
| time/              |          |
|    fps             | 7        |
|    iterations      | 400      |
|    time_elapsed    | 55862    |
|    total_timesteps | 400000   |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 157          |
| time/                   |              |
|    fps                  | 7            |
|    iterations           | 401          |
|    time_elapsed         | 55909        |
|    total_timesteps      | 401000       |
| train/                  |              |
|    approx_kl            | 0.0021128529 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    cvar_tail_segments   | 1            |
|    cvar_var_threshold   | 156          |
|    entropy_lo

-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 1e+03       |
|    mean_reward          | 132         |
| time/                   |             |
|    total_timesteps      | 405000      |
| train/                  |             |
|    approx_kl            | 8.47479e-05 |
|    clip_fraction        | 0           |
|    clip_range           | 0.2         |
|    cvar_tail_segments   | 1           |
|    cvar_var_threshold   | 162         |
|    entropy_loss         | -0.846      |
|    explained_variance   | 0.983       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.193       |
|    n_updates            | 1616        |
|    policy_gradient_loss | -0.000119   |
|    value_loss           | 0.322       |
-----------------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 157      |
| time/              |  

------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 1e+03        |
|    mean_reward          | 128          |
| time/                   |              |
|    total_timesteps      | 410000       |
| train/                  |              |
|    approx_kl            | 0.0007161568 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    cvar_tail_segments   | 1            |
|    cvar_var_threshold   | 163          |
|    entropy_loss         | -1           |
|    explained_variance   | 0.964        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.244        |
|    n_updates            | 1636         |
|    policy_gradient_loss | -0.00177     |
|    value_loss           | 0.422        |
------------------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 157      |
| time/              |          |
|    fps             | 7        |
|    iterations      | 410      |
|    time_elapsed    | 57248    |
|    total_timesteps | 410000   |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 156          |
| time/                   |              |
|    fps                  | 7            |
|    iterations           | 411          |
|    time_elapsed         | 57295        |
|    total_timesteps      | 411000       |
| train/                  |              |
|    approx_kl            | 0.0005288649 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    cvar_tail_segments   | 1            |
|    cvar_var_threshold   | 173          |
|    entropy_lo

------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 1e+03        |
|    mean_reward          | 130          |
| time/                   |              |
|    total_timesteps      | 415000       |
| train/                  |              |
|    approx_kl            | 0.0025183477 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    cvar_tail_segments   | 1            |
|    cvar_var_threshold   | 161          |
|    entropy_loss         | -1.03        |
|    explained_variance   | 0.973        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.228        |
|    n_updates            | 1656         |
|    policy_gradient_loss | -0.000358    |
|    value_loss           | 0.433        |
------------------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 157      |
| ti

------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 1e+03        |
|    mean_reward          | 137          |
| time/                   |              |
|    total_timesteps      | 420000       |
| train/                  |              |
|    approx_kl            | 0.0017647548 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    cvar_tail_segments   | 1            |
|    cvar_var_threshold   | 150          |
|    entropy_loss         | -1.06        |
|    explained_variance   | 0.967        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.415        |
|    n_updates            | 1676         |
|    policy_gradient_loss | -0.00135     |
|    value_loss           | 0.625        |
------------------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 157      |
| time/              |          |
|    fps             | 7        |
|    iterations      | 420      |
|    time_elapsed    | 58633    |
|    total_timesteps | 420000   |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 157          |
| time/                   |              |
|    fps                  | 7            |
|    iterations           | 421          |
|    time_elapsed         | 58680        |
|    total_timesteps      | 421000       |
| train/                  |              |
|    approx_kl            | 0.0015045201 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    cvar_tail_segments   | 1            |
|    cvar_var_threshold   | 158          |
|    entropy_lo

------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 1e+03        |
|    mean_reward          | 130          |
| time/                   |              |
|    total_timesteps      | 425000       |
| train/                  |              |
|    approx_kl            | 0.0002832037 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    cvar_tail_segments   | 1            |
|    cvar_var_threshold   | 172          |
|    entropy_loss         | -1.11        |
|    explained_variance   | 0.98         |
|    learning_rate        | 0.0003       |
|    loss                 | 0.203        |
|    n_updates            | 1696         |
|    policy_gradient_loss | 4.5e-05      |
|    value_loss           | 0.398        |
------------------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 158      |
| ti

-------------------------------------------
| eval/                   |               |
|    mean_ep_length       | 1e+03         |
|    mean_reward          | 132           |
| time/                   |               |
|    total_timesteps      | 430000        |
| train/                  |               |
|    approx_kl            | 0.00055558595 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    cvar_tail_segments   | 1             |
|    cvar_var_threshold   | 172           |
|    entropy_loss         | -1.11         |
|    explained_variance   | 0.983         |
|    learning_rate        | 0.0003        |
|    loss                 | 0.206         |
|    n_updates            | 1716          |
|    policy_gradient_loss | 0.00016       |
|    value_loss           | 0.549         |
-------------------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 160      |
| time/              |          |
|    fps             | 7        |
|    iterations      | 430      |
|    time_elapsed    | 60016    |
|    total_timesteps | 430000   |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 160          |
| time/                   |              |
|    fps                  | 7            |
|    iterations           | 431          |
|    time_elapsed         | 60063        |
|    total_timesteps      | 431000       |
| train/                  |              |
|    approx_kl            | 0.0010215401 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    cvar_tail_segments   | 1            |
|    cvar_var_threshold   | 184          |
|    entropy_lo

------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 1e+03        |
|    mean_reward          | 131          |
| time/                   |              |
|    total_timesteps      | 435000       |
| train/                  |              |
|    approx_kl            | 0.0031702116 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    cvar_tail_segments   | 1            |
|    cvar_var_threshold   | 173          |
|    entropy_loss         | -0.981       |
|    explained_variance   | 0.982        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.139        |
|    n_updates            | 1736         |
|    policy_gradient_loss | -0.0025      |
|    value_loss           | 0.354        |
------------------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 161      |
| ti

-------------------------------------------
| eval/                   |               |
|    mean_ep_length       | 1e+03         |
|    mean_reward          | 134           |
| time/                   |               |
|    total_timesteps      | 440000        |
| train/                  |               |
|    approx_kl            | 1.1250109e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    cvar_tail_segments   | 1             |
|    cvar_var_threshold   | 152           |
|    entropy_loss         | -0.796        |
|    explained_variance   | 0.982         |
|    learning_rate        | 0.0003        |
|    loss                 | 0.173         |
|    n_updates            | 1756          |
|    policy_gradient_loss | -0.000153     |
|    value_loss           | 0.38          |
-------------------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 162      |
| time/              |          |
|    fps             | 7        |
|    iterations      | 440      |
|    time_elapsed    | 61398    |
|    total_timesteps | 440000   |
---------------------------------
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 162           |
| time/                   |               |
|    fps                  | 7             |
|    iterations           | 441           |
|    time_elapsed         | 61445         |
|    total_timesteps      | 441000        |
| train/                  |               |
|    approx_kl            | 5.6699504e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    cvar_tail_segments   | 1             |
|    cvar_var_threshold   | 156           |


------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 1e+03        |
|    mean_reward          | 133          |
| time/                   |              |
|    total_timesteps      | 445000       |
| train/                  |              |
|    approx_kl            | 0.0006749727 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    cvar_tail_segments   | 1            |
|    cvar_var_threshold   | 150          |
|    entropy_loss         | -0.774       |
|    explained_variance   | 0.929        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.355        |
|    n_updates            | 1776         |
|    policy_gradient_loss | -0.000724    |
|    value_loss           | 0.76         |
------------------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 162      |
| ti

------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 1e+03        |
|    mean_reward          | 131          |
| time/                   |              |
|    total_timesteps      | 450000       |
| train/                  |              |
|    approx_kl            | 0.0005491054 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    cvar_tail_segments   | 1            |
|    cvar_var_threshold   | 160          |
|    entropy_loss         | -0.95        |
|    explained_variance   | 0.978        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.244        |
|    n_updates            | 1796         |
|    policy_gradient_loss | -0.000363    |
|    value_loss           | 0.387        |
------------------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 162      |
| time/              |          |
|    fps             | 7        |
|    iterations      | 450      |
|    time_elapsed    | 62772    |
|    total_timesteps | 450000   |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 162          |
| time/                   |              |
|    fps                  | 7            |
|    iterations           | 451          |
|    time_elapsed         | 62819        |
|    total_timesteps      | 451000       |
| train/                  |              |
|    approx_kl            | 0.0003600185 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    cvar_tail_segments   | 1            |
|    cvar_var_threshold   | 161          |
|    entropy_lo

------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 1e+03        |
|    mean_reward          | 133          |
| time/                   |              |
|    total_timesteps      | 455000       |
| train/                  |              |
|    approx_kl            | 0.0012131477 |
|    clip_fraction        | 0.00075      |
|    clip_range           | 0.2          |
|    cvar_tail_segments   | 1            |
|    cvar_var_threshold   | 163          |
|    entropy_loss         | -1.07        |
|    explained_variance   | 0.971        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.295        |
|    n_updates            | 1816         |
|    policy_gradient_loss | -0.00087     |
|    value_loss           | 0.597        |
------------------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 162      |
| ti

-------------------------------------------
| eval/                   |               |
|    mean_ep_length       | 1e+03         |
|    mean_reward          | 136           |
| time/                   |               |
|    total_timesteps      | 460000        |
| train/                  |               |
|    approx_kl            | 5.6187306e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    cvar_tail_segments   | 1             |
|    cvar_var_threshold   | 171           |
|    entropy_loss         | -1.23         |
|    explained_variance   | 0.977         |
|    learning_rate        | 0.0003        |
|    loss                 | 0.139         |
|    n_updates            | 1836          |
|    policy_gradient_loss | 0.000201      |
|    value_loss           | 0.455         |
-------------------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 162      |
| time/              |          |
|    fps             | 7        |
|    iterations      | 460      |
|    time_elapsed    | 64161    |
|    total_timesteps | 460000   |
---------------------------------
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1e+03         |
|    ep_rew_mean          | 162           |
| time/                   |               |
|    fps                  | 7             |
|    iterations           | 461           |
|    time_elapsed         | 64209         |
|    total_timesteps      | 461000        |
| train/                  |               |
|    approx_kl            | 0.00038477464 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    cvar_tail_segments   | 1             |
|    cvar_var_threshold   | 180           |


-------------------------------------------
| eval/                   |               |
|    mean_ep_length       | 1e+03         |
|    mean_reward          | 137           |
| time/                   |               |
|    total_timesteps      | 465000        |
| train/                  |               |
|    approx_kl            | 0.00037932245 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    cvar_tail_segments   | 1             |
|    cvar_var_threshold   | 180           |
|    entropy_loss         | -1.36         |
|    explained_variance   | 0.975         |
|    learning_rate        | 0.0003        |
|    loss                 | 0.148         |
|    n_updates            | 1856          |
|    policy_gradient_loss | 0.000383      |
|    value_loss           | 0.578         |
-------------------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean  

------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 1e+03        |
|    mean_reward          | 99.8         |
| time/                   |              |
|    total_timesteps      | 470000       |
| train/                  |              |
|    approx_kl            | 0.0017624438 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    cvar_tail_segments   | 1            |
|    cvar_var_threshold   | 163          |
|    entropy_loss         | -1.44        |
|    explained_variance   | 0.957        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.153        |
|    n_updates            | 1876         |
|    policy_gradient_loss | -0.0016      |
|    value_loss           | 0.455        |
------------------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 163      |
| time/              |          |
|    fps             | 7        |
|    iterations      | 470      |
|    time_elapsed    | 65539    |
|    total_timesteps | 470000   |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1e+03      |
|    ep_rew_mean          | 163        |
| time/                   |            |
|    fps                  | 7          |
|    iterations           | 471        |
|    time_elapsed         | 65586      |
|    total_timesteps      | 471000     |
| train/                  |            |
|    approx_kl            | 0.00526473 |
|    clip_fraction        | 0.0468     |
|    clip_range           | 0.2        |
|    cvar_tail_segments   | 1          |
|    cvar_var_threshold   | 154        |
|    entropy_loss         | -1.48      |
|   

------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 1e+03        |
|    mean_reward          | 103          |
| time/                   |              |
|    total_timesteps      | 475000       |
| train/                  |              |
|    approx_kl            | 0.0016388306 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    cvar_tail_segments   | 1            |
|    cvar_var_threshold   | 162          |
|    entropy_loss         | -1.4         |
|    explained_variance   | 0.959        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.322        |
|    n_updates            | 1896         |
|    policy_gradient_loss | -0.00203     |
|    value_loss           | 0.542        |
------------------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 163      |
| ti

------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 1e+03        |
|    mean_reward          | 134          |
| time/                   |              |
|    total_timesteps      | 480000       |
| train/                  |              |
|    approx_kl            | 0.0025886418 |
|    clip_fraction        | 0.002        |
|    clip_range           | 0.2          |
|    cvar_tail_segments   | 1            |
|    cvar_var_threshold   | 167          |
|    entropy_loss         | -1.47        |
|    explained_variance   | 0.981        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.184        |
|    n_updates            | 1916         |
|    policy_gradient_loss | -0.00129     |
|    value_loss           | 0.341        |
------------------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 163      |
| time/              |          |
|    fps             | 7        |
|    iterations      | 480      |
|    time_elapsed    | 66916    |
|    total_timesteps | 480000   |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 164          |
| time/                   |              |
|    fps                  | 7            |
|    iterations           | 481          |
|    time_elapsed         | 66963        |
|    total_timesteps      | 481000       |
| train/                  |              |
|    approx_kl            | 0.0029763258 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    cvar_tail_segments   | 1            |
|    cvar_var_threshold   | 170          |
|    entropy_lo

-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 1e+03       |
|    mean_reward          | 130         |
| time/                   |             |
|    total_timesteps      | 485000      |
| train/                  |             |
|    approx_kl            | 0.004309585 |
|    clip_fraction        | 0.0025      |
|    clip_range           | 0.2         |
|    cvar_tail_segments   | 1           |
|    cvar_var_threshold   | 170         |
|    entropy_loss         | -1.24       |
|    explained_variance   | 0.917       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.298       |
|    n_updates            | 1936        |
|    policy_gradient_loss | -0.00268    |
|    value_loss           | 0.933       |
-----------------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 165      |
| time/              |  

------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 1e+03        |
|    mean_reward          | 136          |
| time/                   |              |
|    total_timesteps      | 490000       |
| train/                  |              |
|    approx_kl            | 0.0023012569 |
|    clip_fraction        | 0.00525      |
|    clip_range           | 0.2          |
|    cvar_tail_segments   | 1            |
|    cvar_var_threshold   | 180          |
|    entropy_loss         | -1.15        |
|    explained_variance   | 0.958        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.256        |
|    n_updates            | 1956         |
|    policy_gradient_loss | -0.00275     |
|    value_loss           | 0.504        |
------------------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 165      |
| time/              |          |
|    fps             | 7        |
|    iterations      | 490      |
|    time_elapsed    | 68292    |
|    total_timesteps | 490000   |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 166          |
| time/                   |              |
|    fps                  | 7            |
|    iterations           | 491          |
|    time_elapsed         | 68340        |
|    total_timesteps      | 491000       |
| train/                  |              |
|    approx_kl            | 0.0044827564 |
|    clip_fraction        | 0.0165       |
|    clip_range           | 0.2          |
|    cvar_tail_segments   | 1            |
|    cvar_var_threshold   | 168          |
|    entropy_lo

----------------------------------------
| eval/                   |            |
|    mean_ep_length       | 1e+03      |
|    mean_reward          | 131        |
| time/                   |            |
|    total_timesteps      | 495000     |
| train/                  |            |
|    approx_kl            | 0.00130251 |
|    clip_fraction        | 0          |
|    clip_range           | 0.2        |
|    cvar_tail_segments   | 1          |
|    cvar_var_threshold   | 169        |
|    entropy_loss         | -1.02      |
|    explained_variance   | 0.916      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.285      |
|    n_updates            | 1976       |
|    policy_gradient_loss | -0.00186   |
|    value_loss           | 0.694      |
----------------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 166      |
| time/              |          |
|    fps  

------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 1e+03        |
|    mean_reward          | 129          |
| time/                   |              |
|    total_timesteps      | 500000       |
| train/                  |              |
|    approx_kl            | 0.0031619207 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    cvar_tail_segments   | 1            |
|    cvar_var_threshold   | 174          |
|    entropy_loss         | -1.28        |
|    explained_variance   | 0.98         |
|    learning_rate        | 0.0003       |
|    loss                 | 0.162        |
|    n_updates            | 1996         |
|    policy_gradient_loss | -0.000459    |
|    value_loss           | 0.408        |
------------------------------------------


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 166      |
| time/              |          |
|    fps             | 7        |
|    iterations      | 500      |
|    time_elapsed    | 69690    |
|    total_timesteps | 500000   |
---------------------------------



CVAR TRAINING COMPLETE!
Time: 19.36 hours
Final model saved to Drive: /content/drive/MyDrive/Colab Notebooks/Risk_aware_RL/experiment_20260211_061750/cvar_maskable_ppo/final_model


## 10. Evaluate Both Agents

## 10. Load Saved Models & Evaluate

**If the runtime was disconnected after training, the in-memory agents are gone, so the saved models must be reloaded from Google Drive before evaluation.**

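Make sure you have run cells 1-16 first to set up the environment! The cells below use `make_env`, `DummyVecEnv`, `MaskablePPO`, `DEVICE`, and `config`, which must already be defined.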

In [None]:
# ============================================================
# LOAD SAVED MODELS FROM GOOGLE DRIVE & EVALUATE
# ============================================================
# This cell loads pre-trained models saved in Google Drive.
# Use this when runtime was disconnected after training.
#
# Requires names defined by earlier cells: DummyVecEnv, make_env,
# MaskablePPO and DEVICE.
import glob
import json
import os

import numpy as np

print("="*70)
print("LOADING SAVED MODELS FROM GOOGLE DRIVE")
print("="*70)

# Paths to saved models (adjust if your experiment directory differs)
EXPERIMENT_DIR = '/content/drive/MyDrive/Colab Notebooks/Risk_aware_RL/experiment_20260211_061750'
BASELINE_DIR = os.path.join(EXPERIMENT_DIR, "baseline_maskable_ppo")
CVAR_DIR = os.path.join(EXPERIMENT_DIR, "cvar_maskable_ppo")
COMPARISON_DIR = os.path.join(EXPERIMENT_DIR, "comparison")

# Ensure comparison directory exists
os.makedirs(COMPARISON_DIR, exist_ok=True)


def _checkpoint_step(path):
    """Return the step count encoded in a checkpoint file name, or 0.

    The step is taken from the second-to-last underscore-separated token of
    the file name (presumably `<prefix>_<steps>_steps.zip` -- TODO confirm
    against the checkpoint callback configuration). Only the base name is
    inspected so that underscores in parent directories cannot interfere.
    """
    tokens = os.path.basename(path).split('_')
    if len(tokens) >= 2 and tokens[-2].isdigit():
        return int(tokens[-2])
    return 0


def _find_saved_model(model_dir, label):
    """Locate the model archive to load for one agent.

    Preference order: `final_model.zip`, then `best_model/best_model.zip`,
    then the checkpoint in `checkpoints/` with the highest step count.

    Args:
        model_dir: Directory holding this agent's saved artifacts.
        label: Agent name used in the progress and error messages.

    Returns:
        Path of the archive that was found.

    Raises:
        FileNotFoundError: If none of the three locations has a model.
    """
    candidates = (
        ("final model", os.path.join(model_dir, 'final_model.zip')),
        ("best model", os.path.join(model_dir, 'best_model', 'best_model.zip')),
    )
    for kind, candidate in candidates:
        if os.path.exists(candidate):
            print(f"✓ Found {label} {kind}: {candidate}")
            return candidate

    # Fall back to the most advanced intermediate checkpoint.
    checkpoints = glob.glob(os.path.join(model_dir, 'checkpoints', '*.zip'))
    if not checkpoints:
        raise FileNotFoundError(f"No {label} model found!")
    latest = sorted(checkpoints, key=_checkpoint_step)[-1]
    print(f"✓ Found {label} checkpoint: {latest}")
    return latest


baseline_path = _find_saved_model(BASELINE_DIR, "baseline")
cvar_path = _find_saved_model(CVAR_DIR, "CVaR")

# Create a dummy environment to load the models
# Note: make_env function must be available (run earlier cells first!)
try:
    dummy_env = DummyVecEnv([lambda: make_env(seed=9999, use_masking=True)])
except Exception:
    # Print the hint, then re-raise so the real cause stays visible.
    print("\n⚠️  Environment not set up. Please run cells 1-16 first to set up environment.")
    raise

# Load models; the dummy env is closed even if one of the loads fails.
try:
    print("\nLoading baseline agent...")
    baseline_agent = MaskablePPO.load(baseline_path, env=dummy_env, device=DEVICE)
    print(f"  Loaded from: {baseline_path}")

    print("\nLoading CVaR agent...")
    # For CVaR agent, we need to use the custom class
    from models.cvar_maskable_ppo_true import CVaRMaskablePPO
    cvar_agent = CVaRMaskablePPO.load(cvar_path, env=dummy_env, device=DEVICE)
    print(f"  Loaded from: {cvar_path}")
finally:
    dummy_env.close()

print("\n" + "="*70)
print("✅ MODELS LOADED SUCCESSFULLY!")
print("="*70)

In [None]:
# ============================================================
# EVALUATE BOTH AGENTS
# ============================================================
# Requires names defined by earlier cells: np, os, json, DummyVecEnv,
# make_env, config, baseline_agent, cvar_agent and COMPARISON_DIR.
print("="*70)
print("EVALUATING BOTH AGENTS")
print("="*70)


def evaluate_agent(agent, env, n_episodes=50, agent_name="Agent"):
    """Evaluate agent and return statistics.

    Runs `n_episodes` deterministic episodes on the first sub-environment of
    a vectorized `env` and summarizes the undiscounted episode returns.

    Args:
        agent: Object with a `predict(obs, deterministic=..., action_masks=...)`
            method.
        env: Vectorized environment exposing `envs`, `reset`, `step` and
            `env_method`. Only index 0 of the reward/done vectors is read.
        n_episodes: Number of episodes to run; must be at least 1.
        agent_name: Label used in the progress messages.

    Returns:
        Dict with `mean_return`, `std_return`, `min_return`, `max_return`,
        `median_return`, `var_0.1` (10th percentile of returns), `cvar_0.1`
        (mean of the returns at or below that percentile) and `all_returns`
        (list of every episode return).

    Raises:
        ValueError: If `n_episodes` is smaller than 1.
    """
    if n_episodes < 1:
        raise ValueError("n_episodes must be at least 1")

    use_masking = hasattr(env.envs[0], 'action_masks')
    episode_returns = []

    for ep in range(n_episodes):
        obs = env.reset()
        ep_return = 0.0

        while True:
            if use_masking:
                masks = env.env_method('action_masks')[0]
                action, _ = agent.predict(obs, action_masks=masks, deterministic=True)
            else:
                action, _ = agent.predict(obs, deterministic=True)

            obs, reward, done, _ = env.step(action)
            # Accumulate as a Python float so a long episode is not summed in
            # whatever reduced precision the env's reward array uses.
            ep_return += float(reward[0])

            if done[0]:
                break

        episode_returns.append(ep_return)

        if (ep + 1) % 10 == 0:
            print(f"  {agent_name}: {ep+1}/{n_episodes} episodes")

    returns = np.array(episode_returns)
    var_threshold = np.percentile(returns, 10)

    # Compute statistics
    stats = {
        'mean_return': float(np.mean(returns)),
        'std_return': float(np.std(returns)),
        'min_return': float(np.min(returns)),
        'max_return': float(np.max(returns)),
        'median_return': float(np.median(returns)),
        'var_0.1': float(var_threshold),
        'cvar_0.1': float(np.mean(returns[returns <= var_threshold])),
        'all_returns': returns.tolist()
    }

    return stats


def _evaluate_on_fresh_env(agent, agent_name, n_episodes=50):
    """Evaluate `agent` on a newly built evaluation env and always close it.

    Each agent gets its own env built with the same seed, so one agent's
    evaluation does not advance the environment state seen by the other.
    NOTE(review): this assumes make_env derives its randomness from its first
    argument -- confirm against make_env.
    """
    eval_env = DummyVecEnv([lambda: make_env(config['training'].seed + 2000, use_masking=True)])
    try:
        return evaluate_agent(agent, eval_env, n_episodes=n_episodes, agent_name=agent_name)
    finally:
        eval_env.close()


# Evaluate baseline
print("\nEvaluating Baseline Agent...")
baseline_stats = _evaluate_on_fresh_env(baseline_agent, "Baseline")

# Evaluate CVaR agent
print("\nEvaluating CVaR Agent...")
cvar_stats = _evaluate_on_fresh_env(cvar_agent, "CVaR")

# Save evaluation results to Drive
eval_results = {
    'baseline': baseline_stats,
    'cvar': cvar_stats
}

with open(os.path.join(COMPARISON_DIR, 'evaluation_results.json'), 'w') as f:
    json.dump(eval_results, f, indent=2)

print("\n✓ Evaluation complete and saved to Drive!")
print(f"  Results saved to: {COMPARISON_DIR}/evaluation_results.json")

### Alternative: Load Previously Saved Evaluation Results

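If evaluation was already run in an earlier session, you can skip re-evaluation and load the saved JSON instead. `COMPARISON_DIR` must already be defined (it is set in the model-loading cell above):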

In [None]:
# ============================================================
# ALTERNATIVE: LOAD EVALUATION RESULTS FROM SAVED JSON
# ============================================================
# Run this cell instead of the evaluation cell above when the evaluation
# has already been saved and only the plots are needed.
# Requires COMPARISON_DIR to be defined by an earlier cell.
import json
import os

# Location of the previously saved evaluation results
eval_json_path = os.path.join(COMPARISON_DIR, 'evaluation_results.json')

if not os.path.exists(eval_json_path):
    print("⚠️  No saved evaluation results found.")
    print("   Please run the evaluation cell first.")
else:
    banner = "="*70
    print(banner)
    print("LOADING SAVED EVALUATION RESULTS")
    print(banner)

    with open(eval_json_path, 'r') as results_file:
        eval_results = json.load(results_file)

    # Expose the per-agent statistics under the names the plotting cells use.
    baseline_stats = eval_results['baseline']
    cvar_stats = eval_results['cvar']

    print(f"\n✓ Loaded evaluation results from: {eval_json_path}")

    # Same two-line summary for each agent, baseline first.
    summaries = (
        ("\nBaseline Stats:", baseline_stats),
        ("\nCVaR Agent Stats:", cvar_stats),
    )
    for heading, agent_stats in summaries:
        print(heading)
        print(f"  Mean Return: {agent_stats['mean_return']:.2f} ± {agent_stats['std_return']:.2f}")
        print(f"  CVaR (10%):  {agent_stats['cvar_0.1']:.2f}")

## 11. Visualization & Comparison Plots

In [None]:
# ============================================================
# COMPARISON AND VISUALIZATION
# ============================================================
# Builds a 3x3 figure comparing the two agents from `baseline_stats` and
# `cvar_stats`, then writes it to COMPARISON_DIR as PNG and PDF.
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Set style
sns.set_style('whitegrid')
plt.rcParams['figure.figsize'] = (18, 14)
plt.rcParams['font.size'] = 11


def _pct_change_label(new, base):
    """Return the change of ``new`` relative to ``|base|`` as a signed percent string, or 'N/A' if ``base`` is 0."""
    if base == 0:
        return 'N/A'
    return f'{(new - base) / abs(base) * 100:+.1f}%'


# Create comprehensive comparison plots
fig = plt.figure(figsize=(18, 14))
gs = fig.add_gridspec(3, 3, hspace=0.35, wspace=0.3)

# Color scheme
colors = ['#2E86AB', '#D62246']  # Blue for Baseline, Red for CVaR
agent_names = ['Baseline\n(MaskablePPO)', 'CVaR\n(CVaR-MaskablePPO)']

# ----------------------------------------------------------------
# 1. Mean Return Comparison (Top Left)
# ----------------------------------------------------------------
ax1 = fig.add_subplot(gs[0, 0])
means = [baseline_stats['mean_return'], cvar_stats['mean_return']]
stds = [baseline_stats['std_return'], cvar_stats['std_return']]
ax1.bar(agent_names, means, yerr=stds, color=colors, alpha=0.7, capsize=10, edgecolor='black', linewidth=1.2)
ax1.set_ylabel('Mean Episode Return', fontweight='bold')
ax1.set_title('Average Performance', fontweight='bold', fontsize=12)
ax1.grid(axis='y', alpha=0.3)

# Add value labels just above the top of each error bar
for i, (m, s) in enumerate(zip(means, stds)):
    ax1.text(i, m + s + 2, f"{m:.1f}±{s:.1f}", ha='center', fontweight='bold', fontsize=10)

# ----------------------------------------------------------------
# 2. CVaR (Worst 10%) Comparison (Top Middle)
# ----------------------------------------------------------------
ax2 = fig.add_subplot(gs[0, 1])
cvars = [baseline_stats['cvar_0.1'], cvar_stats['cvar_0.1']]
# Relative CVaR change in percent; reported as 0 when the baseline CVaR is 0
improvement = (cvar_stats['cvar_0.1'] - baseline_stats['cvar_0.1']) / abs(baseline_stats['cvar_0.1']) * 100 if baseline_stats['cvar_0.1'] != 0 else 0
ax2.bar(agent_names, cvars, color=colors, alpha=0.7, edgecolor='black', linewidth=1.2)
ax2.set_ylabel('CVaR (10%)', fontweight='bold')
ax2.set_title(f'Worst-Case Performance\n(CVaR: {improvement:+.1f}%)', fontweight='bold', fontsize=12)
ax2.grid(axis='y', alpha=0.3)

for i, c in enumerate(cvars):
    ax2.text(i, c + 1, f"{c:.1f}", ha='center', fontweight='bold', fontsize=10)

# ----------------------------------------------------------------
# 3. Performance Gap (Mean - CVaR) (Top Right)
# ----------------------------------------------------------------
ax3 = fig.add_subplot(gs[0, 2])
gaps = [
    baseline_stats['mean_return'] - baseline_stats['cvar_0.1'],
    cvar_stats['mean_return'] - cvar_stats['cvar_0.1']
]
ax3.bar(agent_names, gaps, color=colors, alpha=0.7, edgecolor='black', linewidth=1.2)
ax3.set_ylabel('Gap (Mean - CVaR)', fontweight='bold')
ax3.set_title('Consistency (Lower = Better)', fontweight='bold', fontsize=12)
ax3.grid(axis='y', alpha=0.3)

for i, g in enumerate(gaps):
    ax3.text(i, g + 1, f"{g:.1f}", ha='center', fontweight='bold', fontsize=10)

# ----------------------------------------------------------------
# 4. Return Distribution Histogram (Middle Row, Full Width)
# ----------------------------------------------------------------
ax4 = fig.add_subplot(gs[1, :])

baseline_returns = np.array(baseline_stats['all_returns'])
cvar_returns = np.array(cvar_stats['all_returns'])

# Create overlapping histograms on shared bin edges so the bars line up
bins = np.linspace(
    min(baseline_returns.min(), cvar_returns.min()) - 10,
    max(baseline_returns.max(), cvar_returns.max()) + 10,
    30
)

ax4.hist(baseline_returns, bins=bins, alpha=0.6, label=f'Baseline (μ={baseline_stats["mean_return"]:.1f})', color=colors[0], edgecolor='black')
ax4.hist(cvar_returns, bins=bins, alpha=0.6, label=f'CVaR (μ={cvar_stats["mean_return"]:.1f})', color=colors[1], edgecolor='black')

# Add vertical lines for means (solid) and CVaR thresholds (dashed)
ax4.axvline(baseline_stats['mean_return'], color=colors[0], linestyle='-', linewidth=2, label='Baseline Mean')
ax4.axvline(cvar_stats['mean_return'], color=colors[1], linestyle='-', linewidth=2, label='CVaR Mean')
ax4.axvline(baseline_stats['cvar_0.1'], color=colors[0], linestyle='--', linewidth=2, label='Baseline CVaR₁₀')
ax4.axvline(cvar_stats['cvar_0.1'], color=colors[1], linestyle='--', linewidth=2, label='CVaR Agent CVaR₁₀')

ax4.set_xlabel('Episode Return', fontweight='bold')
ax4.set_ylabel('Frequency', fontweight='bold')
ax4.set_title('Return Distribution Comparison', fontweight='bold', fontsize=12)
ax4.legend(loc='upper right', fontsize=9)
ax4.grid(axis='y', alpha=0.3)

# ----------------------------------------------------------------
# 5. Box Plot Comparison (Bottom Left)
# ----------------------------------------------------------------
ax5 = fig.add_subplot(gs[2, 0])
bp = ax5.boxplot(
    [baseline_returns, cvar_returns],
    patch_artist=True,
    showmeans=True,
    meanline=True,
    meanprops={'color': 'black', 'linewidth': 2}
)
# Label the boxes through the axis rather than boxplot's `labels=` keyword,
# which newer Matplotlib releases deprecate in favour of `tick_labels=`;
# setting the tick labels directly works on old and new versions alike.
ax5.set_xticklabels(['Baseline', 'CVaR'])

for patch, color in zip(bp['boxes'], colors):
    patch.set_facecolor(color)
    patch.set_alpha(0.6)

ax5.set_ylabel('Episode Return', fontweight='bold')
ax5.set_title('Return Distribution (Box Plot)', fontweight='bold', fontsize=12)
ax5.grid(axis='y', alpha=0.3)

# ----------------------------------------------------------------
# 6. Tail Performance Analysis (Bottom Middle)
# ----------------------------------------------------------------
ax6 = fig.add_subplot(gs[2, 1])

# Show different percentiles
percentiles = [5, 10, 25, 50]
baseline_pcts = [np.percentile(baseline_returns, p) for p in percentiles]
cvar_pcts = [np.percentile(cvar_returns, p) for p in percentiles]

x = np.arange(len(percentiles))
width = 0.35

ax6.bar(x - width/2, baseline_pcts, width, label='Baseline', color=colors[0], alpha=0.7, edgecolor='black')
ax6.bar(x + width/2, cvar_pcts, width, label='CVaR', color=colors[1], alpha=0.7, edgecolor='black')

ax6.set_xlabel('Percentile', fontweight='bold')
ax6.set_ylabel('Return Value', fontweight='bold')
ax6.set_title('Tail Performance (Lower Percentiles)', fontweight='bold', fontsize=12)
ax6.set_xticks(x)
ax6.set_xticklabels([f'{p}th' for p in percentiles])
ax6.legend()
ax6.grid(axis='y', alpha=0.3)

# ----------------------------------------------------------------
# 7. Summary Statistics Table (Bottom Right)
# ----------------------------------------------------------------
ax7 = fig.add_subplot(gs[2, 2])
ax7.axis('off')

# Create summary table: one row per (row label, stats key) pair
table_metrics = [
    ('Mean Return', 'mean_return'),
    ('Std Dev', 'std_return'),
    ('Min Return', 'min_return'),
    ('CVaR (10%)', 'cvar_0.1'),
    ('VaR (10%)', 'var_0.1'),
    ('Median', 'median_return'),
]
summary_data = [['Metric', 'Baseline', 'CVaR', 'Δ (%)']] + [
    [
        row_label,
        f'{baseline_stats[key]:.2f}',
        f'{cvar_stats[key]:.2f}',
        _pct_change_label(cvar_stats[key], baseline_stats[key]),
    ]
    for row_label, key in table_metrics
]

table = ax7.table(
    cellText=summary_data,
    cellLoc='center',
    loc='center',
    colWidths=[0.3, 0.2, 0.2, 0.2]
)
table.auto_set_font_size(False)
table.set_fontsize(10)
table.scale(1.2, 1.8)

# Style header row
for j in range(4):
    table[(0, j)].set_facecolor('#404040')
    table[(0, j)].set_text_props(color='white', fontweight='bold')

ax7.set_title('Summary Statistics', fontweight='bold', fontsize=12, pad=20)

# ----------------------------------------------------------------
# Save figure
# ----------------------------------------------------------------
fig.suptitle('MaskablePPO vs CVaR-MaskablePPO: Comprehensive Comparison',
             fontsize=14, fontweight='bold', y=0.98)

# Save to Google Drive
fig_path = os.path.join(COMPARISON_DIR, 'comprehensive_comparison.png')
fig.savefig(fig_path, dpi=150, bbox_inches='tight', facecolor='white')
print(f"\n✓ Figure saved to: {fig_path}")

# Also save as PDF for publication quality
pdf_path = os.path.join(COMPARISON_DIR, 'comprehensive_comparison.pdf')
fig.savefig(pdf_path, dpi=300, bbox_inches='tight', facecolor='white')
print(f"✓ PDF saved to: {pdf_path}")

# No tight_layout() here: the spacing is set explicitly on the GridSpec
# above, and calling it after savefig would only re-flow the on-screen copy,
# making it differ from the files that were just written.
plt.show()

## 12. Summary Report

In [None]:
# ============================================================
# EXPERIMENT SUMMARY REPORT
# ============================================================
# Prints a text report of `baseline_stats` / `cvar_stats`, the relative
# changes between them, and a short interpretation.


def _relative_change(new, base):
    """Return the change of ``new`` relative to ``|base|`` in percent; 0 when ``base`` is 0."""
    if base == 0:
        return 0
    return (new - base) / abs(base) * 100


def _print_agent_block(title, stats):
    """Print the per-agent statistics block under the heading ``title``."""
    print(f"\n   {title}:")
    print(f"     Mean Return:  {stats['mean_return']:.2f} ± {stats['std_return']:.2f}")
    print(f"     Min Return:   {stats['min_return']:.2f}")
    print(f"     Max Return:   {stats['max_return']:.2f}")
    print(f"     CVaR (10%):   {stats['cvar_0.1']:.2f}")
    print(f"     VaR (10%):    {stats['var_0.1']:.2f}")
    print(f"     Gap:          {stats['mean_return'] - stats['cvar_0.1']:.2f}")


print("="*70)
print("EXPERIMENT SUMMARY")
print("="*70)

print(f"\n📁 All results saved to Google Drive:")
print(f"   {EXPERIMENT_DIR}")

# Report the episode count actually present in the results instead of a
# hard-coded number, so the header stays correct when the evaluation was run
# with a different n_episodes or the stats were loaded from a saved JSON.
n_baseline_eps = len(baseline_stats['all_returns'])
n_cvar_eps = len(cvar_stats['all_returns'])
if n_baseline_eps == n_cvar_eps:
    episodes_note = f"{n_baseline_eps} episodes each"
else:
    episodes_note = f"{n_baseline_eps} baseline / {n_cvar_eps} CVaR episodes"
print(f"\n📊 Evaluation Results ({episodes_note}):")

_print_agent_block("BASELINE (MaskablePPO - No CVaR)", baseline_stats)
_print_agent_block("CVAR AGENT (CVaR-MaskablePPO)", cvar_stats)

# Calculate improvements (percent change of the CVaR agent vs the baseline)
mean_change = _relative_change(cvar_stats['mean_return'], baseline_stats['mean_return'])
cvar_change = _relative_change(cvar_stats['cvar_0.1'], baseline_stats['cvar_0.1'])

gap_baseline = baseline_stats['mean_return'] - baseline_stats['cvar_0.1']
gap_cvar = cvar_stats['mean_return'] - cvar_stats['cvar_0.1']
gap_change = _relative_change(gap_cvar, gap_baseline)

print(f"\n   IMPROVEMENT (CVaR Agent vs Baseline):")
print(f"     Mean Return:  {mean_change:+.2f}%")
print(f"     CVaR (10%):   {cvar_change:+.2f}%  ← Most Important for Risk-Awareness!")
print(f"     Gap:          {gap_change:+.2f}% (negative = more consistent)")

# Interpretation: thresholds are >10% success, >0% moderate, <-5% regression,
# anything in [-5%, 0%] is treated as similar.
print(f"\n💡 Interpretation:")
if cvar_change > 10:
    print(f"   ✅ SUCCESS! CVaR (worst-case performance) improved by {cvar_change:.1f}%")
    print(f"      The CVaR agent significantly reduces downside risk.")
elif cvar_change > 0:
    print(f"   ⚠️  MODERATE: CVaR improved by {cvar_change:.1f}%")
    print(f"      There is some improvement in worst-case performance.")
elif cvar_change < -5:
    print(f"   ❌ CVaR actually decreased by {abs(cvar_change):.1f}%")
    print(f"      The CVaR objective may need tuning (alpha, weight).")
else:
    print(f"   ➖ SIMILAR: CVaR changed by only {cvar_change:.1f}%")
    print(f"      Both agents have similar worst-case performance.")

if mean_change < -5 and cvar_change > 5:
    print(f"\n   📊 Trade-off detected:")
    print(f"      Mean decreased ({mean_change:.1f}%) but CVaR improved ({cvar_change:.1f}%)")
    print(f"      This is the expected risk-return trade-off!")

print(f"\n{'='*70}")
print("Files saved to Google Drive:")
print(f"  - {os.path.join(COMPARISON_DIR, 'evaluation_results.json')}")
print(f"  - {os.path.join(COMPARISON_DIR, 'comprehensive_comparison.png')}")
print(f"  - {os.path.join(COMPARISON_DIR, 'comprehensive_comparison.pdf')}")
print("="*70)

In [None]:
# Example: Load a saved model from Drive and continue training
#
# Everything below except the final print() is intentionally commented out,
# so running this cell as-is only prints a reminder.
# The snippet reads BASELINE_DIR, CVAR_DIR, CVAR_ALPHA and CVAR_WEIGHT, none
# of which are defined in this cell (presumably set by the earlier setup /
# training cells - verify they exist before uncommenting).

# Uncomment to use:
# from models.cvar_maskable_ppo import TrueCVaRMaskablePPO
# from sb3_contrib import MaskablePPO

# # Load baseline
# baseline_loaded = MaskablePPO.load(
#     os.path.join(BASELINE_DIR, 'final_model')
# )

# # Load CVaR agent
# cvar_loaded = TrueCVaRMaskablePPO.load(
#     os.path.join(CVAR_DIR, 'final_model'),
#     alpha=CVAR_ALPHA,
#     cvar_weight=CVAR_WEIGHT
# )

# # Continue training
# # NOTE: the learn() call below is commented out one level deeper than the
# # rest, so it stays disabled after removing a single '#'; uncomment it a
# # second time to actually resume training.
# # cvar_loaded.learn(total_timesteps=500_000, reset_num_timesteps=False)

print("💡 To load and continue training, uncomment the code above.")