<a href="https://colab.research.google.com/github/FarStryke21/NaturalDreamer/blob/main/Dreamer_v3_RoboSuite.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Set up Colab Requirements

In [4]:
# Clone the Natural Dreamer repository

!git clone https://github.com/FarStryke21/NaturalDreamer.git

%cd /content/NaturalDreamer/

fatal: destination path 'NaturalDreamer' already exists and is not an empty directory.
[Errno 2] No such file or directory: '/content/NaturalDreamer/'
/home/achulawa/Projects/NaturalDreamer


In [5]:
!pip install robosuite attridict



## Robosuite Training

In [1]:
import gymnasium as gym # For wrappers
import torch
import argparse
import os
from tqdm import tqdm

# Robosuite imports
import robosuite as suite
from robosuite.wrappers import GymWrapper

# Project-specific imports
from dreamer import Dreamer
from utils import loadConfig, seedEverything, plotMetrics, saveLossesToCSV, ensureParentFolders
from envs import getEnvProperties, GymPixelsProcessingWrapper, CleanGymWrapper, ImageExtractWrapper # Using getEnvProperties from envs.py


Set logging level: Console=ERROR | File=None


In [2]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

Using device: cuda


In [3]:
# Load experiment config
# configFile names the YAML file handed to the project's loadConfig helper; the
# returned object is read through attribute access in the cells below
# (e.g. config.seed, config.folderNames).
configFile = "robosuite-lift-panda.yml"
config = loadConfig(configFile)

In [4]:
# Seed the run with the value from the loaded config, then report which config
# file and seed are in use.
# NOTE(review): which RNGs seedEverything covers is defined in utils — confirm there.
seedEverything(config.seed)
print(f"Loaded config from file: {configFile}")
print(f"Using seed: {config.seed}")

Loaded config from file: robosuite-lift-panda.yml
Using seed: 1234


In [5]:
# Build the run identifier and every output location derived from it
runName = f"{config.environmentName}_{config.robotName}_{config.runName}"
folderNames = config.folderNames

# Checkpoints live in a sub-folder per run; metrics and plots are single files,
# and videos share a filename prefix.
checkpointFolder = os.path.join(folderNames.checkpointsFolder, runName)
checkpointFilenameBase = os.path.join(checkpointFolder, runName)
metricsFilename = os.path.join(folderNames.metricsFolder, f"{runName}.csv")
plotFilename = os.path.join(folderNames.plotsFolder, f"{runName}.html")
videoFilenameBase = os.path.join(folderNames.videosFolder, runName)

# Ensure parent directories for all output files exist
ensureParentFolders(metricsFilename, plotFilename, checkpointFilenameBase, videoFilenameBase)

# Summarise where each artefact of this run will end up
print(
    f"Run name: {runName}",
    f"Metrics will be saved to: {metricsFilename}",
    f"Plots will be saved to: {plotFilename}",
    f"Checkpoints will be saved to: {checkpointFolder}",
    f"Videos will be saved to: {folderNames.videosFolder}",
    sep="\n",
)


Run name: Lift_Panda_lift_panda_test
Metrics will be saved to: metrics/Lift_Panda_lift_panda_test.csv
Plots will be saved to: plots/Lift_Panda_lift_panda_test.html
Checkpoints will be saved to: checkpoints/Lift_Panda_lift_panda_test
Videos will be saved to: videos


In [6]:
print("Initializing Robosuite training environment...")

# Simulator options for training, all taken from the experiment config.
trainEnvOptions = dict(
    env_name=config.environmentName,
    robots=config.robotName,
    controller_configs=suite.load_composite_controller_config(controller=config.controllerName),
    has_renderer=False,                          # no on-screen window while training
    has_offscreen_renderer=config.useCameraObs,  # offscreen rendering only when camera observations are used
    use_camera_obs=config.useCameraObs,
    camera_names=config.cameraName,
    camera_heights=config.cameraHeight,          # camera images are rendered at this resolution
    camera_widths=config.cameraWidth,
    reward_shaping=config.rewardShaping,
    control_freq=config.controlFreq,
    horizon=config.horizon,
)
robosuite_env_train = suite.make(**trainEnvOptions)

# Expose only the configured camera's image, kept as a Dict observation (not flattened).
train_env_keys = [f"{config.cameraName}_image"]
env = GymWrapper(robosuite_env_train, keys=train_env_keys, flatten_obs=False)


Initializing Robosuite training environment...


  gym.logger.warn(
  gym.logger.warn(


In [7]:
# Apply observation wrappers (innermost first):
#   ImageExtractWrapper        Dict observation -> the single camera image
#   ResizeObservation          image resized to (cameraHeight, cameraWidth)
#   GymPixelsProcessingWrapper rearranged observation space, e.g. Box(0.0, 1.0, (3, 64, 64), float32)
#   CleanGymWrapper            final clean-up wrapper, same observation space
print(f"Original Observation Space: {env.observation_space}")
env = CleanGymWrapper(
    GymPixelsProcessingWrapper(
        gym.wrappers.ResizeObservation(
            ImageExtractWrapper(env, image_key=train_env_keys[0]),
            (config.cameraHeight, config.cameraWidth),
        )
    )
)
print(f"New Observation Space{env.observation_space}")
print("Robosuite training environment initialized.")

Original Observation Space: Dict('agentview_image': Box(0, 255, (64, 64, 3), uint8))
New Observation SpaceBox(0.0, 1.0, (3, 64, 64), float32)
Robosuite training environment initialized.


In [8]:
print("Initializing Robosuite evaluation environment...")

# Simulator options for evaluation; they mirror training except for the renderer flags.
evalEnvOptions = dict(
    env_name=config.environmentName,
    robots=config.robotName,
    controller_configs=suite.load_composite_controller_config(controller=config.controllerName),
    has_renderer=config.evaluationRender,  # on-screen rendering only when requested in the config
    has_offscreen_renderer=config.useCameraObs or config.evaluationRender,  # offscreen frames for observations / video
    use_camera_obs=config.useCameraObs,
    camera_names=config.cameraName,
    camera_heights=config.cameraHeight,
    camera_widths=config.cameraWidth,
    reward_shaping=config.rewardShaping,   # kept consistent with training
    control_freq=config.controlFreq,
    horizon=config.horizon,
    # render_camera=config.cameraName      # or a different camera such as "frontview" for videos
)
robosuite_env_eval = suite.make(**evalEnvOptions)

# Expose only the configured camera's image, kept as a Dict observation (not flattened).
eval_env_keys = [f"{config.cameraName}_image"]
envEvaluation = GymWrapper(robosuite_env_eval, keys=eval_env_keys, flatten_obs=False)


Initializing Robosuite evaluation environment...


In [9]:
# Apply the observation wrappers to the evaluation environment.
# The image key comes from eval_env_keys (the keys this environment was built
# with) rather than from the training environment's key list.
# NOTE: each line rebinds envEvaluation, so re-running this cell wraps the
# environment again; re-run the cell that creates envEvaluation first.
print(f"Original Observation Space: {envEvaluation.observation_space}")
envEvaluation = ImageExtractWrapper(envEvaluation, image_key=eval_env_keys[0]) # Convert Dictionary Observation to Image Observation -> Box(0, 255, (64, 64, 3), uint8)
envEvaluation = gym.wrappers.ResizeObservation(envEvaluation, (config.cameraHeight, config.cameraWidth)) # Resize image observation size -> Box(0, 255, (64, 64, 3), uint8)
envEvaluation = GymPixelsProcessingWrapper(envEvaluation) # Rearrange Observation space -> Box(0.0, 1.0, (3, 64, 64), float32)
envEvaluation = CleanGymWrapper(envEvaluation) # Clean Gym Wrapper -> Box(0.0, 1.0, (3, 64, 64), float32)
print(f"New Observation Space{envEvaluation.observation_space}")
print("Robosuite evaluation environment initialized.")

Original Observation Space: Dict('agentview_image': Box(0, 255, (64, 64, 3), uint8))
New Observation SpaceBox(0.0, 1.0, (3, 64, 64), float32)
Robosuite training environment initialized.


In [10]:
# Read the observation shape and action bounds from the wrapped training
# environment; these values are what the agent is constructed from.
observationShape, actionSize, actionLow, actionHigh = getEnvProperties(env)
print(
    f"Environment Properties: Observation Shape {observationShape}, "
    f"Action Size {actionSize}, Action Low {actionLow}, Action High {actionHigh}"
)
if not config.useCameraObs:
    print(
        "Warning: 'useCameraObs' is False. The current Dreamer implementation primarily expects "
        "image observations for its Encoder. Ensure your network setup is appropriate for "
        "state-based observations if this is intended."
    )

Environment Properties: Observation Shape (3, 64, 64), Action Size 7, Action Low [-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0], Action High [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]


In [11]:
# Construct the agent from the environment properties read above, the selected
# torch device, and the `dreamer` section of the experiment config.
print("Initializing Dreamer agent...")
dreamer = Dreamer(observationShape, actionSize, actionLow, actionHigh, device, config.dreamer)

Initializing Dreamer agent...


In [12]:
# Either start fresh, or try to restore the checkpoint named in the config.
# A missing checkpoint file is reported and training proceeds from scratch.
if not config.resume:
    print("Starting training from scratch.")
else:
    checkpointToLoadPath = os.path.join(checkpointFolder, f"{runName}_{config.checkpointToLoad}.pth")
    try:
        dreamer.loadCheckpoint(checkpointToLoadPath)
    except FileNotFoundError:
        print(f"Warning: Checkpoint not found at {checkpointToLoadPath}. Starting from scratch.")
    else:
        print(f"Resumed training from checkpoint: {checkpointToLoadPath}")

Resumed training from checkpoint: checkpoints/Lift_Panda_lift_panda_test/Lift_Panda_lift_panda_test_5k.pth


In [15]:
# Run config.episodesBeforeStart episodes on the training environment before any
# gradient step, then report how many samples the replay buffer holds.
# NOTE(review): presumably environmentInteraction appends to dreamer.buffer — confirm in dreamer.py.
dreamer.environmentInteraction(env, config.episodesBeforeStart, seed=config.seed)
print(f"Buffer size after initial collection: {len(dreamer.buffer)}")

Buffer size after initial collection: 10000


In [16]:
# Main training loop: each outer iteration runs config.replayRatio gradient
# steps, then collects config.numInteractionEpisodes new episodes and
# (optionally) appends a metrics row.
iterationsNum = config.gradientSteps // config.replayRatio
print(f"Starting training for {config.gradientSteps} gradient steps, with {iterationsNum} outer iterations.")

# Buffer size needed before a full (batchSize x batchLength) sequence batch is sampled.
minBufferSize = dreamer.config.batchSize * dreamer.config.batchLength


def nextInteractionSeed():
    """Return config.seed offset by the episodes run so far, or None when no seed is configured."""
    # Compare against None instead of relying on truthiness so a seed of 0 is honoured.
    if config.seed is None:
        return None
    return config.seed + dreamer.totalEpisodes


# Metrics of the most recent gradient step; they stay None until one has run so
# the metrics row below is never built from unbound names.
worldModelMetrics = None
behaviorMetrics = None

for iter_idx in tqdm(range(iterationsNum), desc="Training Progress"):
    # Inner loop for gradient updates
    for _ in range(config.replayRatio):

        if len(dreamer.buffer) < minBufferSize:  # Ensure enough data for a full sequence batch
            print(f"Buffer has {len(dreamer.buffer)} samples, less than required {minBufferSize}. Collecting more...")
            dreamer.environmentInteraction(env, 1, seed=nextInteractionSeed())  # Collect one more episode

            if len(dreamer.buffer) < minBufferSize:
                continue  # Skip training step if still not enough data

        sampledData = dreamer.buffer.sample(dreamer.config.batchSize, dreamer.config.batchLength)
        initialStates, worldModelMetrics = dreamer.worldModelTraining(sampledData)
        behaviorMetrics = dreamer.behaviorTraining(initialStates)
        dreamer.totalGradientSteps += 1

        # Checkpoint saving and evaluation
        if dreamer.totalGradientSteps % config.checkpointInterval == 0 and config.saveCheckpoints:
            suffix = f"{dreamer.totalGradientSteps // 1000}k"
            currentCheckpointPath = f"{checkpointFilenameBase}_{suffix}.pth"
            dreamer.saveCheckpoint(currentCheckpointPath)

            # Perform evaluation
            video_path_suffix = f"{runName}_{suffix}"
            evaluationScore = dreamer.environmentInteraction(
                envEvaluation,
                config.numEvaluationEpisodes,
                seed=nextInteractionSeed(),
                evaluation=True,
                saveVideo=True,
                filename=os.path.join(config.folderNames.videosFolder, video_path_suffix),  # Pass full path
            )
            # Format the score separately: a float format spec cannot be applied
            # to the 'N/A' placeholder string (it raises ValueError).
            if evaluationScore is not None:
                evaluationScoreText = f"{evaluationScore:>8.2f}"
            else:
                evaluationScoreText = f"{'N/A':>8}"
            print(f"Iter {iter_idx+1}/{iterationsNum} | Grad Steps {dreamer.totalGradientSteps} | Saved Checkpoint: {currentCheckpointPath} | Eval Score: {evaluationScoreText}")

    # Collect more environment interactions
    mostRecentScore = dreamer.environmentInteraction(env, config.numInteractionEpisodes, seed=nextInteractionSeed())

    # Save metrics (only once at least one gradient step has produced loss metrics)
    if config.saveMetrics and worldModelMetrics is not None and behaviorMetrics is not None:
        metricsBase = {
            "envSteps": dreamer.totalEnvSteps,
            "gradientSteps": dreamer.totalGradientSteps,
            "totalReward": mostRecentScore if mostRecentScore is not None else float('nan')
        }
        # Merge all metrics dictionaries
        all_metrics = {**metricsBase, **worldModelMetrics, **behaviorMetrics}
        saveLossesToCSV(metricsFilename, all_metrics)

        # Plot metrics (can be slow, so only every fifth checkpoint interval)
        if dreamer.totalGradientSteps % (config.checkpointInterval * 5) == 0:
            plotMetrics(f"{metricsFilename}", savePath=f"{plotFilename}", title=f"{config.environmentName} {config.robotName} - {config.runName}")

print("Training finished.")

# Final plot
if config.saveMetrics:
    plotMetrics(f"{metricsFilename}", savePath=f"{plotFilename}", title=f"{config.environmentName} {config.robotName} - {config.runName} (Final)")
env.close()
envEvaluation.close()



Starting training for 100000 gradient steps, with 10000 outer iterations.


Training Progress:   5%|▍         | 499/10000 [41:31<13:10:38,  4.99s/it]


AttributeError: 'NoneType' object has no attribute 'shape'

## Git Management - Colab use only

In [None]:
# Sync with colab
# Stages everything in the working tree, commits it with a fixed message and
# pushes to the GitHub remote over HTTPS.
# NOTE(review): `<PAT>` in the push URL is a placeholder for a personal access
# token; a real token must never be saved in the notebook — supply it from an
# environment variable or a credential helper instead.
!git config --global user.email "aman.chulawala@gmail.com"
!git add .
!git commit -m "Colab Push -> Added Image Wrapper for robosuite"
!git push https://<PAT>@github.com/FarStryke21/NaturalDreamer.git

In [15]:
# Video generation test
# Reset the evaluation environment and grab one frame from render() so the
# following cells can inspect what a rendered frame looks like.
envEvaluation.reset()
frame = envEvaluation.render()

In [16]:
# Dimensions of the rendered frame
frame.shape

(64, 64, 3)

In [17]:
# Display the rendered frame's pixel values
frame

array([[[247, 246, 241],
        [247, 247, 242],
        [247, 247, 242],
        ...,
        [252, 251, 247],
        [252, 251, 247],
        [252, 251, 247]],

       [[247, 246, 241],
        [247, 246, 242],
        [247, 247, 242],
        ...,
        [252, 251, 247],
        [252, 251, 247],
        [252, 251, 247]],

       [[247, 246, 241],
        [247, 246, 242],
        [247, 247, 242],
        ...,
        [252, 251, 247],
        [252, 251, 247],
        [252, 251, 247]],

       ...,

       [[115, 113, 110],
        [116, 113, 110],
        [116, 114, 110],
        ...,
        [115, 113, 110],
        [116, 113, 110],
        [116, 114, 110]],

       [[116, 113, 110],
        [116, 114, 110],
        [116, 114, 111],
        ...,
        [116, 113, 110],
        [116, 114, 110],
        [116, 114, 110]],

       [[116, 113, 110],
        [116, 114, 111],
        [116, 114, 111],
        ...,
        [116, 113, 110],
        [115, 113, 110],
        [116, 114, 111]]

In [18]:
# Run one evaluation episode on the evaluation environment with video saving
# requested under <videosFolder>/VIDEO_TEST; the returned value is shown as the
# cell output.
dreamer.environmentInteraction(
    envEvaluation,
    1,
    seed=config.seed,
    evaluation=True,
    saveVideo=True,
    filename=os.path.join(config.folderNames.videosFolder, "VIDEO_TEST"),
)

np.float64(0.6235392166750375)