In [1]:
import gymnasium as gym
import torch

from huggingface_sb3 import load_from_hub, package_to_hub
from huggingface_hub import notebook_login # To log to our Hugging Face account to be able to upload models to the Hub.

from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.monitor import Monitor

In [3]:
!pip install ipywidgets

[0m

In [4]:
# Build the LunarLander-v2 environment and bring it to its initial state.
env = gym.make("LunarLander-v2")
observation, info = env.reset()

# Drive the environment for 20 steps with randomly sampled actions.
for _step in range(20):
    action = env.action_space.sample()
    print("Action taken:", action)

    # step() hands back the next observation, the reward, the two
    # end-of-episode flags (terminated / truncated) and an info dict.
    observation, reward, terminated, truncated, info = env.step(action)

    # An episode ends either because it terminated or because it was
    # truncated; in both cases start a fresh one.
    episode_over = terminated or truncated
    if not episode_over:
        continue
    print("Environment is reset")
    observation, info = env.reset()

env.close()

Action taken: 1
Action taken: 3
Action taken: 1
Action taken: 1
Action taken: 1
Action taken: 2
Action taken: 3
Action taken: 1
Action taken: 2
Action taken: 3
Action taken: 2
Action taken: 1
Action taken: 1
Action taken: 0
Action taken: 2
Action taken: 3
Action taken: 3
Action taken: 0
Action taken: 3
Action taken: 2


In [5]:
# Instantiate the environment from its registered id and reset it once.
env = gym.make("LunarLander-v2")
env.reset()

# Report the shape of the observation space and one randomly drawn sample.
observation_space = env.observation_space
print("_____OBSERVATION SPACE_____ \n")
print("Observation Space Shape", observation_space.shape)
print("Sample observation", observation_space.sample())

_____OBSERVATION SPACE_____ 

Observation Space Shape (8,)
Sample observation [31.284071   62.431763    1.5128325  -2.1333637   1.6591058   1.9360056
  0.39987114  0.9810344 ]


In [6]:
# Report the action space of the current `env`: its `n` attribute
# (printed under the "Shape" label) and one randomly sampled action.
action_space = env.action_space
print("\n _____ACTION SPACE_____ \n")
print("Action Space Shape", action_space.n)
print("Action Space Sample", action_space.sample())


 _____ACTION SPACE_____ 

Action Space Shape 4
Action Space Sample 1


In [7]:
# Rebind `env` to a vectorized environment made of 16 LunarLander-v2 instances.
env = make_vec_env(env_id="LunarLander-v2", n_envs=16)

In [8]:
import torch

In [9]:
# Smoke-test the MPS backend: when it is available, allocate a one-element
# tensor on it and print the tensor; otherwise report that no device was found.
if torch.backends.mps.is_available():
    # The device type string for this backend is "mps". "gpu" is not a valid
    # torch device type and makes torch.device() raise RuntimeError.
    mps_device = torch.device("mps")
    x = torch.ones(1, device=mps_device)
    print(x)
else:
    print("GPU device not found.")

GPU device not found.


In [14]:
from sagemaker.session import Session
from sagemaker.experiments.run import Run
import os

# Identifiers for the experiment and for the run created under it.
experiment_name = "first-experiment"
run_name = "experiment-run"

# Hyperparameters that are logged to the run and forwarded to PPO.
params = dict(
    learning_rate=0.002,
    gamma=0.999,
    gae_lambda=0.97,
    ent_coef=0.001,
    vf_coef=0.51,
)


# Train a PPO agent inside an experiment run: the hyperparameters in `params`
# are logged first, then the model is trained on `env` (defined in an earlier
# cell), evaluated on a separate environment, and the resulting mean and
# standard deviation of the reward are logged as run metrics.
with Run(experiment_name=experiment_name,
         sagemaker_session=Session(),
         run_name=run_name) as run:
    run.log_parameters(params)

    # Dedicated evaluation environment, wrapped in Monitor.
    eval_env = Monitor(gym.make("LunarLander-v2"))

    # try/finally guarantees the evaluation environment is closed even when
    # training or evaluation raises; previously it was never closed.
    try:
        model = PPO(
            policy='MlpPolicy',
            env=env,
            n_steps=1024,
            learning_rate=params["learning_rate"],
            batch_size=128,
            n_epochs=2,
            gamma=params["gamma"],  # Discount factor
            gae_lambda=params["gae_lambda"],  # Bias/variance trade-off factor for Generalized Advantage Estimator
            ent_coef=params["ent_coef"],  # Entropy coefficient for the loss calculation
            verbose=1,
            vf_coef=params['vf_coef'],
            device=torch.device("cpu")
        )

        model.learn(total_timesteps=1_000_000, log_interval=1)

        # Deterministic evaluation over 100 episodes.
        mean_reward, std_reward = evaluate_policy(
            model, eval_env, n_eval_episodes=100, deterministic=True
        )
    finally:
        eval_env.close()

    run.log_metric("mean_reward", mean_reward)
    run.log_metric("std_reward", std_reward)


INFO:sagemaker.experiments.run:The run (experiment-run) under experiment (first-experiment) already exists. Loading it.


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 92.1     |
|    ep_rew_mean     | -183     |
| time/              |          |
|    fps             | 4407     |
|    iterations      | 1        |
|    time_elapsed    | 3        |
|    total_timesteps | 16384    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 89.1        |
|    ep_rew_mean          | -135        |
| time/                   |             |
|    fps                  | 3298        |
|    iterations           | 2           |
|    time_elapsed         | 9           |
|    total_timesteps      | 32768       |
| train/                  |             |
|    approx_kl            | 0.007167495 |
|    clip_fraction        | 0.0812      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.38       |
|    explained_variance   | 0.000502    |
|    learning

In [21]:
!pip install --no-cache-dir sagemaker

[0m

In [8]:
import sagemaker

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml


In [10]:
pip install ipywidgets

Collecting ipywidgets
  Obtaining dependency information for ipywidgets from https://files.pythonhosted.org/packages/4a/0e/57ed498fafbc60419a9332d872e929879ceba2d73cb11d284d7112472b3e/ipywidgets-8.1.1-py3-none-any.whl.metadata
  Downloading ipywidgets-8.1.1-py3-none-any.whl.metadata (2.4 kB)
Collecting widgetsnbextension~=4.0.9 (from ipywidgets)
  Obtaining dependency information for widgetsnbextension~=4.0.9 from https://files.pythonhosted.org/packages/29/03/107d96077c4befed191f7ad1a12c7b52a8f9d2778a5836d59f9855c105f6/widgetsnbextension-4.0.9-py3-none-any.whl.metadata
  Downloading widgetsnbextension-4.0.9-py3-none-any.whl.metadata (1.6 kB)
Collecting jupyterlab-widgets~=3.0.9 (from ipywidgets)
  Obtaining dependency information for jupyterlab-widgets~=3.0.9 from https://files.pythonhosted.org/packages/e8/05/0ebab152288693b5ec7b339aab857362947031143b282853b4c2dd4b5b40/jupyterlab_widgets-3.0.9-py3-none-any.whl.metadata
  Downloading jupyterlab_widgets-3.0.9-py3-none-any.whl.metadata (4