# PPO
---

### 1. Import the Necessary Packages

In [1]:
from model.ppo_2 import PPO
from env.wrappers import LunarContinuous, LunarLanderWithUnknownWind,LunarLanderWithKnownWind
from utils.logger import WandbSummaryWritter

### 2. Instantiate the Model

Setup the hyperparameters in the code cell below.

In [2]:
# Example override sets, kept for reference only (inactive):
#
#   hyperparameters = {
#       'timesteps_per_batch': 1024,         # Number of timesteps to run per batch
#       'max_timesteps_per_episode': 1200,   # Max number of timesteps per episode
#       'n_updates_per_iteration': 5,        # Number of times to update actor/critic per iteration
#       'lr': 2.5e-4,                        # Learning rate of actor optimizer
#       'gamma': 0.95,                       # Discount factor to be applied when calculating Rewards-To-Go
#   }
#
#   hyperparameters = {'gamma': 0.999, 'lr_gamma': 0.995,
#                      'max_timesteps_per_episode': 1200, 'lr': 0.005}

# Left empty, so no keyword overrides are expanded into the PPO constructor.
hyperparameters = dict()

# Extra constructor keywords; 'env' holds the environment class to use.
misc_hyperparameters = dict(env=LunarLanderWithUnknownWind)

Initialise wandb session in the code cell below.

In [3]:
# Switch for experiment logging: a writer is only constructed when LOG is True,
# otherwise `logger` stays None.
LOG = False
logger = WandbSummaryWritter(project='lunar', config=hyperparameters) if LOG else None

Initialise the model for the desired timesteps. Alternatively, a checkpoint can be specified to continue training.

In [4]:
# Flip LOAD_MODEL to True to restore the saved state stored at `checkpoint`
# right after the model has been constructed.
LOAD_MODEL = False
checkpoint = 'ppo_parallel_checkpoints/charmed-armadillo-108/ppo_policy_960.pth'

# The logger goes in positionally; both option dicts are expanded as keyword
# arguments (a key present in both dicts would raise a TypeError).
ppo = PPO(
    logger,
    **hyperparameters,
    **misc_hyperparameters,
)

if LOAD_MODEL:
    ppo.restore_savestate(checkpoint)

### 3. Train the Model

Train the model for the specified number of timesteps.

In [5]:
# Start training on the PPO instance built above. No arguments are passed here,
# so the training length presumably comes from the instance's own
# configuration — TODO confirm against PPO.train.
ppo.train()


-------------------- Iteration #1 --------------------
Average Episodic Return: -364.59
Average Loss: 0.00021
Average KL Divergence: 0.005484048372062926
Iteration took: 10.8 secs
Current learning rate: 0.005
------------------------------------------------------



KeyboardInterrupt: 

### 4. Evaluate the Model

Run multiple episodes from pretrained model.

In [7]:
# Evaluate using the same environment class that was handed to the PPO
# constructor; the call is left as the cell's last expression so its return
# value is displayed.
ppo.validate(
    max_iter=30,
    should_record=True,
    env_class=misc_hyperparameters['env'],
)


frame_index:   1%|▏         | 1/67 [00:30<33:41, 30.64s/it, now=None]

MoviePy - Building video c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video27-episode-0.mp4.
MoviePy - Writing video c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video27-episode-0.mp4



frame_index:   1%|▏         | 1/67 [00:30<33:54, 30.83s/it, now=None]

MoviePy - Done !
MoviePy - video ready c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video27-episode-0.mp4


frame_index:   1%|▏         | 1/67 [00:31<34:35, 31.45s/it, now=None]

MoviePy - Building video c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video16-episode-0.mp4.
MoviePy - Writing video c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video16-episode-0.mp4



frame_index:   1%|▏         | 1/67 [00:31<34:50, 31.68s/it, now=None]

MoviePy - Done !
MoviePy - video ready c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video16-episode-0.mp4


frame_index:   1%|▏         | 1/67 [00:32<35:17, 32.08s/it, now=None]

MoviePy - Building video c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video18-episode-0.mp4.
MoviePy - Writing video c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video18-episode-0.mp4



frame_index:   1%|▏         | 1/67 [00:32<35:37, 32.39s/it, now=None]

MoviePy - Done !
MoviePy - video ready c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video18-episode-0.mp4
MoviePy - Building video c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video9-episode-0.mp4.
MoviePy - Writing video c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video9-episode-0.mp4



frame_index:   1%|▏         | 1/67 [00:32<35:54, 32.65s/it, now=None]

MoviePy - Done !
MoviePy - video ready c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video9-episode-0.mp4
MoviePy - Building video c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video19-episode-0.mp4.
MoviePy - Writing video c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video19-episode-0.mp4



frame_index:   1%|▏         | 1/67 [00:32<36:06, 32.83s/it, now=None]

MoviePy - Done !
MoviePy - video ready c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video19-episode-0.mp4


frame_index:   1%|▏         | 1/67 [00:33<36:20, 33.04s/it, now=None]

MoviePy - Building video c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video6-episode-0.mp4.
MoviePy - Writing video c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video6-episode-0.mp4



frame_index:   1%|▏         | 1/67 [00:33<36:39, 33.33s/it, now=None]

MoviePy - Done !
MoviePy - video ready c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video6-episode-0.mp4
MoviePy - Building video c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video22-episode-0.mp4.
MoviePy - Writing video c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video22-episode-0.mp4



frame_index:   1%|▏         | 1/67 [00:33<36:56, 33.58s/it, now=None]

MoviePy - Done !
MoviePy - video ready c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video22-episode-0.mp4
MoviePy - Building video c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video5-episode-0.mp4.
MoviePy - Writing video c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video5-episode-0.mp4



frame_index:   1%|▏         | 1/67 [00:33<37:15, 33.87s/it, now=None]

MoviePy - Done !
MoviePy - video ready c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video5-episode-0.mp4
MoviePy - Building video c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video12-episode-0.mp4.
MoviePy - Writing video c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video12-episode-0.mp4



frame_index:   1%|▏         | 1/67 [00:34<37:38, 34.22s/it, now=None]

MoviePy - Done !
MoviePy - video ready c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video12-episode-0.mp4
MoviePy - Building video c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video21-episode-0.mp4.
MoviePy - Writing video c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video21-episode-0.mp4



frame_index:   1%|▏         | 1/67 [00:34<37:56, 34.50s/it, now=None]

MoviePy - Done !
MoviePy - video ready c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video21-episode-0.mp4
MoviePy - Building video c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video28-episode-0.mp4.
MoviePy - Writing video c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video28-episode-0.mp4



frame_index:   1%|▏         | 1/67 [00:34<38:11, 34.72s/it, now=None]

MoviePy - Done !
MoviePy - video ready c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video28-episode-0.mp4
MoviePy - Building video c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video0-episode-0.mp4.
MoviePy - Writing video c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video0-episode-0.mp4



frame_index:   1%|▏         | 1/67 [00:34<38:28, 34.98s/it, now=None]

MoviePy - Done !
MoviePy - video ready c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video0-episode-0.mp4
MoviePy - Building video c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video3-episode-0.mp4.
MoviePy - Writing video c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video3-episode-0.mp4



frame_index:   1%|▏         | 1/67 [00:35<38:52, 35.34s/it, now=None]

MoviePy - Done !
MoviePy - video ready c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video3-episode-0.mp4
MoviePy - Building video c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video7-episode-0.mp4.
MoviePy - Writing video c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video7-episode-0.mp4



frame_index:   1%|▏         | 1/67 [00:35<39:09, 35.60s/it, now=None]

MoviePy - Done !
MoviePy - video ready c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video7-episode-0.mp4
MoviePy - Building video c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video8-episode-0.mp4.
MoviePy - Writing video c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video8-episode-0.mp4



frame_index:   1%|▏         | 1/67 [00:35<39:28, 35.89s/it, now=None]

MoviePy - Done !
MoviePy - video ready c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video8-episode-0.mp4
MoviePy - Building video c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video20-episode-0.mp4.
MoviePy - Writing video c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video20-episode-0.mp4



frame_index:   1%|▏         | 1/67 [00:36<39:44, 36.12s/it, now=None]

MoviePy - Done !
MoviePy - video ready c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video20-episode-0.mp4
MoviePy - Building video c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video24-episode-0.mp4.
MoviePy - Writing video c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video24-episode-0.mp4



frame_index:   1%|▏         | 1/67 [00:36<39:58, 36.34s/it, now=None]

MoviePy - Done !
MoviePy - video ready c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video24-episode-0.mp4


frame_index:   1%|▏         | 1/67 [00:37<41:05, 37.36s/it, now=None]

MoviePy - Building video c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video23-episode-0.mp4.
MoviePy - Writing video c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video23-episode-0.mp4



frame_index:   1%|▏         | 1/67 [00:37<41:31, 37.75s/it, now=None]

MoviePy - Done !
MoviePy - video ready c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video23-episode-0.mp4
MoviePy - Building video c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video15-episode-0.mp4.
MoviePy - Writing video c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video15-episode-0.mp4



frame_index:   1%|▏         | 1/67 [00:38<41:55, 38.11s/it, now=None]

MoviePy - Done !
MoviePy - video ready c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video15-episode-0.mp4
MoviePy - Building video c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video10-episode-0.mp4.
MoviePy - Writing video c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video10-episode-0.mp4



frame_index:   1%|▏         | 1/67 [00:38<42:16, 38.43s/it, now=None]

MoviePy - Done !
MoviePy - video ready c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video10-episode-0.mp4
MoviePy - Building video c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video26-episode-0.mp4.
MoviePy - Writing video c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video26-episode-0.mp4



frame_index:   1%|▏         | 1/67 [00:38<42:36, 38.73s/it, now=None]

MoviePy - Done !
MoviePy - video ready c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video26-episode-0.mp4
MoviePy - Building video c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video11-episode-0.mp4.
MoviePy - Writing video c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video11-episode-0.mp4



frame_index:   1%|▏         | 1/67 [00:39<43:03, 39.14s/it, now=None]

MoviePy - Done !
MoviePy - video ready c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video11-episode-0.mp4
MoviePy - Building video c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video17-episode-0.mp4.
MoviePy - Writing video c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video17-episode-0.mp4



frame_index:   1%|▏         | 1/67 [00:39<43:18, 39.37s/it, now=None]

MoviePy - Done !
MoviePy - video ready c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video17-episode-0.mp4


frame_index:   1%|▏         | 1/67 [00:39<43:39, 39.68s/it, now=None]

MoviePy - Building video c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video25-episode-0.mp4.
MoviePy - Writing video c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video25-episode-0.mp4



frame_index:   1%|▏         | 1/67 [00:39<43:53, 39.91s/it, now=None]

MoviePy - Done !
MoviePy - video ready c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video25-episode-0.mp4


frame_index:   1%|▏         | 1/67 [00:40<44:35, 40.54s/it, now=None]

MoviePy - Building video c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video2-episode-0.mp4.
MoviePy - Writing video c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video2-episode-0.mp4



frame_index:   1%|▏         | 1/67 [00:40<45:02, 40.94s/it, now=None]

MoviePy - Done !
MoviePy - video ready c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video2-episode-0.mp4
MoviePy - Building video c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video29-episode-0.mp4.
MoviePy - Writing video c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video29-episode-0.mp4



frame_index:   1%|▏         | 1/67 [00:41<45:21, 41.24s/it, now=None]

MoviePy - Done !
MoviePy - video ready c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video29-episode-0.mp4


frame_index:   1%|▏         | 1/67 [00:41<46:02, 41.86s/it, now=None]

MoviePy - Building video c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video13-episode-0.mp4.
MoviePy - Writing video c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video13-episode-0.mp4



frame_index:   1%|▏         | 1/67 [00:42<46:22, 42.16s/it, now=None]

MoviePy - Done !
MoviePy - video ready c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video13-episode-0.mp4


frame_index:   1%|▏         | 1/67 [00:42<46:37, 42.39s/it, now=None]

MoviePy - Building video c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video1-episode-0.mp4.
MoviePy - Writing video c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video1-episode-0.mp4



frame_index:   1%|▏         | 1/67 [00:42<46:55, 42.66s/it, now=None]

MoviePy - Done !
MoviePy - video ready c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video1-episode-0.mp4


frame_index:   1%|▏         | 1/67 [00:43<47:18, 43.01s/it, now=None]

MoviePy - Building video c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video14-episode-0.mp4.
MoviePy - Writing video c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video14-episode-0.mp4



frame_index:   1%|▏         | 1/67 [00:43<47:38, 43.31s/it, now=None]

MoviePy - Done !
MoviePy - video ready c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video14-episode-0.mp4


frame_index:   1%|▏         | 1/67 [00:45<49:32, 45.05s/it, now=None]

MoviePy - Building video c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video4-episode-0.mp4.
MoviePy - Writing video c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video4-episode-0.mp4



frame_index:   1%|▏         | 1/67 [00:45<49:52, 45.34s/it, now=None]

MoviePy - Done !
MoviePy - video ready c:\Users\pmsar\git\tum-adlr-ws25-16\videos\rl-video4-episode-0.mp4


([array([-221.44944172, -135.39961676, -175.87723678, -378.77812298,
          -75.77614309, -388.19137414, -209.81069693, -297.11376603,
         -151.2663757 , -388.03445019, -264.72227337, -406.11185865,
         -404.19043895, -368.43475355, -260.63443951, -408.15472698,
         -258.78628716, -367.89357428, -302.89456596,  -94.96354499,
         -229.29389602, -265.03150667, -147.9215722 , -204.17760474,
         -413.78087055, -386.24669132,  -98.97410665, -183.5513746 ,
         -182.30058888, -112.11257978])],
 [array([59, 84, 78, 59, 98, 57, 56, 60, 60, 54, 69, 70, 57, 82, 86, 68, 50,
         71, 53, 54, 60, 58, 56, 67, 60, 73, 69, 45, 58, 78])])