# PPO
---

### 1. Import the Necessary Packages

In [1]:
%matplotlib inline
import wandb

from model.ppo_2 import PPO
import numpy as np
from env.wrappers import LunarContinuous, LunarLanderWithUnknownWind,LunarLanderWithKnownWind
from utils.logger import WandbSummaryWritter

### 2. Instantiate the Model

Setup the hyperparameters in the code cell below.

In [2]:
misc_hyperparameters = {
    'save_freq': 0 ,  
    'val_freq': 10,
    'val_iter': 10,
    'env': LunarLanderWithKnownWind
}

Initialise wandb session in the code cell below.

In [None]:
MAX_TOTAL_TIMESTEPS_TO_TRAIN = 500
VAL_ITER = 30
MAX_RUN_COUNT = 30
sweep_config = {
    'method': 'bayes', 
    'metric': {
        'name': 'validation_rewards',
        'goal': 'maximize'
    },
    'parameters': {
        'actor_lr': {
            "distribution": "uniform",
            "min": 1e-5,
            "max": 0.1
        },
        'critic_lr': {
            "distribution": "uniform",
            "min": 1e-5,
            "max": 0.1
        },
        'adp_lr': {
            "distribution": "uniform",
            "min": 1e-5,
            "max": 0.1
        },
        'gamma': {
            'min': 0.9,
            'max': 1.
        },
        'lam': {
            'min': 0.9,
            'max': 1.
        },
        'max_grad_norm': {
            "min": 0.1,
            "max": 10.0, 
        },
        'n_updates_per_iteration': {
            'values': list(range(1, 21))
        },
        'num_envs': {
            'values': list(range(1, 100))
        },
        'anneal_lr': {
            'values': [True, False]
        },
        'num_steps': {
            'distribution': 'q_uniform',
            'min': 300,
            'max': 4000,
            'q': 100
        },
        'adp_num_steps': {
            'distribution': 'q_uniform',
            'min': 200,
            'max': 1000,
            'q': 10
        },
        'anneal_discount': {
            'distribution': 'q_uniform',
            'min': 1,
            'max': 1000,
            'q': 10
        },
        'batches': {
            'distribution': 'q_uniform',
            "min": 1,     # 2^0
            "max": 1024,  # 2^10
            "q": 2 
        }
    }
}

In [4]:
def train_model(config = None):
    logger = WandbSummaryWritter(project='lunar', config =config)
    ppo = ppo = PPO(logger,**misc_hyperparameters) if config is None else PPO(summary_writter=logger, **config, **misc_hyperparameters)
    ppo.train()

    val_rews, val_dur = ppo.validate(VAL_ITER, True, True)

    wandb.log({
        "validation_rewards": val_rews,
        "validation_duration": val_dur,
        "maximum_validation_reward": np.argmax(val_rews),
        "max_reward_video": wandb.Video(f"./videos/rl-video{np.argmax(val_rews)}-episode-{np.argmax(val_rews)}.mp4", fps=4, format="mp4")
    })

    # logger["delta_t"] = str(round(logger['delta_t'], 2))

Initialise the model fo the desired timestamps. Alternatively can specify a checkpoint to continue training.

In [5]:
sweep_id = wandb.sweep(sweep_config, project="lunar")
wandb.agent(sweep_id, function=train_model, count=MAX_RUN_COUNT)

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


Create sweep with ID: q44u0ys7
Sweep URL: https://wandb.ai/pmsaraiva2712-tum/lunar/sweeps/q44u0ys7


[34m[1mwandb[0m: Agent Starting Run: xd92oei9 with config:
[34m[1mwandb[0m: 	actor_lr: 0.07041309019796743
[34m[1mwandb[0m: 	adp_lr: 0.0496149131147736
[34m[1mwandb[0m: 	adp_num_steps: 260
[34m[1mwandb[0m: 	anneal_discount: 280
[34m[1mwandb[0m: 	anneal_lr: False
[34m[1mwandb[0m: 	batches: 70
[34m[1mwandb[0m: 	critic_lr: 0.03513488285322935
[34m[1mwandb[0m: 	gamma: 0.986623171458458
[34m[1mwandb[0m: 	lam: 0.9022184049586168
[34m[1mwandb[0m: 	max_grad_norm: 3.4527110463690436
[34m[1mwandb[0m: 	n_updates_per_iteration: 3
[34m[1mwandb[0m: 	num_envs: 94
[34m[1mwandb[0m: 	num_steps: 3100
[34m[1mwandb[0m: Currently logged in as: [33mmohamedrostom[0m ([33mpmsaraiva2712-tum[0m). Use [1m`wandb login --relogin`[0m to force relogin



-------------------- Iteration #1 --------------------
Average Episodic Return: -325.08
Average Actor Loss: -0.21142
Average Critic Loss: 1562.754366830679
Average KL Divergence: 0.012544951658212365
Iteration took: 3.01 secs, of which rollout took 2.47 secs and gradient updates took 0.53 secs
Current actor learning rate: 0.005
Current critic learning rate: 0.0075
------------------------------------------------------


-------------------- Iteration #2 --------------------
Average Episodic Return: -231.57
Average Actor Loss: -0.21272
Average Critic Loss: 1070.3282539661113
Average KL Divergence: 0.010671996380831562
Iteration took: 2.98 secs, of which rollout took 2.44 secs and gradient updates took 0.53 secs
Current actor learning rate: 0.005
Current critic learning rate: 0.0075
------------------------------------------------------


-------------------- Iteration #1 --------------------
Average adp Loss: 29.38668
Iteration took: 2.48 secs, of which rollout took 2.23 secs and gradi

  logger.warn(


MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video14-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video14-episode-0.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video14-episode-0.mp4




MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video13-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video13-episode-0.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video13-episode-0.mp4




MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video18-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video18-episode-0.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video18-episode-0.mp4
MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video19-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video19-episode-0.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video19-episode-0.mp4
MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video5-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video5-episode-0.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video5-episode-0.mp4




MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video12-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video12-episode-0.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video12-episode-0.mp4




MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video2-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video2-episode-0.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video2-episode-0.mp4




MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video6-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video6-episode-0.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video6-episode-0.mp4




MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video0-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video0-episode-0.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video0-episode-0.mp4




MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video1-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video1-episode-0.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video1-episode-0.mp4




MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video4-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video4-episode-0.mp4



                                                                       

MoviePy - Done !




MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video4-episode-0.mp4
MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video8-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video8-episode-0.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video8-episode-0.mp4
MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video17-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video17-episode-0.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video17-episode-0.mp4
MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video7-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video7-episode-0.mp4



                                                                        

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video7-episode-0.mp4
MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video15-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video15-episode-0.mp4



                                                                         

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video15-episode-0.mp4
MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video9-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video9-episode-0.mp4



                                                                        

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video9-episode-0.mp4
MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video10-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video10-episode-0.mp4



                                                                         

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video10-episode-0.mp4
MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video3-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video3-episode-0.mp4



                                                                         

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video3-episode-0.mp4
MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video16-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video16-episode-0.mp4



                                                                         

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video16-episode-0.mp4
MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video13-episode-1.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video13-episode-1.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video13-episode-1.mp4




MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video11-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video11-episode-0.mp4





MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video11-episode-0.mp4


0,1
actor_learning_rate,▁▁
adp_learning_rate,▁
average_actor_loss,█▁
average_adapt_loss,▁
average_critic_loss,█▁
average_episode_rewards,▁█
critic_learning_rate,▁▁
maximum_validation_reward,▁
simulated_iterations,▁█▁
validation_duration,▁

0,1
actor_learning_rate,0.005
adp_learning_rate,0.005
average_actor_loss,-0.21272
average_adapt_loss,29.38668
average_critic_loss,1070.32825
average_episode_rewards,-231.57248
critic_learning_rate,0.0075
iteration_compute,2.48
maximum_validation_reward,0.0
simulated_iterations,1.0


[34m[1mwandb[0m: Agent Starting Run: 7vsaeqdu with config:
[34m[1mwandb[0m: 	actor_lr: 0.015696755940711883
[34m[1mwandb[0m: 	adp_lr: 0.019674556068926857
[34m[1mwandb[0m: 	adp_num_steps: 730
[34m[1mwandb[0m: 	anneal_discount: 260
[34m[1mwandb[0m: 	anneal_lr: True
[34m[1mwandb[0m: 	batches: 884
[34m[1mwandb[0m: 	critic_lr: 0.06335645172114146
[34m[1mwandb[0m: 	gamma: 0.9143985497652444
[34m[1mwandb[0m: 	lam: 0.9631858123930576
[34m[1mwandb[0m: 	max_grad_norm: 3.295048308248568
[34m[1mwandb[0m: 	n_updates_per_iteration: 12
[34m[1mwandb[0m: 	num_envs: 32
[34m[1mwandb[0m: 	num_steps: 1300



-------------------- Iteration #1 --------------------
Average Episodic Return: -200.84
Average Actor Loss: -0.20636
Average Critic Loss: 1216.9585731506347
Average KL Divergence: 0.01254590061188747
Iteration took: 3.06 secs, of which rollout took 2.5 secs and gradient updates took 0.56 secs
Current actor learning rate: 0.005
Current critic learning rate: 0.0075
------------------------------------------------------


-------------------- Iteration #2 --------------------
Average Episodic Return: -211.34
Average Actor Loss: -0.21148
Average Critic Loss: 838.9398065420297
Average KL Divergence: 0.010680421232329814
Iteration took: 3.05 secs, of which rollout took 2.47 secs and gradient updates took 0.57 secs
Current actor learning rate: 0.005
Current critic learning rate: 0.0075
------------------------------------------------------


-------------------- Iteration #1 --------------------
Average adp Loss: 25.67567
Iteration took: 3.17 secs, of which rollout took 2.91 secs and gradien

  logger.warn(


MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video17-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video17-episode-0.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video17-episode-0.mp4




MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video0-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video0-episode-0.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video0-episode-0.mp4




MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video7-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video7-episode-0.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video7-episode-0.mp4




MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video1-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video1-episode-0.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video1-episode-0.mp4




MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video19-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video19-episode-0.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video19-episode-0.mp4




MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video9-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video9-episode-0.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video9-episode-0.mp4




MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video8-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video8-episode-0.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video8-episode-0.mp4




MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video10-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video10-episode-0.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video10-episode-0.mp4
MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video12-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video12-episode-0.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video12-episode-0.mp4
MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video11-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video11-episode-0.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video11-episode-0.mp4




MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video4-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video4-episode-0.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video4-episode-0.mp4
MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video5-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video5-episode-0.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video5-episode-0.mp4
MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video18-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video18-episode-0.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video18-episode-0.mp4




MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video13-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video13-episode-0.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video13-episode-0.mp4




MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video6-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video6-episode-0.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video6-episode-0.mp4
MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video2-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video2-episode-0.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video2-episode-0.mp4
MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video15-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video15-episode-0.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video15-episode-0.mp4
MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video3-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video3-episode-0.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video3-episode-0.mp4
MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video16-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video16-episode-0.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video16-episode-0.mp4
MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video14-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video14-episode-0.mp4





MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video14-episode-0.mp4


0,1
actor_learning_rate,▁▁
adp_learning_rate,▁
average_actor_loss,█▁
average_adapt_loss,▁
average_critic_loss,█▁
average_episode_rewards,█▁
critic_learning_rate,▁▁
maximum_validation_reward,▁
simulated_iterations,▁█▁
validation_duration,▁

0,1
actor_learning_rate,0.005
adp_learning_rate,0.005
average_actor_loss,-0.21148
average_adapt_loss,25.67567
average_critic_loss,838.93981
average_episode_rewards,-211.3405
critic_learning_rate,0.0075
iteration_compute,3.17
maximum_validation_reward,0.0
simulated_iterations,1.0


[34m[1mwandb[0m: Agent Starting Run: noi0l1ny with config:
[34m[1mwandb[0m: 	actor_lr: 0.020620860360601415
[34m[1mwandb[0m: 	adp_lr: 0.03751154163613607
[34m[1mwandb[0m: 	adp_num_steps: 470
[34m[1mwandb[0m: 	anneal_discount: 140
[34m[1mwandb[0m: 	anneal_lr: False
[34m[1mwandb[0m: 	batches: 268
[34m[1mwandb[0m: 	critic_lr: 0.0779457820396566
[34m[1mwandb[0m: 	gamma: 0.9445757929097592
[34m[1mwandb[0m: 	lam: 0.9746994330879308
[34m[1mwandb[0m: 	max_grad_norm: 7.072838382419765
[34m[1mwandb[0m: 	n_updates_per_iteration: 19
[34m[1mwandb[0m: 	num_envs: 27
[34m[1mwandb[0m: 	num_steps: 1800



-------------------- Iteration #1 --------------------
Average Episodic Return: -308.93
Average Actor Loss: -0.21156
Average Critic Loss: 1609.3566147437461
Average KL Divergence: 0.010778687480174434
Iteration took: 3.14 secs, of which rollout took 2.56 secs and gradient updates took 0.58 secs
Current actor learning rate: 0.005
Current critic learning rate: 0.0075
------------------------------------------------------


-------------------- Iteration #2 --------------------
Average Episodic Return: -158.13
Average Actor Loss: -0.2142
Average Critic Loss: 1179.60073192303
Average KL Divergence: 0.009272936225883873
Iteration took: 3.13 secs, of which rollout took 2.55 secs and gradient updates took 0.57 secs
Current actor learning rate: 0.005
Current critic learning rate: 0.0075
------------------------------------------------------


-------------------- Iteration #1 --------------------
Average adp Loss: 28.15251
Iteration took: 2.46 secs, of which rollout took 2.2 secs and gradient

  logger.warn(


MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video16-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video16-episode-0.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video16-episode-0.mp4




MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video19-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video19-episode-0.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video19-episode-0.mp4
MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video8-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video8-episode-0.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video8-episode-0.mp4
MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video2-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video2-episode-0.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video2-episode-0.mp4




MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video3-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video3-episode-0.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video3-episode-0.mp4
MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video11-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video11-episode-0.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video11-episode-0.mp4




MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video6-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video6-episode-0.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video6-episode-0.mp4




MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video4-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video4-episode-0.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video4-episode-0.mp4




MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video14-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video14-episode-0.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video14-episode-0.mp4




MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video13-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video13-episode-0.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video13-episode-0.mp4




MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video1-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video1-episode-0.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video1-episode-0.mp4




MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video12-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video12-episode-0.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video12-episode-0.mp4




MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video10-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video10-episode-0.mp4



                                                                       

MoviePy - Done !




MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video10-episode-0.mp4
MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video7-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video7-episode-0.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video7-episode-0.mp4
MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video15-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video15-episode-0.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video15-episode-0.mp4
MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video18-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video18-episode-0.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video18-episode-0.mp4
MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video5-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video5-episode-0.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video5-episode-0.mp4
MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video9-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video9-episode-0.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video9-episode-0.mp4
MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video0-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video0-episode-0.mp4



                                                                         

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video0-episode-0.mp4
MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video8-episode-1.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video8-episode-1.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video8-episode-1.mp4
MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video16-episode-1.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video16-episode-1.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video16-episode-1.mp4
MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video4-episode-1.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video4-episode-1.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video4-episode-1.mp4




MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video13-episode-1.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video13-episode-1.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video13-episode-1.mp4




MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video2-episode-1.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video2-episode-1.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video2-episode-1.mp4




MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video19-episode-1.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video19-episode-1.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video19-episode-1.mp4




MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video3-episode-1.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video3-episode-1.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video3-episode-1.mp4
MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video17-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video17-episode-0.mp4





MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video17-episode-0.mp4


0,1
actor_learning_rate,▁▁
adp_learning_rate,▁
average_actor_loss,█▁
average_adapt_loss,▁
average_critic_loss,█▁
average_episode_rewards,▁█
critic_learning_rate,▁▁
maximum_validation_reward,▁
simulated_iterations,▁█▁
validation_duration,▁

0,1
actor_learning_rate,0.005
adp_learning_rate,0.005
average_actor_loss,-0.2142
average_adapt_loss,28.15251
average_critic_loss,1179.60073
average_episode_rewards,-158.13014
critic_learning_rate,0.0075
iteration_compute,2.46
maximum_validation_reward,0.0
simulated_iterations,1.0


[34m[1mwandb[0m: Agent Starting Run: cj89uwxo with config:
[34m[1mwandb[0m: 	actor_lr: 0.024606345234675257
[34m[1mwandb[0m: 	adp_lr: 0.025172456409093196
[34m[1mwandb[0m: 	adp_num_steps: 820
[34m[1mwandb[0m: 	anneal_discount: 180
[34m[1mwandb[0m: 	anneal_lr: False
[34m[1mwandb[0m: 	batches: 788
[34m[1mwandb[0m: 	critic_lr: 0.05257378764330456
[34m[1mwandb[0m: 	gamma: 0.9374913764547572
[34m[1mwandb[0m: 	lam: 0.9656347807649304
[34m[1mwandb[0m: 	max_grad_norm: 7.848039241143208
[34m[1mwandb[0m: 	n_updates_per_iteration: 19
[34m[1mwandb[0m: 	num_envs: 30
[34m[1mwandb[0m: 	num_steps: 1100



-------------------- Iteration #1 --------------------
Average Episodic Return: -313.75
Average Actor Loss: -0.21144
Average Critic Loss: 2039.8928665161134
Average KL Divergence: 0.00936701982595528
Iteration took: 3.2 secs, of which rollout took 2.61 secs and gradient updates took 0.59 secs
Current actor learning rate: 0.005
Current critic learning rate: 0.0075
------------------------------------------------------


-------------------- Iteration #2 --------------------
Average Episodic Return: -203.73
Average Actor Loss: -0.21258
Average Critic Loss: 1499.259181858943
Average KL Divergence: 0.008870134143412893
Iteration took: 3.19 secs, of which rollout took 2.61 secs and gradient updates took 0.57 secs
Current actor learning rate: 0.005
Current critic learning rate: 0.0075
------------------------------------------------------


-------------------- Iteration #1 --------------------
Average adp Loss: 27.28223
Iteration took: 2.51 secs, of which rollout took 2.24 secs and gradien

  logger.warn(


MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video12-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video12-episode-0.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video12-episode-0.mp4




MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video13-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video13-episode-0.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video13-episode-0.mp4




MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video14-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video14-episode-0.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video14-episode-0.mp4




MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video10-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video10-episode-0.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video10-episode-0.mp4




MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video15-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video15-episode-0.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video15-episode-0.mp4




MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video18-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video18-episode-0.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video18-episode-0.mp4




MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video8-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video8-episode-0.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video8-episode-0.mp4




MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video4-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video4-episode-0.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video4-episode-0.mp4




MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video1-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video1-episode-0.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video1-episode-0.mp4




MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video9-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video9-episode-0.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video9-episode-0.mp4
MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video5-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video5-episode-0.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video5-episode-0.mp4
MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video2-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video2-episode-0.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video2-episode-0.mp4
MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video11-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video11-episode-0.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video11-episode-0.mp4
MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video3-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video3-episode-0.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video3-episode-0.mp4
MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video17-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video17-episode-0.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video17-episode-0.mp4
MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video16-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video16-episode-0.mp4



                                                                        

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video16-episode-0.mp4
MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video6-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video6-episode-0.mp4



                                                                        

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video6-episode-0.mp4
MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video7-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video7-episode-0.mp4



                                                                        

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video7-episode-0.mp4
MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video19-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video19-episode-0.mp4



                                                                        

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video19-episode-0.mp4
MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video13-episode-1.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video13-episode-1.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video13-episode-1.mp4




MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video12-episode-1.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video12-episode-1.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video12-episode-1.mp4




MoviePy - Building video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video0-episode-0.mp4.
MoviePy - Writing video /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video0-episode-0.mp4





MoviePy - Done !
MoviePy - video ready /home/mohamedrostom62/ADLR/tum-adlr-ws25-16/videos/rl-video0-episode-0.mp4


0,1
actor_learning_rate,▁▁
adp_learning_rate,▁
average_actor_loss,█▁
average_adapt_loss,▁
average_critic_loss,█▁
average_episode_rewards,▁█
critic_learning_rate,▁▁
maximum_validation_reward,▁
simulated_iterations,▁█▁
validation_duration,▁

0,1
actor_learning_rate,0.005
adp_learning_rate,0.005
average_actor_loss,-0.21258
average_adapt_loss,27.28223
average_critic_loss,1499.25918
average_episode_rewards,-203.72722
critic_learning_rate,0.0075
iteration_compute,2.51
maximum_validation_reward,0.0
simulated_iterations,1.0


[34m[1mwandb[0m: Ctrl + C detected. Stopping sweep.


In [None]:
api = wandb.Api()

# Fetch the sweep object
sweep = api.sweep(f"pmsaraiva2712-tum/lunar/{sweep_id}")

# Fetch all runs from the sweep
runs = sweep.runs

# Sort runs by the metric you are optimizing for, e.g., 'val_loss'
best_run = sorted(runs, key=lambda run: run.summary.get('val_rewards', float('-inf')), reverse=True)[0]

# Extract best hyperparameters and metrics
best_params = best_run.config
best_metrics = best_run.summary

# Print the best hyperparameters and metrics
print("Best Hyperparameters:", best_params)
print("Best Metrics:", best_metrics)

Best Hyperparameters: {'lam': 0.9821062547914114, 'gamma': 0.913815261726064, 'adp_lr': 0.08626140546523067, 'batches': 516, 'actor_lr': 0.034561042911770035, 'num_envs': 59, 'anneal_lr': True, 'critic_lr': 0.002892075552063189, 'num_steps': 2000, 'adp_num_steps': 910, 'max_grad_norm': 1.084814579512531, 'anneal_discount': 50, 'n_updates_per_iteration': 20}
Best Metrics: {'_runtime': 2934.173429823, '_step': 303, '_timestamp': 1734325500.5551257, '_wandb': {'runtime': 2934}, 'actor_learning_rate': 0.005, 'adp_learning_rate': 0.005, 'average_actor_loss': -0.21090720078392097, 'average_adapt_loss': 4.623626652312884, 'average_critic_loss': 64.72636713956297, 'average_episode_rewards': 186.6697540283203, 'critic_learning_rate': 0.0075, 'iteration_compute': '12.23', 'simulated_iterations': 84, 'val_durs': 901, 'val_rews': 155.49781812688917}
