# Environment Setup

In [1]:
import yaml
import gymnasium as gym
import numpy as np 
from types import SimpleNamespace as SN
from pathlib import Path
import copy
import utils.common_utils as cu
from algos.ddpg_agent import DDPGAgent
from algos.ppo_agent import PPOAgent
from utils.recorder import RecordVideo
from algos.ddpg_extension import DDPGExtension
from algos.ppo_extension import PPOExtension

In [2]:

# Function to test a trained policy
def test(agent, env_name, algo_name):
    """Roll out a trained agent and print summary statistics.

    Loads the agent's saved model via ``agent.load_model()``, runs
    ``agent.cfg.test_episodes`` episodes on ``agent.env`` (each reset with a
    freshly drawn random seed) and prints the mean/std of the episode returns
    and the mean episode length. The frames of every 100th episode (including
    episode 0) are written as a GIF under
    ``results/<env_name from yaml>/<algo_name>/video/test``.

    Args:
        agent: object exposing ``load_model()``, ``get_action(obs, evaluation=True)``,
            ``env``, ``seed`` and ``cfg`` (with ``test_episodes`` and
            ``max_episode_steps``).
        env_name: difficulty key used to locate ``cfg/envs/<env_name>_env.yaml``.
        algo_name: algorithm name, used only to build the results directory.

    Returns:
        None. Results are printed and GIFs are written to disk.
    """
    # Load model
    agent.load_model()
    print("Testing...")
    total_test_reward, total_test_len = 0, 0
    returns = []

    cur_dir = Path().cwd()
    cfg_path = cur_dir/'cfg'
    # Read the environment configuration; the context manager guarantees the
    # file handle is closed (the bare open() used previously leaked it).
    with open(cfg_path/'envs'/f'{env_name}_env.yaml', 'r') as env_file:
        env_cfg = yaml.safe_load(env_file)

    # Folder where the test GIFs are stored
    work_dir = cur_dir/'results'/env_cfg["env_name"]/algo_name
    video_test_dir = work_dir/"video"/"test"

    for ep in range(agent.cfg.test_episodes):
        frames = []
        seed = np.random.randint(low=1, high=1000)
        observation, _ = agent.env.reset(seed=seed)
        test_reward, test_len = 0, 0
        done = truncated = False

        # Stop on termination OR truncation: stepping an environment after it
        # reported truncation is undefined in the Gymnasium API.
        while not (done or truncated) and test_len < agent.cfg.max_episode_steps:
            action, _ = agent.get_action(observation, evaluation=True)
            observation, reward, done, truncated, _ = agent.env.step(action.flatten())
            # assumes render() returns a list of frames (it was concatenated
            # to a list before); extend() avoids re-copying the list each step
            frames.extend(agent.env.render())
            test_reward += reward
            test_len += 1
        total_test_reward += test_reward
        total_test_len += test_len
        returns.append(test_reward)

        # Only every 100th episode is exported as a GIF
        if ep % 100 == 0:
            cu.save_rgb_arrays_to_gif(frames, video_test_dir/('_seed_'+str(agent.seed)+'_ep_'+str(ep)+'.gif'))

    if not returns:
        # Nothing was run (test_episodes == 0); avoid dividing by zero below.
        print("No test episodes were run.")
        return

    print(f"Average test reward over {len(returns)} episodes: {float(total_test_reward/agent.cfg.test_episodes)},+- {np.std(np.array(returns))}; \
        Average episode length: {total_test_len/agent.cfg.test_episodes}")


In [3]:
# Setup: read the configurations and generate the environment.
def setup(algo=None, env='easy', cfg_args={}, render=True, train_episodes=None):
    """Read the configuration files and build the environment.

    The configuration is assembled from ``cfg/envs/<env>_env.yaml``, then
    ``cfg/algo/<algo>.yaml``, then ``cfg_args`` (later sources override earlier
    ones), and finally ``train_episodes`` if it is given. The result folders
    (model, logging, train/test video) are created under
    ``results/<env_name>/<algo>``.

    Args:
        algo: algorithm name; selects ``cfg/algo/<algo>.yaml`` and the results folder.
        env: difficulty key; selects ``cfg/envs/<env>_env.yaml``.
        cfg_args: extra configuration entries that override the YAML values.
            The dict is only read, never mutated, so the shared default is safe.
        render: when True (and ``cfg.save_video`` is set) the environment is
            wrapped in ``RecordVideo``.
        train_episodes: if not None, overrides ``train_episodes`` in the config.

    Returns:
        dict with keys ``args`` (the configuration as a ``SimpleNamespace``),
        ``env``, ``eval_env`` (always None), ``action_space_dim``,
        ``observation_space_dim`` and ``seed``.
    """
    # set the paths
    cur_dir = Path().cwd()
    cfg_path = cur_dir/'cfg'
    env_cfg_file = cfg_path/'envs'/f'{env}_env.yaml'

    # read configuration parameters (files are closed by the context managers)
    cfg = {'cfg_path': cfg_path, 'algo_name': algo}
    with open(env_cfg_file, 'r') as env_file:
        env_cfg = yaml.safe_load(env_file)
    with open(cfg_path/'algo'/f'{algo}.yaml', 'r') as algo_file:
        algo_cfg = yaml.safe_load(algo_file)
    cfg.update(env_cfg)
    cfg.update(algo_cfg)
    cfg.update(cfg_args)

    # optionally force a different number of training episodes
    if train_episodes is not None:
        cfg["train_episodes"] = train_episodes

    # prepare folders to store results
    work_dir = cur_dir/'results'/cfg["env_name"]/str(algo)
    model_dir = work_dir/"model"
    logging_dir = work_dir/"logging"
    video_train_dir = work_dir/"video"/"train"
    video_test_dir = work_dir/"video"/"test"
    for directory in (work_dir, model_dir, logging_dir, video_train_dir, video_test_dir):
        cu.make_dir(directory)

    cfg.update({'work_dir': work_dir, "model_dir": model_dir, "logging_dir": logging_dir,
                "video_train_dir": video_train_dir, "video_test_dir": video_test_dir})
    cfg = SN(**cfg)

    # set seed: draw one at random only when the configuration does not fix it
    if cfg.seed is None:
        seed = np.random.randint(low=1, high=1000)
    else:
        seed = cfg.seed

    ## Create environment
    env = cu.create_env(env_cfg_file)

    if cfg.save_video:
        # During testing, save every episode
        if cfg.testing:
            ep_trigger = 1
            video_path = cfg.video_test_dir
        # During training, save every 1000th episode
        else:
            ep_trigger = 1000
            video_path = cfg.video_train_dir

        if render:
            env = RecordVideo(
                env, video_path,
                episode_trigger=lambda x: x % ep_trigger == 0,
                name_prefix=cfg.exp_name)

    # NOTE(review): the evaluation copy is created and reset but then discarded
    # (eval_env is set to None below). It is kept here because removing it could
    # change side effects of the copy/reset; confirm before deleting.
    eval_env = copy.deepcopy(env)
    env.reset(seed=seed)  # the seed is only set here; training resets do not pass a seed
    eval_env.reset(seed=seed+1000)
    eval_env = None  # For simplicity, we don't evaluate the performance during training.

    # Get dimensionalities of actions and observations
    action_space_dim = cu.get_space_dim(env.action_space)
    observation_space_dim = cu.get_space_dim(env.observation_space)

    config = {
        "args": cfg,
        "env": env,
        "eval_env": eval_env,
        "action_space_dim": action_space_dim,
        "observation_space_dim": observation_space_dim,
        "seed": seed
    }
    return config


# DDPG

## Middle

### Training

In [4]:
# Choose either PPO or DDPG
implemented_algo = 'ddpg'  # 'ppo' or 'ddpg'

# Difficulty level to train on: 'easy', 'middle' or 'difficult'
environment = 'middle'
training_seeds = []

# Train the algorithm once per random seed.
# In total, we train the algorithm with three random seeds [0, 1, 2].
for i in range(3):
    # Pass the seed through cfg_args: setup() resets the environment with
    # cfg.seed before it returns, so assigning config["seed"] afterwards is
    # too late to influence how the environment is seeded.
    config = setup(algo=implemented_algo, env=environment, cfg_args={'seed': i})

    # Kept so the agent receives the run index as its seed exactly as before.
    config["seed"] = i
    training_seeds.append(i)

    if config["args"].algo_name == 'ppo':
        agent = PPOAgent(config)
    elif config["args"].algo_name == 'ddpg':
        agent = DDPGAgent(config)
    else:
        raise Exception('Please use ppo or ddpg!')

    # Train the agent using selected algorithm
    agent.train()


  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(f"{pre} is not within the observation space.")
  logger.warn(


Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg/video/train/project-episode-0.mp4.
Moviepy - Writing video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg/video/train/project-episode-0.mp4



                                                  

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg/video/train/project-episode-0.mp4


  logger.warn(
  logger.warn(f"{pre} is not within the observation space.")


Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg/video/train/project-episode-0.mp4.
Moviepy - Writing video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg/video/train/project-episode-0.mp4



                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg/video/train/project-episode-0.mp4
Episode 0 Step 20 finished. Average episode return: 1.0
Episode 100 Step 2020 finished. Average episode return: -0.05
Episode 200 Step 4020 finished. Average episode return: 0.06
Episode 300 Step 6020 finished. Average episode return: 0.06
Episode 400 Step 8020 finished. Average episode return: 0.06
Episode 500 Step 10020 finished. Average episode return: -0.11
Episode 600 Step 12020 finished. Average episode return: 0.08
Episode 700 Step 14020 finished. Average episode return: 0.22
Episode 800 Step 16020 finished. Average episode return: 0.22
Episode 900 Step 18020 finished. Average episode return: 0.24
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement

                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg/video/train/project-episode-1000.mp4
Episode 1000 Step 20020 finished. Average episode return: 0.33
Episode 1100 Step 22020 finished. Average episode return: 0.28
Episode 1200 Step 24020 finished. Average episode return: 0.5
Episode 1300 Step 26020 finished. Average episode return: 0.48
Episode 1400 Step 28020 finished. Average episode return: 0.53
Episode 1500 Step 30020 finished. Average episode return: 0.63
Episode 1600 Step 32020 finished. Average episode return: 0.56
Episode 1700 Step 34020 finished. Average episode return: 0.63
Episode 1800 Step 36020 finished. Average episode return: 0.58
Episode 1900 Step 38020 finished. Average episode return: 0.88
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/

                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg/video/train/project-episode-2000.mp4
Episode 2000 Step 40020 finished. Average episode return: 0.63
Episode 2100 Step 42020 finished. Average episode return: 0.72
Episode 2200 Step 44020 finished. Average episode return: 0.85
Episode 2300 Step 46020 finished. Average episode return: 0.9
Episode 2400 Step 48020 finished. Average episode return: 0.89
Episode 2500 Step 50020 finished. Average episode return: 0.89
Episode 2600 Step 52020 finished. Average episode return: 0.78
Episode 2700 Step 54020 finished. Average episode return: 0.81
Episode 2800 Step 56020 finished. Average episode return: 0.79
Episode 2900 Step 58020 finished. Average episode return: 0.88
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/

                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg/video/train/project-episode-3000.mp4
Episode 3000 Step 60020 finished. Average episode return: 0.85
Episode 3100 Step 62020 finished. Average episode return: 1.03
Episode 3200 Step 64020 finished. Average episode return: 0.87
Episode 3300 Step 66020 finished. Average episode return: 0.93
Episode 3400 Step 68020 finished. Average episode return: 0.71
Episode 3500 Step 70020 finished. Average episode return: 0.79
Episode 3600 Step 72020 finished. Average episode return: 0.82
Episode 3700 Step 74020 finished. Average episode return: 0.85
Episode 3800 Step 76020 finished. Average episode return: 0.9
Episode 3900 Step 78020 finished. Average episode return: 0.83
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/

                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg/video/train/project-episode-4000.mp4
Episode 4000 Step 80020 finished. Average episode return: 0.89
Episode 4100 Step 82020 finished. Average episode return: 0.91
Episode 4200 Step 84020 finished. Average episode return: 0.92
Episode 4300 Step 86020 finished. Average episode return: 0.8
Episode 4400 Step 88020 finished. Average episode return: 0.91
Episode 4500 Step 90020 finished. Average episode return: 0.77
Episode 4600 Step 92020 finished. Average episode return: 0.92
Episode 4700 Step 94020 finished. Average episode return: 0.95
Episode 4800 Step 96020 finished. Average episode return: 0.9
Episode 4900 Step 98020 finished. Average episode return: 0.86
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/G

                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg/video/train/project-episode-5000.mp4
Episode 5000 Step 100020 finished. Average episode return: 0.76
Episode 5100 Step 102020 finished. Average episode return: 0.93
Episode 5200 Step 104020 finished. Average episode return: 0.91
Episode 5300 Step 106020 finished. Average episode return: 0.9
Episode 5400 Step 108020 finished. Average episode return: 0.88
Episode 5500 Step 110020 finished. Average episode return: 0.96
Episode 5600 Step 112020 finished. Average episode return: 0.69
Episode 5700 Step 114020 finished. Average episode return: 0.95
Episode 5800 Step 116020 finished. Average episode return: 0.75
Episode 5900 Step 118020 finished. Average episode return: 0.91
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearnin

                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg/video/train/project-episode-6000.mp4
Episode 6000 Step 120020 finished. Average episode return: 0.95
Episode 6100 Step 122020 finished. Average episode return: 0.96
Episode 6200 Step 124020 finished. Average episode return: 0.96
Episode 6300 Step 126020 finished. Average episode return: 0.93
Episode 6400 Step 128020 finished. Average episode return: 0.96
Episode 6500 Step 130020 finished. Average episode return: 0.88
Episode 6600 Step 132020 finished. Average episode return: 1.06
Episode 6700 Step 134020 finished. Average episode return: 0.86
Episode 6800 Step 136020 finished. Average episode return: 0.75
Episode 6900 Step 138020 finished. Average episode return: 0.93
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearni

                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg/video/train/project-episode-7000.mp4
Episode 7000 Step 140020 finished. Average episode return: 0.87
Episode 7100 Step 142020 finished. Average episode return: 1.0
Episode 7200 Step 144020 finished. Average episode return: 1.02
Episode 7300 Step 146020 finished. Average episode return: 0.96
Episode 7400 Step 148020 finished. Average episode return: 0.93
Episode 7500 Step 150020 finished. Average episode return: 0.89
Episode 7600 Step 152020 finished. Average episode return: 0.88
Episode 7700 Step 154020 finished. Average episode return: 0.94
Episode 7800 Step 156020 finished. Average episode return: 0.83
Episode 7900 Step 158020 finished. Average episode return: 0.98
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearnin

                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg/video/train/project-episode-8000.mp4
Episode 8000 Step 160020 finished. Average episode return: 0.94
Episode 8100 Step 162020 finished. Average episode return: 1.0
Episode 8200 Step 164020 finished. Average episode return: 0.87
Episode 8300 Step 166020 finished. Average episode return: 0.87
Episode 8400 Step 168020 finished. Average episode return: 0.84
Episode 8500 Step 170020 finished. Average episode return: 0.84
Episode 8600 Step 172020 finished. Average episode return: 0.96
Episode 8700 Step 174020 finished. Average episode return: 0.77
Episode 8800 Step 176020 finished. Average episode return: 0.96
Episode 8900 Step 178020 finished. Average episode return: 0.99
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearnin

                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg/video/train/project-episode-9000.mp4
Episode 9000 Step 180020 finished. Average episode return: 0.92
Episode 9100 Step 182020 finished. Average episode return: 0.89
Episode 9200 Step 184020 finished. Average episode return: 0.78
Episode 9300 Step 186020 finished. Average episode return: 0.88
Episode 9400 Step 188020 finished. Average episode return: 1.06
Episode 9500 Step 190020 finished. Average episode return: 0.82
Episode 9600 Step 192020 finished. Average episode return: 0.99
Episode 9700 Step 194020 finished. Average episode return: 0.77
Episode 9800 Step 196020 finished. Average episode return: 0.93
Episode 9900 Step 198020 finished. Average episode return: 0.92
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearni

  logger.warn(f"Overriding environment {new_spec.id} already in registry.")
  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(f"{pre} is not within the observation space.")


Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg/video/train/project-episode-10000.mp4
Episode 10000 Step 200020 finished. Average episode return: 1.0
Saved model to /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg/model/model_parameters_0.pt ...
logger and seed 0
/Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg/logging/logs_0.csv
------ Training Finished ------
Total traning time is 10.740906685416606mins


  logger.warn(


Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg/video/train/project-episode-0.mp4.
Moviepy - Writing video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg/video/train/project-episode-0.mp4



                                                  

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg/video/train/project-episode-0.mp4




Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg/video/train/project-episode-0.mp4.
Moviepy - Writing video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg/video/train/project-episode-0.mp4



                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg/video/train/project-episode-0.mp4
Episode 0 Step 20 finished. Average episode return: 0.0
Episode 100 Step 2020 finished. Average episode return: 0.05
Episode 200 Step 4020 finished. Average episode return: 0.01
Episode 300 Step 6020 finished. Average episode return: -0.16
Episode 400 Step 8020 finished. Average episode return: 0.1
Episode 500 Step 10020 finished. Average episode return: -0.05
Episode 600 Step 12020 finished. Average episode return: 0.32
Episode 700 Step 14020 finished. Average episode return: 0.52
Episode 800 Step 16020 finished. Average episode return: 0.41
Episode 900 Step 18020 finished. Average episode return: 0.48
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-

                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg/video/train/project-episode-1000.mp4
Episode 1000 Step 20020 finished. Average episode return: 0.53
Episode 1100 Step 22020 finished. Average episode return: 0.52
Episode 1200 Step 24020 finished. Average episode return: 0.67
Episode 1300 Step 26020 finished. Average episode return: 0.75
Episode 1400 Step 28020 finished. Average episode return: 0.67
Episode 1500 Step 30020 finished. Average episode return: 0.74
Episode 1600 Step 32020 finished. Average episode return: 0.65
Episode 1700 Step 34020 finished. Average episode return: 0.79
Episode 1800 Step 36020 finished. Average episode return: 0.67
Episode 1900 Step 38020 finished. Average episode return: 0.86
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project

                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg/video/train/project-episode-2000.mp4
Episode 2000 Step 40020 finished. Average episode return: 0.66
Episode 2100 Step 42020 finished. Average episode return: 0.72
Episode 2200 Step 44020 finished. Average episode return: 0.82
Episode 2300 Step 46020 finished. Average episode return: 0.71
Episode 2400 Step 48020 finished. Average episode return: 0.96
Episode 2500 Step 50020 finished. Average episode return: 0.76
Episode 2600 Step 52020 finished. Average episode return: 0.71
Episode 2700 Step 54020 finished. Average episode return: 0.68
Episode 2800 Step 56020 finished. Average episode return: 0.77
Episode 2900 Step 58020 finished. Average episode return: 0.75


In [None]:
## Evaluate the agents trained with the implemented algorithm on the chosen
## task: one saved model per training seed is loaded and tested.
import warnings
warnings.filterwarnings('ignore')

# NOTE: set this to the algorithm you implemented
implemented_algo = 'ddpg'  # 'ddpg' or 'ppo'

environment = 'middle'
training_seeds = []

# Supported algorithm names and the agent class that implements each of them
agent_classes = {'ppo': PPOAgent, 'ddpg': DDPGAgent}

# For each training seed, rebuild the agent and test the model saved for it
for i in range(3):
    config = setup(algo=implemented_algo, env=environment, render=False)

    config["seed"] = i
    training_seeds.append(i)

    agent_cls = agent_classes.get(config["args"].algo_name)
    if agent_cls is None:
        raise Exception('Please use ppo or ddpg!')
    agent = agent_cls(config)

    print('\n\n\nnow start testing for environment',environment,' agent:',implemented_algo,' seed:',i)
    # Test the agent in the selected environment
    test(agent, environment, implemented_algo)




now start testing for environment middle  agent: ddpg  seed: 0
model loaded: /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg/model/model_parameters_0.pt
Testing...
Saved GIF to /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg/video/test/_seed_0_ep_0.gif
Average test reward over 10 episodes: 1.3,+- 0.6403124237432849;         Average episode length: 20.0



now start testing for environment middle  agent: ddpg  seed: 1
model loaded: /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg/model/model_parameters_1.pt
Testing...
Saved GIF to /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Projec

# DDPG Extension

## Middle

### TD3

#### Training

In [4]:
# Implement your improved algorithm either in algo/ddpg_extension.py or algo/ppo_extension.py
# Exactly one DDPGExtension variant should be imported; it shadows any
# DDPGExtension imported earlier in the notebook.
# NOTE(review): the section heading says TD3 but the QR variant is imported here; confirm.
# from algos.ddpg_extension import DDPGExtension
from algos.ddpg_extension_QR import DDPGExtension
# from algos.ddpg_extension_RS import DDPGExtension
# from algos.ddpg_extension_PER import DDPGExtension
from algos.ppo_extension import PPOExtension
import torch

implemented_algo = 'ddpg_extension'  # choose 'ppo_extension' or 'ddpg_extension'
environment = 'middle'

training_seeds = []
# Train once per random seed: [0, 1, 2]
for i in range(3):
    # Pass the seed through cfg_args: setup() resets the environment with
    # cfg.seed before it returns, so assigning config["seed"] afterwards is
    # too late to influence how the environment is seeded.
    config = setup(algo=implemented_algo, env=environment, cfg_args={'seed': i})

    # Kept so the agent receives the run index as its seed exactly as before.
    config["seed"] = i
    training_seeds.append(i)

    if config["args"].algo_name == 'ppo_extension':
        agent = PPOExtension(config)
    elif config["args"].algo_name == 'ddpg_extension':
        agent = DDPGExtension(config)
    else:
        # The message now names the values this cell actually accepts.
        raise Exception('Please use ppo_extension or ddpg_extension!')

    # Train the agent using selected algorithm
    agent.train()


  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(f"{pre} is not within the observation space.")
  logger.warn(


Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-0.mp4.
Moviepy - Writing video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-0.mp4



                                                  

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-0.mp4


  logger.warn(
  logger.warn(f"{pre} is not within the observation space.")


Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-0.mp4.
Moviepy - Writing video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-0.mp4



                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-0.mp4
Episode 0 Step 20 finished. Average episode return: 1.0
Episode 100 Step 2020 finished. Average episode return: 0.0
Episode 200 Step 4020 finished. Average episode return: 0.05
Episode 300 Step 6020 finished. Average episode return: -0.06
Episode 400 Step 8020 finished. Average episode return: -0.05
Episode 500 Step 10020 finished. Average episode return: 0.14
Episode 600 Step 12020 finished. Average episode return: 0.44
Episode 700 Step 14020 finished. Average episode return: 0.48
Episode 800 Step 16020 finished. Average episode return: 0.52
Episode 900 Step 18020 finished. Average episode return: 0.43
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Rein

                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-1000.mp4
Episode 1000 Step 20020 finished. Average episode return: 0.64
Episode 1100 Step 22020 finished. Average episode return: 0.63
Episode 1200 Step 24020 finished. Average episode return: 0.76
Episode 1300 Step 26020 finished. Average episode return: 0.62
Episode 1400 Step 28020 finished. Average episode return: 0.65
Episode 1500 Step 30020 finished. Average episode return: 0.54
Episode 1600 Step 32020 finished. Average episode return: 0.55
Episode 1700 Step 34020 finished. Average episode return: 0.66
Episode 1800 Step 36020 finished. Average episode return: 0.67
Episode 1900 Step 38020 finished. Average episode return: 0.78
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearni

                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-2000.mp4
Episode 2000 Step 40020 finished. Average episode return: 0.65
Episode 2100 Step 42020 finished. Average episode return: 0.71
Episode 2200 Step 44020 finished. Average episode return: 0.72
Episode 2300 Step 46020 finished. Average episode return: 0.61
Episode 2400 Step 48020 finished. Average episode return: 0.87
Episode 2500 Step 50020 finished. Average episode return: 0.77
Episode 2600 Step 52020 finished. Average episode return: 0.76
Episode 2700 Step 54020 finished. Average episode return: 0.87
Episode 2800 Step 56020 finished. Average episode return: 0.76
Episode 2900 Step 58020 finished. Average episode return: 0.89
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearni

                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-3000.mp4
Episode 3000 Step 60020 finished. Average episode return: 0.92
Episode 3100 Step 62020 finished. Average episode return: 1.08
Episode 3200 Step 64020 finished. Average episode return: 0.82
Episode 3300 Step 66020 finished. Average episode return: 1.01
Episode 3400 Step 68020 finished. Average episode return: 0.94
Episode 3500 Step 70020 finished. Average episode return: 0.85
Episode 3600 Step 72020 finished. Average episode return: 0.92
Episode 3700 Step 74020 finished. Average episode return: 0.84
Episode 3800 Step 76020 finished. Average episode return: 0.88
Episode 3900 Step 78020 finished. Average episode return: 1.02
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearni

                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-4000.mp4
Episode 4000 Step 80020 finished. Average episode return: 0.86
Episode 4100 Step 82020 finished. Average episode return: 0.97
Episode 4200 Step 84020 finished. Average episode return: 0.86
Episode 4300 Step 86020 finished. Average episode return: 0.93
Episode 4400 Step 88020 finished. Average episode return: 0.97
Episode 4500 Step 90020 finished. Average episode return: 0.82
Episode 4600 Step 92020 finished. Average episode return: 0.92
Episode 4700 Step 94020 finished. Average episode return: 0.94
Episode 4800 Step 96020 finished. Average episode return: 0.9
Episode 4900 Step 98020 finished. Average episode return: 0.88
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearnin

                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-5000.mp4
Episode 5000 Step 100020 finished. Average episode return: 0.9
Episode 5100 Step 102020 finished. Average episode return: 0.91
Episode 5200 Step 104020 finished. Average episode return: 0.96
Episode 5300 Step 106020 finished. Average episode return: 0.93
Episode 5400 Step 108020 finished. Average episode return: 0.91
Episode 5500 Step 110020 finished. Average episode return: 0.84
Episode 5600 Step 112020 finished. Average episode return: 0.82
Episode 5700 Step 114020 finished. Average episode return: 0.95
Episode 5800 Step 116020 finished. Average episode return: 0.84
Episode 5900 Step 118020 finished. Average episode return: 0.9
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/Reinforceme

                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-6000.mp4
Episode 6000 Step 120020 finished. Average episode return: 0.82
Episode 6100 Step 122020 finished. Average episode return: 0.91
Episode 6200 Step 124020 finished. Average episode return: 0.8
Episode 6300 Step 126020 finished. Average episode return: 0.94
Episode 6400 Step 128020 finished. Average episode return: 0.93
Episode 6500 Step 130020 finished. Average episode return: 1.0
Episode 6600 Step 132020 finished. Average episode return: 1.0
Episode 6700 Step 134020 finished. Average episode return: 0.86
Episode 6800 Step 136020 finished. Average episode return: 0.86
Episode 6900 Step 138020 finished. Average episode return: 0.84
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/Reinforcemen

                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-7000.mp4
Episode 7000 Step 140020 finished. Average episode return: 0.91
Episode 7100 Step 142020 finished. Average episode return: 0.99
Episode 7200 Step 144020 finished. Average episode return: 0.91
Episode 7300 Step 146020 finished. Average episode return: 0.98
Episode 7400 Step 148020 finished. Average episode return: 0.96
Episode 7500 Step 150020 finished. Average episode return: 0.91
Episode 7600 Step 152020 finished. Average episode return: 0.73
Episode 7700 Step 154020 finished. Average episode return: 0.83
Episode 7800 Step 156020 finished. Average episode return: 0.91
Episode 7900 Step 158020 finished. Average episode return: 0.96
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/Reinforce

                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-8000.mp4
Episode 8000 Step 160020 finished. Average episode return: 0.99
Episode 8100 Step 162020 finished. Average episode return: 0.88
Episode 8200 Step 164020 finished. Average episode return: 0.76
Episode 8300 Step 166020 finished. Average episode return: 0.88
Episode 8400 Step 168020 finished. Average episode return: 0.83
Episode 8500 Step 170020 finished. Average episode return: 0.9
Episode 8600 Step 172020 finished. Average episode return: 1.0
Episode 8700 Step 174020 finished. Average episode return: 0.83
Episode 8800 Step 176020 finished. Average episode return: 0.99
Episode 8900 Step 178020 finished. Average episode return: 0.92
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/Reinforceme

                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-9000.mp4
Episode 9000 Step 180020 finished. Average episode return: 0.84
Episode 9100 Step 182020 finished. Average episode return: 0.84
Episode 9200 Step 184020 finished. Average episode return: 0.9
Episode 9300 Step 186020 finished. Average episode return: 0.92
Episode 9400 Step 188020 finished. Average episode return: 1.0
Episode 9500 Step 190020 finished. Average episode return: 0.76
Episode 9600 Step 192020 finished. Average episode return: 0.93
Episode 9700 Step 194020 finished. Average episode return: 0.89
Episode 9800 Step 196020 finished. Average episode return: 0.89
Episode 9900 Step 198020 finished. Average episode return: 0.93
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/Reinforceme

                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-10000.mp4
Episode 10000 Step 200020 finished. Average episode return: 0.99
Saved model to /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/model/model_parameters_0.pt ...
logger and seed 0
/Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/logging/logs_0.csv
------ Training Finished ------
Total traning time is 7.105271961816664mins


  logger.warn(f"Overriding environment {new_spec.id} already in registry.")
  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(f"{pre} is not within the observation space.")
  logger.warn(


Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-0.mp4.
Moviepy - Writing video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-0.mp4



                                                  

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-0.mp4




Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-0.mp4.
Moviepy - Writing video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-0.mp4



                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-0.mp4
Episode 0 Step 20 finished. Average episode return: 0.0
Episode 100 Step 2020 finished. Average episode return: -0.04
Episode 200 Step 4020 finished. Average episode return: 0.02
Episode 300 Step 6020 finished. Average episode return: -0.05
Episode 400 Step 8020 finished. Average episode return: -0.05
Episode 500 Step 10020 finished. Average episode return: -0.18
Episode 600 Step 12020 finished. Average episode return: 0.17
Episode 700 Step 14020 finished. Average episode return: 0.22
Episode 800 Step 16020 finished. Average episode return: 0.18
Episode 900 Step 18020 finished. Average episode return: 0.0
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Re

                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-1000.mp4
Episode 1000 Step 20020 finished. Average episode return: 0.17
Episode 1100 Step 22020 finished. Average episode return: 0.54
Episode 1200 Step 24020 finished. Average episode return: 0.43
Episode 1300 Step 26020 finished. Average episode return: 0.53
Episode 1400 Step 28020 finished. Average episode return: 0.5
Episode 1500 Step 30020 finished. Average episode return: 0.35
Episode 1600 Step 32020 finished. Average episode return: 0.53
Episode 1700 Step 34020 finished. Average episode return: 0.67
Episode 1800 Step 36020 finished. Average episode return: 0.48
Episode 1900 Step 38020 finished. Average episode return: 0.77
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearnin

                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-2000.mp4
Episode 2000 Step 40020 finished. Average episode return: 0.69
Episode 2100 Step 42020 finished. Average episode return: 0.7
Episode 2200 Step 44020 finished. Average episode return: 0.78
Episode 2300 Step 46020 finished. Average episode return: 0.86
Episode 2400 Step 48020 finished. Average episode return: 0.85
Episode 2500 Step 50020 finished. Average episode return: 0.68
Episode 2600 Step 52020 finished. Average episode return: 0.79
Episode 2700 Step 54020 finished. Average episode return: 0.81
Episode 2800 Step 56020 finished. Average episode return: 0.88
Episode 2900 Step 58020 finished. Average episode return: 0.91
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearnin

                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-3000.mp4
Episode 3000 Step 60020 finished. Average episode return: 0.88
Episode 3100 Step 62020 finished. Average episode return: 1.05
Episode 3200 Step 64020 finished. Average episode return: 0.89
Episode 3300 Step 66020 finished. Average episode return: 0.95
Episode 3400 Step 68020 finished. Average episode return: 0.83
Episode 3500 Step 70020 finished. Average episode return: 0.79
Episode 3600 Step 72020 finished. Average episode return: 0.88
Episode 3700 Step 74020 finished. Average episode return: 0.8
Episode 3800 Step 76020 finished. Average episode return: 0.84
Episode 3900 Step 78020 finished. Average episode return: 0.91
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearnin

                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-4000.mp4
Episode 4000 Step 80020 finished. Average episode return: 0.9
Episode 4100 Step 82020 finished. Average episode return: 0.91
Episode 4200 Step 84020 finished. Average episode return: 0.85
Episode 4300 Step 86020 finished. Average episode return: 0.93
Episode 4400 Step 88020 finished. Average episode return: 1.04
Episode 4500 Step 90020 finished. Average episode return: 0.95
Episode 4600 Step 92020 finished. Average episode return: 0.96
Episode 4700 Step 94020 finished. Average episode return: 1.06
Episode 4800 Step 96020 finished. Average episode return: 0.9
Episode 4900 Step 98020 finished. Average episode return: 0.92
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning

                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-5000.mp4
Episode 5000 Step 100020 finished. Average episode return: 0.8
Episode 5100 Step 102020 finished. Average episode return: 0.94
Episode 5200 Step 104020 finished. Average episode return: 0.96
Episode 5300 Step 106020 finished. Average episode return: 0.92
Episode 5400 Step 108020 finished. Average episode return: 0.97
Episode 5500 Step 110020 finished. Average episode return: 0.96
Episode 5600 Step 112020 finished. Average episode return: 0.83
Episode 5700 Step 114020 finished. Average episode return: 0.86
Episode 5800 Step 116020 finished. Average episode return: 0.8
Episode 5900 Step 118020 finished. Average episode return: 1.04
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/Reinforceme

                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-6000.mp4
Episode 6000 Step 120020 finished. Average episode return: 0.87
Episode 6100 Step 122020 finished. Average episode return: 0.96
Episode 6200 Step 124020 finished. Average episode return: 0.93
Episode 6300 Step 126020 finished. Average episode return: 1.02
Episode 6400 Step 128020 finished. Average episode return: 0.94
Episode 6500 Step 130020 finished. Average episode return: 0.94
Episode 6600 Step 132020 finished. Average episode return: 0.97
Episode 6700 Step 134020 finished. Average episode return: 0.9
Episode 6800 Step 136020 finished. Average episode return: 0.86
Episode 6900 Step 138020 finished. Average episode return: 0.86
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/Reinforcem

                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-7000.mp4
Episode 7000 Step 140020 finished. Average episode return: 0.83
Episode 7100 Step 142020 finished. Average episode return: 0.98
Episode 7200 Step 144020 finished. Average episode return: 0.9
Episode 7300 Step 146020 finished. Average episode return: 1.0
Episode 7400 Step 148020 finished. Average episode return: 0.89
Episode 7500 Step 150020 finished. Average episode return: 1.03
Episode 7600 Step 152020 finished. Average episode return: 0.85
Episode 7700 Step 154020 finished. Average episode return: 0.98
Episode 7800 Step 156020 finished. Average episode return: 0.89
Episode 7900 Step 158020 finished. Average episode return: 1.07
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/Reinforceme

                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-8000.mp4
Episode 8000 Step 160020 finished. Average episode return: 1.04
Episode 8100 Step 162020 finished. Average episode return: 0.93
Episode 8200 Step 164020 finished. Average episode return: 0.86
Episode 8300 Step 166020 finished. Average episode return: 0.87
Episode 8400 Step 168020 finished. Average episode return: 0.87
Episode 8500 Step 170020 finished. Average episode return: 0.94
Episode 8600 Step 172020 finished. Average episode return: 0.99
Episode 8700 Step 174020 finished. Average episode return: 0.85
Episode 8800 Step 176020 finished. Average episode return: 0.97
Episode 8900 Step 178020 finished. Average episode return: 1.01
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/Reinforce

                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-9000.mp4
Episode 9000 Step 180020 finished. Average episode return: 0.91
Episode 9100 Step 182020 finished. Average episode return: 0.96
Episode 9200 Step 184020 finished. Average episode return: 0.91
Episode 9300 Step 186020 finished. Average episode return: 0.98
Episode 9400 Step 188020 finished. Average episode return: 1.04
Episode 9500 Step 190020 finished. Average episode return: 0.82
Episode 9600 Step 192020 finished. Average episode return: 1.0
Episode 9700 Step 194020 finished. Average episode return: 0.89
Episode 9800 Step 196020 finished. Average episode return: 0.84
Episode 9900 Step 198020 finished. Average episode return: 1.06
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/Reinforcem

                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-10000.mp4
Episode 10000 Step 200020 finished. Average episode return: 0.95
Saved model to /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/model/model_parameters_1.pt ...
logger and seed 1
/Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/logging/logs_1.csv
------ Training Finished ------
Total traning time is 8.73227087083333mins
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Projec

                                                  

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-0.mp4
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-0.mp4.
Moviepy - Writing video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-0.mp4



                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-0.mp4
Episode 0 Step 20 finished. Average episode return: 1.0
Episode 100 Step 2020 finished. Average episode return: 0.03
Episode 200 Step 4020 finished. Average episode return: 0.02
Episode 300 Step 6020 finished. Average episode return: -0.18
Episode 400 Step 8020 finished. Average episode return: -0.15
Episode 500 Step 10020 finished. Average episode return: -0.07
Episode 600 Step 12020 finished. Average episode return: 0.03
Episode 700 Step 14020 finished. Average episode return: 0.14
Episode 800 Step 16020 finished. Average episode return: 0.21
Episode 900 Step 18020 finished. Average episode return: 0.34
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Re

                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-1000.mp4
Episode 1000 Step 20020 finished. Average episode return: 0.48
Episode 1100 Step 22020 finished. Average episode return: 0.53
Episode 1200 Step 24020 finished. Average episode return: 0.63
Episode 1300 Step 26020 finished. Average episode return: 0.61
Episode 1400 Step 28020 finished. Average episode return: 0.61
Episode 1500 Step 30020 finished. Average episode return: 0.7
Episode 1600 Step 32020 finished. Average episode return: 0.57
Episode 1700 Step 34020 finished. Average episode return: 0.56
Episode 1800 Step 36020 finished. Average episode return: 0.7
Episode 1900 Step 38020 finished. Average episode return: 0.73
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning

                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-2000.mp4
Episode 2000 Step 40020 finished. Average episode return: 0.74
Episode 2100 Step 42020 finished. Average episode return: 0.76
Episode 2200 Step 44020 finished. Average episode return: 0.84
Episode 2300 Step 46020 finished. Average episode return: 0.82
Episode 2400 Step 48020 finished. Average episode return: 1.06
Episode 2500 Step 50020 finished. Average episode return: 0.87
Episode 2600 Step 52020 finished. Average episode return: 0.74
Episode 2700 Step 54020 finished. Average episode return: 0.9
Episode 2800 Step 56020 finished. Average episode return: 0.71
Episode 2900 Step 58020 finished. Average episode return: 0.94
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearnin

                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-3000.mp4
Episode 3000 Step 60020 finished. Average episode return: 0.84
Episode 3100 Step 62020 finished. Average episode return: 1.03
Episode 3200 Step 64020 finished. Average episode return: 0.83
Episode 3300 Step 66020 finished. Average episode return: 0.95
Episode 3400 Step 68020 finished. Average episode return: 0.81
Episode 3500 Step 70020 finished. Average episode return: 0.72
Episode 3600 Step 72020 finished. Average episode return: 0.83
Episode 3700 Step 74020 finished. Average episode return: 0.8
Episode 3800 Step 76020 finished. Average episode return: 0.86
Episode 3900 Step 78020 finished. Average episode return: 0.9
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning

                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-4000.mp4
Episode 4000 Step 80020 finished. Average episode return: 0.75
Episode 4100 Step 82020 finished. Average episode return: 0.85
Episode 4200 Step 84020 finished. Average episode return: 0.86
Episode 4300 Step 86020 finished. Average episode return: 0.82
Episode 4400 Step 88020 finished. Average episode return: 1.06
Episode 4500 Step 90020 finished. Average episode return: 0.91
Episode 4600 Step 92020 finished. Average episode return: 0.98
Episode 4700 Step 94020 finished. Average episode return: 1.01
Episode 4800 Step 96020 finished. Average episode return: 0.95
Episode 4900 Step 98020 finished. Average episode return: 0.93
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearni

                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-5000.mp4
Episode 5000 Step 100020 finished. Average episode return: 0.81
Episode 5100 Step 102020 finished. Average episode return: 0.94
Episode 5200 Step 104020 finished. Average episode return: 1.04
Episode 5300 Step 106020 finished. Average episode return: 0.91
Episode 5400 Step 108020 finished. Average episode return: 0.87
Episode 5500 Step 110020 finished. Average episode return: 0.9
Episode 5600 Step 112020 finished. Average episode return: 0.9
Episode 5700 Step 114020 finished. Average episode return: 0.87
Episode 5800 Step 116020 finished. Average episode return: 0.79
Episode 5900 Step 118020 finished. Average episode return: 0.96
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/Reinforceme

                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-6000.mp4
Episode 6000 Step 120020 finished. Average episode return: 0.98
Episode 6100 Step 122020 finished. Average episode return: 0.98
Episode 6200 Step 124020 finished. Average episode return: 0.91
Episode 6300 Step 126020 finished. Average episode return: 1.01
Episode 6400 Step 128020 finished. Average episode return: 0.95
Episode 6500 Step 130020 finished. Average episode return: 0.96
Episode 6600 Step 132020 finished. Average episode return: 0.98
Episode 6700 Step 134020 finished. Average episode return: 0.94
Episode 6800 Step 136020 finished. Average episode return: 0.78
Episode 6900 Step 138020 finished. Average episode return: 0.79
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/Reinforce

                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-7000.mp4
Episode 7000 Step 140020 finished. Average episode return: 0.85
Episode 7100 Step 142020 finished. Average episode return: 0.92
Episode 7200 Step 144020 finished. Average episode return: 0.97
Episode 7300 Step 146020 finished. Average episode return: 0.9
Episode 7400 Step 148020 finished. Average episode return: 0.95
Episode 7500 Step 150020 finished. Average episode return: 1.04
Episode 7600 Step 152020 finished. Average episode return: 0.9
Episode 7700 Step 154020 finished. Average episode return: 0.88
Episode 7800 Step 156020 finished. Average episode return: 0.78
Episode 7900 Step 158020 finished. Average episode return: 0.76
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/Reinforceme

                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-8000.mp4
Episode 8000 Step 160020 finished. Average episode return: 1.06
Episode 8100 Step 162020 finished. Average episode return: 0.9
Episode 8200 Step 164020 finished. Average episode return: 0.8
Episode 8300 Step 166020 finished. Average episode return: 0.76
Episode 8400 Step 168020 finished. Average episode return: 0.81
Episode 8500 Step 170020 finished. Average episode return: 0.8
Episode 8600 Step 172020 finished. Average episode return: 0.92
Episode 8700 Step 174020 finished. Average episode return: 0.92
Episode 8800 Step 176020 finished. Average episode return: 0.88
Episode 8900 Step 178020 finished. Average episode return: 0.85
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/Reinforcemen

                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-9000.mp4
Episode 9000 Step 180020 finished. Average episode return: 0.92
Episode 9100 Step 182020 finished. Average episode return: 1.02
Episode 9200 Step 184020 finished. Average episode return: 0.96
Episode 9300 Step 186020 finished. Average episode return: 1.03
Episode 9400 Step 188020 finished. Average episode return: 1.07
Episode 9500 Step 190020 finished. Average episode return: 0.81
Episode 9600 Step 192020 finished. Average episode return: 0.93
Episode 9700 Step 194020 finished. Average episode return: 0.93
Episode 9800 Step 196020 finished. Average episode return: 0.92
Episode 9900 Step 198020 finished. Average episode return: 1.06
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/Reinforce

                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-10000.mp4
Episode 10000 Step 200020 finished. Average episode return: 1.01
Saved model to /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/model/model_parameters_2.pt ...
logger and seed 2
/Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/logging/logs_2.csv
------ Training Finished ------
Total traning time is 8.583087159716667mins


#### Testing

In [5]:
from algos.ddpg_extension import DDPGExtension
# from algos.ddpg_extension_RS import DDPGExtension
from algos.ppo_extension import PPOExtension

implemented_algo = 'ddpg_extension'  # choose 'ppo_extension' or 'ddpg_extension'
environment = 'middle'

# Supported algorithm names mapped to the agent classes imported at the top of this cell.
agent_classes = {
    'ppo_extension': PPOExtension,
    'ddpg_extension': DDPGExtension,
}

training_seeds = []
# Evaluate one agent per seed (0, 1, 2).
for i in range(3):
    config = setup(algo=implemented_algo, env=environment, render=False)

    # Override the seed on the freshly built config before the agent is constructed.
    # NOTE(review): presumably the seed decides which saved checkpoint the agent
    # loads (the recorded output shows model_parameters_0.pt, model_parameters_1.pt)
    # -- confirm in the agent class.
    config["seed"] = i
    training_seeds.append(i)

    algo_name = config["args"].algo_name
    agent_cls = agent_classes.get(algo_name)
    if agent_cls is None:
        # The accepted names are the *_extension variants; report the rejected
        # value and the real options. ValueError is still an Exception subclass.
        raise ValueError(
            f"Unsupported algorithm {algo_name!r}: "
            "please use 'ppo_extension' or 'ddpg_extension'."
        )
    agent = agent_cls(config)

    # Test the agent in the selected environment: test() loads the saved model,
    # runs the configured number of test episodes and prints the average reward.
    test(agent, environment, implemented_algo)

model loaded: /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/model/model_parameters_0.pt
Testing...
Saved GIF to /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/test/_seed_0_ep_0.gif
Average test reward over 10 episodes: 1.3,+- 0.6403124237432849;         Average episode length: 20.0
model loaded: /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/model/model_parameters_1.pt
Testing...
Saved GIF to /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/test/_seed_1_ep_

### QR

#### Training

In [4]:
# Implement your improved algorithm in either algos/ddpg_extension.py or algos/ppo_extension.py
from algos.ddpg_extension_QR import DDPGExtension
from algos.ppo_extension import PPOExtension
import torch

implemented_algo = 'ddpg_extension'  # choose 'ppo_extension' or 'ddpg_extension'
environment = 'middle'

# Supported algorithm names mapped to the agent classes imported at the top of this cell
# (DDPGExtension is whichever variant that import bound).
agent_classes = {
    'ppo_extension': PPOExtension,
    'ddpg_extension': DDPGExtension,
}

training_seeds = []
# Train one agent per seed (0, 1, 2).
for i in range(3):
    config = setup(algo=implemented_algo, env=environment)

    # Override the seed on the freshly built config before the agent is constructed.
    config["seed"] = i
    training_seeds.append(i)

    algo_name = config["args"].algo_name
    agent_cls = agent_classes.get(algo_name)
    if agent_cls is None:
        # The accepted names are the *_extension variants; report the rejected
        # value and the real options. ValueError is still an Exception subclass.
        raise ValueError(
            f"Unsupported algorithm {algo_name!r}: "
            "please use 'ppo_extension' or 'ddpg_extension'."
        )
    agent = agent_cls(config)

    # Train the agent using the selected algorithm
    agent.train()


  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(f"{pre} is not within the observation space.")
  logger.warn(


Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-0.mp4.
Moviepy - Writing video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-0.mp4



                                                  

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-0.mp4


  logger.warn(
  logger.warn(f"{pre} is not within the observation space.")


Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-0.mp4.
Moviepy - Writing video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-0.mp4



                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-0.mp4
Episode 0 Step 20 finished. Average episode return: 1.0
Episode 100 Step 2020 finished. Average episode return: -0.01
Episode 200 Step 4020 finished. Average episode return: 0.12
Episode 300 Step 6020 finished. Average episode return: -0.11
Episode 400 Step 8020 finished. Average episode return: 0.03
Episode 500 Step 10020 finished. Average episode return: -0.07
Episode 600 Step 12020 finished. Average episode return: 0.3
Episode 700 Step 14020 finished. Average episode return: 0.42
Episode 800 Step 16020 finished. Average episode return: 0.46
Episode 900 Step 18020 finished. Average episode return: 0.36
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Rei

                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-1000.mp4
Episode 1000 Step 20020 finished. Average episode return: 0.43
Episode 1100 Step 22020 finished. Average episode return: 0.46
Episode 1200 Step 24020 finished. Average episode return: 0.38
Episode 1300 Step 26020 finished. Average episode return: 0.53
Episode 1400 Step 28020 finished. Average episode return: 0.58
Episode 1500 Step 30020 finished. Average episode return: 0.27
Episode 1600 Step 32020 finished. Average episode return: 0.58
Episode 1700 Step 34020 finished. Average episode return: 0.38
Episode 1800 Step 36020 finished. Average episode return: 0.45
Episode 1900 Step 38020 finished. Average episode return: 0.55
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearni

                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-2000.mp4
Episode 2000 Step 40020 finished. Average episode return: 0.42
Episode 2100 Step 42020 finished. Average episode return: 0.31
Episode 2200 Step 44020 finished. Average episode return: 0.33
Episode 2300 Step 46020 finished. Average episode return: 0.44
Episode 2400 Step 48020 finished. Average episode return: 0.6
Episode 2500 Step 50020 finished. Average episode return: 0.34
Episode 2600 Step 52020 finished. Average episode return: 0.48
Episode 2700 Step 54020 finished. Average episode return: 0.44
Episode 2800 Step 56020 finished. Average episode return: 0.38
Episode 2900 Step 58020 finished. Average episode return: 0.32
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearnin

                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-3000.mp4
Episode 3000 Step 60020 finished. Average episode return: 0.5
Episode 3100 Step 62020 finished. Average episode return: 0.56
Episode 3200 Step 64020 finished. Average episode return: 0.34
Episode 3300 Step 66020 finished. Average episode return: 0.41
Episode 3400 Step 68020 finished. Average episode return: 0.47
Episode 3500 Step 70020 finished. Average episode return: 0.43
Episode 3600 Step 72020 finished. Average episode return: 0.51
Episode 3700 Step 74020 finished. Average episode return: 0.34
Episode 3800 Step 76020 finished. Average episode return: 0.53
Episode 3900 Step 78020 finished. Average episode return: 0.54
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearnin

                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-4000.mp4
Episode 4000 Step 80020 finished. Average episode return: 0.23
Episode 4100 Step 82020 finished. Average episode return: 0.5
Episode 4200 Step 84020 finished. Average episode return: 0.52
Episode 4300 Step 86020 finished. Average episode return: 0.56
Episode 4400 Step 88020 finished. Average episode return: 0.58
Episode 4500 Step 90020 finished. Average episode return: 0.61
Episode 4600 Step 92020 finished. Average episode return: 0.54
Episode 4700 Step 94020 finished. Average episode return: 0.51
Episode 4800 Step 96020 finished. Average episode return: 0.52
Episode 4900 Step 98020 finished. Average episode return: 0.46
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearnin

                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-5000.mp4
Episode 5000 Step 100020 finished. Average episode return: 0.51
Episode 5100 Step 102020 finished. Average episode return: 0.55
Episode 5200 Step 104020 finished. Average episode return: 0.64
Episode 5300 Step 106020 finished. Average episode return: 0.45
Episode 5400 Step 108020 finished. Average episode return: 0.45
Episode 5500 Step 110020 finished. Average episode return: 0.51
Episode 5600 Step 112020 finished. Average episode return: 0.4
Episode 5700 Step 114020 finished. Average episode return: 0.43
Episode 5800 Step 116020 finished. Average episode return: 0.42
Episode 5900 Step 118020 finished. Average episode return: 0.64
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/Reinforcem

                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-6000.mp4
Episode 6000 Step 120020 finished. Average episode return: 0.4
Episode 6100 Step 122020 finished. Average episode return: 0.58
Episode 6200 Step 124020 finished. Average episode return: 0.48
Episode 6300 Step 126020 finished. Average episode return: 0.45
Episode 6400 Step 128020 finished. Average episode return: 0.51
Episode 6500 Step 130020 finished. Average episode return: 0.49
Episode 6600 Step 132020 finished. Average episode return: 0.56
Episode 6700 Step 134020 finished. Average episode return: 0.43
Episode 6800 Step 136020 finished. Average episode return: 0.42
Episode 6900 Step 138020 finished. Average episode return: 0.6
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/Reinforceme

                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-7000.mp4
Episode 7000 Step 140020 finished. Average episode return: 0.43
Episode 7100 Step 142020 finished. Average episode return: 0.59
Episode 7200 Step 144020 finished. Average episode return: 0.65
Episode 7300 Step 146020 finished. Average episode return: 0.61
Episode 7400 Step 148020 finished. Average episode return: 0.6
Episode 7500 Step 150020 finished. Average episode return: 0.52
Episode 7600 Step 152020 finished. Average episode return: 0.46
Episode 7700 Step 154020 finished. Average episode return: 0.55
Episode 7800 Step 156020 finished. Average episode return: 0.47
Episode 7900 Step 158020 finished. Average episode return: 0.65
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/Reinforcem

                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-8000.mp4
Episode 8000 Step 160020 finished. Average episode return: 0.63
Episode 8100 Step 162020 finished. Average episode return: 0.54
Episode 8200 Step 164020 finished. Average episode return: 0.53
Episode 8300 Step 166020 finished. Average episode return: 0.4
Episode 8400 Step 168020 finished. Average episode return: 0.39
Episode 8500 Step 170020 finished. Average episode return: 0.56
Episode 8600 Step 172020 finished. Average episode return: 0.5
Episode 8700 Step 174020 finished. Average episode return: 0.45
Episode 8800 Step 176020 finished. Average episode return: 0.55
Episode 8900 Step 178020 finished. Average episode return: 0.64
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/Reinforceme

                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-9000.mp4
Episode 9000 Step 180020 finished. Average episode return: 0.52
Episode 9100 Step 182020 finished. Average episode return: 0.46
Episode 9200 Step 184020 finished. Average episode return: 0.53
Episode 9300 Step 186020 finished. Average episode return: 0.54
Episode 9400 Step 188020 finished. Average episode return: 0.65
Episode 9500 Step 190020 finished. Average episode return: 0.4
Episode 9600 Step 192020 finished. Average episode return: 0.53
Episode 9700 Step 194020 finished. Average episode return: 0.55
Episode 9800 Step 196020 finished. Average episode return: 0.44
Episode 9900 Step 198020 finished. Average episode return: 0.52
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/Reinforcem

  logger.warn(f"Overriding environment {new_spec.id} already in registry.")
  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(f"{pre} is not within the observation space.")
  logger.warn(


Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-10000.mp4
Episode 10000 Step 200020 finished. Average episode return: 0.54
Saved model to /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/model/model_parameters_0.pt ...
logger and seed 0
/Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/logging/logs_0.csv
------ Training Finished ------
Total traning time is 6.91790126111667mins
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Projec

                                                  

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-0.mp4
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-0.mp4.
Moviepy - Writing video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-0.mp4



                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-0.mp4
Episode 0 Step 20 finished. Average episode return: 1.0
Episode 100 Step 2020 finished. Average episode return: 0.09
Episode 200 Step 4020 finished. Average episode return: 0.03
Episode 300 Step 6020 finished. Average episode return: 0.0
Episode 400 Step 8020 finished. Average episode return: -0.04
Episode 500 Step 10020 finished. Average episode return: 0.05
Episode 600 Step 12020 finished. Average episode return: -0.03
Episode 700 Step 14020 finished. Average episode return: 0.39
Episode 800 Step 16020 finished. Average episode return: 0.32
Episode 900 Step 18020 finished. Average episode return: 0.3
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinf

                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-1000.mp4
Episode 1000 Step 20020 finished. Average episode return: 0.55
Episode 1100 Step 22020 finished. Average episode return: 0.44
Episode 1200 Step 24020 finished. Average episode return: 0.51
Episode 1300 Step 26020 finished. Average episode return: 0.51
Episode 1400 Step 28020 finished. Average episode return: 0.52
Episode 1500 Step 30020 finished. Average episode return: 0.3
Episode 1600 Step 32020 finished. Average episode return: 0.49
Episode 1700 Step 34020 finished. Average episode return: 0.44
Episode 1800 Step 36020 finished. Average episode return: 0.44
Episode 1900 Step 38020 finished. Average episode return: 0.56
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearnin

                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-2000.mp4
Episode 2000 Step 40020 finished. Average episode return: 0.48
Episode 2100 Step 42020 finished. Average episode return: 0.47
Episode 2200 Step 44020 finished. Average episode return: 0.45
Episode 2300 Step 46020 finished. Average episode return: 0.53
Episode 2400 Step 48020 finished. Average episode return: 0.66
Episode 2500 Step 50020 finished. Average episode return: 0.42
Episode 2600 Step 52020 finished. Average episode return: 0.44
Episode 2700 Step 54020 finished. Average episode return: 0.39
Episode 2800 Step 56020 finished. Average episode return: 0.37
Episode 2900 Step 58020 finished. Average episode return: 0.43
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearni

                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-3000.mp4
Episode 3000 Step 60020 finished. Average episode return: 0.55
Episode 3100 Step 62020 finished. Average episode return: 0.64
Episode 3200 Step 64020 finished. Average episode return: 0.46
Episode 3300 Step 66020 finished. Average episode return: 0.52
Episode 3400 Step 68020 finished. Average episode return: 0.5
Episode 3500 Step 70020 finished. Average episode return: 0.39
Episode 3600 Step 72020 finished. Average episode return: 0.61
Episode 3700 Step 74020 finished. Average episode return: 0.5
Episode 3800 Step 76020 finished. Average episode return: 0.51
Episode 3900 Step 78020 finished. Average episode return: 0.42
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning

                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-4000.mp4
Episode 4000 Step 80020 finished. Average episode return: 0.41
Episode 4100 Step 82020 finished. Average episode return: 0.51
Episode 4200 Step 84020 finished. Average episode return: 0.49
Episode 4300 Step 86020 finished. Average episode return: 0.48
Episode 4400 Step 88020 finished. Average episode return: 0.6
Episode 4500 Step 90020 finished. Average episode return: 0.56
Episode 4600 Step 92020 finished. Average episode return: 0.59
Episode 4700 Step 94020 finished. Average episode return: 0.52
Episode 4800 Step 96020 finished. Average episode return: 0.4
Episode 4900 Step 98020 finished. Average episode return: 0.54
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning

                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-5000.mp4
Episode 5000 Step 100020 finished. Average episode return: 0.48
Episode 5100 Step 102020 finished. Average episode return: 0.46
Episode 5200 Step 104020 finished. Average episode return: 0.62
Episode 5300 Step 106020 finished. Average episode return: 0.54
Episode 5400 Step 108020 finished. Average episode return: 0.43
Episode 5500 Step 110020 finished. Average episode return: 0.41
Episode 5600 Step 112020 finished. Average episode return: 0.49
Episode 5700 Step 114020 finished. Average episode return: 0.45
Episode 5800 Step 116020 finished. Average episode return: 0.43
Episode 5900 Step 118020 finished. Average episode return: 0.38
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/Reinforce

                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-6000.mp4
Episode 6000 Step 120020 finished. Average episode return: 0.44
Episode 6100 Step 122020 finished. Average episode return: 0.47
Episode 6200 Step 124020 finished. Average episode return: 0.5
Episode 6300 Step 126020 finished. Average episode return: 0.56
Episode 6400 Step 128020 finished. Average episode return: 0.36
Episode 6500 Step 130020 finished. Average episode return: 0.51
Episode 6600 Step 132020 finished. Average episode return: 0.54
Episode 6700 Step 134020 finished. Average episode return: 0.56
Episode 6800 Step 136020 finished. Average episode return: 0.32
Episode 6900 Step 138020 finished. Average episode return: 0.45
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/Reinforcem

                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-7000.mp4
Episode 7000 Step 140020 finished. Average episode return: 0.39
Episode 7100 Step 142020 finished. Average episode return: 0.54
Episode 7200 Step 144020 finished. Average episode return: 0.51
Episode 7300 Step 146020 finished. Average episode return: 0.54
Episode 7400 Step 148020 finished. Average episode return: 0.43
Episode 7500 Step 150020 finished. Average episode return: 0.6
Episode 7600 Step 152020 finished. Average episode return: 0.38
Episode 7700 Step 154020 finished. Average episode return: 0.53
Episode 7800 Step 156020 finished. Average episode return: 0.37
Episode 7900 Step 158020 finished. Average episode return: 0.56
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/Reinforcem

                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-8000.mp4
Episode 8000 Step 160020 finished. Average episode return: 0.5
Episode 8100 Step 162020 finished. Average episode return: 0.62
Episode 8200 Step 164020 finished. Average episode return: 0.53
Episode 8300 Step 166020 finished. Average episode return: 0.49
Episode 8400 Step 168020 finished. Average episode return: 0.35
Episode 8500 Step 170020 finished. Average episode return: 0.54
Episode 8600 Step 172020 finished. Average episode return: 0.53
Episode 8700 Step 174020 finished. Average episode return: 0.47
Episode 8800 Step 176020 finished. Average episode return: 0.63
Episode 8900 Step 178020 finished. Average episode return: 0.47
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/Reinforcem

                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-9000.mp4
Episode 9000 Step 180020 finished. Average episode return: 0.59
Episode 9100 Step 182020 finished. Average episode return: 0.59
Episode 9200 Step 184020 finished. Average episode return: 0.51
Episode 9300 Step 186020 finished. Average episode return: 0.51
Episode 9400 Step 188020 finished. Average episode return: 0.62
Episode 9500 Step 190020 finished. Average episode return: 0.35
Episode 9600 Step 192020 finished. Average episode return: 0.53
Episode 9700 Step 194020 finished. Average episode return: 0.61
Episode 9800 Step 196020 finished. Average episode return: 0.56
Episode 9900 Step 198020 finished. Average episode return: 0.6
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/Reinforcem

                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-10000.mp4
Episode 10000 Step 200020 finished. Average episode return: 0.64
Saved model to /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/model/model_parameters_1.pt ...
logger and seed 1
/Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/logging/logs_1.csv
------ Training Finished ------
Total traning time is 6.783494686100009mins
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Proje

                                                  

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-0.mp4
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-0.mp4.
Moviepy - Writing video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-0.mp4



                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-0.mp4
Episode 0 Step 20 finished. Average episode return: 1.0
Episode 100 Step 2020 finished. Average episode return: -0.05
Episode 200 Step 4020 finished. Average episode return: 0.08
Episode 300 Step 6020 finished. Average episode return: -0.19
Episode 400 Step 8020 finished. Average episode return: -0.02
Episode 500 Step 10020 finished. Average episode return: -0.21
Episode 600 Step 12020 finished. Average episode return: 0.32
Episode 700 Step 14020 finished. Average episode return: 0.41
Episode 800 Step 16020 finished. Average episode return: 0.47
Episode 900 Step 18020 finished. Average episode return: 0.39
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/R

                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-1000.mp4
Episode 1000 Step 20020 finished. Average episode return: 0.6
Episode 1100 Step 22020 finished. Average episode return: 0.36
Episode 1200 Step 24020 finished. Average episode return: 0.35
Episode 1300 Step 26020 finished. Average episode return: 0.4
Episode 1400 Step 28020 finished. Average episode return: 0.19
Episode 1500 Step 30020 finished. Average episode return: 0.21
Episode 1600 Step 32020 finished. Average episode return: 0.43
Episode 1700 Step 34020 finished. Average episode return: 0.27
Episode 1800 Step 36020 finished. Average episode return: 0.33
Episode 1900 Step 38020 finished. Average episode return: 0.33
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning

                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-2000.mp4
Episode 2000 Step 40020 finished. Average episode return: 0.27
Episode 2100 Step 42020 finished. Average episode return: 0.33
Episode 2200 Step 44020 finished. Average episode return: 0.3
Episode 2300 Step 46020 finished. Average episode return: 0.44
Episode 2400 Step 48020 finished. Average episode return: 0.57
Episode 2500 Step 50020 finished. Average episode return: 0.32
Episode 2600 Step 52020 finished. Average episode return: 0.41
Episode 2700 Step 54020 finished. Average episode return: 0.24
Episode 2800 Step 56020 finished. Average episode return: 0.27
Episode 2900 Step 58020 finished. Average episode return: 0.28
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearnin

                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-3000.mp4
Episode 3000 Step 60020 finished. Average episode return: 0.5
Episode 3100 Step 62020 finished. Average episode return: 0.49
Episode 3200 Step 64020 finished. Average episode return: 0.48
Episode 3300 Step 66020 finished. Average episode return: 0.51
Episode 3400 Step 68020 finished. Average episode return: 0.41
Episode 3500 Step 70020 finished. Average episode return: 0.35
Episode 3600 Step 72020 finished. Average episode return: 0.46
Episode 3700 Step 74020 finished. Average episode return: 0.33
Episode 3800 Step 76020 finished. Average episode return: 0.61
Episode 3900 Step 78020 finished. Average episode return: 0.37
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearnin

                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-4000.mp4
Episode 4000 Step 80020 finished. Average episode return: 0.47
Episode 4100 Step 82020 finished. Average episode return: 0.52
Episode 4200 Step 84020 finished. Average episode return: 0.45
Episode 4300 Step 86020 finished. Average episode return: 0.52
Episode 4400 Step 88020 finished. Average episode return: 0.48
Episode 4500 Step 90020 finished. Average episode return: 0.35
Episode 4600 Step 92020 finished. Average episode return: 0.49
Episode 4700 Step 94020 finished. Average episode return: 0.57
Episode 4800 Step 96020 finished. Average episode return: 0.39
Episode 4900 Step 98020 finished. Average episode return: 0.39
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearni

                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-5000.mp4
Episode 5000 Step 100020 finished. Average episode return: 0.35
Episode 5100 Step 102020 finished. Average episode return: 0.45
Episode 5200 Step 104020 finished. Average episode return: 0.52
Episode 5300 Step 106020 finished. Average episode return: 0.58
Episode 5400 Step 108020 finished. Average episode return: 0.43
Episode 5500 Step 110020 finished. Average episode return: 0.52
Episode 5600 Step 112020 finished. Average episode return: 0.39
Episode 5700 Step 114020 finished. Average episode return: 0.53
Episode 5800 Step 116020 finished. Average episode return: 0.36
Episode 5900 Step 118020 finished. Average episode return: 0.53
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/Reinforce

                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-6000.mp4
Episode 6000 Step 120020 finished. Average episode return: 0.4
Episode 6100 Step 122020 finished. Average episode return: 0.53
Episode 6200 Step 124020 finished. Average episode return: 0.48
Episode 6300 Step 126020 finished. Average episode return: 0.44
Episode 6400 Step 128020 finished. Average episode return: 0.41
Episode 6500 Step 130020 finished. Average episode return: 0.51
Episode 6600 Step 132020 finished. Average episode return: 0.57
Episode 6700 Step 134020 finished. Average episode return: 0.39
Episode 6800 Step 136020 finished. Average episode return: 0.32
Episode 6900 Step 138020 finished. Average episode return: 0.26
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/Reinforcem

                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-7000.mp4
Episode 7000 Step 140020 finished. Average episode return: 0.41
Episode 7100 Step 142020 finished. Average episode return: 0.51
Episode 7200 Step 144020 finished. Average episode return: 0.38
Episode 7300 Step 146020 finished. Average episode return: 0.6
Episode 7400 Step 148020 finished. Average episode return: 0.47
Episode 7500 Step 150020 finished. Average episode return: 0.55
Episode 7600 Step 152020 finished. Average episode return: 0.31
Episode 7700 Step 154020 finished. Average episode return: 0.46
Episode 7800 Step 156020 finished. Average episode return: 0.36
Episode 7900 Step 158020 finished. Average episode return: 0.46
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/Reinforcem

                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-8000.mp4
Episode 8000 Step 160020 finished. Average episode return: 0.44
Episode 8100 Step 162020 finished. Average episode return: 0.55
Episode 8200 Step 164020 finished. Average episode return: 0.34
Episode 8300 Step 166020 finished. Average episode return: 0.35
Episode 8400 Step 168020 finished. Average episode return: 0.21
Episode 8500 Step 170020 finished. Average episode return: 0.42
Episode 8600 Step 172020 finished. Average episode return: 0.51
Episode 8700 Step 174020 finished. Average episode return: 0.48
Episode 8800 Step 176020 finished. Average episode return: 0.48
Episode 8900 Step 178020 finished. Average episode return: 0.39
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/Reinforce

                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-9000.mp4
Episode 9000 Step 180020 finished. Average episode return: 0.43
Episode 9100 Step 182020 finished. Average episode return: 0.48
Episode 9200 Step 184020 finished. Average episode return: 0.52
Episode 9300 Step 186020 finished. Average episode return: 0.45
Episode 9400 Step 188020 finished. Average episode return: 0.59
Episode 9500 Step 190020 finished. Average episode return: 0.45
Episode 9600 Step 192020 finished. Average episode return: 0.57
Episode 9700 Step 194020 finished. Average episode return: 0.51
Episode 9800 Step 196020 finished. Average episode return: 0.47
Episode 9900 Step 198020 finished. Average episode return: 0.53
Moviepy - Building video /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/Reinforce

                                                               

Moviepy - Done !
Moviepy - video ready /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/video/train/project-episode-10000.mp4
Episode 10000 Step 200020 finished. Average episode return: 0.48
Saved model to /Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/model/model_parameters_2.pt ...
logger and seed 2
/Users/lgk1910/Library/CloudStorage/OneDrive-AaltoUniversity/Learning2/ReinforcementLearning/Project/GitHub/Reinforcement-Learning-Project/results/SandingEnvMiddle/ddpg_extension/logging/logs_2.csv
------ Training Finished ------
Total traning time is 6.943362038900007mins
