# Gyroscope: DDPG/TD3/SAC baseline training with normalized reward and observation = state (spinup library)

In [1]:
import gym
from gym import spaces
from gym.utils import seeding
from gym.envs.registration import register

from custom_functions.custom_functions import env_fn 
from custom_functions.custom_functions import test_agent
from custom_functions.custom_functions import plot_test

import spinup

from os import path
from scipy.integrate import solve_ivp
import random
import torch
import numpy as np
from collections import deque
import matplotlib.pyplot as plt
%matplotlib inline
from vpython import *
from functools import partial

# Register the gyroscope environment under the id that is later bound into env_fn.
# Older gym releases raise gym.error.Error("Cannot re-register id: ...") when the
# same id is registered twice, which makes this cell fail when it is re-run in a
# live kernel. Tolerate exactly that case; any other registration error still
# propagates.
try:
    register(id='gyroscopeenv-v0', entry_point='gym_GyroscopeEnv.envs:GyroscopeEnv')
except gym.error.Error as exc:
    if 'Cannot re-register id' not in str(exc):
        raise

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


<IPython.core.display.Javascript object>

## Environment Class and Modules

In [2]:
# Reward settings forwarded to the environment factory.
reward_args = dict(k=0.2)

# Bind the environment id and the reward configuration onto the imported env_fn.
# NOTE: this rebinds the name `env_fn`, so the imported function is shadowed by
# the partial; the partial is what the training cells pass to spinup.
env_fn = partial(
    env_fn,
    env_name='gyroscopeenv-v0',
    reward_type='Normalized',
    reward_args=reward_args,
)

## Training

#### DDPG

In [4]:
# Baseline 0 setup: DDPG with hidden_sizes=[300] and ReLU activation, seed 0.
# Logger output goes to the 'ddpg_b0_norm' directory.
logger_kwargs = {'output_dir': 'ddpg_b0_norm', 'exp_name': 'ddpg_b0_norm'}

# Run length.
seed_b = 0
epochs_b = 100
maxeplen_b = 110
spe_b = 15 * maxeplen_b  # steps_per_epoch: 15 * 110 = 1650 environment steps

# Actor-critic network configuration (passed as ac_kwargs).
args_b = {'hidden_sizes': [300], 'activation': torch.nn.ReLU}

# Replay buffer and update settings.
repsize_b = 10 ** 6
batchsize_b = 100
startsteps_b = 10000
gamma_b = 0.99
polyak_b = 0.995

# Action noise and learning rates.
actnoise_b = 0.1
pilr_b = 0.001
qlr_b = 0.001

# Baseline 0 training
spinup.ddpg_pytorch(
    env_fn,
    ac_kwargs=args_b,
    seed=seed_b,
    steps_per_epoch=spe_b,
    epochs=epochs_b,
    replay_size=repsize_b,
    gamma=gamma_b,
    polyak=polyak_b,
    batch_size=batchsize_b,
    start_steps=startsteps_b,
    max_ep_len=maxeplen_b,
    logger_kwargs=logger_kwargs,
    act_noise=actnoise_b,
    pi_lr=pilr_b,
    q_lr=qlr_b,
)

[32;1mLogging data to ddpg_b0_norm/progress.txt[0m
[36;1mSaving config:
[0m
{
    "ac_kwargs":	{
        "activation":	"ReLU",
        "hidden_sizes":	[
            300
        ]
    },
    "act_noise":	0.1,
    "actor_critic":	"MLPActorCritic",
    "batch_size":	100,
    "env_fn":	"<function <lambda> at 0x7f5839fc6598>",
    "epochs":	100,
    "exp_name":	"ddpg_b0_norm",
    "gamma":	0.99,
    "logger":	{
        "<spinup.utils.logx.EpochLogger object at 0x7f5839f7a6a0>":	{
            "epoch_dict":	{},
            "exp_name":	"ddpg_b0_norm",
            "first_row":	true,
            "log_current_row":	{},
            "log_headers":	[],
            "output_dir":	"ddpg_b0_norm",
            "output_file":	{
                "<_io.TextIOWrapper name='ddpg_b0_norm/progress.txt' mode='w' encoding='UTF-8'>":	{
                    "mode":	"w"
                }
            }
        }
    },
    "logger_kwargs":	{
        "exp_name":	"ddpg_b0_norm",
        "output_dir":	"ddpg_b0_norm"
 



---------------------------------------
|             Epoch |               1 |
|      AverageEpRet |            -180 |
|          StdEpRet |            5.68 |
|          MaxEpRet |            -167 |
|          MinEpRet |            -187 |
|  AverageTestEpRet |            -180 |
|      StdTestEpRet |            8.65 |
|      MaxTestEpRet |            -159 |
|      MinTestEpRet |            -189 |
|             EpLen |             110 |
|         TestEpLen |             110 |
| TotalEnvInteracts |        1.65e+03 |
|      AverageQVals |           -3.59 |
|          StdQVals |            1.99 |
|          MaxQVals |            3.29 |
|          MinQVals |           -10.7 |
|            LossPi |             3.4 |
|             LossQ |           0.127 |
|              Time |            4.22 |
---------------------------------------
---------------------------------------
|             Epoch |               2 |
|      AverageEpRet |            -182 |
|          StdEpRet |            6.94 |


---------------------------------------
|             Epoch |              11 |
|      AverageEpRet |            -158 |
|          StdEpRet |            15.8 |
|          MaxEpRet |            -128 |
|          MinEpRet |            -184 |
|  AverageTestEpRet |            -169 |
|      StdTestEpRet |            12.1 |
|      MaxTestEpRet |            -139 |
|      MinTestEpRet |            -180 |
|             EpLen |             110 |
|         TestEpLen |             110 |
| TotalEnvInteracts |        1.81e+04 |
|      AverageQVals |           -71.1 |
|          StdQVals |            5.93 |
|          MaxQVals |           -59.1 |
|          MinQVals |           -93.7 |
|            LossPi |            70.8 |
|             LossQ |           0.297 |
|              Time |            91.8 |
---------------------------------------
---------------------------------------
|             Epoch |              12 |
|      AverageEpRet |            -162 |
|          StdEpRet |            18.9 |


---------------------------------------
|             Epoch |              21 |
|      AverageEpRet |            -131 |
|          StdEpRet |            33.2 |
|          MaxEpRet |           -53.4 |
|          MinEpRet |            -181 |
|  AverageTestEpRet |            -130 |
|      StdTestEpRet |            24.7 |
|      MaxTestEpRet |           -83.9 |
|      MinTestEpRet |            -160 |
|             EpLen |             110 |
|         TestEpLen |             110 |
| TotalEnvInteracts |        3.46e+04 |
|      AverageQVals |           -96.6 |
|          StdQVals |            5.96 |
|          MaxQVals |           -81.3 |
|          MinQVals |            -121 |
|            LossPi |            96.2 |
|             LossQ |           0.338 |
|              Time |             173 |
---------------------------------------
---------------------------------------
|             Epoch |              22 |
|      AverageEpRet |            -114 |
|          StdEpRet |            32.3 |


---------------------------------------
|             Epoch |              31 |
|      AverageEpRet |           -91.3 |
|          StdEpRet |            15.4 |
|          MaxEpRet |           -64.4 |
|          MinEpRet |            -120 |
|  AverageTestEpRet |           -94.8 |
|      StdTestEpRet |            20.1 |
|      MaxTestEpRet |           -56.6 |
|      MinTestEpRet |            -130 |
|             EpLen |             110 |
|         TestEpLen |             110 |
| TotalEnvInteracts |        5.11e+04 |
|      AverageQVals |           -90.7 |
|          StdQVals |            5.89 |
|          MaxQVals |           -78.2 |
|          MinQVals |            -114 |
|            LossPi |            90.1 |
|             LossQ |           0.288 |
|              Time |             281 |
---------------------------------------
---------------------------------------
|             Epoch |              32 |
|      AverageEpRet |           -87.1 |
|          StdEpRet |            18.9 |


---------------------------------------
|             Epoch |              41 |
|      AverageEpRet |           -48.5 |
|          StdEpRet |             9.2 |
|          MaxEpRet |           -30.8 |
|          MinEpRet |           -67.2 |
|  AverageTestEpRet |           -51.7 |
|      StdTestEpRet |              22 |
|      MaxTestEpRet |           -21.5 |
|      MinTestEpRet |           -96.3 |
|             EpLen |             110 |
|         TestEpLen |             110 |
| TotalEnvInteracts |        6.76e+04 |
|      AverageQVals |           -73.2 |
|          StdQVals |            5.77 |
|          MaxQVals |           -61.7 |
|          MinQVals |           -97.3 |
|            LossPi |            72.6 |
|             LossQ |           0.294 |
|              Time |             354 |
---------------------------------------
---------------------------------------
|             Epoch |              42 |
|      AverageEpRet |           -48.2 |
|          StdEpRet |              11 |


---------------------------------------
|             Epoch |              51 |
|      AverageEpRet |           -38.3 |
|          StdEpRet |             5.8 |
|          MaxEpRet |           -26.1 |
|          MinEpRet |           -51.6 |
|  AverageTestEpRet |           -42.1 |
|      StdTestEpRet |            10.6 |
|      MaxTestEpRet |           -29.2 |
|      MinTestEpRet |           -71.1 |
|             EpLen |             110 |
|         TestEpLen |             110 |
| TotalEnvInteracts |        8.41e+04 |
|      AverageQVals |           -54.2 |
|          StdQVals |            5.65 |
|          MaxQVals |           -44.3 |
|          MinQVals |           -79.7 |
|            LossPi |            53.6 |
|             LossQ |           0.271 |
|              Time |             425 |
---------------------------------------
---------------------------------------
|             Epoch |              52 |
|      AverageEpRet |           -31.7 |
|          StdEpRet |            6.03 |


---------------------------------------
|             Epoch |              61 |
|      AverageEpRet |           -37.1 |
|          StdEpRet |            9.21 |
|          MaxEpRet |           -22.2 |
|          MinEpRet |           -52.5 |
|  AverageTestEpRet |             -34 |
|      StdTestEpRet |            7.91 |
|      MaxTestEpRet |             -22 |
|      MinTestEpRet |             -46 |
|             EpLen |             110 |
|         TestEpLen |             110 |
| TotalEnvInteracts |        1.01e+05 |
|      AverageQVals |           -39.6 |
|          StdQVals |            5.86 |
|          MaxQVals |           -29.5 |
|          MinQVals |           -64.5 |
|            LossPi |            39.1 |
|             LossQ |           0.266 |
|              Time |             521 |
---------------------------------------
---------------------------------------
|             Epoch |              62 |
|      AverageEpRet |           -36.2 |
|          StdEpRet |            7.23 |


---------------------------------------
|             Epoch |              71 |
|      AverageEpRet |             -30 |
|          StdEpRet |            7.93 |
|          MaxEpRet |           -18.4 |
|          MinEpRet |             -44 |
|  AverageTestEpRet |           -31.6 |
|      StdTestEpRet |            10.2 |
|      MaxTestEpRet |           -13.3 |
|      MinTestEpRet |           -44.2 |
|             EpLen |             110 |
|         TestEpLen |             110 |
| TotalEnvInteracts |        1.17e+05 |
|      AverageQVals |           -31.1 |
|          StdQVals |            6.47 |
|          MaxQVals |             -21 |
|          MinQVals |           -60.7 |
|            LossPi |            30.6 |
|             LossQ |           0.262 |
|              Time |             620 |
---------------------------------------
---------------------------------------
|             Epoch |              72 |
|      AverageEpRet |             -31 |
|          StdEpRet |            6.98 |


---------------------------------------
|             Epoch |              81 |
|      AverageEpRet |           -27.2 |
|          StdEpRet |            6.24 |
|          MaxEpRet |           -16.2 |
|          MinEpRet |           -36.9 |
|  AverageTestEpRet |           -26.5 |
|      StdTestEpRet |            6.06 |
|      MaxTestEpRet |           -17.9 |
|      MinTestEpRet |           -39.6 |
|             EpLen |             110 |
|         TestEpLen |             110 |
| TotalEnvInteracts |        1.34e+05 |
|      AverageQVals |           -24.5 |
|          StdQVals |            6.52 |
|          MaxQVals |           -14.6 |
|          MinQVals |           -56.9 |
|            LossPi |            24.1 |
|             LossQ |           0.283 |
|              Time |             717 |
---------------------------------------
---------------------------------------
|             Epoch |              82 |
|      AverageEpRet |           -28.7 |
|          StdEpRet |            5.66 |


---------------------------------------
|             Epoch |              91 |
|      AverageEpRet |           -29.5 |
|          StdEpRet |            7.97 |
|          MaxEpRet |           -15.9 |
|          MinEpRet |             -43 |
|  AverageTestEpRet |           -30.8 |
|      StdTestEpRet |            6.48 |
|      MaxTestEpRet |           -20.1 |
|      MinTestEpRet |           -42.1 |
|             EpLen |             110 |
|         TestEpLen |             110 |
| TotalEnvInteracts |         1.5e+05 |
|      AverageQVals |           -19.8 |
|          StdQVals |            6.56 |
|          MaxQVals |           -9.46 |
|          MinQVals |           -54.5 |
|            LossPi |            19.3 |
|             LossQ |           0.262 |
|              Time |             815 |
---------------------------------------
---------------------------------------
|             Epoch |              92 |
|      AverageEpRet |           -30.8 |
|          StdEpRet |            9.74 |


In [5]:
# Baseline 1 setup: DDPG with hidden_sizes=[1000] and ReLU activation, seed 10.
# Logger output goes to the 'ddpg_b1_norm' directory.
logger_kwargs = {'output_dir': 'ddpg_b1_norm', 'exp_name': 'ddpg_b1_norm'}

# Run length.
seed_b = 10
epochs_b = 100
maxeplen_b = 110
spe_b = 15 * maxeplen_b  # steps_per_epoch: 15 * 110 = 1650 environment steps

# Actor-critic network configuration (passed as ac_kwargs).
args_b = {'hidden_sizes': [1000], 'activation': torch.nn.ReLU}

# Replay buffer and update settings.
repsize_b = 10 ** 6
batchsize_b = 100
startsteps_b = 20000
gamma_b = 0.99
polyak_b = 0.995

# Action noise and learning rates.
actnoise_b = 0.1
pilr_b = 0.001
qlr_b = 0.001

# Baseline 1 training
spinup.ddpg_pytorch(
    env_fn,
    ac_kwargs=args_b,
    seed=seed_b,
    steps_per_epoch=spe_b,
    epochs=epochs_b,
    replay_size=repsize_b,
    gamma=gamma_b,
    polyak=polyak_b,
    batch_size=batchsize_b,
    start_steps=startsteps_b,
    max_ep_len=maxeplen_b,
    logger_kwargs=logger_kwargs,
    act_noise=actnoise_b,
    pi_lr=pilr_b,
    q_lr=qlr_b,
)

[32;1mLogging data to ddpg_b1_norm/progress.txt[0m
[36;1mSaving config:
[0m
{
    "ac_kwargs":	{
        "activation":	"ReLU",
        "hidden_sizes":	[
            1000
        ]
    },
    "act_noise":	0.1,
    "actor_critic":	"MLPActorCritic",
    "batch_size":	100,
    "env_fn":	"<function <lambda> at 0x7f5839fc6598>",
    "epochs":	100,
    "exp_name":	"ddpg_b1_norm",
    "gamma":	0.99,
    "logger":	{
        "<spinup.utils.logx.EpochLogger object at 0x7f583a2ef128>":	{
            "epoch_dict":	{},
            "exp_name":	"ddpg_b1_norm",
            "first_row":	true,
            "log_current_row":	{},
            "log_headers":	[],
            "output_dir":	"ddpg_b1_norm",
            "output_file":	{
                "<_io.TextIOWrapper name='ddpg_b1_norm/progress.txt' mode='w' encoding='UTF-8'>":	{
                    "mode":	"w"
                }
            }
        }
    },
    "logger_kwargs":	{
        "exp_name":	"ddpg_b1_norm",
        "output_dir":	"ddpg_b1_norm"




---------------------------------------
|             Epoch |               1 |
|      AverageEpRet |            -181 |
|          StdEpRet |            8.17 |
|          MaxEpRet |            -166 |
|          MinEpRet |            -192 |
|  AverageTestEpRet |            -181 |
|      StdTestEpRet |            3.11 |
|      MaxTestEpRet |            -175 |
|      MinTestEpRet |            -185 |
|             EpLen |             110 |
|         TestEpLen |             110 |
| TotalEnvInteracts |        1.65e+03 |
|      AverageQVals |            -3.1 |
|          StdQVals |            1.83 |
|          MaxQVals |            2.81 |
|          MinQVals |           -11.1 |
|            LossPi |            3.25 |
|             LossQ |           0.116 |
|              Time |            6.46 |
---------------------------------------
---------------------------------------
|             Epoch |               2 |
|      AverageEpRet |            -179 |
|          StdEpRet |            8.25 |


---------------------------------------
|             Epoch |              11 |
|      AverageEpRet |            -180 |
|          StdEpRet |            5.75 |
|          MaxEpRet |            -169 |
|          MinEpRet |            -188 |
|  AverageTestEpRet |            -160 |
|      StdTestEpRet |            21.6 |
|      MaxTestEpRet |            -119 |
|      MinTestEpRet |            -188 |
|             EpLen |             110 |
|         TestEpLen |             110 |
| TotalEnvInteracts |        1.81e+04 |
|      AverageQVals |           -80.2 |
|          StdQVals |            3.75 |
|          MaxQVals |           -67.2 |
|          MinQVals |           -92.8 |
|            LossPi |            79.8 |
|             LossQ |           0.467 |
|              Time |             131 |
---------------------------------------
---------------------------------------
|             Epoch |              12 |
|      AverageEpRet |            -181 |
|          StdEpRet |            5.95 |


---------------------------------------
|             Epoch |              21 |
|      AverageEpRet |            -108 |
|          StdEpRet |            26.8 |
|          MaxEpRet |           -55.6 |
|          MinEpRet |            -151 |
|  AverageTestEpRet |            -119 |
|      StdTestEpRet |            39.6 |
|      MaxTestEpRet |           -51.5 |
|      MinTestEpRet |            -170 |
|             EpLen |             110 |
|         TestEpLen |             110 |
| TotalEnvInteracts |        3.46e+04 |
|      AverageQVals |             -94 |
|          StdQVals |            5.11 |
|          MaxQVals |           -79.6 |
|          MinQVals |            -114 |
|            LossPi |            93.5 |
|             LossQ |           0.431 |
|              Time |             257 |
---------------------------------------
---------------------------------------
|             Epoch |              22 |
|      AverageEpRet |            -107 |
|          StdEpRet |            22.3 |


---------------------------------------
|             Epoch |              31 |
|      AverageEpRet |           -72.7 |
|          StdEpRet |            14.6 |
|          MaxEpRet |           -43.7 |
|          MinEpRet |           -96.7 |
|  AverageTestEpRet |           -72.2 |
|      StdTestEpRet |            19.1 |
|      MaxTestEpRet |           -40.2 |
|      MinTestEpRet |           -97.6 |
|             EpLen |             110 |
|         TestEpLen |             110 |
| TotalEnvInteracts |        5.11e+04 |
|      AverageQVals |           -78.7 |
|          StdQVals |            5.82 |
|          MaxQVals |           -64.7 |
|          MinQVals |            -102 |
|            LossPi |            78.1 |
|             LossQ |           0.409 |
|              Time |             374 |
---------------------------------------
---------------------------------------
|             Epoch |              32 |
|      AverageEpRet |           -72.3 |
|          StdEpRet |            21.6 |


---------------------------------------
|             Epoch |              41 |
|      AverageEpRet |           -54.8 |
|          StdEpRet |            12.7 |
|          MaxEpRet |           -38.5 |
|          MinEpRet |           -79.2 |
|  AverageTestEpRet |             -40 |
|      StdTestEpRet |            11.9 |
|      MaxTestEpRet |           -14.9 |
|      MinTestEpRet |           -58.2 |
|             EpLen |             110 |
|         TestEpLen |             110 |
| TotalEnvInteracts |        6.76e+04 |
|      AverageQVals |           -61.4 |
|          StdQVals |            6.13 |
|          MaxQVals |           -47.1 |
|          MinQVals |             -84 |
|            LossPi |            60.8 |
|             LossQ |           0.362 |
|              Time |             498 |
---------------------------------------
---------------------------------------
|             Epoch |              42 |
|      AverageEpRet |           -50.3 |
|          StdEpRet |            14.3 |


---------------------------------------
|             Epoch |              51 |
|      AverageEpRet |           -35.9 |
|          StdEpRet |            7.97 |
|          MaxEpRet |           -22.5 |
|          MinEpRet |           -48.9 |
|  AverageTestEpRet |           -43.8 |
|      StdTestEpRet |            12.9 |
|      MaxTestEpRet |           -27.3 |
|      MinTestEpRet |           -65.6 |
|             EpLen |             110 |
|         TestEpLen |             110 |
| TotalEnvInteracts |        8.41e+04 |
|      AverageQVals |           -46.6 |
|          StdQVals |            6.69 |
|          MaxQVals |           -31.7 |
|          MinQVals |             -73 |
|            LossPi |              46 |
|             LossQ |           0.315 |
|              Time |             639 |
---------------------------------------
---------------------------------------
|             Epoch |              52 |
|      AverageEpRet |           -39.2 |
|          StdEpRet |            8.27 |


---------------------------------------
|             Epoch |              61 |
|      AverageEpRet |           -33.5 |
|          StdEpRet |            14.8 |
|          MaxEpRet |             -15 |
|          MinEpRet |           -82.8 |
|  AverageTestEpRet |           -33.8 |
|      StdTestEpRet |            11.4 |
|      MaxTestEpRet |           -14.4 |
|      MinTestEpRet |           -50.5 |
|             EpLen |             110 |
|         TestEpLen |             110 |
| TotalEnvInteracts |        1.01e+05 |
|      AverageQVals |           -35.2 |
|          StdQVals |            7.05 |
|          MaxQVals |           -21.4 |
|          MinQVals |           -66.1 |
|            LossPi |            34.7 |
|             LossQ |           0.269 |
|              Time |             780 |
---------------------------------------
---------------------------------------
|             Epoch |              62 |
|      AverageEpRet |           -31.6 |
|          StdEpRet |             7.2 |


---------------------------------------
|             Epoch |              71 |
|      AverageEpRet |           -37.1 |
|          StdEpRet |            10.9 |
|          MaxEpRet |             -23 |
|          MinEpRet |             -68 |
|  AverageTestEpRet |           -32.9 |
|      StdTestEpRet |            7.54 |
|      MaxTestEpRet |           -22.5 |
|      MinTestEpRet |           -47.3 |
|             EpLen |             110 |
|         TestEpLen |             110 |
| TotalEnvInteracts |        1.17e+05 |
|      AverageQVals |           -28.3 |
|          StdQVals |            7.43 |
|          MaxQVals |             -13 |
|          MinQVals |           -63.1 |
|            LossPi |            27.8 |
|             LossQ |           0.234 |
|              Time |             904 |
---------------------------------------
---------------------------------------
|             Epoch |              72 |
|      AverageEpRet |             -33 |
|          StdEpRet |            6.64 |


---------------------------------------
|             Epoch |              81 |
|      AverageEpRet |           -29.2 |
|          StdEpRet |            6.66 |
|          MaxEpRet |           -12.7 |
|          MinEpRet |           -38.2 |
|  AverageTestEpRet |             -33 |
|      StdTestEpRet |            10.7 |
|      MaxTestEpRet |           -16.5 |
|      MinTestEpRet |           -50.5 |
|             EpLen |             110 |
|         TestEpLen |             110 |
| TotalEnvInteracts |        1.34e+05 |
|      AverageQVals |           -24.1 |
|          StdQVals |            7.51 |
|          MaxQVals |           -9.29 |
|          MinQVals |           -61.6 |
|            LossPi |            23.7 |
|             LossQ |           0.225 |
|              Time |        1.01e+03 |
---------------------------------------
---------------------------------------
|             Epoch |              82 |
|      AverageEpRet |             -27 |
|          StdEpRet |            8.41 |


---------------------------------------
|             Epoch |              91 |
|      AverageEpRet |           -27.7 |
|          StdEpRet |             6.2 |
|          MaxEpRet |           -15.9 |
|          MinEpRet |           -36.8 |
|  AverageTestEpRet |           -25.4 |
|      StdTestEpRet |            7.98 |
|      MaxTestEpRet |           -12.3 |
|      MinTestEpRet |           -37.7 |
|             EpLen |             110 |
|         TestEpLen |             110 |
| TotalEnvInteracts |         1.5e+05 |
|      AverageQVals |           -20.5 |
|          StdQVals |            7.63 |
|          MaxQVals |           -7.09 |
|          MinQVals |           -61.6 |
|            LossPi |            20.2 |
|             LossQ |           0.219 |
|              Time |        1.12e+03 |
---------------------------------------
---------------------------------------
|             Epoch |              92 |
|      AverageEpRet |           -26.3 |
|          StdEpRet |            8.63 |


In [6]:
# Baseline 2 setup: DDPG with hidden_sizes=[400, 400] and ReLU activation, seed 10.
# Logger output goes to the 'ddpg_b2_norm' directory.
logger_kwargs = {'output_dir': 'ddpg_b2_norm', 'exp_name': 'ddpg_b2_norm'}

# Run length.
seed_b = 10
epochs_b = 100
maxeplen_b = 110
spe_b = 15 * maxeplen_b  # steps_per_epoch: 15 * 110 = 1650 environment steps

# Actor-critic network configuration (passed as ac_kwargs).
args_b = {'hidden_sizes': [400, 400], 'activation': torch.nn.ReLU}

# Replay buffer and update settings.
repsize_b = 10 ** 6
batchsize_b = 100
startsteps_b = 20000
gamma_b = 0.99
polyak_b = 0.995

# Action noise and learning rates.
actnoise_b = 0.1
pilr_b = 0.001
qlr_b = 0.001

# Baseline 2 training
spinup.ddpg_pytorch(
    env_fn,
    ac_kwargs=args_b,
    seed=seed_b,
    steps_per_epoch=spe_b,
    epochs=epochs_b,
    replay_size=repsize_b,
    gamma=gamma_b,
    polyak=polyak_b,
    batch_size=batchsize_b,
    start_steps=startsteps_b,
    max_ep_len=maxeplen_b,
    logger_kwargs=logger_kwargs,
    act_noise=actnoise_b,
    pi_lr=pilr_b,
    q_lr=qlr_b,
)

[32;1mLogging data to ddpg_b2_norm/progress.txt[0m
[36;1mSaving config:
[0m
{
    "ac_kwargs":	{
        "activation":	"ReLU",
        "hidden_sizes":	[
            400,
            400
        ]
    },
    "act_noise":	0.1,
    "actor_critic":	"MLPActorCritic",
    "batch_size":	100,
    "env_fn":	"<function <lambda> at 0x7f5839fc6598>",
    "epochs":	100,
    "exp_name":	"ddpg_b2_norm",
    "gamma":	0.99,
    "logger":	{
        "<spinup.utils.logx.EpochLogger object at 0x7f58340ea780>":	{
            "epoch_dict":	{},
            "exp_name":	"ddpg_b2_norm",
            "first_row":	true,
            "log_current_row":	{},
            "log_headers":	[],
            "output_dir":	"ddpg_b2_norm",
            "output_file":	{
                "<_io.TextIOWrapper name='ddpg_b2_norm/progress.txt' mode='w' encoding='UTF-8'>":	{
                    "mode":	"w"
                }
            }
        }
    },
    "logger_kwargs":	{
        "exp_name":	"ddpg_b2_norm",
        "output_dir":



---------------------------------------
|             Epoch |               1 |
|      AverageEpRet |            -178 |
|          StdEpRet |            5.48 |
|          MaxEpRet |            -166 |
|          MinEpRet |            -186 |
|  AverageTestEpRet |            -180 |
|      StdTestEpRet |            4.87 |
|      MaxTestEpRet |            -174 |
|      MinTestEpRet |            -190 |
|             EpLen |             110 |
|         TestEpLen |             110 |
| TotalEnvInteracts |        1.65e+03 |
|      AverageQVals |           -4.89 |
|          StdQVals |            1.61 |
|          MaxQVals |            1.47 |
|          MinQVals |             -11 |
|            LossPi |            4.71 |
|             LossQ |          0.0976 |
|              Time |            10.3 |
---------------------------------------
---------------------------------------
|             Epoch |               2 |
|      AverageEpRet |            -179 |
|          StdEpRet |            7.66 |


---------------------------------------
|             Epoch |              11 |
|      AverageEpRet |            -179 |
|          StdEpRet |            6.24 |
|          MaxEpRet |            -164 |
|          MinEpRet |            -189 |
|  AverageTestEpRet |            -139 |
|      StdTestEpRet |            33.4 |
|      MaxTestEpRet |           -87.6 |
|      MinTestEpRet |            -184 |
|             EpLen |             110 |
|         TestEpLen |             110 |
| TotalEnvInteracts |        1.81e+04 |
|      AverageQVals |           -59.5 |
|          StdQVals |            8.27 |
|          MaxQVals |           -38.1 |
|          MinQVals |           -86.8 |
|            LossPi |            58.9 |
|             LossQ |           0.439 |
|              Time |             242 |
---------------------------------------
---------------------------------------
|             Epoch |              12 |
|      AverageEpRet |            -181 |
|          StdEpRet |            5.93 |


---------------------------------------
|             Epoch |              21 |
|      AverageEpRet |            -113 |
|          StdEpRet |            26.7 |
|          MaxEpRet |           -48.2 |
|          MinEpRet |            -169 |
|  AverageTestEpRet |           -99.7 |
|      StdTestEpRet |            25.7 |
|      MaxTestEpRet |             -68 |
|      MinTestEpRet |            -145 |
|             EpLen |             110 |
|         TestEpLen |             110 |
| TotalEnvInteracts |        3.46e+04 |
|      AverageQVals |           -70.1 |
|          StdQVals |            9.37 |
|          MaxQVals |             -42 |
|          MinQVals |           -98.8 |
|            LossPi |            69.4 |
|             LossQ |           0.446 |
|              Time |             476 |
---------------------------------------
---------------------------------------
|             Epoch |              22 |
|      AverageEpRet |            -110 |
|          StdEpRet |            22.4 |


---------------------------------------
|             Epoch |              31 |
|      AverageEpRet |             -88 |
|          StdEpRet |            22.9 |
|          MaxEpRet |           -47.6 |
|          MinEpRet |            -131 |
|  AverageTestEpRet |             -89 |
|      StdTestEpRet |            21.7 |
|      MaxTestEpRet |           -53.4 |
|      MinTestEpRet |            -115 |
|             EpLen |             110 |
|         TestEpLen |             110 |
| TotalEnvInteracts |        5.11e+04 |
|      AverageQVals |           -65.1 |
|          StdQVals |            10.6 |
|          MaxQVals |           -38.2 |
|          MinQVals |            -101 |
|            LossPi |            64.6 |
|             LossQ |           0.355 |
|              Time |             735 |
---------------------------------------
---------------------------------------
|             Epoch |              32 |
|      AverageEpRet |           -86.9 |
|          StdEpRet |            27.8 |


---------------------------------------
|             Epoch |              41 |
|      AverageEpRet |           -65.8 |
|          StdEpRet |              19 |
|          MaxEpRet |           -27.3 |
|          MinEpRet |           -95.9 |
|  AverageTestEpRet |           -56.5 |
|      StdTestEpRet |              17 |
|      MaxTestEpRet |           -26.8 |
|      MinTestEpRet |           -93.4 |
|             EpLen |             110 |
|         TestEpLen |             110 |
| TotalEnvInteracts |        6.76e+04 |
|      AverageQVals |           -56.2 |
|          StdQVals |            10.4 |
|          MaxQVals |           -30.6 |
|          MinQVals |           -97.5 |
|            LossPi |            55.7 |
|             LossQ |           0.426 |
|              Time |           1e+03 |
---------------------------------------
---------------------------------------
|             Epoch |              42 |
|      AverageEpRet |           -52.7 |
|          StdEpRet |            10.6 |


---------------------------------------
|             Epoch |              51 |
|      AverageEpRet |           -50.4 |
|          StdEpRet |            14.2 |
|          MaxEpRet |           -22.5 |
|          MinEpRet |           -79.3 |
|  AverageTestEpRet |           -55.5 |
|      StdTestEpRet |            19.8 |
|      MaxTestEpRet |           -19.3 |
|      MinTestEpRet |             -86 |
|             EpLen |             110 |
|         TestEpLen |             110 |
| TotalEnvInteracts |        8.41e+04 |
|      AverageQVals |           -44.9 |
|          StdQVals |            9.93 |
|          MaxQVals |           -9.78 |
|          MinQVals |           -78.6 |
|            LossPi |            44.4 |
|             LossQ |           0.426 |
|              Time |        1.34e+03 |
---------------------------------------
---------------------------------------
|             Epoch |              52 |
|      AverageEpRet |           -48.6 |
|          StdEpRet |            10.7 |


---------------------------------------
|             Epoch |              61 |
|      AverageEpRet |           -42.7 |
|          StdEpRet |            13.2 |
|          MaxEpRet |           -18.4 |
|          MinEpRet |           -63.2 |
|  AverageTestEpRet |           -49.9 |
|      StdTestEpRet |            16.1 |
|      MaxTestEpRet |           -26.3 |
|      MinTestEpRet |           -75.6 |
|             EpLen |             110 |
|         TestEpLen |             110 |
| TotalEnvInteracts |        1.01e+05 |
|      AverageQVals |           -36.4 |
|          StdQVals |            9.57 |
|          MaxQVals |           -8.25 |
|          MinQVals |           -78.3 |
|            LossPi |            35.9 |
|             LossQ |           0.415 |
|              Time |        1.74e+03 |
---------------------------------------
---------------------------------------
|             Epoch |              62 |
|      AverageEpRet |           -48.7 |
|          StdEpRet |            15.1 |


---------------------------------------
|             Epoch |              71 |
|      AverageEpRet |           -42.6 |
|          StdEpRet |            9.33 |
|          MaxEpRet |           -29.8 |
|          MinEpRet |           -65.1 |
|  AverageTestEpRet |           -40.8 |
|      StdTestEpRet |            6.38 |
|      MaxTestEpRet |           -28.1 |
|      MinTestEpRet |           -52.8 |
|             EpLen |             110 |
|         TestEpLen |             110 |
| TotalEnvInteracts |        1.17e+05 |
|      AverageQVals |           -30.1 |
|          StdQVals |            9.12 |
|          MaxQVals |            0.54 |
|          MinQVals |           -69.4 |
|            LossPi |            29.7 |
|             LossQ |           0.369 |
|              Time |        2.17e+03 |
---------------------------------------
---------------------------------------
|             Epoch |              72 |
|      AverageEpRet |           -41.2 |
|          StdEpRet |            11.6 |


---------------------------------------
|             Epoch |              81 |
|      AverageEpRet |             -39 |
|          StdEpRet |            10.2 |
|          MaxEpRet |           -19.7 |
|          MinEpRet |           -55.8 |
|  AverageTestEpRet |           -40.6 |
|      StdTestEpRet |            9.49 |
|      MaxTestEpRet |           -22.8 |
|      MinTestEpRet |           -51.7 |
|             EpLen |             110 |
|         TestEpLen |             110 |
| TotalEnvInteracts |        1.34e+05 |
|      AverageQVals |           -25.3 |
|          StdQVals |            8.26 |
|          MaxQVals |            8.45 |
|          MinQVals |           -59.2 |
|            LossPi |            24.8 |
|             LossQ |           0.309 |
|              Time |        2.62e+03 |
---------------------------------------
---------------------------------------
|             Epoch |              82 |
|      AverageEpRet |           -41.1 |
|          StdEpRet |            12.7 |


---------------------------------------
|             Epoch |              91 |
|      AverageEpRet |           -36.4 |
|          StdEpRet |            8.18 |
|          MaxEpRet |           -23.5 |
|          MinEpRet |           -53.6 |
|  AverageTestEpRet |           -39.4 |
|      StdTestEpRet |            9.17 |
|      MaxTestEpRet |           -24.7 |
|      MinTestEpRet |           -53.4 |
|             EpLen |             110 |
|         TestEpLen |             110 |
| TotalEnvInteracts |         1.5e+05 |
|      AverageQVals |           -21.7 |
|          StdQVals |            7.52 |
|          MaxQVals |            5.46 |
|          MinQVals |           -58.2 |
|            LossPi |            21.3 |
|             LossQ |           0.299 |
|              Time |        3.12e+03 |
---------------------------------------
---------------------------------------
|             Epoch |              92 |
|      AverageEpRet |           -35.7 |
|          StdEpRet |              12 |


#### TD3

In [7]:
# TD3 baseline 0: single 300-unit ReLU hidden layer, target-noise clip of 0.5.
# Spinning Up logs this run under the directory named below.
logger_kwargs = {'output_dir': 'td3_b0_norm', 'exp_name': 'td3_b0_norm'}

# Run length: each epoch spans 15 episodes' worth of environment steps.
seed_b = 0
epochs_b = 100
maxeplen_b = 110
spe_b = 15 * maxeplen_b

# Replay buffer and update settings.
repsize_b = int(1e6)
gamma_b = 0.99
polyak_b = 0.995
batchsize_b = 100
startsteps_b = 10000

# Actor-critic network shape, exploration noise and optimiser step sizes.
args_b = {'hidden_sizes': [300], 'activation': torch.nn.ReLU}
actnoise_b = 0.1
pilr_b = 1e-3
qlr_b = 1e-3

# TD3-only settings: policy update delay and target-policy smoothing noise.
pd_b = 2
targnoise_b = 0.2
noiseclip_b = 0.5

# Launch the baseline 0 training run.
spinup.td3_pytorch(
    env_fn,
    ac_kwargs=args_b,
    seed=seed_b,
    steps_per_epoch=spe_b,
    epochs=epochs_b,
    replay_size=repsize_b,
    gamma=gamma_b,
    polyak=polyak_b,
    batch_size=batchsize_b,
    start_steps=startsteps_b,
    max_ep_len=maxeplen_b,
    logger_kwargs=logger_kwargs,
    act_noise=actnoise_b,
    pi_lr=pilr_b,
    q_lr=qlr_b,
    policy_delay=pd_b,
    target_noise=targnoise_b,
    noise_clip=noiseclip_b,
)

[32;1mLogging data to td3_b0_norm/progress.txt[0m
[36;1mSaving config:
[0m
{
    "ac_kwargs":	{
        "activation":	"ReLU",
        "hidden_sizes":	[
            300
        ]
    },
    "act_noise":	0.1,
    "actor_critic":	"MLPActorCritic",
    "batch_size":	100,
    "env_fn":	"<function <lambda> at 0x7f5839fc6598>",
    "epochs":	100,
    "exp_name":	"td3_b0_norm",
    "gamma":	0.99,
    "logger":	{
        "<spinup.utils.logx.EpochLogger object at 0x7f582c24dba8>":	{
            "epoch_dict":	{},
            "exp_name":	"td3_b0_norm",
            "first_row":	true,
            "log_current_row":	{},
            "log_headers":	[],
            "output_dir":	"td3_b0_norm",
            "output_file":	{
                "<_io.TextIOWrapper name='td3_b0_norm/progress.txt' mode='w' encoding='UTF-8'>":	{
                    "mode":	"w"
                }
            }
        }
    },
    "logger_kwargs":	{
        "exp_name":	"td3_b0_norm",
        "output_dir":	"td3_b0_norm"
    },
 



---------------------------------------
|             Epoch |               1 |
|      AverageEpRet |            -181 |
|          StdEpRet |            8.78 |
|          MaxEpRet |            -164 |
|          MinEpRet |            -194 |
|  AverageTestEpRet |            -181 |
|      StdTestEpRet |            4.59 |
|      MaxTestEpRet |            -170 |
|      MinTestEpRet |            -186 |
|             EpLen |             110 |
|         TestEpLen |             110 |
| TotalEnvInteracts |        1.65e+03 |
|     AverageQ1Vals |           -2.62 |
|         StdQ1Vals |            1.56 |
|         MaxQ1Vals |            2.81 |
|         MinQ1Vals |           -7.87 |
|     AverageQ2Vals |           -2.61 |
|         StdQ2Vals |            1.57 |
|         MaxQ2Vals |             3.7 |
|         MinQ2Vals |            -8.2 |
|            LossPi |            2.39 |
|             LossQ |           0.424 |
|              Time |             5.6 |
---------------------------------------


---------------------------------------
|             Epoch |              10 |
|      AverageEpRet |            -166 |
|          StdEpRet |            20.6 |
|          MaxEpRet |            -128 |
|          MinEpRet |            -194 |
|  AverageTestEpRet |            -164 |
|      StdTestEpRet |            22.3 |
|      MaxTestEpRet |            -135 |
|      MinTestEpRet |            -190 |
|             EpLen |             110 |
|         TestEpLen |             110 |
| TotalEnvInteracts |        1.65e+04 |
|     AverageQ1Vals |           -48.8 |
|         StdQ1Vals |            3.07 |
|         MaxQ1Vals |             -40 |
|         MinQ1Vals |           -59.7 |
|     AverageQ2Vals |           -48.8 |
|         StdQ2Vals |            3.07 |
|         MaxQ2Vals |           -40.2 |
|         MinQ2Vals |           -59.6 |
|            LossPi |            48.5 |
|             LossQ |           0.537 |
|              Time |            94.7 |
---------------------------------------


---------------------------------------
|             Epoch |              19 |
|      AverageEpRet |            -112 |
|          StdEpRet |            23.9 |
|          MaxEpRet |           -76.4 |
|          MinEpRet |            -162 |
|  AverageTestEpRet |            -110 |
|      StdTestEpRet |            5.83 |
|      MaxTestEpRet |           -98.7 |
|      MinTestEpRet |            -122 |
|             EpLen |             110 |
|         TestEpLen |             110 |
| TotalEnvInteracts |        3.13e+04 |
|     AverageQ1Vals |             -74 |
|         StdQ1Vals |            5.69 |
|         MaxQ1Vals |           -59.5 |
|         MinQ1Vals |           -92.2 |
|     AverageQ2Vals |             -74 |
|         StdQ2Vals |            5.69 |
|         MaxQ2Vals |           -59.1 |
|         MinQ2Vals |           -92.9 |
|            LossPi |            73.6 |
|             LossQ |           0.892 |
|              Time |             203 |
---------------------------------------


---------------------------------------
|             Epoch |              28 |
|      AverageEpRet |           -83.5 |
|          StdEpRet |            19.5 |
|          MaxEpRet |             -53 |
|          MinEpRet |            -125 |
|  AverageTestEpRet |           -94.4 |
|      StdTestEpRet |              20 |
|      MaxTestEpRet |           -55.5 |
|      MinTestEpRet |            -131 |
|             EpLen |             110 |
|         TestEpLen |             110 |
| TotalEnvInteracts |        4.62e+04 |
|     AverageQ1Vals |           -82.1 |
|         StdQ1Vals |            8.05 |
|         MaxQ1Vals |           -64.3 |
|         MinQ1Vals |            -109 |
|     AverageQ2Vals |           -82.1 |
|         StdQ2Vals |            8.05 |
|         MaxQ2Vals |           -64.4 |
|         MinQ2Vals |            -109 |
|            LossPi |            81.5 |
|             LossQ |           0.855 |
|              Time |             292 |
---------------------------------------


---------------------------------------
|             Epoch |              37 |
|      AverageEpRet |           -69.7 |
|          StdEpRet |            19.7 |
|          MaxEpRet |           -34.6 |
|          MinEpRet |            -106 |
|  AverageTestEpRet |             -75 |
|      StdTestEpRet |            16.2 |
|      MaxTestEpRet |           -51.1 |
|      MinTestEpRet |            -105 |
|             EpLen |             110 |
|         TestEpLen |             110 |
| TotalEnvInteracts |         6.1e+04 |
|     AverageQ1Vals |           -81.3 |
|         StdQ1Vals |            9.17 |
|         MaxQ1Vals |           -60.1 |
|         MinQ1Vals |            -114 |
|     AverageQ2Vals |           -81.3 |
|         StdQ2Vals |            9.17 |
|         MaxQ2Vals |           -60.1 |
|         MinQ2Vals |            -115 |
|            LossPi |            80.8 |
|             LossQ |           0.825 |
|              Time |             370 |
---------------------------------------


---------------------------------------
|             Epoch |              46 |
|      AverageEpRet |           -50.2 |
|          StdEpRet |            9.95 |
|          MaxEpRet |           -36.5 |
|          MinEpRet |           -68.4 |
|  AverageTestEpRet |           -59.9 |
|      StdTestEpRet |            14.2 |
|      MaxTestEpRet |             -41 |
|      MinTestEpRet |           -79.8 |
|             EpLen |             110 |
|         TestEpLen |             110 |
| TotalEnvInteracts |        7.59e+04 |
|     AverageQ1Vals |           -75.8 |
|         StdQ1Vals |            9.97 |
|         MaxQ1Vals |           -53.8 |
|         MinQ1Vals |            -115 |
|     AverageQ2Vals |           -75.8 |
|         StdQ2Vals |            9.97 |
|         MaxQ2Vals |           -53.9 |
|         MinQ2Vals |            -115 |
|            LossPi |            75.3 |
|             LossQ |           0.766 |
|              Time |             458 |
---------------------------------------


---------------------------------------
|             Epoch |              55 |
|      AverageEpRet |           -37.3 |
|          StdEpRet |            14.4 |
|          MaxEpRet |             -17 |
|          MinEpRet |             -68 |
|  AverageTestEpRet |           -40.5 |
|      StdTestEpRet |            14.4 |
|      MaxTestEpRet |           -23.8 |
|      MinTestEpRet |           -66.4 |
|             EpLen |             110 |
|         TestEpLen |             110 |
| TotalEnvInteracts |        9.07e+04 |
|     AverageQ1Vals |           -66.6 |
|         StdQ1Vals |            9.93 |
|         MaxQ1Vals |           -45.5 |
|         MinQ1Vals |            -108 |
|     AverageQ2Vals |           -66.6 |
|         StdQ2Vals |            9.93 |
|         MaxQ2Vals |           -45.5 |
|         MinQ2Vals |            -108 |
|            LossPi |              66 |
|             LossQ |           0.761 |
|              Time |             553 |
---------------------------------------


---------------------------------------
|             Epoch |              64 |
|      AverageEpRet |           -38.7 |
|          StdEpRet |            8.11 |
|          MaxEpRet |           -21.4 |
|          MinEpRet |           -55.9 |
|  AverageTestEpRet |           -37.8 |
|      StdTestEpRet |            11.7 |
|      MaxTestEpRet |           -22.3 |
|      MinTestEpRet |             -60 |
|             EpLen |             110 |
|         TestEpLen |             110 |
| TotalEnvInteracts |        1.06e+05 |
|     AverageQ1Vals |           -55.9 |
|         StdQ1Vals |            9.47 |
|         MaxQ1Vals |             -37 |
|         MinQ1Vals |           -99.3 |
|     AverageQ2Vals |           -55.9 |
|         StdQ2Vals |            9.47 |
|         MaxQ2Vals |           -37.2 |
|         MinQ2Vals |           -98.9 |
|            LossPi |            55.4 |
|             LossQ |           0.755 |
|              Time |             636 |
---------------------------------------


---------------------------------------
|             Epoch |              73 |
|      AverageEpRet |           -35.8 |
|          StdEpRet |            8.53 |
|          MaxEpRet |           -25.1 |
|          MinEpRet |           -55.3 |
|  AverageTestEpRet |           -28.2 |
|      StdTestEpRet |            8.03 |
|      MaxTestEpRet |           -15.9 |
|      MinTestEpRet |             -41 |
|             EpLen |             110 |
|         TestEpLen |             110 |
| TotalEnvInteracts |         1.2e+05 |
|     AverageQ1Vals |           -47.3 |
|         StdQ1Vals |            8.93 |
|         MaxQ1Vals |           -30.8 |
|         MinQ1Vals |           -87.4 |
|     AverageQ2Vals |           -47.3 |
|         StdQ2Vals |            8.94 |
|         MaxQ2Vals |           -30.7 |
|         MinQ2Vals |           -87.8 |
|            LossPi |            46.8 |
|             LossQ |           0.654 |
|              Time |             705 |
---------------------------------------


---------------------------------------
|             Epoch |              82 |
|      AverageEpRet |           -36.8 |
|          StdEpRet |            9.34 |
|          MaxEpRet |           -22.3 |
|          MinEpRet |           -52.7 |
|  AverageTestEpRet |           -44.7 |
|      StdTestEpRet |              10 |
|      MaxTestEpRet |           -27.2 |
|      MinTestEpRet |           -59.4 |
|             EpLen |             110 |
|         TestEpLen |             110 |
| TotalEnvInteracts |        1.35e+05 |
|     AverageQ1Vals |           -40.9 |
|         StdQ1Vals |            8.45 |
|         MaxQ1Vals |           -26.3 |
|         MinQ1Vals |           -82.7 |
|     AverageQ2Vals |           -40.9 |
|         StdQ2Vals |            8.45 |
|         MaxQ2Vals |           -26.3 |
|         MinQ2Vals |           -82.1 |
|            LossPi |            40.4 |
|             LossQ |           0.581 |
|              Time |             789 |
---------------------------------------


---------------------------------------
|             Epoch |              91 |
|      AverageEpRet |           -37.1 |
|          StdEpRet |            24.6 |
|          MaxEpRet |           -21.3 |
|          MinEpRet |            -121 |
|  AverageTestEpRet |           -29.4 |
|      StdTestEpRet |            10.5 |
|      MaxTestEpRet |           -12.4 |
|      MinTestEpRet |           -50.3 |
|             EpLen |             110 |
|         TestEpLen |             110 |
| TotalEnvInteracts |         1.5e+05 |
|     AverageQ1Vals |           -36.2 |
|         StdQ1Vals |            8.06 |
|         MaxQ1Vals |             -23 |
|         MinQ1Vals |           -76.4 |
|     AverageQ2Vals |           -36.2 |
|         StdQ2Vals |            8.06 |
|         MaxQ2Vals |           -22.8 |
|         MinQ2Vals |           -75.3 |
|            LossPi |            35.8 |
|             LossQ |           0.532 |
|              Time |             884 |
---------------------------------------


---------------------------------------
|             Epoch |             100 |
|      AverageEpRet |           -28.8 |
|          StdEpRet |               9 |
|          MaxEpRet |           -17.1 |
|          MinEpRet |           -48.2 |
|  AverageTestEpRet |           -25.4 |
|      StdTestEpRet |            7.88 |
|      MaxTestEpRet |           -10.5 |
|      MinTestEpRet |             -38 |
|             EpLen |             110 |
|         TestEpLen |             110 |
| TotalEnvInteracts |        1.65e+05 |
|     AverageQ1Vals |           -32.8 |
|         StdQ1Vals |             7.9 |
|         MaxQ1Vals |           -20.3 |
|         MinQ1Vals |           -73.2 |
|     AverageQ2Vals |           -32.8 |
|         StdQ2Vals |             7.9 |
|         MaxQ2Vals |           -20.4 |
|         MinQ2Vals |           -72.7 |
|            LossPi |            32.5 |
|             LossQ |           0.492 |
|              Time |             984 |
---------------------------------------


In [8]:
# TD3 baseline 1: single 300-unit ReLU hidden layer, target-noise clip of 0.1.
# Spinning Up logs this run under the directory named below.
logger_kwargs = {'output_dir': 'td3_b1_norm', 'exp_name': 'td3_b1_norm'}

# Run length: each epoch spans 15 episodes' worth of environment steps.
seed_b = 0
epochs_b = 100
maxeplen_b = 110
spe_b = 15 * maxeplen_b

# Replay buffer and update settings.
repsize_b = int(1e6)
gamma_b = 0.99
polyak_b = 0.995
batchsize_b = 100
startsteps_b = 10000

# Actor-critic network shape, exploration noise and optimiser step sizes.
args_b = {'hidden_sizes': [300], 'activation': torch.nn.ReLU}
actnoise_b = 0.1
pilr_b = 1e-3
qlr_b = 1e-3

# TD3-only settings: policy update delay and target-policy smoothing noise.
# Note the clip (0.1) is tighter than the target noise scale (0.2).
pd_b = 2
targnoise_b = 0.2
noiseclip_b = 0.1

# Launch the baseline 1 training run.
spinup.td3_pytorch(
    env_fn,
    ac_kwargs=args_b,
    seed=seed_b,
    steps_per_epoch=spe_b,
    epochs=epochs_b,
    replay_size=repsize_b,
    gamma=gamma_b,
    polyak=polyak_b,
    batch_size=batchsize_b,
    start_steps=startsteps_b,
    max_ep_len=maxeplen_b,
    logger_kwargs=logger_kwargs,
    act_noise=actnoise_b,
    pi_lr=pilr_b,
    q_lr=qlr_b,
    policy_delay=pd_b,
    target_noise=targnoise_b,
    noise_clip=noiseclip_b,
)

[32;1mLogging data to td3_b1_norm/progress.txt[0m
[36;1mSaving config:
[0m
{
    "ac_kwargs":	{
        "activation":	"ReLU",
        "hidden_sizes":	[
            300
        ]
    },
    "act_noise":	0.1,
    "actor_critic":	"MLPActorCritic",
    "batch_size":	100,
    "env_fn":	"<function <lambda> at 0x7f5839fc6598>",
    "epochs":	100,
    "exp_name":	"td3_b1_norm",
    "gamma":	0.99,
    "logger":	{
        "<spinup.utils.logx.EpochLogger object at 0x7f582c24fc88>":	{
            "epoch_dict":	{},
            "exp_name":	"td3_b1_norm",
            "first_row":	true,
            "log_current_row":	{},
            "log_headers":	[],
            "output_dir":	"td3_b1_norm",
            "output_file":	{
                "<_io.TextIOWrapper name='td3_b1_norm/progress.txt' mode='w' encoding='UTF-8'>":	{
                    "mode":	"w"
                }
            }
        }
    },
    "logger_kwargs":	{
        "exp_name":	"td3_b1_norm",
        "output_dir":	"td3_b1_norm"
    },
 



---------------------------------------
|             Epoch |               1 |
|      AverageEpRet |            -179 |
|          StdEpRet |            8.24 |
|          MaxEpRet |            -158 |
|          MinEpRet |            -191 |
|  AverageTestEpRet |            -179 |
|      StdTestEpRet |            8.69 |
|      MaxTestEpRet |            -164 |
|      MinTestEpRet |            -192 |
|             EpLen |             110 |
|         TestEpLen |             110 |
| TotalEnvInteracts |        1.65e+03 |
|     AverageQ1Vals |           -3.34 |
|         StdQ1Vals |            1.64 |
|         MaxQ1Vals |            1.69 |
|         MinQ1Vals |           -7.91 |
|     AverageQ2Vals |           -3.33 |
|         StdQ2Vals |            1.66 |
|         MaxQ2Vals |            4.54 |
|         MinQ2Vals |           -8.02 |
|            LossPi |            3.08 |
|             LossQ |           0.417 |
|              Time |            5.73 |
---------------------------------------


---------------------------------------
|             Epoch |              10 |
|      AverageEpRet |            -141 |
|          StdEpRet |            16.1 |
|          MaxEpRet |            -111 |
|          MinEpRet |            -170 |
|  AverageTestEpRet |            -163 |
|      StdTestEpRet |            14.8 |
|      MaxTestEpRet |            -130 |
|      MinTestEpRet |            -182 |
|             EpLen |             110 |
|         TestEpLen |             110 |
| TotalEnvInteracts |        1.65e+04 |
|     AverageQ1Vals |           -47.3 |
|         StdQ1Vals |             2.8 |
|         MaxQ1Vals |           -38.1 |
|         MinQ1Vals |           -58.5 |
|     AverageQ2Vals |           -47.3 |
|         StdQ2Vals |            2.81 |
|         MaxQ2Vals |           -38.5 |
|         MinQ2Vals |           -58.4 |
|            LossPi |            46.9 |
|             LossQ |           0.549 |
|              Time |             101 |
---------------------------------------


---------------------------------------
|             Epoch |              19 |
|      AverageEpRet |            -126 |
|          StdEpRet |            25.4 |
|          MaxEpRet |           -73.3 |
|          MinEpRet |            -183 |
|  AverageTestEpRet |            -119 |
|      StdTestEpRet |            16.7 |
|      MaxTestEpRet |           -96.7 |
|      MinTestEpRet |            -156 |
|             EpLen |             110 |
|         TestEpLen |             110 |
| TotalEnvInteracts |        3.13e+04 |
|     AverageQ1Vals |           -71.5 |
|         StdQ1Vals |            4.91 |
|         MaxQ1Vals |             -59 |
|         MinQ1Vals |           -90.3 |
|     AverageQ2Vals |           -71.5 |
|         StdQ2Vals |            4.91 |
|         MaxQ2Vals |           -59.1 |
|         MinQ2Vals |           -90.4 |
|            LossPi |              71 |
|             LossQ |           0.833 |
|              Time |             198 |
---------------------------------------


---------------------------------------
|             Epoch |              28 |
|      AverageEpRet |           -81.9 |
|          StdEpRet |            22.1 |
|          MaxEpRet |           -29.4 |
|          MinEpRet |            -131 |
|  AverageTestEpRet |           -96.2 |
|      StdTestEpRet |            24.5 |
|      MaxTestEpRet |           -50.9 |
|      MinTestEpRet |            -136 |
|             EpLen |             110 |
|         TestEpLen |             110 |
| TotalEnvInteracts |        4.62e+04 |
|     AverageQ1Vals |           -80.8 |
|         StdQ1Vals |            8.55 |
|         MaxQ1Vals |           -60.5 |
|         MinQ1Vals |            -114 |
|     AverageQ2Vals |           -80.8 |
|         StdQ2Vals |            8.55 |
|         MaxQ2Vals |           -60.2 |
|         MinQ2Vals |            -114 |
|            LossPi |            80.2 |
|             LossQ |            1.24 |
|              Time |             287 |
---------------------------------------


---------------------------------------
|             Epoch |              37 |
|      AverageEpRet |           -85.6 |
|          StdEpRet |            22.1 |
|          MaxEpRet |           -47.5 |
|          MinEpRet |            -126 |
|  AverageTestEpRet |           -86.9 |
|      StdTestEpRet |            25.1 |
|      MaxTestEpRet |           -32.2 |
|      MinTestEpRet |            -112 |
|             EpLen |             110 |
|         TestEpLen |             110 |
| TotalEnvInteracts |         6.1e+04 |
|     AverageQ1Vals |           -78.8 |
|         StdQ1Vals |            9.82 |
|         MaxQ1Vals |           -57.7 |
|         MinQ1Vals |            -119 |
|     AverageQ2Vals |           -78.8 |
|         StdQ2Vals |            9.81 |
|         MaxQ2Vals |           -57.8 |
|         MinQ2Vals |            -117 |
|            LossPi |            78.3 |
|             LossQ |           0.914 |
|              Time |             365 |
---------------------------------------


---------------------------------------
|             Epoch |              46 |
|      AverageEpRet |           -85.1 |
|          StdEpRet |            21.4 |
|          MaxEpRet |           -48.1 |
|          MinEpRet |            -124 |
|  AverageTestEpRet |           -89.6 |
|      StdTestEpRet |            30.3 |
|      MaxTestEpRet |           -27.9 |
|      MinTestEpRet |            -127 |
|             EpLen |             110 |
|         TestEpLen |             110 |
| TotalEnvInteracts |        7.59e+04 |
|     AverageQ1Vals |             -76 |
|         StdQ1Vals |             9.7 |
|         MaxQ1Vals |           -56.5 |
|         MinQ1Vals |            -114 |
|     AverageQ2Vals |             -76 |
|         StdQ2Vals |             9.7 |
|         MaxQ2Vals |           -56.5 |
|         MinQ2Vals |            -113 |
|            LossPi |            75.4 |
|             LossQ |           0.748 |
|              Time |             436 |
---------------------------------------


---------------------------------------
|             Epoch |              55 |
|      AverageEpRet |             -65 |
|          StdEpRet |            21.7 |
|          MaxEpRet |           -41.9 |
|          MinEpRet |            -102 |
|  AverageTestEpRet |             -52 |
|      StdTestEpRet |            18.5 |
|      MaxTestEpRet |           -29.6 |
|      MinTestEpRet |           -94.3 |
|             EpLen |             110 |
|         TestEpLen |             110 |
| TotalEnvInteracts |        9.07e+04 |
|     AverageQ1Vals |           -71.4 |
|         StdQ1Vals |            9.29 |
|         MaxQ1Vals |           -54.8 |
|         MinQ1Vals |            -109 |
|     AverageQ2Vals |           -71.4 |
|         StdQ2Vals |            9.29 |
|         MaxQ2Vals |           -54.6 |
|         MinQ2Vals |            -109 |
|            LossPi |            70.9 |
|             LossQ |           0.791 |
|              Time |             509 |
---------------------------------------


---------------------------------------
|             Epoch |              64 |
|      AverageEpRet |           -81.5 |
|          StdEpRet |            21.4 |
|          MaxEpRet |           -49.1 |
|          MinEpRet |            -119 |
|  AverageTestEpRet |           -64.3 |
|      StdTestEpRet |            23.4 |
|      MaxTestEpRet |           -18.8 |
|      MinTestEpRet |            -103 |
|             EpLen |             110 |
|         TestEpLen |             110 |
| TotalEnvInteracts |        1.06e+05 |
|     AverageQ1Vals |           -67.4 |
|         StdQ1Vals |            8.87 |
|         MaxQ1Vals |           -51.8 |
|         MinQ1Vals |            -104 |
|     AverageQ2Vals |           -67.4 |
|         StdQ2Vals |            8.87 |
|         MaxQ2Vals |           -51.7 |
|         MinQ2Vals |            -104 |
|            LossPi |              67 |
|             LossQ |           0.671 |
|              Time |             576 |
---------------------------------------


---------------------------------------
|             Epoch |              73 |
|      AverageEpRet |           -43.2 |
|          StdEpRet |            8.46 |
|          MaxEpRet |           -21.2 |
|          MinEpRet |           -53.4 |
|  AverageTestEpRet |           -41.4 |
|      StdTestEpRet |            12.9 |
|      MaxTestEpRet |           -20.8 |
|      MinTestEpRet |           -63.9 |
|             EpLen |             110 |
|         TestEpLen |             110 |
| TotalEnvInteracts |         1.2e+05 |
|     AverageQ1Vals |           -65.9 |
|         StdQ1Vals |            9.38 |
|         MaxQ1Vals |           -49.5 |
|         MinQ1Vals |            -106 |
|     AverageQ2Vals |           -65.9 |
|         StdQ2Vals |            9.37 |
|         MaxQ2Vals |           -49.8 |
|         MinQ2Vals |            -107 |
|            LossPi |            65.4 |
|             LossQ |           0.662 |
|              Time |             650 |
---------------------------------------


---------------------------------------
|             Epoch |              82 |
|      AverageEpRet |           -37.3 |
|          StdEpRet |            8.57 |
|          MaxEpRet |           -22.8 |
|          MinEpRet |           -51.6 |
|  AverageTestEpRet |             -38 |
|      StdTestEpRet |            8.68 |
|      MaxTestEpRet |           -26.1 |
|      MinTestEpRet |           -49.9 |
|             EpLen |             110 |
|         TestEpLen |             110 |
| TotalEnvInteracts |        1.35e+05 |
|     AverageQ1Vals |           -58.3 |
|         StdQ1Vals |            8.67 |
|         MaxQ1Vals |           -44.8 |
|         MinQ1Vals |           -97.1 |
|     AverageQ2Vals |           -58.3 |
|         StdQ2Vals |            8.67 |
|         MaxQ2Vals |           -44.6 |
|         MinQ2Vals |           -97.6 |
|            LossPi |            57.8 |
|             LossQ |           0.748 |
|              Time |             716 |
---------------------------------------


---------------------------------------
|             Epoch |              91 |
|      AverageEpRet |           -32.3 |
|          StdEpRet |            5.73 |
|          MaxEpRet |           -23.6 |
|          MinEpRet |           -42.6 |
|  AverageTestEpRet |             -27 |
|      StdTestEpRet |            5.23 |
|      MaxTestEpRet |           -20.3 |
|      MinTestEpRet |           -37.8 |
|             EpLen |             110 |
|         TestEpLen |             110 |
| TotalEnvInteracts |         1.5e+05 |
|     AverageQ1Vals |           -49.5 |
|         StdQ1Vals |            8.22 |
|         MaxQ1Vals |           -37.8 |
|         MinQ1Vals |           -87.2 |
|     AverageQ2Vals |           -49.5 |
|         StdQ2Vals |            8.22 |
|         MaxQ2Vals |           -37.7 |
|         MinQ2Vals |           -87.9 |
|            LossPi |              49 |
|             LossQ |           0.694 |
|              Time |             782 |
---------------------------------------


---------------------------------------
|             Epoch |             100 |
|      AverageEpRet |             -41 |
|          StdEpRet |            10.9 |
|          MaxEpRet |           -28.9 |
|          MinEpRet |             -68 |
|  AverageTestEpRet |           -40.1 |
|      StdTestEpRet |            9.76 |
|      MaxTestEpRet |           -29.8 |
|      MinTestEpRet |           -64.5 |
|             EpLen |             110 |
|         TestEpLen |             110 |
| TotalEnvInteracts |        1.65e+05 |
|     AverageQ1Vals |           -42.7 |
|         StdQ1Vals |            7.53 |
|         MaxQ1Vals |           -30.9 |
|         MinQ1Vals |             -78 |
|     AverageQ2Vals |           -42.7 |
|         StdQ2Vals |            7.53 |
|         MaxQ2Vals |           -30.8 |
|         MinQ2Vals |           -78.5 |
|            LossPi |            42.2 |
|             LossQ |           0.652 |
|              Time |             848 |
---------------------------------------


In [9]:
# Baseline 2 (TD3): two hidden layers of 400 units, results logged to td3_b2_norm/
logger_kwargs = {'output_dir': 'td3_b2_norm', 'exp_name': 'td3_b2_norm'}

# Run length
seed_b = 0
epochs_b = 100
maxeplen_b = 110
spe_b = maxeplen_b * 15  # steps per epoch = 15 x the maximum episode length

# Replay buffer / update settings
repsize_b = 1000000
batchsize_b = 100
startsteps_b = 10000
gamma_b = 0.99
polyak_b = 0.995

# Actor-critic network and optimiser settings
args_b = {'hidden_sizes': [400, 400], 'activation': torch.nn.ReLU}
pilr_b = 0.001
qlr_b = 0.001
actnoise_b = 0.1

# TD3-specific settings
# NOTE(review): noise_clip (0.1) is smaller than target_noise (0.2) -- confirm this is intentional.
pd_b = 2
targnoise_b = 0.2
noiseclip_b = 0.1

# Launch training for baseline 2
spinup.td3_pytorch(
    env_fn,
    ac_kwargs=args_b,
    seed=seed_b,
    steps_per_epoch=spe_b,
    epochs=epochs_b,
    replay_size=repsize_b,
    gamma=gamma_b,
    polyak=polyak_b,
    batch_size=batchsize_b,
    start_steps=startsteps_b,
    max_ep_len=maxeplen_b,
    logger_kwargs=logger_kwargs,
    act_noise=actnoise_b,
    pi_lr=pilr_b,
    q_lr=qlr_b,
    policy_delay=pd_b,
    target_noise=targnoise_b,
    noise_clip=noiseclip_b,
)

[32;1mLogging data to td3_b2_norm/progress.txt[0m
[36;1mSaving config:
[0m
{
    "ac_kwargs":	{
        "activation":	"ReLU",
        "hidden_sizes":	[
            400,
            400
        ]
    },
    "act_noise":	0.1,
    "actor_critic":	"MLPActorCritic",
    "batch_size":	100,
    "env_fn":	"<function <lambda> at 0x7f5839fc6598>",
    "epochs":	100,
    "exp_name":	"td3_b2_norm",
    "gamma":	0.99,
    "logger":	{
        "<spinup.utils.logx.EpochLogger object at 0x7f582c24fe48>":	{
            "epoch_dict":	{},
            "exp_name":	"td3_b2_norm",
            "first_row":	true,
            "log_current_row":	{},
            "log_headers":	[],
            "output_dir":	"td3_b2_norm",
            "output_file":	{
                "<_io.TextIOWrapper name='td3_b2_norm/progress.txt' mode='w' encoding='UTF-8'>":	{
                    "mode":	"w"
                }
            }
        }
    },
    "logger_kwargs":	{
        "exp_name":	"td3_b2_norm",
        "output_dir":	"td3_



---------------------------------------
|             Epoch |               1 |
|      AverageEpRet |            -178 |
|          StdEpRet |            9.44 |
|          MaxEpRet |            -153 |
|          MinEpRet |            -189 |
|  AverageTestEpRet |            -183 |
|      StdTestEpRet |            8.56 |
|      MaxTestEpRet |            -160 |
|      MinTestEpRet |            -192 |
|             EpLen |             110 |
|         TestEpLen |             110 |
| TotalEnvInteracts |        1.65e+03 |
|     AverageQ1Vals |           -3.77 |
|         StdQ1Vals |            1.09 |
|         MaxQ1Vals |            2.14 |
|         MinQ1Vals |           -7.03 |
|     AverageQ2Vals |           -3.77 |
|         StdQ2Vals |             1.1 |
|         MaxQ2Vals |            1.49 |
|         MinQ2Vals |           -7.01 |
|            LossPi |            3.76 |
|             LossQ |           0.263 |
|              Time |            11.5 |
---------------------------------------


---------------------------------------
|             Epoch |              10 |
|      AverageEpRet |            -151 |
|          StdEpRet |            27.3 |
|          MaxEpRet |            -101 |
|          MinEpRet |            -187 |
|  AverageTestEpRet |            -160 |
|      StdTestEpRet |            22.4 |
|      MaxTestEpRet |            -117 |
|      MinTestEpRet |            -187 |
|             EpLen |             110 |
|         TestEpLen |             110 |
| TotalEnvInteracts |        1.65e+04 |
|     AverageQ1Vals |           -43.6 |
|         StdQ1Vals |            4.11 |
|         MaxQ1Vals |           -30.7 |
|         MinQ1Vals |           -59.7 |
|     AverageQ2Vals |           -43.6 |
|         StdQ2Vals |            4.11 |
|         MaxQ2Vals |           -30.7 |
|         MinQ2Vals |             -60 |
|            LossPi |            43.1 |
|             LossQ |           0.649 |
|              Time |             250 |
---------------------------------------


---------------------------------------
|             Epoch |              19 |
|      AverageEpRet |            -112 |
|          StdEpRet |            27.4 |
|          MaxEpRet |           -75.5 |
|          MinEpRet |            -169 |
|  AverageTestEpRet |            -115 |
|      StdTestEpRet |            21.4 |
|      MaxTestEpRet |             -77 |
|      MinTestEpRet |            -157 |
|             EpLen |             110 |
|         TestEpLen |             110 |
| TotalEnvInteracts |        3.13e+04 |
|     AverageQ1Vals |           -65.1 |
|         StdQ1Vals |            5.59 |
|         MaxQ1Vals |           -49.6 |
|         MinQ1Vals |           -88.3 |
|     AverageQ2Vals |           -65.1 |
|         StdQ2Vals |            5.59 |
|         MaxQ2Vals |           -49.8 |
|         MinQ2Vals |           -88.4 |
|            LossPi |            64.5 |
|             LossQ |           0.736 |
|              Time |             489 |
---------------------------------------


---------------------------------------
|             Epoch |              28 |
|      AverageEpRet |            -102 |
|          StdEpRet |            29.6 |
|          MaxEpRet |           -47.3 |
|          MinEpRet |            -144 |
|  AverageTestEpRet |           -96.8 |
|      StdTestEpRet |            19.7 |
|      MaxTestEpRet |           -68.7 |
|      MinTestEpRet |            -137 |
|             EpLen |             110 |
|         TestEpLen |             110 |
| TotalEnvInteracts |        4.62e+04 |
|     AverageQ1Vals |           -74.2 |
|         StdQ1Vals |            7.79 |
|         MaxQ1Vals |           -50.4 |
|         MinQ1Vals |            -101 |
|     AverageQ2Vals |           -74.2 |
|         StdQ2Vals |            7.79 |
|         MaxQ2Vals |           -50.3 |
|         MinQ2Vals |            -100 |
|            LossPi |            73.5 |
|             LossQ |           0.771 |
|              Time |             722 |
---------------------------------------


---------------------------------------
|             Epoch |              37 |
|      AverageEpRet |           -73.7 |
|          StdEpRet |            15.4 |
|          MaxEpRet |           -44.6 |
|          MinEpRet |           -98.5 |
|  AverageTestEpRet |           -67.9 |
|      StdTestEpRet |            17.7 |
|      MaxTestEpRet |           -42.2 |
|      MinTestEpRet |            -103 |
|             EpLen |             110 |
|         TestEpLen |             110 |
| TotalEnvInteracts |         6.1e+04 |
|     AverageQ1Vals |           -72.7 |
|         StdQ1Vals |            9.36 |
|         MaxQ1Vals |           -46.3 |
|         MinQ1Vals |            -109 |
|     AverageQ2Vals |           -72.7 |
|         StdQ2Vals |            9.37 |
|         MaxQ2Vals |           -46.2 |
|         MinQ2Vals |            -109 |
|            LossPi |            72.1 |
|             LossQ |           0.781 |
|              Time |             965 |
---------------------------------------


KeyboardInterrupt: 

#### SAC

In [None]:
# Baseline 0 (SAC): single hidden layer of 300 units, results logged to sac_b0_norm/
logger_kwargs = {'output_dir': 'sac_b0_norm', 'exp_name': 'sac_b0_norm'}

# Run length
seed_b = 0
epochs_b = 100
maxeplen_b = 110
spe_b = maxeplen_b * 15  # steps per epoch = 15 x the maximum episode length

# Replay buffer / update settings
repsize_b = 1000000
batchsize_b = 100
startsteps_b = 10000
gamma_b = 0.99
polyak_b = 0.995

# Actor-critic network and SAC-specific settings
args_b = {'hidden_sizes': [300], 'activation': torch.nn.ReLU}
lr_b = 0.001
alpha_b = 0.2

# Launch training for baseline 0
spinup.sac_pytorch(
    env_fn,
    ac_kwargs=args_b,
    seed=seed_b,
    steps_per_epoch=spe_b,
    epochs=epochs_b,
    replay_size=repsize_b,
    gamma=gamma_b,
    polyak=polyak_b,
    batch_size=batchsize_b,
    start_steps=startsteps_b,
    max_ep_len=maxeplen_b,
    logger_kwargs=logger_kwargs,
    lr=lr_b,
    alpha=alpha_b,
)

In [None]:
# Baseline 1 (SAC): single hidden layer of 300 units, results logged to sac_b1_norm/
# exp_name is kept identical to output_dir (as in the other training cells) so the
# saved config labels this run unambiguously; it previously read 'sac_b_norm'.
logger_kwargs = dict(output_dir='sac_b1_norm', exp_name='sac_b1_norm')

# Run length
seed_b = 0
epochs_b = 100
maxeplen_b = 110
spe_b = maxeplen_b * 15  # steps per epoch = 15 x the maximum episode length

# Replay buffer / update settings
repsize_b = 1000000
gamma_b = 0.99
polyak_b = 0.9
batchsize_b = 100
startsteps_b = 5000

# Actor-critic network and SAC-specific settings
args_b = dict(hidden_sizes=[300,], activation=torch.nn.ReLU)
lr_b = 0.001
alpha_b = 0.2

# Baseline 1 training
spinup.sac_pytorch(
    env_fn,
    ac_kwargs=args_b,
    seed=seed_b,
    steps_per_epoch=spe_b,
    epochs=epochs_b,
    replay_size=repsize_b,
    gamma=gamma_b,
    polyak=polyak_b,
    batch_size=batchsize_b,
    start_steps=startsteps_b,
    max_ep_len=maxeplen_b,
    logger_kwargs=logger_kwargs,
    lr=lr_b,
    alpha=alpha_b,
)

In [None]:
# Baseline 2 (SAC): two hidden layers of 400 units, results logged to sac_b2_norm/
logger_kwargs = {'output_dir': 'sac_b2_norm', 'exp_name': 'sac_b2_norm'}

# Run length
seed_b = 0
epochs_b = 100
maxeplen_b = 110
spe_b = maxeplen_b * 15  # steps per epoch = 15 x the maximum episode length

# Replay buffer / update settings
repsize_b = 1000000
batchsize_b = 100
startsteps_b = 5000
gamma_b = 0.99
polyak_b = 0.9

# Actor-critic network and SAC-specific settings
args_b = {'hidden_sizes': [400, 400], 'activation': torch.nn.ReLU}
lr_b = 0.001
alpha_b = 0.2

# Launch training for baseline 2
spinup.sac_pytorch(
    env_fn,
    ac_kwargs=args_b,
    seed=seed_b,
    steps_per_epoch=spe_b,
    epochs=epochs_b,
    replay_size=repsize_b,
    gamma=gamma_b,
    polyak=polyak_b,
    batch_size=batchsize_b,
    start_steps=startsteps_b,
    max_ep_len=maxeplen_b,
    logger_kwargs=logger_kwargs,
    lr=lr_b,
    alpha=alpha_b,
)

## Test

In [3]:
# Test parameters
# Note: this rebinds the notebook-level name `reward_args` that was used earlier
# when building env_fn.
env_name = 'gyroscopeenv-v0'
reward_type = 'Quadratic'
reward_args = dict(qx1=9, qx2=0.04, qx3=9, qx4=0.04, pu1=0.01, pu2=0.01)
seed = 2
# NOTE(review): this path is not one of the output_dir values used by the visible
# training cells -- confirm it points at the intended saved agent.
agent_path = 'ddpg_reward_normalized'
init_state = [0, 0, 0, 0, 3.10, 2, 20]
dt = 0.02
t_end = 4

# Run the saved agent; `val` and `act` are passed on to plot_test in the Plot cell.
r, score, val, act = test_agent(
    env_name,
    reward_type,
    reward_args,
    seed,
    agent_path,
    init_state,
    dt,
    t_end,
)



-59.706557908864376


## Plot

In [None]:
# Plot the test run using dt, t_end, val and act produced by the Test cell.
plot_test(dt, t_end, val, act)