In [1]:
#Import GYM stuff
import gym
from gym import Env
from gym.spaces import Discrete, Box, Dict, Tuple, MultiBinary, MultiDiscrete

#Import helpers
import numpy as np
import random
import os
import pandas as pd

# Import stable baselines stuff
from stable_baselines3 import PPO, A2C
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy

In [2]:
def get_data(start = '2017-01-01 00:00:00', end = '2017-12-12 23:55:00'):

    # import standard load profiles

    slp = pd.read_csv('df_p.csv', index_col=0, parse_dates=True)['0'] / 1000
    slp = slp.resample('15min').mean() * 3

    pv = pd.read_csv('Solar_Data-2011.csv', delimiter=';',
                    index_col=0, parse_dates=False)["Generation"] * 3
    pv.index = slp.index
    start = pd.to_datetime(start)
    end = pd.to_datetime(end)
    
    return slp[start:end], pv[start:end]

In [3]:
class CostEnv(Env):
    def __init__(self):
        # Actions we can take increase in cost, lowering of cost
        self.action_space = Discrete(2)    
        ### Get input data, just choose one day for now
        self.load, self.pv = get_data(start = '2017-01-01 00:00:00', end = '2017-12-12 23:55:00')
        self.pv_price = 0.1
        self.grid_price = 0.4
        self.line_max = 15
        self.grid_penalty = 200
        self.battery_max = 20
        self.battery_state = 0
        ###
        
        # Set episode length
        self.episode_length = len(self.load)
        
        self.observation_space = Dict(
            {
                "load": Box(0, self.episode_length - 1, shape=(2,), dtype=int),
                #"pv": Box(0, self.episode_length - 1, shape=(2,), dtype=int),
            }
        )
        
    def step(self, action):
        # Apply action
        # 0 -1 = -1 decrease in cost
        # 1 -1 = 0 Increase in cost
        # self.state += action -1 
        # Reduce episode length by 1 second
        # self.episode_length -= 1 ###Move to the end of the action
        ### We calculate the reward based on the price for the electricity, lower price, "higher" reward
        if action == 0:
            # Take all electricity from grid
            if self.load[len(self.load)-self.episode_length] > self.line_max:
                reward = self.load[len(self.load)-self.episode_length] * self.grid_price * -1 - abs(self.load[len(self.load)-self.episode_length] - self.line_max) * self.grid_penalty
            else:
                reward = self.load[len(self.load)-self.episode_length] * self.grid_price * -1
            
        elif action == 1:
            # Take all electricity from pv
            if self.pv[len(self.pv)-self.episode_length] >= self.load[len(self.load)-self.episode_length]:
                if abs(self.pv[len(self.pv)-self.episode_length] - self.load[len(self.load)-self.episode_length]) > self.line_max:
                    reward = self.load[len(self.load)-self.episode_length] * self.pv_price * -1 - (self.pv[len(self.pv)-self.episode_length] - self.load[len(self.load)-self.episode_length]) * self.grid_penalty
                else:
                    reward = self.load[len(self.load)-self.episode_length] * self.pv_price * -1
            else:
                if (self.load[len(self.load)-self.episode_length] - self.pv[len(self.pv)-self.episode_length]) > self.line_max:
                    reward = self.pv[len(self.pv)-self.episode_length] * self.pv_price * -1 - (self.load[len(self.load)-self.episode_length] - self.pv[len(self.pv)-self.episode_length]) * self.grid_price - (self.load[len(self.load)-self.episode_length] - self.pv[len(self.pv)-self.episode_length]) * self.grid_penalty
                else:
                    reward = self.pv[len(self.pv)-self.episode_length] * self.pv_price * -1
            ### This may lead the agent to always choose action 1, 
            ###because it will always supply the demand and will always be cheaper.
        
        else:
            # Invalid action
            raise(f'Invalid action: {action}')

        # Apply cost fluctuation
        # self.state += random.randint(-1,1)
        # Set placeholder for info
        info = {}
        
        ### Observation
        observation = {
            "load": self.load[len(self.load)-self.episode_length],
            "pv": self.pv[len(self.pv)-self.episode_length],
            }
        
        ### Either here or before checking self.episode_length
        self.episode_length -= 1
        ### Check if timeseries is over
        if self.episode_length <= 0: 
            done = True
        else:
            done = False
        
        # Return step information
        # return self.state, reward, done, info
        return observation, reward, done, info

    def render(self):
        # Implement viz
        pass
    
    def reset(self):
        self.done=False
        # Set episode length
        self.episode_length = len(self.load)
        
        observation = {
            "load": 0, #self.load[len(self.load)-self.episode_length],
            "pv": 0, #self.pv[len(self.pv)-self.episode_length],
            }
        
        return observation





In [25]:
env = CostEnv()

In [26]:
episodes = 20
for episode in range(1, episodes+1):
    state = env.reset()
    done = False
    score = 0 
    
    while not done:
        env.render()
        action = env.action_space.sample()
        # n_state, reward, done, info = env.step(action)
        observation, reward, done, info = env.step(action)
        score+=reward
    print('Episode:{} Score:{}'.format(episode, score))
env.close()


NotImplementedError: 

In [None]:
log_path = os.path.join('Training', 'Logs')

model = A2C("MultiInputPolicy", env, verbose=1, tensorboard_log=log_path)

model.learn(total_timesteps=500000)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Logging to Training\Logs\A2C_5
------------------------------------
| time/                 |          |
|    fps                | 14       |
|    iterations         | 100      |
|    time_elapsed       | 34       |
|    total_timesteps    | 500      |
| train/                |          |
|    entropy_loss       | -0.134   |
|    explained_variance | 0        |
|    learning_rate      | 0.0007   |
|    n_updates          | 99       |
|    policy_loss        | -0.189   |
|    value_loss         | 0.0358   |
------------------------------------
------------------------------------
| time/                 |          |
|    fps                | 27       |
|    iterations         | 200      |
|    time_elapsed       | 36       |
|    total_timesteps    | 1000     |
| train/                |          |
|    entropy_loss       | -0.00771 |
|    explained_variance | -3.53    |
|    learning_rate      

-------------------------------------
| time/                 |           |
|    fps                | 132       |
|    iterations         | 1700      |
|    time_elapsed       | 64        |
|    total_timesteps    | 8500      |
| train/                |           |
|    entropy_loss       | -0.00096  |
|    explained_variance | -0.219    |
|    learning_rate      | 0.0007    |
|    n_updates          | 1699      |
|    policy_loss        | -2.05e-05 |
|    value_loss         | 0.0383    |
-------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 136       |
|    iterations         | 1800      |
|    time_elapsed       | 65        |
|    total_timesteps    | 9000      |
| train/                |           |
|    entropy_loss       | -0.00197  |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 1799      |
|    policy_loss        | -2.16e-05 |
|    value_l

------------------------------------
| time/                 |          |
|    fps                | 172      |
|    iterations         | 3300     |
|    time_elapsed       | 95       |
|    total_timesteps    | 16500    |
| train/                |          |
|    entropy_loss       | -0.00052 |
|    explained_variance | -10.8    |
|    learning_rate      | 0.0007   |
|    n_updates          | 3299     |
|    policy_loss        | 2.87e-06 |
|    value_loss         | 0.0513   |
------------------------------------
------------------------------------
| time/                 |          |
|    fps                | 173      |
|    iterations         | 3400     |
|    time_elapsed       | 97       |
|    total_timesteps    | 17000    |
| train/                |          |
|    entropy_loss       | -0.00125 |
|    explained_variance | -1.45    |
|    learning_rate      | 0.0007   |
|    n_updates          | 3399     |
|    policy_loss        | 8.85e-06 |
|    value_loss         | 0.00574  |
-

-------------------------------------
| time/                 |           |
|    fps                | 188       |
|    iterations         | 4900      |
|    time_elapsed       | 129       |
|    total_timesteps    | 24500     |
| train/                |           |
|    entropy_loss       | -0.000727 |
|    explained_variance | -2.38e-07 |
|    learning_rate      | 0.0007    |
|    n_updates          | 4899      |
|    policy_loss        | 3.74e-06  |
|    value_loss         | 0.00361   |
-------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 188       |
|    iterations         | 5000      |
|    time_elapsed       | 132       |
|    total_timesteps    | 25000     |
| train/                |           |
|    entropy_loss       | -0.000781 |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 4999      |
|    policy_loss        | 3.46e-06  |
|    value_l

-------------------------------------
| time/                 |           |
|    fps                | 199       |
|    iterations         | 6500      |
|    time_elapsed       | 162       |
|    total_timesteps    | 32500     |
| train/                |           |
|    entropy_loss       | -0.000407 |
|    explained_variance | 1.19e-07  |
|    learning_rate      | 0.0007    |
|    n_updates          | 6499      |
|    policy_loss        | -4.67e-06 |
|    value_loss         | 0.0199    |
-------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 200       |
|    iterations         | 6600      |
|    time_elapsed       | 164       |
|    total_timesteps    | 33000     |
| train/                |           |
|    entropy_loss       | -5.12e-05 |
|    explained_variance | -6.75     |
|    learning_rate      | 0.0007    |
|    n_updates          | 6599      |
|    policy_loss        | 2.29e-08  |
|    value_l

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 3.32e+04  |
|    ep_rew_mean        | -1.44e+04 |
| time/                 |           |
|    fps                | 212       |
|    iterations         | 7900      |
|    time_elapsed       | 186       |
|    total_timesteps    | 39500     |
| train/                |           |
|    entropy_loss       | -0.000378 |
|    explained_variance | 0.0236    |
|    learning_rate      | 0.0007    |
|    n_updates          | 7899      |
|    policy_loss        | -5.72e-06 |
|    value_loss         | 0.0319    |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 3.32e+04  |
|    ep_rew_mean        | -1.44e+04 |
| time/                 |           |
|    fps                | 213       |
|    iterations         | 8000      |
|    time_elapsed       | 187       |
|    total_timesteps    | 40000     |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 3.32e+04  |
|    ep_rew_mean        | -1.44e+04 |
| time/                 |           |
|    fps                | 220       |
|    iterations         | 9200      |
|    time_elapsed       | 208       |
|    total_timesteps    | 46000     |
| train/                |           |
|    entropy_loss       | -0.00104  |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 9199      |
|    policy_loss        | 9.41e-06  |
|    value_loss         | 0.0103    |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 3.32e+04  |
|    ep_rew_mean        | -1.44e+04 |
| time/                 |           |
|    fps                | 221       |
|    iterations         | 9300      |
|    time_elapsed       | 209       |
|    total_timesteps    | 46500     |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 3.32e+04  |
|    ep_rew_mean        | -1.44e+04 |
| time/                 |           |
|    fps                | 228       |
|    iterations         | 10500     |
|    time_elapsed       | 229       |
|    total_timesteps    | 52500     |
| train/                |           |
|    entropy_loss       | -3.19e-05 |
|    explained_variance | 0.739     |
|    learning_rate      | 0.0007    |
|    n_updates          | 10499     |
|    policy_loss        | -3.84e-08 |
|    value_loss         | 0.000516  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 3.32e+04  |
|    ep_rew_mean        | -1.44e+04 |
| time/                 |           |
|    fps                | 228       |
|    iterations         | 10600     |
|    time_elapsed       | 231       |
|    total_timesteps    | 53000     |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 3.32e+04  |
|    ep_rew_mean        | -1.44e+04 |
| time/                 |           |
|    fps                | 234       |
|    iterations         | 11800     |
|    time_elapsed       | 251       |
|    total_timesteps    | 59000     |
| train/                |           |
|    entropy_loss       | -0.00025  |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 11799     |
|    policy_loss        | -2.01e-06 |
|    value_loss         | 0.0113    |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 3.32e+04  |
|    ep_rew_mean        | -1.44e+04 |
| time/                 |           |
|    fps                | 235       |
|    iterations         | 11900     |
|    time_elapsed       | 252       |
|    total_timesteps    | 59500     |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 3.32e+04  |
|    ep_rew_mean        | -1.44e+04 |
| time/                 |           |
|    fps                | 239       |
|    iterations         | 13100     |
|    time_elapsed       | 273       |
|    total_timesteps    | 65500     |
| train/                |           |
|    entropy_loss       | -0.000217 |
|    explained_variance | -1.19e-07 |
|    learning_rate      | 0.0007    |
|    n_updates          | 13099     |
|    policy_loss        | 3.69e-07  |
|    value_loss         | 0.000506  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 3.32e+04  |
|    ep_rew_mean        | -1.44e+04 |
| time/                 |           |
|    fps                | 240       |
|    iterations         | 13200     |
|    time_elapsed       | 274       |
|    total_timesteps    | 66000     |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 3.32e+04  |
|    ep_rew_mean        | -1.43e+04 |
| time/                 |           |
|    fps                | 245       |
|    iterations         | 14400     |
|    time_elapsed       | 293       |
|    total_timesteps    | 72000     |
| train/                |           |
|    entropy_loss       | -0.000218 |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 14399     |
|    policy_loss        | 5.15e-07  |
|    value_loss         | 0.000984  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 3.32e+04  |
|    ep_rew_mean        | -1.43e+04 |
| time/                 |           |
|    fps                | 245       |
|    iterations         | 14500     |
|    time_elapsed       | 295       |
|    total_timesteps    | 72500     |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 3.32e+04  |
|    ep_rew_mean        | -1.43e+04 |
| time/                 |           |
|    fps                | 249       |
|    iterations         | 15700     |
|    time_elapsed       | 315       |
|    total_timesteps    | 78500     |
| train/                |           |
|    entropy_loss       | -0.000147 |
|    explained_variance | 0.0799    |
|    learning_rate      | 0.0007    |
|    n_updates          | 15699     |
|    policy_loss        | -6.5e-07  |
|    value_loss         | 0.0313    |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 3.32e+04  |
|    ep_rew_mean        | -1.43e+04 |
| time/                 |           |
|    fps                | 249       |
|    iterations         | 15800     |
|    time_elapsed       | 316       |
|    total_timesteps    | 79000     |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 3.32e+04  |
|    ep_rew_mean        | -1.43e+04 |
| time/                 |           |
|    fps                | 252       |
|    iterations         | 17000     |
|    time_elapsed       | 336       |
|    total_timesteps    | 85000     |
| train/                |           |
|    entropy_loss       | -0.000291 |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 16999     |
|    policy_loss        | 1.88e-06  |
|    value_loss         | 0.00692   |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 3.32e+04  |
|    ep_rew_mean        | -1.43e+04 |
| time/                 |           |
|    fps                | 253       |
|    iterations         | 17100     |
|    time_elapsed       | 337       |
|    total_timesteps    | 85500     |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 3.32e+04  |
|    ep_rew_mean        | -1.43e+04 |
| time/                 |           |
|    fps                | 256       |
|    iterations         | 18300     |
|    time_elapsed       | 356       |
|    total_timesteps    | 91500     |
| train/                |           |
|    entropy_loss       | -0.000247 |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 18299     |
|    policy_loss        | 1.3e-06   |
|    value_loss         | 0.0047    |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 3.32e+04  |
|    ep_rew_mean        | -1.43e+04 |
| time/                 |           |
|    fps                | 256       |
|    iterations         | 18400     |
|    time_elapsed       | 358       |
|    total_timesteps    | 92000     |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 3.32e+04  |
|    ep_rew_mean        | -1.43e+04 |
| time/                 |           |
|    fps                | 259       |
|    iterations         | 19600     |
|    time_elapsed       | 377       |
|    total_timesteps    | 98000     |
| train/                |           |
|    entropy_loss       | -9.91e-05 |
|    explained_variance | -2.89     |
|    learning_rate      | 0.0007    |
|    n_updates          | 19599     |
|    policy_loss        | 3.75e-07  |
|    value_loss         | 0.0013    |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 3.32e+04  |
|    ep_rew_mean        | -1.43e+04 |
| time/                 |           |
|    fps                | 259       |
|    iterations         | 19700     |
|    time_elapsed       | 379       |
|    total_timesteps    | 98500     |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 3.32e+04  |
|    ep_rew_mean        | -1.43e+04 |
| time/                 |           |
|    fps                | 261       |
|    iterations         | 20900     |
|    time_elapsed       | 398       |
|    total_timesteps    | 104500    |
| train/                |           |
|    entropy_loss       | -0.000206 |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 20899     |
|    policy_loss        | -2.34e-06 |
|    value_loss         | 0.0216    |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 3.32e+04  |
|    ep_rew_mean        | -1.43e+04 |
| time/                 |           |
|    fps                | 262       |
|    iterations         | 21000     |
|    time_elapsed       | 400       |
|    total_timesteps    | 105000    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 3.32e+04  |
|    ep_rew_mean        | -1.43e+04 |
| time/                 |           |
|    fps                | 264       |
|    iterations         | 22200     |
|    time_elapsed       | 420       |
|    total_timesteps    | 111000    |
| train/                |           |
|    entropy_loss       | -0.000242 |
|    explained_variance | 1.19e-07  |
|    learning_rate      | 0.0007    |
|    n_updates          | 22199     |
|    policy_loss        | 1.43e-06  |
|    value_loss         | 0.00588   |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 3.32e+04  |
|    ep_rew_mean        | -1.43e+04 |
| time/                 |           |
|    fps                | 264       |
|    iterations         | 22300     |
|    time_elapsed       | 421       |
|    total_timesteps    | 111500    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 3.32e+04  |
|    ep_rew_mean        | -1.43e+04 |
| time/                 |           |
|    fps                | 266       |
|    iterations         | 23500     |
|    time_elapsed       | 441       |
|    total_timesteps    | 117500    |
| train/                |           |
|    entropy_loss       | -0.000266 |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 23499     |
|    policy_loss        | 1.73e-06  |
|    value_loss         | 0.00659   |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 3.32e+04  |
|    ep_rew_mean        | -1.43e+04 |
| time/                 |           |
|    fps                | 266       |
|    iterations         | 23600     |
|    time_elapsed       | 443       |
|    total_timesteps    | 118000    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 3.32e+04  |
|    ep_rew_mean        | -1.43e+04 |
| time/                 |           |
|    fps                | 265       |
|    iterations         | 24800     |
|    time_elapsed       | 466       |
|    total_timesteps    | 124000    |
| train/                |           |
|    entropy_loss       | -0.000217 |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 24799     |
|    policy_loss        | -1.47e-06 |
|    value_loss         | 0.00805   |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 3.32e+04  |
|    ep_rew_mean        | -1.43e+04 |
| time/                 |           |
|    fps                | 265       |
|    iterations         | 24900     |
|    time_elapsed       | 468       |
|    total_timesteps    | 124500    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 3.32e+04  |
|    ep_rew_mean        | -1.43e+04 |
| time/                 |           |
|    fps                | 266       |
|    iterations         | 26100     |
|    time_elapsed       | 489       |
|    total_timesteps    | 130500    |
| train/                |           |
|    entropy_loss       | -5.34e-05 |
|    explained_variance | 0.502     |
|    learning_rate      | 0.0007    |
|    n_updates          | 26099     |
|    policy_loss        | 2.36e-08  |
|    value_loss         | 0.000113  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 3.32e+04  |
|    ep_rew_mean        | -1.43e+04 |
| time/                 |           |
|    fps                | 266       |
|    iterations         | 26200     |
|    time_elapsed       | 491       |
|    total_timesteps    | 131000    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 3.32e+04  |
|    ep_rew_mean        | -1.43e+04 |
| time/                 |           |
|    fps                | 266       |
|    iterations         | 27400     |
|    time_elapsed       | 513       |
|    total_timesteps    | 137000    |
| train/                |           |
|    entropy_loss       | -0.000188 |
|    explained_variance | -1.19e-07 |
|    learning_rate      | 0.0007    |
|    n_updates          | 27399     |
|    policy_loss        | 4.25e-07  |
|    value_loss         | 0.000887  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 3.32e+04  |
|    ep_rew_mean        | -1.43e+04 |
| time/                 |           |
|    fps                | 266       |
|    iterations         | 27500     |
|    time_elapsed       | 515       |
|    total_timesteps    | 137500    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 3.32e+04  |
|    ep_rew_mean        | -1.43e+04 |
| time/                 |           |
|    fps                | 266       |
|    iterations         | 28700     |
|    time_elapsed       | 537       |
|    total_timesteps    | 143500    |
| train/                |           |
|    entropy_loss       | -6.77e-05 |
|    explained_variance | -0.311    |
|    learning_rate      | 0.0007    |
|    n_updates          | 28699     |
|    policy_loss        | -1.98e-06 |
|    value_loss         | 0.104     |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 3.32e+04  |
|    ep_rew_mean        | -1.43e+04 |
| time/                 |           |
|    fps                | 267       |
|    iterations         | 28800     |
|    time_elapsed       | 539       |
|    total_timesteps    | 144000    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 3.32e+04  |
|    ep_rew_mean        | -1.43e+04 |
| time/                 |           |
|    fps                | 268       |
|    iterations         | 30000     |
|    time_elapsed       | 559       |
|    total_timesteps    | 150000    |
| train/                |           |
|    entropy_loss       | -6.51e-05 |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 29999     |
|    policy_loss        | -4.52e-07 |
|    value_loss         | 0.0109    |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 3.32e+04  |
|    ep_rew_mean        | -1.43e+04 |
| time/                 |           |
|    fps                | 268       |
|    iterations         | 30100     |
|    time_elapsed       | 561       |
|    total_timesteps    | 150500    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 3.32e+04  |
|    ep_rew_mean        | -1.43e+04 |
| time/                 |           |
|    fps                | 268       |
|    iterations         | 31300     |
|    time_elapsed       | 583       |
|    total_timesteps    | 156500    |
| train/                |           |
|    entropy_loss       | -6.52e-05 |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 31299     |
|    policy_loss        | 3.06e-07  |
|    value_loss         | 0.005     |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 3.32e+04  |
|    ep_rew_mean        | -1.43e+04 |
| time/                 |           |
|    fps                | 267       |
|    iterations         | 31400     |
|    time_elapsed       | 585       |
|    total_timesteps    | 157000    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 3.32e+04  |
|    ep_rew_mean        | -1.43e+04 |
| time/                 |           |
|    fps                | 267       |
|    iterations         | 32600     |
|    time_elapsed       | 608       |
|    total_timesteps    | 163000    |
| train/                |           |
|    entropy_loss       | -2e-05    |
|    explained_variance | -0.000565 |
|    learning_rate      | 0.0007    |
|    n_updates          | 32599     |
|    policy_loss        | 4.38e-08  |
|    value_loss         | 0.00135   |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 3.32e+04  |
|    ep_rew_mean        | -1.43e+04 |
| time/                 |           |
|    fps                | 267       |
|    iterations         | 32700     |
|    time_elapsed       | 610       |
|    total_timesteps    | 163500    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 3.32e+04  |
|    ep_rew_mean        | -1.43e+04 |
| time/                 |           |
|    fps                | 268       |
|    iterations         | 33900     |
|    time_elapsed       | 632       |
|    total_timesteps    | 169500    |
| train/                |           |
|    entropy_loss       | -3e-05    |
|    explained_variance | 0.477     |
|    learning_rate      | 0.0007    |
|    n_updates          | 33899     |
|    policy_loss        | -8.61e-08 |
|    value_loss         | 0.00122   |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 3.32e+04  |
|    ep_rew_mean        | -1.43e+04 |
| time/                 |           |
|    fps                | 268       |
|    iterations         | 34000     |
|    time_elapsed       | 634       |
|    total_timesteps    | 170000    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 3.32e+04  |
|    ep_rew_mean        | -1.43e+04 |
| time/                 |           |
|    fps                | 268       |
|    iterations         | 35200     |
|    time_elapsed       | 656       |
|    total_timesteps    | 176000    |
| train/                |           |
|    entropy_loss       | -4.3e-05  |
|    explained_variance | 0.236     |
|    learning_rate      | 0.0007    |
|    n_updates          | 35199     |
|    policy_loss        | 1.89e-07  |
|    value_loss         | 0.0029    |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 3.32e+04  |
|    ep_rew_mean        | -1.43e+04 |
| time/                 |           |
|    fps                | 268       |
|    iterations         | 35300     |
|    time_elapsed       | 658       |
|    total_timesteps    | 176500    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 3.32e+04  |
|    ep_rew_mean        | -1.43e+04 |
| time/                 |           |
|    fps                | 267       |
|    iterations         | 36500     |
|    time_elapsed       | 682       |
|    total_timesteps    | 182500    |
| train/                |           |
|    entropy_loss       | -4.43e-05 |
|    explained_variance | -1.08     |
|    learning_rate      | 0.0007    |
|    n_updates          | 36499     |
|    policy_loss        | 2.28e-07  |
|    value_loss         | 0.0129    |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 3.32e+04  |
|    ep_rew_mean        | -1.43e+04 |
| time/                 |           |
|    fps                | 267       |
|    iterations         | 36600     |
|    time_elapsed       | 684       |
|    total_timesteps    | 183000    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 3.32e+04  |
|    ep_rew_mean        | -1.43e+04 |
| time/                 |           |
|    fps                | 266       |
|    iterations         | 37800     |
|    time_elapsed       | 710       |
|    total_timesteps    | 189000    |
| train/                |           |
|    entropy_loss       | -3.01e-05 |
|    explained_variance | 0.488     |
|    learning_rate      | 0.0007    |
|    n_updates          | 37799     |
|    policy_loss        | -9.92e-08 |
|    value_loss         | 0.0118    |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 3.32e+04  |
|    ep_rew_mean        | -1.43e+04 |
| time/                 |           |
|    fps                | 266       |
|    iterations         | 37900     |
|    time_elapsed       | 712       |
|    total_timesteps    | 189500    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 3.32e+04  |
|    ep_rew_mean        | -1.43e+04 |
| time/                 |           |
|    fps                | 265       |
|    iterations         | 39100     |
|    time_elapsed       | 736       |
|    total_timesteps    | 195500    |
| train/                |           |
|    entropy_loss       | -2.97e-05 |
|    explained_variance | -0.451    |
|    learning_rate      | 0.0007    |
|    n_updates          | 39099     |
|    policy_loss        | -1.05e-07 |
|    value_loss         | 0.00364   |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 3.32e+04  |
|    ep_rew_mean        | -1.43e+04 |
| time/                 |           |
|    fps                | 265       |
|    iterations         | 39200     |
|    time_elapsed       | 738       |
|    total_timesteps    | 196000    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 3.32e+04  |
|    ep_rew_mean        | -1.43e+04 |
| time/                 |           |
|    fps                | 264       |
|    iterations         | 40400     |
|    time_elapsed       | 762       |
|    total_timesteps    | 202000    |
| train/                |           |
|    entropy_loss       | -6.07e-05 |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 40399     |
|    policy_loss        | 1.15e-07  |
|    value_loss         | 0.000874  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 3.32e+04  |
|    ep_rew_mean        | -1.43e+04 |
| time/                 |           |
|    fps                | 264       |
|    iterations         | 40500     |
|    time_elapsed       | 764       |
|    total_timesteps    | 202500    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 3.32e+04  |
|    ep_rew_mean        | -1.43e+04 |
| time/                 |           |
|    fps                | 264       |
|    iterations         | 41700     |
|    time_elapsed       | 788       |
|    total_timesteps    | 208500    |
| train/                |           |
|    entropy_loss       | -6.29e-05 |
|    explained_variance | -1.19e-07 |
|    learning_rate      | 0.0007    |
|    n_updates          | 41699     |
|    policy_loss        | -1.57e-07 |
|    value_loss         | 0.0019    |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 3.32e+04  |
|    ep_rew_mean        | -1.43e+04 |
| time/                 |           |
|    fps                | 264       |
|    iterations         | 41800     |
|    time_elapsed       | 790       |
|    total_timesteps    | 209000    |
| train/    

In [None]:
### Through this we evaluate our policy and observe we evaluation shall be proceeded with what level.
from stable_baselines3.common.evaluation import evaluate_policy
evaluate_policy(model, env, n_eval_episodes=20)

In [None]:
A2C_path = os.path.join("Training","Saved Models","A2C_Model_MultiInputPolicy_long_data")

In [None]:
model.save(PPO_path)