In [6]:
import gym
import numpy as np
from stable_baselines3 import A2C, DDPG, SAC, PPO
from matplotlib import axis
import numpy as np
from floris.tools import FlorisInterface
from floris.tools.visualization import visualize_cut_plane
import matplotlib.pyplot as plt
from floris.tools.optimization.yaw_optimization.yaw_optimizer_sr import YawOptimizationSR

from gym import spaces

from stable_baselines3.common.env_checker import check_env
import os
import time
from tqdm import tqdm_notebook
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.results_plotter import load_results, ts2xy
from stable_baselines3.common.noise import NormalActionNoise
from stable_baselines3.common.callbacks import BaseCallback

from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.utils import set_random_seed

from matplotlib.pyplot import figure

#loading optimization package:
from scipy.optimize import minimize

from floris.tools.optimization.yaw_optimization.yaw_optimizer_scipy import (
    YawOptimizationScipy
)

from time import perf_counter as timerpc

import yaml
import math
from scipy import interpolate
import random

In [39]:
class CustomEnv(gym.Env):
    """
    The inputs are:
    t_steps     = number of timesteps pr simulation
    dist        ª= rotor diameters between the turbines
    nx          = number of turbines along x axis
    ny          = number of turbines along y axis
    turb_type   = The type of turbine used for the environment
    combination = The combination model
    deflection  = The deflection model
    turbulence  = The turbulence model
    velocity    = The wake velocity model
    VS_min      = minimum wind speed [m/s]
    VS_max      = maximum wind speed [m/s]
    TI_min      = minimum turbulence intensity
    TI_max      = maximum turbulence intensity
    wd_min      = minimum wind direction
    wd_max      = maximum wind direction
    yaw_max     = Is the maximum yaw offset allowed in degrees.
    
    """
    #Custom Environment that follows gym interface
    metadata = {'render.modes': ['human']}

    def __init__(self, t_steps = 10, dist = 5, nx = 3, ny = 3,
               turb_type = 'nrel_5MW', combination = 'sosfs', deflection = 'gauss',
               turbulence = 'crespo_hernandez', velocity = 'gauss',
               VS_min = 4, VS_max = 20, TI_min = 0.01, TI_max = 0.15, wd_min = 270, wd_max = 360, 
               yaw_max = 25, rho = 1.225, seed = 0):
        super(CustomEnv, self).__init__()
        
        random.seed(seed)
        np.random.seed(seed)
        
        self.t_max = t_steps   #The number of "simulations" pr episode.
        self.wind_speed_min = VS_min
        self.wind_speed_max = VS_max
        self.TI_min         = TI_min
        self.TI_max         = TI_max
        self.wd_min         = wd_min
        self.wd_max         = wd_max
        self.n_turb         = nx * ny
        self.yaw_max        = yaw_max
        self.rho            = rho
                     
        #Creates the base for the farm
        fi = FlorisInterface("gch.yaml")   
        
        #Turns it into a dictionary and then does the changes to the model
        fi_dict = fi.floris.as_dict()
        
        fi_dict["farm"]["turbine_type"] = [turb_type]
        fi_dict["wake"]["model_strings"]["combination_model"] = combination
        fi_dict["wake"]["model_strings"]["deflection_model"]  = deflection
        fi_dict["wake"]["model_strings"]["turbulence_model"]  = turbulence
        fi_dict["wake"]["model_strings"]["velocity_model"]    = velocity
        fi_dict["flow_field"]["air_density"]                  = rho
        
        # Turns it back into a floris object:
        self.fi = FlorisInterface(fi_dict)

        D = self.fi.floris.farm.rotor_diameters[0]

        x = np.linspace(0, D*dist*nx, nx)
        y = np.linspace(0, D*dist*ny, ny)

        xv, yv = np.meshgrid(x, y, indexing='xy')
        
        self.layout_x =  xv.flatten()
        self.layout_y = yv.flatten()
        
        #Reads and saves the power curve for one turbine:
        
        with open(turb_type+".yaml", 'r') as stream:
            try:
                parsed_yaml=yaml.safe_load(stream)
                #print(parsed_yaml)
            except yaml.YAMLError as exc:
                print(exc)

        ws_curve = parsed_yaml["power_thrust_table"]["wind_speed"]
        power_curve = parsed_yaml["power_thrust_table"]["power"]
        
        self.A = 3.14 * (D/2)**2
        self.power_curve = interpolate.interp1d(ws_curve, power_curve)
        

        # Define action and observation space
        
        # The actionspace is the 9 yaw angles.
        self.action_space = spaces.Box(low=-1, high=1,
                                            shape=(nx*ny,), dtype=np.float32)
        
        # The observationspace is WS, TI, WD:
        high = np.array([self.wind_speed_max, self.TI_max, self.wd_max], dtype = np.float32)
        low = np.array([self.wind_speed_min, self.TI_min, self.wd_min], dtype = np.float32)
        
        self.observation_space = spaces.Box(low, high, dtype=np.float32)

        self.reset()
        
    def step(self, action):
        #print("we did a step")
        if self.time > self.t_max:
            done =  True
        else:
            self.time += 1
            done = False
            
        self.fi.calculate_wake(yaw_angles=np.array([[action]]))  #weird format, but it's okay
        
        power_farm = self.fi.get_farm_power()
        
        #Choose if you want ideal farm, or greedy farm for reward calculation.
        #rew = self.fi.get_farm_power()/self.power_ideal_farm
        rew = self.fi.get_farm_power()/self.power_greey_farm
        
        reward = (rew[0][0]-1)*1000   
            
        info = {}
        observation = np.array([self.ws, self.TI, self.wd], dtype = np.float32)
        return observation, reward, done, info
    
    def reset(self):
        
        #
        self.ws = np.random.uniform(self.wind_speed_min, self.wind_speed_max)
        self.wd = np.random.uniform(self.wd_min, self.wd_max)
        self.TI = np.random.uniform(self.TI_min, self.TI_max)
        

        
        self.fi.reinitialize(
            layout=(self.layout_x, self.layout_y),
            wind_directions=[self.wd],
            turbulence_intensity= self.TI,
            wind_speeds=[self.ws]
            )
        
        
        #Calculate greedy power. Used for normalization
        self.fi.calculate_wake()
        self.power_greey_farm = self.fi.get_farm_power()
        
        #calculate the power for the ideal farm. Used for normalization
        self.power_ideal_farm = self.power_curve(self.ws)*self.n_turb* (1/2) * self.rho * self.A * self.ws**3
        
        self.time = 0
        done = False
        
        observation = np.array([self.ws, self.TI, self.wd], dtype = np.float32)
        return observation  # reward, done, info can't be included
    
    def render(self, mode='human'):
        pass
    
    def close (self):
        pass

In [40]:
model_name = "PPO3"
SEED = 0

In [88]:
env = CustomEnv(t_steps = 10, dist = 5, nx = 3, ny = 3,
               turb_type = 'nrel_5MW', combination = 'sosfs', deflection = 'gauss',
               turbulence = 'crespo_hernandez', velocity = 'gauss',
               VS_min = 4, VS_max = 15, TI_min = 0.07, TI_max = 0.07, wd_min = 270, wd_max = 315, 
               yaw_max = 25, rho = 1.225, seed = SEED)
check_env(env)

In [89]:
model = PPO("MlpPolicy", env, verbose=1, tensorboard_log='logs', seed = SEED)

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [90]:
models_dir = "models/"+model_name
log_dir = "logs"

if not os.path.exists(models_dir):
    os.makedirs(models_dir)
    
if not os.path.exists(log_dir):
    os.makedirs(log_dir)

In [91]:
TIMESTEPS = 20_000

for i in range(1,50):
    model.learn(total_timesteps = TIMESTEPS, reset_num_timesteps = False, tb_log_name=model_name)
    model.save(f"{models_dir}/{TIMESTEPS*i}")


Logging to logs/PPO3_0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 12       |
|    ep_rew_mean     | -0.554   |
| time/              |          |
|    fps             | 73       |
|    iterations      | 1        |
|    time_elapsed    | 27       |
|    total_timesteps | 2048     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | -0.638      |
| time/                   |             |
|    fps                  | 72          |
|    iterations           | 2           |
|    time_elapsed         | 56          |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.010051884 |
|    clip_fraction        | 0.156       |
|    clip_range           | 0.2         |
|    entropy_loss         | -12.8       |
|    explained_variance   | 0.00356     |
|    le

Logging to logs/PPO3_0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 12       |
|    ep_rew_mean     | 0.254    |
| time/              |          |
|    fps             | 73       |
|    iterations      | 1        |
|    time_elapsed    | 27       |
|    total_timesteps | 22528    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 3.52        |
| time/                   |             |
|    fps                  | 72          |
|    iterations           | 2           |
|    time_elapsed         | 56          |
|    total_timesteps      | 24576       |
| train/                  |             |
|    approx_kl            | 0.014316825 |
|    clip_fraction        | 0.128       |
|    clip_range           | 0.2         |
|    entropy_loss         | -12.5       |
|    explained_variance   | 0.00626     |
|    le

Logging to logs/PPO3_0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 12       |
|    ep_rew_mean     | 2.79     |
| time/              |          |
|    fps             | 74       |
|    iterations      | 1        |
|    time_elapsed    | 27       |
|    total_timesteps | 43008    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 3.43        |
| time/                   |             |
|    fps                  | 73          |
|    iterations           | 2           |
|    time_elapsed         | 55          |
|    total_timesteps      | 45056       |
| train/                  |             |
|    approx_kl            | 0.009394087 |
|    clip_fraction        | 0.114       |
|    clip_range           | 0.2         |
|    entropy_loss         | -12.4       |
|    explained_variance   | 0.126       |
|    le

Logging to logs/PPO3_0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 12       |
|    ep_rew_mean     | 2.11     |
| time/              |          |
|    fps             | 74       |
|    iterations      | 1        |
|    time_elapsed    | 27       |
|    total_timesteps | 63488    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 4.85        |
| time/                   |             |
|    fps                  | 73          |
|    iterations           | 2           |
|    time_elapsed         | 55          |
|    total_timesteps      | 65536       |
| train/                  |             |
|    approx_kl            | 0.007948115 |
|    clip_fraction        | 0.0991      |
|    clip_range           | 0.2         |
|    entropy_loss         | -12.3       |
|    explained_variance   | 0.305       |
|    le

Logging to logs/PPO3_0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 12       |
|    ep_rew_mean     | 2.47     |
| time/              |          |
|    fps             | 74       |
|    iterations      | 1        |
|    time_elapsed    | 27       |
|    total_timesteps | 83968    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 7.25        |
| time/                   |             |
|    fps                  | 73          |
|    iterations           | 2           |
|    time_elapsed         | 56          |
|    total_timesteps      | 86016       |
| train/                  |             |
|    approx_kl            | 0.011325892 |
|    clip_fraction        | 0.131       |
|    clip_range           | 0.2         |
|    entropy_loss         | -12.3       |
|    explained_variance   | 0.371       |
|    le

Logging to logs/PPO3_0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 12       |
|    ep_rew_mean     | -0.0118  |
| time/              |          |
|    fps             | 74       |
|    iterations      | 1        |
|    time_elapsed    | 27       |
|    total_timesteps | 104448   |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 2.71        |
| time/                   |             |
|    fps                  | 73          |
|    iterations           | 2           |
|    time_elapsed         | 55          |
|    total_timesteps      | 106496      |
| train/                  |             |
|    approx_kl            | 0.012009031 |
|    clip_fraction        | 0.264       |
|    clip_range           | 0.2         |
|    entropy_loss         | -12.2       |
|    explained_variance   | 0.482       |
|    le

Logging to logs/PPO3_0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 12       |
|    ep_rew_mean     | 5.87     |
| time/              |          |
|    fps             | 74       |
|    iterations      | 1        |
|    time_elapsed    | 27       |
|    total_timesteps | 124928   |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 6.97        |
| time/                   |             |
|    fps                  | 73          |
|    iterations           | 2           |
|    time_elapsed         | 56          |
|    total_timesteps      | 126976      |
| train/                  |             |
|    approx_kl            | 0.008603991 |
|    clip_fraction        | 0.119       |
|    clip_range           | 0.2         |
|    entropy_loss         | -12         |
|    explained_variance   | 0.435       |
|    le

Logging to logs/PPO3_0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 12       |
|    ep_rew_mean     | 4.93     |
| time/              |          |
|    fps             | 74       |
|    iterations      | 1        |
|    time_elapsed    | 27       |
|    total_timesteps | 145408   |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 3.95        |
| time/                   |             |
|    fps                  | 73          |
|    iterations           | 2           |
|    time_elapsed         | 56          |
|    total_timesteps      | 147456      |
| train/                  |             |
|    approx_kl            | 0.006368897 |
|    clip_fraction        | 0.0928      |
|    clip_range           | 0.2         |
|    entropy_loss         | -12         |
|    explained_variance   | 0.46        |
|    le

Logging to logs/PPO3_0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 12       |
|    ep_rew_mean     | 6.74     |
| time/              |          |
|    fps             | 74       |
|    iterations      | 1        |
|    time_elapsed    | 27       |
|    total_timesteps | 165888   |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 5.58        |
| time/                   |             |
|    fps                  | 73          |
|    iterations           | 2           |
|    time_elapsed         | 56          |
|    total_timesteps      | 167936      |
| train/                  |             |
|    approx_kl            | 0.008200708 |
|    clip_fraction        | 0.107       |
|    clip_range           | 0.2         |
|    entropy_loss         | -11.9       |
|    explained_variance   | 0.467       |
|    le

Logging to logs/PPO3_0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 12       |
|    ep_rew_mean     | 8.64     |
| time/              |          |
|    fps             | 74       |
|    iterations      | 1        |
|    time_elapsed    | 27       |
|    total_timesteps | 186368   |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 4.18        |
| time/                   |             |
|    fps                  | 73          |
|    iterations           | 2           |
|    time_elapsed         | 55          |
|    total_timesteps      | 188416      |
| train/                  |             |
|    approx_kl            | 0.011577595 |
|    clip_fraction        | 0.0865      |
|    clip_range           | 0.2         |
|    entropy_loss         | -11.9       |
|    explained_variance   | 0.524       |
|    le

Logging to logs/PPO3_0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 12       |
|    ep_rew_mean     | 8.28     |
| time/              |          |
|    fps             | 74       |
|    iterations      | 1        |
|    time_elapsed    | 27       |
|    total_timesteps | 206848   |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 0.783       |
| time/                   |             |
|    fps                  | 73          |
|    iterations           | 2           |
|    time_elapsed         | 56          |
|    total_timesteps      | 208896      |
| train/                  |             |
|    approx_kl            | 0.022028815 |
|    clip_fraction        | 0.261       |
|    clip_range           | 0.2         |
|    entropy_loss         | -11.9       |
|    explained_variance   | 0.536       |
|    le

Logging to logs/PPO3_0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 12       |
|    ep_rew_mean     | 3.44     |
| time/              |          |
|    fps             | 74       |
|    iterations      | 1        |
|    time_elapsed    | 27       |
|    total_timesteps | 227328   |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 5.63        |
| time/                   |             |
|    fps                  | 73          |
|    iterations           | 2           |
|    time_elapsed         | 56          |
|    total_timesteps      | 229376      |
| train/                  |             |
|    approx_kl            | 0.011580983 |
|    clip_fraction        | 0.369       |
|    clip_range           | 0.2         |
|    entropy_loss         | -11.9       |
|    explained_variance   | 0.532       |
|    le

Logging to logs/PPO3_0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 12       |
|    ep_rew_mean     | 6.8      |
| time/              |          |
|    fps             | 74       |
|    iterations      | 1        |
|    time_elapsed    | 27       |
|    total_timesteps | 247808   |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 12           |
|    ep_rew_mean          | 6.26         |
| time/                   |              |
|    fps                  | 73           |
|    iterations           | 2            |
|    time_elapsed         | 55           |
|    total_timesteps      | 249856       |
| train/                  |              |
|    approx_kl            | 0.0060607754 |
|    clip_fraction        | 0.0206       |
|    clip_range           | 0.2          |
|    entropy_loss         | -11.9        |
|    explained_variance   | 0.549  

Logging to logs/PPO3_0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 12       |
|    ep_rew_mean     | 2.95     |
| time/              |          |
|    fps             | 73       |
|    iterations      | 1        |
|    time_elapsed    | 27       |
|    total_timesteps | 268288   |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 12           |
|    ep_rew_mean          | 8.13         |
| time/                   |              |
|    fps                  | 73           |
|    iterations           | 2            |
|    time_elapsed         | 56           |
|    total_timesteps      | 270336       |
| train/                  |              |
|    approx_kl            | 0.0032132957 |
|    clip_fraction        | 0.1          |
|    clip_range           | 0.2          |
|    entropy_loss         | -11.8        |
|    explained_variance   | 0.339  

Logging to logs/PPO3_0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 12       |
|    ep_rew_mean     | 3.67     |
| time/              |          |
|    fps             | 74       |
|    iterations      | 1        |
|    time_elapsed    | 27       |
|    total_timesteps | 288768   |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 4.99        |
| time/                   |             |
|    fps                  | 73          |
|    iterations           | 2           |
|    time_elapsed         | 56          |
|    total_timesteps      | 290816      |
| train/                  |             |
|    approx_kl            | 0.025329255 |
|    clip_fraction        | 0.108       |
|    clip_range           | 0.2         |
|    entropy_loss         | -11.8       |
|    explained_variance   | 0.511       |
|    le

Logging to logs/PPO3_0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 12       |
|    ep_rew_mean     | 2.65     |
| time/              |          |
|    fps             | 74       |
|    iterations      | 1        |
|    time_elapsed    | 27       |
|    total_timesteps | 309248   |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 4.31        |
| time/                   |             |
|    fps                  | 73          |
|    iterations           | 2           |
|    time_elapsed         | 56          |
|    total_timesteps      | 311296      |
| train/                  |             |
|    approx_kl            | 0.012296317 |
|    clip_fraction        | 0.0851      |
|    clip_range           | 0.2         |
|    entropy_loss         | -11.8       |
|    explained_variance   | 0.559       |
|    le

Logging to logs/PPO3_0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 12       |
|    ep_rew_mean     | 4.87     |
| time/              |          |
|    fps             | 74       |
|    iterations      | 1        |
|    time_elapsed    | 27       |
|    total_timesteps | 329728   |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 9           |
| time/                   |             |
|    fps                  | 73          |
|    iterations           | 2           |
|    time_elapsed         | 56          |
|    total_timesteps      | 331776      |
| train/                  |             |
|    approx_kl            | 0.008401541 |
|    clip_fraction        | 0.134       |
|    clip_range           | 0.2         |
|    entropy_loss         | -11.7       |
|    explained_variance   | 0.501       |
|    le

Logging to logs/PPO3_0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 12       |
|    ep_rew_mean     | 5.21     |
| time/              |          |
|    fps             | 74       |
|    iterations      | 1        |
|    time_elapsed    | 27       |
|    total_timesteps | 350208   |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 12           |
|    ep_rew_mean          | 1.87         |
| time/                   |              |
|    fps                  | 73           |
|    iterations           | 2            |
|    time_elapsed         | 56           |
|    total_timesteps      | 352256       |
| train/                  |              |
|    approx_kl            | 0.0035662372 |
|    clip_fraction        | 0.0378       |
|    clip_range           | 0.2          |
|    entropy_loss         | -11.7        |
|    explained_variance   | 0.578  

Logging to logs/PPO3_0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 12       |
|    ep_rew_mean     | 11       |
| time/              |          |
|    fps             | 74       |
|    iterations      | 1        |
|    time_elapsed    | 27       |
|    total_timesteps | 370688   |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 6.9         |
| time/                   |             |
|    fps                  | 73          |
|    iterations           | 2           |
|    time_elapsed         | 56          |
|    total_timesteps      | 372736      |
| train/                  |             |
|    approx_kl            | 0.044655487 |
|    clip_fraction        | 0.31        |
|    clip_range           | 0.2         |
|    entropy_loss         | -11.7       |
|    explained_variance   | 0.642       |
|    le

Logging to logs/PPO3_0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 12       |
|    ep_rew_mean     | 3.36     |
| time/              |          |
|    fps             | 73       |
|    iterations      | 1        |
|    time_elapsed    | 27       |
|    total_timesteps | 391168   |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 12           |
|    ep_rew_mean          | 4.02         |
| time/                   |              |
|    fps                  | 73           |
|    iterations           | 2            |
|    time_elapsed         | 56           |
|    total_timesteps      | 393216       |
| train/                  |              |
|    approx_kl            | 0.0071835285 |
|    clip_fraction        | 0.195        |
|    clip_range           | 0.2          |
|    entropy_loss         | -11.6        |
|    explained_variance   | 0.563  

Logging to logs/PPO3_0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 12       |
|    ep_rew_mean     | 8.07     |
| time/              |          |
|    fps             | 73       |
|    iterations      | 1        |
|    time_elapsed    | 27       |
|    total_timesteps | 411648   |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 5.55        |
| time/                   |             |
|    fps                  | 73          |
|    iterations           | 2           |
|    time_elapsed         | 56          |
|    total_timesteps      | 413696      |
| train/                  |             |
|    approx_kl            | 0.058552857 |
|    clip_fraction        | 0.148       |
|    clip_range           | 0.2         |
|    entropy_loss         | -11.6       |
|    explained_variance   | 0.589       |
|    le

Logging to logs/PPO3_0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 12       |
|    ep_rew_mean     | 8.3      |
| time/              |          |
|    fps             | 74       |
|    iterations      | 1        |
|    time_elapsed    | 27       |
|    total_timesteps | 432128   |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 12.6        |
| time/                   |             |
|    fps                  | 73          |
|    iterations           | 2           |
|    time_elapsed         | 55          |
|    total_timesteps      | 434176      |
| train/                  |             |
|    approx_kl            | 0.004306907 |
|    clip_fraction        | 0.16        |
|    clip_range           | 0.2         |
|    entropy_loss         | -11.6       |
|    explained_variance   | 0.643       |
|    le

Logging to logs/PPO3_0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 12       |
|    ep_rew_mean     | 4.89     |
| time/              |          |
|    fps             | 74       |
|    iterations      | 1        |
|    time_elapsed    | 27       |
|    total_timesteps | 452608   |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 12         |
|    ep_rew_mean          | 3.37       |
| time/                   |            |
|    fps                  | 73         |
|    iterations           | 2          |
|    time_elapsed         | 55         |
|    total_timesteps      | 454656     |
| train/                  |            |
|    approx_kl            | 0.00798166 |
|    clip_fraction        | 0.417      |
|    clip_range           | 0.2        |
|    entropy_loss         | -11.6      |
|    explained_variance   | 0.603      |
|    learning_rate    

Logging to logs/PPO3_0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 12       |
|    ep_rew_mean     | 8.38     |
| time/              |          |
|    fps             | 73       |
|    iterations      | 1        |
|    time_elapsed    | 27       |
|    total_timesteps | 473088   |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 5.1         |
| time/                   |             |
|    fps                  | 73          |
|    iterations           | 2           |
|    time_elapsed         | 56          |
|    total_timesteps      | 475136      |
| train/                  |             |
|    approx_kl            | 0.018362166 |
|    clip_fraction        | 0.143       |
|    clip_range           | 0.2         |
|    entropy_loss         | -11.6       |
|    explained_variance   | 0.633       |
|    le

Logging to logs/PPO3_0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 12       |
|    ep_rew_mean     | 4.94     |
| time/              |          |
|    fps             | 74       |
|    iterations      | 1        |
|    time_elapsed    | 27       |
|    total_timesteps | 493568   |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 7.41        |
| time/                   |             |
|    fps                  | 73          |
|    iterations           | 2           |
|    time_elapsed         | 55          |
|    total_timesteps      | 495616      |
| train/                  |             |
|    approx_kl            | 0.009317918 |
|    clip_fraction        | 0.0586      |
|    clip_range           | 0.2         |
|    entropy_loss         | -11.6       |
|    explained_variance   | 0.645       |
|    le

Logging to logs/PPO3_0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 12       |
|    ep_rew_mean     | 6.62     |
| time/              |          |
|    fps             | 74       |
|    iterations      | 1        |
|    time_elapsed    | 27       |
|    total_timesteps | 514048   |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 2.63        |
| time/                   |             |
|    fps                  | 73          |
|    iterations           | 2           |
|    time_elapsed         | 56          |
|    total_timesteps      | 516096      |
| train/                  |             |
|    approx_kl            | 0.006842047 |
|    clip_fraction        | 0.123       |
|    clip_range           | 0.2         |
|    entropy_loss         | -11.5       |
|    explained_variance   | 0.603       |
|    le

Logging to logs/PPO3_0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 12       |
|    ep_rew_mean     | 1.74     |
| time/              |          |
|    fps             | 73       |
|    iterations      | 1        |
|    time_elapsed    | 27       |
|    total_timesteps | 534528   |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 12         |
|    ep_rew_mean          | 6.49       |
| time/                   |            |
|    fps                  | 73         |
|    iterations           | 2          |
|    time_elapsed         | 55         |
|    total_timesteps      | 536576     |
| train/                  |            |
|    approx_kl            | 0.07933606 |
|    clip_fraction        | 0.193      |
|    clip_range           | 0.2        |
|    entropy_loss         | -11.4      |
|    explained_variance   | 0.594      |
|    learning_rate    

Logging to logs/PPO3_0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 12       |
|    ep_rew_mean     | 8.03     |
| time/              |          |
|    fps             | 74       |
|    iterations      | 1        |
|    time_elapsed    | 27       |
|    total_timesteps | 555008   |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 6.31        |
| time/                   |             |
|    fps                  | 73          |
|    iterations           | 2           |
|    time_elapsed         | 55          |
|    total_timesteps      | 557056      |
| train/                  |             |
|    approx_kl            | 0.009858338 |
|    clip_fraction        | 0.0365      |
|    clip_range           | 0.2         |
|    entropy_loss         | -11.5       |
|    explained_variance   | 0.662       |
|    le

Logging to logs/PPO3_0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 12       |
|    ep_rew_mean     | 5.54     |
| time/              |          |
|    fps             | 74       |
|    iterations      | 1        |
|    time_elapsed    | 27       |
|    total_timesteps | 575488   |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 12           |
|    ep_rew_mean          | 9.28         |
| time/                   |              |
|    fps                  | 73           |
|    iterations           | 2            |
|    time_elapsed         | 56           |
|    total_timesteps      | 577536       |
| train/                  |              |
|    approx_kl            | 0.0124235265 |
|    clip_fraction        | 0.258        |
|    clip_range           | 0.2          |
|    entropy_loss         | -11.5        |
|    explained_variance   | 0.581  

Logging to logs/PPO3_0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 12       |
|    ep_rew_mean     | 10.5     |
| time/              |          |
|    fps             | 74       |
|    iterations      | 1        |
|    time_elapsed    | 27       |
|    total_timesteps | 595968   |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 8.7         |
| time/                   |             |
|    fps                  | 73          |
|    iterations           | 2           |
|    time_elapsed         | 55          |
|    total_timesteps      | 598016      |
| train/                  |             |
|    approx_kl            | 0.017341848 |
|    clip_fraction        | 0.188       |
|    clip_range           | 0.2         |
|    entropy_loss         | -11.4       |
|    explained_variance   | 0.613       |
|    le

Logging to logs/PPO3_0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 12       |
|    ep_rew_mean     | 4.94     |
| time/              |          |
|    fps             | 74       |
|    iterations      | 1        |
|    time_elapsed    | 27       |
|    total_timesteps | 616448   |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 4.58        |
| time/                   |             |
|    fps                  | 73          |
|    iterations           | 2           |
|    time_elapsed         | 55          |
|    total_timesteps      | 618496      |
| train/                  |             |
|    approx_kl            | 0.013611092 |
|    clip_fraction        | 0.193       |
|    clip_range           | 0.2         |
|    entropy_loss         | -11.4       |
|    explained_variance   | 0.676       |
|    le

Logging to logs/PPO3_0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 12       |
|    ep_rew_mean     | 7.46     |
| time/              |          |
|    fps             | 74       |
|    iterations      | 1        |
|    time_elapsed    | 27       |
|    total_timesteps | 636928   |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 3.42        |
| time/                   |             |
|    fps                  | 73          |
|    iterations           | 2           |
|    time_elapsed         | 56          |
|    total_timesteps      | 638976      |
| train/                  |             |
|    approx_kl            | 0.022918703 |
|    clip_fraction        | 0.087       |
|    clip_range           | 0.2         |
|    entropy_loss         | -11.4       |
|    explained_variance   | 0.614       |
|    le

Logging to logs/PPO3_0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 12       |
|    ep_rew_mean     | 7.73     |
| time/              |          |
|    fps             | 74       |
|    iterations      | 1        |
|    time_elapsed    | 27       |
|    total_timesteps | 657408   |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 6.73        |
| time/                   |             |
|    fps                  | 73          |
|    iterations           | 2           |
|    time_elapsed         | 55          |
|    total_timesteps      | 659456      |
| train/                  |             |
|    approx_kl            | 0.035347015 |
|    clip_fraction        | 0.141       |
|    clip_range           | 0.2         |
|    entropy_loss         | -11.4       |
|    explained_variance   | 0.617       |
|    le

Logging to logs/PPO3_0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 12       |
|    ep_rew_mean     | 4.9      |
| time/              |          |
|    fps             | 74       |
|    iterations      | 1        |
|    time_elapsed    | 27       |
|    total_timesteps | 677888   |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 12           |
|    ep_rew_mean          | 12.4         |
| time/                   |              |
|    fps                  | 73           |
|    iterations           | 2            |
|    time_elapsed         | 55           |
|    total_timesteps      | 679936       |
| train/                  |              |
|    approx_kl            | 0.0062564276 |
|    clip_fraction        | 0.0424       |
|    clip_range           | 0.2          |
|    entropy_loss         | -11.4        |
|    explained_variance   | 0.638  

Logging to logs/PPO3_0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 12       |
|    ep_rew_mean     | 1.26     |
| time/              |          |
|    fps             | 74       |
|    iterations      | 1        |
|    time_elapsed    | 27       |
|    total_timesteps | 698368   |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 12           |
|    ep_rew_mean          | 4.11         |
| time/                   |              |
|    fps                  | 73           |
|    iterations           | 2            |
|    time_elapsed         | 55           |
|    total_timesteps      | 700416       |
| train/                  |              |
|    approx_kl            | 0.0049410323 |
|    clip_fraction        | 0.2          |
|    clip_range           | 0.2          |
|    entropy_loss         | -11.3        |
|    explained_variance   | 0.598  

Logging to logs/PPO3_0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 12       |
|    ep_rew_mean     | 5.95     |
| time/              |          |
|    fps             | 74       |
|    iterations      | 1        |
|    time_elapsed    | 27       |
|    total_timesteps | 718848   |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 12         |
|    ep_rew_mean          | 8.5        |
| time/                   |            |
|    fps                  | 73         |
|    iterations           | 2          |
|    time_elapsed         | 55         |
|    total_timesteps      | 720896     |
| train/                  |            |
|    approx_kl            | 0.01878695 |
|    clip_fraction        | 0.288      |
|    clip_range           | 0.2        |
|    entropy_loss         | -11.3      |
|    explained_variance   | 0.631      |
|    learning_rate    

Logging to logs/PPO3_0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 12       |
|    ep_rew_mean     | 6.77     |
| time/              |          |
|    fps             | 74       |
|    iterations      | 1        |
|    time_elapsed    | 27       |
|    total_timesteps | 739328   |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 12           |
|    ep_rew_mean          | 6.75         |
| time/                   |              |
|    fps                  | 73           |
|    iterations           | 2            |
|    time_elapsed         | 55           |
|    total_timesteps      | 741376       |
| train/                  |              |
|    approx_kl            | 0.0038586375 |
|    clip_fraction        | 0.0943       |
|    clip_range           | 0.2          |
|    entropy_loss         | -11.3        |
|    explained_variance   | 0.632  

Logging to logs/PPO3_0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 12       |
|    ep_rew_mean     | 4.39     |
| time/              |          |
|    fps             | 74       |
|    iterations      | 1        |
|    time_elapsed    | 27       |
|    total_timesteps | 759808   |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 12         |
|    ep_rew_mean          | 5.16       |
| time/                   |            |
|    fps                  | 73         |
|    iterations           | 2          |
|    time_elapsed         | 56         |
|    total_timesteps      | 761856     |
| train/                  |            |
|    approx_kl            | 0.10571469 |
|    clip_fraction        | 0.683      |
|    clip_range           | 0.2        |
|    entropy_loss         | -11.3      |
|    explained_variance   | 0.492      |
|    learning_rate    

Logging to logs/PPO3_0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 12       |
|    ep_rew_mean     | 4.6      |
| time/              |          |
|    fps             | 74       |
|    iterations      | 1        |
|    time_elapsed    | 27       |
|    total_timesteps | 780288   |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 4.16        |
| time/                   |             |
|    fps                  | 73          |
|    iterations           | 2           |
|    time_elapsed         | 56          |
|    total_timesteps      | 782336      |
| train/                  |             |
|    approx_kl            | 0.005531242 |
|    clip_fraction        | 0.0438      |
|    clip_range           | 0.2         |
|    entropy_loss         | -11.3       |
|    explained_variance   | 0.662       |
|    le

Logging to logs/PPO3_0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 12       |
|    ep_rew_mean     | 6.04     |
| time/              |          |
|    fps             | 74       |
|    iterations      | 1        |
|    time_elapsed    | 27       |
|    total_timesteps | 800768   |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 12         |
|    ep_rew_mean          | 6.86       |
| time/                   |            |
|    fps                  | 73         |
|    iterations           | 2          |
|    time_elapsed         | 56         |
|    total_timesteps      | 802816     |
| train/                  |            |
|    approx_kl            | 0.00818242 |
|    clip_fraction        | 0.095      |
|    clip_range           | 0.2        |
|    entropy_loss         | -11.3      |
|    explained_variance   | 0.608      |
|    learning_rate    

Logging to logs/PPO3_0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 12       |
|    ep_rew_mean     | 2.51     |
| time/              |          |
|    fps             | 74       |
|    iterations      | 1        |
|    time_elapsed    | 27       |
|    total_timesteps | 821248   |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 11          |
| time/                   |             |
|    fps                  | 73          |
|    iterations           | 2           |
|    time_elapsed         | 55          |
|    total_timesteps      | 823296      |
| train/                  |             |
|    approx_kl            | 0.007028637 |
|    clip_fraction        | 0.122       |
|    clip_range           | 0.2         |
|    entropy_loss         | -11.3       |
|    explained_variance   | 0.617       |
|    le

Logging to logs/PPO3_0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 12       |
|    ep_rew_mean     | 8.47     |
| time/              |          |
|    fps             | 74       |
|    iterations      | 1        |
|    time_elapsed    | 27       |
|    total_timesteps | 841728   |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 11.6        |
| time/                   |             |
|    fps                  | 73          |
|    iterations           | 2           |
|    time_elapsed         | 55          |
|    total_timesteps      | 843776      |
| train/                  |             |
|    approx_kl            | 0.021128725 |
|    clip_fraction        | 0.143       |
|    clip_range           | 0.2         |
|    entropy_loss         | -11.3       |
|    explained_variance   | 0.659       |
|    le

Logging to logs/PPO3_0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 12       |
|    ep_rew_mean     | 8.94     |
| time/              |          |
|    fps             | 74       |
|    iterations      | 1        |
|    time_elapsed    | 27       |
|    total_timesteps | 862208   |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 9.52        |
| time/                   |             |
|    fps                  | 73          |
|    iterations           | 2           |
|    time_elapsed         | 56          |
|    total_timesteps      | 864256      |
| train/                  |             |
|    approx_kl            | 0.004649082 |
|    clip_fraction        | 0.0814      |
|    clip_range           | 0.2         |
|    entropy_loss         | -11.3       |
|    explained_variance   | 0.589       |
|    le

KeyboardInterrupt: 

In [None]:
for _ in range(10):
    obs = env.reset()
    done = False
    while not done:
        env.render()
        obs, reward, done, info = env.step(env.action_space.sample())

In [92]:
obs = env.reset()

In [103]:
obs, reward, done, info = env.step(env.action_space.sample())
print(obs)
print(reward)

[1.4739102e+01 7.0000000e-02 2.8011847e+02]
2.8876856461579337e-07
