In [1]:
import gym
import numpy as np
from stable_baselines3 import A2C, DDPG, SAC, PPO
from matplotlib import axis
import numpy as np
from floris.tools import FlorisInterface
from floris.tools.visualization import visualize_cut_plane
import matplotlib.pyplot as plt
from floris.tools.optimization.yaw_optimization.yaw_optimizer_sr import YawOptimizationSR

from gym import spaces

from stable_baselines3.common.env_checker import check_env
import os
import time
from tqdm import tqdm_notebook
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.results_plotter import load_results, ts2xy
from stable_baselines3.common.noise import NormalActionNoise
from stable_baselines3.common.callbacks import BaseCallback

from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.utils import set_random_seed

from matplotlib.pyplot import figure

#loading optimization package:
from scipy.optimize import minimize

from floris.tools.optimization.yaw_optimization.yaw_optimizer_scipy import (
    YawOptimizationScipy
)

from time import perf_counter as timerpc

import yaml
import math
from scipy import interpolate


In [86]:
class CustomEnv(gym.Env):
    """
    The inputs are:
    t_steps     = number of timesteps pr simulation
    dist        ª= rotor diameters between the turbines
    nx          = number of turbines along x axis
    ny          = number of turbines along y axis
    turb_type   = The type of turbine used for the environment
    combination = The combination model
    deflection  = The deflection model
    turbulence  = The turbulence model
    velocity    = The wake velocity model
    VS_min      = minimum wind speed [m/s]
    VS_max      = maximum wind speed [m/s]
    TI_min      = minimum turbulence intensity
    TI_max      = maximum turbulence intensity
    wd_min      = minimum wind direction
    wd_max      = maximum wind direction
    yaw_max     = Is the maximum yaw offset allowed in degrees.
    
    """
    #Custom Environment that follows gym interface
    metadata = {'render.modes': ['human']}

    def __init__(self, t_steps = 10, dist = 5, nx = 3, ny = 3,
               turb_type = 'nrel_5MW', combination = 'sosfs', deflection = 'gauss',
               turbulence = 'crespo_hernandez', velocity = 'gauss',
               VS_min = 4, VS_max = 20, TI_min = 0.01, TI_max = 0.15, wd_min = 270, wd_max = 360, 
               yaw_max = 25, rho = 1.225):
        super(CustomEnv, self).__init__()
        
        self.t_max = t_steps   #The number of "simulations" pr episode.
        self.wind_speed_min = VS_min
        self.wind_speed_max = VS_max
        self.TI_min         = TI_min
        self.TI_max         = TI_max
        self.wd_min         = wd_min
        self.wd_max         = wd_max
        self.n_turb         = nx * ny
        self.yaw_max        = yaw_max
        self.rho            = rho
                     
        #Creates the base for the farm
        fi = FlorisInterface("gch.yaml")   
        
        #Turns it into a dictionary and then does the changes to the model
        fi_dict = fi.floris.as_dict()
        
        fi_dict["farm"]["turbine_type"] = [turb_type]
        fi_dict["wake"]["model_strings"]["combination_model"] = combination
        fi_dict["wake"]["model_strings"]["deflection_model"]  = deflection
        fi_dict["wake"]["model_strings"]["turbulence_model"]  = turbulence
        fi_dict["wake"]["model_strings"]["velocity_model"]    = velocity
        fi_dict["flow_field"]["air_density"]                  = rho
        
        # Turns it back into a floris object:
        self.fi = FlorisInterface(fi_dict)

        D = self.fi.floris.farm.rotor_diameters[0]

        x = np.linspace(0, D*dist*nx, nx)
        y = np.linspace(0, D*dist*ny, ny)

        xv, yv = np.meshgrid(x, y, indexing='xy')
        
        self.layout_x =  xv.flatten()
        self.layout_y = yv.flatten()
        
        #Reads and saves the power curve for one turbine:
        
        with open(turb_type+".yaml", 'r') as stream:
            try:
                parsed_yaml=yaml.safe_load(stream)
                #print(parsed_yaml)
            except yaml.YAMLError as exc:
                print(exc)

        ws_curve = parsed_yaml["power_thrust_table"]["wind_speed"]
        power_curve = parsed_yaml["power_thrust_table"]["power"]
        
        self.A = 3.14 * (D/2)**2
        self.power_curve = interpolate.interp1d(ws_curve, power_curve)
        

        # Define action and observation space
        
        # The actionspace is the 9 yaw angles.
        self.action_space = spaces.Box(low=-1, high=1,
                                            shape=(nx*ny,), dtype=np.float32)
        
        # The observationspace is WS, TI, WD:
        high = np.array([self.wind_speed_max, self.TI_max, self.wd_max], dtype = np.float32)
        low = np.array([self.wind_speed_min, self.TI_min, self.wd_min], dtype = np.float32)
        
        self.observation_space = spaces.Box(low, high, dtype=np.float32)

        self.reset()
        
    def step(self, action):
        #print("we did a step")
        if self.time > self.t_max:
            done =  True
        else:
            self.time += 1
            done = False
            
        self.fi.calculate_wake(yaw_angles=np.array([[action]]))  #weird format, but it's okay
        
        power_farm = self.fi.get_farm_power()
        
        #Choose if you want ideal farm, or greedy farm for reward calculation.
        #rew = self.fi.get_farm_power()/self.power_ideal_farm
        rew = self.fi.get_farm_power()/self.power_greey_farm
        
        reward = (rew[0][0]-1)*100   #We do this to only return a scalar insted of a array. It maybe doenst like that
            
        info = {}
        observation = np.array([self.ws, self.TI, self.wd], dtype = np.float32)
        return observation, reward, done, info
    
    def reset(self):
        
        #
        self.ws = np.random.uniform(self.wind_speed_min, self.wind_speed_max)
        self.wd = np.random.uniform(self.wd_min, self.wd_max)
        self.TI = np.random.uniform(self.TI_min, self.TI_max)
        

        
        self.fi.reinitialize(
            layout=(self.layout_x, self.layout_y),
            wind_directions=[self.wd],
            turbulence_intensity= self.TI,
            wind_speeds=[self.ws]
            )
        
        
        #Calculate greedy power. Used for normalization
        self.fi.calculate_wake()
        self.power_greey_farm = self.fi.get_farm_power()
        
        #calculate the power for the ideal farm. Used for normalization
        self.power_ideal_farm = self.power_curve(self.ws)*self.n_turb* (1/2) * self.rho * self.A * self.ws**3
        
        self.time = 0
        done = False
        
        observation = np.array([self.ws, self.TI, self.wd], dtype = np.float32)
        return observation  # reward, done, info can't be included
    
    def render(self, mode='human'):
        pass
    
    def close (self):
        pass

In [87]:
env = CustomEnv(t_steps = 10, dist = 5, nx = 3, ny = 3,
               turb_type = 'nrel_5MW', combination = 'sosfs', deflection = 'gauss',
               turbulence = 'crespo_hernandez', velocity = 'gauss',
               VS_min = 4, VS_max = 20, TI_min = 0.01, TI_max = 0.15, wd_min = 270, wd_max = 270, 
               yaw_max = 25, rho = 1.225)

In [88]:
check_env(env)

In [89]:
model = PPO("MlpPolicy", env, verbose=1, tensorboard_log='logs')

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [90]:
models_dir = "models/PPO"
log_dir = "logs"

if not os.path.exists(models_dir):
    os.makedirs(models_dir)
    
if not os.path.exists(log_dir):
    os.makedirs(log_dir)

In [None]:
TIMESTEPS = 50_000

for i in range(1,100):
    model.learn(total_timesteps = TIMESTEPS, reset_num_timesteps = False, tb_log_name="PPO1")
    model.save(f"{models_dir}/{TIMESTEPS*i}")


Logging to logs/PPO1_0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 12       |
|    ep_rew_mean     | 0.2      |
| time/              |          |
|    fps             | 77       |
|    iterations      | 1        |
|    time_elapsed    | 26       |
|    total_timesteps | 2048     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 0.102       |
| time/                   |             |
|    fps                  | 76          |
|    iterations           | 2           |
|    time_elapsed         | 53          |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.009116381 |
|    clip_fraction        | 0.0973      |
|    clip_range           | 0.2         |
|    entropy_loss         | -12.8       |
|    explained_variance   | -0.257      |
|    le

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 0.362       |
| time/                   |             |
|    fps                  | 75          |
|    iterations           | 11          |
|    time_elapsed         | 296         |
|    total_timesteps      | 22528       |
| train/                  |             |
|    approx_kl            | 0.008742597 |
|    clip_fraction        | 0.12        |
|    clip_range           | 0.2         |
|    entropy_loss         | -12.7       |
|    explained_variance   | 0.143       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0808      |
|    n_updates            | 100         |
|    policy_gradient_loss | -0.00527    |
|    std                  | 0.988       |
|    value_loss           | 0.246       |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 0.567       |
| time/                   |             |
|    fps                  | 75          |
|    iterations           | 20          |
|    time_elapsed         | 540         |
|    total_timesteps      | 40960       |
| train/                  |             |
|    approx_kl            | 0.010438452 |
|    clip_fraction        | 0.131       |
|    clip_range           | 0.2         |
|    entropy_loss         | -12.5       |
|    explained_variance   | 0.362       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.138       |
|    n_updates            | 190         |
|    policy_gradient_loss | -0.00256    |
|    std                  | 0.971       |
|    value_loss           | 0.257       |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 0.772       |
| time/                   |             |
|    fps                  | 76          |
|    iterations           | 5           |
|    time_elapsed         | 134         |
|    total_timesteps      | 61440       |
| train/                  |             |
|    approx_kl            | 0.006029619 |
|    clip_fraction        | 0.0908      |
|    clip_range           | 0.2         |
|    entropy_loss         | -12.4       |
|    explained_variance   | 0.557       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.437       |
|    n_updates            | 290         |
|    policy_gradient_loss | -0.00257    |
|    std                  | 0.957       |
|    value_loss           | 0.319       |
-----------------------------------------
----------------------------------------
| rollout/                |        

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 0.974       |
| time/                   |             |
|    fps                  | 75          |
|    iterations           | 14          |
|    time_elapsed         | 377         |
|    total_timesteps      | 79872       |
| train/                  |             |
|    approx_kl            | 0.014934105 |
|    clip_fraction        | 0.187       |
|    clip_range           | 0.2         |
|    entropy_loss         | -12.2       |
|    explained_variance   | 0.621       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.199       |
|    n_updates            | 380         |
|    policy_gradient_loss | 0.000206    |
|    std                  | 0.943       |
|    value_loss           | 0.348       |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 0.935       |
| time/                   |             |
|    fps                  | 75          |
|    iterations           | 23          |
|    time_elapsed         | 621         |
|    total_timesteps      | 98304       |
| train/                  |             |
|    approx_kl            | 0.010010951 |
|    clip_fraction        | 0.19        |
|    clip_range           | 0.2         |
|    entropy_loss         | -12.2       |
|    explained_variance   | 0.658       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.18        |
|    n_updates            | 470         |
|    policy_gradient_loss | 0.00125     |
|    std                  | 0.944       |
|    value_loss           | 0.34        |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 0.904       |
| time/                   |             |
|    fps                  | 75          |
|    iterations           | 8           |
|    time_elapsed         | 215         |
|    total_timesteps      | 118784      |
| train/                  |             |
|    approx_kl            | 0.013478862 |
|    clip_fraction        | 0.18        |
|    clip_range           | 0.2         |
|    entropy_loss         | -12         |
|    explained_variance   | 0.623       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.306       |
|    n_updates            | 570         |
|    policy_gradient_loss | -0.00327    |
|    std                  | 0.919       |
|    value_loss           | 0.446       |
-----------------------------------------
------------------------------------------
| rollout/                |      

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 1.18        |
| time/                   |             |
|    fps                  | 75          |
|    iterations           | 17          |
|    time_elapsed         | 458         |
|    total_timesteps      | 137216      |
| train/                  |             |
|    approx_kl            | 0.011156892 |
|    clip_fraction        | 0.211       |
|    clip_range           | 0.2         |
|    entropy_loss         | -11.8       |
|    explained_variance   | 0.653       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.474       |
|    n_updates            | 660         |
|    policy_gradient_loss | 0.00175     |
|    std                  | 0.897       |
|    value_loss           | 0.385       |
-----------------------------------------
------------------------------------------
| rollout/                |      

Logging to logs/PPO1_0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 12       |
|    ep_rew_mean     | 1.12     |
| time/              |          |
|    fps             | 77       |
|    iterations      | 1        |
|    time_elapsed    | 26       |
|    total_timesteps | 155648   |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 12         |
|    ep_rew_mean          | 0.835      |
| time/                   |            |
|    fps                  | 74         |
|    iterations           | 2          |
|    time_elapsed         | 54         |
|    total_timesteps      | 157696     |
| train/                  |            |
|    approx_kl            | 0.01026748 |
|    clip_fraction        | 0.224      |
|    clip_range           | 0.2        |
|    entropy_loss         | -11.6      |
|    explained_variance   | 0.67       |
|    learning_rate    

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 0.882       |
| time/                   |             |
|    fps                  | 74          |
|    iterations           | 11          |
|    time_elapsed         | 304         |
|    total_timesteps      | 176128      |
| train/                  |             |
|    approx_kl            | 0.027533412 |
|    clip_fraction        | 0.153       |
|    clip_range           | 0.2         |
|    entropy_loss         | -11.4       |
|    explained_variance   | 0.655       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.403       |
|    n_updates            | 850         |
|    policy_gradient_loss | -5.86e-05   |
|    std                  | 0.855       |
|    value_loss           | 0.507       |
-----------------------------------------
--------------------------------------
| rollout/                |          

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 12           |
|    ep_rew_mean          | 1.19         |
| time/                   |              |
|    fps                  | 73           |
|    iterations           | 20           |
|    time_elapsed         | 554          |
|    total_timesteps      | 194560       |
| train/                  |              |
|    approx_kl            | 0.0094521195 |
|    clip_fraction        | 0.191        |
|    clip_range           | 0.2          |
|    entropy_loss         | -11.2        |
|    explained_variance   | 0.656        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.242        |
|    n_updates            | 940          |
|    policy_gradient_loss | -0.00194     |
|    std                  | 0.842        |
|    value_loss           | 0.622        |
------------------------------------------
-----------------------------------------
| rollout/  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 1.22        |
| time/                   |             |
|    fps                  | 75          |
|    iterations           | 5           |
|    time_elapsed         | 135         |
|    total_timesteps      | 215040      |
| train/                  |             |
|    approx_kl            | 0.022070944 |
|    clip_fraction        | 0.287       |
|    clip_range           | 0.2         |
|    entropy_loss         | -11.1       |
|    explained_variance   | 0.656       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.315       |
|    n_updates            | 1040        |
|    policy_gradient_loss | 0.00873     |
|    std                  | 0.832       |
|    value_loss           | 0.402       |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 1.15        |
| time/                   |             |
|    fps                  | 75          |
|    iterations           | 14          |
|    time_elapsed         | 378         |
|    total_timesteps      | 233472      |
| train/                  |             |
|    approx_kl            | 0.010947312 |
|    clip_fraction        | 0.28        |
|    clip_range           | 0.2         |
|    entropy_loss         | -10.8       |
|    explained_variance   | 0.681       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.302       |
|    n_updates            | 1130        |
|    policy_gradient_loss | 0.00477     |
|    std                  | 0.812       |
|    value_loss           | 0.535       |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 1.21        |
| time/                   |             |
|    fps                  | 75          |
|    iterations           | 23          |
|    time_elapsed         | 621         |
|    total_timesteps      | 251904      |
| train/                  |             |
|    approx_kl            | 0.013456289 |
|    clip_fraction        | 0.234       |
|    clip_range           | 0.2         |
|    entropy_loss         | -10.7       |
|    explained_variance   | 0.641       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.332       |
|    n_updates            | 1220        |
|    policy_gradient_loss | 0.00105     |
|    std                  | 0.805       |
|    value_loss           | 0.766       |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 1.08        |
| time/                   |             |
|    fps                  | 76          |
|    iterations           | 8           |
|    time_elapsed         | 215         |
|    total_timesteps      | 272384      |
| train/                  |             |
|    approx_kl            | 0.010041915 |
|    clip_fraction        | 0.195       |
|    clip_range           | 0.2         |
|    entropy_loss         | -10.8       |
|    explained_variance   | 0.665       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.108       |
|    n_updates            | 1320        |
|    policy_gradient_loss | -0.000887   |
|    std                  | 0.808       |
|    value_loss           | 0.63        |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 1.33        |
| time/                   |             |
|    fps                  | 75          |
|    iterations           | 17          |
|    time_elapsed         | 458         |
|    total_timesteps      | 290816      |
| train/                  |             |
|    approx_kl            | 0.013400571 |
|    clip_fraction        | 0.222       |
|    clip_range           | 0.2         |
|    entropy_loss         | -10.8       |
|    explained_variance   | 0.661       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.02        |
|    n_updates            | 1410        |
|    policy_gradient_loss | -0.00224    |
|    std                  | 0.812       |
|    value_loss           | 0.835       |
-----------------------------------------
-----------------------------------------
| rollout/                |       

Logging to logs/PPO1_0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 12       |
|    ep_rew_mean     | 1.38     |
| time/              |          |
|    fps             | 77       |
|    iterations      | 1        |
|    time_elapsed    | 26       |
|    total_timesteps | 309248   |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 1.33        |
| time/                   |             |
|    fps                  | 76          |
|    iterations           | 2           |
|    time_elapsed         | 53          |
|    total_timesteps      | 311296      |
| train/                  |             |
|    approx_kl            | 0.015006341 |
|    clip_fraction        | 0.25        |
|    clip_range           | 0.2         |
|    entropy_loss         | -10.7       |
|    explained_variance   | 0.721       |
|    le

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 1.44        |
| time/                   |             |
|    fps                  | 75          |
|    iterations           | 11          |
|    time_elapsed         | 296         |
|    total_timesteps      | 329728      |
| train/                  |             |
|    approx_kl            | 0.018813923 |
|    clip_fraction        | 0.205       |
|    clip_range           | 0.2         |
|    entropy_loss         | -10.7       |
|    explained_variance   | 0.682       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.19        |
|    n_updates            | 1600        |
|    policy_gradient_loss | 0.00353     |
|    std                  | 0.809       |
|    value_loss           | 0.593       |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 1.27        |
| time/                   |             |
|    fps                  | 75          |
|    iterations           | 20          |
|    time_elapsed         | 539         |
|    total_timesteps      | 348160      |
| train/                  |             |
|    approx_kl            | 0.035661258 |
|    clip_fraction        | 0.313       |
|    clip_range           | 0.2         |
|    entropy_loss         | -10.6       |
|    explained_variance   | 0.758       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.42        |
|    n_updates            | 1690        |
|    policy_gradient_loss | 0.008       |
|    std                  | 0.797       |
|    value_loss           | 0.54        |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 1.04        |
| time/                   |             |
|    fps                  | 76          |
|    iterations           | 5           |
|    time_elapsed         | 134         |
|    total_timesteps      | 368640      |
| train/                  |             |
|    approx_kl            | 0.020313539 |
|    clip_fraction        | 0.225       |
|    clip_range           | 0.2         |
|    entropy_loss         | -10.5       |
|    explained_variance   | 0.696       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.226       |
|    n_updates            | 1790        |
|    policy_gradient_loss | 0.00171     |
|    std                  | 0.791       |
|    value_loss           | 0.418       |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 1.47        |
| time/                   |             |
|    fps                  | 75          |
|    iterations           | 14          |
|    time_elapsed         | 377         |
|    total_timesteps      | 387072      |
| train/                  |             |
|    approx_kl            | 0.012565539 |
|    clip_fraction        | 0.195       |
|    clip_range           | 0.2         |
|    entropy_loss         | -10.4       |
|    explained_variance   | 0.73        |
|    learning_rate        | 0.0003      |
|    loss                 | 0.556       |
|    n_updates            | 1880        |
|    policy_gradient_loss | 0.0011      |
|    std                  | 0.785       |
|    value_loss           | 0.658       |
-----------------------------------------
----------------------------------------
| rollout/                |        

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 1.07        |
| time/                   |             |
|    fps                  | 75          |
|    iterations           | 23          |
|    time_elapsed         | 620         |
|    total_timesteps      | 405504      |
| train/                  |             |
|    approx_kl            | 0.016605664 |
|    clip_fraction        | 0.197       |
|    clip_range           | 0.2         |
|    entropy_loss         | -10.4       |
|    explained_variance   | 0.624       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0974      |
|    n_updates            | 1970        |
|    policy_gradient_loss | -0.00178    |
|    std                  | 0.778       |
|    value_loss           | 0.4         |
-----------------------------------------
------------------------------------------
| rollout/                |      

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 1.74        |
| time/                   |             |
|    fps                  | 75          |
|    iterations           | 8           |
|    time_elapsed         | 215         |
|    total_timesteps      | 425984      |
| train/                  |             |
|    approx_kl            | 0.009435291 |
|    clip_fraction        | 0.199       |
|    clip_range           | 0.2         |
|    entropy_loss         | -10.3       |
|    explained_variance   | 0.709       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.515       |
|    n_updates            | 2070        |
|    policy_gradient_loss | 0.00238     |
|    std                  | 0.771       |
|    value_loss           | 0.658       |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 0.926       |
| time/                   |             |
|    fps                  | 75          |
|    iterations           | 17          |
|    time_elapsed         | 459         |
|    total_timesteps      | 444416      |
| train/                  |             |
|    approx_kl            | 0.018792082 |
|    clip_fraction        | 0.225       |
|    clip_range           | 0.2         |
|    entropy_loss         | -10.4       |
|    explained_variance   | 0.729       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.134       |
|    n_updates            | 2160        |
|    policy_gradient_loss | 0.00155     |
|    std                  | 0.778       |
|    value_loss           | 0.491       |
-----------------------------------------
-----------------------------------------
| rollout/                |       

Logging to logs/PPO1_0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 12       |
|    ep_rew_mean     | 1.41     |
| time/              |          |
|    fps             | 78       |
|    iterations      | 1        |
|    time_elapsed    | 26       |
|    total_timesteps | 462848   |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 0.832       |
| time/                   |             |
|    fps                  | 76          |
|    iterations           | 2           |
|    time_elapsed         | 53          |
|    total_timesteps      | 464896      |
| train/                  |             |
|    approx_kl            | 0.046477854 |
|    clip_fraction        | 0.254       |
|    clip_range           | 0.2         |
|    entropy_loss         | -10.4       |
|    explained_variance   | 0.74        |
|    le

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 1.54        |
| time/                   |             |
|    fps                  | 75          |
|    iterations           | 11          |
|    time_elapsed         | 296         |
|    total_timesteps      | 483328      |
| train/                  |             |
|    approx_kl            | 0.025205178 |
|    clip_fraction        | 0.234       |
|    clip_range           | 0.2         |
|    entropy_loss         | -10.4       |
|    explained_variance   | 0.7         |
|    learning_rate        | 0.0003      |
|    loss                 | 0.199       |
|    n_updates            | 2350        |
|    policy_gradient_loss | 0.00567     |
|    std                  | 0.779       |
|    value_loss           | 0.715       |
-----------------------------------------
------------------------------------------
| rollout/                |      

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 12         |
|    ep_rew_mean          | 1.01       |
| time/                   |            |
|    fps                  | 75         |
|    iterations           | 20         |
|    time_elapsed         | 540        |
|    total_timesteps      | 501760     |
| train/                  |            |
|    approx_kl            | 0.01237263 |
|    clip_fraction        | 0.138      |
|    clip_range           | 0.2        |
|    entropy_loss         | -10.3      |
|    explained_variance   | 0.685      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.52       |
|    n_updates            | 2440       |
|    policy_gradient_loss | 0.00278    |
|    std                  | 0.775      |
|    value_loss           | 0.656      |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 12         |
|    ep_rew_mean          | 1.28       |
| time/                   |            |
|    fps                  | 76         |
|    iterations           | 5          |
|    time_elapsed         | 134        |
|    total_timesteps      | 522240     |
| train/                  |            |
|    approx_kl            | 0.01276689 |
|    clip_fraction        | 0.326      |
|    clip_range           | 0.2        |
|    entropy_loss         | -10.3      |
|    explained_variance   | 0.743      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.222      |
|    n_updates            | 2540       |
|    policy_gradient_loss | 0.00144    |
|    std                  | 0.77       |
|    value_loss           | 0.559      |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 0.921       |
| time/                   |             |
|    fps                  | 75          |
|    iterations           | 14          |
|    time_elapsed         | 377         |
|    total_timesteps      | 540672      |
| train/                  |             |
|    approx_kl            | 0.008779192 |
|    clip_fraction        | 0.142       |
|    clip_range           | 0.2         |
|    entropy_loss         | -10.3       |
|    explained_variance   | 0.71        |
|    learning_rate        | 0.0003      |
|    loss                 | 0.625       |
|    n_updates            | 2630        |
|    policy_gradient_loss | 0.00131     |
|    std                  | 0.772       |
|    value_loss           | 0.717       |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 1.34        |
| time/                   |             |
|    fps                  | 75          |
|    iterations           | 23          |
|    time_elapsed         | 621         |
|    total_timesteps      | 559104      |
| train/                  |             |
|    approx_kl            | 0.011198804 |
|    clip_fraction        | 0.163       |
|    clip_range           | 0.2         |
|    entropy_loss         | -10.2       |
|    explained_variance   | 0.705       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.219       |
|    n_updates            | 2720        |
|    policy_gradient_loss | 0.00398     |
|    std                  | 0.763       |
|    value_loss           | 0.75        |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 12         |
|    ep_rew_mean          | 1.36       |
| time/                   |            |
|    fps                  | 76         |
|    iterations           | 8          |
|    time_elapsed         | 215        |
|    total_timesteps      | 579584     |
| train/                  |            |
|    approx_kl            | 0.02609872 |
|    clip_fraction        | 0.245      |
|    clip_range           | 0.2        |
|    entropy_loss         | -10        |
|    explained_variance   | 0.72       |
|    learning_rate        | 0.0003     |
|    loss                 | 0.238      |
|    n_updates            | 2820       |
|    policy_gradient_loss | 0.00257    |
|    std                  | 0.75       |
|    value_loss           | 0.397      |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 1.39        |
| time/                   |             |
|    fps                  | 75          |
|    iterations           | 17          |
|    time_elapsed         | 458         |
|    total_timesteps      | 598016      |
| train/                  |             |
|    approx_kl            | 0.016994322 |
|    clip_fraction        | 0.252       |
|    clip_range           | 0.2         |
|    entropy_loss         | -9.93       |
|    explained_variance   | 0.689       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.73        |
|    n_updates            | 2910        |
|    policy_gradient_loss | 0.00509     |
|    std                  | 0.743       |
|    value_loss           | 0.768       |
-----------------------------------------
------------------------------------------
| rollout/                |      

Logging to logs/PPO1_0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 12       |
|    ep_rew_mean     | 1.59     |
| time/              |          |
|    fps             | 77       |
|    iterations      | 1        |
|    time_elapsed    | 26       |
|    total_timesteps | 616448   |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 12           |
|    ep_rew_mean          | 1.15         |
| time/                   |              |
|    fps                  | 76           |
|    iterations           | 2            |
|    time_elapsed         | 53           |
|    total_timesteps      | 618496       |
| train/                  |              |
|    approx_kl            | 0.0068973512 |
|    clip_fraction        | 0.185        |
|    clip_range           | 0.2          |
|    entropy_loss         | -9.79        |
|    explained_variance   | 0.718  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 1.25        |
| time/                   |             |
|    fps                  | 75          |
|    iterations           | 11          |
|    time_elapsed         | 296         |
|    total_timesteps      | 636928      |
| train/                  |             |
|    approx_kl            | 0.022480993 |
|    clip_fraction        | 0.254       |
|    clip_range           | 0.2         |
|    entropy_loss         | -9.7        |
|    explained_variance   | 0.703       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.486       |
|    n_updates            | 3100        |
|    policy_gradient_loss | 0.0083      |
|    std                  | 0.728       |
|    value_loss           | 0.932       |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 1.27        |
| time/                   |             |
|    fps                  | 75          |
|    iterations           | 20          |
|    time_elapsed         | 540         |
|    total_timesteps      | 655360      |
| train/                  |             |
|    approx_kl            | 0.050354525 |
|    clip_fraction        | 0.333       |
|    clip_range           | 0.2         |
|    entropy_loss         | -9.72       |
|    explained_variance   | 0.711       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.421       |
|    n_updates            | 3190        |
|    policy_gradient_loss | 0.00114     |
|    std                  | 0.731       |
|    value_loss           | 0.885       |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 12         |
|    ep_rew_mean          | 1.25       |
| time/                   |            |
|    fps                  | 76         |
|    iterations           | 5          |
|    time_elapsed         | 134        |
|    total_timesteps      | 675840     |
| train/                  |            |
|    approx_kl            | 0.01791593 |
|    clip_fraction        | 0.249      |
|    clip_range           | 0.2        |
|    entropy_loss         | -9.78      |
|    explained_variance   | 0.662      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.195      |
|    n_updates            | 3290       |
|    policy_gradient_loss | 0.00552    |
|    std                  | 0.736      |
|    value_loss           | 0.741      |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 1.2         |
| time/                   |             |
|    fps                  | 75          |
|    iterations           | 14          |
|    time_elapsed         | 377         |
|    total_timesteps      | 694272      |
| train/                  |             |
|    approx_kl            | 0.017270029 |
|    clip_fraction        | 0.251       |
|    clip_range           | 0.2         |
|    entropy_loss         | -9.77       |
|    explained_variance   | 0.697       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.234       |
|    n_updates            | 3380        |
|    policy_gradient_loss | 0.00231     |
|    std                  | 0.733       |
|    value_loss           | 0.602       |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 0.94        |
| time/                   |             |
|    fps                  | 75          |
|    iterations           | 23          |
|    time_elapsed         | 621         |
|    total_timesteps      | 712704      |
| train/                  |             |
|    approx_kl            | 0.011702256 |
|    clip_fraction        | 0.201       |
|    clip_range           | 0.2         |
|    entropy_loss         | -9.74       |
|    explained_variance   | 0.644       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.379       |
|    n_updates            | 3470        |
|    policy_gradient_loss | 0.00157     |
|    std                  | 0.731       |
|    value_loss           | 0.904       |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 1.16        |
| time/                   |             |
|    fps                  | 76          |
|    iterations           | 8           |
|    time_elapsed         | 215         |
|    total_timesteps      | 733184      |
| train/                  |             |
|    approx_kl            | 0.045540743 |
|    clip_fraction        | 0.219       |
|    clip_range           | 0.2         |
|    entropy_loss         | -9.7        |
|    explained_variance   | 0.688       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.277       |
|    n_updates            | 3570        |
|    policy_gradient_loss | 0.00723     |
|    std                  | 0.728       |
|    value_loss           | 0.853       |
-----------------------------------------
----------------------------------------
| rollout/                |        

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 1.34        |
| time/                   |             |
|    fps                  | 75          |
|    iterations           | 17          |
|    time_elapsed         | 458         |
|    total_timesteps      | 751616      |
| train/                  |             |
|    approx_kl            | 0.025302311 |
|    clip_fraction        | 0.313       |
|    clip_range           | 0.2         |
|    entropy_loss         | -9.76       |
|    explained_variance   | 0.746       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.516       |
|    n_updates            | 3660        |
|    policy_gradient_loss | 0.00225     |
|    std                  | 0.732       |
|    value_loss           | 0.541       |
-----------------------------------------
-----------------------------------------
| rollout/                |       

Logging to logs/PPO1_0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 12       |
|    ep_rew_mean     | 1.22     |
| time/              |          |
|    fps             | 78       |
|    iterations      | 1        |
|    time_elapsed    | 26       |
|    total_timesteps | 770048   |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 1.26        |
| time/                   |             |
|    fps                  | 76          |
|    iterations           | 2           |
|    time_elapsed         | 53          |
|    total_timesteps      | 772096      |
| train/                  |             |
|    approx_kl            | 0.012806005 |
|    clip_fraction        | 0.218       |
|    clip_range           | 0.2         |
|    entropy_loss         | -9.85       |
|    explained_variance   | 0.717       |
|    le

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 1.57        |
| time/                   |             |
|    fps                  | 75          |
|    iterations           | 11          |
|    time_elapsed         | 296         |
|    total_timesteps      | 790528      |
| train/                  |             |
|    approx_kl            | 0.009047957 |
|    clip_fraction        | 0.152       |
|    clip_range           | 0.2         |
|    entropy_loss         | -9.77       |
|    explained_variance   | 0.746       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.509       |
|    n_updates            | 3850        |
|    policy_gradient_loss | 0.00248     |
|    std                  | 0.738       |
|    value_loss           | 0.578       |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 12         |
|    ep_rew_mean          | 1.47       |
| time/                   |            |
|    fps                  | 75         |
|    iterations           | 20         |
|    time_elapsed         | 539        |
|    total_timesteps      | 808960     |
| train/                  |            |
|    approx_kl            | 0.03189892 |
|    clip_fraction        | 0.256      |
|    clip_range           | 0.2        |
|    entropy_loss         | -9.78      |
|    explained_variance   | 0.703      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.316      |
|    n_updates            | 3940       |
|    policy_gradient_loss | 0.00153    |
|    std                  | 0.737      |
|    value_loss           | 0.704      |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 12         |
|    ep_rew_mean          | 1.21       |
| time/                   |            |
|    fps                  | 76         |
|    iterations           | 5          |
|    time_elapsed         | 134        |
|    total_timesteps      | 829440     |
| train/                  |            |
|    approx_kl            | 0.04912191 |
|    clip_fraction        | 0.302      |
|    clip_range           | 0.2        |
|    entropy_loss         | -9.72      |
|    explained_variance   | 0.687      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.352      |
|    n_updates            | 4040       |
|    policy_gradient_loss | 0.0116     |
|    std                  | 0.733      |
|    value_loss           | 0.566      |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 1.3         |
| time/                   |             |
|    fps                  | 75          |
|    iterations           | 14          |
|    time_elapsed         | 377         |
|    total_timesteps      | 847872      |
| train/                  |             |
|    approx_kl            | 0.017617334 |
|    clip_fraction        | 0.286       |
|    clip_range           | 0.2         |
|    entropy_loss         | -9.7        |
|    explained_variance   | 0.692       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.313       |
|    n_updates            | 4130        |
|    policy_gradient_loss | 0.00865     |
|    std                  | 0.733       |
|    value_loss           | 0.601       |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 1.38        |
| time/                   |             |
|    fps                  | 75          |
|    iterations           | 23          |
|    time_elapsed         | 620         |
|    total_timesteps      | 866304      |
| train/                  |             |
|    approx_kl            | 0.020089637 |
|    clip_fraction        | 0.261       |
|    clip_range           | 0.2         |
|    entropy_loss         | -9.65       |
|    explained_variance   | 0.715       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.25        |
|    n_updates            | 4220        |
|    policy_gradient_loss | 0.00584     |
|    std                  | 0.725       |
|    value_loss           | 0.6         |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 1.15        |
| time/                   |             |
|    fps                  | 76          |
|    iterations           | 8           |
|    time_elapsed         | 215         |
|    total_timesteps      | 886784      |
| train/                  |             |
|    approx_kl            | 0.008787679 |
|    clip_fraction        | 0.154       |
|    clip_range           | 0.2         |
|    entropy_loss         | -9.54       |
|    explained_variance   | 0.587       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.839       |
|    n_updates            | 4320        |
|    policy_gradient_loss | -0.00236    |
|    std                  | 0.719       |
|    value_loss           | 0.964       |
-----------------------------------------
------------------------------------------
| rollout/                |      

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 1.56        |
| time/                   |             |
|    fps                  | 75          |
|    iterations           | 17          |
|    time_elapsed         | 458         |
|    total_timesteps      | 905216      |
| train/                  |             |
|    approx_kl            | 0.010556339 |
|    clip_fraction        | 0.194       |
|    clip_range           | 0.2         |
|    entropy_loss         | -9.47       |
|    explained_variance   | 0.741       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.385       |
|    n_updates            | 4410        |
|    policy_gradient_loss | -0.000552   |
|    std                  | 0.714       |
|    value_loss           | 0.737       |
-----------------------------------------
-----------------------------------------
| rollout/                |       

Logging to logs/PPO1_0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 12       |
|    ep_rew_mean     | 0.974    |
| time/              |          |
|    fps             | 78       |
|    iterations      | 1        |
|    time_elapsed    | 26       |
|    total_timesteps | 923648   |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 1.01        |
| time/                   |             |
|    fps                  | 76          |
|    iterations           | 2           |
|    time_elapsed         | 53          |
|    total_timesteps      | 925696      |
| train/                  |             |
|    approx_kl            | 0.042049423 |
|    clip_fraction        | 0.281       |
|    clip_range           | 0.2         |
|    entropy_loss         | -9.43       |
|    explained_variance   | 0.747       |
|    le

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 12         |
|    ep_rew_mean          | 1.26       |
| time/                   |            |
|    fps                  | 76         |
|    iterations           | 11         |
|    time_elapsed         | 296        |
|    total_timesteps      | 944128     |
| train/                  |            |
|    approx_kl            | 0.01574119 |
|    clip_fraction        | 0.249      |
|    clip_range           | 0.2        |
|    entropy_loss         | -9.43      |
|    explained_variance   | 0.666      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.19       |
|    n_updates            | 4600       |
|    policy_gradient_loss | 0.00207    |
|    std                  | 0.711      |
|    value_loss           | 0.576      |
----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_

In [None]:
for _ in range(10):
    obs = env.reset()
    done = False
    while not done:
        env.render()
        obs, reward, done, info = env.step(env.action_space.sample())

In [71]:
obs = env.reset()

In [80]:
obs, reward, done, info = env.step(env.action_space.sample())
print(obs)
print(reward)

[1.7070101e+01 1.6412806e-02 2.8751437e+02]
1.000000002699536
