## CNC Mill Wear Prediction Using Reinforcement Learning and Data from PHM 2010  

In [None]:
pip install torch torchvision torchaudio

In [None]:
import torch

if torch.cuda.is_available():
    print("CUDA is available! Using GPU.")
else:
    print("CUDA is not available. Using CPU.")

In [None]:
pip install stable-baselines3[extra]

In [None]:
pip install gymnasium

In [None]:
# Helper Libraries
import matplotlib.pyplot as plt
import numpy as np
import random
import pandas as pd
import os
import torch
import time

# Environment Building
import gymnasium
from gymnasium import Env
from gymnasium.spaces import Discrete, Box, Dict, Tuple, MultiBinary, MultiDiscrete

# RL Algorithms
import stable_baselines3
from stable_baselines3 import PPO
from stable_baselines3 import A2C
from stable_baselines3 import DDPG
from stable_baselines3 import SAC

from stable_baselines3.common.env_checker import check_env
from stable_baselines3.common.vec_env import VecFrameStack
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.results_plotter import plot_results, X_TIMESTEPS
from stable_baselines3.common.monitor import Monitor

from pathlib import Path

In [None]:
file_paths_cnc = "./normalised_time_domain_features_with_cut_and_flute_max.csv"
df = pd.read_csv(file_paths_cnc)
print(df.head())

In [None]:
# Global Parameters for CNC Custom Env

MAX_STEPS = len(df.loc[:, 'File'].values) #len = 315

MAX_DIFF = 6

REW_WEIGHT = [0.1 , 0.1 , 0.1 , 0.7] # flute_1-2-3 and flute_max

time_domain_features_columns = ['dyn_X_mean', 'dyn_X_rms', 'dyn_X_crest', 'dyn_X_avg_power', 'dyn_X_skewness', 'dyn_X_kurtosis', 'dyn_Y_mean', 'dyn_Y_rms', 'dyn_Y_crest', 'dyn_Y_avg_power', 'dyn_Y_skewness', 'dyn_Y_kurtosis', 'dyn_Z_mean', 'dyn_Z_rms', 'dyn_Z_crest', 'dyn_Z_avg_power', 'dyn_Z_skewness', 'dyn_Z_kurtosis', 'acc_X_mean', 'acc_X_rms', 'acc_X_crest', 'acc_X_avg_power', 'acc_X_skewness', 'acc_X_kurtosis', 'acc_Y_mean', 'acc_Y_rms', 'acc_Y_crest', 'acc_Y_avg_power', 'acc_Y_skewness', 'acc_Y_kurtosis', 'acc_Z_mean', 'acc_Z_rms', 'acc_Z_crest', 'acc_Z_avg_power', 'acc_Z_skewness', 'acc_Z_kurtosis', 'AE_mean', 'AE_rms', 'AE_crest', 'AE_avg_power', 'AE_skewness', 'AE_kurtosis']
flute_wear_collumns = ['flute_1','flute_2', 'flute_3', 'flute_max']

In [None]:
''''class CNC_Env_Non_Corrective_Pred(gymnasium.Env):

    def __init__(self, df):

        self.df = df
        self.reward_range = (np.NINF , 1 ) # Πρέπει να ορίσω το εύρος των rewards

        # Define the low and high bounds for each dimension of the action space
        low_bound = np.array([0, 0, 0], dtype=np.float64)
        high_bound = np.array([1, 1, 1], dtype=np.float64)

        # Action Space
        self.action_space = gymnasium.spaces.Box(low=low_bound, high=high_bound, dtype=np.float64)

        # Observation Space | Είναι κανονικοποιημένο από 0-1 | 7 sensors * 6 time domain features = 42
        self.observation_space = gymnasium.spaces.Box(low=0, high=1, shape=(42,), dtype=np.float64)

        # Ξεκινάω το περιβάλλον στην αρχική του κατάσταση (state s0)
        self._state_s0()

    def _state_s0(self):
        # βάλε randomness εδώ
        self.current_step = 0
        # flute_1 = df[] diff bettwen d[1] - d[0] * (random val (0,2))
        # state_0 - ( state_1 - state_0)
        self.flute_1 = (2*self.df.loc[0, "flute_1"]) - self.df.loc[1, "flute_1"]
        self.flute_2 = (2*self.df.loc[0, "flute_2"]) - self.df.loc[1, "flute_2"]
        self.flute_3 =  (2*self.df.loc[0, "flute_3"]) - self.df.loc[1, "flute_3"]
        self.flute_max = (2*self.df.loc[0, "flute_max"]) - self.df.loc[1, "flute_max"]

        # Θέτουμε και τα predictions στην αρχική κατάσταση του συστήματος
        self.flute_1_pred = self.flute_1
        self.flute_2_pred = self.flute_2
        self.flute_3_pred = self.flute_3
        self.flute_max_pred = max(self.flute_1 , self.flute_2 , self.flute_3)

        self.pred_dif_1 = 0
        self.pred_dif_2 = 0
        self.pred_dif_3 = 0
        self.pred_dif_max = 0

    # Constructing Observations From Environment States
    def _get_obs(self):

        # Παίρνω τα time domain features του current (δηλαδή του αντίστοιχου cut)
        frame = self.df.loc[self.current_step, time_domain_features_columns].to_numpy(dtype=np.float64)

        # Φτιαχνω το observation
        obs = frame

        return obs

    def _get_info(self):
        return {
            "cut": self.current_step
        }

    def _take_action(self, action):

        # Βρες την διαφορά μεταξύ της προηγούμενης τιμής και της εκτιμόμενης τιμής

        # Action Space Example : array([0.79758393, 0.41074004, 0.0026002 ])

        # Αν θέλουμε μετά από κάθε state Sn να κάνουμε ένα Predict βάση του σωστού Sn-1
        #self.flute_1_pred = self.flute_1 + (action[0] * MAX_DIFF)
        #self.flute_2_pred = self.flute_2 + (action[1] * MAX_DIFF)
        #self.flute_3_pred = self.flute_3 + (action[2] * MAX_DIFF)
        #self.flute_max_pred = max(self.flute_1_pred , self.flute_2_pred , self.flute_3_pred)

        ## Σε κάθε state Sn κάνουμε Predict βάση της προηγούμενης πρόβλεψης Sn-1
        #print("Before : ", self.flute_1_pred)
        # φθορα στο μυτακι 1 (ν) = φθορα στο μυτακι 1 πρβλεπομενη(ν-1) + prediction του agent
        # Prediction του agent = 100% max diff φθορων σε καταστασεις χρησεις του μηχανηματος (ν -> ν+1)
        self.flute_1_pred += (action[0] * MAX_DIFF)
        #print("After : ", self.flute_1_pred)
        self.flute_2_pred += (action[1] * MAX_DIFF)
        self.flute_3_pred += (action[2] * MAX_DIFF)
        self.flute_max_pred = max(self.flute_1_pred , self.flute_2_pred , self.flute_3_pred)

        self.flute_1 = self.df.loc[self.current_step, "flute_1"]
        self.flute_2 = self.df.loc[self.current_step, "flute_2"]
        self.flute_3 = self.df.loc[self.current_step, "flute_3"]
        self.flute_max = self.df.loc[self.current_step, "flute_max"]

        # Η διαφορά μεταξύ της προβλεπόμενης τιμής και της κανονικής τιμής
        self.pred_dif_1 = self.flute_1_pred - self.flute_1
        self.pred_dif_2 = self.flute_1_pred - self.flute_1
        self.pred_dif_3 = self.flute_1_pred - self.flute_1
        self.pred_dif_max = self.flute_max_pred - self.flute_max

    def reset(self, seed = None, options = None):
        #We need the following line to seed self.np_random
        super().reset(seed=seed)

        # Επιστρέφω το περιβάλλον στην αρχική του κατάσταση (state s0)
        self._state_s0()

        # Set the current step to the start of the dataset
        self.current_step = 0

        observation = self._get_obs()
        info = self._get_info()

        return observation,info

    def score_function(self,delta):
        if delta < 0:
            return 1 - np.exp(-delta/10)
        else:
            return  1 - np.exp(delta/4.5)

    def step(self, action):
        # Execute one time step within the environment
        self._take_action(action)

        self.current_step += 1

        # Παραμετροποιεί το reward έτσι ώστε να ενισχύει τις δράσεις που προσφέρουν οφέλοι σε μέλλοντα χρόνο
        #delay_modifier = (self.current_step / MAX_STEPS)

        reward_flute_1 = self.score_function(self.pred_dif_1)
        reward_flute_2 = self.score_function(self.pred_dif_2)
        reward_flute_3 = self.score_function(self.pred_dif_3)
        reward_flute_max = self.score_function(self.pred_dif_max)

        reward = (REW_WEIGHT[0]*reward_flute_1) + (REW_WEIGHT[1]*reward_flute_2) + (REW_WEIGHT[2]*reward_flute_3) + (REW_WEIGHT[3]*reward_flute_max)
        #reward = reward * delay_modifier
        #print("Reward : " ,reward)
        # Ελέγχουμε εάν πάμε να βγούμε εκτός του dataset
        terminated = False
        if self.current_step > MAX_STEPS - 2:
            # τότε πρέπει να κάνουμε reset
            terminated = True  #διαλέγω terminated καθώς είναι φυσιολογική η λήξη

        observation = self._get_obs()
        info = self._get_info()

        #Εδω μπορούμε να βάλουμε κάποιο όριο απόκλισης τιμών στις προβλέψεις ώστε να κάνουμε την μάθηση ταχύτερη
        trunctated = False # Ελέγχουμε εάν υπάρχει κάποιο condition εξωτερικό του περιβάλλοντος πχ. Όριο χρόνου
        if reward < -5000 :
          trunctated = True

        return observation, reward, terminated,trunctated, info

    metadata = {"render_modes": ["human"]} #, "rgb_array"], "render_fps": 4}

    def render(self, mode='human'):
        # Render the environment (optional)
        pass

    def close(self):
        # Clean up resources (optional)
        pass

In [None]:
''''class CNC_Env_Corrective_Pred(gymnasium.Env):

    def __init__(self, df):

        self.df = df
        self.reward_range = (np.NINF , 1 ) # Πρέπει να ορίσω το εύρος των rewards

        # Define the low and high bounds for each dimension of the action space
        low_bound = np.array([0, 0, 0], dtype=np.float64)
        high_bound = np.array([1, 1, 1], dtype=np.float64)

        # Action Space
        self.action_space = gymnasium.spaces.Box(low=low_bound, high=high_bound, dtype=np.float64)

        # Observation Space | Είναι κανονικοποιημένο από 0-1
        self.observation_space = gymnasium.spaces.Box(low=0, high=1, shape=(42,), dtype=np.float64)

        # Ξεκινάω το περιβάλλον στην αρχική του κατάσταση (state s0)
        self._state_s0()

    def _state_s0(self):
        # βάλε randomness εδώ
        self.current_step = 0
        # flute_1 = df[] diff bettwen d[1] - d[0] * (random val (0,2))
        # state_0 - ( state_1 - state_0)
        self.flute_1 = (2*self.df.loc[0, "flute_1"]) - self.df.loc[1, "flute_1"]
        self.flute_2 = (2*self.df.loc[0, "flute_2"]) - self.df.loc[1, "flute_2"]
        self.flute_3 =  (2*self.df.loc[0, "flute_3"]) - self.df.loc[1, "flute_3"]
        self.flute_max = (2*self.df.loc[0, "flute_max"]) - self.df.loc[1, "flute_max"]

        # Θέτουμε και τα predictions στην αρχική κατάσταση του συστήματος
        self.flute_1_pred = self.flute_1
        self.flute_2_pred = self.flute_2
        self.flute_3_pred = self.flute_3
        self.flute_max_pred = max(self.flute_1 , self.flute_2 , self.flute_3)

        self.pred_dif_1 = 0
        self.pred_dif_2 = 0
        self.pred_dif_3 = 0
        self.pred_dif_max = 0

    # Constructing Observations From Environment States
    def _get_obs(self):

        # Παίρνω τα time domain features του current (δηλαδή του αντίστοιχου cut)
        frame = self.df.loc[self.current_step, time_domain_features_columns].to_numpy(dtype=np.float64)

        # Φτιαχνω το observation
        obs = frame

        return obs

    def _get_info(self):
        return {
            "cut": self.current_step
        }

    def _take_action(self, action):

        # Βρες την διαφορά μεταξύ της προηγούμενης τιμής και της εκτιμόμενης τιμής

        # Action Space Example : array([0.79758393, 0.41074004, 0.0026002 ])

        # Αν θέλουμε μετά από κάθε state Sn να κάνουμε ένα Predict βάση του σωστού Sn-1
        self.flute_1_pred = self.flute_1 + (action[0] * MAX_DIFF)
        self.flute_2_pred = self.flute_2 + (action[1] * MAX_DIFF)
        self.flute_3_pred = self.flute_3 + (action[2] * MAX_DIFF)
        self.flute_max_pred = max(self.flute_1_pred , self.flute_2_pred , self.flute_3_pred)

        ## Σε κάθε state Sn κάνουμε Predict βάση της προηγούμενης πρόβλεψης Sn-1
        #self.flute_1_pred = self.flute_1_pred + (action[0] * MAX_DIFF)    # !!!!!!!! flute_1_pred
        #self.flute_2_pred = self.flute_2_pred + (action[1] * MAX_DIFF)
        #self.flute_3_pred = self.flute_3_pred + (action[2] * MAX_DIFF)
        #self.flute_max_pred = max(self.flute_1_pred , self.flute_2_pred , self.flute_3_pred)

        self.flute_1 = self.df.loc[self.current_step, "flute_1"]
        self.flute_2 = self.df.loc[self.current_step, "flute_2"]
        self.flute_3 = self.df.loc[self.current_step, "flute_3"]
        self.flute_max = self.df.loc[self.current_step, "flute_max"]

        # Η διαφορά μεταξύ της προβλεπόμενης τιμής και της κανονικής τιμής
        self.pred_dif_1 = self.flute_1_pred - self.flute_1
        self.pred_dif_2 = self.flute_1_pred - self.flute_1
        self.pred_dif_3 = self.flute_1_pred - self.flute_1
        self.pred_dif_max = self.flute_max_pred - self.flute_max

    def reset(self, seed = None, options = None):
        #We need the following line to seed self.np_random
        super().reset(seed=seed)

        # Επιστρέφω το περιβάλλον στην αρχική του κατάσταση (state s0)
        self._state_s0()

        # Set the current step to the start of the dataset
        self.current_step = 0

        observation = self._get_obs()
        info = self._get_info()

        return observation,info

    def score_function(self,delta):
        if delta < 0:
            return 1 - np.exp(-delta/10)
        else:
            return  1 - np.exp(delta/4.5)

    def step(self, action):
        # Execute one time step within the environment
        self._take_action(action)

        self.current_step += 1

        # Παραμετροποιεί το reward έτσι ώστε να ενισχύει τις δράσεις που προσφέρουν οφέλοι σε μέλλοντα χρόνο
        #delay_modifier = (self.current_step / MAX_STEPS)

        reward_flute_1 = self.score_function(self.pred_dif_1)
        reward_flute_2 = self.score_function(self.pred_dif_2)
        reward_flute_3 = self.score_function(self.pred_dif_3)
        reward_flute_max = self.score_function(self.pred_dif_max)

        reward = (REW_WEIGHT[0]*reward_flute_1) + (REW_WEIGHT[1]*reward_flute_2) + (REW_WEIGHT[2]*reward_flute_3) + (REW_WEIGHT[3]*reward_flute_max)
        #reward = reward * delay_modifier

        # Ελέγχουμε εάν πάμε να βγούμε εκτός του dataset
        terminated = False
        if self.current_step > MAX_STEPS - 2:
            # τότε πρέπει να κάνουμε reset
            terminated = True  #διαλέγω terminated καθώς είναι φυσιολογική η λήξη

        observation = self._get_obs()
        info = self._get_info()

        #Εδω μπορούμε να βάλουμε κάποιο όριο απόκλισης τιμών στις προβλέψεις ώστε να κάνουμε την μάθηση ταχύτερη
        trunctated = False # Ελέγχουμε εάν υπάρχει κάποιο condition εξωτερικό του περιβάλλοντος πχ. Όριο χρόνου

        return observation, reward, terminated,trunctated, info

    metadata = {"render_modes": ["human"]} #, "rgb_array"], "render_fps": 4}

    def render(self, mode='human'):
        # Render the environment (optional)
        pass

    def close(self):
        # Clean up resources (optional)
        pass

In [None]:
class CNC_Env_Corrective_Pred_No_Delay(gymnasium.Env):

    def __init__(self, df):

        self.df = df
        self.reward_range = (np.NINF , 1 ) # Πρέπει να ορίσω το εύρος των rewards

        # Define the low and high bounds for each dimension of the action space
        low_bound = np.array([0, 0, 0], dtype=np.float64)
        high_bound = np.array([1, 1, 1], dtype=np.float64)

        # Action Space
        self.action_space = gymnasium.spaces.Box(low=low_bound, high=high_bound, dtype=np.float64)

        # Observation Space | Είναι κανονικοποιημένο από 0-1
        self.observation_space = gymnasium.spaces.Box(low=0, high=1, shape=(42,), dtype=np.float64)

        # Ξεκινάω το περιβάλλον στην αρχική του κατάσταση (state s0)
        self._state_s0()

    def _state_s0(self):
        # βάλε randomness εδώ
        self.current_step = 0
        # flute_1 = df[] diff bettwen d[1] - d[0] * (random val (0,2))
        # state_0 - ( state_1 - state_0)
        self.flute_1 = (2*self.df.loc[0, "flute_1"]) - self.df.loc[1, "flute_1"]
        self.flute_2 = (2*self.df.loc[0, "flute_2"]) - self.df.loc[1, "flute_2"]
        self.flute_3 =  (2*self.df.loc[0, "flute_3"]) - self.df.loc[1, "flute_3"]
        self.flute_max = (2*self.df.loc[0, "flute_max"]) - self.df.loc[1, "flute_max"]

        # Θέτουμε και τα predictions στην αρχική κατάσταση του συστήματος
        self.flute_1_pred = self.flute_1
        self.flute_2_pred = self.flute_2
        self.flute_3_pred = self.flute_3
        self.flute_max_pred = max(self.flute_1 , self.flute_2 , self.flute_3)

        self.pred_dif_1 = 0
        self.pred_dif_2 = 0
        self.pred_dif_3 = 0
        self.pred_dif_max = 0
        
        #Phm score
        self.score = 0 
        

    # Constructing Observations From Environment States
    def _get_obs(self):

        # Παίρνω τα time domain features του current (δηλαδή του αντίστοιχου cut)
        frame = self.df.loc[self.current_step, time_domain_features_columns].to_numpy(dtype=np.float64)

        # Φτιαχνω το observation
        obs = frame

        return obs

    def _get_info(self):
        return {
            "cut": self.current_step,
            "score": self.score,
        }
    

    def _take_action(self, action):

        # Βρες την διαφορά μεταξύ της προηγούμενης τιμής και της εκτιμόμενης τιμής

        # Action Space Example : array([0.79758393, 0.41074004, 0.0026002 ])

        # Αν θέλουμε μετά από κάθε state Sn να κάνουμε ένα Predict βάση του σωστού Sn-1
        self.flute_1_pred = self.flute_1 + (action[0] * MAX_DIFF)
        self.flute_2_pred = self.flute_2 + (action[1] * MAX_DIFF)
        self.flute_3_pred = self.flute_3 + (action[2] * MAX_DIFF)
        self.flute_max_pred = max(self.flute_1_pred , self.flute_2_pred , self.flute_3_pred)

        self.flute_1 = self.df.loc[self.current_step, "flute_1"]
        self.flute_2 = self.df.loc[self.current_step, "flute_2"]
        self.flute_3 = self.df.loc[self.current_step, "flute_3"]
        self.flute_max = self.df.loc[self.current_step, "flute_max"]

        # Η διαφορά μεταξύ της προβλεπόμενης τιμής και της κανονικής τιμής
        self.pred_dif_1 = self.flute_1_pred - self.flute_1
        self.pred_dif_2 = self.flute_1_pred - self.flute_1
        self.pred_dif_3 = self.flute_1_pred - self.flute_1
        self.pred_dif_max = self.flute_max_pred - self.flute_max
        
        
    def reset(self, seed = None, options = None):
        #We need the following line to seed self.np_random
        super().reset(seed=seed)

        # Επιστρέφω το περιβάλλον στην αρχική του κατάσταση (state s0)
        self._state_s0()

        # Set the current step to the start of the dataset
        self.current_step = 0

        observation = self._get_obs()
        info = self._get_info()

        return observation,info
    
    def score_function_original(self, delta):
        if delta < 0:
            return np.exp(-delta/10) - 1
        else:
            return np.exp(delta/4.5) - 1

    def score_function(self,delta):
        if delta < 0:
            return 1 - np.exp(-delta/10)
        else:
            return  1 - np.exp(delta/4.5)

    def step(self, action):
        # Execute one time step within the environment
        self._take_action(action)

        self.current_step += 1

        # Παραμετροποιεί το reward έτσι ώστε να ενισχύει τις δράσεις που προσφέρουν οφέλοι σε μέλλοντα χρόνο
        #delay_modifier = (self.current_step / MAX_STEPS)

        reward_flute_1 = self.score_function(self.pred_dif_1)
        reward_flute_2 = self.score_function(self.pred_dif_2)
        reward_flute_3 = self.score_function(self.pred_dif_3)
        reward_flute_max = self.score_function(self.pred_dif_max)

        reward = (REW_WEIGHT[0]*reward_flute_1) + (REW_WEIGHT[1]*reward_flute_2) + (REW_WEIGHT[2]*reward_flute_3) + (REW_WEIGHT[3]*reward_flute_max)
        #reward = reward * delay_modifier
        
        # H βαθμολογία που θα είχε στο διαγωνισμό
        self.score += self.score_function_original(self.pred_dif_max)
        
        info = self._get_info()
        
        # Ελέγχουμε εάν πάμε να βγούμε εκτός του dataset
        terminated = False
        if self.current_step > MAX_STEPS - 2:
            # τότε πρέπει να κάνουμε reset
            #print("Terminated at :" self.current_step)
            terminated = True  #διαλέγω terminated καθώς είναι φυσιολογική η λήξη

        observation = self._get_obs()

        #Εδω μπορούμε να βάλουμε κάποιο όριο απόκλισης τιμών στις προβλέψεις ώστε να κάνουμε την μάθηση ταχύτερη
        trunctated = False # Ελέγχουμε εάν υπάρχει κάποιο condition εξωτερικό του περιβάλλοντος πχ. Όριο χρόνου

        return observation, reward, terminated,trunctated, info

    metadata = {"render_modes": ["human"]} #, "rgb_array"], "render_fps": 4}

    def render(self, mode='human'):
        # Render the environment (optional)
        pass

    def close(self):
        # Clean up resources (optional)
        pass

In [None]:
class CNC_Env_Corrective_Pred_with_Delay(gymnasium.Env):

    def __init__(self, df):

        self.df = df
        self.reward_range = (np.NINF , 1 ) # Πρέπει να ορίσω το εύρος των rewards

        # Define the low and high bounds for each dimension of the action space
        low_bound = np.array([0, 0, 0], dtype=np.float64)
        high_bound = np.array([1, 1, 1], dtype=np.float64)

        # Action Space
        self.action_space = gymnasium.spaces.Box(low=low_bound, high=high_bound, dtype=np.float64)

        # Observation Space | Είναι κανονικοποιημένο από 0-1
        self.observation_space = gymnasium.spaces.Box(low=0, high=1, shape=(42,), dtype=np.float64)

        # Ξεκινάω το περιβάλλον στην αρχική του κατάσταση (state s0)
        self._state_s0()

    def _state_s0(self):
        # βάλε randomness εδώ
        self.current_step = 0
        # flute_1 = df[] diff bettwen d[1] - d[0] * (random val (0,2))
        # state_0 - ( state_1 - state_0)
        self.flute_1 = (2*self.df.loc[0, "flute_1"]) - self.df.loc[1, "flute_1"]
        self.flute_2 = (2*self.df.loc[0, "flute_2"]) - self.df.loc[1, "flute_2"]
        self.flute_3 =  (2*self.df.loc[0, "flute_3"]) - self.df.loc[1, "flute_3"]
        self.flute_max = (2*self.df.loc[0, "flute_max"]) - self.df.loc[1, "flute_max"]

        # Θέτουμε και τα predictions στην αρχική κατάσταση του συστήματος
        self.flute_1_pred = self.flute_1
        self.flute_2_pred = self.flute_2
        self.flute_3_pred = self.flute_3
        self.flute_max_pred = max(self.flute_1 , self.flute_2 , self.flute_3)

        self.pred_dif_1 = 0
        self.pred_dif_2 = 0
        self.pred_dif_3 = 0
        self.pred_dif_max = 0
        
        #Phm score
        self.score = 0 

    # Constructing Observations From Environment States
    def _get_obs(self):

        # Παίρνω τα time domain features του current (δηλαδή του αντίστοιχου cut)
        frame = self.df.loc[self.current_step, time_domain_features_columns].to_numpy(dtype=np.float64)

        # Φτιαχνω το observation
        obs = frame

        return obs

    def _get_info(self):
        return {
            "cut": self.current_step,
            "score": self.score
        }
    

    def _take_action(self, action):

        # Βρες την διαφορά μεταξύ της προηγούμενης τιμής και της εκτιμόμενης τιμής

        # Action Space Example : array([0.79758393, 0.41074004, 0.0026002 ])

        # Αν θέλουμε μετά από κάθε state Sn να κάνουμε ένα Predict βάση του σωστού Sn-1
        self.flute_1_pred = self.flute_1 + (action[0] * MAX_DIFF)
        self.flute_2_pred = self.flute_2 + (action[1] * MAX_DIFF)
        self.flute_3_pred = self.flute_3 + (action[2] * MAX_DIFF)
        self.flute_max_pred = max(self.flute_1_pred , self.flute_2_pred , self.flute_3_pred)

        self.flute_1 = self.df.loc[self.current_step, "flute_1"]
        self.flute_2 = self.df.loc[self.current_step, "flute_2"]
        self.flute_3 = self.df.loc[self.current_step, "flute_3"]
        self.flute_max = self.df.loc[self.current_step, "flute_max"]

        # Η διαφορά μεταξύ της προβλεπόμενης τιμής και της κανονικής τιμής
        self.pred_dif_1 = self.flute_1_pred - self.flute_1
        self.pred_dif_2 = self.flute_1_pred - self.flute_1
        self.pred_dif_3 = self.flute_1_pred - self.flute_1
        self.pred_dif_max = self.flute_max_pred - self.flute_max
        
        
    def reset(self, seed = None, options = None):
        #We need the following line to seed self.np_random
        super().reset(seed=seed)

        # Επιστρέφω το περιβάλλον στην αρχική του κατάσταση (state s0)
        self._state_s0()

        # Set the current step to the start of the dataset
        self.current_step = 0

        observation = self._get_obs()
        info = self._get_info()

        return observation,info
    
    def score_function_original(self, delta):
        if delta < 0:
            return np.exp(-delta/10) - 1
        else:
            return np.exp(delta/4.5) - 1

    def score_function(self,delta):
        if delta < 0:
            return 1 - np.exp(-delta/10)
        else:
            return  1 - np.exp(delta/4.5)

    def step(self, action):
        # Execute one time step within the environment
        self._take_action(action)

        self.current_step += 1

        reward_flute_1 = self.score_function(self.pred_dif_1)
        reward_flute_2 = self.score_function(self.pred_dif_2)
        reward_flute_3 = self.score_function(self.pred_dif_3)
        reward_flute_max = self.score_function(self.pred_dif_max)

        reward_without_delay = (REW_WEIGHT[0]*reward_flute_1) + (REW_WEIGHT[1]*reward_flute_2) + (REW_WEIGHT[2]*reward_flute_3) + (REW_WEIGHT[3]*reward_flute_max)
        # Παραμετροποιεί το reward έτσι ώστε να ενισχύει τις δράσεις που προσφέρουν οφέλοι σε μέλλοντα χρόνο
        delay_modifier = (self.current_step / (MAX_STEPS-2))
        reward = reward_without_delay * delay_modifier
        
        # H βαθμολογία που θα είχε στο διαγωνισμό
        self.score += self.score_function_original(self.pred_dif_max)
        
        info = self._get_info()
        
        # Ελέγχουμε εάν πάμε να βγούμε εκτός του dataset
        terminated = False
        if self.current_step > MAX_STEPS - 2:
            # τότε πρέπει να κάνουμε reset
            terminated = True  #διαλέγω terminated καθώς είναι φυσιολογική η λήξη

        observation = self._get_obs()

        #Εδω μπορούμε να βάλουμε κάποιο όριο απόκλισης τιμών στις προβλέψεις ώστε να κάνουμε την μάθηση ταχύτερη
        trunctated = False # Ελέγχουμε εάν υπάρχει κάποιο condition εξωτερικό του περιβάλλοντος πχ. Όριο χρόνου

        return observation, reward, terminated,trunctated, info

    metadata = {"render_modes": ["human"]} #, "rgb_array"], "render_fps": 4}

    def render(self, mode='human'):
        # Render the environment (optional)
        pass

    def close(self):
        # Clean up resources (optional)
        pass

In [None]:
class CNC_Env_Non_Corrective_No_Delay(gymnasium.Env):

    def __init__(self, df):

        self.df = df
        self.reward_range = (np.NINF , 1 ) # Πρέπει να ορίσω το εύρος των rewards

        # Define the low and high bounds for each dimension of the action space
        low_bound = np.array([0, 0, 0], dtype=np.float64)
        high_bound = np.array([1, 1, 1], dtype=np.float64)

        # Action Space
        self.action_space = gymnasium.spaces.Box(low=low_bound, high=high_bound, dtype=np.float64)

        # Observation Space | Είναι κανονικοποιημένο από 0-1 | 7 sensors * 6 time domain features = 42
        self.observation_space = gymnasium.spaces.Box(low=0, high=1, shape=(42,), dtype=np.float64)

        # Ξεκινάω το περιβάλλον στην αρχική του κατάσταση (state s0)
        self._state_s0()

    def _state_s0(self):
        # βάλε randomness εδώ
        self.current_step = 0
        # flute_1 = df[] diff bettwen d[1] - d[0] * (random val (0,2))
        # state_0 - ( state_1 - state_0)
        self.flute_1 = (2*self.df.loc[0, "flute_1"]) - self.df.loc[1, "flute_1"]
        self.flute_2 = (2*self.df.loc[0, "flute_2"]) - self.df.loc[1, "flute_2"]
        self.flute_3 =  (2*self.df.loc[0, "flute_3"]) - self.df.loc[1, "flute_3"]
        self.flute_max = (2*self.df.loc[0, "flute_max"]) - self.df.loc[1, "flute_max"]

        # Θέτουμε και τα predictions στην αρχική κατάσταση του συστήματος
        self.flute_1_pred = self.flute_1
        self.flute_2_pred = self.flute_2
        self.flute_3_pred = self.flute_3
        self.flute_max_pred = max(self.flute_1 , self.flute_2 , self.flute_3)

        self.pred_dif_1 = 0
        self.pred_dif_2 = 0
        self.pred_dif_3 = 0
        self.pred_dif_max = 0
        
        #Phm Score
        self.score = 0 

    # Constructing Observations From Environment States
    def _get_obs(self):

        # Παίρνω τα time domain features του current (δηλαδή του αντίστοιχου cut)
        frame = self.df.loc[self.current_step, time_domain_features_columns].to_numpy(dtype=np.float64)

        # Φτιαχνω το observation
        obs = frame

        return obs

    def _get_info(self):
        return {
            "cut": self.current_step,
            "score": self.score,
        }

    def _take_action(self, action):

        # Action Space Example : array([0.79758393, 0.41074004, 0.0026002 ])

        ## Σε κάθε state Sn κάνουμε Predict βάση της προηγούμενης πρόβλεψης Sn-1
        # Φθορα στο μυτακι_1(ν) = φθορα στο μυτακι_1_πρβλεπομενη(ν-1) + prediction του agent
        # Prediction του agent = 100% max diff φθορών σε καταστασεις χρησεις του μηχανηματος (ν -> ν+1)
        
        self.flute_1_pred += (action[0] * MAX_DIFF)
        self.flute_2_pred += (action[1] * MAX_DIFF)
        self.flute_3_pred += (action[2] * MAX_DIFF)
        self.flute_max_pred = max(self.flute_1_pred , self.flute_2_pred , self.flute_3_pred)

        self.flute_1 = self.df.loc[self.current_step, "flute_1"]
        self.flute_2 = self.df.loc[self.current_step, "flute_2"]
        self.flute_3 = self.df.loc[self.current_step, "flute_3"]
        self.flute_max = self.df.loc[self.current_step, "flute_max"]

        # Η διαφορά μεταξύ της προβλεπόμενης τιμής και της κανονικής τιμής
        self.pred_dif_1 = self.flute_1_pred - self.flute_1
        self.pred_dif_2 = self.flute_1_pred - self.flute_1
        self.pred_dif_3 = self.flute_1_pred - self.flute_1
        self.pred_dif_max = self.flute_max_pred - self.flute_max

    def reset(self, seed = None, options = None):
        #We need the following line to seed self.np_random
        super().reset(seed=seed)

        # Επιστρέφω το περιβάλλον στην αρχική του κατάσταση (state s0)
        self._state_s0()

        # Set the current step to the start of the dataset
        self.current_step = 0

        observation = self._get_obs()

        info = self._get_info()

        return observation,info

    def score_function(self,delta):
        if delta < 0:
            return 1 - np.exp(-delta/10)
        else:
            return  1 - np.exp(delta/4.5)
        

    def score_function_original(self, delta):
        if delta < 0:
            return np.exp(-delta/10) - 1
        else:
            return np.exp(delta/4.5) - 1

    def step(self, action):
        # Execute one time step within the environment
        self._take_action(action)

        self.current_step += 1

        reward_flute_1 = self.score_function(self.pred_dif_1)
        reward_flute_2 = self.score_function(self.pred_dif_2)
        reward_flute_3 = self.score_function(self.pred_dif_3)
        reward_flute_max = self.score_function(self.pred_dif_max)

        reward = (REW_WEIGHT[0]*reward_flute_1) + (REW_WEIGHT[1]*reward_flute_2) + (REW_WEIGHT[2]*reward_flute_3) + (REW_WEIGHT[3]*reward_flute_max)

        # H βαθμολογία που θα είχε στο διαγωνισμό
        #self.score -= reward_flute_max
        
        # H βαθμολογία που θα είχε στο διαγωνισμό
        self.score += self.score_function_original(self.pred_dif_max)
        
        
        info = self._get_info()
        #print("In the env self.score is : ", self.score)
        #print(info)
        
        # Ελέγχουμε εάν πάμε να βγούμε εκτός του dataset
        terminated = False
        if self.current_step > MAX_STEPS - 2:
            # τότε πρέπει να κάνουμε reset)
            print("Reached 314", print(info))
            info = self._get_info()

            terminated = True  #διαλέγω terminated καθώς είναι φυσιολογική η λήξη

        observation = self._get_obs()

        #Εδω μπορούμε να βάλουμε κάποιο όριο απόκλισης τιμών στις προβλέψεις ώστε να κάνουμε την μάθηση ταχύτερη
        trunctated = False # Ελέγχουμε εάν υπάρχει κάποιο condition εξωτερικό του περιβάλλοντος πχ. Όριο χρόνου
        if reward < -200000000 : #200 million
            trunctated = True

        return observation, reward, terminated,trunctated, info

    metadata = {"render_modes": ["human"]} #, "rgb_array"], "render_fps": 4}

    def render(self, mode='human'):
        # Render the environment (optional)
        pass

    def close(self):
        # Clean up resources (optional)
        pass

In [None]:
class CNC_Env_Non_Corrective_with_Delay(gymnasium.Env):

    def __init__(self, df):

        self.df = df
        self.reward_range = (np.NINF , 1 ) # Πρέπει να ορίσω το εύρος των rewards

        # Define the low and high bounds for each dimension of the action space
        low_bound = np.array([0, 0, 0], dtype=np.float64)
        high_bound = np.array([1, 1, 1], dtype=np.float64)

        # Action Space
        self.action_space = gymnasium.spaces.Box(low=low_bound, high=high_bound, dtype=np.float64)

        # Observation Space | Είναι κανονικοποιημένο από 0-1 | 7 sensors * 6 time domain features = 42
        self.observation_space = gymnasium.spaces.Box(low=0, high=1, shape=(42,), dtype=np.float64)

        # Ξεκινάω το περιβάλλον στην αρχική του κατάσταση (state s0)
        self._state_s0()

    def _state_s0(self):
        # βάλε randomness εδώ
        self.current_step = 0
        # flute_1 = df[] diff bettwen d[1] - d[0] * (random val (0,2))
        # state_0 - ( state_1 - state_0)
        self.flute_1 = (2*self.df.loc[0, "flute_1"]) - self.df.loc[1, "flute_1"]
        self.flute_2 = (2*self.df.loc[0, "flute_2"]) - self.df.loc[1, "flute_2"]
        self.flute_3 =  (2*self.df.loc[0, "flute_3"]) - self.df.loc[1, "flute_3"]
        self.flute_max = (2*self.df.loc[0, "flute_max"]) - self.df.loc[1, "flute_max"]

        # Θέτουμε και τα predictions στην αρχική κατάσταση του συστήματος
        self.flute_1_pred = self.flute_1
        self.flute_2_pred = self.flute_2
        self.flute_3_pred = self.flute_3
        self.flute_max_pred = max(self.flute_1 , self.flute_2 , self.flute_3)

        self.pred_dif_1 = 0
        self.pred_dif_2 = 0
        self.pred_dif_3 = 0
        self.pred_dif_max = 0
        
        #Phm Score
        self.score = 0 

    # Constructing Observations From Environment States
    def _get_obs(self):

        # Παίρνω τα time domain features του current (δηλαδή του αντίστοιχου cut)
        frame = self.df.loc[self.current_step, time_domain_features_columns].to_numpy(dtype=np.float64)

        # Φτιαχνω το observation
        obs = frame

        return obs

    def _get_info(self):
        return {
            "cut": self.current_step,
            "score": self.score
        }

    def _take_action(self, action):

        # Action Space Example : array([0.79758393, 0.41074004, 0.0026002 ])

        ## Σε κάθε state Sn κάνουμε Predict βάση της προηγούμενης πρόβλεψης Sn-1
        # Φθορα στο μυτακι_1(ν) = φθορα στο μυτακι_1_πρβλεπομενη(ν-1) + prediction του agent
        # Prediction του agent = 100% max diff φθορών σε καταστασεις χρησεις του μηχανηματος (ν -> ν+1)
        
        self.flute_1_pred += (action[0] * MAX_DIFF)
        self.flute_2_pred += (action[1] * MAX_DIFF)
        self.flute_3_pred += (action[2] * MAX_DIFF)
        self.flute_max_pred = max(self.flute_1_pred , self.flute_2_pred , self.flute_3_pred)

        self.flute_1 = self.df.loc[self.current_step, "flute_1"]
        self.flute_2 = self.df.loc[self.current_step, "flute_2"]
        self.flute_3 = self.df.loc[self.current_step, "flute_3"]
        self.flute_max = self.df.loc[self.current_step, "flute_max"]

        # Η διαφορά μεταξύ της προβλεπόμενης τιμής και της κανονικής τιμής
        self.pred_dif_1 = self.flute_1_pred - self.flute_1
        self.pred_dif_2 = self.flute_1_pred - self.flute_1
        self.pred_dif_3 = self.flute_1_pred - self.flute_1
        self.pred_dif_max = self.flute_max_pred - self.flute_max

    def reset(self, seed = None, options = None):
        #We need the following line to seed self.np_random
        super().reset(seed=seed)

        # Επιστρέφω το περιβάλλον στην αρχική του κατάσταση (state s0)
        self._state_s0()

        # Set the current step to the start of the dataset
        self.current_step = 0

        observation = self._get_obs()
        info = self._get_info()

        return observation,info

    def score_function(self,delta):
        if delta < 0:
            return 1 - np.exp(-delta/10)
        else:
            return  1 - np.exp(delta/4.5)
        
    def score_function_original(self, delta):
        if delta < 0:
            return np.exp(-delta/10) - 1
        else:
            return np.exp(delta/4.5) - 1

    def step(self, action):
        # Execute one time step within the environment
        self._take_action(action)

        self.current_step += 1

        reward_flute_1 = self.score_function(self.pred_dif_1)
        reward_flute_2 = self.score_function(self.pred_dif_2)
        reward_flute_3 = self.score_function(self.pred_dif_3)
        reward_flute_max = self.score_function(self.pred_dif_max) #PHM 

        reward_without_delay = (REW_WEIGHT[0]*reward_flute_1) + (REW_WEIGHT[1]*reward_flute_2) + (REW_WEIGHT[2]*reward_flute_3) + (REW_WEIGHT[3]*reward_flute_max)
        # Παραμετροποιεί το reward έτσι ώστε να ενισχύει τις δράσεις που προσφέρουν οφέλοι σε μέλλοντα χρόνο
        delay_modifier = (self.current_step / (MAX_STEPS-2))
        reward = reward_without_delay * delay_modifier 
        
        # H βαθμολογία που θα είχε στο διαγωνισμό
        #self.score -= reward_flute_max
        
        self.score += self.score_function_original(self.pred_dif_max)
    
        info = self._get_info()
    
        # Ελέγχουμε εάν πάμε να βγούμε εκτός του dataset
        terminated = False
        if self.current_step > MAX_STEPS - 2:
            # τότε πρέπει να κάνουμε reset)
            terminated = True  #διαλέγω terminated καθώς είναι φυσιολογική η λήξη

        observation = self._get_obs()

        #Εδω μπορούμε να βάλουμε κάποιο όριο απόκλισης τιμών στις προβλέψεις ώστε να κάνουμε την μάθηση ταχύτερη
        trunctated = False # Ελέγχουμε εάν υπάρχει κάποιο condition εξωτερικό του περιβάλλοντος πχ. Όριο χρόνου
        if reward < -200000000 : #200milion
            trunctated = True

        return observation, reward, terminated,trunctated, info

    metadata = {"render_modes": ["human"]} #, "rgb_array"], "render_fps": 4}

    def render(self, mode='human'):
        # Render the environment (optional)
        pass

    def close(self):
        # Clean up resources (optional)
        pass

In [None]:
# Create an instance of the environment with your dataframe
c1_non_corrective_pred_no_delay_env = CNC_Env_Non_Corrective_No_Delay(df)
c1_non_corrective_pred_with_delay_env = CNC_Env_Non_Corrective_with_Delay(df)

c1_corrective_pred_no_delay_env = CNC_Env_Corrective_Pred_No_Delay(df)
c1_corrective_pred_with_delay_env = CNC_Env_Corrective_Pred_with_Delay(df)

In [None]:
check_env(c1_non_corrective_pred_no_delay_env, warn=True)
check_env(c1_non_corrective_pred_with_delay_env, warn=True)
check_env(c1_corrective_pred_no_delay_env, warn=True)
check_env(c1_corrective_pred_with_delay_env,warn=True)

In [None]:
# Define the log path
#log_path = os.path.join('training', 'logs')

# Create log directory if it doesn't exist
#os.makedirs(log_path, exist_ok=True)

# Wrap the environment with the Monitor
#non_corrective_pred_env = Monitor(non_corrective_pred_env, log_path)

# Wrap the environment with the Monitor
#corrective_pred_env = Monitor(corrective_pred_env, log_path)

In [None]:
# Training Help
TIMESTEPS = 10000

# Create a log path for our training logs 
logdir = "logs"
if not os.path.exists(logdir):
    os.makedirs(logdir)

### Tensorboard Command
C:\path >tensorboard --logdir=logs 

## Helper Functions

In [None]:
# Function to find the highest 'i' value from existing model files
def find_highest_i(models_dir):
    highest_i = 0
    for filename in os.listdir(models_dir):
        if filename.endswith(".zip"):  # Ensure we're only looking at model files
            try:
                # Extract 'i' from the filename based on the TIMESTEPS*i pattern
                i = int(filename.replace('.zip', '').split('/')[-1]) // TIMESTEPS
                if i > highest_i:
                    highest_i = i
            except ValueError:
                # In case the filename doesn't follow the expected pattern
                continue
    return highest_i

In [None]:
def make_filename(models_dir): 
    if not os.path.exists(models_dir): 
        os.makedirs(models_dir)

    highest_i = find_highest_i(models_dir)
    return f"{models_dir}/{TIMESTEPS*highest_i}.zip"

### **Callback**

In [None]:
from stable_baselines3.common.callbacks import BaseCallback
import warnings

# Suppress all warnings
warnings.filterwarnings("ignore")

# Define a custom callback to log the score during training
class ScoreLoggerCallback(BaseCallback):
    def __init__(self, verbose=0):
        super(ScoreLoggerCallback, self).__init__(verbose) 
        self.max_score = 0  # Define max_score attribute here

    def _on_step(self) -> bool:
        # Retrieve the wrapped environment from the model
        env = self.model.env.envs[0]  # Assuming you only have one environment in the DummyVecEnv
        
        # Log the score after each training step
        phm_score = env.score
          
        current_step = env.current_step 
        if current_step < (MAX_STEPS - 5):
            # Log the maximum score achieved so far when current step is less than MAX_STEPS - 5
            self.logger.record("rollout/phm_score", self.max_score)
            #print("Current Step is :", current_step)
            #print("The score LESS THAN 315 is :", phm_score)
        
        if current_step >= (MAX_STEPS - 2):
            # Log the score and update max_score when current step is greater than or equal to MAX_STEPS - 2
            self.max_score = phm_score  # Update max_score
            self.logger.record("rollout/phm_score", phm_score)
            #print("Current Step is :", current_step)
            #print("The score at 315 is :", phm_score)
            
        return True

### **Load Model**

In [None]:
def load_model(model_filename, algo, env):
    if Path(model_filename).exists():
        algo_class = getattr(stable_baselines3, algo)
        loaded_model = algo_class.load(model_filename, env)
        print(f"Successfully loaded model from {model_filename}")
        return loaded_model 
    else:
        print(f"Model file '{model_filename}' does not exist.")

### **Run Model**

In [None]:
def run_model(env, algo, log_name,  models_dir, max_loops , max_time, first_run, model=None):
    
    callback = ScoreLoggerCallback()
    model_filename = make_filename(models_dir)
    highest_i = find_highest_i(models_dir)
    i = highest_i + 1
    di = i 
    start_time = time.time()
    
    while True:
    
        model_filename = make_filename(models_dir)
    
        if first_run : 
            model.learn(total_timesteps=TIMESTEPS, reset_num_timesteps=False, tb_log_name=log_name, callback=callback)
            model_filename = f"{models_dir}/{TIMESTEPS*i}.zip"
            model.save(model_filename)
            i+1
            first_run = False
            
        model = load_model(model_filename, algo, env)
        model.learn(total_timesteps=TIMESTEPS, reset_num_timesteps=False, tb_log_name=log_name, callback=callback)
        
        model_filename = f"{models_dir}/{TIMESTEPS*i}.zip"
        model.save(model_filename)

        reward, _= evaluate_policy(model, env, n_eval_episodes=2, render=False)
        print(reward)
        i += 1
        
        #trackers
        diff_timesteps = i - di
        print("diff_timesteps is ", diff_timesteps)
        elapsed_time = time.time() - start_time
        print("Elasped time is (sec)",elapsed_time )
        if diff_timesteps > max_loops or elapsed_time > max_time :
            break
        

**Warnings Supressor**

In [None]:
# Reset warnings to default state
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
#warnings.resetwarnings()

# Algorithms

#### Create the models
We run it only once, after that we load each model

In [None]:
## -- PPO -- ##
#ppo_model_non_corr_no_delay = PPO("MlpPolicy", c1_non_corrective_pred_no_delay_env, verbose=1, tensorboard_log = logdir)
#ppo_model_non_corr_with_delay = PPO("MlpPolicy", c1_non_corrective_pred_with_delay_env, verbose=1, tensorboard_log = logdir)
#---------------------
#ppo_model_corr_no_delay = PPO("MlpPolicy", c1_corrective_pred_no_delay_env, verbose=1, tensorboard_log = logdir)
#ppo_model_corr_with_delay = PPO("MlpPolicy", c1_corrective_pred_with_delay_env, verbose=1, tensorboard_log = logdir)
#---------------------

## -- A2C -- ##
#a2c_model_non_corr_no_delay = A2C("MlpPolicy", c1_non_corrective_pred_no_delay_env, verbose=1, tensorboard_log = logdir)
#a2c_model_non_corr_with_delay = A2C("MlpPolicy", c1_non_corrective_pred_with_delay_env, verbose=1, tensorboard_log = logdir)
#---------------------
#a2c_model_corr_no_delay = A2C("MlpPolicy", c1_corrective_pred_no_delay_env, verbose=1, tensorboard_log = logdir)
#a2c_model_corr_with_delay = A2C("MlpPolicy", c1_corrective_pred_with_delay_env, verbose=1, tensorboard_log = logdir)

## -- DDPG -- ##
#ddpg_model_non_corr_no_delay = DDPG("MlpPolicy", c1_non_corrective_pred_no_delay_env, verbose=1, tensorboard_log = logdir)
#ddpg_model_non_corr_with_delay = DDPG("MlpPolicy", c1_non_corrective_pred_with_delay_env, verbose=1, tensorboard_log = logdir)
#---------------------
#ddpg_model_corr_no_delay = DDPG("MlpPolicy", c1_corrective_pred_no_delay_env, verbose=1, tensorboard_log = logdir)
#ddpg_model_corr_with_delay = DDPG("MlpPolicy", c1_corrective_pred_with_delay_env, verbose=1, tensorboard_log = logdir)

## -- SAC -- ##
#sac_model_non_corr_no_delay = SAC("MlpPolicy", c1_non_corrective_pred_no_delay_env, verbose=1, tensorboard_log = logdir)
#sac_model_non_corr_with_delay = SAC("MlpPolicy", c1_non_corrective_pred_with_delay_env, verbose=1, tensorboard_log = logdir)
#---------------------
#sac_model_corr_no_delay = SAC("MlpPolicy", c1_corrective_pred_no_delay_env, verbose=1, tensorboard_log = logdir)
#sac_model_corr_with_delay = SAC("MlpPolicy", c1_corrective_pred_with_delay_env, verbose=1, tensorboard_log = logdir)

# PPO

### PPO Non Corrective No Delay

In [None]:
# FIRST RUN 
#run_model(c1_non_corrective_pred_no_delay_env, "PPO", "PPO_model_non_corr_no_delay", "models/PPO_non_corr_no_delay", 1, 3600, True, ppo_model_non_corr_no_delay) #1hour   
# -----------
# MAIN RUN 
#run_model(c1_non_corrective_pred_no_delay_env, "PPO", "PPO_model_non_corr_no_delay", "models/PPO_non_corr_no_delay", 10, 3600, False,)    

### PPO Non Corrective With Delay

In [None]:
# FIRST RUN 
#run_model(c1_non_corrective_pred_with_delay_env, "PPO", "PPO_model_non_corr_with_delay", "models/PPO_non_corr_with_delay", 1, 3600, True, ppo_model_non_corr_with_delay) #1hour   
#-------------
# MAIN RUN 
#run_model(c1_non_corrective_pred_with_delay_env, "PPO", "PPO_model_non_corr_with_delay", "models/PPO_non_corr_with_delay", 1000, 10200, False,)    

### PPO Corrective No Delay

In [None]:
# FIRST RUN
#run_model(c1_corrective_pred_no_delay_env, "PPO", "PPO_model_corr_no_delay", "models/PPO_corr_no_delay", 1, 3600, True, ppo_model_corr_no_delay) #1hour   
#-------------
# MAIN RUN 
#run_model(c1_corrective_pred_no_delay_env, "PPO", "PPO_model_corr_no_delay", "models/PPO_corr_no_delay", 1, 3600, False,)    

### PPO Corrective With Delay

In [None]:
# FIRST RUN 
#run_model(c1_corrective_pred_with_delay_env, "PPO", "PPO_model_corr_with_delay", "models/PPO_corr_with_delay", 1, 3600, True, ppo_model_corr_with_delay) #1hour   
#-------------
# MAIN RUN 
#run_model(c1_corrective_pred_with_delay_env, "PPO", "PPO_model_corr_with_delay", "models/PPO_corr_with_delay", 100, 3600, False,)    

#### PPO Corrective No Delay (Test 1)

In [None]:
#models_dir = "models\PPO_non_corr_no_delay_test" 
#ppo_model_non_corr_no_delay_test.learn(total_timesteps=TIMESTEPS, reset_num_timesteps=False, tb_log_name="PPO_non_corr_no_delay_test", callback=callback)

#### PPO Corrective No Delay (Test 2)

In [None]:
#model_filename = make_filename("models\PPO_non_corr_no_delay_test", TIMESTEPS)
#ppo_corrective_no_delay_test_2 = log_name , model_dir

#ppo_model_non_corr_no_delay_test <- MODEL ,  tb_log_name="PPO_non_corr_no_delay_test2" , models_dir = "models\PPO_non_corr_no_delay_test"

In [None]:
#run_model(ppo_corrective_no_delay_test_2, c1_non_corrective_pred_no_delay_env, "ppo_model_non_corr_no_delay", "models/a2c_non_corr_no_delay", 0, 3600) #1hour

In [None]:
# Load PPO_non_corr Model 
#model_filename = make_filename("models\PPO_non_corr_no_delay", TIMESTEPS)
#ppo_model_non_corr_no_delay = PPO.load(model_filename, exp)
#print(f"Successfully loaded model from {model_filename}")

In [None]:
# Load PPO_non_corr Model TEST 
#model_filename = make_filename("models\PPO_non_corr_no_delay_test", TIMESTEPS)
#ppo_model_non_corr_no_delay_test = PPO.load(model_filename, c1_non_corrective_pred_no_delay_env )
#print(f"Successfully loaded model from {model_filename}")

In [None]:
# Train PPO_non_corr_no_delay
#models_dir = "models\PPO_non_corr_no_delay_test"
#highest_i = find_highest_i("models\PPO_non_corr_no_delay_test", TIMESTEPS)
# Adjust the range based on the highest 'i' found
#for i in range(highest_i + 1, highest_i + 31):  # Starts from the next available 'i'
#    ppo_model_non_corr_no_delay_test.learn(total_timesteps=TIMESTEPS, reset_num_timesteps=False, tb_log_name="PPO_non_corr_no_delay_test", callback=callback)
#    model_filename = f"{models_dir}/{TIMESTEPS*i}.zip"
#    ppo_model_non_corr_no_delay_test.save(model_filename)

In [None]:
#model_filename = make_filename("models\PPO_non_corr_no_delay_test", TIMESTEPS)

In [None]:
#models_dir = "models\PPO_non_corr_no_delay_test"
#i = find_highest_i(models_dir, TIMESTEPS)
#print(i)
#print(TIMESTEPS)
#model_filename = f"{models_dir}/{TIMESTEPS*i}.zip"

#print(model_filename)

In [None]:
#models_dir = "models\PPO_non_corr_no_delay_test"
#highest_i = find_highest_i(models_dir, TIMESTEPS)
#callback = ScoreLoggerCallback()

#---------

#----------

# Wrap the environment with Monitor and DummyVecEnv
#env = c1_non_corrective_pred_no_delay_env
#env = Monitor(env)
#env = DummyVecEnv([lambda: env])
#ppo_model_non_corr_no_delay = PPO("MlpPolicy", env, verbose=1, tensorboard_log = logdir)
# Adjust the range based on the highest 'i' found
#i = highest_i + 1
#while True:
#    ppo_model_non_corr_no_delay_test.learn(total_timesteps=TIMESTEPS, reset_num_timesteps=False, tb_log_name="PPO_non_corr_no_delay_test2", callback=callback)
#    model_filename = f"{models_dir}/{TIMESTEPS*i}.zip"
#    
#    ppo_model_non_corr_no_delay_test.save(model_filename)
#    
    # Evaluate the model to check if the reward is greater than -4000
    # Assuming you have a function to evaluate the model and return its reward
#    reward, _ = evaluate_policy(ppo_model_non_corr_no_delay, c1_non_corrective_pred_no_delay_env, n_eval_episodes=2, render=False)
#    print(reward)
#    if reward > -4000:
#        break  # Exit the loop if reward exceeds -4000
#    i += 1

# A2C

### A2C Non Corrective No Delay

In [None]:
# FIRST RUN 
#run_model(c1_non_corrective_pred_no_delay_env, "A2C", "A2C_model_non_corr_no_delay", "models/A2C_non_corr_no_delay", 1, 3600, True, a2c_model_non_corr_no_delay) #1hour   
#-------------
# MAIN RUN 
#run_model(c1_non_corrective_pred_no_delay_env, "A2C", "A2C_model_non_corr_no_delay", "models/A2C_non_corr_no_delay", 100, 3600, False,)    

### A2C Non Corrective With Delay

In [None]:
# FIRST RUN 
#run_model(c1_non_corrective_pred_no_delay_env, "A2C", "A2C_model_non_corr_with_delay", "models/A2C_non_corr_with_delay", 1, 3600, True, a2c_model_non_corr_with_delay) #1hour   
#-------------
# MAIN RUN 
#run_model(c1_non_corrective_pred_with_delay_env, "A2C", "A2C_model_non_corr_with_delay", "models/A2C_non_corr_with_delay", 1200, 8600, False,)    

### A2C Corrective No Delay

In [None]:
# FIRST RUN 
#run_model(c1_corrective_pred_no_delay_env, "A2C", "A2C_model_corr_no_delay", "models/A2C_corr_no_delay", 1, 3600, True, a2c_model_corr_no_delay) #1hour   
#-------------
# MAIN RUN 
#run_model(c1_non_corrective_pred_no_delay_env, "A2C", "A2C_model_corr_no_delay", "models/A2C_corr_no_delay", 100, 3600, False,)    

### A2C Corrective With Delay

In [None]:
# FIRST RUN 
#run_model(c1_corrective_pred_with_delay_env, "A2C", "A2C_model_corr_with_delay", "models/A2C_corr_with_delay", 1, 3600, True, a2c_model_corr_with_delay) #1hour   
#-------------
# MAIN RUN 
#run_model(c1_corrective_pred_with_delay_env, "A2C", "A2C_model_corr_with_delay", "models/A2C_corr_with_delay", 1000, 10600, False,)    

# **DDPG**

### DDPG Non Corrective No Delay

In [None]:
# FIRST RUN 
#run_model(c1_non_corrective_pred_no_delay_env, "DDPG", "DDPG_model_non_corr_no_delay", "models/DDPG_non_corr_no_delay", 1, 3600, True, ddpg_model_non_corr_no_delay) #1hour   
#-------------
# MAIN RUN 
#run_model(c1_non_corrective_pred_no_delay_env, "DDPG", "DDPG_model_non_corr_no_delay", "models/DDPG_non_corr_no_delay", 100, 4600, False,)    

### DDPG Non Corrective With Delay

In [None]:
# FIRST RUN 
#run_model(c1_non_corrective_pred_with_delay_env, "DDPG", "DDPG_model_non_corr_with_delay", "models/DDPG_non_corr_with_delay", 1, 3600, True, ddpg_model_non_corr_with_delay) #1hour   
#-------------
# MAIN RUN 
#run_model(c1_non_corrective_pred_with_delay_env, "DDPG", "DDPG_model_non_corr_with_delay", "models/DDPG_non_corr_with_delay", 100, 4600, False,)    

### DDPG Corrective No Delay

In [None]:
# FIRST RUN 
#run_model(c1_corrective_pred_no_delay_env, "DDPG", "DDPG_model_corr_no_delay", "models/DDPG_corr_no_delay", 1, 3600, True, ddpg_model_corr_no_delay) #1hour   
#-------------
# MAIN RUN 
#run_model(c1_corrective_pred_no_delay_env, "DDPG", "DDPG_model_corr_no_delay", "models/DDPG_corr_no_delay", 1000, 10600, False,)    

### DDPG Corrective With Delay

In [None]:
# FIRST RUN 
#run_model(c1_corrective_pred_with_delay_env, "DDPG", "DDPG_model_corr_with_delay", "models/DDPG_corr_with_delay", 1, 3600, True, ddpg_model_corr_with_delay) #1hour   
#-------------
# MAIN RUN 
#run_model(c1_corrective_pred_with_delay_env, "DDPG", "DDPG_model_corr_with_delay", "models/DDPG_corr_with_delay", 1000, 4000, False,)    

# **SAC**

### SAC Non Corrective Νo Delay

In [73]:
# FIRST RUN 
#run_model(c1_non_corrective_pred_no_delay_env, "SAC", "SAC_model_non_corr_no_delay", "models/SAC_non_corr_no_delay", 1, 3600, True, sac_model_non_corr_no_delay) #1hour   
#-------------
# MAIN RUN 
run_model(c1_non_corrective_pred_no_delay_env, "SAC", "SAC_model_non_corr_no_delay", "models/SAC_non_corr_no_delay", 1000, 6600, False,)    

Wrapping the env in a DummyVecEnv.
Successfully loaded model from models/SAC_non_corr_no_delay/3600000.zip
Logging to logs\SAC_model_non_corr_no_delay_0
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 33.2      |
|    ep_rew_mean     | -5.59e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 90920     |
|    fps             | 35        |
|    time_elapsed    | 2         |
|    total_timesteps | 3610099   |
| train/             |           |
|    actor_loss      | 1.66e+10  |
|    critic_loss     | 1.21e+15  |
|    ent_coef        | 5.86e+06  |
|    ent_coef_loss   | -1.98     |
|    learning_rate   | 0.0003    |
|    n_updates       | 3609998   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 33.2      |
|    ep_rew_mean     | -5.54e+08 |
|    phm_score       | 0         |
| time/              |           |
|    ep

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 33.3      |
|    ep_rew_mean     | -5.31e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 90972     |
|    fps             | 37        |
|    time_elapsed    | 49        |
|    total_timesteps | 3611840   |
| train/             |           |
|    actor_loss      | 1.68e+10  |
|    critic_loss     | 1.53e+15  |
|    ent_coef        | 5.23e+06  |
|    ent_coef_loss   | 1.8       |
|    learning_rate   | 0.0003    |
|    n_updates       | 3611739   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 33.3      |
|    ep_rew_mean     | -5.29e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 90976     |
|    fps             | 37        |
|    time_elapsed    | 53        |
|    total_timesteps | 3611977   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 34        |
|    ep_rew_mean     | -5.85e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 91028     |
|    fps             | 37        |
|    time_elapsed    | 101       |
|    total_timesteps | 3613758   |
| train/             |           |
|    actor_loss      | 1.69e+10  |
|    critic_loss     | 1e+15     |
|    ent_coef        | 5.19e+06  |
|    ent_coef_loss   | 0.57      |
|    learning_rate   | 0.0003    |
|    n_updates       | 3613657   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 34        |
|    ep_rew_mean     | -5.84e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 91032     |
|    fps             | 37        |
|    time_elapsed    | 105       |
|    total_timesteps | 3613895   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 34.5      |
|    ep_rew_mean     | -5.94e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 91084     |
|    fps             | 36        |
|    time_elapsed    | 153       |
|    total_timesteps | 3615692   |
| train/             |           |
|    actor_loss      | 1.69e+10  |
|    critic_loss     | 1.13e+15  |
|    ent_coef        | 5.35e+06  |
|    ent_coef_loss   | 1.02      |
|    learning_rate   | 0.0003    |
|    n_updates       | 3615591   |
----------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 34.5     |
|    ep_rew_mean     | -5.9e+08 |
|    phm_score       | 0        |
| time/              |          |
|    episodes        | 91088    |
|    fps             | 36       |
|    time_elapsed    | 157      |
|    total_timesteps | 3615831  |
| train/             |        

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 34.5      |
|    ep_rew_mean     | -5.75e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 91140     |
|    fps             | 36        |
|    time_elapsed    | 206       |
|    total_timesteps | 3617618   |
| train/             |           |
|    actor_loss      | 1.69e+10  |
|    critic_loss     | 3.57e+15  |
|    ent_coef        | 5.28e+06  |
|    ent_coef_loss   | -3.78     |
|    learning_rate   | 0.0003    |
|    n_updates       | 3617517   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 34.5      |
|    ep_rew_mean     | -5.73e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 91144     |
|    fps             | 36        |
|    time_elapsed    | 210       |
|    total_timesteps | 3617756   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 34.4      |
|    ep_rew_mean     | -5.94e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 91196     |
|    fps             | 36        |
|    time_elapsed    | 258       |
|    total_timesteps | 3619546   |
| train/             |           |
|    actor_loss      | 1.69e+10  |
|    critic_loss     | 1.46e+15  |
|    ent_coef        | 5.2e+06   |
|    ent_coef_loss   | -5.02     |
|    learning_rate   | 0.0003    |
|    n_updates       | 3619445   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 34.5      |
|    ep_rew_mean     | -5.97e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 91200     |
|    fps             | 36        |
|    time_elapsed    | 261       |
|    total_timesteps | 3619683   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 34        |
|    ep_rew_mean     | -6.02e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 91248     |
|    fps             | 35        |
|    time_elapsed    | 36        |
|    total_timesteps | 3621298   |
| train/             |           |
|    actor_loss      | 1.69e+10  |
|    critic_loss     | 1.16e+15  |
|    ent_coef        | 5.44e+06  |
|    ent_coef_loss   | 2.1       |
|    learning_rate   | 0.0003    |
|    n_updates       | 3621197   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 33.9      |
|    ep_rew_mean     | -5.97e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 91252     |
|    fps             | 35        |
|    time_elapsed    | 39        |
|    total_timesteps | 3621427   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 32.9      |
|    ep_rew_mean     | -5.42e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 91304     |
|    fps             | 35        |
|    time_elapsed    | 87        |
|    total_timesteps | 3623111   |
| train/             |           |
|    actor_loss      | 1.71e+10  |
|    critic_loss     | 3.1e+15   |
|    ent_coef        | 5.55e+06  |
|    ent_coef_loss   | -0.296    |
|    learning_rate   | 0.0003    |
|    n_updates       | 3623010   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 32.7      |
|    ep_rew_mean     | -5.39e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 91308     |
|    fps             | 35        |
|    time_elapsed    | 90        |
|    total_timesteps | 3623239   |
| train/            

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 32.1     |
|    ep_rew_mean     | -5.4e+08 |
|    phm_score       | 0        |
| time/              |          |
|    episodes        | 91360    |
|    fps             | 36       |
|    time_elapsed    | 135      |
|    total_timesteps | 3624899  |
| train/             |          |
|    actor_loss      | 1.72e+10 |
|    critic_loss     | 1.75e+15 |
|    ent_coef        | 5.56e+06 |
|    ent_coef_loss   | -1.48    |
|    learning_rate   | 0.0003   |
|    n_updates       | 3624798  |
---------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 32.1      |
|    ep_rew_mean     | -5.39e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 91364     |
|    fps             | 36        |
|    time_elapsed    | 138       |
|    total_timesteps | 3625030   |
| train/             |           |
|  

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 32.3      |
|    ep_rew_mean     | -5.68e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 91416     |
|    fps             | 36        |
|    time_elapsed    | 182       |
|    total_timesteps | 3626725   |
| train/             |           |
|    actor_loss      | 1.74e+10  |
|    critic_loss     | 3.55e+15  |
|    ent_coef        | 5.39e+06  |
|    ent_coef_loss   | 6.03      |
|    learning_rate   | 0.0003    |
|    n_updates       | 3626624   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 32.3      |
|    ep_rew_mean     | -5.67e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 91420     |
|    fps             | 36        |
|    time_elapsed    | 186       |
|    total_timesteps | 3626858   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 33.1      |
|    ep_rew_mean     | -5.77e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 91472     |
|    fps             | 36        |
|    time_elapsed    | 235       |
|    total_timesteps | 3628601   |
| train/             |           |
|    actor_loss      | 1.73e+10  |
|    critic_loss     | 2.1e+15   |
|    ent_coef        | 5.19e+06  |
|    ent_coef_loss   | 0.478     |
|    learning_rate   | 0.0003    |
|    n_updates       | 3628500   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 33.2      |
|    ep_rew_mean     | -5.89e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 91476     |
|    fps             | 36        |
|    time_elapsed    | 239       |
|    total_timesteps | 3628736   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 33.8      |
|    ep_rew_mean     | -6.22e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 91524     |
|    fps             | 37        |
|    time_elapsed    | 10        |
|    total_timesteps | 3630379   |
| train/             |           |
|    actor_loss      | 1.77e+10  |
|    critic_loss     | 2.48e+15  |
|    ent_coef        | 4.97e+06  |
|    ent_coef_loss   | 0.16      |
|    learning_rate   | 0.0003    |
|    n_updates       | 3630278   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 33.9      |
|    ep_rew_mean     | -6.25e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 91528     |
|    fps             | 37        |
|    time_elapsed    | 13        |
|    total_timesteps | 3630519   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35.4      |
|    ep_rew_mean     | -6.21e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 91580     |
|    fps             | 36        |
|    time_elapsed    | 66        |
|    total_timesteps | 3632420   |
| train/             |           |
|    actor_loss      | 1.79e+10  |
|    critic_loss     | 5.06e+15  |
|    ent_coef        | 5.67e+06  |
|    ent_coef_loss   | -1.2      |
|    learning_rate   | 0.0003    |
|    n_updates       | 3632319   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35.6      |
|    ep_rew_mean     | -6.18e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 91584     |
|    fps             | 36        |
|    time_elapsed    | 70        |
|    total_timesteps | 3632570   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 37.5      |
|    ep_rew_mean     | -5.84e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 91636     |
|    fps             | 36        |
|    time_elapsed    | 123       |
|    total_timesteps | 3634556   |
| train/             |           |
|    actor_loss      | 1.79e+10  |
|    critic_loss     | 5.25e+15  |
|    ent_coef        | 5.95e+06  |
|    ent_coef_loss   | 1.93      |
|    learning_rate   | 0.0003    |
|    n_updates       | 3634455   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 37.6      |
|    ep_rew_mean     | -5.84e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 91640     |
|    fps             | 36        |
|    time_elapsed    | 127       |
|    total_timesteps | 3634706   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 38        |
|    ep_rew_mean     | -5.68e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 91692     |
|    fps             | 36        |
|    time_elapsed    | 181       |
|    total_timesteps | 3636680   |
| train/             |           |
|    actor_loss      | 1.76e+10  |
|    critic_loss     | 5.92e+15  |
|    ent_coef        | 6.14e+06  |
|    ent_coef_loss   | -0.371    |
|    learning_rate   | 0.0003    |
|    n_updates       | 3636579   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 38        |
|    ep_rew_mean     | -5.68e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 91696     |
|    fps             | 36        |
|    time_elapsed    | 185       |
|    total_timesteps | 3636831   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 37.9      |
|    ep_rew_mean     | -5.69e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 91748     |
|    fps             | 36        |
|    time_elapsed    | 238       |
|    total_timesteps | 3638794   |
| train/             |           |
|    actor_loss      | 1.74e+10  |
|    critic_loss     | 4.67e+15  |
|    ent_coef        | 6.14e+06  |
|    ent_coef_loss   | -4.16     |
|    learning_rate   | 0.0003    |
|    n_updates       | 3638693   |
----------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 37.8     |
|    ep_rew_mean     | -5.7e+08 |
|    phm_score       | 0        |
| time/              |          |
|    episodes        | 91752    |
|    fps             | 36       |
|    time_elapsed    | 242      |
|    total_timesteps | 3638946  |
| train/             |        

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 37.6      |
|    ep_rew_mean     | -5.86e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 91800     |
|    fps             | 36        |
|    time_elapsed    | 21        |
|    total_timesteps | 3640780   |
| train/             |           |
|    actor_loss      | 1.74e+10  |
|    critic_loss     | 1.02e+16  |
|    ent_coef        | 5.54e+06  |
|    ent_coef_loss   | 2.76      |
|    learning_rate   | 0.0003    |
|    n_updates       | 3640679   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 37.5      |
|    ep_rew_mean     | -5.85e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 91804     |
|    fps             | 36        |
|    time_elapsed    | 25        |
|    total_timesteps | 3640916   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 36.2      |
|    ep_rew_mean     | -5.88e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 91856     |
|    fps             | 36        |
|    time_elapsed    | 75        |
|    total_timesteps | 3642748   |
| train/             |           |
|    actor_loss      | 1.71e+10  |
|    critic_loss     | 6.31e+15  |
|    ent_coef        | 5.65e+06  |
|    ent_coef_loss   | -2.92     |
|    learning_rate   | 0.0003    |
|    n_updates       | 3642647   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 36.1      |
|    ep_rew_mean     | -5.93e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 91860     |
|    fps             | 36        |
|    time_elapsed    | 79        |
|    total_timesteps | 3642891   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35.6      |
|    ep_rew_mean     | -5.91e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 91912     |
|    fps             | 36        |
|    time_elapsed    | 129       |
|    total_timesteps | 3644756   |
| train/             |           |
|    actor_loss      | 1.7e+10   |
|    critic_loss     | 4.96e+15  |
|    ent_coef        | 5.71e+06  |
|    ent_coef_loss   | -4.28     |
|    learning_rate   | 0.0003    |
|    n_updates       | 3644655   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35.6      |
|    ep_rew_mean     | -5.91e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 91916     |
|    fps             | 36        |
|    time_elapsed    | 133       |
|    total_timesteps | 3644894   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35.2      |
|    ep_rew_mean     | -5.77e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 91968     |
|    fps             | 36        |
|    time_elapsed    | 182       |
|    total_timesteps | 3646700   |
| train/             |           |
|    actor_loss      | 1.7e+10   |
|    critic_loss     | 4.3e+15   |
|    ent_coef        | 5.57e+06  |
|    ent_coef_loss   | 0.5       |
|    learning_rate   | 0.0003    |
|    n_updates       | 3646599   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35.1      |
|    ep_rew_mean     | -5.77e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 91972     |
|    fps             | 36        |
|    time_elapsed    | 186       |
|    total_timesteps | 3646838   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 34.4      |
|    ep_rew_mean     | -5.18e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 92024     |
|    fps             | 36        |
|    time_elapsed    | 234       |
|    total_timesteps | 3648616   |
| train/             |           |
|    actor_loss      | 1.68e+10  |
|    critic_loss     | 5.09e+15  |
|    ent_coef        | 5.43e+06  |
|    ent_coef_loss   | 0.12      |
|    learning_rate   | 0.0003    |
|    n_updates       | 3648515   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 34.4      |
|    ep_rew_mean     | -5.17e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 92028     |
|    fps             | 36        |
|    time_elapsed    | 238       |
|    total_timesteps | 3648754   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 34.2      |
|    ep_rew_mean     | -5.15e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 92076     |
|    fps             | 36        |
|    time_elapsed    | 11        |
|    total_timesteps | 3650410   |
| train/             |           |
|    actor_loss      | 1.67e+10  |
|    critic_loss     | 2.12e+15  |
|    ent_coef        | 5.55e+06  |
|    ent_coef_loss   | 1.16      |
|    learning_rate   | 0.0003    |
|    n_updates       | 3650309   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 34.3      |
|    ep_rew_mean     | -5.18e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 92080     |
|    fps             | 36        |
|    time_elapsed    | 14        |
|    total_timesteps | 3650547   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 34.1      |
|    ep_rew_mean     | -4.94e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 92132     |
|    fps             | 36        |
|    time_elapsed    | 62        |
|    total_timesteps | 3652309   |
| train/             |           |
|    actor_loss      | 1.68e+10  |
|    critic_loss     | 2.14e+15  |
|    ent_coef        | 5.54e+06  |
|    ent_coef_loss   | -3.08     |
|    learning_rate   | 0.0003    |
|    n_updates       | 3652208   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 34.1      |
|    ep_rew_mean     | -4.85e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 92136     |
|    fps             | 36        |
|    time_elapsed    | 66        |
|    total_timesteps | 3652445   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 33.9      |
|    ep_rew_mean     | -4.93e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 92188     |
|    fps             | 36        |
|    time_elapsed    | 114       |
|    total_timesteps | 3654210   |
| train/             |           |
|    actor_loss      | 1.69e+10  |
|    critic_loss     | 2.7e+15   |
|    ent_coef        | 5.92e+06  |
|    ent_coef_loss   | -0.882    |
|    learning_rate   | 0.0003    |
|    n_updates       | 3654109   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 34        |
|    ep_rew_mean     | -4.98e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 92192     |
|    fps             | 36        |
|    time_elapsed    | 117       |
|    total_timesteps | 3654347   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 33.8      |
|    ep_rew_mean     | -5.42e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 92244     |
|    fps             | 36        |
|    time_elapsed    | 164       |
|    total_timesteps | 3656100   |
| train/             |           |
|    actor_loss      | 1.69e+10  |
|    critic_loss     | 2.77e+15  |
|    ent_coef        | 5.71e+06  |
|    ent_coef_loss   | 2.9       |
|    learning_rate   | 0.0003    |
|    n_updates       | 3655999   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 33.8      |
|    ep_rew_mean     | -5.34e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 92248     |
|    fps             | 36        |
|    time_elapsed    | 168       |
|    total_timesteps | 3656235   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 34        |
|    ep_rew_mean     | -5.13e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 92300     |
|    fps             | 36        |
|    time_elapsed    | 217       |
|    total_timesteps | 3658010   |
| train/             |           |
|    actor_loss      | 1.69e+10  |
|    critic_loss     | 2.55e+15  |
|    ent_coef        | 5.36e+06  |
|    ent_coef_loss   | -0.561    |
|    learning_rate   | 0.0003    |
|    n_updates       | 3657909   |
----------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 34       |
|    ep_rew_mean     | -5.1e+08 |
|    phm_score       | 0        |
| time/              |          |
|    episodes        | 92304    |
|    fps             | 36       |
|    time_elapsed    | 221      |
|    total_timesteps | 3658146  |
| train/             |        

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 34.2      |
|    ep_rew_mean     | -4.77e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 92356     |
|    fps             | 36        |
|    time_elapsed    | 269       |
|    total_timesteps | 3659929   |
| train/             |           |
|    actor_loss      | 1.7e+10   |
|    critic_loss     | 1.62e+15  |
|    ent_coef        | 5.21e+06  |
|    ent_coef_loss   | -1.18     |
|    learning_rate   | 0.0003    |
|    n_updates       | 3659828   |
----------------------------------
-369483949.629675
diff_timesteps is  5
Elasped time is (sec) 1362.8494641780853
Wrapping the env in a DummyVecEnv.
Successfully loaded model from models/SAC_non_corr_no_delay/3650000.zip
Logging to logs\SAC_model_non_corr_no_delay_0
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 34.2      |
|    ep_rew_mean     | -4.75e+08

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 34.6      |
|    ep_rew_mean     | -5.16e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 92408     |
|    fps             | 37        |
|    time_elapsed    | 47        |
|    total_timesteps | 3661746   |
| train/             |           |
|    actor_loss      | 1.69e+10  |
|    critic_loss     | 1.56e+15  |
|    ent_coef        | 5.08e+06  |
|    ent_coef_loss   | -4.48     |
|    learning_rate   | 0.0003    |
|    n_updates       | 3661645   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 34.6      |
|    ep_rew_mean     | -5.18e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 92412     |
|    fps             | 37        |
|    time_elapsed    | 50        |
|    total_timesteps | 3661886   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35.8      |
|    ep_rew_mean     | -5.96e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 92464     |
|    fps             | 36        |
|    time_elapsed    | 102       |
|    total_timesteps | 3663782   |
| train/             |           |
|    actor_loss      | 1.71e+10  |
|    critic_loss     | 3.07e+15  |
|    ent_coef        | 5.12e+06  |
|    ent_coef_loss   | 0.625     |
|    learning_rate   | 0.0003    |
|    n_updates       | 3663681   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35.8      |
|    ep_rew_mean     | -5.98e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 92468     |
|    fps             | 36        |
|    time_elapsed    | 106       |
|    total_timesteps | 3663931   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 36.9      |
|    ep_rew_mean     | -6.31e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 92520     |
|    fps             | 36        |
|    time_elapsed    | 158       |
|    total_timesteps | 3665862   |
| train/             |           |
|    actor_loss      | 1.71e+10  |
|    critic_loss     | 2.31e+15  |
|    ent_coef        | 4.92e+06  |
|    ent_coef_loss   | -1.9      |
|    learning_rate   | 0.0003    |
|    n_updates       | 3665761   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 37        |
|    ep_rew_mean     | -6.39e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 92524     |
|    fps             | 36        |
|    time_elapsed    | 162       |
|    total_timesteps | 3666012   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 37.4      |
|    ep_rew_mean     | -5.84e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 92576     |
|    fps             | 37        |
|    time_elapsed    | 215       |
|    total_timesteps | 3667972   |
| train/             |           |
|    actor_loss      | 1.73e+10  |
|    critic_loss     | 1.55e+15  |
|    ent_coef        | 4.96e+06  |
|    ent_coef_loss   | 1.39      |
|    learning_rate   | 0.0003    |
|    n_updates       | 3667871   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 37.5      |
|    ep_rew_mean     | -5.77e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 92580     |
|    fps             | 37        |
|    time_elapsed    | 219       |
|    total_timesteps | 3668122   |
| train/            

-646250677.545418
diff_timesteps is  6
Elasped time is (sec) 1632.4474201202393
Wrapping the env in a DummyVecEnv.
Successfully loaded model from models/SAC_non_corr_no_delay/3660000.zip
Logging to logs\SAC_model_non_corr_no_delay_0
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 37.6      |
|    ep_rew_mean     | -5.82e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 92632     |
|    fps             | 38        |
|    time_elapsed    | 1         |
|    total_timesteps | 3670076   |
| train/             |           |
|    actor_loss      | 1.71e+10  |
|    critic_loss     | 1.1e+15   |
|    ent_coef        | 5.23e+06  |
|    ent_coef_loss   | 1.54      |
|    learning_rate   | 0.0003    |
|    n_updates       | 3669975   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 37.5      |
|    ep_rew_mean     | -5.84e+08

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 36.9      |
|    ep_rew_mean     | -6.43e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 92684     |
|    fps             | 37        |
|    time_elapsed    | 52        |
|    total_timesteps | 3671964   |
| train/             |           |
|    actor_loss      | 1.73e+10  |
|    critic_loss     | 3.26e+15  |
|    ent_coef        | 5.1e+06   |
|    ent_coef_loss   | 1.14      |
|    learning_rate   | 0.0003    |
|    n_updates       | 3671863   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 36.8      |
|    ep_rew_mean     | -6.47e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 92688     |
|    fps             | 37        |
|    time_elapsed    | 56        |
|    total_timesteps | 3672110   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 36        |
|    ep_rew_mean     | -6.63e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 92740     |
|    fps             | 37        |
|    time_elapsed    | 106       |
|    total_timesteps | 3673964   |
| train/             |           |
|    actor_loss      | 1.71e+10  |
|    critic_loss     | 3.15e+15  |
|    ent_coef        | 4.98e+06  |
|    ent_coef_loss   | 3.18      |
|    learning_rate   | 0.0003    |
|    n_updates       | 3673863   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35.9      |
|    ep_rew_mean     | -6.59e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 92744     |
|    fps             | 37        |
|    time_elapsed    | 110       |
|    total_timesteps | 3674106   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35.4      |
|    ep_rew_mean     | -5.79e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 92796     |
|    fps             | 37        |
|    time_elapsed    | 159       |
|    total_timesteps | 3675934   |
| train/             |           |
|    actor_loss      | 1.7e+10   |
|    critic_loss     | 3.73e+15  |
|    ent_coef        | 4.98e+06  |
|    ent_coef_loss   | -0.0948   |
|    learning_rate   | 0.0003    |
|    n_updates       | 3675833   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35.4      |
|    ep_rew_mean     | -5.67e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 92800     |
|    fps             | 37        |
|    time_elapsed    | 163       |
|    total_timesteps | 3676074   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35        |
|    ep_rew_mean     | -5.09e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 92852     |
|    fps             | 37        |
|    time_elapsed    | 212       |
|    total_timesteps | 3677894   |
| train/             |           |
|    actor_loss      | 1.71e+10  |
|    critic_loss     | 2.62e+15  |
|    ent_coef        | 4.86e+06  |
|    ent_coef_loss   | 1.7       |
|    learning_rate   | 0.0003    |
|    n_updates       | 3677793   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35        |
|    ep_rew_mean     | -5.07e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 92856     |
|    fps             | 37        |
|    time_elapsed    | 216       |
|    total_timesteps | 3678035   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35.2      |
|    ep_rew_mean     | -5.13e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 92908     |
|    fps             | 37        |
|    time_elapsed    | 266       |
|    total_timesteps | 3679877   |
| train/             |           |
|    actor_loss      | 1.72e+10  |
|    critic_loss     | 2.83e+15  |
|    ent_coef        | 4.96e+06  |
|    ent_coef_loss   | 1.27      |
|    learning_rate   | 0.0003    |
|    n_updates       | 3679776   |
----------------------------------
-372177772.643721
diff_timesteps is  7
Elasped time is (sec) 1902.6696991920471
Wrapping the env in a DummyVecEnv.
Successfully loaded model from models/SAC_non_corr_no_delay/3670000.zip
Logging to logs\SAC_model_non_corr_no_delay_0
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35.2      |
|    ep_rew_mean     | -5.11e+08

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35.6      |
|    ep_rew_mean     | -5.25e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 92960     |
|    fps             | 36        |
|    time_elapsed    | 47        |
|    total_timesteps | 3681752   |
| train/             |           |
|    actor_loss      | 1.73e+10  |
|    critic_loss     | 1.81e+15  |
|    ent_coef        | 5.53e+06  |
|    ent_coef_loss   | -0.0358   |
|    learning_rate   | 0.0003    |
|    n_updates       | 3681651   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35.6      |
|    ep_rew_mean     | -5.35e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 92964     |
|    fps             | 36        |
|    time_elapsed    | 51        |
|    total_timesteps | 3681900   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 36.5      |
|    ep_rew_mean     | -6.01e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 93016     |
|    fps             | 36        |
|    time_elapsed    | 104       |
|    total_timesteps | 3683826   |
| train/             |           |
|    actor_loss      | 1.75e+10  |
|    critic_loss     | 4.56e+15  |
|    ent_coef        | 5.53e+06  |
|    ent_coef_loss   | -0.0734   |
|    learning_rate   | 0.0003    |
|    n_updates       | 3683725   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 36.6      |
|    ep_rew_mean     | -6.05e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 93020     |
|    fps             | 36        |
|    time_elapsed    | 108       |
|    total_timesteps | 3683974   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 37.1      |
|    ep_rew_mean     | -5.78e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 93072     |
|    fps             | 36        |
|    time_elapsed    | 161       |
|    total_timesteps | 3685910   |
| train/             |           |
|    actor_loss      | 1.74e+10  |
|    critic_loss     | 1.51e+15  |
|    ent_coef        | 5.63e+06  |
|    ent_coef_loss   | -2.07     |
|    learning_rate   | 0.0003    |
|    n_updates       | 3685809   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 37.1      |
|    ep_rew_mean     | -5.74e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 93076     |
|    fps             | 36        |
|    time_elapsed    | 165       |
|    total_timesteps | 3686059   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 37.3      |
|    ep_rew_mean     | -5.31e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 93128     |
|    fps             | 36        |
|    time_elapsed    | 218       |
|    total_timesteps | 3688001   |
| train/             |           |
|    actor_loss      | 1.73e+10  |
|    critic_loss     | 4.42e+15  |
|    ent_coef        | 5.53e+06  |
|    ent_coef_loss   | -2.55     |
|    learning_rate   | 0.0003    |
|    n_updates       | 3687900   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 37.3      |
|    ep_rew_mean     | -5.31e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 93132     |
|    fps             | 36        |
|    time_elapsed    | 222       |
|    total_timesteps | 3688149   |
| train/            

-451375026.032652
diff_timesteps is  8
Elasped time is (sec) 2176.1309039592743
Wrapping the env in a DummyVecEnv.
Successfully loaded model from models/SAC_non_corr_no_delay/3680000.zip
Logging to logs\SAC_model_non_corr_no_delay_0
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 37.3      |
|    ep_rew_mean     | -5.19e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 93184     |
|    fps             | 37        |
|    time_elapsed    | 3         |
|    total_timesteps | 3690113   |
| train/             |           |
|    actor_loss      | 1.75e+10  |
|    critic_loss     | 9.47e+14  |
|    ent_coef        | 5.52e+06  |
|    ent_coef_loss   | 1.67      |
|    learning_rate   | 0.0003    |
|    n_updates       | 3690012   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 37.3      |
|    ep_rew_mean     | -5.17e+08

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 37.3      |
|    ep_rew_mean     | -5.19e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 93236     |
|    fps             | 37        |
|    time_elapsed    | 54        |
|    total_timesteps | 3692051   |
| train/             |           |
|    actor_loss      | 1.76e+10  |
|    critic_loss     | 1.21e+15  |
|    ent_coef        | 5.29e+06  |
|    ent_coef_loss   | 0.353     |
|    learning_rate   | 0.0003    |
|    n_updates       | 3691950   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 37.3      |
|    ep_rew_mean     | -5.21e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 93240     |
|    fps             | 37        |
|    time_elapsed    | 59        |
|    total_timesteps | 3692202   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 37.2      |
|    ep_rew_mean     | -5.38e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 93292     |
|    fps             | 37        |
|    time_elapsed    | 111       |
|    total_timesteps | 3694132   |
| train/             |           |
|    actor_loss      | 1.76e+10  |
|    critic_loss     | 1.05e+15  |
|    ent_coef        | 4.97e+06  |
|    ent_coef_loss   | -1.34     |
|    learning_rate   | 0.0003    |
|    n_updates       | 3694031   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 37.2      |
|    ep_rew_mean     | -5.41e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 93296     |
|    fps             | 37        |
|    time_elapsed    | 115       |
|    total_timesteps | 3694280   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 37.2      |
|    ep_rew_mean     | -5.15e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 93348     |
|    fps             | 37        |
|    time_elapsed    | 168       |
|    total_timesteps | 3696217   |
| train/             |           |
|    actor_loss      | 1.73e+10  |
|    critic_loss     | 1.11e+15  |
|    ent_coef        | 4.77e+06  |
|    ent_coef_loss   | 2.1       |
|    learning_rate   | 0.0003    |
|    n_updates       | 3696116   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 37.2      |
|    ep_rew_mean     | -5.14e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 93352     |
|    fps             | 36        |
|    time_elapsed    | 172       |
|    total_timesteps | 3696366   |
| train/            

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 37.2     |
|    ep_rew_mean     | -5e+08   |
|    phm_score       | 0        |
| time/              |          |
|    episodes        | 93404    |
|    fps             | 36       |
|    time_elapsed    | 224      |
|    total_timesteps | 3698301  |
| train/             |          |
|    actor_loss      | 1.71e+10 |
|    critic_loss     | 9.31e+14 |
|    ent_coef        | 4.62e+06 |
|    ent_coef_loss   | 0.628    |
|    learning_rate   | 0.0003   |
|    n_updates       | 3698200  |
---------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 37.2      |
|    ep_rew_mean     | -4.98e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 93408     |
|    fps             | 36        |
|    time_elapsed    | 228       |
|    total_timesteps | 3698450   |
| train/             |           |
|  

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 37.4      |
|    ep_rew_mean     | -5.34e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 93456     |
|    fps             | 37        |
|    time_elapsed    | 7         |
|    total_timesteps | 3700268   |
| train/             |           |
|    actor_loss      | 1.71e+10  |
|    critic_loss     | 4.74e+15  |
|    ent_coef        | 4.57e+06  |
|    ent_coef_loss   | -2.43     |
|    learning_rate   | 0.0003    |
|    n_updates       | 3700167   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 37.5      |
|    ep_rew_mean     | -5.34e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 93460     |
|    fps             | 37        |
|    time_elapsed    | 11        |
|    total_timesteps | 3700421   |
| train/            

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 38.1     |
|    ep_rew_mean     | -6e+08   |
|    phm_score       | 0        |
| time/              |          |
|    episodes        | 93512    |
|    fps             | 36       |
|    time_elapsed    | 65       |
|    total_timesteps | 3702422  |
| train/             |          |
|    actor_loss      | 1.71e+10 |
|    critic_loss     | 1.75e+16 |
|    ent_coef        | 4.63e+06 |
|    ent_coef_loss   | -1.6     |
|    learning_rate   | 0.0003   |
|    n_updates       | 3702321  |
---------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 38.2      |
|    ep_rew_mean     | -6.05e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 93516     |
|    fps             | 36        |
|    time_elapsed    | 69        |
|    total_timesteps | 3702576   |
| train/             |           |
|  

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 38.5      |
|    ep_rew_mean     | -6.18e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 93568     |
|    fps             | 36        |
|    time_elapsed    | 124       |
|    total_timesteps | 3704579   |
| train/             |           |
|    actor_loss      | 1.7e+10   |
|    critic_loss     | 1.65e+16  |
|    ent_coef        | 4.43e+06  |
|    ent_coef_loss   | 1.68      |
|    learning_rate   | 0.0003    |
|    n_updates       | 3704478   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 38.5      |
|    ep_rew_mean     | -6.16e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 93572     |
|    fps             | 36        |
|    time_elapsed    | 128       |
|    total_timesteps | 3704731   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 38.4      |
|    ep_rew_mean     | -5.93e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 93624     |
|    fps             | 36        |
|    time_elapsed    | 183       |
|    total_timesteps | 3706723   |
| train/             |           |
|    actor_loss      | 1.69e+10  |
|    critic_loss     | 4.91e+15  |
|    ent_coef        | 4.53e+06  |
|    ent_coef_loss   | -3.28     |
|    learning_rate   | 0.0003    |
|    n_updates       | 3706622   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 38.4      |
|    ep_rew_mean     | -6.01e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 93628     |
|    fps             | 36        |
|    time_elapsed    | 187       |
|    total_timesteps | 3706877   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 38.4      |
|    ep_rew_mean     | -6.34e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 93680     |
|    fps             | 36        |
|    time_elapsed    | 241       |
|    total_timesteps | 3708881   |
| train/             |           |
|    actor_loss      | 1.69e+10  |
|    critic_loss     | 4.91e+15  |
|    ent_coef        | 4.67e+06  |
|    ent_coef_loss   | -0.831    |
|    learning_rate   | 0.0003    |
|    n_updates       | 3708780   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 38.5      |
|    ep_rew_mean     | -6.38e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 93684     |
|    fps             | 36        |
|    time_elapsed    | 246       |
|    total_timesteps | 3709036   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 38.7      |
|    ep_rew_mean     | -6.43e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 93732     |
|    fps             | 36        |
|    time_elapsed    | 25        |
|    total_timesteps | 3710933   |
| train/             |           |
|    actor_loss      | 1.68e+10  |
|    critic_loss     | 7.74e+15  |
|    ent_coef        | 4.9e+06   |
|    ent_coef_loss   | -2.89     |
|    learning_rate   | 0.0003    |
|    n_updates       | 3710832   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 38.8      |
|    ep_rew_mean     | -6.43e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 93736     |
|    fps             | 36        |
|    time_elapsed    | 29        |
|    total_timesteps | 3711092   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 39.2      |
|    ep_rew_mean     | -6.31e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 93788     |
|    fps             | 36        |
|    time_elapsed    | 85        |
|    total_timesteps | 3713145   |
| train/             |           |
|    actor_loss      | 1.69e+10  |
|    critic_loss     | 2.08e+16  |
|    ent_coef        | 4.7e+06   |
|    ent_coef_loss   | -0.821    |
|    learning_rate   | 0.0003    |
|    n_updates       | 3713044   |
----------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 39.2     |
|    ep_rew_mean     | -6.3e+08 |
|    phm_score       | 0        |
| time/              |          |
|    episodes        | 93792    |
|    fps             | 36       |
|    time_elapsed    | 89       |
|    total_timesteps | 3713301  |
| train/             |        

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 39.5      |
|    ep_rew_mean     | -6.28e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 93844     |
|    fps             | 36        |
|    time_elapsed    | 145       |
|    total_timesteps | 3715356   |
| train/             |           |
|    actor_loss      | 1.68e+10  |
|    critic_loss     | 8.26e+15  |
|    ent_coef        | 4.82e+06  |
|    ent_coef_loss   | -2.83     |
|    learning_rate   | 0.0003    |
|    n_updates       | 3715255   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 39.6      |
|    ep_rew_mean     | -6.33e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 93848     |
|    fps             | 36        |
|    time_elapsed    | 149       |
|    total_timesteps | 3715519   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 40.1      |
|    ep_rew_mean     | -6.99e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 93900     |
|    fps             | 36        |
|    time_elapsed    | 206       |
|    total_timesteps | 3717626   |
| train/             |           |
|    actor_loss      | 1.69e+10  |
|    critic_loss     | 7.24e+15  |
|    ent_coef        | 4.8e+06   |
|    ent_coef_loss   | -0.369    |
|    learning_rate   | 0.0003    |
|    n_updates       | 3717525   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 40.1      |
|    ep_rew_mean     | -7.03e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 93904     |
|    fps             | 36        |
|    time_elapsed    | 210       |
|    total_timesteps | 3717786   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 40.3      |
|    ep_rew_mean     | -7.43e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 93956     |
|    fps             | 36        |
|    time_elapsed    | 267       |
|    total_timesteps | 3719862   |
| train/             |           |
|    actor_loss      | 1.71e+10  |
|    critic_loss     | 5.83e+15  |
|    ent_coef        | 4.87e+06  |
|    ent_coef_loss   | 3.74      |
|    learning_rate   | 0.0003    |
|    n_updates       | 3719761   |
----------------------------------
-713870397.939969
diff_timesteps is  11
Elasped time is (sec) 2990.724313735962
Wrapping the env in a DummyVecEnv.
Successfully loaded model from models/SAC_non_corr_no_delay/3710000.zip
Logging to logs\SAC_model_non_corr_no_delay_0
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 40.2      |
|    ep_rew_mean     | -7.43e+08

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 39        |
|    ep_rew_mean     | -6.26e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 94008     |
|    fps             | 36        |
|    time_elapsed    | 50        |
|    total_timesteps | 3721868   |
| train/             |           |
|    actor_loss      | 1.73e+10  |
|    critic_loss     | 8.28e+15  |
|    ent_coef        | 5.22e+06  |
|    ent_coef_loss   | 0.12      |
|    learning_rate   | 0.0003    |
|    n_updates       | 3721767   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 38.9      |
|    ep_rew_mean     | -6.24e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 94012     |
|    fps             | 36        |
|    time_elapsed    | 54        |
|    total_timesteps | 3722017   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 37.6      |
|    ep_rew_mean     | -5.88e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 94064     |
|    fps             | 36        |
|    time_elapsed    | 107       |
|    total_timesteps | 3723951   |
| train/             |           |
|    actor_loss      | 1.74e+10  |
|    critic_loss     | 6.32e+15  |
|    ent_coef        | 5.32e+06  |
|    ent_coef_loss   | -1.58     |
|    learning_rate   | 0.0003    |
|    n_updates       | 3723850   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 37.5      |
|    ep_rew_mean     | -5.88e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 94068     |
|    fps             | 36        |
|    time_elapsed    | 111       |
|    total_timesteps | 3724099   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 36.8      |
|    ep_rew_mean     | -6.98e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 94120     |
|    fps             | 36        |
|    time_elapsed    | 163       |
|    total_timesteps | 3726000   |
| train/             |           |
|    actor_loss      | 1.72e+10  |
|    critic_loss     | 3.42e+15  |
|    ent_coef        | 5.33e+06  |
|    ent_coef_loss   | -2.38     |
|    learning_rate   | 0.0003    |
|    n_updates       | 3725899   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 36.8      |
|    ep_rew_mean     | -7.09e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 94124     |
|    fps             | 36        |
|    time_elapsed    | 167       |
|    total_timesteps | 3726146   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 36        |
|    ep_rew_mean     | -7.22e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 94176     |
|    fps             | 36        |
|    time_elapsed    | 217       |
|    total_timesteps | 3727998   |
| train/             |           |
|    actor_loss      | 1.73e+10  |
|    critic_loss     | 4.01e+15  |
|    ent_coef        | 5.33e+06  |
|    ent_coef_loss   | 0.408     |
|    learning_rate   | 0.0003    |
|    n_updates       | 3727897   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 36        |
|    ep_rew_mean     | -7.06e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 94180     |
|    fps             | 36        |
|    time_elapsed    | 220       |
|    total_timesteps | 3728137   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35        |
|    ep_rew_mean     | -6.16e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 94232     |
|    fps             | 36        |
|    time_elapsed    | 269       |
|    total_timesteps | 3729935   |
| train/             |           |
|    actor_loss      | 1.74e+10  |
|    critic_loss     | 3.7e+15   |
|    ent_coef        | 5.29e+06  |
|    ent_coef_loss   | 4.04      |
|    learning_rate   | 0.0003    |
|    n_updates       | 3729834   |
----------------------------------
-494053124.495784
diff_timesteps is  12
Elasped time is (sec) 3261.959039926529
Wrapping the env in a DummyVecEnv.
Successfully loaded model from models/SAC_non_corr_no_delay/3720000.zip
Logging to logs\SAC_model_non_corr_no_delay_0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 34.9     |
|    ep_rew_mean     | -6.1e+08 |
|

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 34.3      |
|    ep_rew_mean     | -6.19e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 94284     |
|    fps             | 37        |
|    time_elapsed    | 46        |
|    total_timesteps | 3731738   |
| train/             |           |
|    actor_loss      | 1.75e+10  |
|    critic_loss     | 3.74e+15  |
|    ent_coef        | 5.47e+06  |
|    ent_coef_loss   | -0.282    |
|    learning_rate   | 0.0003    |
|    n_updates       | 3731637   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 34.3      |
|    ep_rew_mean     | -6.28e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 94288     |
|    fps             | 37        |
|    time_elapsed    | 50        |
|    total_timesteps | 3731874   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 34        |
|    ep_rew_mean     | -6.71e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 94340     |
|    fps             | 37        |
|    time_elapsed    | 97        |
|    total_timesteps | 3733640   |
| train/             |           |
|    actor_loss      | 1.78e+10  |
|    critic_loss     | 8.2e+15   |
|    ent_coef        | 5.74e+06  |
|    ent_coef_loss   | 1.11      |
|    learning_rate   | 0.0003    |
|    n_updates       | 3733539   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 34        |
|    ep_rew_mean     | -6.76e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 94344     |
|    fps             | 37        |
|    time_elapsed    | 101       |
|    total_timesteps | 3733777   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 34        |
|    ep_rew_mean     | -6.39e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 94396     |
|    fps             | 36        |
|    time_elapsed    | 149       |
|    total_timesteps | 3735548   |
| train/             |           |
|    actor_loss      | 1.8e+10   |
|    critic_loss     | 6.84e+15  |
|    ent_coef        | 5.91e+06  |
|    ent_coef_loss   | -3.69     |
|    learning_rate   | 0.0003    |
|    n_updates       | 3735447   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 34        |
|    ep_rew_mean     | -6.42e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 94400     |
|    fps             | 37        |
|    time_elapsed    | 153       |
|    total_timesteps | 3735689   |
| train/            

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 34.6     |
|    ep_rew_mean     | -6.7e+08 |
|    phm_score       | 0        |
| time/              |          |
|    episodes        | 94452    |
|    fps             | 36       |
|    time_elapsed    | 202      |
|    total_timesteps | 3737505  |
| train/             |          |
|    actor_loss      | 1.85e+10 |
|    critic_loss     | 5.99e+15 |
|    ent_coef        | 6.02e+06 |
|    ent_coef_loss   | 2.08     |
|    learning_rate   | 0.0003   |
|    n_updates       | 3737404  |
---------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 34.6      |
|    ep_rew_mean     | -6.63e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 94456     |
|    fps             | 36        |
|    time_elapsed    | 206       |
|    total_timesteps | 3737646   |
| train/             |           |
|  

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35.6      |
|    ep_rew_mean     | -7.08e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 94508     |
|    fps             | 36        |
|    time_elapsed    | 258       |
|    total_timesteps | 3739530   |
| train/             |           |
|    actor_loss      | 1.87e+10  |
|    critic_loss     | 4.67e+15  |
|    ent_coef        | 6.03e+06  |
|    ent_coef_loss   | -0.886    |
|    learning_rate   | 0.0003    |
|    n_updates       | 3739429   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35.7      |
|    ep_rew_mean     | -7.12e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 94512     |
|    fps             | 36        |
|    time_elapsed    | 261       |
|    total_timesteps | 3739673   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 38.1      |
|    ep_rew_mean     | -8.06e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 94560     |
|    fps             | 36        |
|    time_elapsed    | 44        |
|    total_timesteps | 3741633   |
| train/             |           |
|    actor_loss      | 1.95e+10  |
|    critic_loss     | 3.91e+16  |
|    ent_coef        | 5.66e+06  |
|    ent_coef_loss   | 0.654     |
|    learning_rate   | 0.0003    |
|    n_updates       | 3741532   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 38.4      |
|    ep_rew_mean     | -8.06e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 94564     |
|    fps             | 36        |
|    time_elapsed    | 48        |
|    total_timesteps | 3741802   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 42        |
|    ep_rew_mean     | -8.38e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 94616     |
|    fps             | 29        |
|    time_elapsed    | 138       |
|    total_timesteps | 3744049   |
| train/             |           |
|    actor_loss      | 2e+10     |
|    critic_loss     | 3.28e+16  |
|    ent_coef        | 6.44e+06  |
|    ent_coef_loss   | -0.487    |
|    learning_rate   | 0.0003    |
|    n_updates       | 3743948   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 42.3      |
|    ep_rew_mean     | -8.38e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 94620     |
|    fps             | 28        |
|    time_elapsed    | 147       |
|    total_timesteps | 3744227   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 46.5      |
|    ep_rew_mean     | -7.92e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 94672     |
|    fps             | 23        |
|    time_elapsed    | 284       |
|    total_timesteps | 3746793   |
| train/             |           |
|    actor_loss      | 2.05e+10  |
|    critic_loss     | 4.59e+16  |
|    ent_coef        | 6.91e+06  |
|    ent_coef_loss   | 0.0958    |
|    learning_rate   | 0.0003    |
|    n_updates       | 3746692   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 47        |
|    ep_rew_mean     | -8.06e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 94676     |
|    fps             | 23        |
|    time_elapsed    | 297       |
|    total_timesteps | 3747008   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 50.9      |
|    ep_rew_mean     | -1.14e+09 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 94728     |
|    fps             | 23        |
|    time_elapsed    | 415       |
|    total_timesteps | 3749679   |
| train/             |           |
|    actor_loss      | 2.1e+10   |
|    critic_loss     | 3.68e+16  |
|    ent_coef        | 6.29e+06  |
|    ent_coef_loss   | 2.52      |
|    learning_rate   | 0.0003    |
|    n_updates       | 3749578   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 50.8      |
|    ep_rew_mean     | -1.16e+09 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 94732     |
|    fps             | 23        |
|    time_elapsed    | 420       |
|    total_timesteps | 3749859   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 45.1      |
|    ep_rew_mean     | -1.02e+09 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 94780     |
|    fps             | 30        |
|    time_elapsed    | 57        |
|    total_timesteps | 3751745   |
| train/             |           |
|    actor_loss      | 2.03e+10  |
|    critic_loss     | 1.27e+16  |
|    ent_coef        | 6.84e+06  |
|    ent_coef_loss   | 0.724     |
|    learning_rate   | 0.0003    |
|    n_updates       | 3751644   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 44.4      |
|    ep_rew_mean     | -1.01e+09 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 94784     |
|    fps             | 30        |
|    time_elapsed    | 62        |
|    total_timesteps | 3751899   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 38.8      |
|    ep_rew_mean     | -5.55e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 94836     |
|    fps             | 30        |
|    time_elapsed    | 127       |
|    total_timesteps | 3753918   |
| train/             |           |
|    actor_loss      | 2.02e+10  |
|    critic_loss     | 1.15e+16  |
|    ent_coef        | 7.08e+06  |
|    ent_coef_loss   | -2.29     |
|    learning_rate   | 0.0003    |
|    n_updates       | 3753817   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 38.7      |
|    ep_rew_mean     | -5.55e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 94840     |
|    fps             | 30        |
|    time_elapsed    | 132       |
|    total_timesteps | 3754072   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 38.6      |
|    ep_rew_mean     | -5.53e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 94892     |
|    fps             | 30        |
|    time_elapsed    | 197       |
|    total_timesteps | 3756066   |
| train/             |           |
|    actor_loss      | 2.07e+10  |
|    critic_loss     | 1.45e+16  |
|    ent_coef        | 7.18e+06  |
|    ent_coef_loss   | -3.35     |
|    learning_rate   | 0.0003    |
|    n_updates       | 3755965   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 38.5      |
|    ep_rew_mean     | -5.52e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 94896     |
|    fps             | 30        |
|    time_elapsed    | 201       |
|    total_timesteps | 3756218   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 38        |
|    ep_rew_mean     | -5.41e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 94948     |
|    fps             | 31        |
|    time_elapsed    | 262       |
|    total_timesteps | 3758181   |
| train/             |           |
|    actor_loss      | 2.05e+10  |
|    critic_loss     | 4.45e+15  |
|    ent_coef        | 7.35e+06  |
|    ent_coef_loss   | -0.962    |
|    learning_rate   | 0.0003    |
|    n_updates       | 3758080   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 37.9      |
|    ep_rew_mean     | -5.38e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 94952     |
|    fps             | 31        |
|    time_elapsed    | 267       |
|    total_timesteps | 3758334   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 37.6      |
|    ep_rew_mean     | -5.32e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 95000     |
|    fps             | 33        |
|    time_elapsed    | 4         |
|    total_timesteps | 3760149   |
| train/             |           |
|    actor_loss      | 2.06e+10  |
|    critic_loss     | 4.1e+15   |
|    ent_coef        | 7.49e+06  |
|    ent_coef_loss   | 2.64      |
|    learning_rate   | 0.0003    |
|    n_updates       | 3760048   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 37.6      |
|    ep_rew_mean     | -5.27e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 95004     |
|    fps             | 33        |
|    time_elapsed    | 8         |
|    total_timesteps | 3760299   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 37.4      |
|    ep_rew_mean     | -5.54e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 95056     |
|    fps             | 32        |
|    time_elapsed    | 69        |
|    total_timesteps | 3762241   |
| train/             |           |
|    actor_loss      | 2.06e+10  |
|    critic_loss     | 3.74e+15  |
|    ent_coef        | 6.73e+06  |
|    ent_coef_loss   | -2.68     |
|    learning_rate   | 0.0003    |
|    n_updates       | 3762140   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 37.4      |
|    ep_rew_mean     | -5.61e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 95060     |
|    fps             | 32        |
|    time_elapsed    | 74        |
|    total_timesteps | 3762391   |
| train/            

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 37.3     |
|    ep_rew_mean     | -5.7e+08 |
|    phm_score       | 0        |
| time/              |          |
|    episodes        | 95112    |
|    fps             | 32       |
|    time_elapsed    | 135      |
|    total_timesteps | 3764328  |
| train/             |          |
|    actor_loss      | 2.06e+10 |
|    critic_loss     | 4.19e+15 |
|    ent_coef        | 6.46e+06 |
|    ent_coef_loss   | 1.59     |
|    learning_rate   | 0.0003   |
|    n_updates       | 3764227  |
---------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 37.3      |
|    ep_rew_mean     | -5.67e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 95116     |
|    fps             | 32        |
|    time_elapsed    | 139       |
|    total_timesteps | 3764476   |
| train/             |           |
|  

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 37.3      |
|    ep_rew_mean     | -5.53e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 95168     |
|    fps             | 31        |
|    time_elapsed    | 200       |
|    total_timesteps | 3766414   |
| train/             |           |
|    actor_loss      | 2.06e+10  |
|    critic_loss     | 3.12e+15  |
|    ent_coef        | 6.41e+06  |
|    ent_coef_loss   | -2.95     |
|    learning_rate   | 0.0003    |
|    n_updates       | 3766313   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 37.3      |
|    ep_rew_mean     | -5.52e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 95172     |
|    fps             | 31        |
|    time_elapsed    | 205       |
|    total_timesteps | 3766563   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 37.3      |
|    ep_rew_mean     | -5.44e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 95224     |
|    fps             | 31        |
|    time_elapsed    | 266       |
|    total_timesteps | 3768502   |
| train/             |           |
|    actor_loss      | 2.03e+10  |
|    critic_loss     | 2.49e+15  |
|    ent_coef        | 6.49e+06  |
|    ent_coef_loss   | -1.4      |
|    learning_rate   | 0.0003    |
|    n_updates       | 3768401   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 37.3      |
|    ep_rew_mean     | -5.42e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 95228     |
|    fps             | 31        |
|    time_elapsed    | 270       |
|    total_timesteps | 3768651   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 37.2      |
|    ep_rew_mean     | -5.68e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 95276     |
|    fps             | 31        |
|    time_elapsed    | 14        |
|    total_timesteps | 3770444   |
| train/             |           |
|    actor_loss      | 2.05e+10  |
|    critic_loss     | 2.07e+15  |
|    ent_coef        | 6.97e+06  |
|    ent_coef_loss   | -2.26     |
|    learning_rate   | 0.0003    |
|    n_updates       | 3770343   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 37.2      |
|    ep_rew_mean     | -5.72e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 95280     |
|    fps             | 31        |
|    time_elapsed    | 18        |
|    total_timesteps | 3770591   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 37        |
|    ep_rew_mean     | -6.64e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 95332     |
|    fps             | 32        |
|    time_elapsed    | 78        |
|    total_timesteps | 3772507   |
| train/             |           |
|    actor_loss      | 2.07e+10  |
|    critic_loss     | 2.4e+15   |
|    ent_coef        | 6.62e+06  |
|    ent_coef_loss   | -0.237    |
|    learning_rate   | 0.0003    |
|    n_updates       | 3772406   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 37        |
|    ep_rew_mean     | -6.78e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 95336     |
|    fps             | 32        |
|    time_elapsed    | 82        |
|    total_timesteps | 3772653   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 36.6      |
|    ep_rew_mean     | -7.25e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 95388     |
|    fps             | 32        |
|    time_elapsed    | 141       |
|    total_timesteps | 3774547   |
| train/             |           |
|    actor_loss      | 2.08e+10  |
|    critic_loss     | 3.16e+15  |
|    ent_coef        | 6.68e+06  |
|    ent_coef_loss   | 1.14      |
|    learning_rate   | 0.0003    |
|    n_updates       | 3774446   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 36.6      |
|    ep_rew_mean     | -7.12e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 95392     |
|    fps             | 32        |
|    time_elapsed    | 145       |
|    total_timesteps | 3774691   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 36.3      |
|    ep_rew_mean     | -6.79e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 95444     |
|    fps             | 31        |
|    time_elapsed    | 206       |
|    total_timesteps | 3776579   |
| train/             |           |
|    actor_loss      | 2.08e+10  |
|    critic_loss     | 4.1e+15   |
|    ent_coef        | 6.59e+06  |
|    ent_coef_loss   | 1.82      |
|    learning_rate   | 0.0003    |
|    n_updates       | 3776478   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 36.3      |
|    ep_rew_mean     | -6.81e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 95448     |
|    fps             | 31        |
|    time_elapsed    | 210       |
|    total_timesteps | 3776723   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 36.3      |
|    ep_rew_mean     | -6.83e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 95500     |
|    fps             | 31        |
|    time_elapsed    | 270       |
|    total_timesteps | 3778610   |
| train/             |           |
|    actor_loss      | 2.1e+10   |
|    critic_loss     | 5.91e+15  |
|    ent_coef        | 6.52e+06  |
|    ent_coef_loss   | -2.13     |
|    learning_rate   | 0.0003    |
|    n_updates       | 3778509   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 36.3      |
|    ep_rew_mean     | -6.84e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 95504     |
|    fps             | 31        |
|    time_elapsed    | 274       |
|    total_timesteps | 3778755   |
| train/            

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 36.3     |
|    ep_rew_mean     | -6.8e+08 |
|    phm_score       | 0        |
| time/              |          |
|    episodes        | 95552    |
|    fps             | 32       |
|    time_elapsed    | 15       |
|    total_timesteps | 3780506  |
| train/             |          |
|    actor_loss      | 2.11e+10 |
|    critic_loss     | 4.59e+15 |
|    ent_coef        | 6.52e+06 |
|    ent_coef_loss   | 1.76     |
|    learning_rate   | 0.0003   |
|    n_updates       | 3780405  |
---------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 36.2      |
|    ep_rew_mean     | -6.73e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 95556     |
|    fps             | 32        |
|    time_elapsed    | 20        |
|    total_timesteps | 3780650   |
| train/             |           |
|  

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 36        |
|    ep_rew_mean     | -6.54e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 95608     |
|    fps             | 32        |
|    time_elapsed    | 77        |
|    total_timesteps | 3782511   |
| train/             |           |
|    actor_loss      | 2.13e+10  |
|    critic_loss     | 4.91e+15  |
|    ent_coef        | 6.6e+06   |
|    ent_coef_loss   | -3.01     |
|    learning_rate   | 0.0003    |
|    n_updates       | 3782410   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 36        |
|    ep_rew_mean     | -6.67e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 95612     |
|    fps             | 32        |
|    time_elapsed    | 82        |
|    total_timesteps | 3782655   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35.7      |
|    ep_rew_mean     | -6.87e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 95664     |
|    fps             | 32        |
|    time_elapsed    | 136       |
|    total_timesteps | 3784509   |
| train/             |           |
|    actor_loss      | 2.17e+10  |
|    critic_loss     | 5.55e+15  |
|    ent_coef        | 6.68e+06  |
|    ent_coef_loss   | 1.22      |
|    learning_rate   | 0.0003    |
|    n_updates       | 3784408   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35.7      |
|    ep_rew_mean     | -6.96e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 95668     |
|    fps             | 32        |
|    time_elapsed    | 141       |
|    total_timesteps | 3784652   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35.7      |
|    ep_rew_mean     | -6.96e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 95720     |
|    fps             | 32        |
|    time_elapsed    | 199       |
|    total_timesteps | 3786507   |
| train/             |           |
|    actor_loss      | 2.2e+10   |
|    critic_loss     | 7.31e+15  |
|    ent_coef        | 6.88e+06  |
|    ent_coef_loss   | 2.83      |
|    learning_rate   | 0.0003    |
|    n_updates       | 3786406   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35.6      |
|    ep_rew_mean     | -6.84e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 95724     |
|    fps             | 32        |
|    time_elapsed    | 204       |
|    total_timesteps | 3786645   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35.7      |
|    ep_rew_mean     | -6.36e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 95776     |
|    fps             | 32        |
|    time_elapsed    | 262       |
|    total_timesteps | 3788509   |
| train/             |           |
|    actor_loss      | 2.2e+10   |
|    critic_loss     | 7.76e+15  |
|    ent_coef        | 7.31e+06  |
|    ent_coef_loss   | -1.36     |
|    learning_rate   | 0.0003    |
|    n_updates       | 3788408   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35.7      |
|    ep_rew_mean     | -6.39e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 95780     |
|    fps             | 32        |
|    time_elapsed    | 266       |
|    total_timesteps | 3788654   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35.9      |
|    ep_rew_mean     | -6.19e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 95828     |
|    fps             | 31        |
|    time_elapsed    | 12        |
|    total_timesteps | 3790396   |
| train/             |           |
|    actor_loss      | 2.25e+10  |
|    critic_loss     | 5.29e+15  |
|    ent_coef        | 7.92e+06  |
|    ent_coef_loss   | -4.41     |
|    learning_rate   | 0.0003    |
|    n_updates       | 3790295   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 36        |
|    ep_rew_mean     | -6.23e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 95832     |
|    fps             | 32        |
|    time_elapsed    | 16        |
|    total_timesteps | 3790542   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 36.4      |
|    ep_rew_mean     | -6.35e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 95884     |
|    fps             | 32        |
|    time_elapsed    | 75        |
|    total_timesteps | 3792452   |
| train/             |           |
|    actor_loss      | 2.26e+10  |
|    critic_loss     | 6.76e+15  |
|    ent_coef        | 8e+06     |
|    ent_coef_loss   | 1.42      |
|    learning_rate   | 0.0003    |
|    n_updates       | 3792351   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 36.4      |
|    ep_rew_mean     | -6.28e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 95888     |
|    fps             | 32        |
|    time_elapsed    | 79        |
|    total_timesteps | 3792597   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 36.6      |
|    ep_rew_mean     | -6.42e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 95940     |
|    fps             | 32        |
|    time_elapsed    | 137       |
|    total_timesteps | 3794501   |
| train/             |           |
|    actor_loss      | 2.31e+10  |
|    critic_loss     | 7.34e+15  |
|    ent_coef        | 8.06e+06  |
|    ent_coef_loss   | 0.584     |
|    learning_rate   | 0.0003    |
|    n_updates       | 3794400   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 36.7      |
|    ep_rew_mean     | -6.41e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 95944     |
|    fps             | 32        |
|    time_elapsed    | 142       |
|    total_timesteps | 3794649   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 36.4      |
|    ep_rew_mean     | -6.18e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 95996     |
|    fps             | 32        |
|    time_elapsed    | 198       |
|    total_timesteps | 3796533   |
| train/             |           |
|    actor_loss      | 2.34e+10  |
|    critic_loss     | 5.21e+15  |
|    ent_coef        | 8.05e+06  |
|    ent_coef_loss   | 1.19      |
|    learning_rate   | 0.0003    |
|    n_updates       | 3796432   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 36.4      |
|    ep_rew_mean     | -6.14e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 96000     |
|    fps             | 32        |
|    time_elapsed    | 202       |
|    total_timesteps | 3796679   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 36.2      |
|    ep_rew_mean     | -5.86e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 96052     |
|    fps             | 32        |
|    time_elapsed    | 260       |
|    total_timesteps | 3798560   |
| train/             |           |
|    actor_loss      | 2.34e+10  |
|    critic_loss     | 3.71e+15  |
|    ent_coef        | 7.9e+06   |
|    ent_coef_loss   | 1.55      |
|    learning_rate   | 0.0003    |
|    n_updates       | 3798459   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 36.2      |
|    ep_rew_mean     | -5.95e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 96056     |
|    fps             | 32        |
|    time_elapsed    | 265       |
|    total_timesteps | 3798705   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 36.2      |
|    ep_rew_mean     | -6.13e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 96104     |
|    fps             | 32        |
|    time_elapsed    | 14        |
|    total_timesteps | 3800472   |
| train/             |           |
|    actor_loss      | 2.38e+10  |
|    critic_loss     | 4.48e+15  |
|    ent_coef        | 8.08e+06  |
|    ent_coef_loss   | 0.677     |
|    learning_rate   | 0.0003    |
|    n_updates       | 3800371   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 36.2      |
|    ep_rew_mean     | -6.04e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 96108     |
|    fps             | 31        |
|    time_elapsed    | 19        |
|    total_timesteps | 3800617   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 36.1      |
|    ep_rew_mean     | -5.91e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 96160     |
|    fps             | 31        |
|    time_elapsed    | 80        |
|    total_timesteps | 3802488   |
| train/             |           |
|    actor_loss      | 2.37e+10  |
|    critic_loss     | 3.63e+15  |
|    ent_coef        | 7.55e+06  |
|    ent_coef_loss   | -0.44     |
|    learning_rate   | 0.0003    |
|    n_updates       | 3802387   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 36.1      |
|    ep_rew_mean     | -5.94e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 96164     |
|    fps             | 31        |
|    time_elapsed    | 84        |
|    total_timesteps | 3802632   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35.9      |
|    ep_rew_mean     | -6.05e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 96216     |
|    fps             | 31        |
|    time_elapsed    | 141       |
|    total_timesteps | 3804499   |
| train/             |           |
|    actor_loss      | 2.43e+10  |
|    critic_loss     | 7.34e+15  |
|    ent_coef        | 7.68e+06  |
|    ent_coef_loss   | -0.744    |
|    learning_rate   | 0.0003    |
|    n_updates       | 3804398   |
----------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 35.9     |
|    ep_rew_mean     | -6.1e+08 |
|    phm_score       | 0        |
| time/              |          |
|    episodes        | 96220    |
|    fps             | 31       |
|    time_elapsed    | 145      |
|    total_timesteps | 3804643  |
| train/             |        

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 35.9     |
|    ep_rew_mean     | -6e+08   |
|    phm_score       | 0        |
| time/              |          |
|    episodes        | 96272    |
|    fps             | 32       |
|    time_elapsed    | 202      |
|    total_timesteps | 3806513  |
| train/             |          |
|    actor_loss      | 2.46e+10 |
|    critic_loss     | 6.75e+15 |
|    ent_coef        | 7.81e+06 |
|    ent_coef_loss   | -1       |
|    learning_rate   | 0.0003   |
|    n_updates       | 3806412  |
---------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35.9      |
|    ep_rew_mean     | -5.99e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 96276     |
|    fps             | 32        |
|    time_elapsed    | 206       |
|    total_timesteps | 3806657   |
| train/             |           |
|  

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 36        |
|    ep_rew_mean     | -6.04e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 96328     |
|    fps             | 32        |
|    time_elapsed    | 263       |
|    total_timesteps | 3808531   |
| train/             |           |
|    actor_loss      | 2.48e+10  |
|    critic_loss     | 8.18e+15  |
|    ent_coef        | 7.95e+06  |
|    ent_coef_loss   | 0.686     |
|    learning_rate   | 0.0003    |
|    n_updates       | 3808430   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 36        |
|    ep_rew_mean     | -6.07e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 96332     |
|    fps             | 32        |
|    time_elapsed    | 268       |
|    total_timesteps | 3808675   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 36        |
|    ep_rew_mean     | -6.17e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 96380     |
|    fps             | 32        |
|    time_elapsed    | 13        |
|    total_timesteps | 3810431   |
| train/             |           |
|    actor_loss      | 2.53e+10  |
|    critic_loss     | 7.68e+15  |
|    ent_coef        | 8.79e+06  |
|    ent_coef_loss   | 1.34      |
|    learning_rate   | 0.0003    |
|    n_updates       | 3810330   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 36        |
|    ep_rew_mean     | -6.04e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 96384     |
|    fps             | 32        |
|    time_elapsed    | 17        |
|    total_timesteps | 3810573   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35.9      |
|    ep_rew_mean     | -6.11e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 96436     |
|    fps             | 1         |
|    time_elapsed    | 1935      |
|    total_timesteps | 3812432   |
| train/             |           |
|    actor_loss      | 2.56e+10  |
|    critic_loss     | 8.27e+15  |
|    ent_coef        | 8.87e+06  |
|    ent_coef_loss   | 2.13      |
|    learning_rate   | 0.0003    |
|    n_updates       | 3812331   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35.8      |
|    ep_rew_mean     | -6.03e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 96440     |
|    fps             | 1         |
|    time_elapsed    | 1941      |
|    total_timesteps | 3812576   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35.7      |
|    ep_rew_mean     | -6.18e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 96492     |
|    fps             | 2         |
|    time_elapsed    | 2002      |
|    total_timesteps | 3814431   |
| train/             |           |
|    actor_loss      | 2.58e+10  |
|    critic_loss     | 7.64e+15  |
|    ent_coef        | 8.76e+06  |
|    ent_coef_loss   | -6.98     |
|    learning_rate   | 0.0003    |
|    n_updates       | 3814330   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35.7      |
|    ep_rew_mean     | -6.14e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 96496     |
|    fps             | 2         |
|    time_elapsed    | 2007      |
|    total_timesteps | 3814573   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35.5      |
|    ep_rew_mean     | -6.04e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 96548     |
|    fps             | 3         |
|    time_elapsed    | 2065      |
|    total_timesteps | 3816418   |
| train/             |           |
|    actor_loss      | 2.59e+10  |
|    critic_loss     | 9.16e+15  |
|    ent_coef        | 9.05e+06  |
|    ent_coef_loss   | -2.06     |
|    learning_rate   | 0.0003    |
|    n_updates       | 3816317   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35.5      |
|    ep_rew_mean     | -6.11e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 96552     |
|    fps             | 3         |
|    time_elapsed    | 2069      |
|    total_timesteps | 3816560   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35.4      |
|    ep_rew_mean     | -6.02e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 96604     |
|    fps             | 3         |
|    time_elapsed    | 2127      |
|    total_timesteps | 3818400   |
| train/             |           |
|    actor_loss      | 2.63e+10  |
|    critic_loss     | 9.41e+15  |
|    ent_coef        | 9.3e+06   |
|    ent_coef_loss   | -0.0198   |
|    learning_rate   | 0.0003    |
|    n_updates       | 3818299   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35.4      |
|    ep_rew_mean     | -6.03e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 96608     |
|    fps             | 4         |
|    time_elapsed    | 2132      |
|    total_timesteps | 3818541   |
| train/            

### SAC Non Corrective With Delay

In [74]:
# FIRST RUN 
#run_model(c1_non_corrective_pred_with_delay_env, "SAC", "SAC_model_non_corr_with_delay", "models/SAC_non_corr_with_delay", 1, 3600, True, sac_model_non_corr_with_delay) #1hour   
#-------------
# MAIN RUN 
run_model(c1_non_corrective_pred_with_delay_env, "SAC", "SAC_model_non_corr_with_delay", "models/SAC_non_corr_with_delay", 10000, 12600, False,)   

Wrapping the env in a DummyVecEnv.
Successfully loaded model from models/SAC_non_corr_with_delay/2240000.zip
Logging to logs\SAC_model_non_corr_with_delay_0
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.4      |
|    ep_rew_mean     | -5.65e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 72140     |
|    fps             | 35        |
|    time_elapsed    | 3         |
|    total_timesteps | 2250109   |
| train/             |           |
|    actor_loss      | 6.49e+10  |
|    critic_loss     | 1.55e+16  |
|    ent_coef        | 2.01e+07  |
|    ent_coef_loss   | 3.89      |
|    learning_rate   | 0.0003    |
|    n_updates       | 2250008   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.4      |
|    ep_rew_mean     | -5.79e+08 |
|    phm_score       | 0         |
| time/              |           |
|  

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.4      |
|    ep_rew_mean     | -5.27e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 72192     |
|    fps             | 32        |
|    time_elapsed    | 47        |
|    total_timesteps | 2251528   |
| train/             |           |
|    actor_loss      | 6.59e+10  |
|    critic_loss     | 1.94e+16  |
|    ent_coef        | 2.2e+07   |
|    ent_coef_loss   | 2.06      |
|    learning_rate   | 0.0003    |
|    n_updates       | 2251427   |
----------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 27.3     |
|    ep_rew_mean     | -5.2e+08 |
|    phm_score       | 0        |
| time/              |          |
|    episodes        | 72196    |
|    fps             | 32       |
|    time_elapsed    | 50       |
|    total_timesteps | 2251637  |
| train/             |        

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.3      |
|    ep_rew_mean     | -4.93e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 72248     |
|    fps             | 32        |
|    time_elapsed    | 95        |
|    total_timesteps | 2253059   |
| train/             |           |
|    actor_loss      | 6.6e+10   |
|    critic_loss     | 2e+16     |
|    ent_coef        | 2.28e+07  |
|    ent_coef_loss   | 2.02      |
|    learning_rate   | 0.0003    |
|    n_updates       | 2252958   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.3      |
|    ep_rew_mean     | -4.93e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 72252     |
|    fps             | 32        |
|    time_elapsed    | 98        |
|    total_timesteps | 2253169   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.4      |
|    ep_rew_mean     | -5.53e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 72304     |
|    fps             | 32        |
|    time_elapsed    | 141       |
|    total_timesteps | 2254595   |
| train/             |           |
|    actor_loss      | 6.6e+10   |
|    critic_loss     | 2.02e+16  |
|    ent_coef        | 2.23e+07  |
|    ent_coef_loss   | -3.38     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2254494   |
----------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 27.4     |
|    ep_rew_mean     | -5.5e+08 |
|    phm_score       | 0        |
| time/              |          |
|    episodes        | 72308    |
|    fps             | 32       |
|    time_elapsed    | 144      |
|    total_timesteps | 2254704  |
| train/             |        

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 27.4     |
|    ep_rew_mean     | -5.8e+08 |
|    phm_score       | 0        |
| time/              |          |
|    episodes        | 72360    |
|    fps             | 32       |
|    time_elapsed    | 189      |
|    total_timesteps | 2256131  |
| train/             |          |
|    actor_loss      | 6.68e+10 |
|    critic_loss     | 1.95e+16 |
|    ent_coef        | 2.29e+07 |
|    ent_coef_loss   | 0.868    |
|    learning_rate   | 0.0003   |
|    n_updates       | 2256030  |
---------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.4      |
|    ep_rew_mean     | -5.66e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 72364     |
|    fps             | 32        |
|    time_elapsed    | 192       |
|    total_timesteps | 2256240   |
| train/             |           |
|  

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.4      |
|    ep_rew_mean     | -5.42e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 72416     |
|    fps             | 31        |
|    time_elapsed    | 242       |
|    total_timesteps | 2257661   |
| train/             |           |
|    actor_loss      | 6.74e+10  |
|    critic_loss     | 2.24e+16  |
|    ent_coef        | 2.31e+07  |
|    ent_coef_loss   | -4.77     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2257560   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.4      |
|    ep_rew_mean     | -5.38e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 72420     |
|    fps             | 31        |
|    time_elapsed    | 246       |
|    total_timesteps | 2257771   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.3      |
|    ep_rew_mean     | -5.18e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 72472     |
|    fps             | 31        |
|    time_elapsed    | 292       |
|    total_timesteps | 2259193   |
| train/             |           |
|    actor_loss      | 6.81e+10  |
|    critic_loss     | 2.41e+16  |
|    ent_coef        | 2.32e+07  |
|    ent_coef_loss   | 4.77      |
|    learning_rate   | 0.0003    |
|    n_updates       | 2259092   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.3      |
|    ep_rew_mean     | -5.06e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 72476     |
|    fps             | 31        |
|    time_elapsed    | 295       |
|    total_timesteps | 2259301   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.3      |
|    ep_rew_mean     | -4.99e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 72524     |
|    fps             | 30        |
|    time_elapsed    | 20        |
|    total_timesteps | 2260631   |
| train/             |           |
|    actor_loss      | 6.83e+10  |
|    critic_loss     | 2.34e+16  |
|    ent_coef        | 2.24e+07  |
|    ent_coef_loss   | 2.06      |
|    learning_rate   | 0.0003    |
|    n_updates       | 2260530   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.3      |
|    ep_rew_mean     | -5.11e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 72528     |
|    fps             | 31        |
|    time_elapsed    | 23        |
|    total_timesteps | 2260743   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.4      |
|    ep_rew_mean     | -5.43e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 72580     |
|    fps             | 31        |
|    time_elapsed    | 69        |
|    total_timesteps | 2262168   |
| train/             |           |
|    actor_loss      | 6.88e+10  |
|    critic_loss     | 2.11e+16  |
|    ent_coef        | 2.31e+07  |
|    ent_coef_loss   | -4.63     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2262067   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.4      |
|    ep_rew_mean     | -5.54e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 72584     |
|    fps             | 31        |
|    time_elapsed    | 72        |
|    total_timesteps | 2262279   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.4      |
|    ep_rew_mean     | -5.74e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 72636     |
|    fps             | 32        |
|    time_elapsed    | 115       |
|    total_timesteps | 2263703   |
| train/             |           |
|    actor_loss      | 6.95e+10  |
|    critic_loss     | 2.49e+16  |
|    ent_coef        | 2.44e+07  |
|    ent_coef_loss   | -2.49     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2263602   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.4      |
|    ep_rew_mean     | -5.69e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 72640     |
|    fps             | 31        |
|    time_elapsed    | 119       |
|    total_timesteps | 2263812   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.5      |
|    ep_rew_mean     | -6.13e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 72692     |
|    fps             | 32        |
|    time_elapsed    | 162       |
|    total_timesteps | 2265246   |
| train/             |           |
|    actor_loss      | 6.97e+10  |
|    critic_loss     | 2.32e+16  |
|    ent_coef        | 2.49e+07  |
|    ent_coef_loss   | 2.31      |
|    learning_rate   | 0.0003    |
|    n_updates       | 2265145   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.5      |
|    ep_rew_mean     | -6.24e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 72696     |
|    fps             | 32        |
|    time_elapsed    | 165       |
|    total_timesteps | 2265357   |
| train/            

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 27.6     |
|    ep_rew_mean     | -6.4e+08 |
|    phm_score       | 0        |
| time/              |          |
|    episodes        | 72748    |
|    fps             | 32       |
|    time_elapsed    | 208      |
|    total_timesteps | 2266788  |
| train/             |          |
|    actor_loss      | 7e+10    |
|    critic_loss     | 2.75e+16 |
|    ent_coef        | 2.49e+07 |
|    ent_coef_loss   | 3.08     |
|    learning_rate   | 0.0003   |
|    n_updates       | 2266687  |
---------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.5      |
|    ep_rew_mean     | -6.35e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 72752     |
|    fps             | 32        |
|    time_elapsed    | 212       |
|    total_timesteps | 2266898   |
| train/             |           |
|  

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.5      |
|    ep_rew_mean     | -6.16e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 72804     |
|    fps             | 32        |
|    time_elapsed    | 257       |
|    total_timesteps | 2268327   |
| train/             |           |
|    actor_loss      | 7.04e+10  |
|    critic_loss     | 2.6e+16   |
|    ent_coef        | 2.46e+07  |
|    ent_coef_loss   | 2.43      |
|    learning_rate   | 0.0003    |
|    n_updates       | 2268226   |
----------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 27.5     |
|    ep_rew_mean     | -6.1e+08 |
|    phm_score       | 0        |
| time/              |          |
|    episodes        | 72808    |
|    fps             | 32       |
|    time_elapsed    | 260      |
|    total_timesteps | 2268437  |
| train/             |        

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 27.5     |
|    ep_rew_mean     | -6.1e+08 |
|    phm_score       | 0        |
| time/              |          |
|    episodes        | 72860    |
|    fps             | 32       |
|    time_elapsed    | 305      |
|    total_timesteps | 2269865  |
| train/             |          |
|    actor_loss      | 7.09e+10 |
|    critic_loss     | 2.52e+16 |
|    ent_coef        | 2.45e+07 |
|    ent_coef_loss   | -5.66    |
|    learning_rate   | 0.0003   |
|    n_updates       | 2269764  |
---------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.5      |
|    ep_rew_mean     | -6.02e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 72864     |
|    fps             | 32        |
|    time_elapsed    | 308       |
|    total_timesteps | 2269975   |
| train/             |           |
|  

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 27.4     |
|    ep_rew_mean     | -5.9e+08 |
|    phm_score       | 0        |
| time/              |          |
|    episodes        | 72912    |
|    fps             | 32       |
|    time_elapsed    | 40       |
|    total_timesteps | 2271318  |
| train/             |          |
|    actor_loss      | 7.08e+10 |
|    critic_loss     | 2.97e+16 |
|    ent_coef        | 2.54e+07 |
|    ent_coef_loss   | -2.76    |
|    learning_rate   | 0.0003   |
|    n_updates       | 2271217  |
---------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.5      |
|    ep_rew_mean     | -5.97e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 72916     |
|    fps             | 32        |
|    time_elapsed    | 44        |
|    total_timesteps | 2271428   |
| train/             |           |
|  

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.4      |
|    ep_rew_mean     | -5.56e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 72968     |
|    fps             | 31        |
|    time_elapsed    | 90        |
|    total_timesteps | 2272851   |
| train/             |           |
|    actor_loss      | 7.19e+10  |
|    critic_loss     | 2.47e+16  |
|    ent_coef        | 2.45e+07  |
|    ent_coef_loss   | 3.45      |
|    learning_rate   | 0.0003    |
|    n_updates       | 2272750   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.4      |
|    ep_rew_mean     | -5.62e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 72972     |
|    fps             | 31        |
|    time_elapsed    | 93        |
|    total_timesteps | 2272960   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.3      |
|    ep_rew_mean     | -5.01e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 73024     |
|    fps             | 31        |
|    time_elapsed    | 137       |
|    total_timesteps | 2274382   |
| train/             |           |
|    actor_loss      | 7.16e+10  |
|    critic_loss     | 2.91e+16  |
|    ent_coef        | 2.44e+07  |
|    ent_coef_loss   | -2.48     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2274281   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.3      |
|    ep_rew_mean     | -5.11e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 73028     |
|    fps             | 31        |
|    time_elapsed    | 141       |
|    total_timesteps | 2274493   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.4      |
|    ep_rew_mean     | -5.27e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 73080     |
|    fps             | 31        |
|    time_elapsed    | 186       |
|    total_timesteps | 2275916   |
| train/             |           |
|    actor_loss      | 7.24e+10  |
|    critic_loss     | 2.57e+16  |
|    ent_coef        | 2.43e+07  |
|    ent_coef_loss   | -1.31     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2275815   |
----------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 27.4     |
|    ep_rew_mean     | -5.2e+08 |
|    phm_score       | 0        |
| time/              |          |
|    episodes        | 73084    |
|    fps             | 31       |
|    time_elapsed    | 189      |
|    total_timesteps | 2276024  |
| train/             |        

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.3      |
|    ep_rew_mean     | -4.98e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 73136     |
|    fps             | 31        |
|    time_elapsed    | 235       |
|    total_timesteps | 2277442   |
| train/             |           |
|    actor_loss      | 7.26e+10  |
|    critic_loss     | 2.75e+16  |
|    ent_coef        | 2.49e+07  |
|    ent_coef_loss   | 0.884     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2277341   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.3      |
|    ep_rew_mean     | -4.97e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 73140     |
|    fps             | 31        |
|    time_elapsed    | 238       |
|    total_timesteps | 2277552   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.4      |
|    ep_rew_mean     | -5.29e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 73192     |
|    fps             | 31        |
|    time_elapsed    | 284       |
|    total_timesteps | 2278976   |
| train/             |           |
|    actor_loss      | 7.33e+10  |
|    critic_loss     | 2.6e+16   |
|    ent_coef        | 2.53e+07  |
|    ent_coef_loss   | 2.07      |
|    learning_rate   | 0.0003    |
|    n_updates       | 2278875   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.4      |
|    ep_rew_mean     | -5.36e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 73196     |
|    fps             | 31        |
|    time_elapsed    | 287       |
|    total_timesteps | 2279085   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.4      |
|    ep_rew_mean     | -5.34e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 73244     |
|    fps             | 29        |
|    time_elapsed    | 13        |
|    total_timesteps | 2280409   |
| train/             |           |
|    actor_loss      | 7.31e+10  |
|    critic_loss     | 2.59e+16  |
|    ent_coef        | 2.46e+07  |
|    ent_coef_loss   | 0.438     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2280308   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.4      |
|    ep_rew_mean     | -5.34e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 73248     |
|    fps             | 31        |
|    time_elapsed    | 16        |
|    total_timesteps | 2280519   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.3      |
|    ep_rew_mean     | -4.99e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 73300     |
|    fps             | 32        |
|    time_elapsed    | 60        |
|    total_timesteps | 2281939   |
| train/             |           |
|    actor_loss      | 7.43e+10  |
|    critic_loss     | 3.29e+16  |
|    ent_coef        | 2.43e+07  |
|    ent_coef_loss   | 1.33      |
|    learning_rate   | 0.0003    |
|    n_updates       | 2281838   |
----------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 27.3     |
|    ep_rew_mean     | -5e+08   |
|    phm_score       | 0        |
| time/              |          |
|    episodes        | 73304    |
|    fps             | 32       |
|    time_elapsed    | 62       |
|    total_timesteps | 2282048  |
| train/             |        

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.3      |
|    ep_rew_mean     | -4.97e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 73356     |
|    fps             | 32        |
|    time_elapsed    | 106       |
|    total_timesteps | 2283467   |
| train/             |           |
|    actor_loss      | 7.49e+10  |
|    critic_loss     | 2.83e+16  |
|    ent_coef        | 2.52e+07  |
|    ent_coef_loss   | -0.323    |
|    learning_rate   | 0.0003    |
|    n_updates       | 2283366   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.3      |
|    ep_rew_mean     | -4.97e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 73360     |
|    fps             | 32        |
|    time_elapsed    | 109       |
|    total_timesteps | 2283575   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.3      |
|    ep_rew_mean     | -5.22e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 73412     |
|    fps             | 32        |
|    time_elapsed    | 154       |
|    total_timesteps | 2284999   |
| train/             |           |
|    actor_loss      | 7.44e+10  |
|    critic_loss     | 3.13e+16  |
|    ent_coef        | 2.65e+07  |
|    ent_coef_loss   | -1.23     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2284898   |
----------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 27.3     |
|    ep_rew_mean     | -5.2e+08 |
|    phm_score       | 0        |
| time/              |          |
|    episodes        | 73416    |
|    fps             | 32       |
|    time_elapsed    | 158      |
|    total_timesteps | 2285108  |
| train/             |        

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.4      |
|    ep_rew_mean     | -5.48e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 73468     |
|    fps             | 31        |
|    time_elapsed    | 204       |
|    total_timesteps | 2286536   |
| train/             |           |
|    actor_loss      | 7.51e+10  |
|    critic_loss     | 2.94e+16  |
|    ent_coef        | 2.67e+07  |
|    ent_coef_loss   | 1.13      |
|    learning_rate   | 0.0003    |
|    n_updates       | 2286435   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.4      |
|    ep_rew_mean     | -5.55e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 73472     |
|    fps             | 31        |
|    time_elapsed    | 208       |
|    total_timesteps | 2286646   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.4      |
|    ep_rew_mean     | -5.67e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 73524     |
|    fps             | 31        |
|    time_elapsed    | 253       |
|    total_timesteps | 2288071   |
| train/             |           |
|    actor_loss      | 7.56e+10  |
|    critic_loss     | 3.22e+16  |
|    ent_coef        | 2.69e+07  |
|    ent_coef_loss   | -2.65     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2287970   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.4      |
|    ep_rew_mean     | -5.85e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 73528     |
|    fps             | 31        |
|    time_elapsed    | 256       |
|    total_timesteps | 2288182   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.5      |
|    ep_rew_mean     | -5.98e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 73580     |
|    fps             | 31        |
|    time_elapsed    | 301       |
|    total_timesteps | 2289611   |
| train/             |           |
|    actor_loss      | 7.59e+10  |
|    critic_loss     | 2.81e+16  |
|    ent_coef        | 2.75e+07  |
|    ent_coef_loss   | 1.53      |
|    learning_rate   | 0.0003    |
|    n_updates       | 2289510   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.4      |
|    ep_rew_mean     | -5.93e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 73584     |
|    fps             | 31        |
|    time_elapsed    | 305       |
|    total_timesteps | 2289721   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.4      |
|    ep_rew_mean     | -5.84e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 73632     |
|    fps             | 28        |
|    time_elapsed    | 36        |
|    total_timesteps | 2291038   |
| train/             |           |
|    actor_loss      | 7.64e+10  |
|    critic_loss     | 2.66e+16  |
|    ent_coef        | 2.68e+07  |
|    ent_coef_loss   | 2.5       |
|    learning_rate   | 0.0003    |
|    n_updates       | 2290937   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.4      |
|    ep_rew_mean     | -5.82e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 73636     |
|    fps             | 28        |
|    time_elapsed    | 39        |
|    total_timesteps | 2291149   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.3      |
|    ep_rew_mean     | -5.05e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 73688     |
|    fps             | 25        |
|    time_elapsed    | 102       |
|    total_timesteps | 2292567   |
| train/             |           |
|    actor_loss      | 7.68e+10  |
|    critic_loss     | 3.15e+16  |
|    ent_coef        | 2.7e+07   |
|    ent_coef_loss   | -0.392    |
|    learning_rate   | 0.0003    |
|    n_updates       | 2292466   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.3      |
|    ep_rew_mean     | -4.99e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 73692     |
|    fps             | 25        |
|    time_elapsed    | 105       |
|    total_timesteps | 2292676   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.3      |
|    ep_rew_mean     | -5.05e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 73744     |
|    fps             | 28        |
|    time_elapsed    | 144       |
|    total_timesteps | 2294098   |
| train/             |           |
|    actor_loss      | 7.69e+10  |
|    critic_loss     | 3.24e+16  |
|    ent_coef        | 2.67e+07  |
|    ent_coef_loss   | 4.39      |
|    learning_rate   | 0.0003    |
|    n_updates       | 2293997   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.3      |
|    ep_rew_mean     | -4.99e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 73748     |
|    fps             | 28        |
|    time_elapsed    | 147       |
|    total_timesteps | 2294206   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.3      |
|    ep_rew_mean     | -5.15e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 73800     |
|    fps             | 30        |
|    time_elapsed    | 185       |
|    total_timesteps | 2295628   |
| train/             |           |
|    actor_loss      | 7.72e+10  |
|    critic_loss     | 3.37e+16  |
|    ent_coef        | 2.69e+07  |
|    ent_coef_loss   | -1.64     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2295527   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.3      |
|    ep_rew_mean     | -5.25e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 73804     |
|    fps             | 30        |
|    time_elapsed    | 188       |
|    total_timesteps | 2295738   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.3      |
|    ep_rew_mean     | -5.18e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 73856     |
|    fps             | 31        |
|    time_elapsed    | 224       |
|    total_timesteps | 2297158   |
| train/             |           |
|    actor_loss      | 7.77e+10  |
|    critic_loss     | 3.79e+16  |
|    ent_coef        | 2.7e+07   |
|    ent_coef_loss   | 0.555     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2297057   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.3      |
|    ep_rew_mean     | -5.24e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 73860     |
|    fps             | 31        |
|    time_elapsed    | 227       |
|    total_timesteps | 2297268   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.4      |
|    ep_rew_mean     | -5.55e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 73912     |
|    fps             | 32        |
|    time_elapsed    | 264       |
|    total_timesteps | 2298694   |
| train/             |           |
|    actor_loss      | 7.82e+10  |
|    critic_loss     | 3.17e+16  |
|    ent_coef        | 2.7e+07   |
|    ent_coef_loss   | -1.32     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2298593   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.4      |
|    ep_rew_mean     | -5.42e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 73916     |
|    fps             | 32        |
|    time_elapsed    | 267       |
|    total_timesteps | 2298802   |
| train/            

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 27.3     |
|    ep_rew_mean     | -5.3e+08 |
|    phm_score       | 0        |
| time/              |          |
|    episodes        | 73964    |
|    fps             | 39       |
|    time_elapsed    | 3        |
|    total_timesteps | 2300138  |
| train/             |          |
|    actor_loss      | 7.89e+10 |
|    critic_loss     | 3.44e+16 |
|    ent_coef        | 2.67e+07 |
|    ent_coef_loss   | -1.14    |
|    learning_rate   | 0.0003   |
|    n_updates       | 2300037  |
---------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.4      |
|    ep_rew_mean     | -5.33e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 73968     |
|    fps             | 38        |
|    time_elapsed    | 6         |
|    total_timesteps | 2300248   |
| train/             |           |
|  

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.4      |
|    ep_rew_mean     | -5.39e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 74020     |
|    fps             | 38        |
|    time_elapsed    | 43        |
|    total_timesteps | 2301674   |
| train/             |           |
|    actor_loss      | 7.9e+10   |
|    critic_loss     | 3.22e+16  |
|    ent_coef        | 2.85e+07  |
|    ent_coef_loss   | 0.647     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2301573   |
----------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 27.4     |
|    ep_rew_mean     | -5.4e+08 |
|    phm_score       | 0        |
| time/              |          |
|    episodes        | 74024    |
|    fps             | 38       |
|    time_elapsed    | 46       |
|    total_timesteps | 2301784  |
| train/             |        

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.5      |
|    ep_rew_mean     | -6.08e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 74076     |
|    fps             | 38        |
|    time_elapsed    | 83        |
|    total_timesteps | 2303214   |
| train/             |           |
|    actor_loss      | 7.88e+10  |
|    critic_loss     | 2.88e+16  |
|    ent_coef        | 2.78e+07  |
|    ent_coef_loss   | -1.21     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2303113   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.5      |
|    ep_rew_mean     | -6.18e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 74080     |
|    fps             | 38        |
|    time_elapsed    | 86        |
|    total_timesteps | 2303324   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.5      |
|    ep_rew_mean     | -6.35e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 74132     |
|    fps             | 38        |
|    time_elapsed    | 124       |
|    total_timesteps | 2304758   |
| train/             |           |
|    actor_loss      | 7.99e+10  |
|    critic_loss     | 4.05e+16  |
|    ent_coef        | 2.78e+07  |
|    ent_coef_loss   | 3.46      |
|    learning_rate   | 0.0003    |
|    n_updates       | 2304657   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.5      |
|    ep_rew_mean     | -6.34e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 74136     |
|    fps             | 38        |
|    time_elapsed    | 127       |
|    total_timesteps | 2304868   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.5      |
|    ep_rew_mean     | -6.06e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 74188     |
|    fps             | 37        |
|    time_elapsed    | 165       |
|    total_timesteps | 2306293   |
| train/             |           |
|    actor_loss      | 8e+10     |
|    critic_loss     | 3.82e+16  |
|    ent_coef        | 2.79e+07  |
|    ent_coef_loss   | 1.15      |
|    learning_rate   | 0.0003    |
|    n_updates       | 2306192   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.5      |
|    ep_rew_mean     | -6.01e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 74192     |
|    fps             | 37        |
|    time_elapsed    | 168       |
|    total_timesteps | 2306403   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.5      |
|    ep_rew_mean     | -5.85e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 74244     |
|    fps             | 37        |
|    time_elapsed    | 207       |
|    total_timesteps | 2307833   |
| train/             |           |
|    actor_loss      | 7.97e+10  |
|    critic_loss     | 3.01e+16  |
|    ent_coef        | 2.8e+07   |
|    ent_coef_loss   | 1.35      |
|    learning_rate   | 0.0003    |
|    n_updates       | 2307732   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.5      |
|    ep_rew_mean     | -5.86e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 74248     |
|    fps             | 37        |
|    time_elapsed    | 210       |
|    total_timesteps | 2307942   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.4      |
|    ep_rew_mean     | -5.33e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 74300     |
|    fps             | 37        |
|    time_elapsed    | 249       |
|    total_timesteps | 2309363   |
| train/             |           |
|    actor_loss      | 8.04e+10  |
|    critic_loss     | 3.79e+16  |
|    ent_coef        | 2.82e+07  |
|    ent_coef_loss   | 2.67      |
|    learning_rate   | 0.0003    |
|    n_updates       | 2309262   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.4      |
|    ep_rew_mean     | -5.22e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 74304     |
|    fps             | 37        |
|    time_elapsed    | 252       |
|    total_timesteps | 2309472   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.4      |
|    ep_rew_mean     | -5.33e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 74352     |
|    fps             | 36        |
|    time_elapsed    | 21        |
|    total_timesteps | 2310797   |
| train/             |           |
|    actor_loss      | 8.1e+10   |
|    critic_loss     | 3.39e+16  |
|    ent_coef        | 2.93e+07  |
|    ent_coef_loss   | 5.88      |
|    learning_rate   | 0.0003    |
|    n_updates       | 2310696   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.4      |
|    ep_rew_mean     | -5.48e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 74356     |
|    fps             | 36        |
|    time_elapsed    | 24        |
|    total_timesteps | 2310908   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.5      |
|    ep_rew_mean     | -5.82e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 74408     |
|    fps             | 37        |
|    time_elapsed    | 62        |
|    total_timesteps | 2312338   |
| train/             |           |
|    actor_loss      | 8.12e+10  |
|    critic_loss     | 3.42e+16  |
|    ent_coef        | 3.02e+07  |
|    ent_coef_loss   | 0.462     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2312237   |
----------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 27.5     |
|    ep_rew_mean     | -5.8e+08 |
|    phm_score       | 0        |
| time/              |          |
|    episodes        | 74412    |
|    fps             | 37       |
|    time_elapsed    | 65       |
|    total_timesteps | 2312447  |
| train/             |        

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.5      |
|    ep_rew_mean     | -6.24e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 74464     |
|    fps             | 37        |
|    time_elapsed    | 104       |
|    total_timesteps | 2313883   |
| train/             |           |
|    actor_loss      | 8.14e+10  |
|    critic_loss     | 3.49e+16  |
|    ent_coef        | 3.01e+07  |
|    ent_coef_loss   | 0.121     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2313782   |
----------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 27.6     |
|    ep_rew_mean     | -6.3e+08 |
|    phm_score       | 0        |
| time/              |          |
|    episodes        | 74468    |
|    fps             | 37       |
|    time_elapsed    | 107      |
|    total_timesteps | 2313993  |
| train/             |        

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.6      |
|    ep_rew_mean     | -6.44e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 74520     |
|    fps             | 36        |
|    time_elapsed    | 146       |
|    total_timesteps | 2315424   |
| train/             |           |
|    actor_loss      | 8.19e+10  |
|    critic_loss     | 3.43e+16  |
|    ent_coef        | 2.98e+07  |
|    ent_coef_loss   | -2.56     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2315323   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.6      |
|    ep_rew_mean     | -6.49e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 74524     |
|    fps             | 37        |
|    time_elapsed    | 149       |
|    total_timesteps | 2315535   |
| train/            

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 27.5     |
|    ep_rew_mean     | -5.8e+08 |
|    phm_score       | 0        |
| time/              |          |
|    episodes        | 74576    |
|    fps             | 37       |
|    time_elapsed    | 187      |
|    total_timesteps | 2316960  |
| train/             |          |
|    actor_loss      | 8.27e+10 |
|    critic_loss     | 4.24e+16 |
|    ent_coef        | 2.93e+07 |
|    ent_coef_loss   | 1.02     |
|    learning_rate   | 0.0003   |
|    n_updates       | 2316859  |
---------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.5      |
|    ep_rew_mean     | -5.75e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 74580     |
|    fps             | 37        |
|    time_elapsed    | 190       |
|    total_timesteps | 2317070   |
| train/             |           |
|  

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.4      |
|    ep_rew_mean     | -5.45e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 74632     |
|    fps             | 36        |
|    time_elapsed    | 230       |
|    total_timesteps | 2318496   |
| train/             |           |
|    actor_loss      | 8.23e+10  |
|    critic_loss     | 4.45e+16  |
|    ent_coef        | 2.92e+07  |
|    ent_coef_loss   | -1.33     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2318395   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.4      |
|    ep_rew_mean     | -5.53e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 74636     |
|    fps             | 36        |
|    time_elapsed    | 233       |
|    total_timesteps | 2318607   |
| train/            

-361269944.047565
diff_timesteps is  7
Elasped time is (sec) 2095.6059741973877
Wrapping the env in a DummyVecEnv.
Successfully loaded model from models/SAC_non_corr_with_delay/2310000.zip
Logging to logs\SAC_model_non_corr_with_delay_0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 27.5     |
|    ep_rew_mean     | -5.8e+08 |
|    phm_score       | 0        |
| time/              |          |
|    episodes        | 74688    |
|    fps             | 37       |
|    time_elapsed    | 1        |
|    total_timesteps | 2320055  |
| train/             |          |
|    actor_loss      | 8.29e+10 |
|    critic_loss     | 3.46e+16 |
|    ent_coef        | 2.92e+07 |
|    ent_coef_loss   | -0.362   |
|    learning_rate   | 0.0003   |
|    n_updates       | 2319954  |
---------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.5      |
|    ep_rew_mean     | -5.92e+08 |
|    phm_sc

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 27.5     |
|    ep_rew_mean     | -5.8e+08 |
|    phm_score       | 0        |
| time/              |          |
|    episodes        | 74740    |
|    fps             | 37       |
|    time_elapsed    | 39       |
|    total_timesteps | 2321487  |
| train/             |          |
|    actor_loss      | 8.31e+10 |
|    critic_loss     | 3.99e+16 |
|    ent_coef        | 3.18e+07 |
|    ent_coef_loss   | 1.25     |
|    learning_rate   | 0.0003   |
|    n_updates       | 2321386  |
---------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.5      |
|    ep_rew_mean     | -5.75e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 74744     |
|    fps             | 37        |
|    time_elapsed    | 42        |
|    total_timesteps | 2321596   |
| train/             |           |
|  

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.6      |
|    ep_rew_mean     | -6.39e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 74796     |
|    fps             | 37        |
|    time_elapsed    | 81        |
|    total_timesteps | 2323035   |
| train/             |           |
|    actor_loss      | 8.38e+10  |
|    critic_loss     | 4.56e+16  |
|    ent_coef        | 3.21e+07  |
|    ent_coef_loss   | 0.746     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2322934   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.6      |
|    ep_rew_mean     | -6.39e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 74800     |
|    fps             | 37        |
|    time_elapsed    | 84        |
|    total_timesteps | 2323146   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.6      |
|    ep_rew_mean     | -6.65e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 74852     |
|    fps             | 37        |
|    time_elapsed    | 123       |
|    total_timesteps | 2324580   |
| train/             |           |
|    actor_loss      | 8.37e+10  |
|    critic_loss     | 3.95e+16  |
|    ent_coef        | 3.25e+07  |
|    ent_coef_loss   | -1.27     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2324479   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.6      |
|    ep_rew_mean     | -6.62e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 74856     |
|    fps             | 37        |
|    time_elapsed    | 126       |
|    total_timesteps | 2324690   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.5      |
|    ep_rew_mean     | -5.95e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 74908     |
|    fps             | 37        |
|    time_elapsed    | 164       |
|    total_timesteps | 2326117   |
| train/             |           |
|    actor_loss      | 8.43e+10  |
|    critic_loss     | 3.81e+16  |
|    ent_coef        | 3.18e+07  |
|    ent_coef_loss   | -0.931    |
|    learning_rate   | 0.0003    |
|    n_updates       | 2326016   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.5      |
|    ep_rew_mean     | -5.95e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 74912     |
|    fps             | 37        |
|    time_elapsed    | 167       |
|    total_timesteps | 2326229   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.5      |
|    ep_rew_mean     | -5.84e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 74964     |
|    fps             | 37        |
|    time_elapsed    | 204       |
|    total_timesteps | 2327660   |
| train/             |           |
|    actor_loss      | 8.41e+10  |
|    critic_loss     | 3.87e+16  |
|    ent_coef        | 3.17e+07  |
|    ent_coef_loss   | -0.465    |
|    learning_rate   | 0.0003    |
|    n_updates       | 2327559   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.5      |
|    ep_rew_mean     | -5.82e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 74968     |
|    fps             | 37        |
|    time_elapsed    | 207       |
|    total_timesteps | 2327770   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.6      |
|    ep_rew_mean     | -6.56e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 75020     |
|    fps             | 37        |
|    time_elapsed    | 244       |
|    total_timesteps | 2329209   |
| train/             |           |
|    actor_loss      | 8.44e+10  |
|    critic_loss     | 4.04e+16  |
|    ent_coef        | 3.16e+07  |
|    ent_coef_loss   | -2.01     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2329108   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.6      |
|    ep_rew_mean     | -6.57e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 75024     |
|    fps             | 37        |
|    time_elapsed    | 247       |
|    total_timesteps | 2329319   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.6      |
|    ep_rew_mean     | -6.33e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 75072     |
|    fps             | 37        |
|    time_elapsed    | 17        |
|    total_timesteps | 2330660   |
| train/             |           |
|    actor_loss      | 8.49e+10  |
|    critic_loss     | 4.2e+16   |
|    ent_coef        | 3.19e+07  |
|    ent_coef_loss   | -1.14     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2330559   |
----------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 27.6     |
|    ep_rew_mean     | -6.3e+08 |
|    phm_score       | 0        |
| time/              |          |
|    episodes        | 75076    |
|    fps             | 37       |
|    time_elapsed    | 20       |
|    total_timesteps | 2330769  |
| train/             |        

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 27.6     |
|    ep_rew_mean     | -6.3e+08 |
|    phm_score       | 0        |
| time/              |          |
|    episodes        | 75128    |
|    fps             | 36       |
|    time_elapsed    | 60       |
|    total_timesteps | 2332208  |
| train/             |          |
|    actor_loss      | 8.5e+10  |
|    critic_loss     | 4.58e+16 |
|    ent_coef        | 3.24e+07 |
|    ent_coef_loss   | 2.15     |
|    learning_rate   | 0.0003   |
|    n_updates       | 2332107  |
---------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.6      |
|    ep_rew_mean     | -6.42e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 75132     |
|    fps             | 36        |
|    time_elapsed    | 62        |
|    total_timesteps | 2332318   |
| train/             |           |
|  

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.6      |
|    ep_rew_mean     | -6.73e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 75184     |
|    fps             | 36        |
|    time_elapsed    | 101       |
|    total_timesteps | 2333755   |
| train/             |           |
|    actor_loss      | 8.53e+10  |
|    critic_loss     | 3.74e+16  |
|    ent_coef        | 3.28e+07  |
|    ent_coef_loss   | 0.899     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2333654   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.6      |
|    ep_rew_mean     | -6.69e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 75188     |
|    fps             | 36        |
|    time_elapsed    | 104       |
|    total_timesteps | 2333866   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.7      |
|    ep_rew_mean     | -6.96e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 75240     |
|    fps             | 36        |
|    time_elapsed    | 143       |
|    total_timesteps | 2335309   |
| train/             |           |
|    actor_loss      | 8.58e+10  |
|    critic_loss     | 4.71e+16  |
|    ent_coef        | 3.29e+07  |
|    ent_coef_loss   | -5.52     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2335208   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.7      |
|    ep_rew_mean     | -6.96e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 75244     |
|    fps             | 36        |
|    time_elapsed    | 146       |
|    total_timesteps | 2335420   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.7      |
|    ep_rew_mean     | -6.77e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 75296     |
|    fps             | 36        |
|    time_elapsed    | 185       |
|    total_timesteps | 2336860   |
| train/             |           |
|    actor_loss      | 8.58e+10  |
|    critic_loss     | 4.79e+16  |
|    ent_coef        | 3.26e+07  |
|    ent_coef_loss   | 2.21      |
|    learning_rate   | 0.0003    |
|    n_updates       | 2336759   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.7      |
|    ep_rew_mean     | -6.73e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 75300     |
|    fps             | 36        |
|    time_elapsed    | 188       |
|    total_timesteps | 2336970   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.6      |
|    ep_rew_mean     | -6.51e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 75352     |
|    fps             | 36        |
|    time_elapsed    | 228       |
|    total_timesteps | 2338406   |
| train/             |           |
|    actor_loss      | 8.64e+10  |
|    critic_loss     | 4.74e+16  |
|    ent_coef        | 3.21e+07  |
|    ent_coef_loss   | 2.38      |
|    learning_rate   | 0.0003    |
|    n_updates       | 2338305   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.6      |
|    ep_rew_mean     | -6.41e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 75356     |
|    fps             | 36        |
|    time_elapsed    | 231       |
|    total_timesteps | 2338516   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.6      |
|    ep_rew_mean     | -6.48e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 75408     |
|    fps             | 36        |
|    time_elapsed    | 270       |
|    total_timesteps | 2339953   |
| train/             |           |
|    actor_loss      | 8.67e+10  |
|    critic_loss     | 4.2e+16   |
|    ent_coef        | 3.2e+07   |
|    ent_coef_loss   | 0.987     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2339852   |
----------------------------------
-341566756.142444
diff_timesteps is  9
Elasped time is (sec) 2633.1990687847137
Wrapping the env in a DummyVecEnv.
Successfully loaded model from models/SAC_non_corr_with_delay/2330000.zip
Logging to logs\SAC_model_non_corr_with_delay_0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 27.6     |
|    ep_rew_mean     | -6.5e+08

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.6      |
|    ep_rew_mean     | -6.33e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 75460     |
|    fps             | 36        |
|    time_elapsed    | 38        |
|    total_timesteps | 2341408   |
| train/             |           |
|    actor_loss      | 8.7e+10   |
|    critic_loss     | 4.68e+16  |
|    ent_coef        | 3.04e+07  |
|    ent_coef_loss   | -1.61     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2341307   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.6      |
|    ep_rew_mean     | -6.37e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 75464     |
|    fps             | 36        |
|    time_elapsed    | 41        |
|    total_timesteps | 2341520   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.7      |
|    ep_rew_mean     | -7.01e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 75516     |
|    fps             | 36        |
|    time_elapsed    | 81        |
|    total_timesteps | 2342965   |
| train/             |           |
|    actor_loss      | 8.65e+10  |
|    critic_loss     | 3.93e+16  |
|    ent_coef        | 3.14e+07  |
|    ent_coef_loss   | -5.92     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2342864   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.7      |
|    ep_rew_mean     | -7.01e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 75520     |
|    fps             | 36        |
|    time_elapsed    | 84        |
|    total_timesteps | 2343076   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.7      |
|    ep_rew_mean     | -7.28e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 75572     |
|    fps             | 36        |
|    time_elapsed    | 123       |
|    total_timesteps | 2344515   |
| train/             |           |
|    actor_loss      | 8.69e+10  |
|    critic_loss     | 3.28e+16  |
|    ent_coef        | 3.13e+07  |
|    ent_coef_loss   | 1.26      |
|    learning_rate   | 0.0003    |
|    n_updates       | 2344414   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.7      |
|    ep_rew_mean     | -7.14e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 75576     |
|    fps             | 36        |
|    time_elapsed    | 126       |
|    total_timesteps | 2344625   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.6      |
|    ep_rew_mean     | -6.66e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 75628     |
|    fps             | 36        |
|    time_elapsed    | 165       |
|    total_timesteps | 2346060   |
| train/             |           |
|    actor_loss      | 8.68e+10  |
|    critic_loss     | 3.93e+16  |
|    ent_coef        | 3.17e+07  |
|    ent_coef_loss   | 2.29      |
|    learning_rate   | 0.0003    |
|    n_updates       | 2345959   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.6      |
|    ep_rew_mean     | -6.65e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 75632     |
|    fps             | 36        |
|    time_elapsed    | 168       |
|    total_timesteps | 2346170   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.6      |
|    ep_rew_mean     | -6.42e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 75684     |
|    fps             | 36        |
|    time_elapsed    | 207       |
|    total_timesteps | 2347605   |
| train/             |           |
|    actor_loss      | 8.75e+10  |
|    critic_loss     | 3.95e+16  |
|    ent_coef        | 3.16e+07  |
|    ent_coef_loss   | -2.07     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2347504   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.6      |
|    ep_rew_mean     | -6.34e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 75688     |
|    fps             | 36        |
|    time_elapsed    | 210       |
|    total_timesteps | 2347715   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.6      |
|    ep_rew_mean     | -6.35e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 75740     |
|    fps             | 36        |
|    time_elapsed    | 249       |
|    total_timesteps | 2349151   |
| train/             |           |
|    actor_loss      | 8.77e+10  |
|    critic_loss     | 3.73e+16  |
|    ent_coef        | 3.14e+07  |
|    ent_coef_loss   | 0.0474    |
|    learning_rate   | 0.0003    |
|    n_updates       | 2349050   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.6      |
|    ep_rew_mean     | -6.21e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 75744     |
|    fps             | 36        |
|    time_elapsed    | 252       |
|    total_timesteps | 2349260   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.6      |
|    ep_rew_mean     | -6.07e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 75792     |
|    fps             | 36        |
|    time_elapsed    | 16        |
|    total_timesteps | 2350605   |
| train/             |           |
|    actor_loss      | 8.71e+10  |
|    critic_loss     | 3.39e+16  |
|    ent_coef        | 3.14e+07  |
|    ent_coef_loss   | 2.3       |
|    learning_rate   | 0.0003    |
|    n_updates       | 2350504   |
----------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 27.6     |
|    ep_rew_mean     | -6.1e+08 |
|    phm_score       | 0        |
| time/              |          |
|    episodes        | 75796    |
|    fps             | 36       |
|    time_elapsed    | 19       |
|    total_timesteps | 2350716  |
| train/             |        

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.7      |
|    ep_rew_mean     | -6.96e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 75848     |
|    fps             | 37        |
|    time_elapsed    | 58        |
|    total_timesteps | 2352164   |
| train/             |           |
|    actor_loss      | 8.77e+10  |
|    critic_loss     | 3.7e+16   |
|    ent_coef        | 3.33e+07  |
|    ent_coef_loss   | -4.25     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2352063   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.7      |
|    ep_rew_mean     | -6.93e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 75852     |
|    fps             | 37        |
|    time_elapsed    | 61        |
|    total_timesteps | 2352275   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.8      |
|    ep_rew_mean     | -7.24e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 75904     |
|    fps             | 37        |
|    time_elapsed    | 100       |
|    total_timesteps | 2353719   |
| train/             |           |
|    actor_loss      | 8.74e+10  |
|    critic_loss     | 4.17e+16  |
|    ent_coef        | 3.32e+07  |
|    ent_coef_loss   | -1.51     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2353618   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.8      |
|    ep_rew_mean     | -7.24e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 75908     |
|    fps             | 37        |
|    time_elapsed    | 103       |
|    total_timesteps | 2353830   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.8      |
|    ep_rew_mean     | -7.11e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 75960     |
|    fps             | 37        |
|    time_elapsed    | 142       |
|    total_timesteps | 2355277   |
| train/             |           |
|    actor_loss      | 8.79e+10  |
|    critic_loss     | 3.74e+16  |
|    ent_coef        | 3.28e+07  |
|    ent_coef_loss   | 3.85      |
|    learning_rate   | 0.0003    |
|    n_updates       | 2355176   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.8      |
|    ep_rew_mean     | -7.18e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 75964     |
|    fps             | 37        |
|    time_elapsed    | 144       |
|    total_timesteps | 2355388   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.8      |
|    ep_rew_mean     | -7.39e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 76016     |
|    fps             | 37        |
|    time_elapsed    | 183       |
|    total_timesteps | 2356832   |
| train/             |           |
|    actor_loss      | 8.91e+10  |
|    critic_loss     | 3.59e+16  |
|    ent_coef        | 3.25e+07  |
|    ent_coef_loss   | -1.5      |
|    learning_rate   | 0.0003    |
|    n_updates       | 2356731   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.8      |
|    ep_rew_mean     | -7.35e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 76020     |
|    fps             | 37        |
|    time_elapsed    | 186       |
|    total_timesteps | 2356943   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.7      |
|    ep_rew_mean     | -7.01e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 76072     |
|    fps             | 37        |
|    time_elapsed    | 224       |
|    total_timesteps | 2358384   |
| train/             |           |
|    actor_loss      | 8.73e+10  |
|    critic_loss     | 3.87e+16  |
|    ent_coef        | 3.28e+07  |
|    ent_coef_loss   | -0.0624   |
|    learning_rate   | 0.0003    |
|    n_updates       | 2358283   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.7      |
|    ep_rew_mean     | -6.98e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 76076     |
|    fps             | 37        |
|    time_elapsed    | 227       |
|    total_timesteps | 2358494   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.7      |
|    ep_rew_mean     | -7.01e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 76128     |
|    fps             | 37        |
|    time_elapsed    | 266       |
|    total_timesteps | 2359939   |
| train/             |           |
|    actor_loss      | 8.81e+10  |
|    critic_loss     | 3.55e+16  |
|    ent_coef        | 3.28e+07  |
|    ent_coef_loss   | -1.6      |
|    learning_rate   | 0.0003    |
|    n_updates       | 2359838   |
----------------------------------
-330598594.115423
diff_timesteps is  11
Elasped time is (sec) 3173.7441771030426
Wrapping the env in a DummyVecEnv.
Successfully loaded model from models/SAC_non_corr_with_delay/2350000.zip
Logging to logs\SAC_model_non_corr_with_delay_0
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.8      |
|    ep_rew_mean     | -7.0

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.8      |
|    ep_rew_mean     | -7.25e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 76180     |
|    fps             | 37        |
|    time_elapsed    | 37        |
|    total_timesteps | 2361391   |
| train/             |           |
|    actor_loss      | 8.84e+10  |
|    critic_loss     | 3.52e+16  |
|    ent_coef        | 3.28e+07  |
|    ent_coef_loss   | -4.76     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2361290   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.8      |
|    ep_rew_mean     | -7.25e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 76184     |
|    fps             | 37        |
|    time_elapsed    | 40        |
|    total_timesteps | 2361503   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.8      |
|    ep_rew_mean     | -6.88e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 76236     |
|    fps             | 37        |
|    time_elapsed    | 78        |
|    total_timesteps | 2362945   |
| train/             |           |
|    actor_loss      | 8.87e+10  |
|    critic_loss     | 3.59e+16  |
|    ent_coef        | 3.16e+07  |
|    ent_coef_loss   | 2.94      |
|    learning_rate   | 0.0003    |
|    n_updates       | 2362844   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.8      |
|    ep_rew_mean     | -6.87e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 76240     |
|    fps             | 37        |
|    time_elapsed    | 81        |
|    total_timesteps | 2363055   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.7      |
|    ep_rew_mean     | -6.68e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 76292     |
|    fps             | 37        |
|    time_elapsed    | 120       |
|    total_timesteps | 2364496   |
| train/             |           |
|    actor_loss      | 8.87e+10  |
|    critic_loss     | 4.07e+16  |
|    ent_coef        | 3.22e+07  |
|    ent_coef_loss   | 1.17      |
|    learning_rate   | 0.0003    |
|    n_updates       | 2364395   |
----------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 27.7     |
|    ep_rew_mean     | -6.7e+08 |
|    phm_score       | 0        |
| time/              |          |
|    episodes        | 76296    |
|    fps             | 37       |
|    time_elapsed    | 123      |
|    total_timesteps | 2364608  |
| train/             |        

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.7      |
|    ep_rew_mean     | -6.73e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 76348     |
|    fps             | 37        |
|    time_elapsed    | 162       |
|    total_timesteps | 2366050   |
| train/             |           |
|    actor_loss      | 8.86e+10  |
|    critic_loss     | 3.49e+16  |
|    ent_coef        | 3.17e+07  |
|    ent_coef_loss   | -1.36     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2365949   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.7      |
|    ep_rew_mean     | -6.65e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 76352     |
|    fps             | 37        |
|    time_elapsed    | 165       |
|    total_timesteps | 2366159   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.7      |
|    ep_rew_mean     | -6.78e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 76404     |
|    fps             | 37        |
|    time_elapsed    | 204       |
|    total_timesteps | 2367602   |
| train/             |           |
|    actor_loss      | 8.85e+10  |
|    critic_loss     | 3.49e+16  |
|    ent_coef        | 3.19e+07  |
|    ent_coef_loss   | 2.03      |
|    learning_rate   | 0.0003    |
|    n_updates       | 2367501   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.7      |
|    ep_rew_mean     | -6.78e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 76408     |
|    fps             | 37        |
|    time_elapsed    | 207       |
|    total_timesteps | 2367713   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.8      |
|    ep_rew_mean     | -6.99e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 76460     |
|    fps             | 37        |
|    time_elapsed    | 245       |
|    total_timesteps | 2369158   |
| train/             |           |
|    actor_loss      | 8.9e+10   |
|    critic_loss     | 2.78e+16  |
|    ent_coef        | 3.19e+07  |
|    ent_coef_loss   | 0.916     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2369057   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.8      |
|    ep_rew_mean     | -6.99e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 76464     |
|    fps             | 37        |
|    time_elapsed    | 248       |
|    total_timesteps | 2369268   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.8      |
|    ep_rew_mean     | -7.15e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 76512     |
|    fps             | 37        |
|    time_elapsed    | 16        |
|    total_timesteps | 2370611   |
| train/             |           |
|    actor_loss      | 8.9e+10   |
|    critic_loss     | 4.12e+16  |
|    ent_coef        | 3.26e+07  |
|    ent_coef_loss   | -0.419    |
|    learning_rate   | 0.0003    |
|    n_updates       | 2370510   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.8      |
|    ep_rew_mean     | -7.17e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 76516     |
|    fps             | 37        |
|    time_elapsed    | 19        |
|    total_timesteps | 2370722   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.7      |
|    ep_rew_mean     | -6.88e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 76568     |
|    fps             | 37        |
|    time_elapsed    | 57        |
|    total_timesteps | 2372164   |
| train/             |           |
|    actor_loss      | 8.88e+10  |
|    critic_loss     | 3.82e+16  |
|    ent_coef        | 3.11e+07  |
|    ent_coef_loss   | 3.94      |
|    learning_rate   | 0.0003    |
|    n_updates       | 2372063   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.7      |
|    ep_rew_mean     | -6.86e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 76572     |
|    fps             | 37        |
|    time_elapsed    | 60        |
|    total_timesteps | 2372275   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.7      |
|    ep_rew_mean     | -6.66e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 76624     |
|    fps             | 37        |
|    time_elapsed    | 100       |
|    total_timesteps | 2373719   |
| train/             |           |
|    actor_loss      | 8.9e+10   |
|    critic_loss     | 3.11e+16  |
|    ent_coef        | 3.09e+07  |
|    ent_coef_loss   | 0.725     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2373618   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.7      |
|    ep_rew_mean     | -6.62e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 76628     |
|    fps             | 37        |
|    time_elapsed    | 103       |
|    total_timesteps | 2373829   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.7      |
|    ep_rew_mean     | -6.64e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 76680     |
|    fps             | 37        |
|    time_elapsed    | 141       |
|    total_timesteps | 2375268   |
| train/             |           |
|    actor_loss      | 8.86e+10  |
|    critic_loss     | 3.33e+16  |
|    ent_coef        | 3.08e+07  |
|    ent_coef_loss   | 1.69      |
|    learning_rate   | 0.0003    |
|    n_updates       | 2375167   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.7      |
|    ep_rew_mean     | -6.54e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 76684     |
|    fps             | 37        |
|    time_elapsed    | 144       |
|    total_timesteps | 2375378   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.7      |
|    ep_rew_mean     | -7.03e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 76736     |
|    fps             | 37        |
|    time_elapsed    | 184       |
|    total_timesteps | 2376822   |
| train/             |           |
|    actor_loss      | 8.93e+10  |
|    critic_loss     | 2.67e+16  |
|    ent_coef        | 3.1e+07   |
|    ent_coef_loss   | 4.05      |
|    learning_rate   | 0.0003    |
|    n_updates       | 2376721   |
----------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 27.8     |
|    ep_rew_mean     | -7.1e+08 |
|    phm_score       | 0        |
| time/              |          |
|    episodes        | 76740    |
|    fps             | 37       |
|    time_elapsed    | 186      |
|    total_timesteps | 2376933  |
| train/             |        

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.7      |
|    ep_rew_mean     | -6.62e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 76792     |
|    fps             | 37        |
|    time_elapsed    | 224       |
|    total_timesteps | 2378370   |
| train/             |           |
|    actor_loss      | 8.82e+10  |
|    critic_loss     | 2.5e+16   |
|    ent_coef        | 3.07e+07  |
|    ent_coef_loss   | 2.48      |
|    learning_rate   | 0.0003    |
|    n_updates       | 2378269   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.7      |
|    ep_rew_mean     | -6.55e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 76796     |
|    fps             | 37        |
|    time_elapsed    | 226       |
|    total_timesteps | 2378480   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.7      |
|    ep_rew_mean     | -6.53e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 76848     |
|    fps             | 37        |
|    time_elapsed    | 264       |
|    total_timesteps | 2379924   |
| train/             |           |
|    actor_loss      | 8.88e+10  |
|    critic_loss     | 3.4e+16   |
|    ent_coef        | 3.08e+07  |
|    ent_coef_loss   | -1.33     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2379823   |
----------------------------------
-338837564.227274
diff_timesteps is  13
Elasped time is (sec) 3708.5736622810364
Wrapping the env in a DummyVecEnv.
Successfully loaded model from models/SAC_non_corr_with_delay/2370000.zip
Logging to logs\SAC_model_non_corr_with_delay_0
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.7      |
|    ep_rew_mean     | -6.6

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.8      |
|    ep_rew_mean     | -7.21e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 76900     |
|    fps             | 38        |
|    time_elapsed    | 35        |
|    total_timesteps | 2381391   |
| train/             |           |
|    actor_loss      | 8.94e+10  |
|    critic_loss     | 3.53e+16  |
|    ent_coef        | 3.17e+07  |
|    ent_coef_loss   | 1.24      |
|    learning_rate   | 0.0003    |
|    n_updates       | 2381290   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.8      |
|    ep_rew_mean     | -7.28e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 76904     |
|    fps             | 38        |
|    time_elapsed    | 38        |
|    total_timesteps | 2381503   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.8      |
|    ep_rew_mean     | -7.15e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 76956     |
|    fps             | 37        |
|    time_elapsed    | 78        |
|    total_timesteps | 2382949   |
| train/             |           |
|    actor_loss      | 8.85e+10  |
|    critic_loss     | 2.72e+16  |
|    ent_coef        | 3.04e+07  |
|    ent_coef_loss   | -0.425    |
|    learning_rate   | 0.0003    |
|    n_updates       | 2382848   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.8      |
|    ep_rew_mean     | -7.21e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 76960     |
|    fps             | 37        |
|    time_elapsed    | 81        |
|    total_timesteps | 2383061   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.7      |
|    ep_rew_mean     | -6.68e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 77012     |
|    fps             | 35        |
|    time_elapsed    | 127       |
|    total_timesteps | 2384499   |
| train/             |           |
|    actor_loss      | 8.84e+10  |
|    critic_loss     | 2.88e+16  |
|    ent_coef        | 3.04e+07  |
|    ent_coef_loss   | 1.44      |
|    learning_rate   | 0.0003    |
|    n_updates       | 2384398   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.8      |
|    ep_rew_mean     | -6.75e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 77016     |
|    fps             | 35        |
|    time_elapsed    | 131       |
|    total_timesteps | 2384611   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.7      |
|    ep_rew_mean     | -6.43e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 77068     |
|    fps             | 35        |
|    time_elapsed    | 172       |
|    total_timesteps | 2386056   |
| train/             |           |
|    actor_loss      | 8.87e+10  |
|    critic_loss     | 2.61e+16  |
|    ent_coef        | 3.07e+07  |
|    ent_coef_loss   | -1.53     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2385955   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.7      |
|    ep_rew_mean     | -6.47e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 77072     |
|    fps             | 35        |
|    time_elapsed    | 175       |
|    total_timesteps | 2386167   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.8      |
|    ep_rew_mean     | -6.75e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 77124     |
|    fps             | 34        |
|    time_elapsed    | 218       |
|    total_timesteps | 2387614   |
| train/             |           |
|    actor_loss      | 8.91e+10  |
|    critic_loss     | 2.65e+16  |
|    ent_coef        | 3.06e+07  |
|    ent_coef_loss   | -1.27     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2387513   |
----------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 27.8     |
|    ep_rew_mean     | -6.7e+08 |
|    phm_score       | 0        |
| time/              |          |
|    episodes        | 77128    |
|    fps             | 34       |
|    time_elapsed    | 221      |
|    total_timesteps | 2387726  |
| train/             |        

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.8      |
|    ep_rew_mean     | -6.87e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 77180     |
|    fps             | 35        |
|    time_elapsed    | 259       |
|    total_timesteps | 2389174   |
| train/             |           |
|    actor_loss      | 8.88e+10  |
|    critic_loss     | 2.66e+16  |
|    ent_coef        | 3.05e+07  |
|    ent_coef_loss   | -1.54     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2389073   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.9      |
|    ep_rew_mean     | -6.94e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 77184     |
|    fps             | 35        |
|    time_elapsed    | 262       |
|    total_timesteps | 2389286   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.9      |
|    ep_rew_mean     | -7.11e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 77232     |
|    fps             | 36        |
|    time_elapsed    | 17        |
|    total_timesteps | 2390643   |
| train/             |           |
|    actor_loss      | 8.95e+10  |
|    critic_loss     | 2.3e+16   |
|    ent_coef        | 3.29e+07  |
|    ent_coef_loss   | 0.137     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2390542   |
----------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 27.9     |
|    ep_rew_mean     | -7.2e+08 |
|    phm_score       | 0        |
| time/              |          |
|    episodes        | 77236    |
|    fps             | 36       |
|    time_elapsed    | 20       |
|    total_timesteps | 2390755  |
| train/             |        

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 27.9     |
|    ep_rew_mean     | -7e+08   |
|    phm_score       | 0        |
| time/              |          |
|    episodes        | 77288    |
|    fps             | 36       |
|    time_elapsed    | 60       |
|    total_timesteps | 2392204  |
| train/             |          |
|    actor_loss      | 8.9e+10  |
|    critic_loss     | 2.55e+16 |
|    ent_coef        | 3.24e+07 |
|    ent_coef_loss   | -3.01    |
|    learning_rate   | 0.0003   |
|    n_updates       | 2392103  |
---------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.9      |
|    ep_rew_mean     | -7.03e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 77292     |
|    fps             | 36        |
|    time_elapsed    | 62        |
|    total_timesteps | 2392316   |
| train/             |           |
|  

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.9      |
|    ep_rew_mean     | -6.93e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 77344     |
|    fps             | 36        |
|    time_elapsed    | 102       |
|    total_timesteps | 2393765   |
| train/             |           |
|    actor_loss      | 8.87e+10  |
|    critic_loss     | 2.42e+16  |
|    ent_coef        | 3.18e+07  |
|    ent_coef_loss   | 7.68      |
|    learning_rate   | 0.0003    |
|    n_updates       | 2393664   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.9      |
|    ep_rew_mean     | -6.94e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 77348     |
|    fps             | 36        |
|    time_elapsed    | 105       |
|    total_timesteps | 2393876   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.9      |
|    ep_rew_mean     | -6.89e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 77400     |
|    fps             | 37        |
|    time_elapsed    | 143       |
|    total_timesteps | 2395325   |
| train/             |           |
|    actor_loss      | 8.84e+10  |
|    critic_loss     | 2.55e+16  |
|    ent_coef        | 3.16e+07  |
|    ent_coef_loss   | -0.61     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2395224   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.9      |
|    ep_rew_mean     | -6.89e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 77404     |
|    fps             | 37        |
|    time_elapsed    | 146       |
|    total_timesteps | 2395437   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.9      |
|    ep_rew_mean     | -7.07e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 77456     |
|    fps             | 36        |
|    time_elapsed    | 186       |
|    total_timesteps | 2396888   |
| train/             |           |
|    actor_loss      | 8.92e+10  |
|    critic_loss     | 2.23e+16  |
|    ent_coef        | 3.12e+07  |
|    ent_coef_loss   | -2.28     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2396787   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.9      |
|    ep_rew_mean     | -7.05e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 77460     |
|    fps             | 37        |
|    time_elapsed    | 189       |
|    total_timesteps | 2397000   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.9      |
|    ep_rew_mean     | -6.97e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 77512     |
|    fps             | 37        |
|    time_elapsed    | 228       |
|    total_timesteps | 2398452   |
| train/             |           |
|    actor_loss      | 8.88e+10  |
|    critic_loss     | 2.25e+16  |
|    ent_coef        | 3.1e+07   |
|    ent_coef_loss   | 3.65      |
|    learning_rate   | 0.0003    |
|    n_updates       | 2398351   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.9      |
|    ep_rew_mean     | -6.98e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 77516     |
|    fps             | 37        |
|    time_elapsed    | 231       |
|    total_timesteps | 2398564   |
| train/            

-285574671.884915
diff_timesteps is  15
Elasped time is (sec) 4261.377958536148
Wrapping the env in a DummyVecEnv.
Successfully loaded model from models/SAC_non_corr_with_delay/2390000.zip
Logging to logs\SAC_model_non_corr_with_delay_0
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.9      |
|    ep_rew_mean     | -6.87e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 77568     |
|    fps             | 44        |
|    time_elapsed    | 0         |
|    total_timesteps | 2400028   |
| train/             |           |
|    actor_loss      | 8.82e+10  |
|    critic_loss     | 1.8e+16   |
|    ent_coef        | 3.04e+07  |
|    ent_coef_loss   | 5.13      |
|    learning_rate   | 0.0003    |
|    n_updates       | 2399927   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.9      |
|    ep_rew_mean     | -6.85

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.9      |
|    ep_rew_mean     | -6.87e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 77620     |
|    fps             | 36        |
|    time_elapsed    | 40        |
|    total_timesteps | 2401478   |
| train/             |           |
|    actor_loss      | 8.9e+10   |
|    critic_loss     | 2.02e+16  |
|    ent_coef        | 3.19e+07  |
|    ent_coef_loss   | -0.462    |
|    learning_rate   | 0.0003    |
|    n_updates       | 2401377   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.9      |
|    ep_rew_mean     | -6.95e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 77624     |
|    fps             | 36        |
|    time_elapsed    | 43        |
|    total_timesteps | 2401590   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.9      |
|    ep_rew_mean     | -6.79e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 77676     |
|    fps             | 36        |
|    time_elapsed    | 82        |
|    total_timesteps | 2403046   |
| train/             |           |
|    actor_loss      | 8.87e+10  |
|    critic_loss     | 2.32e+16  |
|    ent_coef        | 3.12e+07  |
|    ent_coef_loss   | -0.0545   |
|    learning_rate   | 0.0003    |
|    n_updates       | 2402945   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28        |
|    ep_rew_mean     | -6.89e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 77680     |
|    fps             | 36        |
|    time_elapsed    | 85        |
|    total_timesteps | 2403158   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28        |
|    ep_rew_mean     | -6.79e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 77732     |
|    fps             | 37        |
|    time_elapsed    | 124       |
|    total_timesteps | 2404614   |
| train/             |           |
|    actor_loss      | 8.89e+10  |
|    critic_loss     | 2.05e+16  |
|    ent_coef        | 3.07e+07  |
|    ent_coef_loss   | -3.31     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2404513   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28        |
|    ep_rew_mean     | -6.88e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 77736     |
|    fps             | 37        |
|    time_elapsed    | 127       |
|    total_timesteps | 2404726   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28        |
|    ep_rew_mean     | -7.09e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 77788     |
|    fps             | 36        |
|    time_elapsed    | 167       |
|    total_timesteps | 2406181   |
| train/             |           |
|    actor_loss      | 8.85e+10  |
|    critic_loss     | 1.96e+16  |
|    ent_coef        | 3.05e+07  |
|    ent_coef_loss   | -0.517    |
|    learning_rate   | 0.0003    |
|    n_updates       | 2406080   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28        |
|    ep_rew_mean     | -7.08e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 77792     |
|    fps             | 36        |
|    time_elapsed    | 170       |
|    total_timesteps | 2406293   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28        |
|    ep_rew_mean     | -6.94e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 77844     |
|    fps             | 36        |
|    time_elapsed    | 211       |
|    total_timesteps | 2407746   |
| train/             |           |
|    actor_loss      | 8.86e+10  |
|    critic_loss     | 1.98e+16  |
|    ent_coef        | 3.07e+07  |
|    ent_coef_loss   | 1.14      |
|    learning_rate   | 0.0003    |
|    n_updates       | 2407645   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28        |
|    ep_rew_mean     | -6.89e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 77848     |
|    fps             | 36        |
|    time_elapsed    | 214       |
|    total_timesteps | 2407858   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.9      |
|    ep_rew_mean     | -6.55e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 77900     |
|    fps             | 36        |
|    time_elapsed    | 255       |
|    total_timesteps | 2409310   |
| train/             |           |
|    actor_loss      | 8.8e+10   |
|    critic_loss     | 2.35e+16  |
|    ent_coef        | 3.02e+07  |
|    ent_coef_loss   | -0.195    |
|    learning_rate   | 0.0003    |
|    n_updates       | 2409209   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.9      |
|    ep_rew_mean     | -6.62e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 77904     |
|    fps             | 36        |
|    time_elapsed    | 258       |
|    total_timesteps | 2409421   |
| train/            

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 27.9     |
|    ep_rew_mean     | -6.3e+08 |
|    phm_score       | 0        |
| time/              |          |
|    episodes        | 77952    |
|    fps             | 36       |
|    time_elapsed    | 21       |
|    total_timesteps | 2410783  |
| train/             |          |
|    actor_loss      | 8.84e+10 |
|    critic_loss     | 2.37e+16 |
|    ent_coef        | 3e+07    |
|    ent_coef_loss   | 4.07     |
|    learning_rate   | 0.0003   |
|    n_updates       | 2410682  |
---------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.9      |
|    ep_rew_mean     | -6.37e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 77956     |
|    fps             | 36        |
|    time_elapsed    | 24        |
|    total_timesteps | 2410895   |
| train/             |           |
|  

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28        |
|    ep_rew_mean     | -6.55e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 78008     |
|    fps             | 36        |
|    time_elapsed    | 63        |
|    total_timesteps | 2412352   |
| train/             |           |
|    actor_loss      | 8.87e+10  |
|    critic_loss     | 2.04e+16  |
|    ent_coef        | 2.93e+07  |
|    ent_coef_loss   | 1.9       |
|    learning_rate   | 0.0003    |
|    n_updates       | 2412251   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28        |
|    ep_rew_mean     | -6.58e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 78012     |
|    fps             | 36        |
|    time_elapsed    | 67        |
|    total_timesteps | 2412464   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28        |
|    ep_rew_mean     | -6.72e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 78064     |
|    fps             | 36        |
|    time_elapsed    | 106       |
|    total_timesteps | 2413918   |
| train/             |           |
|    actor_loss      | 8.78e+10  |
|    critic_loss     | 2.3e+16   |
|    ent_coef        | 2.83e+07  |
|    ent_coef_loss   | 4.54      |
|    learning_rate   | 0.0003    |
|    n_updates       | 2413817   |
----------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 28       |
|    ep_rew_mean     | -6.7e+08 |
|    phm_score       | 0        |
| time/              |          |
|    episodes        | 78068    |
|    fps             | 36       |
|    time_elapsed    | 109      |
|    total_timesteps | 2414030  |
| train/             |        

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.9      |
|    ep_rew_mean     | -6.96e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 78120     |
|    fps             | 36        |
|    time_elapsed    | 149       |
|    total_timesteps | 2415483   |
| train/             |           |
|    actor_loss      | 8.75e+10  |
|    critic_loss     | 2.12e+16  |
|    ent_coef        | 2.83e+07  |
|    ent_coef_loss   | -2.1      |
|    learning_rate   | 0.0003    |
|    n_updates       | 2415382   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.9      |
|    ep_rew_mean     | -7.02e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 78124     |
|    fps             | 36        |
|    time_elapsed    | 152       |
|    total_timesteps | 2415595   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28        |
|    ep_rew_mean     | -7.07e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 78176     |
|    fps             | 37        |
|    time_elapsed    | 190       |
|    total_timesteps | 2417049   |
| train/             |           |
|    actor_loss      | 8.76e+10  |
|    critic_loss     | 1.58e+16  |
|    ent_coef        | 2.81e+07  |
|    ent_coef_loss   | -2.57     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2416948   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28        |
|    ep_rew_mean     | -7.09e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 78180     |
|    fps             | 37        |
|    time_elapsed    | 193       |
|    total_timesteps | 2417161   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28        |
|    ep_rew_mean     | -6.88e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 78232     |
|    fps             | 37        |
|    time_elapsed    | 231       |
|    total_timesteps | 2418615   |
| train/             |           |
|    actor_loss      | 8.73e+10  |
|    critic_loss     | 1.76e+16  |
|    ent_coef        | 2.77e+07  |
|    ent_coef_loss   | 1.5       |
|    learning_rate   | 0.0003    |
|    n_updates       | 2418514   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28        |
|    ep_rew_mean     | -6.89e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 78236     |
|    fps             | 37        |
|    time_elapsed    | 234       |
|    total_timesteps | 2418727   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28        |
|    ep_rew_mean     | -6.95e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 78284     |
|    fps             | 39        |
|    time_elapsed    | 2         |
|    total_timesteps | 2420084   |
| train/             |           |
|    actor_loss      | 8.72e+10  |
|    critic_loss     | 1.56e+16  |
|    ent_coef        | 2.65e+07  |
|    ent_coef_loss   | 3.08      |
|    learning_rate   | 0.0003    |
|    n_updates       | 2419983   |
----------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 28       |
|    ep_rew_mean     | -7e+08   |
|    phm_score       | 0        |
| time/              |          |
|    episodes        | 78288    |
|    fps             | 39       |
|    time_elapsed    | 4        |
|    total_timesteps | 2420196  |
| train/             |        

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 27.9     |
|    ep_rew_mean     | -7e+08   |
|    phm_score       | 0        |
| time/              |          |
|    episodes        | 78340    |
|    fps             | 37       |
|    time_elapsed    | 43       |
|    total_timesteps | 2421647  |
| train/             |          |
|    actor_loss      | 8.7e+10  |
|    critic_loss     | 1.44e+16 |
|    ent_coef        | 2.68e+07 |
|    ent_coef_loss   | 1.86     |
|    learning_rate   | 0.0003   |
|    n_updates       | 2421546  |
---------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.9      |
|    ep_rew_mean     | -7.08e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 78344     |
|    fps             | 37        |
|    time_elapsed    | 46        |
|    total_timesteps | 2421759   |
| train/             |           |
|  

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.9      |
|    ep_rew_mean     | -7.18e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 78396     |
|    fps             | 37        |
|    time_elapsed    | 84        |
|    total_timesteps | 2423209   |
| train/             |           |
|    actor_loss      | 8.73e+10  |
|    critic_loss     | 1.58e+16  |
|    ent_coef        | 2.65e+07  |
|    ent_coef_loss   | -1.93     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2423108   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.9      |
|    ep_rew_mean     | -7.22e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 78400     |
|    fps             | 37        |
|    time_elapsed    | 88        |
|    total_timesteps | 2423321   |
| train/            

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 27.8     |
|    ep_rew_mean     | -6.8e+08 |
|    phm_score       | 0        |
| time/              |          |
|    episodes        | 78452    |
|    fps             | 37       |
|    time_elapsed    | 127      |
|    total_timesteps | 2424767  |
| train/             |          |
|    actor_loss      | 8.66e+10 |
|    critic_loss     | 1.4e+16  |
|    ent_coef        | 2.67e+07 |
|    ent_coef_loss   | 1.35     |
|    learning_rate   | 0.0003   |
|    n_updates       | 2424666  |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 27.8     |
|    ep_rew_mean     | -6.7e+08 |
|    phm_score       | 0        |
| time/              |          |
|    episodes        | 78456    |
|    fps             | 37       |
|    time_elapsed    | 130      |
|    total_timesteps | 2424879  |
| train/             |          |
|    actor_los

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.9      |
|    ep_rew_mean     | -6.67e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 78508     |
|    fps             | 37        |
|    time_elapsed    | 169       |
|    total_timesteps | 2426331   |
| train/             |           |
|    actor_loss      | 8.67e+10  |
|    critic_loss     | 1.38e+16  |
|    ent_coef        | 2.61e+07  |
|    ent_coef_loss   | 1.42      |
|    learning_rate   | 0.0003    |
|    n_updates       | 2426230   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.9      |
|    ep_rew_mean     | -6.78e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 78512     |
|    fps             | 37        |
|    time_elapsed    | 172       |
|    total_timesteps | 2426443   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.9      |
|    ep_rew_mean     | -7.01e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 78564     |
|    fps             | 37        |
|    time_elapsed    | 210       |
|    total_timesteps | 2427897   |
| train/             |           |
|    actor_loss      | 8.62e+10  |
|    critic_loss     | 1.19e+16  |
|    ent_coef        | 2.63e+07  |
|    ent_coef_loss   | -0.0723   |
|    learning_rate   | 0.0003    |
|    n_updates       | 2427796   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.9      |
|    ep_rew_mean     | -7.02e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 78568     |
|    fps             | 37        |
|    time_elapsed    | 213       |
|    total_timesteps | 2428009   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28        |
|    ep_rew_mean     | -7.06e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 78620     |
|    fps             | 37        |
|    time_elapsed    | 252       |
|    total_timesteps | 2429464   |
| train/             |           |
|    actor_loss      | 8.68e+10  |
|    critic_loss     | 1.11e+16  |
|    ent_coef        | 2.63e+07  |
|    ent_coef_loss   | 0.113     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2429363   |
----------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 28       |
|    ep_rew_mean     | -7.1e+08 |
|    phm_score       | 0        |
| time/              |          |
|    episodes        | 78624    |
|    fps             | 37       |
|    time_elapsed    | 255      |
|    total_timesteps | 2429576  |
| train/             |        

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28        |
|    ep_rew_mean     | -7.24e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 78672     |
|    fps             | 37        |
|    time_elapsed    | 24        |
|    total_timesteps | 2430922   |
| train/             |           |
|    actor_loss      | 8.63e+10  |
|    critic_loss     | 1.32e+16  |
|    ent_coef        | 2.69e+07  |
|    ent_coef_loss   | 0.0575    |
|    learning_rate   | 0.0003    |
|    n_updates       | 2430821   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28        |
|    ep_rew_mean     | -7.29e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 78676     |
|    fps             | 37        |
|    time_elapsed    | 27        |
|    total_timesteps | 2431034   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28        |
|    ep_rew_mean     | -7.06e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 78728     |
|    fps             | 37        |
|    time_elapsed    | 66        |
|    total_timesteps | 2432488   |
| train/             |           |
|    actor_loss      | 8.64e+10  |
|    critic_loss     | 1.32e+16  |
|    ent_coef        | 2.75e+07  |
|    ent_coef_loss   | 0.935     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2432387   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28        |
|    ep_rew_mean     | -7.08e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 78732     |
|    fps             | 37        |
|    time_elapsed    | 69        |
|    total_timesteps | 2432600   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.9      |
|    ep_rew_mean     | -6.49e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 78784     |
|    fps             | 36        |
|    time_elapsed    | 109       |
|    total_timesteps | 2434052   |
| train/             |           |
|    actor_loss      | 8.66e+10  |
|    critic_loss     | 1.24e+16  |
|    ent_coef        | 2.73e+07  |
|    ent_coef_loss   | 0.834     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2433951   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 27.9      |
|    ep_rew_mean     | -6.49e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 78788     |
|    fps             | 36        |
|    time_elapsed    | 113       |
|    total_timesteps | 2434164   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28        |
|    ep_rew_mean     | -6.75e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 78840     |
|    fps             | 35        |
|    time_elapsed    | 157       |
|    total_timesteps | 2435619   |
| train/             |           |
|    actor_loss      | 8.54e+10  |
|    critic_loss     | 1.14e+16  |
|    ent_coef        | 2.72e+07  |
|    ent_coef_loss   | 0.141     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2435518   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28        |
|    ep_rew_mean     | -6.76e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 78844     |
|    fps             | 35        |
|    time_elapsed    | 160       |
|    total_timesteps | 2435731   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28        |
|    ep_rew_mean     | -6.69e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 78896     |
|    fps             | 35        |
|    time_elapsed    | 204       |
|    total_timesteps | 2437185   |
| train/             |           |
|    actor_loss      | 8.63e+10  |
|    critic_loss     | 1.16e+16  |
|    ent_coef        | 2.69e+07  |
|    ent_coef_loss   | -0.14     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2437084   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28        |
|    ep_rew_mean     | -6.73e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 78900     |
|    fps             | 35        |
|    time_elapsed    | 207       |
|    total_timesteps | 2437297   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28        |
|    ep_rew_mean     | -6.53e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 78952     |
|    fps             | 35        |
|    time_elapsed    | 250       |
|    total_timesteps | 2438753   |
| train/             |           |
|    actor_loss      | 8.62e+10  |
|    critic_loss     | 1.09e+16  |
|    ent_coef        | 2.68e+07  |
|    ent_coef_loss   | -0.246    |
|    learning_rate   | 0.0003    |
|    n_updates       | 2438652   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28        |
|    ep_rew_mean     | -6.54e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 78956     |
|    fps             | 35        |
|    time_elapsed    | 253       |
|    total_timesteps | 2438865   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28        |
|    ep_rew_mean     | -6.45e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 79004     |
|    fps             | 37        |
|    time_elapsed    | 5         |
|    total_timesteps | 2440224   |
| train/             |           |
|    actor_loss      | 8.58e+10  |
|    critic_loss     | 8.5e+15   |
|    ent_coef        | 2.6e+07   |
|    ent_coef_loss   | -1.71     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2440123   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28        |
|    ep_rew_mean     | -6.53e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 79008     |
|    fps             | 38        |
|    time_elapsed    | 8         |
|    total_timesteps | 2440336   |
| train/            

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 28       |
|    ep_rew_mean     | -6.4e+08 |
|    phm_score       | 0        |
| time/              |          |
|    episodes        | 79060    |
|    fps             | 37       |
|    time_elapsed    | 47       |
|    total_timesteps | 2441791  |
| train/             |          |
|    actor_loss      | 8.58e+10 |
|    critic_loss     | 1.03e+16 |
|    ent_coef        | 2.81e+07 |
|    ent_coef_loss   | 3.72     |
|    learning_rate   | 0.0003   |
|    n_updates       | 2441690  |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 27.9     |
|    ep_rew_mean     | -6.3e+08 |
|    phm_score       | 0        |
| time/              |          |
|    episodes        | 79064    |
|    fps             | 37       |
|    time_elapsed    | 50       |
|    total_timesteps | 2441901  |
| train/             |          |
|    actor_los

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28        |
|    ep_rew_mean     | -6.08e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 79116     |
|    fps             | 37        |
|    time_elapsed    | 88        |
|    total_timesteps | 2443357   |
| train/             |           |
|    actor_loss      | 8.57e+10  |
|    critic_loss     | 1.02e+16  |
|    ent_coef        | 2.83e+07  |
|    ent_coef_loss   | 0.0718    |
|    learning_rate   | 0.0003    |
|    n_updates       | 2443256   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28        |
|    ep_rew_mean     | -6.11e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 79120     |
|    fps             | 37        |
|    time_elapsed    | 91        |
|    total_timesteps | 2443469   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28        |
|    ep_rew_mean     | -6.08e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 79172     |
|    fps             | 37        |
|    time_elapsed    | 130       |
|    total_timesteps | 2444925   |
| train/             |           |
|    actor_loss      | 8.7e+10   |
|    critic_loss     | 1.19e+16  |
|    ent_coef        | 2.85e+07  |
|    ent_coef_loss   | 0.864     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2444824   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28        |
|    ep_rew_mean     | -6.11e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 79176     |
|    fps             | 37        |
|    time_elapsed    | 133       |
|    total_timesteps | 2445037   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28        |
|    ep_rew_mean     | -5.95e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 79228     |
|    fps             | 37        |
|    time_elapsed    | 172       |
|    total_timesteps | 2446494   |
| train/             |           |
|    actor_loss      | 8.6e+10   |
|    critic_loss     | 9.53e+15  |
|    ent_coef        | 2.82e+07  |
|    ent_coef_loss   | -1.22     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2446393   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28        |
|    ep_rew_mean     | -5.86e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 79232     |
|    fps             | 37        |
|    time_elapsed    | 175       |
|    total_timesteps | 2446606   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28        |
|    ep_rew_mean     | -5.49e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 79284     |
|    fps             | 37        |
|    time_elapsed    | 213       |
|    total_timesteps | 2448061   |
| train/             |           |
|    actor_loss      | 8.6e+10   |
|    critic_loss     | 9.68e+15  |
|    ent_coef        | 2.78e+07  |
|    ent_coef_loss   | 0.546     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2447960   |
----------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 28       |
|    ep_rew_mean     | -5.5e+08 |
|    phm_score       | 0        |
| time/              |          |
|    episodes        | 79288    |
|    fps             | 37       |
|    time_elapsed    | 216      |
|    total_timesteps | 2448173  |
| train/             |        

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28        |
|    ep_rew_mean     | -5.54e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 79340     |
|    fps             | 37        |
|    time_elapsed    | 255       |
|    total_timesteps | 2449630   |
| train/             |           |
|    actor_loss      | 8.55e+10  |
|    critic_loss     | 1.14e+16  |
|    ent_coef        | 2.79e+07  |
|    ent_coef_loss   | 0.53      |
|    learning_rate   | 0.0003    |
|    n_updates       | 2449529   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28        |
|    ep_rew_mean     | -5.59e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 79344     |
|    fps             | 37        |
|    time_elapsed    | 258       |
|    total_timesteps | 2449742   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28        |
|    ep_rew_mean     | -5.68e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 79392     |
|    fps             | 37        |
|    time_elapsed    | 28        |
|    total_timesteps | 2451092   |
| train/             |           |
|    actor_loss      | 8.55e+10  |
|    critic_loss     | 1.04e+16  |
|    ent_coef        | 2.85e+07  |
|    ent_coef_loss   | 1.8       |
|    learning_rate   | 0.0003    |
|    n_updates       | 2450991   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28        |
|    ep_rew_mean     | -5.67e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 79396     |
|    fps             | 37        |
|    time_elapsed    | 31        |
|    total_timesteps | 2451204   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28        |
|    ep_rew_mean     | -5.67e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 79448     |
|    fps             | 37        |
|    time_elapsed    | 70        |
|    total_timesteps | 2452661   |
| train/             |           |
|    actor_loss      | 8.62e+10  |
|    critic_loss     | 1.07e+16  |
|    ent_coef        | 2.83e+07  |
|    ent_coef_loss   | 2.94      |
|    learning_rate   | 0.0003    |
|    n_updates       | 2452560   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28        |
|    ep_rew_mean     | -5.68e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 79452     |
|    fps             | 37        |
|    time_elapsed    | 73        |
|    total_timesteps | 2452773   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28        |
|    ep_rew_mean     | -5.77e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 79504     |
|    fps             | 37        |
|    time_elapsed    | 112       |
|    total_timesteps | 2454232   |
| train/             |           |
|    actor_loss      | 8.6e+10   |
|    critic_loss     | 1.1e+16   |
|    ent_coef        | 2.79e+07  |
|    ent_coef_loss   | -2.6      |
|    learning_rate   | 0.0003    |
|    n_updates       | 2454131   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28        |
|    ep_rew_mean     | -5.74e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 79508     |
|    fps             | 37        |
|    time_elapsed    | 115       |
|    total_timesteps | 2454344   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28.1      |
|    ep_rew_mean     | -5.66e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 79560     |
|    fps             | 37        |
|    time_elapsed    | 154       |
|    total_timesteps | 2455806   |
| train/             |           |
|    actor_loss      | 8.48e+10  |
|    critic_loss     | 9.63e+15  |
|    ent_coef        | 2.74e+07  |
|    ent_coef_loss   | -0.118    |
|    learning_rate   | 0.0003    |
|    n_updates       | 2455705   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28.1      |
|    ep_rew_mean     | -5.57e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 79564     |
|    fps             | 37        |
|    time_elapsed    | 157       |
|    total_timesteps | 2455918   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28.1      |
|    ep_rew_mean     | -5.41e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 79616     |
|    fps             | 37        |
|    time_elapsed    | 196       |
|    total_timesteps | 2457375   |
| train/             |           |
|    actor_loss      | 8.56e+10  |
|    critic_loss     | 9.38e+15  |
|    ent_coef        | 2.72e+07  |
|    ent_coef_loss   | -3.34     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2457274   |
----------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 28.1     |
|    ep_rew_mean     | -5.4e+08 |
|    phm_score       | 0        |
| time/              |          |
|    episodes        | 79620    |
|    fps             | 37       |
|    time_elapsed    | 199      |
|    total_timesteps | 2457487  |
| train/             |        

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28        |
|    ep_rew_mean     | -5.49e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 79672     |
|    fps             | 37        |
|    time_elapsed    | 238       |
|    total_timesteps | 2458946   |
| train/             |           |
|    actor_loss      | 8.56e+10  |
|    critic_loss     | 9.11e+15  |
|    ent_coef        | 2.69e+07  |
|    ent_coef_loss   | 0.883     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2458845   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28        |
|    ep_rew_mean     | -5.47e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 79676     |
|    fps             | 37        |
|    time_elapsed    | 241       |
|    total_timesteps | 2459058   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28.1      |
|    ep_rew_mean     | -5.45e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 79724     |
|    fps             | 37        |
|    time_elapsed    | 11        |
|    total_timesteps | 2460420   |
| train/             |           |
|    actor_loss      | 8.53e+10  |
|    critic_loss     | 1.01e+16  |
|    ent_coef        | 2.52e+07  |
|    ent_coef_loss   | -4.78     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2460319   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28.1      |
|    ep_rew_mean     | -5.47e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 79728     |
|    fps             | 37        |
|    time_elapsed    | 14        |
|    total_timesteps | 2460532   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28.1      |
|    ep_rew_mean     | -5.41e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 79780     |
|    fps             | 37        |
|    time_elapsed    | 53        |
|    total_timesteps | 2461993   |
| train/             |           |
|    actor_loss      | 8.47e+10  |
|    critic_loss     | 9.71e+15  |
|    ent_coef        | 2.72e+07  |
|    ent_coef_loss   | -0.578    |
|    learning_rate   | 0.0003    |
|    n_updates       | 2461892   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28.1      |
|    ep_rew_mean     | -5.38e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 79784     |
|    fps             | 37        |
|    time_elapsed    | 56        |
|    total_timesteps | 2462105   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28.1      |
|    ep_rew_mean     | -5.59e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 79836     |
|    fps             | 37        |
|    time_elapsed    | 96        |
|    total_timesteps | 2463565   |
| train/             |           |
|    actor_loss      | 8.5e+10   |
|    critic_loss     | 1.05e+16  |
|    ent_coef        | 2.67e+07  |
|    ent_coef_loss   | -5.35     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2463464   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28.1      |
|    ep_rew_mean     | -5.57e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 79840     |
|    fps             | 37        |
|    time_elapsed    | 99        |
|    total_timesteps | 2463677   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28.1      |
|    ep_rew_mean     | -5.47e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 79892     |
|    fps             | 36        |
|    time_elapsed    | 138       |
|    total_timesteps | 2465135   |
| train/             |           |
|    actor_loss      | 8.47e+10  |
|    critic_loss     | 9.43e+15  |
|    ent_coef        | 2.63e+07  |
|    ent_coef_loss   | -1.94     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2465034   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28        |
|    ep_rew_mean     | -5.37e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 79896     |
|    fps             | 36        |
|    time_elapsed    | 141       |
|    total_timesteps | 2465247   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28.1      |
|    ep_rew_mean     | -5.25e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 79948     |
|    fps             | 37        |
|    time_elapsed    | 180       |
|    total_timesteps | 2466707   |
| train/             |           |
|    actor_loss      | 8.43e+10  |
|    critic_loss     | 8.3e+15   |
|    ent_coef        | 2.61e+07  |
|    ent_coef_loss   | 3.55      |
|    learning_rate   | 0.0003    |
|    n_updates       | 2466606   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28.1      |
|    ep_rew_mean     | -5.24e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 79952     |
|    fps             | 37        |
|    time_elapsed    | 183       |
|    total_timesteps | 2466820   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28.1      |
|    ep_rew_mean     | -5.13e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 80004     |
|    fps             | 37        |
|    time_elapsed    | 222       |
|    total_timesteps | 2468280   |
| train/             |           |
|    actor_loss      | 8.36e+10  |
|    critic_loss     | 7.39e+15  |
|    ent_coef        | 2.55e+07  |
|    ent_coef_loss   | -1.5      |
|    learning_rate   | 0.0003    |
|    n_updates       | 2468179   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28.1      |
|    ep_rew_mean     | -5.14e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 80008     |
|    fps             | 37        |
|    time_elapsed    | 225       |
|    total_timesteps | 2468393   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28.1      |
|    ep_rew_mean     | -5.25e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 80060     |
|    fps             | 37        |
|    time_elapsed    | 265       |
|    total_timesteps | 2469855   |
| train/             |           |
|    actor_loss      | 8.33e+10  |
|    critic_loss     | 7.76e+15  |
|    ent_coef        | 2.51e+07  |
|    ent_coef_loss   | 4.39      |
|    learning_rate   | 0.0003    |
|    n_updates       | 2469754   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28.1      |
|    ep_rew_mean     | -5.31e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 80064     |
|    fps             | 37        |
|    time_elapsed    | 268       |
|    total_timesteps | 2469968   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28.2      |
|    ep_rew_mean     | -5.42e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 80112     |
|    fps             | 37        |
|    time_elapsed    | 35        |
|    total_timesteps | 2471325   |
| train/             |           |
|    actor_loss      | 8.32e+10  |
|    critic_loss     | 8.37e+15  |
|    ent_coef        | 2.45e+07  |
|    ent_coef_loss   | -4.06     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2471224   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28.2      |
|    ep_rew_mean     | -5.53e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 80116     |
|    fps             | 37        |
|    time_elapsed    | 38        |
|    total_timesteps | 2471438   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28.1      |
|    ep_rew_mean     | -5.18e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 80168     |
|    fps             | 37        |
|    time_elapsed    | 78        |
|    total_timesteps | 2472899   |
| train/             |           |
|    actor_loss      | 8.29e+10  |
|    critic_loss     | 6.27e+15  |
|    ent_coef        | 2.48e+07  |
|    ent_coef_loss   | 4.22      |
|    learning_rate   | 0.0003    |
|    n_updates       | 2472798   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28.1      |
|    ep_rew_mean     | -5.25e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 80172     |
|    fps             | 37        |
|    time_elapsed    | 81        |
|    total_timesteps | 2473011   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28.2      |
|    ep_rew_mean     | -5.33e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 80224     |
|    fps             | 37        |
|    time_elapsed    | 120       |
|    total_timesteps | 2474478   |
| train/             |           |
|    actor_loss      | 8.27e+10  |
|    critic_loss     | 6.63e+15  |
|    ent_coef        | 2.4e+07   |
|    ent_coef_loss   | -0.0929   |
|    learning_rate   | 0.0003    |
|    n_updates       | 2474377   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28.2      |
|    ep_rew_mean     | -5.41e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 80228     |
|    fps             | 37        |
|    time_elapsed    | 123       |
|    total_timesteps | 2474590   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28.1      |
|    ep_rew_mean     | -5.37e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 80280     |
|    fps             | 37        |
|    time_elapsed    | 162       |
|    total_timesteps | 2476054   |
| train/             |           |
|    actor_loss      | 8.29e+10  |
|    critic_loss     | 7.38e+15  |
|    ent_coef        | 2.36e+07  |
|    ent_coef_loss   | 2.9       |
|    learning_rate   | 0.0003    |
|    n_updates       | 2475953   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28.2      |
|    ep_rew_mean     | -5.41e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 80284     |
|    fps             | 37        |
|    time_elapsed    | 165       |
|    total_timesteps | 2476168   |
| train/            

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 28.2     |
|    ep_rew_mean     | -5e+08   |
|    phm_score       | 0        |
| time/              |          |
|    episodes        | 80336    |
|    fps             | 37       |
|    time_elapsed    | 204      |
|    total_timesteps | 2477632  |
| train/             |          |
|    actor_loss      | 8.23e+10 |
|    critic_loss     | 6.06e+15 |
|    ent_coef        | 2.34e+07 |
|    ent_coef_loss   | 1.47     |
|    learning_rate   | 0.0003   |
|    n_updates       | 2477531  |
---------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28.1      |
|    ep_rew_mean     | -4.97e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 80340     |
|    fps             | 37        |
|    time_elapsed    | 207       |
|    total_timesteps | 2477744   |
| train/             |           |
|  

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28.1      |
|    ep_rew_mean     | -4.95e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 80392     |
|    fps             | 37        |
|    time_elapsed    | 246       |
|    total_timesteps | 2479206   |
| train/             |           |
|    actor_loss      | 8.17e+10  |
|    critic_loss     | 7.15e+15  |
|    ent_coef        | 2.32e+07  |
|    ent_coef_loss   | -3.96     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2479105   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28.1      |
|    ep_rew_mean     | -4.96e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 80396     |
|    fps             | 37        |
|    time_elapsed    | 249       |
|    total_timesteps | 2479318   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28.1      |
|    ep_rew_mean     | -4.92e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 80444     |
|    fps             | 37        |
|    time_elapsed    | 18        |
|    total_timesteps | 2480676   |
| train/             |           |
|    actor_loss      | 8.13e+10  |
|    critic_loss     | 5.85e+15  |
|    ent_coef        | 2.34e+07  |
|    ent_coef_loss   | -7.27     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2480575   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28.1      |
|    ep_rew_mean     | -4.94e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 80448     |
|    fps             | 37        |
|    time_elapsed    | 21        |
|    total_timesteps | 2480788   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28.2      |
|    ep_rew_mean     | -5.22e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 80500     |
|    fps             | 37        |
|    time_elapsed    | 60        |
|    total_timesteps | 2482254   |
| train/             |           |
|    actor_loss      | 8.16e+10  |
|    critic_loss     | 6.73e+15  |
|    ent_coef        | 2.25e+07  |
|    ent_coef_loss   | -3.58     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2482153   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28.2      |
|    ep_rew_mean     | -5.22e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 80504     |
|    fps             | 37        |
|    time_elapsed    | 63        |
|    total_timesteps | 2482366   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28.1      |
|    ep_rew_mean     | -4.86e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 80556     |
|    fps             | 37        |
|    time_elapsed    | 102       |
|    total_timesteps | 2483829   |
| train/             |           |
|    actor_loss      | 8.08e+10  |
|    critic_loss     | 5.96e+15  |
|    ent_coef        | 2.26e+07  |
|    ent_coef_loss   | -3.93     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2483728   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28.2      |
|    ep_rew_mean     | -4.83e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 80560     |
|    fps             | 37        |
|    time_elapsed    | 106       |
|    total_timesteps | 2483942   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28.2      |
|    ep_rew_mean     | -4.92e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 80612     |
|    fps             | 37        |
|    time_elapsed    | 144       |
|    total_timesteps | 2485413   |
| train/             |           |
|    actor_loss      | 8.08e+10  |
|    critic_loss     | 5.71e+15  |
|    ent_coef        | 2.19e+07  |
|    ent_coef_loss   | -4.24     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2485312   |
----------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 28.2     |
|    ep_rew_mean     | -5e+08   |
|    phm_score       | 0        |
| time/              |          |
|    episodes        | 80616    |
|    fps             | 37       |
|    time_elapsed    | 147      |
|    total_timesteps | 2485527  |
| train/             |        

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28.4      |
|    ep_rew_mean     | -5.52e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 80668     |
|    fps             | 37        |
|    time_elapsed    | 187       |
|    total_timesteps | 2487007   |
| train/             |           |
|    actor_loss      | 8.04e+10  |
|    critic_loss     | 8.58e+15  |
|    ent_coef        | 2.14e+07  |
|    ent_coef_loss   | -3.42     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2486906   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28.4      |
|    ep_rew_mean     | -5.58e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 80672     |
|    fps             | 37        |
|    time_elapsed    | 190       |
|    total_timesteps | 2487121   |
| train/            

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 28.4     |
|    ep_rew_mean     | -5.5e+08 |
|    phm_score       | 0        |
| time/              |          |
|    episodes        | 80724    |
|    fps             | 37       |
|    time_elapsed    | 230      |
|    total_timesteps | 2488594  |
| train/             |          |
|    actor_loss      | 7.97e+10 |
|    critic_loss     | 6e+15    |
|    ent_coef        | 2.1e+07  |
|    ent_coef_loss   | -6.02    |
|    learning_rate   | 0.0003   |
|    n_updates       | 2488493  |
---------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28.4      |
|    ep_rew_mean     | -5.47e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 80728     |
|    fps             | 37        |
|    time_elapsed    | 233       |
|    total_timesteps | 2488706   |
| train/             |           |
|  

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 28.4     |
|    ep_rew_mean     | -5.4e+08 |
|    phm_score       | 0        |
| time/              |          |
|    episodes        | 80776    |
|    fps             | 36       |
|    time_elapsed    | 2        |
|    total_timesteps | 2490086  |
| train/             |          |
|    actor_loss      | 7.93e+10 |
|    critic_loss     | 1.02e+16 |
|    ent_coef        | 2e+07    |
|    ent_coef_loss   | 0.785    |
|    learning_rate   | 0.0003   |
|    n_updates       | 2489985  |
---------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28.4      |
|    ep_rew_mean     | -5.47e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 80780     |
|    fps             | 36        |
|    time_elapsed    | 5         |
|    total_timesteps | 2490200   |
| train/             |           |
|  

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28.4      |
|    ep_rew_mean     | -5.76e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 80832     |
|    fps             | 36        |
|    time_elapsed    | 45        |
|    total_timesteps | 2491679   |
| train/             |           |
|    actor_loss      | 7.87e+10  |
|    critic_loss     | 7.88e+15  |
|    ent_coef        | 1.91e+07  |
|    ent_coef_loss   | -4.11     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2491578   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28.4      |
|    ep_rew_mean     | -5.75e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 80836     |
|    fps             | 36        |
|    time_elapsed    | 49        |
|    total_timesteps | 2491793   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28.5      |
|    ep_rew_mean     | -5.57e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 80888     |
|    fps             | 36        |
|    time_elapsed    | 88        |
|    total_timesteps | 2493275   |
| train/             |           |
|    actor_loss      | 7.88e+10  |
|    critic_loss     | 9.27e+15  |
|    ent_coef        | 1.91e+07  |
|    ent_coef_loss   | -2.59     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2493174   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28.4      |
|    ep_rew_mean     | -5.51e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 80892     |
|    fps             | 36        |
|    time_elapsed    | 91        |
|    total_timesteps | 2493387   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28.5      |
|    ep_rew_mean     | -5.41e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 80944     |
|    fps             | 37        |
|    time_elapsed    | 131       |
|    total_timesteps | 2494867   |
| train/             |           |
|    actor_loss      | 7.78e+10  |
|    critic_loss     | 7.14e+15  |
|    ent_coef        | 1.91e+07  |
|    ent_coef_loss   | -1.71     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2494766   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28.4      |
|    ep_rew_mean     | -5.38e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 80948     |
|    fps             | 37        |
|    time_elapsed    | 134       |
|    total_timesteps | 2494979   |
| train/            

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 28.4     |
|    ep_rew_mean     | -5.4e+08 |
|    phm_score       | 0        |
| time/              |          |
|    episodes        | 81000    |
|    fps             | 37       |
|    time_elapsed    | 174      |
|    total_timesteps | 2496457  |
| train/             |          |
|    actor_loss      | 7.74e+10 |
|    critic_loss     | 9.29e+15 |
|    ent_coef        | 1.94e+07 |
|    ent_coef_loss   | -0.572   |
|    learning_rate   | 0.0003   |
|    n_updates       | 2496356  |
---------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28.4      |
|    ep_rew_mean     | -5.39e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 81004     |
|    fps             | 37        |
|    time_elapsed    | 177       |
|    total_timesteps | 2496570   |
| train/             |           |
|  

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28.5      |
|    ep_rew_mean     | -5.37e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 81056     |
|    fps             | 37        |
|    time_elapsed    | 217       |
|    total_timesteps | 2498057   |
| train/             |           |
|    actor_loss      | 7.73e+10  |
|    critic_loss     | 6.8e+15   |
|    ent_coef        | 1.92e+07  |
|    ent_coef_loss   | -3.32     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2497956   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28.5      |
|    ep_rew_mean     | -5.39e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 81060     |
|    fps             | 37        |
|    time_elapsed    | 220       |
|    total_timesteps | 2498170   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28.6      |
|    ep_rew_mean     | -5.53e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 81112     |
|    fps             | 37        |
|    time_elapsed    | 260       |
|    total_timesteps | 2499659   |
| train/             |           |
|    actor_loss      | 7.61e+10  |
|    critic_loss     | 6.7e+15   |
|    ent_coef        | 1.9e+07   |
|    ent_coef_loss   | -4.58     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2499558   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28.6      |
|    ep_rew_mean     | -5.57e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 81116     |
|    fps             | 37        |
|    time_elapsed    | 263       |
|    total_timesteps | 2499774   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28.7      |
|    ep_rew_mean     | -5.93e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 81164     |
|    fps             | 36        |
|    time_elapsed    | 32        |
|    total_timesteps | 2501176   |
| train/             |           |
|    actor_loss      | 7.56e+10  |
|    critic_loss     | 8.06e+15  |
|    ent_coef        | 1.91e+07  |
|    ent_coef_loss   | 2.32      |
|    learning_rate   | 0.0003    |
|    n_updates       | 2501075   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28.7      |
|    ep_rew_mean     | -5.97e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 81168     |
|    fps             | 36        |
|    time_elapsed    | 35        |
|    total_timesteps | 2501291   |
| train/            

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 28.7     |
|    ep_rew_mean     | -5.9e+08 |
|    phm_score       | 0        |
| time/              |          |
|    episodes        | 81220    |
|    fps             | 36       |
|    time_elapsed    | 75       |
|    total_timesteps | 2502781  |
| train/             |          |
|    actor_loss      | 7.54e+10 |
|    critic_loss     | 6.75e+15 |
|    ent_coef        | 1.92e+07 |
|    ent_coef_loss   | 7.3      |
|    learning_rate   | 0.0003   |
|    n_updates       | 2502680  |
---------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28.7      |
|    ep_rew_mean     | -5.86e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 81224     |
|    fps             | 36        |
|    time_elapsed    | 79        |
|    total_timesteps | 2502897   |
| train/             |           |
|  

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28.8      |
|    ep_rew_mean     | -5.64e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 81276     |
|    fps             | 36        |
|    time_elapsed    | 119       |
|    total_timesteps | 2504398   |
| train/             |           |
|    actor_loss      | 7.49e+10  |
|    critic_loss     | 9.43e+15  |
|    ent_coef        | 1.89e+07  |
|    ent_coef_loss   | -1.14     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2504297   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28.8      |
|    ep_rew_mean     | -5.63e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 81280     |
|    fps             | 36        |
|    time_elapsed    | 122       |
|    total_timesteps | 2504513   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28.9      |
|    ep_rew_mean     | -5.61e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 81332     |
|    fps             | 37        |
|    time_elapsed    | 162       |
|    total_timesteps | 2506020   |
| train/             |           |
|    actor_loss      | 7.43e+10  |
|    critic_loss     | 1.06e+16  |
|    ent_coef        | 1.93e+07  |
|    ent_coef_loss   | -3.14     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2505919   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 28.9      |
|    ep_rew_mean     | -5.53e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 81336     |
|    fps             | 37        |
|    time_elapsed    | 165       |
|    total_timesteps | 2506135   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 29.1      |
|    ep_rew_mean     | -5.57e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 81388     |
|    fps             | 37        |
|    time_elapsed    | 205       |
|    total_timesteps | 2507650   |
| train/             |           |
|    actor_loss      | 7.3e+10   |
|    critic_loss     | 1.1e+16   |
|    ent_coef        | 1.96e+07  |
|    ent_coef_loss   | 2.78      |
|    learning_rate   | 0.0003    |
|    n_updates       | 2507549   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 29        |
|    ep_rew_mean     | -5.51e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 81392     |
|    fps             | 37        |
|    time_elapsed    | 208       |
|    total_timesteps | 2507765   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 29.1      |
|    ep_rew_mean     | -5.32e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 81444     |
|    fps             | 37        |
|    time_elapsed    | 249       |
|    total_timesteps | 2509278   |
| train/             |           |
|    actor_loss      | 7.29e+10  |
|    critic_loss     | 9.37e+15  |
|    ent_coef        | 1.98e+07  |
|    ent_coef_loss   | -0.297    |
|    learning_rate   | 0.0003    |
|    n_updates       | 2509177   |
----------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 29.1     |
|    ep_rew_mean     | -5.3e+08 |
|    phm_score       | 0        |
| time/              |          |
|    episodes        | 81448    |
|    fps             | 37       |
|    time_elapsed    | 252      |
|    total_timesteps | 2509394  |
| train/             |        

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 29        |
|    ep_rew_mean     | -5.55e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 81496     |
|    fps             | 37        |
|    time_elapsed    | 21        |
|    total_timesteps | 2510811   |
| train/             |           |
|    actor_loss      | 7.24e+10  |
|    critic_loss     | 1.28e+16  |
|    ent_coef        | 2.1e+07   |
|    ent_coef_loss   | -0.908    |
|    learning_rate   | 0.0003    |
|    n_updates       | 2510710   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 29.1      |
|    ep_rew_mean     | -5.55e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 81500     |
|    fps             | 37        |
|    time_elapsed    | 24        |
|    total_timesteps | 2510928   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 29        |
|    ep_rew_mean     | -5.66e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 81552     |
|    fps             | 37        |
|    time_elapsed    | 64        |
|    total_timesteps | 2512440   |
| train/             |           |
|    actor_loss      | 7.21e+10  |
|    critic_loss     | 1.41e+16  |
|    ent_coef        | 2.23e+07  |
|    ent_coef_loss   | -2.07     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2512339   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 29        |
|    ep_rew_mean     | -5.55e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 81556     |
|    fps             | 37        |
|    time_elapsed    | 67        |
|    total_timesteps | 2512556   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 29.2      |
|    ep_rew_mean     | -5.61e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 81608     |
|    fps             | 37        |
|    time_elapsed    | 108       |
|    total_timesteps | 2514076   |
| train/             |           |
|    actor_loss      | 7.08e+10  |
|    critic_loss     | 1.25e+16  |
|    ent_coef        | 2.19e+07  |
|    ent_coef_loss   | -0.43     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2513975   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 29.2      |
|    ep_rew_mean     | -5.56e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 81612     |
|    fps             | 37        |
|    time_elapsed    | 111       |
|    total_timesteps | 2514194   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 29.4      |
|    ep_rew_mean     | -5.54e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 81664     |
|    fps             | 37        |
|    time_elapsed    | 151       |
|    total_timesteps | 2515729   |
| train/             |           |
|    actor_loss      | 7.04e+10  |
|    critic_loss     | 1.77e+16  |
|    ent_coef        | 2.19e+07  |
|    ent_coef_loss   | -1.37     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2515628   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 29.4      |
|    ep_rew_mean     | -5.56e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 81668     |
|    fps             | 37        |
|    time_elapsed    | 154       |
|    total_timesteps | 2515847   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 29.5      |
|    ep_rew_mean     | -5.41e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 81720     |
|    fps             | 37        |
|    time_elapsed    | 194       |
|    total_timesteps | 2517384   |
| train/             |           |
|    actor_loss      | 7.03e+10  |
|    critic_loss     | 1.66e+16  |
|    ent_coef        | 2.22e+07  |
|    ent_coef_loss   | -0.929    |
|    learning_rate   | 0.0003    |
|    n_updates       | 2517283   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 29.5      |
|    ep_rew_mean     | -5.35e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 81724     |
|    fps             | 37        |
|    time_elapsed    | 197       |
|    total_timesteps | 2517503   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 29.7      |
|    ep_rew_mean     | -5.61e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 81776     |
|    fps             | 37        |
|    time_elapsed    | 238       |
|    total_timesteps | 2519050   |
| train/             |           |
|    actor_loss      | 6.96e+10  |
|    critic_loss     | 1.96e+16  |
|    ent_coef        | 2.28e+07  |
|    ent_coef_loss   | -2.78     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2518949   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 29.7      |
|    ep_rew_mean     | -5.49e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 81780     |
|    fps             | 37        |
|    time_elapsed    | 241       |
|    total_timesteps | 2519169   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 29.8      |
|    ep_rew_mean     | -5.68e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 81828     |
|    fps             | 38        |
|    time_elapsed    | 16        |
|    total_timesteps | 2520628   |
| train/             |           |
|    actor_loss      | 6.91e+10  |
|    critic_loss     | 3.19e+16  |
|    ent_coef        | 2.39e+07  |
|    ent_coef_loss   | -0.529    |
|    learning_rate   | 0.0003    |
|    n_updates       | 2520527   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 29.8      |
|    ep_rew_mean     | -5.77e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 81832     |
|    fps             | 38        |
|    time_elapsed    | 19        |
|    total_timesteps | 2520750   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 30        |
|    ep_rew_mean     | -5.73e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 81884     |
|    fps             | 38        |
|    time_elapsed    | 60        |
|    total_timesteps | 2522312   |
| train/             |           |
|    actor_loss      | 6.81e+10  |
|    critic_loss     | 2.77e+16  |
|    ent_coef        | 2.56e+07  |
|    ent_coef_loss   | -0.318    |
|    learning_rate   | 0.0003    |
|    n_updates       | 2522211   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 30        |
|    ep_rew_mean     | -5.74e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 81888     |
|    fps             | 38        |
|    time_elapsed    | 63        |
|    total_timesteps | 2522436   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 30.3      |
|    ep_rew_mean     | -5.81e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 81940     |
|    fps             | 38        |
|    time_elapsed    | 105       |
|    total_timesteps | 2524016   |
| train/             |           |
|    actor_loss      | 6.8e+10   |
|    critic_loss     | 1e+17     |
|    ent_coef        | 2.65e+07  |
|    ent_coef_loss   | 4.27      |
|    learning_rate   | 0.0003    |
|    n_updates       | 2523915   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 30.3      |
|    ep_rew_mean     | -5.78e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 81944     |
|    fps             | 37        |
|    time_elapsed    | 110       |
|    total_timesteps | 2524135   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 30.5      |
|    ep_rew_mean     | -5.85e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 81996     |
|    fps             | 37        |
|    time_elapsed    | 153       |
|    total_timesteps | 2525727   |
| train/             |           |
|    actor_loss      | 6.72e+10  |
|    critic_loss     | 7.18e+16  |
|    ent_coef        | 2.73e+07  |
|    ent_coef_loss   | -1.49     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2525626   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 30.5      |
|    ep_rew_mean     | -5.75e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 82000     |
|    fps             | 37        |
|    time_elapsed    | 156       |
|    total_timesteps | 2525847   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 30.7      |
|    ep_rew_mean     | -5.19e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 82052     |
|    fps             | 37        |
|    time_elapsed    | 198       |
|    total_timesteps | 2527449   |
| train/             |           |
|    actor_loss      | 6.66e+10  |
|    critic_loss     | 1.24e+17  |
|    ent_coef        | 2.86e+07  |
|    ent_coef_loss   | -1.51     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2527348   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 30.7      |
|    ep_rew_mean     | -5.14e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 82056     |
|    fps             | 37        |
|    time_elapsed    | 201       |
|    total_timesteps | 2527571   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 30.7      |
|    ep_rew_mean     | -5.22e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 82108     |
|    fps             | 37        |
|    time_elapsed    | 243       |
|    total_timesteps | 2529166   |
| train/             |           |
|    actor_loss      | 6.63e+10  |
|    critic_loss     | 8.15e+16  |
|    ent_coef        | 2.98e+07  |
|    ent_coef_loss   | -5.37     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2529065   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 30.7      |
|    ep_rew_mean     | -5.28e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 82112     |
|    fps             | 37        |
|    time_elapsed    | 246       |
|    total_timesteps | 2529292   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 30.8      |
|    ep_rew_mean     | -5.72e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 82160     |
|    fps             | 39        |
|    time_elapsed    | 20        |
|    total_timesteps | 2530801   |
| train/             |           |
|    actor_loss      | 6.53e+10  |
|    critic_loss     | 4.78e+16  |
|    ent_coef        | 3.19e+07  |
|    ent_coef_loss   | -0.831    |
|    learning_rate   | 0.0003    |
|    n_updates       | 2530700   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 30.9      |
|    ep_rew_mean     | -5.66e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 82164     |
|    fps             | 39        |
|    time_elapsed    | 23        |
|    total_timesteps | 2530926   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 31.1      |
|    ep_rew_mean     | -5.71e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 82216     |
|    fps             | 38        |
|    time_elapsed    | 65        |
|    total_timesteps | 2532551   |
| train/             |           |
|    actor_loss      | 6.5e+10   |
|    critic_loss     | 1.28e+17  |
|    ent_coef        | 3.35e+07  |
|    ent_coef_loss   | 1.5       |
|    learning_rate   | 0.0003    |
|    n_updates       | 2532450   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 31.1      |
|    ep_rew_mean     | -5.63e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 82220     |
|    fps             | 38        |
|    time_elapsed    | 69        |
|    total_timesteps | 2532676   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 31.2      |
|    ep_rew_mean     | -5.33e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 82272     |
|    fps             | 38        |
|    time_elapsed    | 112       |
|    total_timesteps | 2534301   |
| train/             |           |
|    actor_loss      | 6.44e+10  |
|    critic_loss     | 1.58e+17  |
|    ent_coef        | 3.52e+07  |
|    ent_coef_loss   | 1.72      |
|    learning_rate   | 0.0003    |
|    n_updates       | 2534200   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 31.3      |
|    ep_rew_mean     | -5.35e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 82276     |
|    fps             | 38        |
|    time_elapsed    | 115       |
|    total_timesteps | 2534431   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 31.6      |
|    ep_rew_mean     | -5.42e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 82328     |
|    fps             | 37        |
|    time_elapsed    | 160       |
|    total_timesteps | 2536090   |
| train/             |           |
|    actor_loss      | 6.37e+10  |
|    critic_loss     | 2.6e+17   |
|    ent_coef        | 3.68e+07  |
|    ent_coef_loss   | 0.484     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2535989   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 31.7      |
|    ep_rew_mean     | -5.48e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 82332     |
|    fps             | 37        |
|    time_elapsed    | 163       |
|    total_timesteps | 2536216   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 31.9      |
|    ep_rew_mean     | -5.77e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 82384     |
|    fps             | 37        |
|    time_elapsed    | 208       |
|    total_timesteps | 2537879   |
| train/             |           |
|    actor_loss      | 6.34e+10  |
|    critic_loss     | 9.62e+16  |
|    ent_coef        | 3.84e+07  |
|    ent_coef_loss   | 5.44      |
|    learning_rate   | 0.0003    |
|    n_updates       | 2537778   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 31.9      |
|    ep_rew_mean     | -5.84e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 82388     |
|    fps             | 37        |
|    time_elapsed    | 211       |
|    total_timesteps | 2538007   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 32        |
|    ep_rew_mean     | -5.64e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 82440     |
|    fps             | 37        |
|    time_elapsed    | 255       |
|    total_timesteps | 2539667   |
| train/             |           |
|    actor_loss      | 6.28e+10  |
|    critic_loss     | 2.51e+17  |
|    ent_coef        | 3.97e+07  |
|    ent_coef_loss   | 0.0797    |
|    learning_rate   | 0.0003    |
|    n_updates       | 2539566   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 32        |
|    ep_rew_mean     | -5.63e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 82444     |
|    fps             | 37        |
|    time_elapsed    | 258       |
|    total_timesteps | 2539801   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 32        |
|    ep_rew_mean     | -5.66e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 82492     |
|    fps             | 38        |
|    time_elapsed    | 34        |
|    total_timesteps | 2541339   |
| train/             |           |
|    actor_loss      | 6.26e+10  |
|    critic_loss     | 1.89e+17  |
|    ent_coef        | 4.22e+07  |
|    ent_coef_loss   | 0.211     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2541238   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 32        |
|    ep_rew_mean     | -5.61e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 82496     |
|    fps             | 38        |
|    time_elapsed    | 38        |
|    total_timesteps | 2541469   |
| train/            

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 31.8     |
|    ep_rew_mean     | -5.8e+08 |
|    phm_score       | 0        |
| time/              |          |
|    episodes        | 82548    |
|    fps             | 37       |
|    time_elapsed    | 82       |
|    total_timesteps | 2543118  |
| train/             |          |
|    actor_loss      | 6.19e+10 |
|    critic_loss     | 1.67e+17 |
|    ent_coef        | 4.42e+07 |
|    ent_coef_loss   | 0.52     |
|    learning_rate   | 0.0003   |
|    n_updates       | 2543017  |
---------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 31.8      |
|    ep_rew_mean     | -5.82e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 82552     |
|    fps             | 37        |
|    time_elapsed    | 86        |
|    total_timesteps | 2543247   |
| train/             |           |
|  

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 31.8      |
|    ep_rew_mean     | -5.73e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 82604     |
|    fps             | 37        |
|    time_elapsed    | 130       |
|    total_timesteps | 2544911   |
| train/             |           |
|    actor_loss      | 6.22e+10  |
|    critic_loss     | 2.84e+17  |
|    ent_coef        | 4.5e+07   |
|    ent_coef_loss   | 0.977     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2544810   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 31.9      |
|    ep_rew_mean     | -5.64e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 82608     |
|    fps             | 37        |
|    time_elapsed    | 134       |
|    total_timesteps | 2545041   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 32.1      |
|    ep_rew_mean     | -5.73e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 82660     |
|    fps             | 37        |
|    time_elapsed    | 178       |
|    total_timesteps | 2546712   |
| train/             |           |
|    actor_loss      | 6.16e+10  |
|    critic_loss     | 2.05e+17  |
|    ent_coef        | 4.56e+07  |
|    ent_coef_loss   | -2.88     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2546611   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 32.1      |
|    ep_rew_mean     | -5.71e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 82664     |
|    fps             | 37        |
|    time_elapsed    | 182       |
|    total_timesteps | 2546842   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 32.1      |
|    ep_rew_mean     | -5.81e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 82716     |
|    fps             | 37        |
|    time_elapsed    | 226       |
|    total_timesteps | 2548504   |
| train/             |           |
|    actor_loss      | 6.05e+10  |
|    critic_loss     | 2.03e+17  |
|    ent_coef        | 4.67e+07  |
|    ent_coef_loss   | 0.155     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2548403   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 32        |
|    ep_rew_mean     | -5.72e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 82720     |
|    fps             | 37        |
|    time_elapsed    | 229       |
|    total_timesteps | 2548631   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 31.9      |
|    ep_rew_mean     | -5.61e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 82768     |
|    fps             | 38        |
|    time_elapsed    | 4         |
|    total_timesteps | 2550160   |
| train/             |           |
|    actor_loss      | 6e+10     |
|    critic_loss     | 1.33e+17  |
|    ent_coef        | 4.51e+07  |
|    ent_coef_loss   | 0.433     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2550059   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 31.9      |
|    ep_rew_mean     | -5.57e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 82772     |
|    fps             | 38        |
|    time_elapsed    | 7         |
|    total_timesteps | 2550287   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 32.1      |
|    ep_rew_mean     | -5.44e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 82824     |
|    fps             | 38        |
|    time_elapsed    | 51        |
|    total_timesteps | 2551979   |
| train/             |           |
|    actor_loss      | 5.98e+10  |
|    critic_loss     | 1.68e+17  |
|    ent_coef        | 4.58e+07  |
|    ent_coef_loss   | -6.66     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2551878   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 32.2      |
|    ep_rew_mean     | -5.46e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 82828     |
|    fps             | 38        |
|    time_elapsed    | 54        |
|    total_timesteps | 2552106   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 32.5      |
|    ep_rew_mean     | -5.56e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 82880     |
|    fps             | 38        |
|    time_elapsed    | 98        |
|    total_timesteps | 2553796   |
| train/             |           |
|    actor_loss      | 5.99e+10  |
|    critic_loss     | 1.23e+17  |
|    ent_coef        | 4.72e+07  |
|    ent_coef_loss   | -3.66     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2553695   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 32.5      |
|    ep_rew_mean     | -5.51e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 82884     |
|    fps             | 38        |
|    time_elapsed    | 101       |
|    total_timesteps | 2553924   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 32.4      |
|    ep_rew_mean     | -5.58e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 82936     |
|    fps             | 38        |
|    time_elapsed    | 145       |
|    total_timesteps | 2555610   |
| train/             |           |
|    actor_loss      | 5.91e+10  |
|    critic_loss     | 1.65e+17  |
|    ent_coef        | 4.78e+07  |
|    ent_coef_loss   | -0.677    |
|    learning_rate   | 0.0003    |
|    n_updates       | 2555509   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 32.4      |
|    ep_rew_mean     | -5.53e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 82940     |
|    fps             | 38        |
|    time_elapsed    | 148       |
|    total_timesteps | 2555737   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 32.6      |
|    ep_rew_mean     | -5.62e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 82992     |
|    fps             | 38        |
|    time_elapsed    | 192       |
|    total_timesteps | 2557437   |
| train/             |           |
|    actor_loss      | 5.85e+10  |
|    critic_loss     | 1.63e+17  |
|    ent_coef        | 4.8e+07   |
|    ent_coef_loss   | 1.9       |
|    learning_rate   | 0.0003    |
|    n_updates       | 2557336   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 32.5      |
|    ep_rew_mean     | -5.63e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 82996     |
|    fps             | 38        |
|    time_elapsed    | 195       |
|    total_timesteps | 2557564   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 32.7      |
|    ep_rew_mean     | -5.47e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 83048     |
|    fps             | 38        |
|    time_elapsed    | 239       |
|    total_timesteps | 2559261   |
| train/             |           |
|    actor_loss      | 5.84e+10  |
|    critic_loss     | 1.49e+17  |
|    ent_coef        | 4.77e+07  |
|    ent_coef_loss   | -4.34     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2559160   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 32.7      |
|    ep_rew_mean     | -5.54e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 83052     |
|    fps             | 38        |
|    time_elapsed    | 243       |
|    total_timesteps | 2559392   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 32.6      |
|    ep_rew_mean     | -5.53e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 83100     |
|    fps             | 38        |
|    time_elapsed    | 25        |
|    total_timesteps | 2560979   |
| train/             |           |
|    actor_loss      | 5.79e+10  |
|    critic_loss     | 1.43e+17  |
|    ent_coef        | 4.78e+07  |
|    ent_coef_loss   | -1.78     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2560878   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 32.7      |
|    ep_rew_mean     | -5.55e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 83104     |
|    fps             | 38        |
|    time_elapsed    | 29        |
|    total_timesteps | 2561111   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 32.7      |
|    ep_rew_mean     | -5.87e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 83156     |
|    fps             | 38        |
|    time_elapsed    | 73        |
|    total_timesteps | 2562806   |
| train/             |           |
|    actor_loss      | 5.77e+10  |
|    critic_loss     | 1.31e+17  |
|    ent_coef        | 4.91e+07  |
|    ent_coef_loss   | -6.12     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2562705   |
----------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 32.7     |
|    ep_rew_mean     | -5.8e+08 |
|    phm_score       | 0        |
| time/              |          |
|    episodes        | 83160    |
|    fps             | 38       |
|    time_elapsed    | 76       |
|    total_timesteps | 2562939  |
| train/             |        

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 32.8      |
|    ep_rew_mean     | -5.81e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 83212     |
|    fps             | 38        |
|    time_elapsed    | 120       |
|    total_timesteps | 2564651   |
| train/             |           |
|    actor_loss      | 5.72e+10  |
|    critic_loss     | 1.58e+17  |
|    ent_coef        | 4.87e+07  |
|    ent_coef_loss   | -1.34     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2564550   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 32.7      |
|    ep_rew_mean     | -5.87e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 83216     |
|    fps             | 38        |
|    time_elapsed    | 124       |
|    total_timesteps | 2564781   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 32.9      |
|    ep_rew_mean     | -5.84e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 83268     |
|    fps             | 38        |
|    time_elapsed    | 169       |
|    total_timesteps | 2566496   |
| train/             |           |
|    actor_loss      | 5.65e+10  |
|    critic_loss     | 1.36e+17  |
|    ent_coef        | 4.82e+07  |
|    ent_coef_loss   | 3.09      |
|    learning_rate   | 0.0003    |
|    n_updates       | 2566395   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 32.9      |
|    ep_rew_mean     | -5.77e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 83272     |
|    fps             | 38        |
|    time_elapsed    | 172       |
|    total_timesteps | 2566627   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 33.4      |
|    ep_rew_mean     | -5.81e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 83324     |
|    fps             | 38        |
|    time_elapsed    | 217       |
|    total_timesteps | 2568386   |
| train/             |           |
|    actor_loss      | 5.61e+10  |
|    critic_loss     | 1.34e+17  |
|    ent_coef        | 4.79e+07  |
|    ent_coef_loss   | -1.88     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2568285   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 33.4      |
|    ep_rew_mean     | -5.78e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 83328     |
|    fps             | 38        |
|    time_elapsed    | 221       |
|    total_timesteps | 2568519   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 33.9      |
|    ep_rew_mean     | -5.69e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 83376     |
|    fps             | 39        |
|    time_elapsed    | 4         |
|    total_timesteps | 2570171   |
| train/             |           |
|    actor_loss      | 5.56e+10  |
|    critic_loss     | 1.01e+17  |
|    ent_coef        | 4.59e+07  |
|    ent_coef_loss   | 1.72      |
|    learning_rate   | 0.0003    |
|    n_updates       | 2570070   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 33.9      |
|    ep_rew_mean     | -5.76e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 83380     |
|    fps             | 39        |
|    time_elapsed    | 7         |
|    total_timesteps | 2570307   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 34.8      |
|    ep_rew_mean     | -5.51e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 83432     |
|    fps             | 38        |
|    time_elapsed    | 55        |
|    total_timesteps | 2572152   |
| train/             |           |
|    actor_loss      | 5.53e+10  |
|    critic_loss     | 1.42e+17  |
|    ent_coef        | 4.51e+07  |
|    ent_coef_loss   | -3.05     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2572051   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 34.8      |
|    ep_rew_mean     | -5.51e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 83436     |
|    fps             | 38        |
|    time_elapsed    | 59        |
|    total_timesteps | 2572291   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 34.9      |
|    ep_rew_mean     | -5.25e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 83488     |
|    fps             | 38        |
|    time_elapsed    | 105       |
|    total_timesteps | 2574077   |
| train/             |           |
|    actor_loss      | 5.52e+10  |
|    critic_loss     | 1.84e+17  |
|    ent_coef        | 4.79e+07  |
|    ent_coef_loss   | 0.709     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2573976   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 34.9      |
|    ep_rew_mean     | -5.21e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 83492     |
|    fps             | 38        |
|    time_elapsed    | 109       |
|    total_timesteps | 2574215   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 34.1      |
|    ep_rew_mean     | -5.14e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 83544     |
|    fps             | 38        |
|    time_elapsed    | 156       |
|    total_timesteps | 2575992   |
| train/             |           |
|    actor_loss      | 5.44e+10  |
|    critic_loss     | 1.45e+17  |
|    ent_coef        | 4.89e+07  |
|    ent_coef_loss   | 0.429     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2575891   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 34.2      |
|    ep_rew_mean     | -5.21e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 83548     |
|    fps             | 38        |
|    time_elapsed    | 160       |
|    total_timesteps | 2576132   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 34.2      |
|    ep_rew_mean     | -5.59e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 83600     |
|    fps             | 38        |
|    time_elapsed    | 206       |
|    total_timesteps | 2577905   |
| train/             |           |
|    actor_loss      | 5.41e+10  |
|    critic_loss     | 1.56e+17  |
|    ent_coef        | 4.91e+07  |
|    ent_coef_loss   | -0.503    |
|    learning_rate   | 0.0003    |
|    n_updates       | 2577804   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 34.1      |
|    ep_rew_mean     | -5.58e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 83604     |
|    fps             | 38        |
|    time_elapsed    | 210       |
|    total_timesteps | 2578041   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 34.4      |
|    ep_rew_mean     | -5.78e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 83656     |
|    fps             | 38        |
|    time_elapsed    | 258       |
|    total_timesteps | 2579833   |
| train/             |           |
|    actor_loss      | 5.4e+10   |
|    critic_loss     | 1.22e+17  |
|    ent_coef        | 4.91e+07  |
|    ent_coef_loss   | 0.334     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2579732   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 34.4      |
|    ep_rew_mean     | -5.83e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 83660     |
|    fps             | 38        |
|    time_elapsed    | 262       |
|    total_timesteps | 2579971   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 34.5      |
|    ep_rew_mean     | -5.71e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 83708     |
|    fps             | 38        |
|    time_elapsed    | 43        |
|    total_timesteps | 2581661   |
| train/             |           |
|    actor_loss      | 5.31e+10  |
|    critic_loss     | 1.6e+17   |
|    ent_coef        | 4.84e+07  |
|    ent_coef_loss   | 0.658     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2581560   |
----------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 34.5     |
|    ep_rew_mean     | -5.7e+08 |
|    phm_score       | 0        |
| time/              |          |
|    episodes        | 83712    |
|    fps             | 38       |
|    time_elapsed    | 47       |
|    total_timesteps | 2581802  |
| train/             |        

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 34.8      |
|    ep_rew_mean     | -5.63e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 83764     |
|    fps             | 37        |
|    time_elapsed    | 96        |
|    total_timesteps | 2583618   |
| train/             |           |
|    actor_loss      | 5.27e+10  |
|    critic_loss     | 1.65e+17  |
|    ent_coef        | 4.93e+07  |
|    ent_coef_loss   | -0.818    |
|    learning_rate   | 0.0003    |
|    n_updates       | 2583517   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 34.9      |
|    ep_rew_mean     | -5.65e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 83768     |
|    fps             | 37        |
|    time_elapsed    | 99        |
|    total_timesteps | 2583762   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 34.9      |
|    ep_rew_mean     | -5.76e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 83820     |
|    fps             | 37        |
|    time_elapsed    | 147       |
|    total_timesteps | 2585565   |
| train/             |           |
|    actor_loss      | 5.26e+10  |
|    critic_loss     | 1.24e+17  |
|    ent_coef        | 4.99e+07  |
|    ent_coef_loss   | 0.661     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2585464   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 34.9      |
|    ep_rew_mean     | -5.79e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 83824     |
|    fps             | 37        |
|    time_elapsed    | 151       |
|    total_timesteps | 2585705   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 34.6      |
|    ep_rew_mean     | -5.64e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 83876     |
|    fps             | 37        |
|    time_elapsed    | 200       |
|    total_timesteps | 2587502   |
| train/             |           |
|    actor_loss      | 5.2e+10   |
|    critic_loss     | 9.73e+16  |
|    ent_coef        | 5.02e+07  |
|    ent_coef_loss   | 2.75      |
|    learning_rate   | 0.0003    |
|    n_updates       | 2587401   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 34.6      |
|    ep_rew_mean     | -5.72e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 83880     |
|    fps             | 37        |
|    time_elapsed    | 204       |
|    total_timesteps | 2587640   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 34.6      |
|    ep_rew_mean     | -5.64e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 83932     |
|    fps             | 37        |
|    time_elapsed    | 252       |
|    total_timesteps | 2589446   |
| train/             |           |
|    actor_loss      | 5.19e+10  |
|    critic_loss     | 1.31e+17  |
|    ent_coef        | 5.01e+07  |
|    ent_coef_loss   | -1.89     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2589345   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 34.7      |
|    ep_rew_mean     | -5.69e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 83936     |
|    fps             | 37        |
|    time_elapsed    | 256       |
|    total_timesteps | 2589588   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35.1      |
|    ep_rew_mean     | -5.56e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 83984     |
|    fps             | 38        |
|    time_elapsed    | 34        |
|    total_timesteps | 2591310   |
| train/             |           |
|    actor_loss      | 5.18e+10  |
|    critic_loss     | 1.51e+17  |
|    ent_coef        | 5.02e+07  |
|    ent_coef_loss   | -1.95     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2591209   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35.1      |
|    ep_rew_mean     | -5.62e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 83988     |
|    fps             | 38        |
|    time_elapsed    | 38        |
|    total_timesteps | 2591452   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 36.2      |
|    ep_rew_mean     | -6.13e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 84040     |
|    fps             | 37        |
|    time_elapsed    | 88        |
|    total_timesteps | 2593369   |
| train/             |           |
|    actor_loss      | 5.13e+10  |
|    critic_loss     | 1.31e+17  |
|    ent_coef        | 5.13e+07  |
|    ent_coef_loss   | 1.61      |
|    learning_rate   | 0.0003    |
|    n_updates       | 2593268   |
----------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 36.2     |
|    ep_rew_mean     | -6.2e+08 |
|    phm_score       | 0        |
| time/              |          |
|    episodes        | 84044    |
|    fps             | 37       |
|    time_elapsed    | 92       |
|    total_timesteps | 2593520  |
| train/             |        

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 36.5      |
|    ep_rew_mean     | -6.11e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 84096     |
|    fps             | 37        |
|    time_elapsed    | 142       |
|    total_timesteps | 2595394   |
| train/             |           |
|    actor_loss      | 5.11e+10  |
|    critic_loss     | 1.34e+17  |
|    ent_coef        | 5.23e+07  |
|    ent_coef_loss   | -2.93     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2595293   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 36.4      |
|    ep_rew_mean     | -6.15e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 84100     |
|    fps             | 37        |
|    time_elapsed    | 146       |
|    total_timesteps | 2595535   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35.6      |
|    ep_rew_mean     | -5.88e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 84152     |
|    fps             | 37        |
|    time_elapsed    | 195       |
|    total_timesteps | 2597360   |
| train/             |           |
|    actor_loss      | 5.08e+10  |
|    critic_loss     | 1.17e+17  |
|    ent_coef        | 5.3e+07   |
|    ent_coef_loss   | 1.77      |
|    learning_rate   | 0.0003    |
|    n_updates       | 2597259   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35.5      |
|    ep_rew_mean     | -6.01e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 84156     |
|    fps             | 37        |
|    time_elapsed    | 198       |
|    total_timesteps | 2597497   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35.1      |
|    ep_rew_mean     | -6.01e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 84208     |
|    fps             | 37        |
|    time_elapsed    | 248       |
|    total_timesteps | 2599331   |
| train/             |           |
|    actor_loss      | 5.05e+10  |
|    critic_loss     | 1.12e+17  |
|    ent_coef        | 5.27e+07  |
|    ent_coef_loss   | -1.13     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2599230   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35.1      |
|    ep_rew_mean     | -5.94e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 84212     |
|    fps             | 37        |
|    time_elapsed    | 252       |
|    total_timesteps | 2599471   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35.2      |
|    ep_rew_mean     | -5.63e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 84260     |
|    fps             | 37        |
|    time_elapsed    | 31        |
|    total_timesteps | 2601189   |
| train/             |           |
|    actor_loss      | 5.03e+10  |
|    critic_loss     | 1.2e+17   |
|    ent_coef        | 5.28e+07  |
|    ent_coef_loss   | -0.182    |
|    learning_rate   | 0.0003    |
|    n_updates       | 2601088   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35.2      |
|    ep_rew_mean     | -5.63e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 84264     |
|    fps             | 37        |
|    time_elapsed    | 35        |
|    total_timesteps | 2601333   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35        |
|    ep_rew_mean     | -5.42e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 84316     |
|    fps             | 36        |
|    time_elapsed    | 85        |
|    total_timesteps | 2603137   |
| train/             |           |
|    actor_loss      | 5.03e+10  |
|    critic_loss     | 1.13e+17  |
|    ent_coef        | 5.25e+07  |
|    ent_coef_loss   | 1.76      |
|    learning_rate   | 0.0003    |
|    n_updates       | 2603036   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 34.9      |
|    ep_rew_mean     | -5.41e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 84320     |
|    fps             | 36        |
|    time_elapsed    | 89        |
|    total_timesteps | 2603277   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 34.8      |
|    ep_rew_mean     | -5.59e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 84372     |
|    fps             | 36        |
|    time_elapsed    | 138       |
|    total_timesteps | 2605090   |
| train/             |           |
|    actor_loss      | 5e+10     |
|    critic_loss     | 1.16e+17  |
|    ent_coef        | 5.11e+07  |
|    ent_coef_loss   | -1.49     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2604989   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 34.9      |
|    ep_rew_mean     | -5.54e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 84376     |
|    fps             | 36        |
|    time_elapsed    | 142       |
|    total_timesteps | 2605232   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35.1      |
|    ep_rew_mean     | -5.18e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 84428     |
|    fps             | 36        |
|    time_elapsed    | 192       |
|    total_timesteps | 2607066   |
| train/             |           |
|    actor_loss      | 4.99e+10  |
|    critic_loss     | 1.11e+17  |
|    ent_coef        | 5.05e+07  |
|    ent_coef_loss   | 2.09      |
|    learning_rate   | 0.0003    |
|    n_updates       | 2606965   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35.2      |
|    ep_rew_mean     | -5.06e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 84432     |
|    fps             | 36        |
|    time_elapsed    | 196       |
|    total_timesteps | 2607210   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35.2      |
|    ep_rew_mean     | -5.13e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 84484     |
|    fps             | 36        |
|    time_elapsed    | 245       |
|    total_timesteps | 2609041   |
| train/             |           |
|    actor_loss      | 4.94e+10  |
|    critic_loss     | 1.15e+17  |
|    ent_coef        | 5.04e+07  |
|    ent_coef_loss   | -0.43     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2608940   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35.3      |
|    ep_rew_mean     | -5.17e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 84488     |
|    fps             | 36        |
|    time_elapsed    | 249       |
|    total_timesteps | 2609183   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35.4      |
|    ep_rew_mean     | -5.52e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 84536     |
|    fps             | 36        |
|    time_elapsed    | 24        |
|    total_timesteps | 2610890   |
| train/             |           |
|    actor_loss      | 4.91e+10  |
|    critic_loss     | 1.02e+17  |
|    ent_coef        | 4.92e+07  |
|    ent_coef_loss   | 3.5       |
|    learning_rate   | 0.0003    |
|    n_updates       | 2610789   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35.5      |
|    ep_rew_mean     | -5.56e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 84540     |
|    fps             | 36        |
|    time_elapsed    | 28        |
|    total_timesteps | 2611038   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 36.2      |
|    ep_rew_mean     | -5.68e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 84592     |
|    fps             | 36        |
|    time_elapsed    | 80        |
|    total_timesteps | 2612946   |
| train/             |           |
|    actor_loss      | 4.92e+10  |
|    critic_loss     | 1.05e+17  |
|    ent_coef        | 5e+07     |
|    ent_coef_loss   | -4.4      |
|    learning_rate   | 0.0003    |
|    n_updates       | 2612845   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 36.2      |
|    ep_rew_mean     | -5.75e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 84596     |
|    fps             | 36        |
|    time_elapsed    | 83        |
|    total_timesteps | 2613084   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 36.2      |
|    ep_rew_mean     | -5.71e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 84648     |
|    fps             | 36        |
|    time_elapsed    | 134       |
|    total_timesteps | 2614946   |
| train/             |           |
|    actor_loss      | 4.87e+10  |
|    critic_loss     | 1.19e+17  |
|    ent_coef        | 5.06e+07  |
|    ent_coef_loss   | 1.94      |
|    learning_rate   | 0.0003    |
|    n_updates       | 2614845   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 36.1      |
|    ep_rew_mean     | -5.76e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 84652     |
|    fps             | 36        |
|    time_elapsed    | 138       |
|    total_timesteps | 2615090   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35.6      |
|    ep_rew_mean     | -5.65e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 84704     |
|    fps             | 36        |
|    time_elapsed    | 187       |
|    total_timesteps | 2616936   |
| train/             |           |
|    actor_loss      | 4.85e+10  |
|    critic_loss     | 1.33e+17  |
|    ent_coef        | 5.08e+07  |
|    ent_coef_loss   | -0.0415   |
|    learning_rate   | 0.0003    |
|    n_updates       | 2616835   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35.6      |
|    ep_rew_mean     | -5.68e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 84708     |
|    fps             | 36        |
|    time_elapsed    | 191       |
|    total_timesteps | 2617080   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35.4      |
|    ep_rew_mean     | -5.52e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 84760     |
|    fps             | 37        |
|    time_elapsed    | 240       |
|    total_timesteps | 2618912   |
| train/             |           |
|    actor_loss      | 4.85e+10  |
|    critic_loss     | 1.02e+17  |
|    ent_coef        | 5.08e+07  |
|    ent_coef_loss   | -2.37     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2618811   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35.4      |
|    ep_rew_mean     | -5.52e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 84764     |
|    fps             | 37        |
|    time_elapsed    | 244       |
|    total_timesteps | 2619054   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35        |
|    ep_rew_mean     | -5.43e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 84812     |
|    fps             | 37        |
|    time_elapsed    | 19        |
|    total_timesteps | 2620728   |
| train/             |           |
|    actor_loss      | 4.83e+10  |
|    critic_loss     | 7.87e+16  |
|    ent_coef        | 5.01e+07  |
|    ent_coef_loss   | -2.79     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2620627   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35        |
|    ep_rew_mean     | -5.46e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 84816     |
|    fps             | 37        |
|    time_elapsed    | 22        |
|    total_timesteps | 2620866   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 34.9      |
|    ep_rew_mean     | -5.51e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 84868     |
|    fps             | 37        |
|    time_elapsed    | 71        |
|    total_timesteps | 2622688   |
| train/             |           |
|    actor_loss      | 4.81e+10  |
|    critic_loss     | 7.61e+16  |
|    ent_coef        | 4.9e+07   |
|    ent_coef_loss   | -1.8      |
|    learning_rate   | 0.0003    |
|    n_updates       | 2622587   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35        |
|    ep_rew_mean     | -5.47e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 84872     |
|    fps             | 37        |
|    time_elapsed    | 75        |
|    total_timesteps | 2622830   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35.2      |
|    ep_rew_mean     | -5.33e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 84924     |
|    fps             | 37        |
|    time_elapsed    | 123       |
|    total_timesteps | 2624668   |
| train/             |           |
|    actor_loss      | 4.77e+10  |
|    critic_loss     | 7.82e+16  |
|    ent_coef        | 4.9e+07   |
|    ent_coef_loss   | 0.36      |
|    learning_rate   | 0.0003    |
|    n_updates       | 2624567   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35.3      |
|    ep_rew_mean     | -5.39e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 84928     |
|    fps             | 37        |
|    time_elapsed    | 127       |
|    total_timesteps | 2624812   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35.2      |
|    ep_rew_mean     | -5.58e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 84980     |
|    fps             | 37        |
|    time_elapsed    | 175       |
|    total_timesteps | 2626628   |
| train/             |           |
|    actor_loss      | 4.74e+10  |
|    critic_loss     | 1.29e+17  |
|    ent_coef        | 4.88e+07  |
|    ent_coef_loss   | 0.391     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2626527   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35.1      |
|    ep_rew_mean     | -5.57e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 84984     |
|    fps             | 37        |
|    time_elapsed    | 179       |
|    total_timesteps | 2626768   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35.1      |
|    ep_rew_mean     | -5.57e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 85036     |
|    fps             | 37        |
|    time_elapsed    | 229       |
|    total_timesteps | 2628604   |
| train/             |           |
|    actor_loss      | 4.75e+10  |
|    critic_loss     | 8.17e+16  |
|    ent_coef        | 4.81e+07  |
|    ent_coef_loss   | -2.16     |
|    learning_rate   | 0.0003    |
|    n_updates       | 2628503   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35.2      |
|    ep_rew_mean     | -5.56e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 85040     |
|    fps             | 37        |
|    time_elapsed    | 232       |
|    total_timesteps | 2628744   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35.2      |
|    ep_rew_mean     | -5.63e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 85088     |
|    fps             | 38        |
|    time_elapsed    | 11        |
|    total_timesteps | 2630451   |
| train/             |           |
|    actor_loss      | 4.73e+10  |
|    critic_loss     | 8.33e+16  |
|    ent_coef        | 4.79e+07  |
|    ent_coef_loss   | 3.36      |
|    learning_rate   | 0.0003    |
|    n_updates       | 2630350   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35.2      |
|    ep_rew_mean     | -5.69e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 85092     |
|    fps             | 38        |
|    time_elapsed    | 15        |
|    total_timesteps | 2630592   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35.4      |
|    ep_rew_mean     | -5.82e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 85144     |
|    fps             | 38        |
|    time_elapsed    | 63        |
|    total_timesteps | 2632451   |
| train/             |           |
|    actor_loss      | 4.7e+10   |
|    critic_loss     | 7.79e+16  |
|    ent_coef        | 4.65e+07  |
|    ent_coef_loss   | 4.51      |
|    learning_rate   | 0.0003    |
|    n_updates       | 2632350   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35.5      |
|    ep_rew_mean     | -5.79e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 85148     |
|    fps             | 38        |
|    time_elapsed    | 67        |
|    total_timesteps | 2632595   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35.8      |
|    ep_rew_mean     | -5.64e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 85200     |
|    fps             | 38        |
|    time_elapsed    | 115       |
|    total_timesteps | 2634453   |
| train/             |           |
|    actor_loss      | 4.69e+10  |
|    critic_loss     | 1.08e+17  |
|    ent_coef        | 4.71e+07  |
|    ent_coef_loss   | 1.3       |
|    learning_rate   | 0.0003    |
|    n_updates       | 2634352   |
----------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 35.8     |
|    ep_rew_mean     | -5.6e+08 |
|    phm_score       | 0        |
| time/              |          |
|    episodes        | 85204    |
|    fps             | 38       |
|    time_elapsed    | 119      |
|    total_timesteps | 2634591  |
| train/             |        

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35.2      |
|    ep_rew_mean     | -5.61e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 85256     |
|    fps             | 38        |
|    time_elapsed    | 166       |
|    total_timesteps | 2636406   |
| train/             |           |
|    actor_loss      | 4.67e+10  |
|    critic_loss     | 8.52e+16  |
|    ent_coef        | 4.74e+07  |
|    ent_coef_loss   | -0.349    |
|    learning_rate   | 0.0003    |
|    n_updates       | 2636305   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 35.3      |
|    ep_rew_mean     | -5.74e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 85260     |
|    fps             | 38        |
|    time_elapsed    | 170       |
|    total_timesteps | 2636550   |
| train/            

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 34.8      |
|    ep_rew_mean     | -5.73e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 85312     |
|    fps             | 38        |
|    time_elapsed    | 218       |
|    total_timesteps | 2638354   |
| train/             |           |
|    actor_loss      | 4.66e+10  |
|    critic_loss     | 8.2e+16   |
|    ent_coef        | 4.71e+07  |
|    ent_coef_loss   | 0.83      |
|    learning_rate   | 0.0003    |
|    n_updates       | 2638253   |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 34.8      |
|    ep_rew_mean     | -5.78e+08 |
|    phm_score       | 0         |
| time/              |           |
|    episodes        | 85316     |
|    fps             | 38        |
|    time_elapsed    | 223       |
|    total_timesteps | 2638496   |
| train/            

### SAC Corrective no Delay

In [None]:
# FIRST RUN 
#run_model(c1_corrective_pred_no_delay_env, "SAC", "SAC_model_corr_no_delay", "models/SAC_corr_no_delay", 1, 3600, True, sac_model_corr_no_delay) #1hour   
#-------------
# MAIN RUN 
#run_model(c1_corrective_pred_no_delay_env, "SAC", "SAC_model_corr_no_delay", "models/SAC_corr_no_delay", 10, 3600, False,)    

### SAC Corrective with Delay

In [None]:
# FIRST RUN 
#run_model(c1_corrective_pred_with_delay_env, "SAC", "SAC_model_corr_with_delay", "models/SAC_corr_with_delay", 1, 3600, True, sac_model_corr_with_delay) #1hour   
#-------------
# MAIN RUN 
#run_model(c1_corrective_pred_with_delay_env, "SAC", "SAC_model_corr_with_delay", "models/SAC_corr_with_delay", 10, 3600, False,)    

# Evaluate policy 

Evaluate_policy, returns the mean reward per episode and the standard deviation of the reward per episode

-> The "mean reward per episode" represents the average reward obtained per episode over the evaluation period.
-> The "standard deviation of the reward per episode" indicates how much the rewards vary from the mean reward per episode.

Default Evalueate Policy

In [None]:
#evaluate_policy(model_xyz , non_corrective_pred_env_c4 , n_eval_episodes=10, render=False)

**Πρεπει να φτιάξω μια συνάρτηση για τα non corrective environment**

To πρόβλημα ποιο είναι ; 

Το πρόβλημα είναι ότι στα non corr env μπορεί να πέσω πάνω σε μοντέλο που δεν τερματίζει οπότε αξίζει να ψάξω 
ποιο τερματίζει και να χρησιμοποιήσω εκείνο. 

Σκέψου αναδρομικά πήγαινε προς τα πίσω μέχρι να βρεις κάποιο το οποίο τερματίζει. (Εάν δεν βρείς σταμάτα)

In [None]:
# Function to find the highest 'i' value from existing model files
def find_highest_i(models_dir):
    highest_i = 0
    for filename in os.listdir(models_dir):
        if filename.endswith(".zip"):  # Ensure we're only looking at model files
            try:
                # Extract 'i' from the filename based on the TIMESTEPS*i pattern
                i = int(filename.replace('.zip', '').split('/')[-1]) // TIMESTEPS
                if i > highest_i:
                    highest_i = i
            except ValueError:
                # In case the filename doesn't follow the expected pattern
                continue
    return highest_i

#=====================================================

def load_model(model_filename, algo, env):
    if Path(model_filename).exists():
        algo_class = getattr(stable_baselines3, algo)
        loaded_model = algo_class.load(model_filename, env)
        print(f"Successfully loaded model from {model_filename}")
        return loaded_model 
    else:
        print(f"Model file '{model_filename}' does not exist.")

In [None]:
def make_filename_i(models_dir, i): 
    if not os.path.exists(models_dir): 
        os.makedirs(models_dir)
        
    #highest_i = find_highest_i(models_dir)
    return f"{models_dir}/{TIMESTEPS*i}.zip"

In [None]:
import os
import sys
from contextlib import redirect_stdout
def find_314_non_corr(algo,  env, model_dir,  n_eval_episodes=1, render=False):
    
    reward_314_log = []
    phm_score_log = []  # keep all the phm_scores
    reward_timestep_log = []
    
    found = False
    reached_314 = False
    i = find_highest_i(model_dir)  #the latest trained model
    while not found :
            
        total_reward = 0.0
        #Load model at timestep i
        model_filename = make_filename_i(model_dir , i)
        #model = load_model(model_filename, algo , env)
        # Για να μην κάνει τόσα print
        with open(os.devnull, 'w') as fnull, redirect_stdout(fnull):
            model = load_model(model_filename, algo, env)
        
        for _ in range(n_eval_episodes):
            obs = env.reset()
            done = False
            episode_reward = 0.0
            phm_score = 0.0

            while not done:

                if render:
                    env.render()
                if isinstance(obs, tuple):  # Check if the observation is a tuple
                    obs = obs[0]  # If it is, extract the first element
                action, _ = model.predict(obs, deterministic=True)

                obs, reward, terminated, trunctated, info = env.step(action)
                episode_reward += reward
                #print(info)

                # Retrieve the cut information from the info dictionary
                current_step = info.get('cut', 0)
                phm_score = info.get('score', 0)
                #score_314 = info.get('score_314',0) # 0 is the default value assigned if the score_314 doesnt exist in the info dict
                
                #print(current_step, "-> " , score_314)
                #print(phm_score)
                #print("Current step is ", current_step, " AND the phm score is : " ,phm_score)

                if current_step > 313 :
                    
                    #print("Phm score is : ", phm_score)
                    #print("Reward is :", episode_reward)
                    
                    phm_score_log.append([phm_score, i])
                    reward_314_log.append([episode_reward, i])
                    reached_314 = True
                    done = True

                if terminated or trunctated : 
                    #print(current_step)
                    #print("Terminated is : ", terminated)
                    #print("Trunctuated is : ", trunctated)
                    done = True

            total_reward += episode_reward
            reward = total_reward / n_eval_episodes
            reward_timestep_log.append([reward, i])
        
        i = i-1
        if(i<=1): 
            if(reached_314 != True) : 
                print("-----------------------------------")
                print("No model has reached the cut '314' ")
                print("-----------------------------------")
            found = True 

    return reward_timestep_log, phm_score_log, reward_314_log

In [None]:
import matplotlib.pyplot as plt

arr = [1,2,3]

plt.plot(arr)

In [None]:
def print_results(reward_timestep_log, phm_score_log, reward_314_log) : 
    
    print()
    max_reward, timestep_i = max(reward_timestep_log, key=lambda entry: entry[0])
    print("Max Reward is : ", max_reward, " and it is found at timestep : ", timestep_i)

    if phm_score_log: 
        min_phm, timestep_i = min(phm_score_log, key=lambda entry: entry[0])
        print("Min Phm Score is : ", min_phm, " and it is found at timestep : ", timestep_i)
    
    if reward_314_log:
        max_reward_314, timestep_i =  max(reward_314_log, key=lambda entry: entry[0])
        print("Max Reward-314 is : ", max_reward_314, " and it is found at timestep : ", timestep_i)


In [None]:
max_reward, timestep_i = max(reward_timestep_log, key=lambda entry: entry[0])
print("Max Reward is : ", max_reward, " and it is found at timestep : ", timestep_i, " *10,000")

In [None]:
print("OG function ", evaluate_policy(ppo_model_non_corr_no_delay, c6_non_corrective_pred_no_delay_env, n_eval_episodes=10, render=False) )
evaluate_and_print_policy(ppo_model_non_corr_no_delay, c6_non_corrective_pred_no_delay_env, "PPO Non-Corr No Delay")

In [None]:
df_c1 = pd.read_csv("./normalised_time_domain_features_with_cut_and_flute_max.csv")
df_c4 = pd.read_csv("./normalised_time_domain_features_c4_with_cut_and_flute_max.csv")
df_c6 = pd.read_csv("./normalised_time_domain_features_c6_with_cut_and_flute_max.csv")

In [None]:
## -- CUTTER 1 -- ##
c1_non_corrective_pred_no_delay_env = Monitor(CNC_Env_Non_Corrective_No_Delay(df_c1), logdir)
c1_non_corrective_pred_with_delay_env = Monitor(CNC_Env_Non_Corrective_with_Delay(df_c1), logdir)
#------------------------
c1_corrective_pred_no_delay_env = Monitor(CNC_Env_Corrective_Pred_No_Delay(df_c1), logdir)
c1_corrective_pred_with_delay_env = Monitor(CNC_Env_Corrective_Pred_with_Delay(df_c1), logdir)

# -- CUTTER 2 -- ## 
c4_non_corrective_pred_no_delay_env = Monitor(CNC_Env_Non_Corrective_No_Delay(df_c4), logdir)
c4_non_corrective_pred_with_delay_env = Monitor(CNC_Env_Non_Corrective_with_Delay(df_c4), logdir)
#------------------------
c4_corrective_pred_no_delay_env = Monitor(CNC_Env_Corrective_Pred_No_Delay(df_c4), logdir)
c4_corrective_pred_with_delay_env = Monitor(CNC_Env_Corrective_Pred_with_Delay(df_c4), logdir)

# -- CUTTER 3 -- ## 
c6_non_corrective_pred_no_delay_env = Monitor(CNC_Env_Non_Corrective_No_Delay(df_c6), logdir)
c6_non_corrective_pred_with_delay_env = Monitor(CNC_Env_Non_Corrective_with_Delay(df_c6), logdir)
#------------------------
c6_corrective_pred_no_delay_env = Monitor(CNC_Env_Corrective_Pred_No_Delay(df_c6), logdir)
c6_corrective_pred_with_delay_env = Monitor(CNC_Env_Corrective_Pred_with_Delay(df_c6), logdir)

In [None]:
# Loading 

#PPO
model_filename = make_filename("models\PPO_non_corr_no_delay")
ppo_model_non_corr_no_delay = load_model(model_filename, "PPO", c1_non_corrective_pred_no_delay_env)
#-
model_filename = make_filename("models\PPO_non_corr_with_delay")
ppo_model_non_corr_with_delay = load_model(model_filename, "PPO", c1_non_corrective_pred_with_delay_env)
#-
model_filename = make_filename("models\PPO_corr_no_delay")
ppo_model_corr_no_delay = load_model(model_filename, "PPO", c1_corrective_pred_no_delay_env)
#-
model_filename = make_filename("models\PPO_corr_with_delay")
ppo_model_corr_with_delay = load_model(model_filename, "PPO", c1_corrective_pred_with_delay_env)
#------------------
#A2C
model_filename = make_filename("models\A2C_non_corr_no_delay")
a2c_model_non_corr_no_delay = load_model(model_filename, "A2C", c1_non_corrective_pred_no_delay_env) 
#-
model_filename = make_filename("models\A2C_non_corr_with_delay")
a2c_model_non_corr_with_delay = load_model(model_filename, "A2C", c1_non_corrective_pred_with_delay_env)
#-
model_filename = make_filename("models\A2C_corr_no_delay")
a2c_model_corr_no_delay = load_model(model_filename, "A2C", c1_corrective_pred_no_delay_env)
#-
model_filename = make_filename("models\A2C_corr_with_delay")
a2c_model_corr_with_delay = load_model(model_filename, "A2C", c1_corrective_pred_with_delay_env)
#------------------
#DDPG
model_filename = make_filename("models\DDPG_non_corr_no_delay")
ddpg_model_non_corr_no_delay = load_model(model_filename, "DDPG", c1_non_corrective_pred_no_delay_env)
#-
model_filename = make_filename("models\DDPG_non_corr_with_delay")
ddpg_model_non_corr_with_delay = load_model(model_filename, "DDPG", c1_non_corrective_pred_with_delay_env)
#-
model_filename = make_filename("models\DDPG_corr_no_delay")
ddpg_model_corr_no_delay = load_model(model_filename, "DDPG", c1_corrective_pred_no_delay_env)
#-
model_filename = make_filename("models\DDPG_corr_with_delay")
ddpg_model_corr_with_delay = load_model(model_filename, "DDPG", c1_corrective_pred_with_delay_env)
#------------------
#SAC
model_filename = make_filename("models\SAC_non_corr_no_delay")
sac_model_non_corr_no_delay = load_model(model_filename, "SAC", c1_non_corrective_pred_no_delay_env)
#-
model_filename = make_filename("models\SAC_non_corr_with_delay")
sac_model_non_corr_with_delay = load_model(model_filename, "SAC", c1_non_corrective_pred_with_delay_env)
#-
model_filename = make_filename("models\SAC_corr_no_delay")
sac_model_corr_no_delay = load_model(model_filename, "SAC", c1_corrective_pred_no_delay_env)
#-
model_filename = make_filename("models\SAC_corr_with_delay")
sac_model_corr_with_delay = load_model(model_filename, "SAC", c1_corrective_pred_with_delay_env)

In [None]:
def evaluate_policy_with_score(model, env, n_eval_episodes=10, render=False):
    total_reward = 0.0
    phm_score_log = []  # keep all the phm_scores
    for _ in range(n_eval_episodes):
        obs = env.reset()
        done = False
        episode_reward = 0.0
        phm_score = 0.0
        
        while not done:
            
            if render:
                env.render()
            if isinstance(obs, tuple):  # Check if the observation is a tuple
                obs = obs[0]  # If it is, extract the first element
            action, _ = model.predict(obs, deterministic=True)
            obs, reward, terminated, trunctated, info = env.step(action)
            #print(info)
            episode_reward += reward
            # Retrieve the cut information from the info dictionary
            current_step = info.get('cut', 0)
            phm_score = info.get('score', 0)

            #print(episode_reward)
            print(phm_score) 
            # print(terminated, current_step, trunctated)
            print(current_step)
            if current_step > (MAX_STEPS - 10):
                # Assuming that the PHM score is the reward in this case
                print("hello")
                phm_score = info.get('score', 0)
                print(phm_score)
                phm_score_log.append(phm_score)
                done = True
                
            if terminated or trunctated : 
                done = True

        
        total_reward += episode_reward

    avg_reward = total_reward / n_eval_episodes
    return avg_reward, phm_score_log

In [None]:
def evaluate_and_print_policy(model, env, model_name):
    avg_reward, avg_phm_score = evaluate_policy_with_score(model, env, n_eval_episodes=1, render=False)
    print(f"{model_name} Evaluation:")
    print("Average Reward:", avg_reward)
    print("PHM Score:", avg_phm_score)
    return avg_reward, avg_phm_score

# Results

r = reward_timestep_log, <br>
p = phm_score_log, <br>
r_314 = reward_314_log <br>

# **PPO**

### PPO Corr

In [None]:
r,p,r_314 = find_314_non_corr(algo="PPO", env=c4_corrective_pred_no_delay_env, model_dir="models\PPO_corr_no_delay" , )
print('\033[1m  PPO Corrective Env | No Delay | Cutter 4   \033[0m')
print_results(r, p, r_314)

In [None]:
r,p,r_314 = find_314_non_corr(algo="PPO", env=c6_corrective_pred_no_delay_env, model_dir="models\PPO_corr_no_delay" , )
print('\033[1m  PPO Corrective Env | No Delay | Cutter 6   \033[0m')
print_results(r, p, r_314)

In [None]:
r,p,r_314 = find_314_non_corr(algo="PPO", env=c4_corrective_pred_with_delay_env, model_dir="models\PPO_corr_with_delay" , )
print('\033[1m  PPO Corrective Env | With Delay | Cutter 4   \033[0m')
print_results(r, p, r_314)

In [None]:
r,p,r_314 = find_314_non_corr(algo="PPO", env=c6_corrective_pred_with_delay_env, model_dir="models\PPO_corr_with_delay" , )
print('\033[1m  PPO Corrective Env | With Delay | Cutter 6   \033[0m')
print_results(r, p, r_314)

### PPO Non Corr

In [None]:
r,p,r_314 = find_314_non_corr(algo="PPO", env=c4_non_corrective_pred_no_delay_env, model_dir="models\PPO_non_corr_no_delay" ,)
print('\033[1m  PPO Non Corrective Env | No Delay | Cutter 4   \033[0m')
print_results(r, p, r_314)

In [None]:
r,p,r_314 = find_314_non_corr(algo="PPO", env=c6_non_corrective_pred_no_delay_env, model_dir="models\PPO_non_corr_no_delay" , )
print('\033[1m  PPO Non Corrective Env | No Delay | Cutter 6   \033[0m')
print_results(r, p, r_314)

In [None]:
r,p,r_314 = find_314_non_corr(algo="PPO", env=c4_non_corrective_pred_with_delay_env, model_dir="models\PPO_non_corr_with_delay" ,)
print('\033[1m  PPO Non Corrective Env | With Delay | Cutter 4   \033[0m')
print_results(r, p, r_314) 

In [None]:
r,p,r_314 = find_314_non_corr(algo="PPO", env=c6_non_corrective_pred_with_delay_env, model_dir="models\PPO_non_corr_with_delay" ,)
print('\033[1m  PPO Non Corrective Env | With Delay | Cutter 6   \033[0m')
print_results(r, p, r_314) 

# **A2C**

### A2C Corr

In [None]:
r,p,r_314 = find_314_non_corr(algo="A2C", env=c4_corrective_pred_no_delay_env, model_dir="models\A2C_corr_no_delay" , )
print('\033[1m  A2C Corrective Env | No Delay | Cutter 4   \033[0m')
print_results(r, p, r_314)

In [None]:
r,p,r_314 = find_314_non_corr(algo="A2C", env=c6_corrective_pred_no_delay_env, model_dir="models\A2C_corr_no_delay" , )
print('\033[1m  A2C Corrective Env | No Delay | Cutter 6   \033[0m')
print_results(r, p, r_314)

In [None]:
r,p,r_314 = find_314_non_corr(algo="A2C", env=c4_corrective_pred_with_delay_env, model_dir="models\A2C_corr_with_delay" , )
print('\033[1m  A2C Corrective Env | With Delay | Cutter 4   \033[0m')
print_results(r, p, r_314)

In [None]:
r,p,r_314 = find_314_non_corr(algo="A2C", env=c6_corrective_pred_with_delay_env, model_dir="models\A2C_corr_with_delay" , )
print('\033[1m  A2C Corrective Env | With Delay | Cutter 6   \033[0m')
print_results(r, p, r_314)

### A2C Non Corr

In [None]:
r,p,r_314 = find_314_non_corr(algo="A2C", env=c4_non_corrective_pred_no_delay_env, model_dir="models\A2C_non_corr_no_delay" , )
print('\033[1m  A2C Non Corrective Env | No Delay | Cutter 4   \033[0m')
print_results(r, p, r_314)

In [None]:
r,p,r_314 = find_314_non_corr(algo="A2C", env=c6_non_corrective_pred_no_delay_env, model_dir="models\A2C_non_corr_no_delay" , )
print('\033[1m  A2C Non Corrective Env | No Delay | Cutter 6   \033[0m')
print_results(r, p, r_314)

In [None]:
r,p,r_314 = find_314_non_corr(algo="A2C", env=c4_non_corrective_pred_with_delay_env, model_dir="models\A2C_non_corr_with_delay" , )
print('\033[1m  A2C Non Corrective Env | With Delay | Cutter 4   \033[0m')
print_results(r, p, r_314)

In [None]:
r,p,r_314 = find_314_non_corr(algo="A2C", env=c6_non_corrective_pred_with_delay_env, model_dir="models\A2C_non_corr_with_delay" , )
print('\033[1m  A2C Non Corrective Env | With Delay | Cutter 6   \033[0m')
print_results(r, p, r_314)

# **DDPG**

### DDPG Corr

In [None]:
r,p,r_314 = find_314_non_corr(algo="DDPG", env=c4_corrective_pred_no_delay_env, model_dir="models\DDPG_corr_no_delay" , )
print('\033[1m  DDPG Corrective Env | No Delay | Cutter 4   \033[0m')
print_results(r, p, r_314)

In [None]:
r,p,r_314 = find_314_non_corr(algo="DDPG", env=c6_corrective_pred_no_delay_env, model_dir="models\DDPG_corr_no_delay" , )
print('\033[1m  DDPG Corrective Env | No Delay | Cutter 6A\033[0m')
print_results(r, p, r_314)

In [None]:
r,p,r_314 = find_314_non_corr(algo="DDPG", env=c4_corrective_pred_with_delay_env, model_dir="models\DDPG_corr_with_delay" , )
print('\033[1m  DDPG Corrective Env | With Delay | Cutter 4   \033[0m')
print_results(r, p, r_314)

In [None]:
r,p,r_314 = find_314_non_corr(algo="DDPG", env=c6_corrective_pred_with_delay_env, model_dir="models\DDPG_corr_with_delay" , )
print('\033[1m  DDPG Corrective Env | With Delay | Cutter 6   \033[0m')
print_results(r, p, r_314)

### DDPG Non Corr

In [None]:
r,p,r_314 = find_314_non_corr(algo="DDPG", env=c4_non_corrective_pred_no_delay_env, model_dir="models\DDPG_non_corr_no_delay" , )
print('\033[1m  DDPG Non Corrective Env | No Delay | Cutter 4   \033[0m')
print_results(r, p, r_314)

In [None]:
r,p,r_314 = find_314_non_corr(algo="DDPG", env=c6_non_corrective_pred_no_delay_env, model_dir="models\DDPG_non_corr_no_delay" , )
print('\033[1m  DDPG Non Corrective Env | No Delay | Cutter 6   \033[0m')
print_results(r, p, r_314)

In [None]:
r,p,r_314 = find_314_non_corr(algo="DDPG", env=c4_non_corrective_pred_with_delay_env, model_dir="models\DDPG_non_corr_with_delay" , )
print('\033[1m  DDPG Non Corrective Env | With Delay | Cutter 4   \033[0m')
print_results(r, p, r_314) 

In [None]:
r,p,r_314 = find_314_non_corr(algo="DDPG", env=c6_non_corrective_pred_with_delay_env, model_dir="models\DDPG_non_corr_with_delay" , )
print('\033[1m  DDPG Non Corrective Env | With Delay | Cutter 6   \033[0m')
print_results(r, p, r_314) 

# **SAC**

### SAC Corr

In [None]:
r,p,r_314 = find_314_non_corr(algo="SAC", env=c4_corrective_pred_no_delay_env, model_dir="models\SAC_corr_no_delay" , )
print('\033[1m  SAC Corrective Env | No Delay | Cutter 4   \033[0m')
print_results(r, p, r_314)

In [None]:
r,p,r_314 = find_314_non_corr(algo="SAC", env=c6_corrective_pred_no_delay_env, model_dir="models\SAC_corr_no_delay" , )
print('\033[1m  SAC Corrective Env | No Delay | Cutter 6   \033[0m')
print_results(r, p, r_314)

In [None]:
r,p,r_314 = find_314_non_corr(algo="SAC", env=c4_corrective_pred_with_delay_env, model_dir="models\SAC_corr_with_delay" , )
print('\033[1m  SAC Corrective Env | With Delay | Cutter 4   \033[0m')
print_results(r, p, r_314)

In [None]:
r,p,r_314 = find_314_non_corr(algo="SAC", env=c6_corrective_pred_with_delay_env, model_dir="models\SAC_corr_with_delay" , )
print('\033[1m  SAC Corrective Env | With Delay | Cutter 6   \033[0m')
print_results(r, p, r_314)

### SAC Non Corr

In [75]:
r,p,r_314 = find_314_non_corr(algo="SAC", env=c4_non_corrective_pred_no_delay_env, model_dir="models\SAC_non_corr_no_delay" , )
print('\033[1m  SAC Non Corrective Env | No Delay | Cutter 4   \033[0m')
print_results(r, p, r_314)

{'cut': 314, 'score': 3112569700.844474}
Reached 314 None
{'cut': 314, 'score': 137990.21655224383}
Reached 314 None
{'cut': 314, 'score': 775.1419368898694}
Reached 314 None
{'cut': 314, 'score': 752.0644359551527}
Reached 314 None
{'cut': 314, 'score': 1413.1886518223914}
Reached 314 None
{'cut': 314, 'score': 4077.608271693491}
Reached 314 None
{'cut': 314, 'score': 5563.735280104939}
Reached 314 None
{'cut': 314, 'score': 2650.167854361031}
Reached 314 None
{'cut': 314, 'score': 18460.197008886204}
Reached 314 None
{'cut': 314, 'score': 56417.46643741999}
Reached 314 None
{'cut': 314, 'score': 930617.4393712846}
Reached 314 None
{'cut': 314, 'score': 10779.825262203014}
Reached 314 None
{'cut': 314, 'score': 8146.882517553841}
Reached 314 None
{'cut': 314, 'score': 37683.41788745915}
Reached 314 None
{'cut': 314, 'score': 10887.223676039039}
Reached 314 None
{'cut': 314, 'score': 18547.217584952097}
Reached 314 None
{'cut': 314, 'score': 157325.7799572666}
Reached 314 None
{'cut': 

In [76]:
r,p,r_314 = find_314_non_corr(algo="SAC", env=c6_non_corrective_pred_no_delay_env, model_dir="models\SAC_non_corr_no_delay" , )
print('\033[1m  SAC Non Corrective Env | No Delay | Cutter 6   \033[0m')
print_results(r, p, r_314)

{'cut': 314, 'score': 32045136.559304975}
Reached 314 None
{'cut': 314, 'score': 1352367.9273216543}
Reached 314 None
{'cut': 314, 'score': 20461500.205586582}
Reached 314 None
{'cut': 314, 'score': 17786245.351090614}
Reached 314 None
{'cut': 314, 'score': 5567386285.209656}
Reached 314 None
{'cut': 314, 'score': 3978999.3507647435}
Reached 314 None
{'cut': 314, 'score': 614069410.4345366}
Reached 314 None
{'cut': 314, 'score': 45746.854076351774}
Reached 314 None
{'cut': 314, 'score': 72722.03189794697}
Reached 314 None
{'cut': 314, 'score': 13494197.928148177}
Reached 314 None
{'cut': 314, 'score': 3140.837366733796}
Reached 314 None
{'cut': 314, 'score': 6254.185614665357}
Reached 314 None
{'cut': 314, 'score': 55968.63652584469}
Reached 314 None
{'cut': 314, 'score': 4083.9950979990153}
Reached 314 None
{'cut': 314, 'score': 9725.406257603883}
Reached 314 None
{'cut': 314, 'score': 1249059.5136913448}
Reached 314 None
{'cut': 314, 'score': 13203.818128797368}
Reached 314 None
{'cu

In [77]:
r,p,r_314 = find_314_non_corr(algo="SAC", env=c4_non_corrective_pred_with_delay_env, model_dir="models\SAC_non_corr_with_delay" , )
print('\033[1m  SAC Non Corrective Env | With Delay | Cutter 4   \033[0m')
print_results(r, p, r_314)

[1m  SAC Non Corrective Env | With Delay | Cutter 4   [0m

Max Reward is :  -23455.729347658093  and it is found at timestep :  20
Min Phm Score is :  11418.283896309002  and it is found at timestep :  20
Max Reward-314 is :  -23455.729347658093  and it is found at timestep :  20


In [78]:
r,p,r_314 = find_314_non_corr(algo="SAC", env=c6_non_corrective_pred_with_delay_env, model_dir="models\SAC_non_corr_with_delay" , )
print('\033[1m  SAC Non Corrective Env | With Delay | Cutter 6   \033[0m')
print_results(r, p, r_314)

[1m  SAC Non Corrective Env | With Delay | Cutter 6   [0m

Max Reward is :  -15048.317533337558  and it is found at timestep :  47
Min Phm Score is :  7712.814000433971  and it is found at timestep :  19
Max Reward-314 is :  -15048.317533337558  and it is found at timestep :  47
