## Importing Libraries

In [1]:
import numpy as np
import random

from EMS_Gym_Env import EMSGymEnv
from device_classes import Intermittent, Uninterruptible

import ray
from ray.tune.registry import register_env

from ray import tune

from ray.rllib.algorithms.algorithm_config import AlgorithmConfig
from ray.rllib.algorithms.callbacks import DefaultCallbacks
from ray.air import RunConfig, CheckpointConfig

from typing import Dict, Optional, Union

from ray.rllib.env.base_env import BaseEnv
from ray.rllib.evaluation.episode import Episode
from ray.rllib.evaluation.episode_v2 import EpisodeV2
from ray.rllib.policy import Policy
from ray.rllib.utils.typing import PolicyID
from ray.rllib.evaluation import RolloutWorker
from ray.rllib.utils.deprecation import deprecation_warning

import wandb

## Setting up the custom environment (appliance power figures based on https://www.daftlogic.com/information-appliance-power-consumption.htm)

### User that accepts most changes - Receptive

In [2]:
# Price data used by the environment.
# Alternative file kept for reference: "prices_for_one_day_inference.csv"
data_file = "nyiso_hourly_prices.csv"

# ---------------------------------------------------------------------------
# Intermittent devices
# ---------------------------------------------------------------------------
# All intermittent devices of this user share one probability vector and one
# standard penalty (the meaning of the four entries is defined by
# device_classes.Intermittent).
intermittent_user_probabilities = np.array([0.90, 0.90, 0.90, 0.90])
intermittent_device_penalty = 100

# (name, device_power_consumption), listed in creation order.
_intermittent_specs = [
    ("Small AC", 1),
    ("Big AC", 2.5),
    ("Ceiling Fan 1", 0.07),
    ("Ceiling Fan 2", 0.07),
    ("Boiler", 3),
    ("Dehumidifier", 0.07),
]

array_of_intermittent_devices = [
    Intermittent(
        name=label,
        device_power_consumption=power,
        user_probabilities=intermittent_user_probabilities,
        device_standard_penalty=intermittent_device_penalty,
    )
    for label, power in _intermittent_specs
]

# Keep the per-device handles bound under the names the notebook has always used.
(
    intermittent_device_1,
    intermittent_device_2,
    intermittent_device_3,
    intermittent_device_4,
    intermitent_device_5,
    intermittent_device_6,
) = array_of_intermittent_devices

# ---------------------------------------------------------------------------
# Uninterruptible devices
# ---------------------------------------------------------------------------
uninterruptible_user_probabilities = np.array([0.90, 0.90, 0.90, 0.90])
uninterruptible_device_standard_penalty = 100
uninterruptible_device_override_penalty = 1000

# (name, device_power_consumption, device_on_duration), listed in creation order.
_uninterruptible_specs = [
    ("Dishwasher", 1.3, 2.5),
    ("Washing Machine", 0.5, 1),
    ("Clothes Dryer", 2.4, 0.5),
]

array_of_uninterruptible_devices = [
    Uninterruptible(
        name=label,
        device_power_consumption=power,
        user_probabilities=uninterruptible_user_probabilities,
        device_standard_penalty=uninterruptible_device_standard_penalty,
        device_on_duration=on_duration,
        device_override_penalty=uninterruptible_device_override_penalty,
    )
    for label, power, on_duration in _uninterruptible_specs
]

(
    uninterruptible_device_1,
    uninterruptible_device_2,
    uninterruptible_device_3,
) = array_of_uninterruptible_devices

# ---------------------------------------------------------------------------
# Environment for the receptive user
# ---------------------------------------------------------------------------
receptive_train_env = EMSGymEnv(
    data_file=data_file,
    intermittent_devices=array_of_intermittent_devices,
    uninterruptible_devices=array_of_uninterruptible_devices,
    episode_horizon=7,
    time_step_duration=0.5,
)

Environment successfully initialized


### User that rejects most changes - Resistant

In [3]:
# Price data used by the environment.
# Alternative file kept for reference: "prices_for_one_day_inference.csv"
data_file = "nyiso_hourly_prices.csv"

# ---------------------------------------------------------------------------
# Intermittent devices
# ---------------------------------------------------------------------------
# All intermittent devices of this user share one probability vector and one
# standard penalty (the meaning of the four entries is defined by
# device_classes.Intermittent).
intermittent_user_probabilities = np.array([0.10, 0.10, 0.10, 0.10])
intermittent_device_penalty = 100

# (name, device_power_consumption), listed in creation order.
_intermittent_specs = [
    ("Small AC", 1),
    ("Big AC", 2.5),
    ("Ceiling Fan 1", 0.07),
    ("Ceiling Fan 2", 0.07),
    ("Boiler", 3),
    ("Dehumidifier", 0.07),
]

array_of_intermittent_devices = [
    Intermittent(
        name=label,
        device_power_consumption=power,
        user_probabilities=intermittent_user_probabilities,
        device_standard_penalty=intermittent_device_penalty,
    )
    for label, power in _intermittent_specs
]

# Keep the per-device handles bound under the names the notebook has always used.
(
    intermittent_device_1,
    intermittent_device_2,
    intermittent_device_3,
    intermittent_device_4,
    intermitent_device_5,
    intermittent_device_6,
) = array_of_intermittent_devices

# ---------------------------------------------------------------------------
# Uninterruptible devices
# ---------------------------------------------------------------------------
uninterruptible_user_probabilities = np.array([0.10, 0.10, 0.10, 0.10])
uninterruptible_device_standard_penalty = 100
uninterruptible_device_override_penalty = 1000

# (name, device_power_consumption, device_on_duration), listed in creation order.
_uninterruptible_specs = [
    ("Dishwasher", 1.3, 2.5),
    ("Washing Machine", 0.5, 1),
    ("Clothes Dryer", 2.4, 0.5),
]

array_of_uninterruptible_devices = [
    Uninterruptible(
        name=label,
        device_power_consumption=power,
        user_probabilities=uninterruptible_user_probabilities,
        device_standard_penalty=uninterruptible_device_standard_penalty,
        device_on_duration=on_duration,
        device_override_penalty=uninterruptible_device_override_penalty,
    )
    for label, power, on_duration in _uninterruptible_specs
]

(
    uninterruptible_device_1,
    uninterruptible_device_2,
    uninterruptible_device_3,
) = array_of_uninterruptible_devices

# ---------------------------------------------------------------------------
# Environment for the resistant user
# ---------------------------------------------------------------------------
resistant_train_env = EMSGymEnv(
    data_file=data_file,
    intermittent_devices=array_of_intermittent_devices,
    uninterruptible_devices=array_of_uninterruptible_devices,
    episode_horizon=7,
    time_step_duration=0.5,
)

Environment successfully initialized


### Neutral User

In [12]:
# Price data used by the environment.
# Alternative file kept for reference: "prices_for_one_day_inference.csv"
data_file = "nyiso_hourly_prices.csv"

# Reseed NumPy's global RNG so the probabilities drawn below are reproducible.
seed_value = 42
np.random.seed(seed_value)

# Draw a single row of four probabilities uniformly from [0.4, 0.6); the result
# has shape (1, 4) and is shared by every device of this user.
# NOTE(review): the other user profiles pass shape-(4,) arrays, and the saved
# cell output shows (6, 4) and (3, 4) arrays that this code does not produce.
# Confirm which shape the device classes expect and re-run the cell.
user_prob = np.random.uniform(0.4, 0.6, size=(1, 4))

# ---------------------------------------------------------------------------
# Intermittent devices
# ---------------------------------------------------------------------------
intermittent_user_probabilities = user_prob
intermittent_device_penalty = 100

# (name, device_power_consumption), listed in creation order.
_intermittent_specs = [
    ("Small AC", 1),
    ("Big AC", 2.5),
    ("Ceiling Fan 1", 0.07),
    ("Ceiling Fan 2", 0.07),
    ("Boiler", 3),
    ("Dehumidifier", 0.07),
]

array_of_intermittent_devices = [
    Intermittent(
        name=label,
        device_power_consumption=power,
        user_probabilities=intermittent_user_probabilities,
        device_standard_penalty=intermittent_device_penalty,
    )
    for label, power in _intermittent_specs
]

# Keep the per-device handles bound under the names the notebook has always used.
(
    intermittent_device_1,
    intermittent_device_2,
    intermittent_device_3,
    intermittent_device_4,
    intermitent_device_5,
    intermittent_device_6,
) = array_of_intermittent_devices

# ---------------------------------------------------------------------------
# Uninterruptible devices
# ---------------------------------------------------------------------------
# Same array object as the intermittent devices use.
uninterruptible_user_probabilities = user_prob
uninterruptible_device_standard_penalty = 100
uninterruptible_device_override_penalty = 1000

# (name, device_power_consumption, device_on_duration), listed in creation order.
_uninterruptible_specs = [
    ("Dishwasher", 1.3, 2.5),
    ("Washing Machine", 0.5, 1),
    ("Clothes Dryer", 2.4, 0.5),
]

array_of_uninterruptible_devices = [
    Uninterruptible(
        name=label,
        device_power_consumption=power,
        user_probabilities=uninterruptible_user_probabilities,
        device_standard_penalty=uninterruptible_device_standard_penalty,
        device_on_duration=on_duration,
        device_override_penalty=uninterruptible_device_override_penalty,
    )
    for label, power, on_duration in _uninterruptible_specs
]

(
    uninterruptible_device_1,
    uninterruptible_device_2,
    uninterruptible_device_3,
) = array_of_uninterruptible_devices

# ---------------------------------------------------------------------------
# Environment for the neutral user
# ---------------------------------------------------------------------------
neutral_train_env = EMSGymEnv(
    data_file=data_file,
    intermittent_devices=array_of_intermittent_devices,
    uninterruptible_devices=array_of_uninterruptible_devices,
    episode_horizon=7,
    time_step_duration=0.5,
)

[[0.47490802 0.59014286 0.54639879 0.5197317 ]
 [0.43120373 0.4311989  0.41161672 0.57323523]
 [0.520223   0.54161452 0.4041169  0.59398197]
 [0.56648853 0.44246782 0.43636499 0.4366809 ]
 [0.46084845 0.50495129 0.486389   0.45824583]
 [0.52237058 0.42789877 0.45842893 0.47327237]]
[[0.491214   0.55703519 0.43993476 0.50284689]
 [0.51848291 0.40929008 0.52150897 0.43410482]
 [0.41301032 0.58977711 0.59312641 0.56167947]]
Environment successfully initialized


### Conservative user in terms of energy - Prudent

In [13]:
# Price data used by the environment.
# Alternative file kept for reference: "prices_for_one_day_inference.csv"
data_file = "nyiso_hourly_prices.csv"

# ---------------------------------------------------------------------------
# Intermittent devices
# ---------------------------------------------------------------------------
# All intermittent devices of this user share one probability vector and one
# standard penalty (the meaning of the four entries is defined by
# device_classes.Intermittent).
intermittent_user_probabilities = np.array([0.20, 0.80, 0.90, 0.20])
intermittent_device_penalty = 100

# (name, device_power_consumption), listed in creation order.
_intermittent_specs = [
    ("Small AC", 1),
    ("Big AC", 2.5),
    ("Ceiling Fan 1", 0.07),
    ("Ceiling Fan 2", 0.07),
    ("Boiler", 3),
    ("Dehumidifier", 0.07),
]

array_of_intermittent_devices = [
    Intermittent(
        name=label,
        device_power_consumption=power,
        user_probabilities=intermittent_user_probabilities,
        device_standard_penalty=intermittent_device_penalty,
    )
    for label, power in _intermittent_specs
]

# Keep the per-device handles bound under the names the notebook has always used.
(
    intermittent_device_1,
    intermittent_device_2,
    intermittent_device_3,
    intermittent_device_4,
    intermitent_device_5,
    intermittent_device_6,
) = array_of_intermittent_devices

# ---------------------------------------------------------------------------
# Uninterruptible devices
# ---------------------------------------------------------------------------
uninterruptible_user_probabilities = np.array([0.20, 0.80, 0.90, 0.20])
uninterruptible_device_standard_penalty = 100
uninterruptible_device_override_penalty = 1000

# (name, device_power_consumption, device_on_duration), listed in creation order.
_uninterruptible_specs = [
    ("Dishwasher", 1.3, 2.5),
    ("Washing Machine", 0.5, 1),
    ("Clothes Dryer", 2.4, 0.5),
]

array_of_uninterruptible_devices = [
    Uninterruptible(
        name=label,
        device_power_consumption=power,
        user_probabilities=uninterruptible_user_probabilities,
        device_standard_penalty=uninterruptible_device_standard_penalty,
        device_on_duration=on_duration,
        device_override_penalty=uninterruptible_device_override_penalty,
    )
    for label, power, on_duration in _uninterruptible_specs
]

(
    uninterruptible_device_1,
    uninterruptible_device_2,
    uninterruptible_device_3,
) = array_of_uninterruptible_devices

# ---------------------------------------------------------------------------
# Environment for the prudent user
# ---------------------------------------------------------------------------
prudent_train_env = EMSGymEnv(
    data_file=data_file,
    intermittent_devices=array_of_intermittent_devices,
    uninterruptible_devices=array_of_uninterruptible_devices,
    episode_horizon=7,
    time_step_duration=0.5,
)

Environment successfully initialized


## Training agents on the different user environments

### Defining the callback that logs training data to the WandB service

In [3]:
class WandBCallback(DefaultCallbacks):
    """RLlib callback that logs per-episode metrics to Weights & Biases.

    A WandB run is opened when the callback object is constructed. At the end
    of every episode the callback reads the agent's last ``info`` dict, updates
    running totals, and logs both the per-episode values and their running
    means with ``wandb.log``.

    The callback is normally handed to RLlib as a class
    (``config.callbacks(WandBCallback)``), so per-experiment settings are
    exposed as class attributes instead of constructor arguments. Override them
    on the class (or in a subclass) before training, for example
    ``WandBCallback.wandb_run_name = "prudent_..."``.

    NOTE(review): a WandB run is opened by every instance of this class; with
    more than one rollout worker this presumably creates several runs -- confirm
    before raising ``num_rollout_workers``.
    """

    # Identity of the WandB run that receives the metrics.
    wandb_project = "env_paper_experiments"
    wandb_run_name = "receptive_train_env_t05_d07_A2C_100_100_1000_RUN"  # receptive, resistant, neutral, prudent
    wandb_tags = ("7_days", "05_timestep", "6_pr_3_df", "A2C", "Receptive")

    # Factor applied to kWh * price when accumulating the cost metric.
    # Presumably the environment's time_step_duration (0.5 in this notebook) --
    # keep the two in sync.
    time_step_duration = 0.5

    # Key under which the episode stores the last info dict of the single agent.
    info_agent_id = "agent0"

    def __init__(self, legacy_callbacks_dict: Dict[str, callable] = None):
        """Initialise the running totals and open the WandB run.

        Args:
            legacy_callbacks_dict: Deprecated dict-style callbacks. Passing a
                non-empty value triggers ``deprecation_warning(..., error=True)``.
        """
        # Let the base class set up whatever state it needs; done first so a
        # rejected legacy dict fails before a WandB run is opened.
        super().__init__(legacy_callbacks_dict)

        if legacy_callbacks_dict:
            deprecation_warning(
                "callbacks dict interface",
                (
                    "a class extending rllib.algorithms.callbacks.DefaultCallbacks; see"
                    " `rllib/examples/custom_metrics_and_callbacks.py` for an example."
                ),
                error=True,
            )

        # Running totals over all episodes seen by this callback instance.
        self.episodes_counter = 0
        self.episode_total_reward_counter = 0
        self.intermittent_reward_counter = 0
        self.uninterruptible_reward_counter = 0
        self.cost_counter = 0

        wandb.init(
            project=self.wandb_project,
            name=self.wandb_run_name,
            tags=list(self.wandb_tags),
        )

    def on_episode_created(
        self,
        *,
        worker: "RolloutWorker",
        base_env: BaseEnv,
        policies: Dict[PolicyID, Policy],
        env_index: int,
        episode: Union[Episode, EpisodeV2],
        **kwargs,
    ) -> None:
        """Assign a random identifier (1..10000) to the newly created episode."""
        self.episode_id = random.randint(1, 10000)

    def on_episode_end(
        self,
        *,
        worker: "RolloutWorker",
        base_env: BaseEnv,
        policies: Dict[PolicyID, Policy],
        episode: Union[Episode, EpisodeV2, Exception],
        env_index: Optional[int] = None,
        **kwargs,
    ):
        """Update the running totals and log the finished episode to WandB.

        The agent's last info dict must provide the keys
        ``intermittent_device_reward_total``,
        ``uninterruptible_device_reward_total``, ``kwh_device_history``,
        ``price_history`` and ``time``.
        """
        # The signature allows an Exception in place of an episode; such an
        # object carries no infos or reward, so there is nothing to log.
        if isinstance(episode, Exception):
            return

        # Read the info dict once instead of repeating the private lookup.
        info = episode._last_infos[self.info_agent_id]
        intermittent_reward = info["intermittent_device_reward_total"]
        uninterruptible_reward = info["uninterruptible_device_reward_total"]

        self.episodes_counter += 1
        self.intermittent_reward_counter += intermittent_reward
        self.uninterruptible_reward_counter += uninterruptible_reward
        self.episode_total_reward_counter += episode.total_reward

        # Kept on the instance so the last episode's series can be inspected.
        self.plot_kwh = info["kwh_device_history"]
        self.plot_price = info["price_history"]
        self.plot_time = info["time"]

        # Episode cost: element-wise kWh * price, scaled by the step duration.
        self.cost_counter += np.sum(
            np.array(self.plot_kwh) * np.array(self.plot_price) * self.time_step_duration
        )

        episodes_seen = self.episodes_counter
        wandb.log(
            {
                "episode_reward_total": episode.total_reward,
                "episode_reward_mean": self.episode_total_reward_counter / episodes_seen,
                "intermittent_device_reward_total": intermittent_reward,
                "uninterruptible_device_reward_total": uninterruptible_reward,
                "intermittent_device_reward_mean": self.intermittent_reward_counter
                / episodes_seen,
                "uninterruptible_device_reward_mean": self.uninterruptible_reward_counter
                / episodes_seen,
                "cost": self.cost_counter / episodes_seen,
            }
        )

### Training the different agents

In [11]:
seed_value = 42

# Environment to train on. Change these two lines together to switch user
# profile; everything below (seeding, registration, log directory) follows.
train_env = prudent_train_env  # receptive, resistant, neutral, prudent
train_env_label = "prudent"  # receptive, resistant, neutral, prudent

# Seed the environment that is actually trained on, plus both global RNGs.
# (Previously a different environment was seeded than the one registered.)
train_env.seed(seed_value)
np.random.seed(seed_value)
random.seed(seed_value)

# The creator ignores the RLlib env config and always hands back the
# pre-built environment instance.
register_env("SmartHomeGymEnv", lambda config: train_env)

config = AlgorithmConfig()

config = config.training(
    train_batch_size=300,
    model={
        "fcnet_hiddens": [256, 256],
        "fcnet_activation": "swish",
        "post_fcnet_hiddens": [],
        "post_fcnet_activation": "swish",
    },
)
config = config.environment(env="SmartHomeGymEnv")
config = config.rollouts(
    num_rollout_workers=0,
    batch_mode="complete_episodes",
    rollout_fragment_length=300,
    num_envs_per_worker=1,
)
# Uncomment to log per-episode metrics through WandBCallback.
# config = config.callbacks(WandBCallback)
config = config.framework(framework="tf2")
# Single resources() call: CPU-only training. An earlier num_gpus=1 setting
# was overridden by this call and has been removed.
config = config.resources(num_gpus=0, num_trainer_workers=1)
config = config.debugging(seed=seed_value)
config = config.offline_data(postprocess_inputs=True)

# NOTE(review): the saved run output for this cell reports policy_loss = -inf,
# grad_gnorm = 0.0 and repeated "NaN or Inf found in input tensor" messages,
# with episode rewards around -2e5. The optimisation looks numerically broken;
# consider scaling rewards and/or gradient clipping before trusting results.

# Keep the two best checkpoints, ranked by mean episode reward.
checkpoint_config = CheckpointConfig(
    num_to_keep=2,
    checkpoint_score_attribute="episode_reward_mean",
    checkpoint_score_order="max",
    checkpoint_frequency=1,
)

run_config = RunConfig(
    name="A2C/penalties_100_100_1000",
    stop={"episodes_total": 200},
    local_dir=f"{train_env_label}_environment_logs",
    checkpoint_config=checkpoint_config,
)

# Tuner.fit() returns the grid of trial results; the variable name is kept so
# later cells that read `best_result` keep working.
best_result = tune.Tuner(
    trainable="A2C", param_space=config.to_dict(), run_config=run_config
).fit()

# Close any WandB run opened in this process, then stop the local Ray instance.
wandb.finish()
ray.shutdown()

2024-07-17 14:33:51,945	INFO tune.py:922 -- Initializing Ray automatically.For cluster usage or custom Ray initialization, call `ray.init(...)` before `tune.run`.
2024-07-17 14:33:54,822	INFO worker.py:1553 -- Started a local Ray instance.


0,1
Current time:,2024-07-17 14:47:44
Running for:,00:13:46.96
Memory:,14.8/15.2 GiB

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
A2C_SmartHomeGymEnv_7497d_00000,TERMINATED,127.0.0.1:8256,200,803.308,67200,-227172,-207792,-257264,336


2024-07-17 14:33:57,708	INFO algorithm_config.py:2888 -- Executing eagerly (framework='tf2'), with eager_tracing=tf2. For production workloads, make sure to set eager_tracing=True  in order to match the speed of tf-static-graph (framework='tf'). For debugging purposes, `eager_tracing=False` is the best choice.
2024-07-17 14:33:57,709	INFO algorithm_config.py:2888 -- Executing eagerly (framework='tf2'), with eager_tracing=tf2. For production workloads, make sure to set eager_tracing=True  in order to match the speed of tf-static-graph (framework='tf'). For debugging purposes, `eager_tracing=False` is the best choice.
[2m[36m(A2C pid=8256)[0m 2024-07-17 14:34:05,638	INFO algorithm_config.py:2888 -- Executing eagerly (framework='tf2'), with eager_tracing=tf2. For production workloads, make sure to set eager_tracing=True  in order to match the speed of tf-static-graph (framework='tf'). For debugging purposes, `eager_tracing=False` is the best choice.
[2m[36m(A2C pid=8256)[0m 2024-07-

Trial name,agent_timesteps_total,connector_metrics,counters,custom_metrics,date,done,episode_len_mean,episode_media,episode_reward_max,episode_reward_mean,episode_reward_min,episodes_this_iter,episodes_total,experiment_id,hostname,info,iterations_since_restore,node_ip,num_agent_steps_sampled,num_agent_steps_trained,num_env_steps_sampled,num_env_steps_sampled_this_iter,num_env_steps_trained,num_env_steps_trained_this_iter,num_faulty_episodes,num_healthy_workers,num_in_flight_async_reqs,num_remote_worker_restarts,num_steps_trained_this_iter,perf,pid,policy_reward_max,policy_reward_mean,policy_reward_min,sampler_perf,sampler_results,time_since_restore,time_this_iter_s,time_total_s,timers,timestamp,timesteps_since_restore,timesteps_total,training_iteration,trial_id,warmup_time
A2C_SmartHomeGymEnv_7497d_00000,67200,"{'ObsPreprocessorConnector_ms': 0.0485682487487793, 'StateBufferConnector_ms': 0.010799169540405273, 'ViewRequirementAgentConnector_ms': 0.1335616111755371}","{'num_env_steps_sampled': 67200, 'num_env_steps_trained': 67200, 'num_agent_steps_sampled': 67200, 'num_agent_steps_trained': 67200}",{},2024-07-17_14-47-44,True,336,{},-207792,-227172,-257264,1,200,c6cf490cd7c94949837005e6910691e5,LAPTOP-20VM6M02,"{'learner': {'default_policy': {'learner_stats': {'cur_lr': 0.0010000000474974513, 'entropy_coeff': 0.009999999776482582, 'policy_loss': -inf, 'policy_entropy': 5209.105, 'var_gnorm': 22.72548, 'vf_loss': 365806700000.0}, 'grad_gnorm': 0.0, 'vf_explained_var': -0.00027763844, 'custom_metrics': {}, 'num_agent_steps_trained': 336, 'num_grad_updates_lifetime': 200, 'diff_num_grad_updates_vs_sampler_policy': 0.0}}, 'num_env_steps_sampled': 67200, 'num_env_steps_trained': 67200, 'num_agent_steps_sampled': 67200, 'num_agent_steps_trained': 67200}",200,127.0.0.1,67200,67200,67200,336,67200,336,0,0,0,0,336,"{'cpu_util_percent': 4.414285714285714, 'ram_util_percent': 97.45714285714284}",8256,{},{},{},"{'mean_raw_obs_processing_ms': 0.3854113005684867, 'mean_inference_ms': 10.257892434342613, 'mean_action_processing_ms': 0.3227326379466809, 'mean_env_wait_ms': 0.5600495017778483, 'mean_env_render_ms': 0.0}","{'episode_reward_max': -207792.45152509795, 'episode_reward_min': -257264.4572687586, 'episode_reward_mean': -227171.60427793875, 'episode_len_mean': 336.0, 'episode_media': {}, 'episodes_this_iter': 1, 'policy_reward_min': {}, 'policy_reward_max': {}, 'policy_reward_mean': {}, 'custom_metrics': {}, 'hist_stats': {'episode_reward': [-220756.73268277873, -226931.58847334838, -239525.18434532644, -231641.93814757915, -232759.38813185523, -233961.7590762106, -229504.1306351432, -234618.0821383301, -219220.2879527188, -231255.68463683221, -223211.02762103683, -220798.73202328666, -213548.76592040248, -214684.68837276107, 
-231426.4226523542, -257264.4572687586, -212252.63304893495, -225010.86181185415, -233597.92927511584, -219230.0449535457, -239282.20056955595, -225694.5431875746, -228947.31185170158, -223192.08987688564, -228265.23477742236, -237106.92855237136, -233876.00302591655, -221684.06690231612, -223831.65004670314, -214502.94099020254, -221470.3976319886, -222253.00449732415, -230522.4463605329, -217621.96899376824, -223190.1663978473, -245949.6201999498, -226459.38252481227, -231791.3930311534, -235659.0857714662, -225948.9102788041, -207792.45152509795, -234179.283592744, -222264.4430058272, -230569.49449584816, -242688.1281920162, -232724.1991577191, -229875.47573041404, -223232.57107325076, -218342.38443063214, -213354.70401705525, -224275.51637198887, -230824.93218960383, -224454.26114317574, -224088.93997593448, -228328.98158074156, -239412.27160162205, -215070.98708176357, -234919.92345713527, -229001.00508607508, -234724.79450701355, -239761.0077800419, -225796.4796504628, -221622.2528933678, -221634.31941002308, -212506.4522605994, -224696.13057999167, -217488.35472274353, -227593.60122704325, -241046.2756751399, -225303.41145902366, -227749.88436590217, -229901.97733623188, -219455.48142572847, -231387.78706173133, -215623.3186440723, -219432.6919113, -234334.1628632744, -231129.04521980806, -215662.2431697751, -241176.63934030558, -238012.8130097922, -241909.19876280037, -224365.66588358447, -215293.97715332056, -230379.16771162435, -227268.33985039633, -216982.98051740593, -213751.5457383768, -233913.01104624363, -231252.6429290289, -226490.0059461095, -214586.98272462064, -238227.93399559805, -231406.79509348536, -228488.18997045653, -238780.20801724863, -226531.93198251008, -230410.36919452343, -227594.4039809173, -209638.314437137], 'episode_lengths': [336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 
336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336, 336]}, 'sampler_perf': {'mean_raw_obs_processing_ms': 0.3854113005684867, 'mean_inference_ms': 10.257892434342613, 'mean_action_processing_ms': 0.3227326379466809, 'mean_env_wait_ms': 0.5600495017778483, 'mean_env_render_ms': 0.0}, 'num_faulty_episodes': 0, 'connector_metrics': {'ObsPreprocessorConnector_ms': 0.0485682487487793, 'StateBufferConnector_ms': 0.010799169540405273, 'ViewRequirementAgentConnector_ms': 0.1335616111755371}}",803.308,4.64905,803.308,"{'training_iteration_time_ms': 4815.217, 'learn_time_ms': 54.149, 'learn_throughput': 6205.144, 'synch_weights_time_ms': 0.452}",1721216864,0,67200,200,7497d_00000,0.214103


NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf fou