In [None]:
import pandas as pd

# Load the raw electricity price export.
df = pd.read_csv('t_multisite_electricity_price_202405261055.csv')

# Keep a single distribution. `.copy()` makes the filtered frame independent of
# the raw frame, so the column assignments below are unambiguous and do not
# trigger pandas' SettingWithCopyWarning.
df = df[df['distribution_id'] == 11].copy()
df['date_time'] = pd.to_datetime(df['date_time'])

# Hour of day on a half-hour grid: any non-zero minute counts as the ":30" slot.
df['hour_of_day'] = df['date_time'].dt.hour + (df['date_time'].dt.minute > 0) * 0.5

# df = df[(df['hour_of_day'] < 8) | (df['hour_of_day'] > 16.5)]

# Shift the clock by 12 h (values >= 12 drop by 12, values < 12 rise by 12).
# For values in [0, 24) this is exactly (x + 12) mod 24, computed vectorised
# instead of row by row.
df['hour_of_day'] = (df['hour_of_day'] + 12) % 24

# Alias with the corrected spelling; the source column name contains a typo.
df['electricity_price'] = df['electricty_price_fixed']

# Chronological order with a fresh 0..n-1 index.
df = df.sort_values(by='date_time', ascending=True, ignore_index=True)

pd.set_option('display.max_rows', 99)

display(df.tail(99))

# Price range of the selected distribution.
print(df['electricty_price_fixed'].max())
print(df['electricty_price_fixed'].min())


In [27]:
import gymnasium as gym
import numpy as np
import pandas as pd
import random

class MultiAgentEVChargingEnv(gym.Env):
    """Multi-agent EV charging environment driven by an electricity price series.

    Every agent is one vehicle described by four quantities: hours left until
    its deadline, battery level (state of charge, %), battery capacity and
    maximum charging power.  Between calls these values are stored in
    ``self.agents[agent_id]`` rescaled to [-1, 1]; ``step_agent`` converts them
    back to original units, applies the action and rescales them again.

    One environment step advances time by ``time_step_hours`` (0.5 h) and
    consumes one row of ``df['electricity_price']``.
    NOTE(review): this assumes consecutive rows of ``df`` are 0.5 h apart --
    confirm against the data source.

    Observation per agent (float32, shape (5,)):
        [battery_level, deadline_hours, current_price, battery_capacity, max_draw]
    NOTE(review): ``current_price`` is passed through unscaled, so it is only
    inside the declared Box(-1, 1) if the prices themselves are -- confirm.

    Action per agent: one value in [-1, 1], mapped linearly to a charging power
    in [0, that agent's max_draw].
    """

    # 'render.modes' is kept for existing readers; 'render_modes' is the key
    # Gymnasium itself looks up.
    metadata = {'render.modes': ['human'], 'render_modes': ['human']}

    def __init__(self, df, num_agents):
        """Store the price frame and create ``num_agents`` randomly initialised agents.

        Args:
            df: DataFrame with an ``electricity_price`` column, ordered in time.
            num_agents: number of vehicles; agent ids are ``0 .. num_agents - 1``.
        """
        super().__init__()
        self.df = df

        self.num_agents = num_agents  # Number of vehicles

        self.max_battery_level = 100.0 * 3
        self.min_battery_level = 0
        self.target_soc = 90  # target state of charge
        self.possible_charge_powers = [11, 22, 25]

        self.max_charge_power = max(self.possible_charge_powers)
        self.min_charge_power = min(self.possible_charge_powers)

        self.max_battery_cap = 125
        self.min_battery_cap = 55

        self.max_deadline_hours = 15
        self.min_deadline_hours = 5

        # Duration of one environment step, in hours.
        self.time_step_hours = 0.5
        # Prices needed for the longest possible episode: one consumed by
        # reset() plus one per step until the largest deadline has elapsed.
        self.episode_price_count = int(self.max_deadline_hours / self.time_step_hours) + 1

        # Defined up front so the attributes exist before the first reset().
        self.current_index = 0
        self.prices = np.array([], dtype=float)
        self.current_price = 0.0

        self.agents = {i: self.create_agent() for i in range(self.num_agents)}

        self.action_space = gym.spaces.Box(low=np.array([-1], dtype=np.float32),
                                           high=np.array([1], dtype=np.float32), dtype=np.float32)
        self.observation_space = gym.spaces.Box(low=-1, high=1, shape=(5,), dtype=np.float32)

    def rescale(self, val, max_val, min_val):
        """Map ``val`` linearly from [min_val, max_val] to [-1, 1] (0 for a degenerate range)."""
        if min_val == max_val:
            return 0
        return 2 * (val - min_val) / (max_val - min_val) - 1

    def rescale_to_original(self, rescaled_data, original_max, original_min):
        """Inverse of ``rescale``: map a value in [-1, 1] back to [original_min, original_max]."""
        return (rescaled_data + 1) * (original_max - original_min) / 2 + original_min

    def _state_bounds(self):
        """Return ``{state key: (max, min)}`` used for scaling agent state.

        The deadline is scaled against [0, max_deadline_hours] rather than the
        sampling range [min_deadline_hours, max_deadline_hours]: it counts down
        to 0 during an episode, and scaling against the sampling range pushed
        the observation below -1.
        """
        return {
            'deadline_hours': (self.max_deadline_hours, 0),
            'battery_level': (self.max_battery_level, self.min_battery_level),
            'battery_capacity': (self.max_battery_cap, self.min_battery_cap),
            'max_draw': (self.max_charge_power, self.min_charge_power),
        }

    def _scale_agent(self, agent):
        """Rescale an agent's original-unit state to [-1, 1] in place."""
        for key, (upper, lower) in self._state_bounds().items():
            agent[key] = self.rescale(agent[key], upper, lower)

    def _unscale_agent(self, agent):
        """Convert an agent's rescaled state back to original units in place."""
        for key, (upper, lower) in self._state_bounds().items():
            agent[key] = self.rescale_to_original(agent[key], upper, lower)

    def _sample_agent_params(self):
        """Draw a fresh set of agent parameters in original units."""
        return {
            'deadline_hours': float(random.randint(self.min_deadline_hours, self.max_deadline_hours)),
            'battery_level': random.randint(10, 60),
            'battery_capacity': random.randint(self.min_battery_cap, self.max_battery_cap),
            'max_draw': random.choice(self.possible_charge_powers),
        }

    def create_agent(self):
        """ Initialize a new agent with unique parameters (stored rescaled). """
        agent = self._sample_agent_params()
        self._scale_agent(agent)
        return agent

    def setup_time_prices(self):
        """Pick a random window of consecutive prices long enough for a full episode.

        Raises:
            ValueError: if ``df`` has fewer than ``episode_price_count`` rows.
        """
        needed = self.episode_price_count
        total_rows = len(self.df)
        if total_rows < needed:
            raise ValueError(
                f"Price data has {total_rows} rows but an episode needs at least {needed}."
            )

        # Positional start index, chosen so the window never runs off the end
        # of the frame (no retry loop needed).
        self.current_index = random.randrange(total_rows - needed + 1)
        rows = self.df.iloc[self.current_index:self.current_index + needed]
        self.prices = rows['electricity_price'].values

    def increment_time_prices(self):
        """Advance to the next price: pop the head of the window into ``current_price``.

        Raises:
            IndexError: if the price window is exhausted (episode stepped past
                its maximum length without a reset).
        """
        if len(self.prices) == 0:
            raise IndexError("Price window exhausted; call reset() to start a new episode.")
        self.current_price = self.prices[0]
        self.prices = self.prices[1:]

    def reset(self, *, seed=None, options=None):
        """ Reset the state of each agent.

        Args:
            seed: optional seed; also seeds the ``random`` module this
                environment samples from.
            options: accepted for Gymnasium API compatibility; unused.

        Returns:
            ``(observations, info)`` where ``observations`` maps agent id to a
            float32 array of shape (5,) and ``info`` is an empty dict.
        """
        super().reset(seed=seed)
        if seed is not None:
            random.seed(seed)

        self.setup_time_prices()
        self.increment_time_prices()

        states = {agent_id: self.reset_agent(agent_id) for agent_id in self.agents}
        return states, {}

    def reset_agent(self, agent_id):
        """ Reset a single agent's state and return its observation. """
        agent = self.agents[agent_id]
        agent.update(self._sample_agent_params())
        return self.calculate_observation(agent)

    def calculate_observation(self, agent):
        """Rescale ``agent`` (holding original-unit values) in place and build its observation."""
        self._scale_agent(agent)

        return np.array(
            [
                float(agent['battery_level']),
                float(agent['deadline_hours']),
                float(self.current_price),
                float(agent['battery_capacity']),
                float(agent['max_draw'])
            ],
            dtype=np.float32)

    def step(self, actions):
        """ Apply actions, update states, and return observations and rewards for all agents.

        Args:
            actions: mapping of agent id to that agent's action.

        Returns:
            ``(next_states, rewards, dones)``, each keyed by agent id.
        """
        next_states = {}
        rewards = {}
        dones = {}

        self.increment_time_prices()

        for agent_id, action in actions.items():
            # step_agent returns a 5-tuple; the truncation flag and info dict
            # are constant and not part of this method's return value.
            obs, reward, done, _truncated, _info = self.step_agent(agent_id, action)
            next_states[agent_id] = obs
            rewards[agent_id] = reward
            dones[agent_id] = done
        return next_states, rewards, dones

    def step_agent(self, agent_id, action):
        """ Update state for a single agent based on its action.

        Returns:
            ``(observation, reward, done, False, {})``.
        """
        agent = self.agents[agent_id]
        self._unscale_agent(agent)

        agent['deadline_hours'] -= self.time_step_hours

        # Accept scalars as well as shape-(1,) arrays, and keep the value inside
        # the declared action range so the charging power stays in [0, max_draw].
        action_value = float(np.clip(np.asarray(action, dtype=np.float64).reshape(-1)[0], -1.0, 1.0))
        charge_power = self.rescale_to_original(action_value, agent['max_draw'], 0)

        # State of charge (in %) added by charging at `charge_power` for one step.
        added_soc = (charge_power * self.time_step_hours * 100) / agent['battery_capacity']
        # The battery level is not clamped to max_battery_level here.
        agent['battery_level'] = agent['battery_level'] + added_soc

        reward = 0.0
        if abs(agent['battery_level'] - self.target_soc) < 5:
            reward += 20  # Bonus for being within 5 points of the target SOC
        # Distance-to-target penalty.
        reward -= float(abs(self.target_soc - agent['battery_level']))
        # Quadratic penalty on power drawn times the current price.
        reward -= pow(float(charge_power * self.current_price * 3) / 2, 2)

        # Tolerance instead of `== 0`: the deadline goes through a float
        # rescale round trip on every step.
        done = bool(agent['deadline_hours'] <= 1e-6)

        return self.calculate_observation(agent), reward, done, False, {}

    def render(self, mode='human'):
        """Print each agent's battery level in percent."""
        for agent_id, agent in self.agents.items():
            # Stored state is rescaled; convert back before printing it as a percentage.
            level = self.rescale_to_original(
                agent['battery_level'], self.max_battery_level, self.min_battery_level)
            print(f'Agent {agent_id}: Battery Level: {level:.2f}%')


In [28]:
# from pettingzoo.utils import from_gym
from pettingzoo.utils.env import ParallelEnv
import gymnasium as gym

class MultiAgentEVChargingParaEnv(gym.Env, ParallelEnv):
    """Parallel (all agents act at once) wrapper around ``MultiAgentEVChargingEnv``.

    Observations, rewards and done flags are dictionaries keyed by the wrapped
    environment's agent ids.

    NOTE(review): this is a multi-agent environment with dict-valued returns.
    The PPO run recorded further down in this notebook fails on it with
    ``float() argument must be a string or a real number, not 'dict'``.
    """

    def __init__(self, df, num_agents):
        """Build the wrapped environment and mirror its spaces and agent ids."""
        super().__init__()
        self.env = MultiAgentEVChargingEnv(df=df, num_agents=num_agents)
        self.observation_space = self.env.observation_space
        self.action_space = self.env.action_space

        # Agent ids only; the per-agent state stays inside the wrapped env.
        self.agents = list(self.env.agents)
        self.possible_agents = list(self.agents)

        # Results of the most recent reset()/step(), keyed by agent id.
        self._observations = {}
        self.rewards = {}
        self.dones = {}
        self._cumulative_rewards = {agent: 0.0 for agent in self.agents}

    def observe(self, agent):
        """Return the latest observation produced for ``agent``."""
        return self._observations[agent]

    def step(self, action):
        """Step every agent at once.

        Args:
            action: mapping of agent id to that agent's action.

        Returns:
            ``(observations, rewards, terminations, truncations, infos)``, each
            a dict keyed by agent id.
        """
        observations, rewards, dones = self.env.step(action)

        self._observations = observations
        self.rewards = rewards
        self.dones = dones
        for agent, reward in rewards.items():
            self._cumulative_rewards[agent] = self._cumulative_rewards.get(agent, 0.0) + reward

        truncations = {agent: False for agent in dones}
        infos = {agent: {} for agent in observations}
        return observations, rewards, dones, truncations, infos

    def reset(self, seed=0):
        """Start a new episode and return ``(observations, info)`` from the wrapped env.

        ``seed`` is accepted for API compatibility and is not forwarded.
        """
        self.agents = list(self.env.agents)
        obs, info = self.env.reset()

        self._observations = obs
        self.rewards = {agent: 0.0 for agent in self.agents}
        self.dones = {agent: False for agent in self.agents}
        self._cumulative_rewards = {agent: 0.0 for agent in self.agents}
        return obs, info

# Use the from_gym utility if your environment follows the Gym interface
# def make_env(df, num_agents):
#     return from_gym(MultiAgentEVChargingParaEnv(df, num_agents))


In [30]:
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3 import PPO
from pettingzoo.utils.conversions import aec_to_parallel_wrapper
from stable_baselines3.common.env_checker import check_env


# Five-vehicle environment built on the price frame loaded above.
env = MultiAgentEVChargingParaEnv(df=df, num_agents=5)
# check_env(env)

# Optional vectorisation (disabled):
# vec_env = make_vec_env(lambda: env, n_envs=1)

# Build the PPO learner with the default MLP policy and train it.
model = PPO(policy="MlpPolicy", env=env, verbose=1)
model.learn(total_timesteps=20000)

# Persist the trained policy.
model.save("ppo_multiagent_ev_charging")


Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.




TypeError: float() argument must be a string or a real number, not 'dict'

In [34]:
import ray
from ray.rllib.algorithms.ppo import PPO

# Several cells in this notebook start Ray; `ignore_reinit_error=True` lets this
# cell be re-run while a local Ray instance is already up instead of raising.
ray.init(ignore_reinit_error=True)

2024-06-14 00:56:17,540	ERROR services.py:1329 -- Failed to start the dashboard , return code -11
2024-06-14 00:56:17,541	ERROR services.py:1354 -- Error should be written to 'dashboard.log' or 'dashboard.err'. We are printing the last 20 lines for you. See 'https://docs.ray.io/en/master/ray-observability/ray-logging.html#logging-directory-structure' to find where the log file is.
2024-06-14 00:56:17,542	ERROR services.py:1398 -- 
The last 20 lines of /tmp/ray/session_2024-06-14_00-56-15_898359_4583/logs/dashboard.log (it contains the error message from the dashboard): 
2024-06-14 00:56:17,320	INFO head.py:254 -- Starting dashboard metrics server on port 44227

2024-06-14 00:56:18,631	INFO worker.py:1724 -- Started a local Ray instance.


0,1
Python version:,3.10.14
Ray version:,2.9.0


[33m(raylet)[0m [2024-06-14 00:56:19,190 E 10637 10689] (raylet) agent_manager.cc:84: The raylet exited immediately because one Ray agent failed, agent_name = dashboard_agent/424238335.
[33m(raylet)[0m The raylet fate shares with the agent. This can happen because
[33m(raylet)[0m - The version of `grpcio` doesn't follow Ray's requirement. Agent can segfault with the incorrect `grpcio` version. Check the grpcio version `pip freeze | grep grpcio`.
[33m(raylet)[0m - The agent failed to start because of unexpected error or port conflict. Read the log `cat /tmp/ray/session_latest/logs/{dashboard_agent|runtime_env_agent}.log`. You can find the log file structure here https://docs.ray.io/en/master/ray-observability/ray-logging.html#logging-directory-structure.
[33m(raylet)[0m - The agent is killed by the OS (e.g., out of memory).


In [32]:
from pettingzoo.utils import conversions
from pettingzoo.utils.env import ParallelEnv
import gymnasium as gym
import numpy as np

# Assuming MultiAgentEVChargingEnv is correctly defined somewhere
class MultiAgentEVChargingParaEnv(ParallelEnv):
    """Placeholder PettingZoo parallel environment for the EV charging task.

    The wrapped ``MultiAgentEVChargingEnv`` is constructed but not used yet:
    observations and rewards are random dummy values.  What this class does
    provide is the PettingZoo Parallel API shape (per-agent space methods,
    ``reset`` returning ``(observations, infos)``, ``step`` returning a
    5-tuple), so it can be passed to tooling that expects a ``ParallelEnv``.
    """

    metadata = {'render_modes': ['human'], 'name': "MultiAgentEVCharging"}

    def __init__(self, df, num_agents):
        """Create the wrapped env and declare agent names and per-agent spaces."""
        super().__init__()
        self.env = MultiAgentEVChargingEnv(df=df, num_agents=num_agents)
        self.possible_agents = ['agent_' + str(i) for i in range(num_agents)]
        self.agents = self.possible_agents[:]
        # Agent name -> integer id used by the wrapped environment.
        self.agent_name_mapping = dict(zip(self.possible_agents, range(len(self.possible_agents))))

        # One identical space per agent.
        self.observation_spaces = {
            agent: gym.spaces.Box(low=-1, high=1, shape=(5,), dtype=np.float32)
            for agent in self.possible_agents
        }
        self.action_spaces = {
            agent: gym.spaces.Box(low=np.array([-1], dtype=np.float32),
                                  high=np.array([1], dtype=np.float32), dtype=np.float32)
            for agent in self.possible_agents
        }

    def observation_space(self, agent):
        """Return the observation space of ``agent``."""
        return self.observation_spaces[agent]

    def action_space(self, agent):
        """Return the action space of ``agent``."""
        return self.action_spaces[agent]

    def _dummy_observation(self):
        """Random float32 vector of length 5 (placeholder observation)."""
        return np.random.rand(5).astype(np.float32)

    def observe(self, agent):
        """Return a placeholder observation for ``agent``."""
        return self._dummy_observation()  # Dummy implementation

    def step(self, actions):
        """Return placeholder results for every live agent.

        Returns:
            ``(observations, rewards, terminations, truncations, infos)``, each
            a dict keyed by agent name.  ``actions`` is currently ignored.
        """
        observations = {agent: self._dummy_observation() for agent in self.agents}  # Dummy implementation
        rewards = {agent: float(np.random.rand()) for agent in self.agents}
        terminations = {agent: False for agent in self.agents}
        truncations = {agent: False for agent in self.agents}
        infos = {agent: {} for agent in self.agents}
        return observations, rewards, terminations, truncations, infos

    def reset(self, seed=None, options=None):
        """Restore the full agent list and return ``(observations, infos)``.

        ``seed`` and ``options`` are accepted for API compatibility; the dummy
        observations come from NumPy's global random state.
        """
        self.agents = self.possible_agents[:]
        observations = {agent: self._dummy_observation() for agent in self.agents}  # Dummy implementation
        infos = {agent: {} for agent in self.agents}
        return observations, infos

    def render(self, mode='human'):
        """No-op."""
        pass

    def close(self):
        """No-op."""
        pass

# Import necessary libraries from Stable Baselines3 and PettingZoo
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from pettingzoo.utils.conversions import parallel_to_gym_wrapper

# Create the environment from the price DataFrame loaded at the top of the
# notebook (the original passed the placeholder string "your_dataframe").
env = MultiAgentEVChargingParaEnv(df=df, num_agents=5)
# NOTE(review): the import of `parallel_to_gym_wrapper` above fails with the
# ImportError recorded in this cell's output, so this line cannot run until a
# conversion utility that actually exists is used instead.
gym_env = parallel_to_gym_wrapper(env)  # Wrapping to Gym environment

# Vectorize the environment
vec_env = DummyVecEnv([lambda: gym_env])

# Create and train the PPO model
model = PPO("MlpPolicy", vec_env, verbose=1)
model.learn(total_timesteps=20000)

# Save the model
model.save("ppo_multiagent_ev_charging")


ImportError: cannot import name 'parallel_to_gym_wrapper' from 'pettingzoo.utils.conversions' (/home/fpsaimlserver/miniconda3/envs/karan_3.10/lib/python3.10/site-packages/pettingzoo/utils/conversions.py)

In [None]:
import gymnasium as gym
import pandas as pd
import ray
from ray.rllib.algorithms import ppo
from ray.rllib.env import MultiAgentEnv

class WrappedMultiAgentEVChargingEnv(MultiAgentEnv):
    """RLlib adapter around ``MultiAgentEVChargingEnv``.

    Translates the wrapped environment's ``(observations, rewards, dones)``
    step result into the 5-tuple RLlib expects, including the ``"__all__"``
    entries that signal the end of the episode.
    """

    def __init__(self, env_config):
        """Build the wrapped env from ``env_config['df']`` and ``env_config['num_agents']``."""
        super().__init__()
        self.env = MultiAgentEVChargingEnv(
            df=pd.DataFrame(env_config['df']),
            num_agents=env_config['num_agents'],
        )
        self.observation_space = self.env.observation_space  # Assuming the env defines this
        self.action_space = self.env.action_space  # Assuming the env defines this
        self._agent_ids = set(self.env.agents)
        # Agents that have reported done in the current episode.
        self._finished_agents = set()

    def reset(self, *, seed=None, options=None):
        """Start a new episode and return ``(observations, infos)``.

        ``seed`` and ``options`` are accepted for API compatibility and are not
        forwarded to the wrapped environment.
        """
        self._finished_agents = set()
        return self.env.reset()

    def step(self, action_dict):
        """Step the agents in ``action_dict``.

        Returns:
            ``(observations, rewards, terminateds, truncateds, infos)``.
            ``terminateds["__all__"]`` becomes True once every agent has
            reported done in this episode; nothing is ever truncated.
        """
        observations, rewards, dones = self.env.step(action_dict)

        self._finished_agents.update(agent_id for agent_id, done in dones.items() if done)

        terminateds = dict(dones)
        terminateds["__all__"] = len(self._finished_agents) >= len(self.env.agents)
        truncateds = {agent_id: False for agent_id in dones}
        truncateds["__all__"] = False
        infos = {agent_id: {} for agent_id in observations}
        return observations, rewards, terminateds, truncateds, infos

    def render(self, mode='human'):
        """Delegate rendering to the wrapped environment."""
        return self.env.render(mode)

# Tolerate a Ray instance left running by an earlier cell.
ray.init(ignore_reinit_error=True)

config = {
    # Pass the env class directly; the name "multi_agent_ev_charging" is not
    # registered in this cell.
    "env": WrappedMultiAgentEVChargingEnv,
    "env_config": {
        "df": df,
        "num_agents": 5
    },
    "num_workers": 1,  # Parallelism
    "framework": "torch",
}

algo = ppo.PPO(config=config)

for i in range(5000):  # Number of training iterations
    # The original called the undefined name `ppo_trerun`.
    result = algo.train()
    # The mean episode reward is reported at the top level of the result dict
    # (and under 'sampler_results'); fall back to None if neither is present.
    mean_reward = result.get(
        'episode_reward_mean',
        result.get('sampler_results', {}).get('episode_reward_mean'),
    )
    print(f"Iteration {i}: {mean_reward}")


In [None]:
import ray
from ray import tune
from ray.rllib.algorithms.ppo import PPO
from ray.rllib.env import MultiAgentEnv
from ray.tune.registry import register_env
import pandas as pd

# Assuming MultiAgentEVChargingEnv is already imported and available
class WrappedMultiAgentEVChargingEnv(MultiAgentEnv):
    """RLlib adapter around ``MultiAgentEVChargingEnv``.

    Translates the wrapped environment's ``(observations, rewards, dones)``
    step result into the 5-tuple RLlib expects, including the ``"__all__"``
    entries that signal the end of the episode.
    """

    def __init__(self, env_config):
        """Build the wrapped env from ``env_config['df']`` and ``env_config['num_agents']``."""
        super().__init__()
        self.env = MultiAgentEVChargingEnv(
            df=pd.DataFrame(env_config['df']),
            num_agents=env_config['num_agents'],
        )
        self.observation_space = self.env.observation_space  # Assuming the env defines this
        self.action_space = self.env.action_space  # Assuming the env defines this
        self._agent_ids = set(self.env.agents)
        # Agents that have reported done in the current episode.
        self._finished_agents = set()

    def reset(self, *, seed=None, options=None):
        """Start a new episode and return ``(observations, infos)``.

        ``seed`` and ``options`` are accepted for API compatibility and are not
        forwarded to the wrapped environment.
        """
        self._finished_agents = set()
        return self.env.reset()

    def step(self, action_dict):
        """Step the agents in ``action_dict``.

        Returns:
            ``(observations, rewards, terminateds, truncateds, infos)``.
            ``terminateds["__all__"]`` becomes True once every agent has
            reported done in this episode; nothing is ever truncated.
        """
        observations, rewards, dones = self.env.step(action_dict)

        self._finished_agents.update(agent_id for agent_id, done in dones.items() if done)

        terminateds = dict(dones)
        terminateds["__all__"] = len(self._finished_agents) >= len(self.env.agents)
        truncateds = {agent_id: False for agent_id in dones}
        truncateds["__all__"] = False
        infos = {agent_id: {} for agent_id in observations}
        return observations, rewards, terminateds, truncateds, infos

    def render(self, mode='human'):
        """Delegate rendering to the wrapped environment."""
        return self.env.render(mode)

# Tolerate a Ray instance left running by an earlier cell.
ray.init(ignore_reinit_error=True)

# Register the environment
register_env("multi_agent_ev_charging", lambda config: WrappedMultiAgentEVChargingEnv(config))

# Configuration for Multi Actor Single Critic using PPO
config = {
    "env": "multi_agent_ev_charging",
    "env_config": {
        "df": df,
        "num_agents": 5
    },
    "multiagent": {
        "policies": {
            "shared_policy": (None, None, None, {}),
        },
        # RLlib passes more than the agent id (episode, worker, ...); accept
        # and ignore the extras so every agent maps to the one shared policy.
        "policy_mapping_fn": lambda agent_id, *args, **kwargs: "shared_policy",
    },
    "num_workers": 1,  # Parallelism
    "framework": "torch",
}


# Create the PPO trainer with the configuration
ppo_trainer = PPO(config=config)

# Train the model
for i in range(5000):  # Number of training iterations
    # The original called the undefined name `ppo_trerun`.
    result = ppo_trainer.train()
    # The mean episode reward is reported at the top level of the result dict
    # (and under 'sampler_results'); fall back to None if neither is present.
    mean_reward = result.get(
        'episode_reward_mean',
        result.get('sampler_results', {}).get('episode_reward_mean'),
    )
    print(f"Iteration {i}: {mean_reward}")

In [None]:
# Restart Ray: shut down the running local instance, then start a fresh one.
ray.shutdown()
ray.init()
