In [None]:
import pandas as pd

# Load the CSV file
df = pd.read_csv('t_multisite_electricity_price_202405261055.csv')

df = df[df['distribution_id'] == 11]
df['date_time'] = pd.to_datetime(df['date_time'])

df['hour_of_day'] = df['date_time'].dt.hour + (df['date_time'].dt.minute > 0) * 0.5

# df = df[(df['hour_of_day'] < 8) | (df['hour_of_day'] > 16.5)]

df['hour_of_day'] = df['hour_of_day'].apply(lambda x: x - 12 if x >= 12 else x + 12)
df['electricity_price'] = df['electricty_price_fixed']

df = df.sort_values(by='date_time', ascending=True, ignore_index=True)

pd.set_option('display.max_rows', 99)

display(df.tail(99))

print(df['electricty_price_fixed'].max())
print(df['electricty_price_fixed'].min())


In [125]:
import gymnasium as gym
import numpy as np
import pandas as pd
import random

class MultiAgentEVChargingEnv(gym.Env):
    metadata = {'render.modes': ['human']}

    def __init__(self, df, num_agents):
        super(MultiAgentEVChargingEnv, self).__init__()
        self.df = df

        self.num_agents = num_agents  # Number of vehicles

        self.max_battery_level = 100.0 * 3
        self.min_battery_level = 0

        self.target_soc = 90  # target state of charge
        self.possible_charge_powers = [11, 22, 25]
        
        self.max_charge_power = max(self.possible_charge_powers)
        self.min_charge_power = min(self.possible_charge_powers)
        
        self.max_battery_cap = 125
        self.min_battery_cap = 55
        
        self.max_deadline_hours = 24
        self.min_deadline_hours = 6

        self.agents = {i: self.create_agent() for i in range(self.num_agents)}
        
        self.action_space = gym.spaces.Box(low=np.array([-1], dtype=np.float32), 
                                           high=np.array([1], dtype=np.float32), dtype=np.float32)
        self.observation_space = gym.spaces.Box(low=-1, high=1, shape=(5,), dtype=np.float32)

    
    def rescale(self, val, max_val, min_val):
        if min_val == max_val:
            return 0
        return 2 * (val - min_val) / (max_val - min_val) - 1

    def rescale_to_original(self, rescaled_data, original_max, original_min):
        return (rescaled_data + 1) * (original_max - original_min) / 2 + original_min

    def create_agent(self):
        """ Initialize a new agent with unique parameters. """

        agent = {}
        agent['deadline_hours'] = float(random.randint(self.min_deadline_hours, self.max_deadline_hours))
        agent['battery_level'] = random.randint(10, 60)
        agent['battery_capacity'] = random.randint(self.min_battery_cap, self.max_battery_cap)
        agent['max_draw'] = random.choice(self.possible_charge_powers)

        # agent['deadline_hours'] = self.rescale(agent['deadline_hours'], self.max_deadline_hours, self.min_deadline_hours)
        # agent['battery_level'] = self.rescale(agent['battery_level'], self.max_battery_level, self.min_battery_level)
        # agent['battery_capacity'] = self.rescale(agent['battery_capacity'], self.max_battery_cap, self.min_battery_cap)
        # agent['max_draw'] = self.rescale(agent['max_draw'], self.max_charge_power, self.min_charge_power)
        
        return agent

    def setup_time_prices(self):
        
        self.current_index = random.choice(self.df.index.tolist())
        price_size = ((self.max_deadline_hours*2) +5)
        rows = self.df.iloc[self.current_index:self.current_index + price_size]

        self.prices = rows['electricity_price'].values
        if len(self.prices) < (self.max_deadline_hours*2):
            return self.setup_time_prices()

    def increment_time_prices(self):
        self.current_price = self.prices[0]
        self.prices = self.prices[1:]

    def reset(self):
        """ Reset the state of each agent. """
        self.setup_time_prices()
        self.increment_time_prices()
        
        states = {}
        for agent_id in self.agents:
            states[agent_id] = self.reset_agent(agent_id)
        return states, {}

    def reset_agent(self, agent_id):
        """ Reset a single agent's state. """
        agent = self.agents[agent_id]

        agent['deadline_hours'] = float(random.randint(self.min_deadline_hours, self.max_deadline_hours))
        agent['battery_level'] = random.randint(10, 60)
        agent['battery_capacity'] = random.randint(self.min_battery_cap, self.max_battery_cap)
        agent['max_draw'] = random.choice(self.possible_charge_powers)

        return self.calculate_observation(agent)

    def calculate_observation(self, agent):
        
        deadline_hours = self.rescale(agent['deadline_hours'], self.max_deadline_hours, 0)
        battery_level = self.rescale(agent['battery_level'], self.max_battery_level, self.min_battery_level)
        battery_capacity = self.rescale(agent['battery_capacity'], self.max_battery_cap, self.min_battery_cap)
        max_draw = self.rescale(agent['max_draw'], self.max_charge_power, self.min_charge_power)

        # print(deadline_hours, agent['deadline_hours'])
        
        return np.array(
            [
                float(battery_level), 
                float(deadline_hours), 
                float(self.current_price), 
                float(battery_capacity), 
                float(max_draw)
            ], 
            dtype=np.float32)

    def step(self, actions):
        """ Apply actions, update states, and return observations and rewards for all agents. """
        next_states = {}
        rewards = {}
        dones = {}
        truncateds = {'__all__': False}
        infos = {}

        self.increment_time_prices()
        
        for agent_id, action in actions.items():
            next_states[agent_id], rewards[agent_id], dones[agent_id], truncateds[agent_id], infos[agent_id] = self.step_agent(agent_id, action)
        # print(np.alltrue(dones.values()))
        dones['__all__'] = np.alltrue(list(dones.values()))
        # print(dones)
        return next_states, rewards, dones, truncateds, infos

    def step_agent(self, agent_id, action):
        """ Update state for a single agent based on its action. """
        agent = self.agents[agent_id]

        # agent['deadline_hours'] = self.rescale_to_original(agent['deadline_hours'], self.max_deadline_hours, self.min_deadline_hours)
        # agent['battery_level'] = self.rescale_to_original(agent['battery_level'], self.max_battery_level, self.min_battery_level)
        # agent['battery_capacity'] = self.rescale_to_original(agent['battery_capacity'], self.max_battery_cap, self.min_battery_cap)
        # agent['max_draw'] = self.rescale_to_original(agent['max_draw'], self.max_charge_power, self.min_charge_power)

        
        charge_power = self.rescale_to_original(action, agent['max_draw'], 0)
        reward = 0.0

        if agent['deadline_hours'] > 0:
            agent['deadline_hours'] -= 0.5

            added_soc = (charge_power * 0.5 * 100) / agent['battery_capacity']
            prev_battery_level = agent['battery_level']
            agent['battery_level'] = agent['battery_level'] + added_soc

            if agent['battery_level'] > self.max_battery_level:
                agent['battery_level'] = self.max_battery_level
            # agent['battery_level'] = min(self.max_battery_level, agent['battery_level'] + added_soc)

            changed_soc = agent['battery_level'] - prev_battery_level 

            if abs(agent['battery_level'] - self.target_soc) < 5:
                reward += 20  # Reward for reaching or exceeding target SOC

            reward -= float(abs(self.target_soc - agent['battery_level']))

            reward -= pow(float(charge_power * self.current_price * 3) / 2, 2)

        done = bool(agent['deadline_hours'] <= 0)

        # print(agent)

        return self.calculate_observation(agent), reward, done, False, {}
        
    def render(self, mode='human'):
        for agent_id, agent in self.agents.items():
            print(f'Agent {agent_id}: Battery Level: {agent["battery_level"]:.2f}%')


In [None]:
env = MultiAgentEVChargingEnv(df, num_agents=5)

env.reset()

for _ in range(24):
    a = env.step({0:0,1:0,2:0,3:0,4:0})
    print(a[0])
    

In [None]:
from pettingzoo.utils import conversions
from pettingzoo.utils.env import ParallelEnv
import gymnasium as gym
import numpy as np

# Assuming MultiAgentEVChargingEnv is correctly defined somewhere
class MultiAgentEVChargingParaEnv(ParallelEnv):
    metadata = {'render_modes': ['human'], 'name': "MultiAgentEVCharging"}

    def __init__(self, df, num_agents):
        super().__init__()
        self.env = MultiAgentEVChargingEnv(df=df, num_agents=num_agents)
        self.agents = ['agent_' + str(i) for i in range(num_agents)]
        self.possible_agents = self.agents[:]
        self.agent_name_mapping = dict(zip(self.possible_agents, list(range(len(self.possible_agents)))))

        # Setup observation and action spaces
        self.observation_space = gym.spaces.Box(low=-1, high=1, shape=(5,), dtype=np.float32)
        self.action_space = gym.spaces.Box(low=np.array([-1], dtype=np.float32), high=np.array([1], dtype=np.float32), dtype=np.float32)

    def observe(self, agent):
        # Implement your observation here
        return np.random.rand(5).astype(np.float32)  # Dummy implementation

    def step(self, actions):
        # Implement your step logic here
        obs = {agent: np.random.rand(5).astype(np.float32) for agent in self.agents}  # Dummy implementation
        rewards = {agent: np.random.rand() for agent in self.agents}
        dones = {agent: False for agent in self.agents}
        infos = {agent: {} for agent in self.agents}
        return obs, rewards, dones, infos

    def reset(self):
        return {agent: np.random.rand(5).astype(np.float32) for agent in self.agents}  # Dummy implementation

    def render(self, mode='human'):
        pass

    def close(self):
        pass

# Import necessary libraries from Stable Baselines3 and PettingZoo
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from pettingzoo.utils.conversions import parallel_to_gym_wrapper

# Create your environment and wrap it
env = MultiAgentEVChargingParaEnv(df="your_dataframe", num_agents=5)
gym_env = parallel_to_gym_wrapper(env)  # Wrapping to Gym environment

# Vectorize the environment
vec_env = DummyVecEnv([lambda: gym_env])

# Create and train the PPO model
model = PPO("MlpPolicy", vec_env, verbose=1)
model.learn(total_timesteps=20000)

# Save the model
model.save("ppo_multiagent_ev_charging")


In [None]:
import gymnasium as gym
import ray
from ray.rllib.algorithms import ppo
from ray.rllib.env import MultiAgentEnv

class WrappedMultiAgentEVChargingEnv(MultiAgentEnv):
    def __init__(self, env_config):
        self.env = MultiAgentEVChargingEnv(df=pd.DataFrame(env_config['df']), num_agents=env_config['num_agents'])
        self.observation_space = self.env.observation_space  # Assuming the env defines this
        self.action_space = self.env.action_space  # Assuming the env defines this

    def reset(self, seed, options):
        return self.env.reset()
    
    def step(self, action_dict):
        return self.env.step(action_dict)
    
    def render(self, mode='human'):
        return self.env.render(mode)

# ray.init()

config = {
    "env": "multi_agent_ev_charging",
    "env_config": {
        "df": df, 
        "num_agents": 5
    },
    "num_workers": 1,  # Parallelism
    "framework": "torch",
}

algo = ppo.PPO(env=WrappedMultiAgentEVChargingEnv, config=config)

# for i in range(5000):  # Number of training iterations
#     result = algo.train()
#     # print(result)
#     print(f"Iteration {i}")
#     print(f"Mean Rew: {result['env_runners']['episode_reward_mean']}")
#     print(f"Max Rew: {result['env_runners']['episode_reward_max']}")
#     print(f"Min Rew: {result['env_runners']['episode_reward_min']}")
#     print(f"Len: {result['env_runners']['episode_len_mean']}")
#     print('-----')


In [123]:
for i in range(50):  # Number of training iterations
    result = algo.train()
    # print(result)
    print(f"Iteration {i}")
    print(f"Mean Rew: {result['env_runners']['episode_reward_mean']}")
    print(f"Max Rew: {result['env_runners']['episode_reward_max']}")
    print(f"Min Rew: {result['env_runners']['episode_reward_min']}")
    print(f"Len: {result['env_runners']['episode_len_mean']}")
    print('-----')


Iteration 0
Mean Rew: -9274.77702978719
Max Rew: -2569.263101229062
Min Rew: -19828.432112211616
Len: 42.68327402135231
-----
Iteration 1
Mean Rew: -7972.398615147177
Max Rew: -3076.499425424049
Min Rew: -18428.48056056974
Len: 42.1
-----
Iteration 2
Mean Rew: -7193.858951010754
Max Rew: -2608.7546023883315
Min Rew: -13126.44188993684
Len: 42.22
-----
Iteration 3
Mean Rew: -6413.379787264503
Max Rew: -2349.660227917722
Min Rew: -15826.988038714848
Len: 43.14
-----
Iteration 4
Mean Rew: -5664.099747275227
Max Rew: -2842.8071452952827
Min Rew: -10942.158248440312
Len: 43.3
-----
Iteration 5
Mean Rew: -4837.8263145826095
Max Rew: -2365.0265080002077
Min Rew: -8771.960169247646
Len: 41.9
-----
Iteration 6
Mean Rew: -4490.326909340339
Max Rew: -1839.297871429215
Min Rew: -7348.455090438435
Len: 42.3
-----
Iteration 7
Mean Rew: -3979.5656005393607
Max Rew: -2041.7192543289573
Min Rew: -6997.796490572879
Len: 41.64
-----
Iteration 8
Mean Rew: -3566.8492118416743
Max Rew: -1551.6821689166063
M

In [None]:
import ray
# import supersuit as ss
from ray import tune
from ray.rllib.algorithms.ppo import PPOConfig
from ray.rllib.env.wrappers.pettingzoo_env import ParallelPettingZooEnv
from ray.rllib.models import ModelCatalog
from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.tune.registry import register_env
# from torch import nn

env_config={
    "df": df, 
    "num_agents": 5
}

config = (
    PPOConfig()
    .environment(env=WrappedMultiAgentEVChargingEnv, clip_actions=True, env_config=env_config)
    .rollouts(num_rollout_workers=4, rollout_fragment_length=128)
    .training(
        train_batch_size=512,
        lr=2e-5,
        gamma=0.99,
        lambda_=0.9,
        use_gae=True,
        clip_param=0.4,
        grad_clip=None,
        entropy_coeff=0.1,
        vf_loss_coeff=0.25,
        sgd_minibatch_size=64,
        num_sgd_iter=10,
    )
    .debugging(log_level="ERROR")
    .framework(framework="torch")
    .resources(num_gpus=int(0))
)

tune.run(
    "PPO",
    name="PPO",
    stop={"timesteps_total": 500000},
    # checkpoint_freq=10,
    # local_dir="~/ray_results/" + env_name,
    config=config.to_dict(),
)


In [145]:
num_agents = 5
env = MultiAgentEVChargingEnv(df, num_agents=num_agents)

battery_caps = {}
battery_levels = {}
time = {}
reward = {}
action = {}

max_draw = {}

for i in range(num_agents):
    battery_levels[i] = []
    time[i] = []
    reward[i] = []
    action[i] = []

obs = env.reset()
closed = False

obs = obs[0]


# float(battery_level), 
# float(deadline_hours), 
# float(self.current_price), 
# float(battery_capacity), 
# float(max_draw)

current_time = 0
for id, ob in obs.items():
    battery_capacity = env.rescale_to_original(obs[id][3], env.max_battery_cap, env.min_battery_cap)
    battery_caps[id] = battery_capacity
    battery_level = env.rescale_to_original(obs[id][0], env.max_battery_level, env.min_battery_level)
    battery_levels[id].append(battery_level)

    max_draw[id] = env.rescale_to_original(obs[id][4], env.max_charge_power, env.min_charge_power)
    
    time[id].append(current_time)
    reward[id].append(None)
    action[id].append(None)


while not closed:
    current_time += 1
    # print(obs)
    actions = algo.compute_actions(obs)
    next_states, rewards, dones, truncateds, infos = env.step(actions)
    closed = dones['__all__']

    obs = next_states

    for id, ob in obs.items():
        battery_level = env.rescale_to_original(obs[id][0], env.max_battery_level, env.min_battery_level)
        battery_levels[id].append(battery_level)
        time[id].append(current_time)
        reward[id].append(rewards[id])

        act = actions[id]
        # print(max_draw)
        act = env.rescale_to_original(act, max_draw[id], 0)
        action[id].append(act)


print(reward)

# print(next_states.items())

{0: [None, -66.74464331442232, -55.74464331442232, -44.74464331442232, -33.74464331442232, -9.114815742158854, -6.044502258300781, -6.044502258300781, -6.044502258300781, -6.044502258300781, -6.044502258300781, -6.044502258300781, 14.78717340503475, 16.274637845512586, 17.63678741455078, 17.63678741455078, 17.63678741455078, 17.63678741455078, 17.63678741455078, 17.63678741455078, 17.63678741455078, 18.36509897674048, 18.443946838378906, 19.211196750383973, 19.76877607776055, 19.846824645996094, 19.846824645996094, 18.064430984255296, 18.315711975097656, 18.315711975097656, 18.315711975097656, 18.315711975097656, 18.315711975097656, 18.315711975097656, 18.315711975097656, 18.315711975097656, 18.315711975097656, 18.315711975097656, 18.315711975097656, 18.315711975097656, 18.315711975097656, 18.315711975097656, 18.22179030239767, 18.22271728515625, 18.22271728515625, 16.864607569900215, 17.09259033203125, 17.09259033203125, 17.09259033203125], 1: [None, -77.770096751643, -60.878205272150

In [146]:
from bokeh.plotting import figure, show, output_file
from bokeh.models import ColumnDataSource, Range1d, LinearAxis
from bokeh.io import output_notebook


for i in range(num_agents):

    output_notebook()

    source = ColumnDataSource(data={
        'time': time[i],
        'battery_level': battery_levels[i],
        'action': action[i],
        # 'price': prices,
        'reward': reward[i]
    })

    print(' Capacity', battery_caps[i])
    print(' Max Charger OP', max_draw[i])
    # print(' Init Battery', env.init_battery_level)
    # print(time[i])
    
    # Create the main plot
    p = figure(title="Combined Graph", x_axis_label='Time (hours)', width=1200, height=400)

    # Battery level plot
    p.line('time', 'battery_level', source=source, line_width=2, color='blue', legend_label="Battery Level")
    p.line(x=[0, max(time[i])], y=[env.target_soc+5, env.target_soc+5], line_color='red', line_width=2)
    p.line(x=[0, max(time[i])], y=[env.target_soc-5, env.target_soc-5], line_color='red', line_width=2)
    p.yaxis.axis_label = "Battery Level (%)"
    
    # Actions plot
    p.extra_y_ranges = {"action": Range1d(start=0, end=max_draw[i] + 1)}
    p.add_layout(LinearAxis(y_range_name="action", axis_label="Action (charge rate)"), 'right')
    p.step('time', 'action', source=source, line_width=2, mode='after', color='orange', y_range_name="action", legend_label="Action")

    # Rewards plot
    p.extra_y_ranges["reward"] = Range1d(start=min(reward[i][1:])-1, end=max(reward[i][1:])+1)
    p.add_layout(LinearAxis(y_range_name="reward", axis_label="Reward"), 'right')
    p.line('time', 'reward', source=source, line_width=2, color='green', y_range_name="reward", legend_label="Reward")

    # Price plot
    # p.extra_y_ranges["price"] = Range1d(start=-1, end=1)
    # p.add_layout(LinearAxis(y_range_name="price", axis_label="Price"), 'right')
    # p.line('time', 'price', source=source, line_width=2, color='purple', y_range_name="price", legend_label="Price")

    # Add legends and position them
    # p.legend.location = "top_left"

    # Show the plot
    show(p)


 Capacity 100.00000044703484
 Max Charger OP 22.000000178813934


 Capacity 73.99999991059303
 Max Charger OP 25.0


 Capacity 108.00000101327896
 Max Charger OP 25.0


 Capacity 82.99999989569187
 Max Charger OP 22.000000178813934


 Capacity 122.00000017881393
 Max Charger OP 22.000000178813934


In [None]:
import ray
from ray import tune
from ray.rllib.algorithms.ppo import PPO
from ray.rllib.env import MultiAgentEnv
from ray.tune.registry import register_env
import pandas as pd

# Assuming MultiAgentEVChargingEnv is already imported and available
class WrappedMultiAgentEVChargingEnv(MultiAgentEnv):
    def __init__(self, env_config):
        self.env = MultiAgentEVChargingEnv(df=pd.DataFrame(env_config['df']), num_agents=env_config['num_agents'])
        self.observation_space = self.env.observation_space  # Assuming the env defines this
        self.action_space = self.env.action_space  # Assuming the env defines this

    def reset(self, seed=0, options={}):


        return self.env.reset()
    
    def step(self, action_dict):
        return self.env.step(action_dict)
    
    def render(self, mode='human'):
        return self.env.render(mode)

# ray.init()

# Register the environment
register_env("multi_agent_ev_charging", lambda config: WrappedMultiAgentEVChargingEnv(config))

def policy_mapping_fn(agent_id, episode, worker, **kwargs):
    pol_id = 'shared_policy'
    return pol_id

# Configuration for Multi Actor Single Critic using PPO
config = {
    "env": "multi_agent_ev_charging",
    "env_config": {
        "df": df, 
        "num_agents": 5
    },
    "multiagent": {
        "policies": {
            "shared_policy": (None, None, None, {}),
        },
        "policy_mapping_fn": policy_mapping_fn,
    },
    "num_workers": 1,  # Parallelism
    "framework": "torch",
}


# Create the PPO trainer with the configuration
ppo_trainer = PPO(config=config)

# Train the model
for i in range(5000):  # Number of training iterations
    result = ppo_trainer.train()
    print(f"Iteration {i}: {result['metrics']['episode_reward_mean']}")

In [None]:
# Shutdown Ray
ray.shutdown()
ray.init()

