In [34]:

from stable_baselines3 import PPO

import torch
if torch.backends.mps.is_available():
    mps_device = torch.device("mps")
    x = torch.ones(1, device=mps_device)
    print(x)
else:
    print ("MPS device not found.")

tensor([1.], device='mps:0')


In [35]:
import pandas as pd

# Load the CSV file
df = pd.read_csv('t_state_data_site_202405011106.csv')

# Parse the 'timestamp' column to datetime if needed
df['timestamp'] = pd.to_datetime(df['timestamp'])

# Check the first few rows of the dataframe
display(df.head())


Unnamed: 0,id,site_id,timestamp,asc_power_kva,total_power_capacity_kw,forecast_non_ev_power_kw,forecast_headroom_power_kw,electricity_price,actual_total_power_kw,scheduled_total_power_kw,no_of_available_chargers_11kw,no_of_available_chargers_7kw,no_of_available_chargers_50kw,time_slot_no,base_time_slot_no
0,275985,753,2023-02-13 06:30:00,400.0,,141.428452,258.571548,0.108902,,,8,,,,24.0
1,325596,753,2023-06-10 04:00:00,400.0,,184.829665,215.170335,0.111245,,,8,,,,19.0
2,325597,753,2023-06-10 04:30:00,400.0,,186.325789,213.674211,0.111245,,,8,,,,20.0
3,325598,753,2023-06-10 05:00:00,400.0,,178.370818,221.629182,0.111245,,,8,,,,21.0
4,325599,753,2023-06-10 05:30:00,400.0,,181.848093,218.151907,0.111245,,,8,,,,22.0


In [39]:
import gymnasium as gym
import numpy as np
import pandas as pd
import random

class EVChargingEnv(gym.Env):
    metadata = {'render.modes': ['human']}

    def __init__(self, df):
        super(EVChargingEnv, self).__init__()
        self.df = df
        self.battery_capacity = 75.0  # Maximum battery capacity in kWh
        self.battery_level = random.randint(35, 75)
        self.max_draw = 22.0  # Max charger capacity in kW
        self.dt = 1.0  # Time step in hours
        self.deadline = 24.0  # 24 hours until full charge needed
        self.targer_soc = 95
        self.session_price = 0

        # Define action and observation space
        self.action_space = gym.spaces.Box(low=np.array([0], dtype=np.float32), 
                                           high=np.array([self.max_draw], dtype=np.float32), dtype=np.float32)
        self.observation_space = gym.spaces.Box(low=np.array([0, 0, 0], dtype=np.float32), 
                                                high=np.array([self.targer_soc, 1, self.deadline], dtype=np.float32), 
                                                dtype=np.float32)

    def reset(self,seed=0):
        self.current_index = 0  # Start from the first row of the dataframe
        self.battery_level = random.randint(35, 75)
        self.time = 0.0
        row = self.df.iloc[self.current_index]
        self.current_price = row['electricity_price']
        self.session_price = 0
        return np.array([self.battery_level, self.current_price, self.time], dtype=np.float32), {}

    def step(self, action):
        charge_power = action[0]

        reward = 0
        added_soc = (charge_power * 0.5 * 100) / self.battery_capacity

        if self.battery_level > 94 and charge_power == 0:
            reward += 100
        elif self.battery_level > 94 and charge_power > 0:
            reward -= 100

        self.battery_level += added_soc
        reward -= 95 - min(self.battery_level, 95)
        
        if (self.time >= self.deadline-1 and self.battery_level < 95):
            reward -= 300

        self.time += self.dt
        self.current_index = min(self.current_index + 1, len(self.df) - 1)  # Update index, preventing overflow
        row = self.df.iloc[self.current_index]
        self.current_price = row['electricity_price']

        self.session_price += self.current_price * 0.5 * charge_power

        done = self.time >= self.deadline # or self.battery_level >= self.max_battery

        trunc = self.battery_level > 94
        # if self.battery_level > 94:
        #     trunc

        return np.array([self.battery_level, self.current_price, self.time], dtype=np.float32), reward, bool(done), bool(trunc), {}

    def render(self, mode='human'):
        print(f'Time: {self.time}, Battery Level: {self.battery_level:.2f} %, Price: ${self.current_price:.3f} per kWh')


In [43]:
from stable_baselines3 import TD3
from stable_baselines3.common.env_checker import check_env
from stable_baselines3.common.noise import NormalActionNoise

# Check the environment
env = EVChargingEnv(df)
check_env(env)

# Create the TD3 agent
n_actions = env.action_space.shape[0]
action_noise = NormalActionNoise(mean=np.zeros(n_actions), sigma=0.1 * np.ones(n_actions))
device = "mps" if torch.backends.mps.is_available() else "cpu"
print(f"Using device: {device}")

model = TD3("MlpPolicy", env, action_noise=action_noise, verbose=1, device=device)

# Train the model
model.learn(total_timesteps=100000)

# Save the model
model.save("td3_ev_charging")


Using mps device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 6.25     |
|    ep_rew_mean     | -132     |
| time/              |          |
|    episodes        | 4        |
|    fps             | 2684     |
|    time_elapsed    | 0        |
|    total_timesteps | 25       |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 5.75     |
|    ep_rew_mean     | -105     |
| time/              |          |
|    episodes        | 8        |
|    fps             | 2634     |
|    time_elapsed    | 0        |
|    total_timesteps | 46       |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 5.83     |
|    ep_rew_mean     | -112     |
| time/              |          |
|    episodes        | 12       |
|    fps             |

In [None]:
model.learn(total_timesteps=100000)


----------------------------------
| rollout/           |           |
|    ep_len_mean     | 24        |
|    ep_rew_mean     | -1.74e+03 |
| time/              |           |
|    episodes        | 4         |
|    fps             | 2525      |
|    time_elapsed    | 0         |
|    total_timesteps | 96        |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 24        |
|    ep_rew_mean     | -1.25e+03 |
| time/              |           |
|    episodes        | 8         |
|    fps             | 225       |
|    time_elapsed    | 0         |
|    total_timesteps | 192       |
| train/             |           |
|    actor_loss      | 635       |
|    critic_loss     | 1.13e+03  |
|    learning_rate   | 0.001     |
|    n_updates       | 9984      |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 24        |
|    ep_rew_mean    

KeyboardInterrupt: 

In [None]:
obs, reward, done,trunc, _ = env.step([9])  # Unpack and use only the observation for the next predict call
env.render()


Time: 1.0, Battery Level: 44.00 %, Price: $0.111 per kWh


In [None]:
obs, _ = env.reset()  # Unpack to get only the observation
for _ in range(100):
    action, _states = model.predict(obs, deterministic=True)
    print(f'Charge at {action[0]} kwh')
    obs, reward, done,trunc, _ = env.step(action)  # Unpack and use only the observation for the next predict call
    env.render()
    print(f'Reward {reward}')
    if done:
        obs, _ = env.reset()  # Reset the environment and unpack the observation

Charge at 0.0 kwh
Time: 1.0, Battery Level: 36.00 %, Price: $0.111 per kWh
Reward -59.0
Charge at 0.0 kwh
Time: 2.0, Battery Level: 36.00 %, Price: $0.111 per kWh
Reward -59.0
Charge at 0.0 kwh
Time: 3.0, Battery Level: 36.00 %, Price: $0.111 per kWh
Reward -59.0
Charge at 0.0 kwh
Time: 4.0, Battery Level: 36.00 %, Price: $0.111 per kWh
Reward -59.0
Charge at 0.0 kwh
Time: 5.0, Battery Level: 36.00 %, Price: $0.143 per kWh
Reward -59.0
Charge at 0.0 kwh
Time: 6.0, Battery Level: 36.00 %, Price: $0.143 per kWh
Reward -59.0
Charge at 0.0 kwh
Time: 7.0, Battery Level: 36.00 %, Price: $0.109 per kWh
Reward -59.0
Charge at 0.0 kwh
Time: 8.0, Battery Level: 36.00 %, Price: $0.111 per kWh
Reward -59.0
Charge at 0.0 kwh
Time: 9.0, Battery Level: 36.00 %, Price: $0.111 per kWh
Reward -59.0
Charge at 0.0 kwh
Time: 10.0, Battery Level: 36.00 %, Price: $0.111 per kWh
Reward -59.0
Charge at 0.0 kwh
Time: 11.0, Battery Level: 36.00 %, Price: $0.111 per kWh
Reward -59.0
Charge at 0.0 kwh
Time: 12.0, 