In [1]:
import numpy as np
from gym import Env
from gym.spaces import Box, Discrete
import random

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers.legacy import Adam

from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory

# Smart Charging with Reinforcement Learning

This notebook trains a deep Q-network (DQN) agent to regulate the charging of an electric taxi over a two-hour window in which the charging rate is adjusted every 15 minutes. The maximum charging rate is 22 kW. After the two hours, the energy demand of the taxi is drawn from a normal distribution with mean 30 kWh and standard deviation 5 kWh. The charging cost for each 15-minute interval is $\alpha_t \cdot e^{p}$, where $\alpha_t$ is a time coefficient (held constant here) and $p$ is the charging rate divided by 4, i.e. the energy charged during that 15-minute interval.

We used the following tutorial for our coding: https://www.section.io/engineering-education/building-a-reinforcement-learning-environment-using-openai-gym/

In [2]:
class ChargingEnv(Env):
    """Gym environment for charging an electric taxi in 15-minute steps.

    An episode lasts 120 minutes, i.e. eight calls to ``step``. The battery
    starts at a random integer level between 5 and 15. Each action
    (0, 1, 2 or 3) selects a charging rate of ``action * max_charging_rate / 3``.

    Every step costs ``alpha_t * exp(power)``, where ``power`` is the amount
    added to the battery in that step, and the reward is the negative cost.
    On the final step an energy demand is drawn from ``N(mu, sigma)``; if the
    battery level is below it, the reward for that step is ``-10000`` instead
    of the negative cost.

    The observation is the battery level as a plain number (not a length-1
    array), both from ``reset`` and from ``step``.

    Args:
        max_charging_rate: Charging rate per hour selected by the highest action.
        mu: Mean of the normally distributed energy demand.
        sigma: Standard deviation of the energy demand.
        alpha_t: Coefficient of the exponential charging cost.
    """

    def __init__(self, max_charging_rate=22, mu=30, sigma=5, alpha_t=0.25):
        self.max_charging_rate = max_charging_rate
        self.battery_capacity = 100  # maximum battery capacity
        self.mu = mu
        self.sigma = sigma
        self.alpha_t = alpha_t
        self.action_space = Discrete(4)  # 4 charging actions: zero, low, medium, high
        # Build the bounds as float32 up front: handing integer arrays to Box
        # makes it down-cast them and log
        # "Box bound precision lowered by casting to float32".
        self.observation_space = Box(
            low=np.array([0], dtype=np.float32),
            high=np.array([self.battery_capacity], dtype=np.float32),
            dtype=np.float32,
        )
        self.reset_state()  # reset battery
        self.charging_length = 120  # 120 minutes of charging

    def reset_state(self):
        """Draw a new random initial battery level (integer between 5 and 15)."""
        self.state = 10 + random.randint(-5, 5)

    def step(self, action):
        """Charge for one 15-minute interval with the selected action.

        Args:
            action: Integer in ``0..3`` scaling the charging rate from zero
                up to ``max_charging_rate``.

        Returns:
            Tuple ``(state, reward, done, info)``: the battery level after
            charging, the reward for this step, whether the 120 minutes are
            used up, and an empty info dict.
        """
        charging_rate = action * (self.max_charging_rate / 3)  # highest action -> full rate per hour
        power = charging_rate * 0.25  # amount charged during a 15-minute interval
        self.state = min(self.state + power, self.battery_capacity)
        self.charging_length -= 15  # subtract 15min from charging length

        done = self.charging_length <= 0

        # Charging cost as per the given formula; this is the reward of every
        # step unless the final demand check below fails.
        charging_cost = self.alpha_t * np.exp(power)
        reward = -charging_cost

        if done:
            energy_demand = np.random.normal(self.mu, self.sigma)
            if self.state < energy_demand:
                # Penalty for running out of energy; it replaces the cost of
                # the last step rather than being added to it.
                reward = -10000

        # Placeholder for info
        info = {}

        return self.state, reward, done, info

    def reset(self):
        """Start a new episode and return the initial battery level."""
        self.reset_state()
        self.charging_length = 120
        return self.state

In [3]:
# Seed the generators the environment draws from (`random` for the initial
# battery level, `np.random` for the final energy demand) so that a fresh
# Restart & Run All starts from the same random stream.
# NOTE(review): TensorFlow's own random state (e.g. weight initialisation) is
# not seeded here, so training results may still vary between runs.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

env = ChargingEnv()

# Shape of one observation and number of discrete actions, used to size the network.
states = env.observation_space.shape
actions = env.action_space.n

  logger.warn(f"Box bound precision lowered by casting to {self.dtype}")


In [5]:
# Q-network: two hidden ReLU layers of 24 units each, followed by one linear
# output per action.
model = Sequential([
    Dense(24, activation='relu', input_shape=states),
    Dense(24, activation='relu'),
    Dense(actions, activation='linear'),
])

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 24)                48        
                                                                 
 dense_1 (Dense)             (None, 24)                600       
                                                                 
 dense_2 (Dense)             (None, 4)                 100       
                                                                 
Total params: 748
Trainable params: 748
Non-trainable params: 0
_________________________________________________________________


In [6]:
def build_agent(model, actions, warmup_steps=1000, memory_limit=50000, target_model_update=1):
    """Create a DQN agent with a Boltzmann policy and sequential replay memory.

    Args:
        model: Keras model that outputs one Q-value per action.
        actions: Number of discrete actions.
        warmup_steps: Passed to ``DQNAgent`` as ``nb_steps_warmup``.
        memory_limit: Maximum number of entries kept in the replay memory.
        target_model_update: Passed through to ``DQNAgent``.
            NOTE(review): in keras-rl a value >= 1 is read as "hard-copy the
            target network every N steps" and a value < 1 as a soft-update
            factor, so the default of 1 would refresh the target network on
            every step -- confirm that this is intended.

    Returns:
        The (not yet compiled) ``DQNAgent``.
    """
    policy = BoltzmannQPolicy()
    memory = SequentialMemory(limit=memory_limit, window_length=1)
    return DQNAgent(
        model=model,
        memory=memory,
        policy=policy,
        nb_actions=actions,
        nb_steps_warmup=warmup_steps,
        target_model_update=target_model_update,
    )

In [7]:
# Training hyper-parameters, named so they are easy to find and tune.
WARMUP_STEPS = 1000
LEARNING_RATE = 0.01
TRAINING_STEPS = 20000

dqn = build_agent(model, actions, warmup_steps=WARMUP_STEPS)
dqn.compile(Adam(learning_rate=LEARNING_RATE), metrics=['mae'])
dqn.fit(env, nb_steps=TRAINING_STEPS, visualize=False, verbose=1)

Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089


2023-08-14 11:22:03.359793: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:357] MLIR V1 optimization pass is not enabled
2023-08-14 11:22:03.365623: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2023-08-14 11:22:03.379250: W tensorflow/c/c_api.cc:291] Operation '{name:'dense_2_1/kernel/Assign' id:135 op device:{requested: '', assigned: ''} def:{{{node dense_2_1/kernel/Assign}} = AssignVariableOp[_has_manual_control_dependencies=true, dtype=DT_FLOAT, validate_shape=false](dense_2_1/kernel, dense_2_1/kernel/Initializer/stateless_random_uniform)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.


Training for 20000 steps ...
Interval 1 (0 steps performed)
  103/10000 [..............................] - ETA: 14s - reward: -1170.9030

  updates=self.state_updates,
2023-08-14 11:22:03.604376: W tensorflow/c/c_api.cc:291] Operation '{name:'dense_2/BiasAdd' id:73 op device:{requested: '', assigned: ''} def:{{{node dense_2/BiasAdd}} = BiasAdd[T=DT_FLOAT, _has_manual_control_dependencies=true, data_format="NHWC"](dense_2/MatMul, dense_2/BiasAdd/ReadVariableOp)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.
2023-08-14 11:22:03.615322: W tensorflow/c/c_api.cc:291] Operation '{name:'count_1/Assign' id:242 op device:{requested: '', assigned: ''} def:{{{node count_1/Assign}} = AssignVariableOp[_has_manual_control_dependencies=true, dtype=DT_FLOAT, validate_shape=false](count_1, count_1/Initializer/zeros)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modif

  966/10000 [=>............................] - ETA: 13s - reward: -1236.4821

2023-08-14 11:22:05.168861: W tensorflow/c/c_api.cc:291] Operation '{name:'dense_2_1/BiasAdd' id:145 op device:{requested: '', assigned: ''} def:{{{node dense_2_1/BiasAdd}} = BiasAdd[T=DT_FLOAT, _has_manual_control_dependencies=true, data_format="NHWC"](dense_2_1/MatMul, dense_2_1/BiasAdd/ReadVariableOp)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.
2023-08-14 11:22:05.241573: W tensorflow/c/c_api.cc:291] Operation '{name:'loss_3/AddN' id:358 op device:{requested: '', assigned: ''} def:{{{node loss_3/AddN}} = AddN[N=2, T=DT_FLOAT, _has_manual_control_dependencies=true](loss_3/mul, loss_3/mul_1)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.
2023-08-14 11:22:0

1250 episodes - episode_reward: -4752.121 [-10265.816, -12.509] - loss: 2067529.422 - mae: 2429.103 - mean_q: -2652.008

Interval 2 (10000 steps performed)
done, took 80.208 seconds


<keras.callbacks.History at 0x166323520>

In [8]:
# Run the trained agent for five test episodes and report the mean episode reward.
results = dqn.test(env, nb_episodes=5, visualize=False)
episode_rewards = results.history['episode_reward']
print(np.mean(episode_rewards))

Testing for 5 episodes ...
Episode 1: reward: -370.165, steps: 8
Episode 2: reward: -489.384, steps: 8
Episode 3: reward: -489.384, steps: 8
Episode 4: reward: -310.556, steps: 8
Episode 5: reward: -489.384, steps: 8
-429.77455670036977


In [9]:
class PrintEnv(Env):
    """Verbose charging environment that prints every step.

    Uses the same dynamics and rewards as ``ChargingEnv``: eight 15-minute
    steps per episode, a per-step reward of ``-alpha_t * exp(power)``, and a
    reward of ``-10000`` on the last step if the battery level is below a
    demand drawn from ``N(mu, sigma)``. In addition it prints the initial
    battery level, a status line per step and the outcome of the episode.

    The observation is the battery level as a plain number (not a length-1
    array), both from ``reset`` and from ``step``.

    Args:
        max_charging_rate: Charging rate per hour selected by the highest action.
        mu: Mean of the normally distributed energy demand.
        sigma: Standard deviation of the energy demand.
        alpha_t: Coefficient of the exponential charging cost.
    """

    def __init__(self, max_charging_rate=22, mu=30, sigma=5, alpha_t=0.25):
        self.max_charging_rate = max_charging_rate
        self.battery_capacity = 100  # maximum battery capacity
        self.mu = mu
        self.sigma = sigma
        self.alpha_t = alpha_t
        self.action_space = Discrete(4)  # 4 charging actions: zero, low, medium, high
        # Build the bounds as float32 up front: handing integer arrays to Box
        # makes it down-cast them and log a precision warning.
        self.observation_space = Box(
            low=np.array([0], dtype=np.float32),
            high=np.array([self.battery_capacity], dtype=np.float32),
            dtype=np.float32,
        )
        self.reset_state()  # reset battery (this already prints one initial level)
        self.charging_length = 120  # 120 minutes of charging

    def reset_state(self):
        """Draw a new random initial battery level (5 to 15) and print it."""
        self.state = 10 + random.randint(-5, 5)
        print("Initial battery level: ", self.state)

    def step(self, action):
        """Charge for one 15-minute interval, printing what happened.

        Args:
            action: Integer in ``0..3`` scaling the charging rate from zero
                up to ``max_charging_rate``.

        Returns:
            Tuple ``(state, reward, done, info)``: the battery level after
            charging, the reward for this step, whether the 120 minutes are
            used up, and an empty info dict.
        """
        charging_rate = action * (self.max_charging_rate / 3)  # highest action -> full rate per hour
        power = charging_rate * 0.25  # amount charged during a 15-minute interval
        self.state = min(self.state + power, self.battery_capacity)
        self.charging_length -= 15  # subtract 15min from charging length

        done = self.charging_length <= 0

        # Charging cost as per the given formula; this is the reward of every
        # step unless the final demand check below fails.
        charging_cost = self.alpha_t * np.exp(power)
        reward = -charging_cost

        # Built-in round() instead of calling the __round__ dunder directly.
        print("Time left: ", self.charging_length, "min | Power:", round(power, 2), "| Battery level:", round(self.state, 2), "| Charging cost:", round(charging_cost, 2))

        if done:
            energy_demand = np.random.normal(self.mu, self.sigma)
            if self.state < energy_demand:
                # Penalty for running out of energy; it replaces the cost of
                # the last step rather than being added to it.
                reward = -10000
                print("Running out of energy!")
            else:
                print("Charging done!")

        # Placeholder for info
        info = {}

        return self.state, reward, done, info

    def reset(self):
        """Start a new episode and return the initial battery level."""
        self.reset_state()
        self.charging_length = 120
        return self.state

In [10]:
# Verbose environment used for the manual rollout below. Constructing it
# already draws and prints one initial battery level.
printEnv = PrintEnv()

Initial battery level:  9


In [15]:
# dqn.forward() chooses between the exploration policy and the evaluation
# policy based on dqn.training. Set it explicitly so this cell does not depend
# on dqn.test() having been the last agent call before it.
dqn.training = False

num_episodes = 2
for episode in range(num_episodes):
    state = printEnv.reset()
    done = False
    total_reward = 0

    # Roll out one full charging session, letting the agent pick every action.
    while not done:
        action = dqn.forward(state)
        next_state, reward, done, _ = printEnv.step(action)
        total_reward += reward
        state = next_state

    print(f"Episode {episode+1}: Total Reward = {total_reward}")



Initial battery level:  7
Time left:  105 min | Power: 1.83 | Battery level: 8.83 | Charging cost: 1.56
Time left:  90 min | Power: 1.83 | Battery level: 10.67 | Charging cost: 1.56
Time left:  75 min | Power: 5.5 | Battery level: 16.17 | Charging cost: 61.17
Time left:  60 min | Power: 5.5 | Battery level: 21.67 | Charging cost: 61.17
Time left:  45 min | Power: 5.5 | Battery level: 27.17 | Charging cost: 61.17
Time left:  30 min | Power: 5.5 | Battery level: 32.67 | Charging cost: 61.17
Time left:  15 min | Power: 5.5 | Battery level: 38.17 | Charging cost: 61.17
Time left:  0 min | Power: 5.5 | Battery level: 43.67 | Charging cost: 61.17
Charging done!
Episode 1: Total Reward = -370.1652488722988
Initial battery level:  8
Time left:  105 min | Power: 1.83 | Battery level: 9.83 | Charging cost: 1.56
Time left:  90 min | Power: 5.5 | Battery level: 15.33 | Charging cost: 61.17
Time left:  75 min | Power: 5.5 | Battery level: 20.83 | Charging cost: 61.17
Time left:  60 min | Power: 5.5