# Basic implementation of DQN-based VPP scheduling

> To implement a model-free, deep Q-network (DQN) based optimal scheduling technique for a Virtual Power Plant (VPP) that dispatches three Microgrids, we need to define:

1. The state space, which represents the current power demand, available power from each Microgrid, and other relevant parameters.
2. The action space, which indicates where power is drawn from (in the code below, one discrete action per Microgrid, selecting the Microgrid to draw from).
3. The reward function, which rewards the VPP for efficiently meeting the demand while minimizing costs and inefficiencies.

In [1]:
import random
from collections import deque

import numpy as np
import tensorflow as tf

class DQNAgent:
    """Deep Q-network agent with epsilon-greedy exploration and experience replay.

    The agent owns a small fully connected Keras network that maps a state
    vector of length ``state_size`` to one Q-value per discrete action, and a
    bounded replay buffer of ``(state, action, reward, next_state, done)``
    transitions.
    """

    def __init__(self, state_size, action_size):
        """Create the agent and build its Q-network.

        Args:
            state_size: Number of features in a state vector.
            action_size: Number of discrete actions.
        """
        self.state_size = state_size
        self.action_size = action_size
        # Bounded replay buffer: once full, the oldest transition is dropped.
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95  # discount applied to the bootstrapped next-state value
        self.epsilon = 1.0  # probability of taking a random action
        self.epsilon_min = 0.01  # floor below which epsilon is not decayed
        self.epsilon_decay = 0.995  # multiplicative decay applied per replay call
        self.learning_rate = 0.001
        self.model = self._build_model()

    def _build_model(self):
        """Build and compile the Q-network (two hidden ReLU layers, linear output)."""
        model = tf.keras.Sequential()
        # An explicit Input layer replaces the legacy `input_dim=` layer argument.
        model.add(tf.keras.Input(shape=(self.state_size,)))
        model.add(tf.keras.layers.Dense(24, activation='relu'))
        model.add(tf.keras.layers.Dense(24, activation='relu'))
        model.add(tf.keras.layers.Dense(self.action_size, activation='linear'))
        model.compile(loss='mse', optimizer=tf.keras.optimizers.Adam(learning_rate=self.learning_rate))
        return model

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay buffer."""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Choose an action epsilon-greedily.

        Args:
            state: Batch containing a single state; only the first row of the
                network's prediction is used.

        Returns:
            The chosen action index as a Python ``int``.
        """
        if np.random.rand() <= self.epsilon:
            return int(np.random.randint(0, self.action_size))
        # verbose=0 keeps Keras from printing a progress bar on every step.
        act_values = self.model.predict(state, verbose=0)
        return int(np.argmax(act_values[0]))

    def replay(self, batch_size):
        """Train on a random minibatch of stored transitions, then decay epsilon.

        Does nothing when fewer than ``batch_size`` transitions are stored, so
        it is safe to call before the buffer has filled.

        Args:
            batch_size: Number of transitions to sample without replacement.
        """
        if len(self.memory) < batch_size:
            return
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                # Bootstrap from the best predicted Q-value of the next state.
                next_q = self.model.predict(next_state, verbose=0)[0]
                target = reward + self.gamma * np.amax(next_q)
            # Only the taken action's Q-value is moved towards the target; the
            # other outputs keep their current predictions.
            target_f = self.model.predict(state, verbose=0)
            target_f[0][action] = target
            self.model.train_on_batch(state, target_f)
        if self.epsilon > self.epsilon_min:
            # Clamp so the decay never pushes epsilon below its floor.
            self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)

    def load(self, name):
        """Load network weights from ``name``."""
        self.model.load_weights(name)

    def save(self, name):
        """Save network weights to ``name``."""
        self.model.save_weights(name)

# Define the VPP environment
class VPPEnv:
    """Toy Virtual Power Plant environment with a fixed set of microgrids.

    Observation: 1-D array ``[remaining demand, remaining capacity of each
    microgrid...]``.
    Action: index of the microgrid to draw power from.
    Reward: negative of the demand still unmet after the draw.
    """

    def __init__(self, capacities=(100, 200, 150)):
        """Create the environment and start the first episode.

        Args:
            capacities: Starting capacity of each microgrid; restored at the
                beginning of every episode. Defaults to three example values.
        """
        self._initial_capacities = list(capacities)
        self.microgrids = list(self._initial_capacities)
        self.demand = None
        self.reset()

    def reset(self):
        """Start a new episode and return its initial observation.

        Restores every microgrid to its starting capacity (otherwise capacity
        drained in one episode would carry into the next and demand could
        become impossible to meet) and draws a fresh random demand.
        """
        self.microgrids = list(self._initial_capacities)
        self.demand = np.random.randint(50, 400)  # Random demand value as an example
        return self._observation()

    def _observation(self):
        """Return the current state as ``[demand, *microgrid capacities]``."""
        return np.array([self.demand] + self.microgrids)

    def step(self, action):
        """Draw as much power as possible from the selected microgrid.

        Args:
            action: Index into ``self.microgrids`` of the microgrid to use.

        Returns:
            Tuple ``(observation, reward, done)``. ``done`` is True once the
            demand is met, or when every microgrid is empty and no further
            progress is possible.
        """
        power_drawn = min(self.demand, self.microgrids[action])
        self.microgrids[action] -= power_drawn
        self.demand -= power_drawn
        reward = -abs(self.demand)  # Reward is negative of the remaining demand
        exhausted = all(capacity <= 0 for capacity in self.microgrids)
        done = bool(self.demand <= 0 or exhausted)
        return self._observation(), reward, done

In [3]:
import random
if __name__ == "__main__":
    # Problem dimensions and training schedule.
    state_size = 4  # Demand + 3 microgrid states
    action_size = 3  # One action for each microgrid
    episodes = 1000
    batch_size = 32

    env = VPPEnv()
    agent = DQNAgent(state_size, action_size)

    for e in range(episodes):
        # Keras expects a batch dimension, so every state is shaped (1, state_size).
        state = env.reset().reshape(1, state_size)
        finished = False
        while not finished:
            action = agent.act(state)
            observation, reward, finished = env.step(action)
            observation = observation.reshape(1, state_size)
            agent.remember(state, action, reward, observation, finished)
            state = observation
            if finished:
                print(f"Episode: {e+1}/{episodes}, Demand Left: {env.demand}")
            elif len(agent.memory) > batch_size:
                # Learn only between steps of an ongoing episode, once the
                # buffer holds more than one batch of transitions.
                agent.replay(batch_size)

Episode: 1/1000, Demand Left: 0
