<a href="https://colab.research.google.com/github/ShraddhaSharma24/Reinforcement-Learning/blob/main/Reinforcement_Learning_for_Voltage_Control_in_Power_Systems.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
pip install pandapower gymnasium stable-baselines3

Collecting pandapower
  Downloading pandapower-3.0.0-py3-none-any.whl.metadata (10 kB)
Collecting stable-baselines3
  Downloading stable_baselines3-2.5.0-py3-none-any.whl.metadata (4.8 kB)
Collecting scipy<1.14 (from pandapower)
  Downloading scipy-1.13.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.6/60.6 kB[0m [31m1.0 MB/s[0m eta [36m0:00:00[0m
Collecting deepdiff (from pandapower)
  Downloading deepdiff-8.4.0-py3-none-any.whl.metadata (6.7 kB)
Collecting geojson (from pandapower)
  Downloading geojson-3.2.0-py3-none-any.whl.metadata (16 kB)
Collecting gymnasium
  Downloading gymnasium-1.0.0-py3-none-any.whl.metadata (9.5 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch<3.0,>=2.3->stable-baselines3)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch<3.0,>=2.3->stable-b

In [2]:
import gymnasium as gym
import pandapower as pp
import numpy as np
from gymnasium import spaces

In [6]:
class VoltageControlEnv(gym.Env):
    def __init__(self):
        super().__init__()

        # Create a simple power grid
        self.net = pp.create_empty_network()
        b1 = pp.create_bus(self.net, vn_kv=110)  # Slack Bus
        b2 = pp.create_bus(self.net, vn_kv=110)  # Load Bus
        b3 = pp.create_bus(self.net, vn_kv=110)  # Load Bus

        pp.create_ext_grid(self.net, b1, vm_pu=1.02)  # External Grid (Slack Bus)
        pp.create_line(self.net, b1, b2, length_km=10, std_type="NAYY 4x50 SE")
        pp.create_line(self.net, b2, b3, length_km=10, std_type="NAYY 4x50 SE")

        pp.create_load(self.net, b2, p_mw=5, q_mvar=2)  # Load at Bus 2
        pp.create_load(self.net, b3, p_mw=3, q_mvar=1.5)  # Load at Bus 3

        self.gen = pp.create_sgen(self.net, b1, p_mw=0, q_mvar=0)  # Reactive power control

        # Define RL action & observation spaces
        self.action_space = spaces.Box(low=-0.5, high=0.5, shape=(1,), dtype=np.float32)  # Adjust Q
        self.observation_space = spaces.Box(low=0.9, high=1.1, shape=(2,), dtype=np.float32)  # Voltages

    def reset(self, seed=None, options=None):
        super().reset(seed=seed)
        pp.runpp(self.net)  # Run power flow
        return self._get_obs(), {}

    def step(self, action):
        self.net.sgen.at[self.gen, "q_mvar"] += action[0]  # Adjust reactive power
        pp.runpp(self.net)  # Run power flow

        obs = self._get_obs()
        reward = -np.sum(np.abs(obs - 1.0))  # Reward: Keep voltages near 1.0 p.u.
        done = False

        return obs, reward, done, False, {}

    def _get_obs(self):
        pp.runpp(self.net)
        return np.array(self.net.res_bus.vm_pu[1:])  # Bus voltages (except slack bus)

env = VoltageControlEnv()

# Test Environment
obs, _ = env.reset()
print("Initial Observation:", obs)



Initial Observation: [1.01640435 1.01501148]


In [7]:
from stable_baselines3 import PPO

# Wrap in Stable Baselines3 compatible format
from stable_baselines3.common.vec_env import DummyVecEnv
vec_env = DummyVecEnv([lambda: env])

# Define PPO agent
model = PPO("MlpPolicy", vec_env, verbose=1)

# Train the agent
model.learn(total_timesteps=10000)

# Save model
model.save("voltage_control_agent")

Using cpu device
-----------------------------
| time/              |      |
|    fps             | 13   |
|    iterations      | 1    |
|    time_elapsed    | 150  |
|    total_timesteps | 2048 |
-----------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 13          |
|    iterations           | 2           |
|    time_elapsed         | 301         |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.002485592 |
|    clip_fraction        | 0.0222      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.41       |
|    explained_variance   | -1.19e-07   |
|    learning_rate        | 0.0003      |
|    loss                 | -0.00412    |
|    n_updates            | 10          |
|    policy_gradient_loss | -0.00136    |
|    std                  | 0.987       |
|    value_loss           | 0.00661     |
-----------------

In [None]:
# Load Trained Agent
model = PPO.load("voltage_control_agent")

obs, _ = env.reset()
done = False
total_reward = 0

while not done:
    action, _ = model.predict(obs)
    obs, reward, done, _, _ = env.step(action)
    total_reward += reward

print("Total Reward:", total_reward)