# Generating Distribution Grid (DG) Data using SimBench and powerdata-gen

[1] https://github.com/e2nIEE/simbench  
[2] https://github.com/bdonon/powerdata-gen

### Load dependencies, including the data generator library

In [None]:
import sys, os
DATA_GEN_PATH = os.path.abspath('powerdata-gen/')
sys.path.append(DATA_GEN_PATH)

import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", category=FutureWarning)

import numpy as np
import pandas as pd
import pandapower as pp
import simbench as sb
import powerdata_gen
from omegaconf import OmegaConf
import torch
from torch_geometric.data import Data

import time
import logging

### Helper functions to load pandapower grids from simbench

In [19]:
def create_output_dir():
    """Create a timestamped run directory under ``outputs/`` and return its path.

    The directory name is the current local time formatted as
    ``YYYY-MM-DD_HH:MM:SS``. An already existing directory is reused.
    """
    run_dir = os.path.join('outputs', time.strftime('%Y-%m-%d_%H:%M:%S'))
    os.makedirs(run_dir, exist_ok=True)
    return run_dir

In [38]:
def get_dist_grid_codes():
    """Return the sorted simbench codes of the distribution grid cases.

    A scenario-0 code is kept when it contains an LV, MV or combined MVLV
    voltage-level tag.
    """
    voltage_tags = ("-MV-", "-LV-", "-MVLV-")
    all_codes = sb.collect_all_simbench_codes(scenario=0)
    return sorted(
        code for code in all_codes
        if any(tag in code for tag in voltage_tags)
    )

In [21]:
def save_pandapower_grid_to_json(sb_code: str, filename: str):
    """Fetch the simbench network for ``sb_code`` and write it to ``filename``.

    The network is serialized with ``pp.to_json``; the given ``filename`` is
    returned unchanged.
    """
    pp.to_json(sb.get_simbench_net(sb_code), filename)
    return filename

### Helper functions for extracting node features and edge features

In [39]:
def get_node_features(net):
    """Build per-bus input and target feature matrices from a solved network.

    Each row of ``x`` is ``[Slack?, PV?, PQ?, p_mw, q_mvar, vm_pu, va_degree]``
    where the quantities that are unknown for the bus type are set to NaN.
    Each row of ``y`` is the full ``[p_mw, q_mvar, vm_pu, va_degree]`` taken
    from ``net.res_bus``.

    Returns:
        Tuple ``(x, y)`` of numpy arrays, one row per entry of
        ``net.bus.index`` and in that order.

    Raises:
        AssertionError: if a bus carries both a generator and an external
            grid connection.
    """
    value_cols = ['p_mw', 'q_mvar', 'vm_pu', 'va_degree']
    # One-hot bus type (Slack?, PV?, PQ?) -> columns hidden (NaN) in x.
    hidden_cols = {
        (1, 0, 0): ['p_mw', 'q_mvar'],
        (0, 1, 0): ['q_mvar', 'va_degree'],
        (0, 0, 1): ['vm_pu', 'va_degree'],
    }

    inputs, targets = [], []
    for bus_id in net.bus.index:
        n_gens = len(net.gen.loc[net.gen['bus'] == bus_id])
        slack_rows = net.ext_grid.loc[net.ext_grid['bus'] == bus_id,
                                      ['vm_pu', 'va_degree']]

        # Slack takes precedence over PV; everything else is a PQ bus.
        if len(slack_rows) > 0:
            assert n_gens == 0, ("PV and Swing generators cannot be placed"
                                 " on the same bus. This is because they"
                                 " will both try to control the bus voltage.")
            bus_type = (1, 0, 0)
        elif n_gens > 0:
            bus_type = (0, 1, 0)
        else:
            bus_type = (0, 0, 1)

        # net.res_bus already aggregates every component attached to the bus
        # (loads, sgens, gens, storages, ext_grid, ...), so it is used as-is.
        solved = net.res_bus.loc[bus_id, value_cols]
        visible = solved.copy()
        visible.loc[hidden_cols[bus_type]] = np.nan

        inputs.append(np.append(bus_type, visible.values))
        targets.append(solved.values)

    return np.array(inputs), np.array(targets)

In [40]:
def get_edge_features(net):
    """Build the COO edge index and edge-feature matrix for lines and trafos.

    The graph is undirected, so every branch is emitted twice (once per
    direction). Each edge-feature row is ``[trafo?, r_pu, x_pu, phase_shift]``;
    ``phase_shift`` is NaN for lines.

    Args:
        net: network object exposing the ``bus``, ``line`` and ``trafo``
            tables and the ``sn_mva`` reference apparent power.

    Returns:
        Tuple ``(A, E)``. ``A`` is the ``(2, 2B)`` edge index and ``E`` the
        ``(2B, 4)`` feature matrix, where ``B`` is the number of branches;
        line edges come first, followed by transformer edges.
    """

    def get_line_features(net):
        # Undirected graph so need to add both directions to edge_index.
        edge_index = net.line.loc[:, ['from_bus', 'to_bus',
                                      'to_bus', 'from_bus']].values
        # Use .reshape to change shape from (E, 4) to (2E, 2), where E is num edges.
        # Transpose to make into proper (2, 2E) format.
        edge_index = edge_index.reshape(-1, 2).T

        # TODO: Decide if use r/x or G/B??
        r = net.line['r_ohm_per_km'].values * net.line['length_km'].values
        x = net.line['x_ohm_per_km'].values * net.line['length_km'].values

        # We convert the r,x values into per unit (p.u.) to simplify calculations
        # and ensure consistency across the network. To do this, we divide r, x by
        # the base impedance. Therefore z = vn_kv**2/sn_mva, where vn_kv is rated
        # voltage and sn_mva is reference apparent power.
        # Note: vn_kv should be the same for every bus except ext_grid, but
        # looking it up per line (at its to_bus) is safer.
        vn_kv = net.bus.loc[net.line['to_bus'], ['vn_kv']].values.reshape(-1)
        z = np.square(vn_kv) / net.sn_mva
        r_pu = r / z
        x_pu = x / z

        # Similarly, due to undirected graph, the edge features need to be repeated
        # twice, once for each respective connection present in the COO matrix.
        r_pu = r_pu.repeat(2)
        x_pu = x_pu.repeat(2)

        # Add zeros to indicate it is a line, and pad with nan to account for
        # missing phase shift.
        e = edge_index.shape[1] # b/c coo matrix
        edge_features = np.vstack([np.zeros(e),         # trafo?
                                   r_pu,                # r_pu
                                   x_pu,                # x_pu
                                   np.nan*np.ones(e)    # phase_shift
                                   ]).T

        return edge_index, edge_features

    def get_trafo_features(net):
        # TODO: Add charging susceptance - b (p.u.), transformer tap ratio - tau

        # Similar to get_line_features.
        edge_index = net.trafo.loc[:, ['hv_bus', 'lv_bus',
                                       'lv_bus', 'hv_bus']].values
        edge_index = edge_index.reshape(-1, 2).T

        # Impedance calculated as shown in pandapower docs:
        # https://pandapower.readthedocs.io/en/v2.2.1/elements/trafo.html#impedance-values
        # where vk_percent is short-circuit voltage and vkr_percent is the real
        # part of short-circuit voltage (%).
        # NOTE(review): the 1000 / sn_mva factor follows the cited docs page,
        # while the line impedances above are scaled with net.sn_mva - confirm
        # that both end up on the same per-unit base.
        z_pu = (net.trafo['vk_percent'].values / 100)*(1000 / net.trafo['sn_mva'].values)
        r_pu = (net.trafo['vkr_percent'].values / 100)*(1000 / net.trafo['sn_mva'].values)
        x_pu = np.sqrt(np.square(z_pu) - np.square(r_pu))

        # Add phase shift angle (deg) as additional feature. This must come
        # from shift_degree; vk_percent is the short-circuit voltage used above.
        phase_shift = net.trafo['shift_degree'].values

        # Repeat the features (to match edge_index) and create feature matrix.
        e = edge_index.shape[1] # b/c coo matrix
        edge_features = np.vstack([np.ones(e),              # trafo?
                                   r_pu.repeat(2),          # r_pu
                                   x_pu.repeat(2),          # x_pu
                                   phase_shift.repeat(2)    # phase_shift
                                   ]).T

        return edge_index, edge_features

    A_line, E_line = get_line_features(net)
    A_trafo, E_trafo = get_trafo_features(net)

    # Combine and return the line and trafo features.
    A = np.hstack([A_line, A_trafo])
    E = np.vstack([E_line, E_trafo])
    return A, E

### Generate Grids using powerdata-gen

In [41]:
# Simbench codes of the distribution grids, restricted to the "no_sw" variants
# and excluding the combined MVLV grids.
SB_CODES = [
    code for code in get_dist_grid_codes()
    if "no_sw" in code and "-MVLV-" not in code
]
SB_CODES

['1-LV-rural1--0-no_sw',
 '1-LV-rural2--0-no_sw',
 '1-LV-rural3--0-no_sw',
 '1-LV-semiurb4--0-no_sw',
 '1-LV-semiurb5--0-no_sw',
 '1-LV-urban6--0-no_sw',
 '1-MV-comm--0-no_sw',
 '1-MV-rural--0-no_sw',
 '1-MV-semiurb--0-no_sw',
 '1-MV-urban--0-no_sw']

In [None]:
# Set TEST_GENERATION to True to run on TEST_SB_CODES only.
TEST_GENERATION = False
TEST_SB_CODES = ['1-LV-rural1--0-no_sw']
DATASET_SPLIT = [15,3,2] # [Train, val, test]

# Directory holding the reference grids as json files.
input_dir = 'inputs/'
os.makedirs(input_dir, exist_ok=True)

# Export each selected simbench grid to json unless its file already exists.
sb_codes = TEST_SB_CODES if TEST_GENERATION else SB_CODES
filenames = [os.path.join(input_dir, f'{code}.json') for code in sb_codes]
for code, f in zip(sb_codes, filenames):
    if not os.path.exists(f):
        save_pandapower_grid_to_json(code, f)

# Load the base config file and adjust its parameters.
cfg = OmegaConf.load('base_gen_config.yaml')
cfg.n_train, cfg.n_val, cfg.n_test = DATASET_SPLIT
cfg.seed = 12

# Logger handed to powerdata-gen.
log = logging.getLogger(__name__)

# One output directory per run; every reference grid gets its own
# sub-directory that receives the grids generated from it.
output_dir = create_output_dir()
print(f'Output directory: {output_dir}\n')
generated_grid_base_dirs = []
for code, f in zip(sb_codes, filenames):
    save_path = os.path.join(output_dir, code)
    os.makedirs(save_path, exist_ok=True)
    cfg.default_net_path = f
    powerdata_gen.build_datasets(
        cfg.default_net_path, save_path, log,
        cfg.n_train, cfg.n_val, cfg.n_test, cfg.keep_reject,
        cfg.sampling, cfg.powerflow, cfg.filtering, cfg.seed,
    )
    generated_grid_base_dirs.append(save_path)

Output directory: outputs/2024-12-30_19:23:46



Sample count = 55, Sampling issues = 0, Divergences = 0, Rejections = 40 : 100%|██████████| 15/15 [00:03<00:00,  4.26it/s]
Sample count = 24, Sampling issues = 0, Divergences = 0, Rejections = 21 : 100%|██████████| 3/3 [00:01<00:00,  2.46it/s]
Sample count = 4, Sampling issues = 0, Divergences = 0, Rejections = 2 : 100%|██████████| 2/2 [00:00<00:00,  5.42it/s]
Sample count = 82, Sampling issues = 0, Divergences = 0, Rejections = 67 : 100%|██████████| 15/15 [00:05<00:00,  2.95it/s]
Sample count = 12, Sampling issues = 0, Divergences = 0, Rejections = 9 : 100%|██████████| 3/3 [00:00<00:00,  3.01it/s]
Sample count = 7, Sampling issues = 0, Divergences = 0, Rejections = 5 : 100%|██████████| 2/2 [00:00<00:00,  3.39it/s]
Sample count = 57, Sampling issues = 0, Divergences = 0, Rejections = 42 : 100%|██████████| 15/15 [00:04<00:00,  3.45it/s]
Sample count = 10, Sampling issues = 0, Divergences = 0, Rejections = 7 : 100%|██████████| 3/3 [00:00<00:00,  3.94it/s]
Sample count = 4, Sampling issue

### Create datasets for GNN-based model development using the generated grids

In [None]:
dataset_splits = ['train', 'val', 'test']

for split in dataset_splits:
    # Build one dataset file per (reference grid, split) pair.
    # The loop variable is deliberately not called `dir`, which would shadow
    # the builtin for the rest of the notebook session.
    for base_dir in generated_grid_base_dirs:
        generated_grid_dir = os.path.join(base_dir, split)
        dataset_filename = os.path.join(generated_grid_dir,
                                        f'dataset_{split}.pt')

        # Do not rebuild a dataset that has already been saved.
        if os.path.exists(dataset_filename):
            continue

        # list[outputs/<identifier>/<sb_code>/<train|test|val>/sample_<N>.json]
        # os.listdir returns entries in arbitrary order, so sort them to make
        # the sample order in the saved dataset reproducible. Non-json files
        # may also be present in the directory and are skipped.
        generated_grid_files = [
            os.path.join(generated_grid_dir, name)
            for name in sorted(os.listdir(generated_grid_dir))
            if name.endswith('.json')
        ]

        dataset = []
        for f in generated_grid_files:
            net = pp.from_json(f)

            X_i, Y_i = get_node_features(net)
            A_i, E_i = get_edge_features(net)

            dataset.append(
                Data(x=torch.tensor(X_i),
                     edge_index=torch.tensor(A_i, dtype=torch.int64),
                     edge_attr=torch.tensor(E_i),
                     y=torch.tensor(Y_i))
            )

        print("Saving dataset", dataset_filename, end="... ")
        torch.save(dataset, dataset_filename)
        print("completed")