This notebook shows how to create a customized environment based on the template environment. Here, we modify the reward function to match a different objective: maximizing the profit of the batteries while ignoring the penalty for voltage violations.

In [None]:
# install the package
!pip install RL-ADN
!pip install pandapower
!pip install numpy
!pip install gym


In [5]:
# import packages

import numpy as np
from power_network_rl.environments.Environment_Integrated import PowerNetEnv

In [6]:
# Locate the example data files through the `power_network_rl` package, then
# assemble the configuration dictionary used to initialise the environment.
import pkg_resources

node_data_path = pkg_resources.resource_filename(
    'power_network_rl', 'data_sources/network_data/node_34/Nodes_34.csv'
)
line_data_path = pkg_resources.resource_filename(
    'power_network_rl', 'data_sources/network_data/node_34/Lines_34.csv'
)
time_series_data_path = pkg_resources.resource_filename(
    'power_network_rl', 'data_sources/time_series_data/34_node_time_series.csv'
)

# Network description passed under the 'network_info' key.
network_info = {
    'vm_pu': 1.0,
    's_base': 1000,
    'bus_info_file': node_data_path,
    'branch_info_file': line_data_path,
}

# Point the three paths above at your own files to use a different data set;
# the environment is created from this configuration.
env_config = {
    'voltage_limits': [0.95, 1.05],
    'algorithm': 'Laurent',
    'battery_list': [11, 15, 26, 29, 33],
    'year': 2020,
    'month': 1,
    'day': 1,
    'train': True,
    'state_pattern': 'default',
    'network_info': network_info,
    'time_series_data_path': time_series_data_path,
}

env = PowerNetEnv(env_config)

Data scale: from 2020-07-17 to 2021-01-01
Data time interval: 15 minutes
Dataset loaded from D:\BaiduNetdiskDownload\HSR\GITHUB\RL-ADN\power_network_rl\data_sources\time_series_data\34_node_time_series.csv
Dataset dimensions: (16224, 69)
Dataset contains the following types of data:
Active power columns: ['active_power_node_1', 'active_power_node_2', 'active_power_node_3', 'active_power_node_4', 'active_power_node_5', 'active_power_node_6', 'active_power_node_7', 'active_power_node_8', 'active_power_node_9', 'active_power_node_10', 'active_power_node_11', 'active_power_node_12', 'active_power_node_13', 'active_power_node_14', 'active_power_node_15', 'active_power_node_16', 'active_power_node_17', 'active_power_node_18', 'active_power_node_19', 'active_power_node_20', 'active_power_node_21', 'active_power_node_22', 'active_power_node_23', 'active_power_node_24', 'active_power_node_25', 'active_power_node_26', 'active_power_node_27', 'active_power_node_28', 'active_power_node_29', 'activ

### Creating a new profit-oriented environment based on our primary environment
In this new environment, the voltage violation penalty is ignored and the goal is to minimize the operating cost.

In [9]:
class ProfitBatteryEnv(PowerNetEnv):
    """Customized ``PowerNetEnv`` whose reward depends only on the saved power.

    The voltage argument received by ``_calculate_reward`` is not used, so no
    voltage-violation term enters the reward computed by this class.
    """

    def __init__(self, env_config: dict):
        """
        Build the environment from a configuration dictionary.

        Parameters:
            env_config (dict): Configuration forwarded unchanged to the
                ``PowerNetEnv`` constructor.
        """
        # The annotation is the `dict` type rather than the notebook-level
        # `env_config` variable, so the class can be defined on its own.
        super().__init__(env_config)  # Call the constructor of the parent class

    def _calculate_reward(self, current_normalized_obs: np.ndarray, vm_pu_after_control_bat: np.ndarray, saved_power: float) -> float:
        """
        Compute the reward for one step; put your own reward logic here.

        Parameters:
            current_normalized_obs (np.ndarray): The current normalized observations (unused here).
            vm_pu_after_control_bat (np.ndarray): The voltage after control at battery locations (unused here).
            saved_power (float): The amount of power saved.

        Returns:
            float: Calculated reward, here twice the saved power.
        """

        # Example logic: the reward is twice the saved power; the observation
        # and voltage arguments are deliberately ignored.
        new_reward = 2 * saved_power

        return new_reward

In [None]:
# Build the customized environment and reset it before stepping.
profit_battery_env = ProfitBatteryEnv(env_config)
profit_battery_env.reset()

# Roll out a constant all-ones action (one entry per battery) and accumulate
# the per-step rewards.
# NOTE(review): `finish` is never inspected — confirm how step() behaves once
# an episode ends before interpreting `episode_reward` as a single episode.
n_episodes, n_steps = 1, 1000
for episode_idx in range(n_episodes):
    episode_reward = 0
    for step_idx in range(n_steps):
        # A fresh action array is created on every step, as in the original loop.
        constant_action = np.ones(len(profit_battery_env.battery_list))
        step_result = profit_battery_env.step(constant_action)
        next_obs, reward, finish, info = step_result
        print(reward)
        episode_reward += reward
    print(episode_reward)