In [57]:
import numpy as np
import pandas as pd
import gym
from gym import spaces

Our action space is discrete in any case, and each action can be assigned an integer value:
'charge' = 0,
'discharge' = 1,
'wait' = 2.

To work properly in **Gym**, our custom environment class must implement the methods `__init__`, `step`, `reset` and `render`. All other methods are basically helper methods.

Common to all algorithms is that, in the end, an action is chosen either according to a probability distribution function or according to a stepwise function with two options: a choice driven by the probability distribution, or a random choice made for exploration.

In any case, the function we optimize outputs a probability distribution.

In [None]:
class BatteryEnv(gym.Env):
    """Battery optimization environment for OpenAI gym.

    The environment exposes three discrete actions (charge, discharge, wait)
    and delegates all domain logic to callables supplied by the caller.

    Args:
        low: Lower bound handed to ``spaces.Box`` for the observation space.
        high: Upper bound handed to ``spaces.Box`` for the observation space.
        calculate_reward_func: Called as ``f(obs)`` in ``step``; its result
            is returned as the reward.
        сheck_end_func: Called as ``f(state_idx)`` in ``step``; its result is
            returned as the ``done`` flag.
        change_episode_idx_func: Called with no arguments in ``reset``; its
            result becomes the new ``episode_idx``.
        calculate_actual_signal_func: Called as ``f(state_idx, action)`` in
            ``step``; its result is returned as the observation.
        episode_idx: Index of the starting episode.
        state_idx: Index of the starting state within an episode.
    """
    metadata = {'render.modes': ['human']}

    # NOTE(review): the first letter of `сheck_end_func` looks like a
    # non-Latin homoglyph -- confirm. It is kept exactly as written so that
    # existing keyword callers keep working.
    def __init__(self, low, high, calculate_reward_func, сheck_end_func, change_episode_idx_func, calculate_actual_signal_func, episode_idx=0, state_idx=0):
        super().__init__()

        # We have only 3 discrete actions (charge, discharge, wait).
        self.action_space = spaces.Discrete(3)

        # low and high can be either numpy arrays with shape (1, n) (if we
        # choose to have n elements in one observation vector) or just min
        # and max numbers (if we choose to have only one observation variable).
        self.observation_space = spaces.Box(low=low, high=high, dtype=np.float16)

        # Custom function to calculate the reward.
        self.calculate_reward_func = calculate_reward_func

        # Custom function to check whether an episode ends.
        self.сheck_end_func = сheck_end_func

        # Custom function to calculate how the signal has changed.
        self.calculate_actual_signal_func = calculate_actual_signal_func

        # Custom function for choosing the next episode index (generally
        # just += 1, but it can also be random).
        self.change_episode_idx_func = change_episode_idx_func

        # Index of the current episode.
        self.episode_idx = episode_idx

        # Index of the current state within the current episode. The starting
        # value is remembered so that reset() can rewind to it.
        self._initial_state_idx = state_idx
        self.state_idx = state_idx

    def step(self, action):
        """Execute one action and advance the environment by one state.

        The point at which an episode is over depends on the chosen episode
        interval, which is meant to be a tunable hyperparameter.

        Args:
            action: The action chosen by the agent.

        Returns:
            Tuple ``(obs, reward, done, info)`` where ``info`` is always an
            empty dict.
        """
        # Move to t+1 first so the signal is evaluated at the next state.
        self.state_idx += 1
        # Must use the attribute assigned in __init__ (the `_func` suffix);
        # the un-suffixed name does not exist on the instance.
        obs = self.calculate_actual_signal_func(self.state_idx, action)
        reward = self.calculate_reward_func(obs)
        done = self.сheck_end_func(self.state_idx)
        return obs, reward, done, {}

    def reset(self):
        """Prepare the environment for the next episode.

        Picks the next episode index via ``change_episode_idx_func`` and
        rewinds ``state_idx`` to its starting value.

        Returns:
            ``episodes_list[self.episode_idx]``. ``episodes_list`` is not
            defined in this class; it must exist as a module-level name when
            this method is called.
        """
        # Generally the index grows by 1, but the callable may be stochastic.
        self.episode_idx = self.change_episode_idx_func()
        # state_idx is an index *within* an episode, so a new episode has to
        # start from the beginning again.
        self.state_idx = self._initial_state_idx
        return episodes_list[self.episode_idx]

    def render(self, mode='human', close=False):
        """Render the environment to the screen (placeholder output only)."""
        print('random_print')

Random agent test:

In [None]:
def calculate_reward_func(signal):
    """Return the reward for *signal*, computed as ``signal * 0.1``.

    Args:
        signal: Value supporting multiplication by a float.

    Returns:
        The scaled signal.
    """
    # The product has to be returned; as a bare expression it was discarded
    # and the function always yielded None.
    return signal * 0.1

In [None]:
import gym

# Smoke test: drive CartPole with uniformly random actions for 1000 steps.
env = gym.make('CartPole-v0')
try:
    env.reset()
    for _ in range(1000):
        # env.render()
        action = env.action_space.sample()  # take a random action
        step_result = env.step(action)
        # Index 2 is the `done` flag of the step tuple. Stepping a finished
        # episode is undefined in gym, so start a new one first.
        if step_result[2]:
            env.reset()
finally:
    # Release the environment even if a step raises.
    env.close()

Testing our calculation functions ('argument'/'hyperparameter' functions):