In [137]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import gym
from gym import spaces
pd.options.display.max_columns = None
pd.options.display.max_rows = None
import warnings
warnings.filterwarnings("ignore")

Our action space is discrete, and each action is assigned an integer value (matching `dict_actions` in the environment class below):
'discharge' = 0,
'charge' = 1,
'wait' = 2.

To work properly with **Gym**, our custom environment class must implement the methods `__init__`, `step`, `reset` and `render`. All other methods are just helper methods.

Common to every algorithm is that, in the end, an action is chosen either by sampling from a probability distribution over the actions or by a stepwise rule with two options: pick the action according to that distribution, or pick one at random for exploration.

In either case, the function we optimize outputs a probability distribution over the actions.

In [138]:
# Load the Swedish load data and give the two columns we use short names.
df = pd.read_csv('Sweden Load Data 2005-2017.csv')
df = df.rename(columns={'cet_cest_timestamp':'time','SE_load_actual_tso':'load'})
# Parse the timestamps as timezone-aware UTC values. Parsing errors are raised
# here (the deprecated errors='ignore' would silently keep the raw strings and
# make the `.dt` access below fail with a much less helpful message).
df['time'] = pd.to_datetime(df['time'], utc=True)
#df['hour'] = df['time'].dt.hour
# Day of week (Monday=0 ... Sunday=6); BatteryEnv uses a change in this value
# between consecutive rows to detect the end of an episode (one day).
df['weekday'] = df['time'].dt.weekday
#df['month'] = df['time'].dt.month

In [139]:
# Preview the first rows to check the renamed columns and the derived weekday.
df.head()

Unnamed: 0,time,load,weekday
0,2005-01-01 00:00:00+00:00,15991.34,5
1,2005-01-01 01:00:00+00:00,15673.12,5
2,2005-01-01 02:00:00+00:00,15431.81,5
3,2005-01-01 03:00:00+00:00,15326.48,5
4,2005-01-01 04:00:00+00:00,15468.59,5


In [200]:
class BatteryEnv:#(gym.Env):
    """Battery optimization environment following the OpenAI gym interface.

    The environment walks through the rows of a load DataFrame one step at a
    time. At each step the agent chooses to discharge (0), charge (1) or
    wait (2); charging adds `power` to the load of that step, discharging
    subtracts it, and the battery's state of charge is tracked in
    `self.charge` (between 0 and `capacity`). An episode ends when the
    `weekday` value changes between two consecutive rows, or when the last
    row of the data has been reached.
    """
    #metadata = {'render.modes': ['human']}

    def __init__(self, calculate_reward_func, df, capacity=4, power=100):
        """
        Parameters
        ----------
        calculate_reward_func : callable
            Called as ``calculate_reward_func(obs, actual_load_list=...)`` and
            expected to return the reward for the current step.
        df : pandas.DataFrame
            Must contain the columns 'load' and 'weekday'.
        capacity : int, optional
            Maximum number of charge units the battery can hold. The battery
            starts full. Defaults to 4.
        power : float, optional
            Amount added to / subtracted from the load by one charge /
            discharge action. Defaults to 100.
        """
        #super(BatteryEnv, self).__init__()

        self.dict_actions = {0:'discharge',1:'charge',2:'wait'}
        self.df = df
        self.capacity = capacity
        self.power = power
        # current state of charge; the battery starts full
        self.charge = capacity

        #We have only 3 discrete actions (charge,discharge,wait)
        self.action_space = spaces.Discrete(3)

        # our observation space is just one float value - our actual load.
        # The bounds are widened by `power` because a charge/discharge action
        # can push the actual load beyond the raw data range; float32 is used
        # because float16 cannot represent loads of this magnitude precisely.
        load_low = self.df['load'].min() - power
        load_high = self.df['load'].max() + power
        self.observation_space = spaces.Box(low=load_low, high=load_high, dtype=np.float32)

        # custom function to calculate reward
        self.calculate_reward_func = calculate_reward_func

        # reward list for monitoring (accumulates over all episodes)
        self.reward_list = []

        # actual load list for monitoring (accumulates over all episodes)
        self.actual_load_list = []

        # positional index of the current row in self.df
        self.state_idx = 0


    def step(self, action):
        """
        Execute one action within the environment.

        Returns a tuple ``(obs, reward, done, info)``: `obs` is the actual
        load after applying the action, `reward` is the value returned by the
        custom reward function, `done` is True when the episode is over and
        `info` is an (currently empty) dict.

        Raises
        ------
        KeyError
            If `action` is not one of the keys of `self.dict_actions`.
        IndexError
            If the environment is stepped past the last row of the data.
        """
        #mapping integer to action for actual load calculation
        str_action = self.dict_actions[action]

        #increase state idx within episode (day)
        next_idx = self.state_idx + 1
        if next_idx >= len(self.df):
            raise IndexError("BatteryEnv was stepped past the end of the load data")
        self.state_idx = next_idx

        # positional lookup, so a DataFrame with a non-default index works too
        base_load = self.df['load'].iloc[self.state_idx]

        #calculating our actual load and updating the state of charge;
        #an action the battery cannot perform (full / empty) acts like 'wait'
        if str_action == 'charge' and self.charge < self.capacity:
            obs = base_load + self.power
            self.charge += 1
        elif str_action == 'discharge' and self.charge > 0:
            obs = base_load - self.power
            self.charge -= 1
        else:
            obs = base_load

        # appending actual load to list for monitoring and comparison purposes
        self.actual_load_list.append(obs)

        # calculate reward from actual signal via inputted custom function
        reward = self.calculate_reward_func(obs,actual_load_list=self.actual_load_list)

        # appending curr reward to list for monitoring and comparison purposes
        self.reward_list.append(reward)

        #checking whether our episode (day interval) ends: either the weekday
        #changed between the previous and the current row, or no data is left
        weekdays = self.df['weekday']
        day_changed = weekdays.iloc[self.state_idx] != weekdays.iloc[self.state_idx - 1]
        data_exhausted = self.state_idx == len(self.df) - 1
        done = bool(day_changed or data_exhausted)

        return obs, reward, done, {}

    def reset(self):
        """
        Return the data row at the current position, i.e. the first state of
        the next episode. The position is not rewound and the state of charge
        and monitoring lists are left untouched, so episodes are consecutive
        days of the data.

        Note that this returns the whole row (a pandas Series), whereas
        `step` returns only the actual load value.
        """
        return self.df.iloc[self.state_idx,:]


    def render(self, mode='human', close=False):
        """Render the environment to the screen (placeholder implementation)."""
        print('random_print')

Custom reward func:

In [201]:
def calculate_reward_random(obs,actual_load_list):
    """Toy reward: +100 while the actual load stays inside a fixed band, -100 otherwise.

    Parameters
    ----------
    obs : float
        Actual load of the current step.
    actual_load_list : list
        History of actual loads; accepted for interface compatibility with
        the environment but not used by this reward.

    Returns
    -------
    int
        -100 if `obs` is above 15000 or below 12000, otherwise 100.
    """
    lower_bound, upper_bound = 12000, 15000
    outside_band = obs > upper_bound or obs < lower_bound
    return -100 if outside_band else 100

In [196]:
# Build the environment on the loaded data, using the toy reward function.
env = BatteryEnv(calculate_reward_random,df)

In [202]:
# Roll out 20 consecutive episodes, sampling a random action at every step.
for episode in range(20):
    # reset() does not rewind the environment; it returns the row at the current position
    observation = env.reset()
    for t in range(100): # step cap per episode; can't be smaller than 24 as 24 time points equal to 1 episode (1 day)
        #print(observation)
        action = env.action_space.sample()
        observation, reward, done, info = env.step(action)
        if done:
            print("Episode finished after {} timesteps".format(t+1))
            break

Episode finished after 24 timesteps
Episode finished after 24 timesteps
Episode finished after 24 timesteps
Episode finished after 24 timesteps
Episode finished after 24 timesteps
Episode finished after 24 timesteps
Episode finished after 24 timesteps
Episode finished after 24 timesteps
Episode finished after 24 timesteps
Episode finished after 24 timesteps
Episode finished after 24 timesteps
Episode finished after 24 timesteps
Episode finished after 24 timesteps
Episode finished after 24 timesteps
Episode finished after 24 timesteps
Episode finished after 24 timesteps
Episode finished after 24 timesteps
Episode finished after 24 timesteps
Episode finished after 24 timesteps
Episode finished after 24 timesteps


In [203]:
# Rewards of every step taken so far; step() appends to this list and it is never cleared between episodes.
env.reward_list

[-100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 100,
 100,
 100,
 100,
 100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -10