<a href="https://colab.research.google.com/github/DominikBurkert/RL_market_agent/blob/main/BatteryEnvironment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [9]:
#!pip install "stable-baselines3[extra]>=2.0.0a4"

In [5]:
pip show stable-baselines3

Name: stable-baselines3
Version: 2.0.0a10
Summary: Pytorch version of Stable Baselines, implementations of reinforcement learning algorithms.
Home-page: https://github.com/DLR-RM/stable-baselines3
Author: Antonin Raffin
Author-email: antonin.raffin@dlr.de
License: MIT
Location: /usr/local/lib/python3.10/dist-packages
Requires: cloudpickle, gymnasium, matplotlib, numpy, pandas, torch
Required-by: 


In [10]:
from stable_baselines3 import PPO

In [8]:
pip show gym

  and should_run_async(code)


Name: gym
Version: 0.25.2
Summary: Gym: A universal API for reinforcement learning environments
Home-page: https://www.gymlibrary.ml/
Author: Gym Community
Author-email: jkterry@umd.edu
License: MIT
Location: /usr/local/lib/python3.10/dist-packages
Requires: cloudpickle, gym-notices, numpy
Required-by: dopamine-rl


In [7]:
import gym
from gym import Env
from gym.spaces import Box
import numpy as np
import random
import pandas as pd
import datetime
from datetime import timedelta

### Environment Battery

In [11]:
class BatteryEnvironment(gym.Env):
    #params: power for discharge/charge of batery(in kW), capacity for batery in kWh, price for calculate profit, residualload for agent to learn
    def __init__(self, time_list_hours=[], prices=[], residual_load=[], capacity=200, power= 10): #power in kW, capacity in kWh
        """
            The customized initialisation of the environment.

            Returns:

        """
        #get predefined stuff
        super(BatteryEnvironment, self).__init__()
        
        #define variabels of environment from input data
        self.residual_load = residual_load
        self.prices = prices
        
        self.Start = min(time_list_hours)
        self.End = max(time_list_hours)
        self.eps_length= 24

        #set initial date
        self.date=self.Start

        # reset profit to 0
        self.profit = 0

        #define variables of batery
        self.capacity = capacity
        #self.power = power
        self.soc = 0 #soc = state of charge (soc=0 means: batery is empty in the beginning)
        


        #define possible OBSERVATIONS:
        #each hour of the day is one state with several observations
        # observation[0] = residual load prediction for the next 24 hours (in kWh)
        # observation[1] = soc of the batery (in kWh)
        #observation[2] = 
        # TODO normalize of residual load and soc (important for training)
        self.observation_space = Box(low=np.array([min(residual_load),0]), high=np.array([max(residual_load),capacity]),
                                     shape=(2,), dtype=np.float32)
        #self.observation_space = Box(low=np.array([-6000,0]), high=np.array([70000,capacity]),
        #                             shape=(2,), dtype=np.float32)
        
        
        #define possible ACTIONS:
        #the agent can decide how much energy he wants to charge/discharge
        #TODO normalize the amount of charge/discharge between -1 and 1 (important for training)
        
        self.action_space = Box(low=np.array([-2]), high=np.array([2]), shape=(1,), dtype=np.float32)
        #self.action_space = Box(low=np.array([-1]), high=np.array([1]), shape=(1,), dtype=np.float32)   #integer
        
        #other solution: define a discrete action_space
        #self.action_space = Discrete(3)#charge, doing nothing, discharge

    #function that sampels days from the data
    """
    def _observe_state(self, date):
        observation_end=str(pd.to_datetime(self.date)+timedelta(hours = 24))
        self.residual_load_forecast = self.residual_load[date:observation_end]

        return np.concatenate((self.soc, self.residual_load_forecast),axis=None)
    """
    def step (self, action):
        #observation_end=str(pd.to_datetime(self.date)+timedelta(hours = 24))
        #observation = np.concatenate((self.soc, self.residual_load[self.date:observation_end],),axis=None)
        observation = np.concatenate((self.soc, self.residual_load[self.date]),axis=None)
        
        #observation = self._observe_state(self.date)
        #get amount of charge/discharge
        #calculate amount energy with restrictions of batery capacity
        if action >=0:
            #charge
            if action + self.soc >= self.capacity: #batery is almost full
                amount_energy = self.capacity - self.soc
            else: #enough space in batery            
                amount_energy = action
        else:
            #discharge
            if action + self.soc <=0: #min restriction (batery soc cannot be negative)
                amount_energy = -self.soc
            else: #there is enough energy to discharge
                amount_energy = action
        
        self.soc += amount_energy
        
        #calculate profit (sell on market) resp. calculate investment (buy on market)
        #TODO right now we calculate with MWh but we need kWh (solution: divide price by 1000)
        reward = -self.prices[self.date] * amount_energy
        
        # Reduce eps_length by 1 hour
        self.eps_length -= 1
        

        #check if terminate
        if self.eps_length <= 0:
            done = True
        else:
            done = False
            
        #have little place holder for info as gym requires it
        info={}
        print('date:{} action:{} amount_energy: {} soc:{} price:{} reward:{} eps_length:{}'.format(self.date, action, amount_energy, self.soc, self.prices[self.date], reward, self.eps_length))
        
        #add one hour on date for the next step()
        self.date=str(pd.to_datetime(self.date)+timedelta(hours = 1))

        return observation, reward, done, info

    def reset (self):
        #reset profit to 0
        self.reward = 0
        self.soc = 0
        self.eps_length = 24        
        
        return self.soc


#### get data 

In [14]:
from google.colab import files
uploaded = files.upload()

Saving data.csv to data.csv


In [16]:
import io
data = pd.read_csv(io.BytesIO(uploaded['data.csv']))

In [18]:
data.head(3)

  and should_run_async(code)


Unnamed: 0,local_DateTime,Forecasted Solar [MWh],Forecasted Wind Offshore [MWh],Forecasted Wind Onshore [MWh],Demand in MWh,total vre in MWh,residual load in MWh,prices in €/MWh
0,2019-06-01 00:00:00+00:00,0.0,2730.0,2056.0,42405.0,4786.0,37619.0,33.63
1,2019-06-01 01:00:00+00:00,0.0,2887.0,2126.0,40510.0,5013.0,35497.0,31.03
2,2019-06-01 02:00:00+00:00,0.0,3097.0,2549.0,38905.0,5646.0,33259.0,28.23


In [19]:
#data= pd.read_csv('../data.csv',delimiter=',')
data.set_index('local_DateTime', drop=True, append=False, inplace=True, verify_integrity=True)
residual_load = data['residual load in MWh']
residual_load[1]
prices = data['prices in €/MWh']
time_list_hours = pd.to_datetime(data.index).tolist()
time_list_hours = (data.index).tolist()

### Test Environment

In [20]:
env = BatteryEnvironment(time_list_hours = time_list_hours, prices = prices, residual_load = residual_load, capacity=200, power= 10)

#random action
env.action_space.sample()
#random observation
env.observation_space.sample()

days = 2
for days in range(0, days):
    env.reset()
    print('Tag: ',days)
    profit = 0
    done = False
    
    while not done:
        action = env.action_space.sample()
        observation, reward, done, _ = env.step(action)
        #print('SOC {} reward: {}'.format(soc, reward))
        #print(observation)
        profit += reward
        
    print('Tag:{} Observation:{} Score:{}'.format(days, observation, profit))
    print('Tag:{} SOC ende:{}MWh Score:{}'.format(days, observation[0], profit))


Tag:  0
date:2019-06-01 00:00:00+00:00 action:[0.40088177] amount_energy: [0.40088177] soc:[0.40088177] price:33.63 reward:[-13.481654] eps_length:23
date:2019-06-01 01:00:00+00:00 action:[-1.0694563] amount_energy: [-0.40088177] soc:[0.] price:31.03 reward:[12.439362] eps_length:22
date:2019-06-01 02:00:00+00:00 action:[-1.4790144] amount_energy: [-0.] soc:[0.] price:28.23 reward:[0.] eps_length:21
date:2019-06-01 03:00:00+00:00 action:[0.2594964] amount_energy: [0.2594964] soc:[0.2594964] price:27.62 reward:[-7.1672907] eps_length:20
date:2019-06-01 04:00:00+00:00 action:[0.16199553] amount_energy: [0.16199553] soc:[0.42149192] price:25.53 reward:[-4.135746] eps_length:19
date:2019-06-01 05:00:00+00:00 action:[-0.5057514] amount_energy: [-0.42149192] soc:[0.] price:23.14 reward:[9.753323] eps_length:18
date:2019-06-01 06:00:00+00:00 action:[0.16838275] amount_energy: [0.16838275] soc:[0.16838275] price:25.8 reward:[-4.344275] eps_length:17
date:2019-06-01 07:00:00+00:00 action:[-1.28

  logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
