In [1137]:
import os
from gymnasium import Env
from gymnasium import spaces
from gymnasium.spaces import Discrete
import pandas as pd
import numpy as np

from stable_baselines3.common.env_checker import check_env
from stable_baselines3 import DQN
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy


In [1138]:
carbon_intensity = pd.read_csv('Datasets/Carbon_intensity_processed_hourly.csv')
electricity_usage = pd.read_csv('Datasets/Household_processed_hourly.csv')

In [1139]:
carbon_intensity['Datetime (UTC)'] = pd.to_datetime(carbon_intensity['Datetime (UTC)'], utc=True)
carbon_intensity = carbon_intensity.set_index('Datetime (UTC)')
carbon_intensity.index = carbon_intensity.index.tz_convert(None)
carbon_intensity = carbon_intensity['Carbon Intensity gCO₂eq/kWh (LCA)']


In [1140]:
c_int = np.asarray(carbon_intensity['2021-07-05 00:00:00':'2021-12-05 21:00:00'],dtype=np.int32)

In [1141]:
electricity_usage['date'] = pd.to_datetime(electricity_usage['date'], utc=True)
electricity_usage= electricity_usage.set_index('date')
electricity_usage.index = electricity_usage.index.tz_convert(None)
electricity_usage = electricity_usage['energy meter val']

In [1142]:
e_usage = np.asarray(electricity_usage['2015-07-05 00:00:00':], dtype=np.int32)

In [1143]:
# energy_matrix = np.vstack((c_int, e_usage)).T
# energy_matrix

In [1144]:
# Constant Environment Variables 
env_length = 24
charge_discharge_rate = 250

In [1145]:
def get_carbon_emissions(c_int, e_usage):
    return c_int * (e_usage/1000)

In [1146]:
def reward_algo(self):
    if self.emissions_baseline > self.emissions_with_battery:
        return 1
    else:
        return -1

In [1147]:
def is_done(self):
    if self.length == 0:
        return True
    else:
        return False

In [1148]:
class Environment(Env):
    def __init__(self):
        # Actions are discharge(-1), nothing (0) and charge (1)
        self.action_space = Discrete(3)
        
        self.observation_space = spaces.Dict({
                "carbon_int" : spaces.Box(low = 0, high = 1000, shape=(1,),dtype=int),
                "electric_usage" : spaces.Box(low = 0, high = 1000, shape=(1,),dtype=int),
                "battery_charge" : Discrete(1000),
            }
        )            
        
        self.length = env_length
        self.hour_index = 0
        self.c_int = c_int
        self.e_usage = e_usage
        self.emissions_baseline = 0
        self.emissions_with_battery = 0
        
        self.state = {
            "carbon_int": np.array([self.c_int[0]]),
            "electric_usage": np.array([self.e_usage[0]]),
            "battery_charge": 0,
        }
        
    def step(self, action):
        self.emissions_baseline +=  get_carbon_emissions(self.state["carbon_int"], self.state["electric_usage"])
        curr_c_int = self.state["carbon_int"]
        curr_e_usage = self.state["electric_usage"]
        
        # Perform 'game logic' for each possible agent action
        if action == 0:
            self.emissions_with_battery += get_carbon_emissions(curr_c_int, curr_e_usage - charge_discharge_rate)
            new_battery_charge_val = self.state["battery_charge"] - charge_discharge_rate
            self.state.update({"battery_charge" : new_battery_charge_val})
            # print(f'Discharge: {new_battery_charge_val}')
        elif action == 1:
            pass
            
        elif action == 2:
            self.emissions_with_battery += get_carbon_emissions(curr_c_int, curr_e_usage + charge_discharge_rate)
            new_battery_charge_val = self.state["battery_charge"] + charge_discharge_rate
            self.state.update({"battery_charge" : new_battery_charge_val})
            # print(f'Charge: {new_battery_charge_val}')
        else:
            print('Should not happen')
        
        # Update env variables
        self.length -= 1
        self.hour_index += 1
        
        # Go to next state
        self.state.update({"carbon_int": np.array([self.c_int[self.hour_index]])})
        self.state.update({"electric_usage": np.array([self.e_usage[self.hour_index]])})
        
        # Update reward
        reward = reward_algo(self)
            
        # Check if finished
        done = is_done(self)
        
        info = {}
        truncated = False
        return self.state, reward, done, truncated, info
    
    
    def reset(self, seed = None, options = None):
        self.state = {
            "carbon_int": np.array([self.c_int[0]]),
            "electric_usage": np.array([self.e_usage[0]]),
            "battery_charge": 0,
        } 
        self.length = env_length
        self.hour_index = 0
        self.emissions_baseline = 0
        self.emissions_with_battery = 0
        
        info = {}
        
        return self.state, info

In [1149]:
env = Environment()
# check_env(env)

In [1150]:
episodes = 5
for episode in range(1, episodes+1):
    state = env.reset()
    done = False
    score = 0

    while not done:
        action = env.action_space.sample() # Random actions here
        n_state, reward, done, truncated, info = env.step(action)
        score+=reward
    print('Episode:{} Score:{}'.format(episode, score))

Episode:1 Score:8
Episode:2 Score:-22
Episode:3 Score:24
Episode:4 Score:24
Episode:5 Score:24


In [1151]:
# log_path = os.path.join('Training', 'Logs')
# model = DQN('MultiInputPolicy', env, verbose = 1, tensorboard_log = log_path)

In [1152]:
# model.learn(total_timesteps=4000)