In [244]:
import os
from gymnasium import Env
from gymnasium import spaces
from gymnasium.spaces import Discrete
import pandas as pd
import numpy as np

from stable_baselines3.common.env_checker import check_env
from stable_baselines3 import DQN
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy


In [245]:
carbon_intensity = pd.read_csv('Datasets/Carbon_intensity_processed_hourly.csv')
electricity_usage = pd.read_csv('Datasets/Household_processed_hourly.csv')

In [246]:
carbon_intensity['Datetime (UTC)'] = pd.to_datetime(carbon_intensity['Datetime (UTC)'], utc=True)
carbon_intensity = carbon_intensity.set_index('Datetime (UTC)')
carbon_intensity.index = carbon_intensity.index.tz_convert(None)
carbon_intensity = carbon_intensity['Carbon Intensity gCO₂eq/kWh (LCA)']


In [247]:
c_int = np.asarray(carbon_intensity['2021-07-05 00:00:00':'2021-12-05 21:00:00'],dtype=np.int32)

In [248]:
electricity_usage['date'] = pd.to_datetime(electricity_usage['date'], utc=True)
electricity_usage= electricity_usage.set_index('date')
electricity_usage.index = electricity_usage.index.tz_convert(None)
electricity_usage = electricity_usage['energy meter val']

In [249]:
e_usage = np.asarray(electricity_usage['2015-07-05 00:00:00':], dtype=np.int32)

In [250]:
# energy_matrix = np.vstack((c_int, e_usage)).T
# energy_matrix

In [251]:
# Constant Environment Variables 
env_length = 1000
charge_discharge_rate = 250

In [252]:
def get_carbon_emissions(c_int, e_usage):
    return c_int * (e_usage/1000)

In [253]:
def reward_algo(self):
    if self.emissions_baseline > self.emissions_with_battery:
        return 1
    else:
        return -1

In [254]:
def is_done(self):
    if self.length == 0:
        # ser = pd.Series(self.bat_charge_viz)
        # ser.plot()
        print(self.hist[-1])
        print(f"emissions baseline: {self.emissions_baseline}")
        print(f"emissions w/ battery: {self.emissions_with_battery}")
        return True
    else:
        return False

In [255]:
class Environment(Env):
    def __init__(self):
        # Actions are discharge(-1), nothing (0) and charge (1)
        self.action_space = Discrete(3)
        
        self.observation_space = spaces.Dict({
                "carbon_int" : spaces.Box(low = 0, high = 1000, shape=(1,),dtype=int),
                "electric_usage" : spaces.Box(low = 0, high = 1000, shape=(1,),dtype=int),
                "battery_charge" : spaces.Box(low = 0, high = 1000, shape=(1,),dtype=int),
                "c_saved" : spaces.Box(low = 0, high = 100000, shape=(1,),dtype=float)
            }
        )            
        
        self.length = env_length
        self.hour_index = 0
        self.c_int = c_int
        self.e_usage = e_usage
        self.emissions_baseline = 0
        self.emissions_with_battery = 0
        
        self.hist = []
        
        self.state = {
            "carbon_int": np.array([self.c_int[0]]),
            "electric_usage": np.array([self.e_usage[0]]),
            "battery_charge": np.array([0]),
            "c_saved" : np.array([0]),
        }
        
    def step(self, action):
        self.emissions_baseline +=  get_carbon_emissions(self.state["carbon_int"], self.state["electric_usage"])
        curr_c_int = self.state["carbon_int"]
        curr_e_usage = self.state["electric_usage"]
        
        
        # Perform 'game logic' for each possible agent action
        if action == 0 and self.state["battery_charge"] >= charge_discharge_rate:
            self.emissions_with_battery += get_carbon_emissions(curr_c_int, curr_e_usage - charge_discharge_rate)
            new_battery_charge_val = self.state["battery_charge"] - charge_discharge_rate
            self.state.update({"battery_charge" : new_battery_charge_val})
            
        elif action == 1:
            pass
            
        elif action == 2 and self.state["battery_charge"] <= 1000:
            if 850 < self.state["battery_charge"] > 1000:
                new_battery_charge_val = 1000
                self.emissions_with_battery += get_carbon_emissions(curr_c_int, curr_e_usage + charge_discharge_rate)
                self.state.update({"battery_charge" : new_battery_charge_val})
            else:
                self.emissions_with_battery += get_carbon_emissions(curr_c_int, curr_e_usage + charge_discharge_rate)
                new_battery_charge_val = self.state["battery_charge"] + charge_discharge_rate
                self.state.update({"battery_charge" : new_battery_charge_val})
        else:
            pass
        
        
        # update hist array
        self.hist.append(int(self.emissions_baseline-self.emissions_with_battery))
        
        # Update env variables
        self.length -= 1
        self.hour_index += 1
        
        # Go to next state
        self.state.update({"carbon_int": np.array([self.c_int[self.hour_index]])})
        self.state.update({"electric_usage": np.array([self.e_usage[self.hour_index]])})

        # Update emissions saved
        emissions_saved = self.emissions_baseline - self.emissions_with_battery
        self.state.update({"c_saved" : emissions_saved})
        
        # Update reward
        reward = reward_algo(self)
            
        # Check if finished
        done = is_done(self)
        
        info = {}
        truncated = False
        return self.state, reward, done, truncated, info
    
    
    def reset(self, seed = None, options = None):
        self.state = {
            "carbon_int": np.array([self.c_int[0]]),
            "electric_usage": np.array([self.e_usage[0]]),
            "battery_charge": np.array([0]),
            "c_saved" : np.array([0]),
        } 
        self.length = env_length
        self.hour_index = 0
        self.emissions_baseline = 0
        self.emissions_with_battery = 0
        
        self.hist = []

        info = {}
        
        return self.state, info

In [256]:
env = Environment()

In [257]:
episodes = 5
for episode in range(1, episodes+1):
    state = env.reset()
    done = False
    score = 0

    while not done:
        action = env.action_space.sample() # Random actions here
        n_state, reward, done, truncated, info = env.step(action)
        score+=reward
    print('Episode:{} Score:{}'.format(episode, score))

19563
emissions baseline: [43696.138]
emissions w/ battery: [24133.036]
Episode:1 Score:992
19396
emissions baseline: [43696.138]
emissions w/ battery: [24299.814]
Episode:2 Score:982
19903
emissions baseline: [43696.138]
emissions w/ battery: [23792.972]
Episode:3 Score:984
20974
emissions baseline: [43696.138]
emissions w/ battery: [22721.547]
Episode:4 Score:966
19255
emissions baseline: [43696.138]
emissions w/ battery: [24440.369]
Episode:5 Score:996


In [258]:
log_path = os.path.join('Training', 'Logs')
model = DQN('MultiInputPolicy', env, verbose = 1, tensorboard_log = log_path)

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [259]:
model.learn(total_timesteps=20000)

Logging to Training/Logs/DQN_6
18884
emissions baseline: [43696.138]
emissions w/ battery: [24811.417]
29709
emissions baseline: [43696.138]
emissions w/ battery: [13986.875]
35184
emissions baseline: [43696.138]
emissions w/ battery: [8512.121]
34514
emissions baseline: [43696.138]
emissions w/ battery: [9181.942]
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1e+03    |
|    ep_rew_mean      | 996      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4        |
|    fps              | 499      |
|    time_elapsed     | 8        |
|    total_timesteps  | 4000     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 5.39     |
|    n_updates        | 974      |
----------------------------------
34037
emissions baseline: [43696.138]
emissions w/ battery: [9658.398]
36783
emissions baseline: [43696.138]
emissions w/ battery: [6912.832]
19077
emissions 

<stable_baselines3.dqn.dqn.DQN at 0x7fd0e84fd100>

In [260]:
evaluate_policy(model, env, n_eval_episodes=10, render = False)



43696
emissions baseline: [43696.138]
emissions w/ battery: 0
43696
emissions baseline: [43696.138]
emissions w/ battery: 0
43696
emissions baseline: [43696.138]
emissions w/ battery: 0
43696
emissions baseline: [43696.138]
emissions w/ battery: 0
43696
emissions baseline: [43696.138]
emissions w/ battery: 0
43696
emissions baseline: [43696.138]
emissions w/ battery: 0
43696
emissions baseline: [43696.138]
emissions w/ battery: 0
43696
emissions baseline: [43696.138]
emissions w/ battery: 0
43696
emissions baseline: [43696.138]
emissions w/ battery: 0
43696
emissions baseline: [43696.138]
emissions w/ battery: 0


(1000.0, 0.0)

In [261]:
episodes = 5
for episode in range(1, episodes+1):
    obs, inf = env.reset()
    done = False
    score = 0

    while not done:
        action, _ = model.predict(obs) # Random actions here
        obs, reward, done, truncated, info = env.step(action)
        score += reward
    print('Episode:{} Reward:{} Total Grams Carbon Saved:{}'.format(episode, score, obs["c_saved"].astype("int64")))

41959
emissions baseline: [43696.138]
emissions w/ battery: [1736.139]
Episode:1 Reward:1000 Total Grams Carbon Saved:[41959]
42669
emissions baseline: [43696.138]
emissions w/ battery: [1026.796]
Episode:2 Reward:1000 Total Grams Carbon Saved:[42669]
41937
emissions baseline: [43696.138]
emissions w/ battery: [1758.49]
Episode:3 Reward:1000 Total Grams Carbon Saved:[41937]
41290
emissions baseline: [43696.138]
emissions w/ battery: [2405.521]
Episode:4 Reward:1000 Total Grams Carbon Saved:[41290]
42299
emissions baseline: [43696.138]
emissions w/ battery: [1396.205]
Episode:5 Reward:1000 Total Grams Carbon Saved:[42299]
