In [49]:
import os
from gymnasium import Env
from gymnasium import spaces
from gymnasium.spaces import Discrete
import pandas as pd
import numpy as np

from stable_baselines3.common.env_checker import check_env
from stable_baselines3 import DQN
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy


In [50]:
carbon_intensity = pd.read_csv('Datasets/Carbon_intensity_processed_hourly.csv')
electricity_usage = pd.read_csv('Datasets/Household_processed_hourly.csv')

In [51]:
carbon_intensity['Datetime (UTC)'] = pd.to_datetime(carbon_intensity['Datetime (UTC)'], utc=True)
carbon_intensity = carbon_intensity.set_index('Datetime (UTC)')
carbon_intensity.index = carbon_intensity.index.tz_convert(None)
carbon_intensity = carbon_intensity['Carbon Intensity gCO₂eq/kWh (LCA)']


In [52]:
c_int = np.asarray(carbon_intensity['2021-07-05 00:00:00':'2021-12-05 21:00:00'],dtype=np.int32)

In [53]:
electricity_usage['date'] = pd.to_datetime(electricity_usage['date'], utc=True)
electricity_usage= electricity_usage.set_index('date')
electricity_usage.index = electricity_usage.index.tz_convert(None)
electricity_usage = electricity_usage['energy meter val']

In [54]:
elec_usage = np.asarray(electricity_usage['2015-07-05 00:00:00':], dtype=np.int32)
e_usage =[i * 10 for i in elec_usage]


In [55]:
# energy_matrix = np.vstack((c_int, e_usage)).T
# energy_matrix

In [56]:
# Constant Environment Variables 
env_length = 1000
battery_size = 1000
charge_discharge_rate = 250

In [57]:
def get_carbon_emissions(c_int, e_usage):
    return c_int * (e_usage/1000)

In [58]:
def reward_algo(self):
    if self.emissions_baseline > self.emissions_with_battery:
        return 1
    else:
        return -1

In [59]:
def is_done(self):
    if self.length == 0:
        print(self.hist)
        print(f"emissions baseline: {self.emissions_baseline}")
        print(f"emissions w/ battery: {self.emissions_with_battery}")
        return True
    else:
        return False

In [60]:
class Environment(Env):
    def __init__(self):
        # Actions are discharge(-1), nothing (0) and charge (1)
        self.action_space = Discrete(3)
        
        self.observation_space = spaces.Dict({
                "carbon_int" : spaces.Box(low = 0, high = 10000, shape=(1,),dtype=int),
                "electric_usage" : spaces.Box(low = 0, high = 10000, shape=(1,),dtype=int),
                "battery_charge" : spaces.Box(low = 0, high = battery_size, shape=(1,),dtype=int),
                "c_saved" : spaces.Box(low = 0, high = 1000000, shape=(1,),dtype=int)
            }
        )            
        
        self.length = env_length
        self.hour_index = 0
        self.c_int = c_int
        self.e_usage = e_usage
        self.emissions_baseline = 0
        self.emissions_with_battery = 0
        
        self.hist = []
        
        self.state = {
            "carbon_int": np.array([self.c_int[0]]),
            "electric_usage": np.array([self.e_usage[0]]),
            "battery_charge": np.array([0]),
            "c_saved" : np.array([0]),
        }
        
    def step(self, action):
        curr_c_int = self.state["carbon_int"]
        curr_e_usage = self.state["electric_usage"]
        curr_charge = self.state["battery_charge"]
        self.emissions_baseline +=  get_carbon_emissions(curr_c_int, curr_e_usage)
        
        
        # Perform 'game logic' for each possible agent action
        if action == 0 and curr_charge > 0:
            if curr_charge >= charge_discharge_rate:
                self.emissions_with_battery += get_carbon_emissions(curr_c_int, curr_e_usage - charge_discharge_rate)
                new_battery_charge_val = curr_charge - charge_discharge_rate
                self.state.update({"battery_charge" : new_battery_charge_val})
            elif 0 < curr_charge < charge_discharge_rate:
                self.emissions_with_battery += get_carbon_emissions(curr_c_int, curr_e_usage - curr_charge)
                new_battery_charge_val = 0
                self.state.update({"battery_charge" : new_battery_charge_val})
            
        elif action == 1:
            self.emissions_with_battery += get_carbon_emissions(curr_c_int,curr_e_usage)
            
        elif action == 2 and curr_charge <= battery_size:
            if (battery_size - charge_discharge_rate) < curr_charge < battery_size:
                new_battery_charge_val = battery_size
                self.emissions_with_battery += get_carbon_emissions(curr_c_int, curr_e_usage + (battery_size -                                                                         curr_charge))
                self.state.update({"battery_charge" : new_battery_charge_val})
            else:
                self.emissions_with_battery += get_carbon_emissions(curr_c_int, curr_e_usage + charge_discharge_rate)
                new_battery_charge_val = curr_charge + charge_discharge_rate
                self.state.update({"battery_charge" : new_battery_charge_val})
        else:
            pass
        
        
        # update hist array
        self.hist.append(int(self.emissions_baseline-self.emissions_with_battery))
        
        # Update env variables
        self.length -= 1
        self.hour_index += 1
        
        # Go to next state
        self.state.update({"carbon_int": np.array([self.c_int[self.hour_index]])})
        self.state.update({"electric_usage": np.array([self.e_usage[self.hour_index]])})

        # Update emissions saved
        emissions_saved = self.emissions_baseline - self.emissions_with_battery
        self.state.update({"c_saved" : emissions_saved})
        
        # Update reward
        reward = reward_algo(self)
            
        # Check if finished
        done = is_done(self)
        
        info = {}
        truncated = False
        return self.state, reward, done, truncated, info
    
    
    def reset(self, seed = None, options = None):
        self.state = {
            "carbon_int": np.array([self.c_int[0]]),
            "electric_usage": np.array([self.e_usage[0]]),
            "battery_charge": np.array([0]),
            "c_saved" : np.array([0]),
        } 
        self.length = env_length
        self.hour_index = 0
        self.emissions_baseline = 0
        self.emissions_with_battery = 0
        
        self.hist = []

        info = {}
        
        return self.state, info

In [61]:
env = Environment()

In [62]:
episodes = 5
for episode in range(1, episodes+1):
    state = env.reset()
    done = False
    score = 0

    while not done:
        action = env.action_space.sample() # Random actions here
        n_state, reward, done, truncated, info = env.step(action)
        score+=reward
    print('Episode:{} Score:{}'.format(episode, score))

65353
emissions baseline: [436961.38]
emissions w/ battery: [371607.44]
Episode:1 Score:988
52306
emissions baseline: [436961.38]
emissions w/ battery: [384654.69]
Episode:2 Score:998
39548
emissions baseline: [436961.38]
emissions w/ battery: [397412.57]
Episode:3 Score:1000
49216
emissions baseline: [436961.38]
emissions w/ battery: [387745.08]
Episode:4 Score:974
51224
emissions baseline: [436961.38]
emissions w/ battery: [385736.56]
Episode:5 Score:972


In [63]:
log_path = os.path.join('Training', 'Logs')
model = DQN('MultiInputPolicy', env, verbose = 1, tensorboard_log = log_path)

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [64]:
model.learn(total_timesteps=20000)

Logging to Training/Logs/DQN_9
49734
emissions baseline: [436961.38]
emissions w/ battery: [387227.01]
166706
emissions baseline: [436961.38]
emissions w/ battery: [270254.41]
193041
emissions baseline: [436961.38]
emissions w/ battery: [243919.63]
204213
emissions baseline: [436961.38]
emissions w/ battery: [232748.29]
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1e+03    |
|    ep_rew_mean      | 1e+03    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4        |
|    fps              | 588      |
|    time_elapsed     | 6        |
|    total_timesteps  | 4000     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 18       |
|    n_updates        | 974      |
----------------------------------
225518
emissions baseline: [436961.38]
emissions w/ battery: [211442.84]
223349
emissions baseline: [436961.38]
emissions w/ battery: [213611.87]
233883


<stable_baselines3.dqn.dqn.DQN at 0x7fd19b1cfd60>

In [65]:
evaluate_policy(model, env, n_eval_episodes=10, render = False)



431706
emissions baseline: [436961.38]
emissions w/ battery: [5255.06]
431706
emissions baseline: [436961.38]
emissions w/ battery: [5255.06]
431706
emissions baseline: [436961.38]
emissions w/ battery: [5255.06]
431706
emissions baseline: [436961.38]
emissions w/ battery: [5255.06]
431706
emissions baseline: [436961.38]
emissions w/ battery: [5255.06]
431706
emissions baseline: [436961.38]
emissions w/ battery: [5255.06]
431706
emissions baseline: [436961.38]
emissions w/ battery: [5255.06]
431706
emissions baseline: [436961.38]
emissions w/ battery: [5255.06]
431706
emissions baseline: [436961.38]
emissions w/ battery: [5255.06]
431706
emissions baseline: [436961.38]
emissions w/ battery: [5255.06]


(1000.0, 0.0)

In [66]:
episodes = 1
for episode in range(1, episodes+1):
    obs, inf = env.reset()
    done = False
    score = 0

    while not done:
        action, _ = model.predict(obs) # Random actions here
        obs, reward, done, truncated, info = env.step(action)
        score += reward
    print('Episode:{} Reward:{} Total Grams Carbon Saved:{}'.format(episode, score, obs["c_saved"].astype("int64")))

410937
emissions baseline: [436961.38]
emissions w/ battery: [26024.15]
Episode:1 Reward:1000 Total Grams Carbon Saved:[410937]


In [67]:
training_log_path = os.path.join(log_path, 'DQN_9')