# Info

* discrete action space of size 21, 10 discharge, 10 charge, 1 noop
* only One building
* only one battery
* can use custom reward with zeta parameter
* can yous masked action space

In [18]:
import numpy as np
import torch.nn
import math
import os
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px

from citylearn.citylearn import CityLearnEnv
import citylearn
from citylearn.energy_model import HeatPump
from citylearn.utilities import read_json

import gym

import gym
from gym.spaces import Box
from stable_baselines3 import PPO, A2C, DDPG, TD3, SAC

from sb3_contrib.common.maskable.policies import MaskableActorCriticPolicy
from sb3_contrib.common.wrappers import ActionMasker
from sb3_contrib.ppo_mask import MaskablePPO
from sb3_contrib.common.maskable.utils import get_action_masks

In [19]:
def action_space_to_dict(aspace):
    """ Only for box space """
    return { "high": aspace.high,
             "low": aspace.low,
             "shape": aspace.shape,
             "dtype": str(aspace.dtype)
    }

def env_reset(env):
    observations = env.reset()
    action_space = env.action_space
    observation_space = env.observation_space
    building_info = env.get_building_information()
    building_info = list(building_info)
    action_space_dicts = [action_space_to_dict(asp) for asp in action_space]
    observation_space_dicts = [action_space_to_dict(osp) for osp in observation_space]
    obs_dict = {"action_space": action_space_dicts,
                "observation_space": observation_space_dicts,
                "building_info": building_info,
                "observation": observations }
    return obs_dict

In [20]:
class BuildingDevices:
  """
    Keeps track of all storage devices of a building.
  """
  def __init__(self, building, num_building):
    self.num_building = num_building
    self.building = building
    self.devices = {'battery' : Device_(building.electrical_storage, 'battery'),
                    'cooling' : None,
                    'dhw' : None}
    
  def compute_bounds(self):
    bounds = [self.bounds_action(i) for i,j in self.devices.items() if j is not None]
    return gym.spaces.Box(low=np.array([i[0] for i in bounds]), high=np.array([i[1] for i in bounds]), dtype=np.float64)
  
# ACTION 0 :  cooling
# ACTION 1 : dhw
# ACTION 2 : battery
  
  def bounds_action(self, type_action):
    device = self.devices[type_action].device
    if device is None:
        return None # if return none building doest have battery
    if type_action == 'battery':
        capacity = device.capacity_history[-2] if len(device.capacity_history) > 1 else device.capacity
        #HIGH
        #get max energy that the storage unit can use to charge [kW]
        #if trying to put more than the battery can accept reject action
        high1 = device.get_max_input_power()/capacity
        high2 = (device.capacity - device.soc_init)/(0.95*device.capacity) #approxim (efficiency = 0.95)
        high = min(high1, high2, 1)

        #LOW
        low1 = -device.get_max_input_power()/capacity
        low2 = (-device.soc_init*0.95)/device.capacity #approxim (efficiency = 0.95)
        low = max(low1, low2, -1)

    else:
        bool_h2, bool_l2 = False, False
        if type_action == 'cooling':
            # print('\ncooling')
            space_demand = self.building.cooling_demand[self.building.time_step]
            max_output = self.building.cooling_device.get_max_output_power(self.building.weather.outdoor_dry_bulb_temperature[self.building.time_step], False)
            # print('space_demand',space_demand)
            # print('max_output', max_output)
            # print('capacity:', device.capacity)
        else: #dhw
            # print('\ndhw')
            space_demand = self.building.dhw_demand[self.building.time_step]
            max_output = self.building.dhw_device.get_max_output_power(self.building.weather.outdoor_dry_bulb_temperature[self.building.time_step], False)\
            if isinstance(self.building.dhw_device, HeatPump) else self.building.dhw_device.get_max_output_power()
            # print('space_demand',space_demand)
            # print('max_output', max_output)
        space_demand = 0 if space_demand is None or math.isnan(space_demand) else space_demand # case where space demand is unknown

        #HIGH
        high1 = (max_output-space_demand) / device.capacity
        # print('high1', high1)
        if device.max_input_power is not None:
            bool_h2 = True
            high2 = device.max_input_power / device.capacity
            # print('high2', high2)
        high3 = (device.capacity - device.soc_init) / (device.capacity*device.efficiency)
        # print(device.capacity, device.soc_init)
        # print('high3', high3)
        
        if bool_h2:
            high = min(high1, high2, high3, 0.5)
        else:
            high = min(high1, high3, 0.5)


        #LOW
        low1 = -space_demand / device.capacity
        # print('low1', low1)
        if device.max_output_power is not None:
            bool_l2 = True
            low2 = -device.max_output_power / device.capacity
            # print('low2',low2)
        low3 = (-device.soc_init*device.efficiency) / device.capacity
        # print('low3',low3)

        if bool_l2:
            low = max(low1, low2, low3, -0.5)
        else:
            low = max(low1, low3, -0.5)

    return (low, high)
  
  def cost(self, zeta):
    """
    Other way to compute cost.
    1) we compute the total electrical consumption of the building,
    2) we the offset the PV generation if existant.
    3) we treat the case of charging and discharging the device 
    """
    #without dhw and cooling storage
    #net conso = cooling + dhw + electrical_storage + nsl - solar
    global_conso = 0
    building = self.building

    price = building.pricing.electricity_pricing[building.time_step]
    carbon = building.carbon_intensity.carbon_intensity[building.time_step]

    # print(building.time_step)
    cooling_demand = building.energy_simulation.cooling_demand[building.time_step] + building.cooling_storage.energy_balance[building.time_step]
    cooling_conso = building.cooling_device.get_input_power(cooling_demand, building.weather.outdoor_dry_bulb_temperature[building.time_step], heating=False)
    global_conso += cooling_conso

    dhw_demand = building.energy_simulation.dhw_demand[building.time_step] + building.dhw_storage.energy_balance[building.time_step]
    if isinstance(building.dhw_device, HeatPump):
            dhw_consumption = building.dhw_device.get_input_power(dhw_demand, building.weather.outdoor_dry_bulb_temperature[building.time_step], heating=True)
    else:
            dhw_consumption = building.dhw_device.get_input_power(dhw_demand)
    
    global_conso += dhw_consumption
    global_conso += building.energy_simulation.non_shiftable_load[building.time_step]
    global_conso -= building.pv.get_generation(building.energy_simulation.solar_generation)[building.time_step]

    # print('globa_conso', global_conso)

    #battery
    #discharge 
    #energy that can be used by building (< energy actually discharged)
    battery_conso_used = building.energy_from_electrical_storage[building.time_step]
    #remove from global conso the energy delivered by battery (not bought from the grid)
    # print('battery_conso_used', battery_conso_used)
    global_conso -= battery_conso_used

    #energy coming out of battery
    soc_t = building.electrical_storage.soc[-1]
    soc_t_1 = 0 if len(building.electrical_storage.soc) < 2 else building.electrical_storage.soc[-2]
    battery_net_conso =  max(0, soc_t_1 - soc_t) #keep only the case where we discharge
    # print('battery_net_conso', battery_net_conso)
    adjusted_battery_net_conso = battery_net_conso * (1 - zeta)
    
    #charge
    energy_used = building.energy_to_electrical_storage[building.time_step]
    global_conso += (energy_used * zeta)
    if energy_used > 0: #charging
        #update cost of 1 unit of energy in the device
        self.devices['battery'].update_cost(energy_used, price, carbon)
    
    # print('globa_conso', global_conso)
    #can be neagtive
    global_conso = max(0, global_conso)

    # print(global_conso)
    cost = (price + carbon) * global_conso
    cost += self.devices['battery'].cost * adjusted_battery_net_conso

    return -cost

In [21]:
class Device_:
  def __init__(self, device, storage_type):
    self.device = device
    # self.price_cost = 0
    # self.emission_cost = 0
    self.cost = 0
    self.storage_type = storage_type

  def loss(self, cost_t, pv_offset, battery_offset):
    """
    get avg price between (battery release, grid release and PV- direct consumption)
    add relative incertainty, but true in pratice as the energy is added up in a global consumption pool 

    battery: if battery releases, price = avg((total released by battery - remaining conso), grid) in the case of thermal
    in the case of battery, avg price with PV
    """
    if not self.device:
      print('not device')
      raise ValueError

    energy_used = self.device.energy_balance[-1]
    if isinstance(energy_used, np.ndarray):
      print('probleme energy used array instead of float')
      energy_used = energy_used[0]

    #charge
    if energy_used > 0:
      #if pv production, part of the energy is free
      if pv_offset > 0:
        energy_used = max(0, energy_used-pv_offset)
      #if usage of battery, part of energy has been already taken into account so free
      if battery_offset > 0:
        energy_used = max(0, energy_used-battery_offset)
      # self.price_cost = ((self.price_cost*self.device.soc[-2])+(energy_used*price))/self.device.soc[-1]
      # self.emission_cost = ((self.emission_cost*self.device.soc[-2])+(energy_used*emission))/self.device.soc[-1]
      
      total = self.device.soc[-1]
      if isinstance(total, np.ndarray):
        print('probleme soc-1 array instead of float')
        total = total[0]

      prev = self.device.soc[-2]
      if isinstance(prev, np.ndarray):
        print('probleme soc-2 array instead of float')
        prev = prev[0]

      self.cost = ((self.cost*prev) + (energy_used*cost_t)) / total
      return energy_used, None, None #energy_used > 0

    #discharge
    else:
      #energy_processed is total energy used during charge/discharge process including losses
      #energy_used is the energy_processed minus the losses (used by building)
      energy_processed = self.device.soc[-2]-self.device.soc[-1]
      return -energy_used, energy_processed, self.cost # -energy_used > 0, energy_processed > 0 

  def update_cost(self, energy_used, price_t, emission_t):
    prev_soc = 0 if len(self.device.soc)<2 else self.device.soc[-2]
    cost_t = price_t + emission_t
    self.cost = ((self.cost*prev_soc) + (energy_used*cost_t)) / self.device.soc[-1]

In [22]:
def get_offset(building, mode):
  """
  building is env.buildings[i]:
  mode = 'pv' or 'battery'

  each conso gets an equally distributed offset based on solar generation or battery
  discharge
  """
  if mode == 'pv':
    if not building.solar_generation is None:
      return 0
    demands = [building.non_shiftable_load_demand[-2], building.electrical_storage.energy_balance[-1],
             building.dhw_demand[-2], building.dhw_storage.energy_balance[-1],
             building.cooling_demand[-2], building.cooling_storage.energy_balance[-1]]
    count = len([i for i in demands if i > 0])
    return -building.solar_generation[-2]/count
  else:
    if not building.solar_generation is None:
      return 0
    if building.electrical_storage.energy_balance[-1] >=0:
      return 0
    demands = [building.non_shiftable_load_demand[-2], building.dhw_demand[-2],
            building.dhw_storage.energy_balance[-1], building.cooling_demand[-2],
             building.cooling_storage.energy_balance[-1]]
    count = len([i for i in demands if i > 0])
    return -building.electrical_storage.energy_balance[-1]/count

def compute_loss(building, building_devices, price, emission, outdoor_dry_bulb_temperature, zeta):
  loss = 0
  pv_offset = get_offset(building, 'pv') 
  battery_offset = get_offset(building, 'battery')
  # print('pv offset',pv_offset)
  # print('battery_offset', battery_offset)

  #1) compute loss for storage devices use or update cost in storage
  for name,device in building_devices.devices.items():
    #if the device exists in building
    if device:
      energy_used, energy_processed, cost = device.loss(price*emission, pv_offset, battery_offset)
    #else consider it exists and set energy used = 0
    #so we can compute the remaining demand associated with the device
    else:
      energy_used = 0

    if not energy_processed: #charge
      #account for a part of the cost at charging time
      loss += (price * emission) * (energy_used * zeta)
    else: #discharge
      loss += cost * (energy_processed * (1 - zeta))

    #2) compute remaining thermal demand and add cost of remaining direct demand to answer
    if name == 'cooling':
      #cooling and dhw stored energy is thermal not electrical
      remaining = building.cooling_demand[-2] - energy_used
      # print('remaining', remaining)
      if remaining > 0:
        energy = max(0, building.cooling_device.get_input_power(remaining, outdoor_dry_bulb_temperature, False) - pv_offset - battery_offset)
        # print('energy', energy)
        loss += (price + emission) * energy

    elif name == 'dhw':
      remaining = building.dhw_demand[-2]
      # print('remaining', remaining)
      if remaining > 0:
        energy = max(0, building.dhw_device.get_input_power(remaining) - pv_offset - battery_offset)
        # print('energy', energy)
        loss += (price + emission) * energy

  #3) compute additionnal loss coming from nsl
  nsl = max(0, building.non_shiftable_load_demand[-2] - pv_offset - battery_offset)
  loss += (price + emission) * nsl
  # print(loss)

  return loss

# ENV 

In [23]:
class EnvCityGym(gym.Env):
    """
    Env wrapper coming from the gym library.
    """

    def __init__(self, env, devices, discrete, custom_reward, solar, sum_cost,
                cost_ESU, zeta, normalize, stop=None):
        # print(schema_filepath)

        self.obs = 'method_1'
        # new obs
        if solar:
            self.index_keep = [0,1,2,3,22,23,27]
            self.index_norm = [12,7,24,1,1,1,1,1]
        else:
            self.index_keep = [0,1,2,3,22,27]
            # self.index_norm = [12,7,24,1,1,1,1]
            self.index_norm = [1,1,1,1,1,1,1]

        self.custom_reward = custom_reward
        self.sum_cost = sum_cost
        self.cost_ESU = cost_ESU
        self.zeta = zeta
        self.discrete = discrete
        self.normalize = normalize

        #normalization reward
        # self.mean_std = (0.7850008976449486, 0.1339831060216876)

        self.env = env
        #list of names of devices [[]]
        self.devices = devices
        self.building_devices = []
        # get the number of buildings
        self.num_buildings = len(self.env.action_space)

        low = self.env.observation_space[0].low
        high = self.env.observation_space[0].high        

        #if sum cost
        if self.sum_cost:
            cost_l = low[19]+low[28]
            cost_h = high[19]+high[28]

        d_low, d_high = [], []
        for i in self.devices[0]:
            if i == 'battery':
                d_low.append(low[26])
                d_high.append(high[26])
            elif i == 'cooling':
                d_low.append(low[24])
                d_high.append(high[24])
            elif i == 'dhw':
                d_low.append(low[25])
                d_high.append(high[25])

        low = [low[i] for i in self.index_keep]
        high = [high[i] for i in self.index_keep]

        low = low + d_low
        high = high + d_high

        #if sum cost
        if self.sum_cost:
            low.append(cost_l)
            high.append(cost_h)

        #if cost ESU, chage if multiple buildings
        if self.cost_ESU:
            for i in range(len(self.devices[0])):
                low.append(0)
                high.append(cost_h)

        if self.discrete:
            self.action_space = gym.spaces.Discrete(21)
            self.action_map = [-1,-0.9,-0.8,-0.7,-0.6,-0.5,-0.4,-0.3,-0.2,-0.1,0,
                                0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1]
        else:
            self.action_space = env.action_space[0]
        
        if self.obs == 'method_1':
            #TODO modify for proper nb of obs
            self.observation_space = gym.spaces.Box(low=-np.inf,
                            high=np.inf,
                            shape=(12,), 
                            dtype=np.float32)
        else:
            self.observation_space = gym.spaces.Box(low=np.array(low), high=np.array(high), dtype=np.float32)

        #keep last outdoor temp for each building
        self.temp = []
        self.stop = stop
        self.rewards = []

        #remove if test
        self.print_config()

        # TO THINK : normalize the observation space

    def reset(self):
        obs_dict = env_reset(self.env)
        obs = self.env.reset()

        for i,e in enumerate(self.env.buildings):
          self.building_devices.append(BuildingDevices(e,i))
        self.temp.append(obs[i][3])
        
        return self.get_obs(obs)

    def step(self, action):
        """
        we apply the same action for all the buildings
        """
        t = self.env.time_step
        # print('action', action,'\n')
        
        #if action is discrete convert using action mapping
        if self.discrete:
            action = [[self.action_conversion(action)]]
            action = action[0]
        # print('action', action)
        action = [action]

        # we do a step in the environment
        obs, reward, done, info = self.env.step(action)
        # print('normal_reward', reward)
        if t == self.stop:
            done = True
        
        #custom reward 1 is the one where we can use zeta
        if self.custom_reward == 1:
            for i,e in enumerate(self.env.buildings):
                rewards = []
                rewards.append(compute_loss(e, self.building_devices[i], self.env.buildings[i].pricing.electricity_pricing[t-1],
                self.env.buildings[i].carbon_intensity.carbon_intensity[t-1], self.temp[i], self.zeta))
                self.temp[i] = obs[i][3]
                #TODO multiple buildings
                return np.array(self.get_obs(obs)), -rewards[0], done, info
            
        # custom reward 2 is cost without storage - cost with storage
        elif self.custom_reward == 2:
            for i in range(len(self.env.buildings)):
                rewards = self.reward_diff(i)
                # print('rewards', rewards)
                self.rewards.append(rewards)
                #TODO multiple buildings
                return np.array(self.get_obs(obs)), rewards, done, info
            
        # custom reward 3 is the same as 1 but coded in a different way (not coded to be used w/ thermal storage)
        elif self.custom_reward == 3:
            rewards = []
            for i in range(len(self.env.buildings)):
                #TODO multiple buildings
                rewards.append(self.building_devices[i].cost(self.zeta))
                # print('reward3', self.building_devices[i].cost(self.zeta))
                return np.array(self.get_obs(obs)), normalize_norm('random_discrete', 'reward', rewards[0]), done, info  

        #else use normal reward 
        else:
            #TODO multiple buildings
            return np.array(self.get_obs(obs)), reward[0], done, info

    def get_obs(self, obs):
        #keep common obs
        obs_ = [[o[i]/n for i,n in zip(self.index_keep, self.index_norm)] for o in obs]
        # obs_ = list(itertools.chain(*obs_))

        #add soc of each device for each building
        for o in range(len(obs_)):
            if 'battery' in self.devices[o]:
                i = obs[o][26]
                if isinstance(i, np.ndarray):
                    print('probleme array instead of float soc battery obs')
                    i = i[0]
                obs_[o].append(i)
            if 'cooling' in self.devices[o]:
                obs_[o].append(obs[o][24])
            if 'dhw' in self.devices[o]:
                obs_[o].append(obs[o][25])

        #add sum of costs (emission+price)
        if self.sum_cost is True:
            for o in range(len(obs_)):
                #modify for buildings that dont have same nb of obs
                obs_[o].append(obs[o][19]+obs[o][28])
            # print(obs)
        
        #add cost of energy in storage device for each device of each building
        if self.cost_ESU is True:
            for o in range(len(obs_)):
                for i in self.devices[o]:
                    obs_[o].append(self.building_devices[o].devices[i].cost)

        if self.normalize:
            # print(obs_)
            if len(obs_[0]) != 9:
                print('/!\ CHECK NORMALIZATION INDICES')
            obs_ = [normalize_obs(i, 'random_discrete') for i in obs_]
            # print(obs_)
        return np.array(obs_)
    
    def action_conversion(self, action):
        return self.action_map[action]
    
    def valid_action_mask(self):
        mod_action_space = self.building_devices[0].compute_bounds()
        act = np.array(self.action_map)
        index = list(np.where((act>mod_action_space.low[0]) & (act<mod_action_space.high[0]))[0])
        act = [True if i in index else False for i in range(21)]
        act[10] = True #noop always valid
        return act
    
    def print_config(self):
        print('INIT ENV:')
        act = 'Discrete' if self.discrete else 'Continuous'
        print(f'ACTION SPACE: {act}')
        print(f'Use of custom reward: {self.custom_reward}')
        if self.custom_reward in [1,3]:
            print(f'    zeta: {self.zeta}')
        print('Observations kept:')
        for i in self.index_keep:
            print(f'    {i}: {self.env.observation_names[0][i]}')
        for i in self.devices[0]:
            if i == 'battery':
                print('    26: '+self.env.observation_names[0][26])
            elif i == 'cooling':
                print('    24: '+self.env.observation_names[0][24])
            elif i == 'dhw':
                print('    25: '+self.env.observation_names[0][25])
        if self.sum_cost or self.cost_ESU:
            print(f'Observations ADDED:')
            if self.sum_cost:
                print(f'    sum_cost: {self.env.observation_names[0][19]} + {self.env.observation_names[0][28]}')
            if self.cost_ESU:
                print('    cost_ESU: see Device.loss')


    def reward_diff(self, building_i):
        r = []
        building = self.env.buildings[building_i]
        c1 = building.net_electricity_consumption_cost[-1]
        c2 = building.net_electricity_consumption_emission[-1]
        c = c1 + c2

        # c1_ = building.net_electricity_consumption_without_storage_cost[-1]
        # c2_ = building.net_electricity_consumption_without_storage_emission[-1]
        # c_ = c1_ + c2_

        c1_ = building.net_electricity_consumption_without_storage_and_pv_cost[-1]
        c2_ = building.net_electricity_consumption_without_storage_and_pv_emission[-1]
        c_ = c1_ + c2_

        final_cost = c_ - c
        return final_cost

In [24]:
def mask_fn(env: gym.Env) -> np.ndarray:
    # Do whatever you'd like in this function to return the action mask
    # for the current env. In this example, we assume the env has a
    # helpful method we can rely on.
    return env.valid_action_mask()

In [25]:
def get_exp_name(model_name, env, total_timesteps):
    """
    get info about training session.
    """
    action_space = 'Discrete' if env.discrete else 'Continuous'
    reward = f'customR_{int(env.custom_reward)}'
    if env.custom_reward in [1,3]:
        reward += f'_zeta_{env.zeta}'
    equipment = 'devices'+'-'.join([str(len(i)) for i in env.devices])

    p = [model_name, str(env.num_buildings)+'building', equipment, action_space,
        reward, 'sum_cost_'+str(int(env.sum_cost)), 
        'cost_ESU_'+str(int(env.cost_ESU)), str(total_timesteps)]

    return '_'.join(p)

# Model related

In [26]:
def normalize_norm(mode, cat, value):
    """
    mode: random_mask
    cat: reward
    """
    if mode == 'random_discrete':
        if cat == 'reward':
            mean, std = -44.18679279288984, 34.31202033008597
        elif cat == 'temp':
            mean, std = 20.94367808219178, 7.28775091380609
        elif cat == 'nsl':
            mean, std = 24.83374885844749, 17.09060590705837
        elif cat =='net_conso':
            mean, std = 49.89326603300466, 61.492737733468275
        elif cat == 'soc_b':
            mean, std = 0.5886851198326962, 0.39343313608636826
        elif cat == 'cost':
            mean, std = 0.7850008976449486, 0.13398387077199095
        elif cat == 'cost_esu':
            mean, std = 0.8450902559653719, 0.12132634999196334
            
    return (value - mean)/std

In [27]:
def normalize_cycle(value, maxi):
    x_norm = 2 * math.pi * value / maxi
    return np.cos(x_norm), np.sin(x_norm)

In [28]:
def normalize_obs(obs, mode):
    normalized = []
    for i,e in enumerate(obs):
        if i == 0: #month
            normalized += normalize_cycle(e, 12)
        elif i == 1: #day type
            normalized += normalize_cycle(e, 8)
        elif i == 2: #hour
            normalized += normalize_cycle(e, 24)
        elif i == 3:
            normalized.append(normalize_norm(mode, 'temp', e))
        elif i == 4:
            normalized.append(normalize_norm(mode, 'nsl', e))
        elif i == 5:
            normalized.append(normalize_norm(mode, 'net_conso', e))
        elif i == 6:
            normalized.append(normalize_norm(mode, 'soc_b', e))
        elif i == 7:
            normalized.append(normalize_norm(mode, 'cost', e))
        elif i == 8:
            normalized.append(normalize_norm(mode, 'cost_esu', e))
        
        else:
            normalized.append(e)
    return normalized

In [29]:
def train_save_model(model_name, devices, discrete, custom_reward, solar, sum_cost,
                    cost_ESU, zeta, normalize, stop, checkpoint_path='./results', total_timesteps=None):

    # first we initialize the environment (petting zoo)
    if not total_timesteps:
        total_timesteps = 1_500_000

    if solar:
        schema_filepath = 'schema2.json'
    else:
        schema_filepath = 'schema3.json'
    schema = read_json(schema_filepath)
    print(schema_filepath)
    schema['root_directory'] = './'
    env = CityLearnEnv(schema)
    env = EnvCityGym(env, devices=devices, discrete=discrete, custom_reward=custom_reward,
                        solar=solar, sum_cost=sum_cost, cost_ESU=cost_ESU, zeta=zeta, 
                        normalize=normalize, stop=stop)
    if 'mask' in model_name:
        env = ActionMasker(env, mask_fn)
    obs = np.array(env.reset())

    exp_name = get_exp_name(model_name, env,total_timesteps)
    # load model if exist
    if model_name == 'ppo_mask':
        model = MaskablePPO(MaskableActorCriticPolicy, env,
                        verbose=1, tensorboard_log='./train', device='cuda')
    elif model_name == 'ppo':
        model = PPO('MlpPolicy', env, verbose=0, gamma=0.99, tensorboard_log="./train/", device='cuda',
                    n_steps=10_000, learning_rate=0.0005, clip_range=0.2, ent_coef=0.001, seed=0)
    else:
        print('model not recognized')
        return None

    print(f'Model: {model_name}')
    # Train the agent
    model.learn(total_timesteps=total_timesteps, tb_log_name=exp_name,log_interval=5)

    print('saving model')
    model.save(checkpoint_path+exp_name+'.zip')
    if 'mask' in model_name:
        env = env.env
    return model, env.rewards

In [30]:
def test_heuristic(mode, discrete, custom_reward, solar, sum_cost,
                cost_ESU, normalize, zeta=0):
    """
    For 1 building.
    mode:
        noop: test an agent that takes no action (action 0)
        random: test an agent that takes random action over ection space
    """
    schema_filepath = 'schema2.json'
    schema = read_json(schema_filepath)
    schema['root_directory'] = './'
    env = CityLearnEnv(schema)
    env = EnvCityGym(env, devices=[['battery']], discrete=discrete, custom_reward=custom_reward,
                solar=solar, sum_cost=sum_cost, cost_ESU=cost_ESU, normalize=normalize, zeta=zeta, stop=8760)

    
    obs = np.array(env.reset())
    _ = env.reset()
    done = False
    action_list = []

    while not done:
        if mode == 'noop':
            action = [0]
            _, _, done, _ = env.step(action)
        elif mode == 'random':
            action = env.action_space[0].sample()
            _, _, done, _ = env.step(action)

        action_list.append(action[0])
            
    solar = env.env.buildings[0].energy_simulation.solar_generation
    solar = env.env.buildings[0].pv.get_generation(solar)
    conso = env.env.buildings[0].net_electricity_consumption
    price = env.env.buildings[0].pricing.electricity_pricing
    carbon = env.env.buildings[0].carbon_intensity.carbon_intensity

    df = pd.DataFrame()
    df['Time [hours]'] = [i for i in range(len(conso))]
    df['Net conso [kWh]'] = conso
    df['SOC [kWh]'] = env.env.buildings[0].electrical_storage.soc
    df['Conso w/o storage [kWh]'] = env.env.buildings[0].net_electricity_consumption_without_storage
    df['Conso w/o storage and PV [kWh]'] = env.env.buildings[0].net_electricity_consumption_without_storage_and_pv
    df['Solar generation [kWh]'] = solar
    # df.iloc[0][0] = 24 #first is last day of july
    df['Cost sum(emission,price)x50'] = (price+carbon)*50
    df['Cost price x100'] = price*100
    df['Cost carbon x100'] = carbon*100

    return df, action_list, env #all vals of df in kWh

In [31]:
def test_model(model_name, model_path, discrete, custom_reward, solar, sum_cost,
                cost_ESU, normalize, zeta=0, total_timesteps=None):

    # first we initialize the environment (petting zoo)
    try:
        if model_name == 'ppo':
            print('PPO')
            model = PPO.load(model_path)
        elif model_name == 'ddpg':
            print('DDPG')
            model = DDPG.load(model_path)
        elif model_name == 'a2c':
            print('A2C')
            model = A2C.load(model_path)
        elif model_name == 'sac':
            print('SAC')
            model = SAC.load(model_path)
        elif model_name == 'ppo_mask':
            model = MaskablePPO.load(model_path)
    except:
        print('not_found')

    for i in range(1):
        done = False
        print(f'Case {i}:', i)
        schema_filepath = 'schema3.json'
        schema = read_json(schema_filepath)
        schema['root_directory'] = './'
        env = CityLearnEnv(schema)
        env = EnvCityGym(env, devices=[['battery']], discrete=discrete, custom_reward=custom_reward,
                solar=solar, sum_cost=sum_cost, cost_ESU=cost_ESU, normalize=normalize, zeta=zeta, stop=8760)
        if 'mask' in model_name:
            env = ActionMasker(env, mask_fn)
        print(env)
        obs = np.array(env.reset())
        print()
        action_list = []
        while not done:
            # print(obs)
            # obs = [i[0] if isinstance(i, np.array()) else i for i in obs]
            # obs = np.array(obs)
            # print(obs)
            action, _state = model.predict(obs[0], deterministic=True)
            # print(type(action))
            obs, rewards, done, _ = env.step(action)
            if isinstance(action, np.ndarray):
                action = int(action)
            action_list.append(action)

        if discrete:
            if 'mask' in model_name:
                env = env.env
            action_list = [env.action_conversion(i) for i in action_list]
                
            
        # print(action_list)
        x = pd.Series(action_list, name='action')
        print('List of different actions taken:')
        print(x.value_counts())

        for n, nd in env.env.evaluate().groupby('name'):
            nd = nd.pivot(index='name', columns='cost_function', values='value').round(3)
            print(n, ':', nd.to_dict('records'))
        print()

    solar = env.env.buildings[0].energy_simulation.solar_generation
    solar = env.env.buildings[0].pv.get_generation(solar)
    conso = env.env.buildings[0].net_electricity_consumption
    price = env.env.buildings[0].pricing.electricity_pricing
    carbon = env.env.buildings[0].carbon_intensity.carbon_intensity


    df = pd.DataFrame()
    # df['Time [hours]'] = [i for i in range(len(conso))]
    df['Net conso [kWh]'] = conso
    df['SOC [kWh]'] = env.env.buildings[0].electrical_storage.soc
    df['Conso w/o storage [kWh]'] = env.env.buildings[0].net_electricity_consumption_without_storage
    df['Conso w/o storage and PV [kWh]'] = env.env.buildings[0].net_electricity_consumption_without_storage_and_pv
    df['Solar generation [kWh]'] = solar
    # df.iloc[0][0] = 24 #first is last day of july
    df['Cost sum(emission,price)x50'] = (price+carbon)*50
    df['Cost price x100'] = price*100
    df['Cost carbon x100'] = carbon*100

    return df, action_list, env #all vals of df in kWh
    

In [32]:
def fig_plot(df, y_label=None, day_mark=False, show=True, write=False):
    """
    write(str): path/name.html
    """
    df['Time [hours]'] = [i for i in range(len(df))]
    fig = px.line(df, x="Time [hours]", y=list(df.columns))
    if day_mark:
        marks = [hour for hour in df["Time [hours]"] if hour%24==0]
        for hour in marks:
            fig.add_vline(x=hour)

    if y_label:
        fig.update_layout(yaxis_title=y_label)
    if show:
        fig.show()
    if write:
        print('writing to', write)
        fig.write_html(write)

# Launch

## Train

In [33]:
schema_filepath = 'schema3.json'
schema = read_json(schema_filepath)
schema['root_directory'] = './'

env = CityLearnEnv(schema)
env = EnvCityGym(env, devices=[['battery']], discrete=True, custom_reward=3,
                    solar=False, sum_cost=True, cost_ESU=True, zeta=1, normalize=True, stop=24*30*3)
_ = env.reset()
len(_[0])

INIT ENV:
ACTION SPACE: Discrete
Use of custom reward: 3
    zeta: 1
Observations kept:
    0: month
    1: day_type
    2: hour
    3: outdoor_dry_bulb_temperature
    22: non_shiftable_load
    27: net_electricity_consumption
    26: electrical_storage_soc
Observations ADDED:
    sum_cost: carbon_intensity + electricity_pricing
    cost_ESU: see Device.loss


12

In [34]:
_ = env.step(0)

zeta=0 is only at discharged

In [35]:
m = train_save_model(model_name='ppo_mask', devices=[['battery']], discrete=True,
                    custom_reward=3, solar=False, sum_cost=True, cost_ESU=True, zeta=0,
                    normalize=True, stop=None, checkpoint_path='./weights/', total_timesteps=3_000_000)

schema3.json
INIT ENV:
ACTION SPACE: Discrete
Use of custom reward: 3
    zeta: 0
Observations kept:
    0: month
    1: day_type
    2: hour
    3: outdoor_dry_bulb_temperature
    22: non_shiftable_load
    27: net_electricity_consumption
    26: electrical_storage_soc
Observations ADDED:
    sum_cost: carbon_intensity + electricity_pricing
    cost_ESU: see Device.loss
Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Model: ppo_mask
Logging to ./train\ppo_mask_1building_devices1_Discrete_customR_3_zeta_0_sum_cost_1_cost_ESU_1_3000000_2
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 8.76e+03    |
|    ep_rew_mean          | -135        |
| time/                   |             |
|    fps                  | 148         |
|    iterations           | 5           |
|    time_elapsed         | 69          |
|    total_timesteps      | 10240       |
| train/                  |    

To visualise training curves in Tensorboard use: CTRL + MAJ + P, Python: Launch Tensorboard, select folder ./train (VSCODE)

## Test

### Run Single model

Prints:
  * name of model
  * config of the env
  * list and number of different actions taken by env
  * list of metrics w/ associated cost

path = 'weights_r3\ppo_mask_1building_devices1_Discrete_customR_3_zeta_0_sum_cost_1_cost_ESU_1_3000000.zip'
n_steps=10_000, learning_rate=0.0005, clip_range=0.2, ent_coef=0.001, timesteps=3e6

List of different actions taken:
 0.2    7935
 0.3     659
-0.1     165
Name: action, dtype: int64
Building_1 : [{'carbon_emissions': 1.002, 'cost': 1.004, 'electricity_consumption': 1.001, 'zero_net_energy': 1.001}]
District : [{'1 - load_factor': 1.01, 'average_daily_peak': 1.016, 'carbon_emissions': 1.002, 'cost': 1.004, 'electricity_consumption': 1.001, 'peak_demand': 1.0, 'ramping': 1.028, 'zero_net_energy': 1.001}]

'weights\ppo_mask_1building_devices1_Discrete_customR_3_zeta_0_sum_cost_1_cost_ESU_1_2500000.zip'
n_steps=10_000, learning_rate=0.0003, clip_range=0.2, ent_coef=0.001, timesteps=2.5e6
List of different actions taken:
0.0    7634
0.5    1125
Name: action, dtype: int64
Building_1 : [{'carbon_emissions': 1.0, 'cost': 1.001, 'electricity_consumption': 1.0, 'zero_net_energy': 1.0}]
District : [{'1 - load_factor': 1.016, 'average_daily_peak': 1.002, 'carbon_emissions': 1.0, 'cost': 1.001, 'electricity_consumption': 1.0, 'peak_demand': 1.0, 'ramping': 1.002, 'zero_net_energy': 1.0}]


In [21]:
# path = 'weights_r3\ppo_mask_1building_devices1_Discrete_customR_3_zeta_0_sum_cost_1_cost_ESU_1_3000000.zip'
path = 'weights\ppo_mask_1building_devices1_Discrete_customR_3_zeta_0_sum_cost_1_cost_ESU_1_3000000.zip'

df, actions, env = test_model('ppo_mask', model_path=path, discrete=True, custom_reward=3, sum_cost=True,
                solar=False, cost_ESU=True, normalize=True, zeta=1, total_timesteps=None)

Case 0: 0
INIT ENV:
ACTION SPACE: Discrete
Use of custom reward: 3
    zeta: 0
Observations kept:
    0: month
    1: day_type
    2: hour
    3: outdoor_dry_bulb_temperature
    22: non_shiftable_load
    27: net_electricity_consumption
    26: electrical_storage_soc
Observations ADDED:
    sum_cost: carbon_intensity + electricity_pricing
    cost_ESU: see Device.loss
<ActionMasker<EnvCityGym instance>>

List of different actions taken:
 0.0    5771
 0.1    1178
-0.1     875
 0.2     868
-0.3      32
 0.4      26
 0.5       9
Name: action, dtype: int64
Building_1 : [{'carbon_emissions': 1.007, 'cost': 0.979, 'electricity_consumption': 1.007, 'zero_net_energy': 1.006}]
District : [{'1 - load_factor': 1.035, 'average_daily_peak': 1.075, 'carbon_emissions': 1.007, 'cost': 0.979, 'electricity_consumption': 1.007, 'peak_demand': 1.082, 'ramping': 1.165, 'zero_net_energy': 1.006}]



In [None]:
# test min max normalization for soc, net, nsl (we know the maximum bound before hand, keeps between 0-1 which is coherent
# (battery soc already between 0-1)) and keep norm for other                       #5


# use future values to see impact of delays on the result                           #last -1

#re test different values of alpha when using obs normalization                     #4

#augmenter le poid du carbon dans le cout                                           #last -2

#test more baselines 1) noop 2) random 3) same algo no modifications (base ppo)     #1

#episodic analysis of resulting policy, add to paper                                #2

#test reward scaling or normalization (last)                                        #3

#add a section in experiment 
# 2 scenario: 1battery, 1battery w/solar                                            #same as 2
#analysis of behavior, yearly and zoom in, explain part where no or little decisions are taken 

#test solutions w/hyperparameter tuning                                             #last

In [23]:
1.007+0.979

1.9859999999999998

### Plot results of single test

In [22]:
fig_plot(df, day_mark=False, show=False, write='test_normalize2.html')

writing to test_normalize.html


### Run batch of models

Prints:
  * name of model
  * config of the env
  * list and number of different actions taken by env
  * list of metrics w/ associated costA

In [37]:
weights_folder = './weights_r3.1/'

for i in os.listdir(weights_folder):
    print(i)
    if 'mask' in i:
        model_name = 'ppo_mask'

        df, actions, env = test_model(model_name, model_path=weights_folder+i, discrete=True, custom_reward=3, sum_cost=True,
                    cost_ESU=True, solar=False, zeta=0, total_timesteps=None)
        
        #print fig here
                    

ppo_mask_1building_devices1_Discrete_customR_3_zeta_0.01_sum_cost_1_cost_ESU_1_3000000.zip
Case 0: 0
INIT ENV:
ACTION SPACE: Discrete
Use of custom reward: 3
    zeta: 0
Observations kept:
    0: month
    1: day_type
    2: hour
    3: outdoor_dry_bulb_temperature
    22: non_shiftable_load
    27: net_electricity_consumption
    26: electrical_storage_soc
Observations ADDED:
    sum_cost: carbon_intensity + electricity_pricing
    cost_ESU: see Device.loss
<ActionMasker<EnvCityGym instance>>

List of different actions taken:
0    8759
Name: action, dtype: int64
Building_1 : [{'carbon_emissions': 1.0, 'cost': 1.0, 'electricity_consumption': 1.0, 'zero_net_energy': 1.0}]
District : [{'1 - load_factor': 1.0, 'average_daily_peak': 1.0, 'carbon_emissions': 1.0, 'cost': 1.0, 'electricity_consumption': 1.0, 'peak_demand': 1.0, 'ramping': 1.0, 'zero_net_energy': 1.0}]

ppo_mask_1building_devices1_Discrete_customR_3_zeta_0.02_sum_cost_1_cost_ESU_1_3000000.zip
Case 0: 0
INIT ENV:
ACTION SPACE:

# Data Analysis

In [32]:
def data_solar(env):
    df_solar = pd.DataFrame()
    for i,b in enumerate(env.buildings):
        solar = b.energy_simulation.solar_generation
        solar = b.pv.get_generation(solar)
        df_solar['building_'+str(i)] = solar
    return df_solar

def data_nsl(env):
    df = pd.DataFrame()
    for i,b in enumerate(env.buildings):
        x = b.energy_simulation.non_shiftable_load
        df['building_'+str(i)] = x
    return df

def data_cool(env):
    df = pd.DataFrame()
    for i,b in enumerate(env.buildings):
        x = b.energy_simulation.cooling_demand
        df['building_'+str(i)] = x
    return df

def data_dhw(env):
    df = pd.DataFrame()
    for i,b in enumerate(env.buildings):
        x = b.energy_simulation.dhw_demand
        df['building_'+str(i)] = x
    return df

def data_cost(env):
    #cost is same for each b
    df_cost = pd.DataFrame()
    price = env.buildings[0].pricing.electricity_pricing
    # price = pd.read_csv('./pricing.csv')['Electricity Pricing [$]']*100
    carbon = env.buildings[0].carbon_intensity.carbon_intensity

    df_cost['Cost sum(emission,price)x50'] = (price+carbon)*50
    df_cost['Cost price x100'] = price*100
    df_cost['Cost carbon x100'] = carbon*100

    return df_cost

# Look at replay buffer actions during training

Todo