<a href="https://colab.research.google.com/github/QasimWani/ROLEVT/blob/dev/colab_implicit_agent.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Colab-only cell: attach Google Drive under /content/gdrive.
# force_remount=True is passed so the mount is redone even if one already exists.
from google.colab import drive
drive.mount('/content/gdrive', force_remount=True)

Mounted at /content/gdrive


In [2]:
!ls

gdrive	sample_data


In [3]:
# Clone the ROLEVT repository into /content unless a directory of that name is already there.
import os
root = '/content'
# names of the sub-directories currently present under `root`
dirlist = [ item for item in os.listdir(root) if os.path.isdir(os.path.join(root, item)) ]

if 'ROLEVT' not in dirlist:
  # NOTE(review): despite the trailing "#SSH" label this is an HTTPS URL with the
  # credentials embedded in it. `username:password` is a placeholder; never commit real
  # credentials or tokens here (prefer getpass / an environment variable).
  !git clone https://username:password@github.com/QasimWani/ROLEVT.git #SSH

  # !git clone https://<username>:<token>@github.com/QasimWani/ROLEVT.git #GitHub Token

Cloning into 'ROLEVT'...
remote: Enumerating objects: 134, done.[K
remote: Counting objects: 100% (134/134), done.[K
remote: Compressing objects: 100% (115/115), done.[K
remote: Total 134 (delta 27), reused 113 (delta 16), pack-reused 0[K
Receiving objects: 100% (134/134), 11.39 MiB | 10.89 MiB/s, done.
Resolving deltas: 100% (27/27), done.


In [4]:
%cd ROLEVT/

/content/ROLEVT


In [5]:
!ls

agent.py			   data			 main.ipynb
agents				   energy_models.py	 main.py
buildings_state_action_space.json  examples		 README.md
citylearn.py			   images		 reward_function.py
colab_implicit_agent.ipynb	   implicit_agent.ipynb  src
common				   LICENSE		 submission_files


In [6]:
#comment this out if no changes have been made. 
!git pull origin main # replace 'main' with 'dev' to pull another branch

From https://github.com/QasimWani/ROLEVT
 * branch            main       -> FETCH_HEAD
Already up to date.


In [7]:
!ls

agent.py			   data			 main.ipynb
agents				   energy_models.py	 main.py
buildings_state_action_space.json  examples		 README.md
citylearn.py			   images		 reward_function.py
colab_implicit_agent.ipynb	   implicit_agent.ipynb  src
common				   LICENSE		 submission_files


In [8]:
# To run this example, move this file to the main directory of this repository
from citylearn import  CityLearn
import matplotlib.pyplot as plt
from pathlib import Path
import pandas as pd
import numpy as np
import cvxpy as cp
import json
import time
from agents.rbc import RBC
from copy import deepcopy


import warnings
warnings.filterwarnings("ignore")

In [9]:
# Choose the climate zone, describe the simulation, and instantiate the environment.
climate_zone = 5
# 8760 = 365 * 24; presumably four years of hourly steps (first index, last index).
sim_period = (0, 8760 * 4 - 1)

params = dict(
    data_path=Path(f"data/Climate_Zone_{climate_zone}"),
    building_attributes='building_attributes.json',
    weather_file='weather_data.csv',
    solar_profile='solar_generation_1kW.csv',
    carbon_intensity='carbon_intensity.csv',
    building_ids=[f"Building_{number}" for number in range(1, 10)],
    buildings_states_actions='buildings_state_action_space.json',
    simulation_period=sim_period,
    cost_function=[
        'ramping',
        '1-load_factor',
        'average_daily_peak',
        'peak_demand',
        'net_electricity_consumption',
        'carbon_emissions',
    ],
    central_agent=False,
    save_memory=False,
)

env = CityLearn(**params)

# Per-building observation and action spaces; `actions_spaces` is read later by get_actions.
observations_spaces, actions_spaces = env.get_state_action_spaces()

## Load Data

In [23]:
def get_current_data(env, t):
    """Collect the optimisation inputs of Building_1..Building_9 from `env`.

    Every entry of the returned dict is a list with one value per building,
    ordered Building_1 .. Building_9. `t` is accepted for interface
    compatibility but is not read.

    Legend used in the inline tags below (kept from the original notes):
      FB - full batch, trimmed to X[:time-step]
      CT - current time step only
      DP - dynamic update, grows with the time step
      P  - constant across time steps, differs per building only
    """
    labels = [f'Building_{number}' for number in range(1, 10)]

    def collect(read):
        # apply `read` to each building object, in label order
        return [read(env.buildings[label]) for label in labels]

    def soc_fraction(storage_attr, capacities):
        # latest state of charge of the named storage divided by that building's capacity
        return [
            getattr(env.buildings[label], storage_attr).soc[-1] / capacity
            for label, capacity in zip(labels, capacities)
        ]

    snapshot = {}

    # P (v.3 -> FB): virtual electricity price
    snapshot['p_ele'] = [1 for _ in labels]
    # FB: the same district-level value is repeated for every building
    snapshot['E_grid_past'] = [env.net_electric_consumption[-1] for _ in labels]

    # Loads
    snapshot['E_ns'] = collect(lambda b: b.get_non_shiftable_load())  # CT
    snapshot['H_bd'] = collect(lambda b: b.dhw_heating_device_to_building[-1])  # DP
    snapshot['C_bd'] = collect(lambda b: b.cooling_device_to_building[-1])  # DP

    # PV generation
    snapshot['E_pv'] = collect(lambda b: b.get_solar_power())  # CT

    # Heat pump
    snapshot['COP_C'] = collect(lambda b: b.cooling_device.cop_cooling[-1])  # DP
    snapshot['E_hpC_max'] = collect(lambda b: b.cooling_device.get_max_cooling_power())  # P

    # Electric heater (max heating power stands in for a capacity attribute)
    snapshot['eta_ehH'] = collect(lambda b: b.dhw_heating_device.efficiency)  # P
    snapshot['E_ehH_max'] = collect(lambda b: b.dhw_heating_device.get_max_heating_power())  # P

    # Battery
    snapshot['C_f_bat'] = collect(lambda b: b.electrical_storage.capacity_loss_coef)  # P
    snapshot['C_p_bat'] = collect(lambda b: b.electrical_storage.capacity)  # P
    snapshot['eta_bat'] = collect(lambda b: b.electrical_storage.efficiency)  # P
    # DP: current (not target) state of charge, as a fraction of capacity
    snapshot['c_bat_end'] = soc_fraction('electrical_storage', snapshot['C_p_bat'])

    # Heat (DHW) storage
    snapshot['C_f_Hsto'] = collect(lambda b: b.dhw_storage.loss_coef)  # P
    snapshot['C_p_Hsto'] = collect(lambda b: b.dhw_storage.capacity)  # P
    snapshot['eta_Hsto'] = collect(lambda b: b.dhw_storage.efficiency)  # P
    snapshot['c_Hsto_end'] = soc_fraction('dhw_storage', snapshot['C_p_Hsto'])  # DP

    # Cooling storage
    snapshot['C_f_Csto'] = collect(lambda b: b.cooling_storage.loss_coef)  # P
    snapshot['C_p_Csto'] = collect(lambda b: b.cooling_storage.capacity)  # P
    snapshot['eta_Csto'] = collect(lambda b: b.cooling_storage.efficiency)  # P
    snapshot['c_Csto_end'] = soc_fraction('cooling_storage', snapshot['C_p_Csto'])  # DP

    return snapshot

In [11]:
def parse_data(data:dict, current_data:dict):
    """Append one time step of readings (`current_data`) to the running history `data`.

    `current_data` must hold exactly 22 entries (the `get_current_data` format).
    Each value is appended to the list stored under the same key in `data`,
    creating that list on first use. `data` is modified in place and returned.
    """
    assert len(current_data) == 22, "Invalid number of parameters. Can't run basic (root) agent optimization"

    # pass 1: extend every history with the newest reading
    for name in current_data:
        data.setdefault(name, []).append(current_data[name])

    # pass 2: a history whose array form is exactly one row of nine values is
    # re-seated as a fresh single-element list holding the newest reading
    for name, latest in current_data.items():
        history_shape = np.array(data[name]).shape
        if history_shape == (1, 9):
            data[name] = [latest]
    return data

In [12]:
def get_dimensions(data:dict):
    """Print the `.shape` of every value in `data`, one per line, in key order."""
    for entry in data.values():
        print(entry.shape)

In [13]:
def get_building(data:dict, building_id:int):
    """Extract the entries that belong to one building.

    @Param:
    - `data` : dict of arrays. 2-D arrays are laid out as (time step, building),
      the layout produced by `parse_data` + `convert_to_numpy` and consumed by
      `Optim` (which indexes `[t:T, building_id]`).
    - `building_id` : 0-based column index of the building.

    Returns a new dict with the same keys. 2-D entries are reduced to the
    building's column (one value per time step); entries of any other rank
    are passed through unchanged.
    """
    building_data = {}
    for key, values in data.items():
        # Buildings live on axis 1, so select the column. Indexing axis 0 here would
        # instead return one time step across all buildings.
        building_data[key] = values[:, building_id] if len(values.shape) == 2 else values
    return building_data

## Optimization

In [14]:
def convert_to_numpy(params:dict):
    """Replace every value of `params` by a numpy array, in place (returns None).

    The three state-of-charge series ('c_bat_end', 'c_Csto_end', 'c_Hsto_end')
    are reduced to just their first and last entries; every other value is
    converted as a whole.
    """
    endpoint_keys = ('c_bat_end', 'c_Csto_end', 'c_Hsto_end')
    for name, series in params.items():
        if name in endpoint_keys:
            # keep only the first and the last recorded entry
            params[name] = np.array([series[0], series[-1]])
        else:
            params[name] = np.array(series)

In [15]:
def create_random_data(data:dict):
    """Overwrite every array in `data` with random values of the same shape.

    Values come from `np.random.random`, i.e. they are uniform on [0, 1)
    (not Gaussian). `data` is modified in place and also returned.
    """
    for name, array in data.items():
        data[name] = np.random.random(size=array.shape)
    return data

In [31]:
def get_actions(T, data:dict, t, debug=False, apply_seed=False):
    """ Runs Optim for all 9 buildings for hour `t` of a `T`-hour horizon.

    @Param:
    - `T` : horizon length handed to `Optim`.
    - `data` : history dict in the `parse_data` format (lists or arrays). It is
      NOT modified: conversion to numpy happens on a private deep copy, so the
      caller can keep appending to it with `parse_data` afterwards.
    - `t` : hour to solve for, handed to `Optim`.
    - `debug` : when True, return the nine un-solved `Optim` objects instead of actions.
    - `apply_seed` : when True, every array is replaced by random values via
      `create_random_data` before optimising (no RNG seed is actually set).

    Returns a list with one entry per building: either the `Optim` object
    (debug) or whatever `Optim.solve` returns for that building.
    """
    # Copy first, then convert. Converting the caller's dict in place would turn its
    # lists into ndarrays and break later `.append` calls in parse_data.
    data = deepcopy(data)
    convert_to_numpy(data)
    if apply_seed:
        data = create_random_data(data)

    if debug:
        return [Optim(T, t, data, i) for i in range(9)]

    # number of actions per building comes from the notebook-level `actions_spaces`
    return [Optim(T, t, data, i).solve(actions_spaces[i].shape[0], debug) for i in range(9)]

In [38]:
class Optim:
    """ Define Differential Optimization framework for CL.

    One instance models a single building over the hours t..T-1. `__init__`
    builds the cvxpy parameters, variables, cost expression and constraints
    and stores them in `self.costs` / `self.constraints`; `solve` assembles
    the problem, solves it and returns the first-step storage actions.
    """
    def __init__(self, T:int, t:int, parameters:dict, building_id:int):
        """ 
        @Param:
        - `parameters` : data (dict) from r <= t <= T following `get_current_data` format.
          Values are indexed as 2-D arrays `[row, building_id]`; the three `c_*_end`
          entries are read as two rows (see `shape=(2)` below).
        - `T` : 24 hours (constant)
        - `t` : hour to solve optimization for.
        - `building_id` : column of `parameters` to use.
        Solves per building as specified by `building_id`. Note: 0 based.
        """
        # number of hours covered by the decision variables
        # NOTE(review): t == T gives window == 0, and E_grid[0] / SOC_*[0] below then
        # index an empty variable; the scalar parameters are also read at row `t`.
        # Confirm callers never pass t == T (or that `parameters` has a row T).
        window = T - t
        self.constraints = []
        self.costs = []
        self.t = t
        
        #define parameters and variables
        
        ### --- Parameters ---
        p_ele = cp.Parameter(name='p_ele', shape=(window), value=parameters['p_ele'][t:T, building_id])
        # rows [:t]; E_grid_past[-1] is read below, so this needs t >= 1
        E_grid_past = cp.Parameter(name='E_grid_past', shape=(t), value=parameters['E_grid_past'][:t, building_id])
        
        # Loads
        E_ns = cp.Parameter(name='E_ns', shape=(window), value=parameters['E_ns'][t:T, building_id])
        H_bd = cp.Parameter(name='H_bd', shape=(window), value=parameters['H_bd'][t:T, building_id])
        C_bd = cp.Parameter(name='C_bd', shape=(window), value=parameters['C_bd'][t:T, building_id])
        
        # PV generations
        E_pv = cp.Parameter(name='E_pv', shape=(window), value=parameters['E_pv'][t:T, building_id])

        # Heat Pump
        COP_C = cp.Parameter(name='COP_C', shape=(window), value=parameters['COP_C'][t:T, building_id])
        E_hpC_max = cp.Parameter(name='E_hpC_max', value=parameters['E_hpC_max'][t, building_id])

        # Electric Heater
        eta_ehH = cp.Parameter(name='eta_ehH', value=parameters['eta_ehH'][t, building_id])
        E_ehH_max = cp.Parameter(name='E_ehH_max', value=parameters['E_ehH_max'][t, building_id])

        # Battery
        C_f_bat = cp.Parameter(name='C_f_bat', value=parameters['C_f_bat'][t, building_id])
        C_p_bat = cp.Parameter(name='C_p_bat', value=parameters['C_p_bat'][t, building_id])
        eta_bat = cp.Parameter(name='eta_bat', value=parameters['eta_bat'][t, building_id])
        # two entries: [0] seeds the first SOC update, [-1] is the required final SOC
        c_bat_end = cp.Parameter(name='c_bat_end', shape=(2), value=parameters['c_bat_end'][:, building_id])

        # Heat (Energy->dhw) Storage
        C_f_Hsto = cp.Parameter(name='C_f_Hsto', value=parameters['C_f_Hsto'][t, building_id])
        C_p_Hsto = cp.Parameter(name='C_p_Hsto', value=parameters['C_p_Hsto'][t, building_id])
        eta_Hsto = cp.Parameter(name='eta_Hsto', value=parameters['eta_Hsto'][t, building_id])
        c_Hsto_end = cp.Parameter(name='c_Hsto_end', shape=(2), value=parameters['c_Hsto_end'][:, building_id])

        # Cooling (Energy->cooling) Storage
        C_f_Csto = cp.Parameter(name='C_f_Csto', value=parameters['C_f_Csto'][t, building_id])
        C_p_Csto = cp.Parameter(name='C_p_Csto', value=parameters['C_p_Csto'][t, building_id])
        eta_Csto = cp.Parameter(name='eta_Csto', value=parameters['eta_Csto'][t, building_id])
        c_Csto_end = cp.Parameter(name='c_Csto_end', shape=(2), value=parameters['c_Csto_end'][:, building_id])
        
        ### constants
        E_grid_pkhist = cp.max(E_grid_past) #maximum net electricity in the past 1 <= r < t
        E_grid_prevhour = E_grid_past[-1] #net electricity in the previous hour
        
        ### --- Variables ---
        # every variable has one entry per remaining hour (length `window`)
        E_grid = cp.Variable(name='E_grid', shape=(window)) #net electricity grid
        E_hpC = cp.Variable(name='E_hpC', shape=(window)) #heat pump
        E_ehH = cp.Variable(name='E_ehH', shape=(window)) #electric heater
        
        SOC_bat = cp.Variable(name='SOC_bat', shape=(window)) #electric battery
        action_bat = cp.Variable(name='action_bat', shape=(window)) #electric battery
    
        SOC_H = cp.Variable(name='SOC_H', shape=(window)) #heat storage
        action_H = cp.Variable(name='action_H', shape=(window)) #heat storage
        
        SOC_C = cp.Variable(name='SOC_C', shape=(window)) #cooling storage
        action_C = cp.Variable(name='action_C', shape=(window)) #cooling storage
        
        
        ### objective function
        # single cost term = ramping + peak + priced consumption (unweighted sum)
        ramping_cost = cp.abs(E_grid[0] - E_grid_prevhour) + cp.sum(cp.abs(E_grid[1:] - E_grid[:-1])) # E_grid_t+1 - E_grid_t
        peak_net_electricity_cost = cp.max(cp.atoms.affine.hstack.hstack([*E_grid, E_grid_pkhist])) #max(E_grid, E_gridpkhist)
        # NOTE(review): `*` between two vector expressions - confirm this is the intended
        # product under the installed CVXPY version (cp.multiply / @ are the explicit forms).
        electricity_cost = cp.sum(p_ele * E_grid)
        self.costs.append(ramping_cost + peak_net_electricity_cost + electricity_cost)
        
        ### constraints
        self.constraints.append( E_grid >= 0 )
        
        #energy balance constraints
        self.constraints.append( E_pv + E_grid == E_ns + E_hpC + E_ehH + action_bat * C_p_bat) #electricity balance
        self.constraints.append( E_ehH * eta_ehH == action_H * C_p_Hsto + H_bd ) #heat balance
        self.constraints.append( E_hpC * COP_C == action_C * C_p_Csto + C_bd ) #cooling balance
        
        #heat pump constraints
        self.constraints.append( E_hpC >= 0 ) #constraint minimum cooling to positive
        self.constraints.append( E_hpC <= E_hpC_max ) #maximum cooling
        #electric heater constraints
        self.constraints.append( E_ehH >= 0 ) #constraint to PD
        self.constraints.append( E_ehH <= E_ehH_max ) #maximum limit
        
        #electric battery constraints
        self.constraints.append( SOC_bat[0] == (1 - C_f_bat)*c_bat_end[0] + action_bat[0]*eta_bat ) #initial SOC
        #soc updates
        for i in range(1, window): #1 = t + 1
            self.constraints.append( SOC_bat[i] == (1 - C_f_bat)*SOC_bat[i - 1] + action_bat[i]*eta_bat)
        # equality on the last step: together with the bounds below this can leave no
        # feasible point for some inputs
        self.constraints.append( SOC_bat[-1] == c_bat_end[-1] ) #soc terminal condition
        self.constraints.append(SOC_bat >= 0) #battery SOC bounds
        self.constraints.append(SOC_bat <= 1) #battery SOC bounds
        
        #Heat Storage constraints
        self.constraints.append( SOC_H[0] == (1 - C_f_Hsto) * c_Hsto_end[0] + action_H[0]*eta_Hsto ) #initial SOC
        #soc updates
        for i in range(1, window):
            self.constraints.append( SOC_H[i] == (1 - C_f_Hsto)*SOC_H[i - 1] + action_H[i]*eta_Hsto)
        self.constraints.append( SOC_H[-1] == c_Hsto_end[-1] ) #soc terminal condition
        self.constraints.append(SOC_H >= 0) #heat storage SOC bounds
        self.constraints.append(SOC_H <= 1) #heat storage SOC bounds
        
        #Cooling Storage constraints
        self.constraints.append( SOC_C[0] == (1 - C_f_Csto) * c_Csto_end[0] + action_C[0]*eta_Csto ) #initial SOC
        #soc updates
        for i in range(1, window):
            self.constraints.append( SOC_C[i] == (1 - C_f_Csto)*SOC_C[i - 1] + action_C[i]*eta_Csto)
        self.constraints.append( SOC_C[-1] == c_Csto_end[-1] ) #soc terminal condition
        self.constraints.append(SOC_C >= 0) #cooling storage SOC bounds
        self.constraints.append(SOC_C <= 1) #cooling storage SOC bounds
        
    def solve(self, num_actions, debug=False):
        """ Build and solve the problem; return the storage actions for the first hour.

        @Param:
        - `num_actions` : 2 -> returns [action_C, action_bat];
          anything else -> returns [action_C, action_H, action_bat].
        - `debug` : forwarded to cvxpy as `verbose`.

        Each returned action is the first element of the solved variable, clipped
        to [-1, 1]. If the solver's objective value is not a finite number the
        string "Unbounded Solution" is returned instead of a list.
        """
        # Form objective.
        obj = cp.Minimize(*self.costs)
        # Form and solve problem.
        prob = cp.Problem(obj, self.constraints)
        status = prob.solve(verbose=debug)  # Returns the optimal value.

        # NOTE(review): per the CVXPY docs a minimisation yields +inf when the problem is
        # infeasible and -inf when it is unbounded, so the message below may be mislabelling
        # infeasible problems - confirm. Callers also receive a str where a list of
        # actions is expected.
        if float('-inf') < status < float('inf'):
            pass
        else:
            return "Unbounded Solution"
            
        # first horizon step of every variable, clipped to the action range [-1, 1]
        actions = {}
        for var in prob.variables():
            actions[var.name()] = np.clip(var.value[0], -1, 1)
        
        #Temporary... needs fixing!
        if num_actions == 2:
            return [actions['action_C'], actions['action_bat']]
        return [actions['action_C'], actions['action_H'], actions['action_bat']]

In [39]:
# #optimization debug -> !!! PROBLEM !!!
# debug = get_actions(24, deepcopy(data), 2, True)
# debug[0].solve(3)

## RBC for Data Collection 

In [40]:
def get_idx_hour():
    """Return the position of the 'hour' entry among the first building's states.

    Reads 'buildings_state_action_space.json' from the working directory and
    walks the `states` mapping of its first building in file order. Every
    entry counts, whatever its selected flag is. Fails an assertion if the
    last entry is reached without finding 'hour'; returns -1 when the mapping
    is empty.
    """
    with open('buildings_state_action_space.json') as handle:
        space_spec = json.load(handle)

    first_building_states = list(space_spec.values())[0]['states']
    last_position = len(first_building_states.items()) - 1

    for position, obs_name in enumerate(first_building_states):
        if obs_name == 'hour':
            return position
        assert position < last_position, "Please, select hour as a state for Building_1 to run the RBC"
    return -1

In [41]:
def estimate_data(surrogate_env:CityLearn, agent:RBC, state, data, idx_hour:int, t_start:int):
    """ Returns data for hours `t_start` - 24 using `surrogate_env` running RBC `agent`

    @Param:
    - `surrogate_env` : environment that is stepped forward here (pass a copy if
      the real environment must not advance).
    - `agent` : controller whose `select_action` picks the action for each step.
    - `state` : current per-building state; `state[0][idx_hour]` is fed to `agent`.
    - `data` : history dict that the new readings are appended to via `parse_data`.
    - `idx_hour` : position of the hour inside a building's state vector.
    - `t_start` : current time step; the roll-out runs from `t_start % 24` up to hour 23.

    Returns `data` extended with one `get_current_data` reading per simulated hour.
    """
    for i in range(t_start%24, 24):
        hour_state = np.array([[state[0][idx_hour]]])
        # Use the controller that was passed in. The previous code reached for the
        # notebook-global `agents`, silently ignoring the `agent` argument.
        action = agent.select_action(hour_state)
        # reward / done / info of the surrogate roll-out are not used
        state, _rewards, _done, _info = surrogate_env.step(action)

        data = parse_data(data, get_current_data(surrogate_env, t_start + i))
    return data

In [42]:
# Simulation driver: steps `env` hour by hour, choosing actions with the RBC during the
# warm-up window and with the optimiser otherwise, while recording per-hour data.
# data = [[] for _ in range(28) ] #loads all data for 10 params
data = {}

state = env.reset() #states/building
done = False
t_idx = 0
rbc_threshold = 168 #run RBC 1st week of every 2 months

total_rewards = [] #reward for each building

agents = RBC(actions_spaces)
indx_hour = get_idx_hour()

start = time.time()

#run agent
# NOTE(review): the loop stops at t_idx == 168, and for every t_idx below 168
# `t_idx % 1460 < rbc_threshold` is true, so the optimisation branch below is never
# reached in this run. The hard-coded 168 also duplicates `rbc_threshold` - confirm intent.
while not done and t_idx < 168:

    # only the hour of the first building's state is handed to the RBC
    hour_state = np.array([[state[0][indx_hour]]])

    if t_idx%24 == 0 and t_idx > 0: #reset values every day
        #data = [[] for _ in range(28) ] #loads all data for 10 params
        data = {}
        #env = reset_soc(env)
            
    if t_idx%1460 < rbc_threshold: #run RBC 1st week of every 2 months
        action = agents.select_action(hour_state)
        next_state, rewards, done, _ = env.step(action)
    else:
        #running optimization per 24 hours.
        # roll a copy of the environment forward with the RBC to fill in the rest of the day
        data_est = estimate_data(deepcopy(env), agents, state, deepcopy(data), indx_hour, t_idx) #adaptive-perhour
        # NOTE(review): t_idx % 24 + 1 equals 24 on the last hour of a day, i.e. t == T
        # inside Optim (zero-length horizon) - confirm.
        action = get_actions(24, data_est, t_idx % 24 + 1) #runs optimization per hour.
        next_state, rewards, done, _ = env.step(action)
        
    state = next_state
        
    # record the readings of the step that was just taken
    data = parse_data(data, get_current_data(env, t_idx))
    total_rewards.append(rewards)
    
    t_idx += 1
    
    # '\r' keeps the counter on a single, overwritten line
    print(f"Time step: {t_idx}", end='\r', flush=True)
    
end = time.time()
print(f"Total time = {end - start}")

Total time = 1.2772510051727295


In [45]:
#optimization debug -> !!! PROBLEM !!!
# Uses the `data` dict left behind by the simulation loop, so that cell must have run first.

### Works on 17 hour
# every building returns a list of actions for t = 17 (see the output below)
debug = get_actions(24, data, 17, debug=False, apply_seed=False)
debug

[[0.0009284330303495561, -0.15793337858395598, -0.025620924021989115],
 [0.0009284310375570228, -0.033608915779687734, -0.016569332998565618],
 [0.0009284293404431489, -0.0729475817538694],
 [0.00074571425963897, -0.05305494156930998],
 [0.004117322893092736, -0.007896207319562212, -0.01742859725984626],
 [0.028479982110506116, 8.078480567114637e-11, 0.07588399385379686],
 [0.03512066218711012, 5.97352113291494e-10, 0.08106493454872675],
 [0.0009284276018374838, -0.0074469445869528245, 0.035750720052539826],
 [0.0009284221604420308, 5.705861594214342e-10, 0.085223666119832]]

In [46]:
#optimization debug -> !!! PROBLEM !!!
# Uses the `data` dict left behind by the simulation loop, so that cell must have run first.

#Doesn't work on 18 hour
# for t = 18 two of the nine entries come back as the string 'Unbounded Solution'
# instead of a list of actions (see the output below)
debug = get_actions(24, data, 18, debug=False, apply_seed=False)
debug

[[0.00010083193599768471, -0.15764783311874475, -0.004103709807549032],
 [0.00010082914719196349, -0.0299007068335152, -0.011760433358639329],
 [0.00010082878000472553, -0.06165761911666732],
 'Unbounded Solution',
 [0.0038333199167329838, 1.1627101579782754e-11, -0.04790475850068577],
 'Unbounded Solution',
 [0.04011276633874864, -0.019539324983547553, 0.051823225344468794],
 [0.00010082675309837736, 6.279384427914433e-11, -0.013568173328040811],
 [0.00010082009104571055, 9.489660982109725e-11, 0.021661836515368866]]

In [None]:
# Keep the environment's cost summary for the run above
# (presumably the metrics listed under 'cost_function' in `params` - TODO confirm).
basic_agent_cost = env.cost()

In [None]:
# First 5 days (hours 0-119) of year 1 - winter, per the original note: raw demand,
# demand net of PV, and the net consumption actually recorded by the environment.
plt.figure(figsize=(16, 5))
interval = range(0, 24 * 5)

# (series, extra positional plot args, legend label) - drawn in this order
curves = [
    (env.net_electric_consumption_no_pv_no_storage, (),
     'Electricity demand without storage or generation (kW)'),
    (env.net_electric_consumption_no_storage, (),
     'Electricity demand with PV generation and without storage(kW)'),
    (env.net_electric_consumption, ('--',),
     'Electricity demand with PV generation and using RBC for storage(kW)'),
]
for series, line_format, _ in curves:
    plt.plot(series[interval], *line_format)

plt.xlabel('time (hours)')
plt.ylabel('kW')
plt.legend([label for _, _, label in curves])

In [None]:
# total_rewards = np.array(total_rewards).T
# plt.figure(figsize=(15, 10))
# for i in range(9):
#     plt.plot(total_rewards[i], label='Building_'+str(i))

# plt.xlabel('Hour')
# plt.ylabel('Reward')
# plt.legend()
# plt.show()
# print(total_rewards.sum(1)) #total reward per building