In [3]:
from citylearn.agents.sac import SAC as RLAgent
from citylearn.citylearn import CityLearnEnv, EvaluationCondition
import os
from citylearn.utilities import read_json

In [4]:
# Location of the warm-up schema inside the local starter-kit checkout.
# NOTE(review): this is a machine-specific absolute path; adjust it for your own checkout.
schema_filepath = '/home/amalnamm/work/Neurips CityLearn/GIT/citylearn-2023-starter-kit/data/schemas/warm_up/schema.json'
schema = read_json(schema_filepath)
# Derive the data root from the schema location instead of repeating the
# directory literal, so the two values cannot drift apart.
schema['root_directory'] = os.path.dirname(schema_filepath)
# Environment built from the schema with CityLearnEnv's default arguments.
env = CityLearnEnv(schema)

In [5]:
# Alternative environment setup kept for reference (disabled):
#env = CityLearnEnv(schema, central_agent=False)
# Wrap the environment created above in the SAC agent (imported as RLAgent).
model = RLAgent(env)

In [8]:
# Train the baseline agent for 2 episodes.
# NOTE(review): deterministic_finish presumably makes the last episode use
# deterministic actions — confirm against the CityLearn agent documentation.
model.learn(episodes=2, deterministic_finish=True)

In [9]:
# Compute the challenge metrics on the environment the baseline agent trained on.
# Despite the `_df` suffix, the result displayed in the next cell is a nested
# dict keyed by metric name, not a DataFrame.
metrics_df = env.evaluate_citylearn_challenge()

In [10]:
# Show the metrics; each entry carries 'display_name', 'weight' and 'value'.
metrics_df

{'carbon_emissions_total': {'display_name': 'Carbon emissions',
  'weight': 0.1,
  'value': 0.9516948510682978},
 'discomfort_proportion': {'display_name': 'Unmet hours',
  'weight': 0.3,
  'value': 0.7042784847097007},
 'ramping_average': {'display_name': 'Ramping',
  'weight': 0.075,
  'value': 0.8820170204710601},
 'daily_one_minus_load_factor_average': {'display_name': 'Load factor',
  'weight': 0.075,
  'value': 0.9432779693167079},
 'daily_peak_average': {'display_name': 'Daily peak',
  'weight': 0.075,
  'value': 0.9286474542897546},
 'annual_peak_average': {'display_name': 'All-time peak',
  'weight': 0.075,
  'value': 0.9484990488028759},
 'one_minus_thermal_resilience_proportion': {'display_name': 'Thermal resilience',
  'weight': 0.15,
  'value': 0.6380952380952382},
 'power_outage_normalized_unserved_energy_total': {'display_name': 'Unserved energy',
  'weight': 0.15,
  'value': 0.624421237790722},
 'average_score': {'display_name': 'Score',
  'weight': None,
  'value': 0.7

In [11]:
class WrapperEnv:
    """
    Restricted view of a CityLearn environment.

    Carries only the values copied out of ``env_data`` so that the full
    environment object does not have to be handed over; nothing beyond these
    attributes and ``get_metadata`` is exposed.
    """

    def __init__(self, env_data):
        """Copy the expected entries of ``env_data`` onto the instance.

        Each listed key becomes an attribute of the same name. A missing key
        propagates the lookup error raised by ``env_data`` (``KeyError`` for
        a plain dict).
        """
        # Lookup order is fixed so a missing key is reported in a stable order.
        for field in (
            'observation_names',
            'action_names',
            'observation_space',
            'action_space',
            'time_steps',
            'seconds_per_time_step',
            'random_seed',
            'buildings_metadata',
            'episode_tracker',
        ):
            setattr(self, field, env_data[field])

    def get_metadata(self):
        """Return the stored buildings metadata under the ``'buildings'`` key."""
        return dict(buildings=self.buildings_metadata)

In [17]:
from rewards.user_reward import SubmissionReward


In [20]:
def create_citylearn_env(schema, reward_function, central_agent=False, random_seed=1234):
    """Build a CityLearn environment together with its restricted wrapper.

    Args:
        schema: Schema passed straight to ``CityLearnEnv``.
        reward_function: Passed to ``CityLearnEnv`` as ``reward_function``.
        central_agent: Forwarded to ``CityLearnEnv``. Defaults to ``False``,
            the value that was previously hard-coded.
        random_seed: Seed value recorded on the wrapper. Defaults to ``1234``,
            the value that was previously hard-coded. It is only stored in
            the wrapper data and is not passed to ``CityLearnEnv``.

    Returns:
        Tuple ``(env, wrapper_env)``: the full environment and a
        ``WrapperEnv`` holding the subset of its data listed below.
    """
    env = CityLearnEnv(schema, reward_function=reward_function, central_agent=central_agent)

    env_data = dict(
        observation_names = env.observation_names,
        action_names = env.action_names,
        observation_space = env.observation_space,
        action_space = env.action_space,
        time_steps = env.time_steps,
        random_seed = random_seed,
        # These two are intentionally not read from the environment; the
        # wrapper exposes them as None.
        episode_tracker = None,
        seconds_per_time_step = None,
        buildings_metadata = env.get_metadata()['buildings']
    )

    wrapper_env = WrapperEnv(env_data)
    return env, wrapper_env

In [21]:
# Rebuild the environment with the custom SubmissionReward. This rebinds `env`,
# so later cells no longer refer to the environment created at the top.
env, wrapper_env = create_citylearn_env(schema, SubmissionReward)

In [25]:
# Fresh SAC agent bound to the environment that uses SubmissionReward.
agent = RLAgent(env)


In [26]:
# Train for 2 episodes, using the same settings as the baseline run for comparison.
agent.learn(episodes=2, deterministic_finish=True)

In [27]:
# Challenge metrics for the SubmissionReward run; overwrites the baseline
# `metrics_df` computed earlier in the notebook.
metrics_df = env.evaluate_citylearn_challenge()


In [28]:
# Show the metrics for the SubmissionReward run (nested dict keyed by metric name).
metrics_df

{'carbon_emissions_total': {'display_name': 'Carbon emissions',
  'weight': 0.1,
  'value': 0.9524949372122796},
 'discomfort_proportion': {'display_name': 'Unmet hours',
  'weight': 0.3,
  'value': 0.7072975069995903},
 'ramping_average': {'display_name': 'Ramping',
  'weight': 0.075,
  'value': 0.8822691302350596},
 'daily_one_minus_load_factor_average': {'display_name': 'Load factor',
  'weight': 0.075,
  'value': 0.9432832179231083},
 'daily_peak_average': {'display_name': 'Daily peak',
  'weight': 0.075,
  'value': 0.9289293527981298},
 'annual_peak_average': {'display_name': 'All-time peak',
  'weight': 0.075,
  'value': 0.9481310397646742},
 'one_minus_thermal_resilience_proportion': {'display_name': 'Thermal resilience',
  'weight': 0.15,
  'value': 0.6380952380952382},
 'power_outage_normalized_unserved_energy_total': {'display_name': 'Unserved energy',
  'weight': 0.15,
  'value': 0.624879498221464},
 'average_score': {'display_name': 'Score',
  'weight': None,
  'value': 0.7

In [32]:
# Reset the environment and keep what reset() returns as the starting observations.
observations = env.reset()

In [33]:
# Inspect the raw observations; the recorded output is a list of three lists
# (presumably one per building — confirm against the schema).
observations

[[5,
  1,
  24.66,
  24.910639,
  38.41596,
  27.611464,
  0.0,
  54.625927,
  116.84289,
  0.0,
  0.0,
  143.32434,
  1020.7561,
  0.0,
  0.40248835,
  23.098652,
  0.35683933,
  0.0,
  0.0,
  0.2,
  0.67788136,
  0.02893,
  0.02893,
  0.02915,
  0.02893,
  1.1192156,
  0.055682074,
  3.0,
  23.222221,
  0],
 [5,
  1,
  24.66,
  24.910639,
  38.41596,
  27.611464,
  0.0,
  54.625927,
  116.84289,
  0.0,
  0.0,
  143.32434,
  1020.7561,
  0.0,
  0.40248835,
  24.278513,
  0.18733284,
  0.0,
  0.0,
  0.2,
  0.18733284,
  0.02893,
  0.02893,
  0.02915,
  0.02893,
  0.0,
  0.0,
  1.0,
  24.444445,
  0],
 [5,
  1,
  24.66,
  24.910639,
  38.41596,
  27.611464,
  0.0,
  54.625927,
  116.84289,
  0.0,
  0.0,
  143.32434,
  1020.7561,
  0.0,
  0.40248835,
  24.431562,
  0.4220805,
  0.0,
  0.0,
  0.2,
  0.5631514,
  0.02893,
  0.02893,
  0.02915,
  0.02893,
  0.5579055,
  0.0,
  2.0,
  24.444445,
  0]]

In [34]:
# Ask the trained agent for actions given the observations from reset();
# the recorded output is a list of three float32 arrays with three values each.
agent.predict(observations)

[array([-0.2366574 , -0.1068032 ,  0.04216267], dtype=float32),
 array([-0.3589483 ,  0.26893902,  0.36544722], dtype=float32),
 array([ 0.36875746, -0.1719276 ,  0.03730884], dtype=float32)]

In [35]:
# Display the cost functions (KPIs) as a table: one row per cost function,
# one column per `name` value.
# NOTE(review): in the recorded run this cell raised
# "ZeroDivisionError: float division by zero". Presumably the env.reset() call
# in an earlier cell cleared the episode data that evaluate() needs — confirm,
# and if so run this cell right after training, before resetting.
kpis = agent.env.evaluate(baseline_condition=EvaluationCondition.WITHOUT_STORAGE_BUT_WITH_PARTIAL_LOAD_AND_PV)
# Reshape from long form (cost_function, name, value) to a wide table.
kpis = kpis.pivot(index='cost_function', columns='name', values='value')
# Drop cost functions that have no value in any column.
kpis = kpis.dropna(how='all')
display(kpis)

ZeroDivisionError: float division by zero

In [None]:
# Longer training run (30 episodes) on the same agent.
agent.learn(episodes=30, deterministic_finish=True)