In [7]:
%load_ext autoreload
%autoreload 2
import ipywidgets as widgets
from ipywidgets import interact, interactive

from citylearn.citylearn import CityLearnEnv
import time, random, typing, cProfile, traceback
import numpy as np
from common.initialization_methods import *
from visualiser.frame_cache import *

class Constants:
    """Notebook-wide settings read by the training loop."""

    # Schema file handed to CityLearnEnv when the environment is created.
    schema_path: str = './data/citylearn_challenge_2022_phase_1/schema.json'
    # Number of completed episodes after which train() stops.
    episodes: int = 3
    # A rendered frame is cached whenever the step counter is a multiple of this.
    steps_per_frame_save: int = 1

from agents.agents import ddpg
from agents.networks import central_critic, comm_net
from agents.features import *
from rewards import get_reward, rewards

from agents.orderenforcingwrapper import OrderEnforcingAgent

# Agent used by train(): a DDPGAgent built from the CommNet actor and the
# CentralCritic critic, wrapped in OrderEnforcingAgent. The critic's feature
# engineer wraps its own BaseFeatureEngineer instance.
agent_wrapper = OrderEnforcingAgent(
    agent=ddpg.DDPGAgent(
        actor=comm_net.CommNet,
        critic=central_critic.CentralCritic,
        actor_feature=BaseFeatureEngineer(),
        critic_feature=CentralCriticEngineer(BaseFeatureEngineer()),
    ),
)

# Rebind the reward hook on the get_reward module to the default reward.
get_reward.reward_function = rewards.default_reward

def _timed_call(func, *args):
    """Call ``func(*args)`` and return ``(result, elapsed_seconds)``.

    The elapsed time is the difference of two ``time.perf_counter()`` reads
    taken immediately before and after the call.
    """
    started = time.perf_counter()
    result = func(*args)
    return result, time.perf_counter() - started


def train():
    """Run ``agent_wrapper`` in a CityLearn environment for ``Constants.episodes`` episodes.

    The environment is built from ``Constants.schema_path``. Each iteration
    steps the environment with the agent's latest actions. When the
    environment reports ``done`` the episode is scored with
    ``env.evaluate()``, the environment is reset and the agent is re-registered
    via ``register_reset``; otherwise the agent is asked for its next actions
    via ``compute_action``.

    Side effects:
        * Prints progress every 1000 steps, a summary per finished episode,
          the average price / emission cost over finished episodes, and the
          accumulated time spent inside agent calls.
        * Every ``Constants.steps_per_frame_save`` steps the result of
          ``env.render()`` is passed to ``append_one_frame``.

    A ``KeyboardInterrupt`` raised inside the loop stops training early; the
    summaries for the episodes finished so far are still printed.

    If ``Constants.episodes`` is zero or negative no environment step is
    taken (the agent is still registered once with the initial observations).

    Raises:
        ValueError: If any value returned by ``env.evaluate()`` is NaN.
    """
    env = CityLearnEnv(schema=Constants.schema_path)

    obs_dict = env_reset(env)

    agent = agent_wrapper
    # Seconds spent inside agent calls only (environment time is excluded).
    agent_time_elapsed = 0.0

    actions, elapsed = _timed_call(agent.register_reset, obs_dict)
    agent_time_elapsed += elapsed

    episodes_completed = 0
    num_steps = 0
    interrupted = False
    episode_metrics = []
    try:
        # Testing the target up front (instead of breaking at the end of the
        # body) avoids running a full episode when no episodes are requested.
        while episodes_completed < Constants.episodes:
            observations, _, done, _ = env.step(actions)
            if done:
                episodes_completed += 1
                metrics_t = env.evaluate()
                # Reject unusable scores before they are recorded.
                if np.any(np.isnan(metrics_t)):
                    raise ValueError("Episode metrics are nan, please contant organizers")
                metrics = {"price_cost": metrics_t[0], "emmision_cost": metrics_t[1]}
                episode_metrics.append(metrics)
                print(f"Episode complete: {episodes_completed} | Latest episode metrics: {metrics}")

                # Start the next episode: fresh observations, agent re-registered.
                obs_dict = env_reset(env)
                actions, elapsed = _timed_call(agent.register_reset, obs_dict)
            else:
                actions, elapsed = _timed_call(agent.compute_action, observations)
            agent_time_elapsed += elapsed

            num_steps += 1
            if num_steps % 1000 == 0:
                print(f"Num Steps: {num_steps}, Num episodes: {episodes_completed}")
            if num_steps % Constants.steps_per_frame_save == 0:
                # NOTE(review): on a step that ended an episode this renders the
                # environment after env_reset(), not its final state - confirm
                # that this is intended.
                append_one_frame(env.render())
    except KeyboardInterrupt:
        print("========================= Stopping Evaluation =========================")
        interrupted = True

    if not interrupted:
        print("=========================Completed=========================")

    if episode_metrics:
        print("Average Price Cost:", np.mean([e['price_cost'] for e in episode_metrics]))
        print("Average Emmision Cost:", np.mean([e['emmision_cost'] for e in episode_metrics]))

    # Previously accumulated but never surfaced.
    print(f"Total time taken by agent: {agent_time_elapsed}s")

        
def display_sequence():
    """Show the cached frames in an interactive player.

    Creates a ``Play`` control and an ``IntSlider`` that both range over the
    cached frame indices ``0 .. get_total_frame_number() - 1``, links their
    ``value`` traits with ``widgets.jslink`` and passes both to ``interact``
    so the frame selected by the ``Play`` control is fetched with
    ``get_image_of_frame_at``.

    Returns:
        The value returned by ``interact`` for the inner ``_show`` callback.

    Raises:
        ValueError: If no frame has been cached yet. Without this check the
            widgets would be given an upper bound of -1, below their lower
            bound of 0.
    """
    frame_count = get_total_frame_number()
    if frame_count <= 0:
        raise ValueError(
            "No frames have been cached; run train() before display_sequence()."
        )
    # Query the cache once so both widgets share the same upper bound.
    last_index = frame_count - 1

    def _show(also_frame=0, frame=0):
        # `also_frame` receives the Play widget's value and selects the image.
        # `frame` is unused here; it exists so that interact() also lays out
        # the slider, which mirrors the Play value through jslink below.
        return get_image_of_frame_at(also_frame)

    play = widgets.Play(
        value=0,
        min=0,
        max=last_index,
        step=1,
        interval=500,
        disabled=False,
    )
    slider = widgets.IntSlider(value=0, min=0, max=last_index, step=1)
    widgets.jslink((play, 'value'), (slider, 'value'))
    return interact(_show, also_frame=play, frame=slider)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [8]:
# Run the training loop defined above: prints per-episode metrics and caches
# one rendered frame every Constants.steps_per_frame_save steps.
train()



In [9]:
# TODO: Add more functions
# Replay the frames cached by train() using a Play control linked to a slider.
# As the last expression of the cell, the call's return value is also displayed.
display_sequence()

interactive(children=(Play(value=0, description='also_frame', interval=500, max=40), IntSlider(value=0, descri…

<function __main__.display_sequence.<locals>._show(also_frame=(0, 40), frame=0)>