In [61]:
import pd as pd

from traineval.train_eval import TrainerEvaluator
from tqdm import tqdm
from traineval.utils.convert_arguments import get_environment_arguments
from matplotlib import pyplot as plt

import numpy as np
import pandas as pd
import os.path as osp
from traineval.training.spinningup import data as saved_models
%matplotlib inline

In [None]:
def create_model_env_arguments(model_type, number_of_epochs, model_seed, save_freq):
    """Assemble the model argument table and the environment arguments for a run.

    Args:
        model_type: Value stored in the ``--exp_name`` entry.
        number_of_epochs: Value stored in the ``--epochs`` entry.
        model_seed: Value stored in the ``--seed`` / ``-s`` entry.
        save_freq: Value stored in the ``--save_freq`` entry.

    Returns:
        tuple: ``(model_args, environment_arguments)``. ``model_args`` is a list
        of ``[flags, type, value]`` triples; ``environment_arguments`` is the
        result of ``get_environment_arguments`` for the district-level and
        building-level argument names selected below.
    """
    # District-level arguments the agent should use.
    district_args = [
        "hour",
        "month",
        "carbon_intensity",
        "electricity_pricing",
        "outdoor_dry_bulb_temperature_predicted_6h",
        "outdoor_relative_humidity_predicted_6h",
    ]

    # Building-level arguments the agent should use.
    building_args = [
        "non_shiftable_load",
        "solar_generation",
        "electrical_storage_soc",
        "net_electricity_consumption",
    ]

    # One (flags, type, value) row per model option; converted to nested lists below.
    option_rows = (
        (('--env',), str, 'Epoch-Citylearn-v1'),
        (('--hid',), int, 64),
        (('--l',), int, 2),
        (('--gamma',), float, 0.99),
        (('--seed', '-s'), int, model_seed),
        (('--cpu',), int, 4),
        (('--steps',), int, 4000),
        (('--epochs',), int, number_of_epochs),
        (('--exp_name',), str, model_type),
        (('--save_freq',), int, save_freq),
    )

    environment_arguments = get_environment_arguments(district_args, building_args)
    model_args = [[list(flags), kind, value] for flags, kind, value in option_rows]

    return model_args, environment_arguments

In [75]:
def get_training_times(model_type, model_seed, time_column="Time"):
    """Return the training times logged in a saved model's ``progress.txt``.

    The log is looked up at
    ``<saved_models dir>/<model_type>/<model_type>_s<model_seed>/progress.txt``
    and parsed as a tab-separated table with ``pd.read_table``.

    Args:
        model_type: Experiment name; used for both the directory and the run
            folder prefix.
        model_seed: Seed used for the run; forms the ``_s<seed>`` suffix.
        time_column: Name of the column holding the logged times. Defaults to
            ``"Time"`` -- NOTE(review): assumed to match the SpinningUp-style
            logger header used by this project; confirm against a real log.

    Returns:
        list: The values of ``time_column``, one per logged row.

    Raises:
        KeyError: If ``time_column`` is not a column of the progress table.
    """
    run_folder = f"{model_type}_s{model_seed}"
    full_path = osp.join(
        osp.dirname(saved_models.__file__), str(model_type), run_folder, 'progress.txt'
    )

    progress = pd.read_table(full_path)
    if time_column not in progress.columns:
        raise KeyError(
            f"Column {time_column!r} not found in {full_path}; "
            f"available columns: {list(progress.columns)}"
        )

    # Iterating a DataFrame (e.g. list(df)) yields its column labels, so the
    # time column has to be selected explicitly to get the logged values.
    return progress[time_column].tolist()


In [76]:
def get_evaluation_data(trainer_evaluator, model_type, model_seed, num_epochs, eval_freq):
    """Evaluate saved checkpoints of a model and pair the scores with training times.

    Checkpoints are evaluated every ``eval_freq`` epochs starting at epoch 0.
    When the last of those is not the final epoch (``num_epochs - 1``), the
    final epoch is evaluated as well.

    Args:
        trainer_evaluator: Object exposing ``run_evaluation(model_type=...,
            model_seed=..., model_iteration=..., verbose=...)`` that returns a
            ``(averaged_score, agent_time)`` pair.
        model_type: Experiment name of the model to evaluate.
        model_seed: Seed of the run to evaluate.
        num_epochs: Total number of epochs the model was trained for.
        eval_freq: Spacing, in epochs, between evaluated checkpoints.

    Returns:
        pandas.DataFrame: Columns ``averages`` (one score per evaluated
        checkpoint) and ``times`` (values from ``get_training_times``).
    """
    epochs_to_evaluate = list(range(0, num_epochs, eval_freq))
    final_epoch = num_epochs - 1
    # Also score the final checkpoint when the regular stride does not land on it.
    if epochs_to_evaluate and epochs_to_evaluate[-1] != final_epoch:
        epochs_to_evaluate.append(final_epoch)

    averaged_scores = []
    for epoch in tqdm(epochs_to_evaluate):
        # TODO: make this run in parallel
        averaged_score, _ = trainer_evaluator.run_evaluation(
            model_type=model_type,
            model_seed=model_seed,
            model_iteration=str(epoch),
            verbose=False,
        )
        averaged_scores.append(averaged_score)

    training_times = get_training_times(model_type, model_seed)
    # NOTE(review): when the log holds more entries than evaluated checkpoints,
    # this assumes entry i belongs to training epoch i and keeps only the
    # evaluated epochs -- confirm against the progress log layout. If the
    # lengths already match, or the log is too short to index by epoch, the
    # times are passed through unchanged and pandas reports any mismatch.
    if (
        epochs_to_evaluate
        and len(training_times) != len(averaged_scores)
        and len(training_times) > final_epoch
    ):
        training_times = [training_times[epoch] for epoch in epochs_to_evaluate]

    return pd.DataFrame({"averages": averaged_scores, "times": training_times})


In [77]:
def plot_and_save(title, xs, ys, xlabel, ylabel):
    """Plot ``ys`` against ``xs`` as a marked line, save the figure, then show it.

    Args:
        title: Figure title; the same string is passed to ``plt.savefig`` as
            the output file name.
        xs: Values for the x axis.
        ys: Values for the y axis.
        xlabel: Label for the x axis.
        ylabel: Label for the y axis.
    """
    _, axes = plt.subplots()
    axes.plot(xs, ys, marker="o")
    axes.set(xlabel=xlabel, ylabel=ylabel, title=title)

    # Save before showing so the written file contains the rendered plot.
    plt.savefig(title)
    plt.show()

In [47]:
# Evaluate the untuned PPO agent over a 720-epoch training run.
number_of_epochs = 720
model_type = "ppo"

# Each keyword must receive its own value: model_type is the experiment name,
# number_of_epochs the epoch count.
model_args, environment_arguments = create_model_env_arguments(
    model_type=model_type,
    number_of_epochs=number_of_epochs,
    model_seed=0,
    save_freq=20,
)

trainer_evaluator = TrainerEvaluator(model_args=model_args, environment_arguments=environment_arguments)
df_costs_times = get_evaluation_data(
    trainer_evaluator=trainer_evaluator,
    model_type=model_type,
    model_seed=0,
    num_epochs=number_of_epochs,
    eval_freq=20,
)

# The title doubles as the file name of the saved figure.
ppo_untuned_title = f"{model_type}-{number_of_epochs}epochs"
plot_and_save(
    title=ppo_untuned_title,
    xs=df_costs_times.times,
    ys=df_costs_times.averages,
    xlabel="Time spent (s)",
    ylabel="Average cost",
)

100%|██████████| 2/2 [00:50<00:00, 25.35s/it]


In [None]:
# Evaluate the untuned PPO agent over a 1240-epoch training run.
number_of_epochs = 1240
model_type = "ppo"

model_args, environment_arguments = create_model_env_arguments(
    model_type=model_type,
    number_of_epochs=number_of_epochs,
    model_seed=0,
    save_freq=20,
)

trainer_evaluator = TrainerEvaluator(
    model_args=model_args,
    environment_arguments=environment_arguments,
)
df_costs_times = get_evaluation_data(
    trainer_evaluator=trainer_evaluator,
    model_type=model_type,
    model_seed=0,
    num_epochs=number_of_epochs,
    eval_freq=20,
)

# The title doubles as the file name of the saved figure.
ppo_untuned_title = f"{model_type}-{number_of_epochs}epochs".strip()
plot_and_save(
    title=ppo_untuned_title,
    xs=df_costs_times["times"],
    ys=df_costs_times["averages"],
    xlabel="Time spent training (s)",
    ylabel="Average cost",
)