In [None]:
from src.experiments.experiment_runner import ExperimentRunner
from src.utils.plotting.eval_reward_plotting import read_and_plot_multiple_final_eval_rewards_grouped
from src.utils.match_utils import get_results_by_coeff
from src.utils.match_utils import perform_pairwise_analysis_baseline
from src.utils.match_utils import plot_and_save_rewards_by_coeff
from src.utils.plotting.energy_plots import run_energy_plotting

%matplotlib notebook

In [None]:
def get_experiment_name(common_reward_factor, price_based=False):
    """Build the experiment name for one common reward factor.

    The name is ``"dilemma_"`` (or ``"dilemma_price_based_"`` when
    ``price_based`` is true) followed by the factor, with every ``"."``
    replaced by ``"_"``.

    Whole-number floats are written without their trailing ``.0`` so that
    ``10.0`` gives ``"dilemma_10"`` rather than ``"dilemma_10_0"``. The plotting
    cells of this notebook look runs up by prefixes such as ``"dilemma_10_run"``
    and ``"dilemma_0_run"``, which the ``_0`` suffix would never match.

    Args:
        common_reward_factor: Numeric factor (int or float) to encode in the name.
        price_based: Selects the ``"dilemma_price_based_"`` prefix when true.

    Returns:
        str: The experiment name, e.g. ``"dilemma_2_5"`` for ``2.5``.
    """
    prefix = "dilemma_price_based_" if price_based else "dilemma_"

    # Only integral floats are normalised; ints and fractional values keep the
    # plain ``str()`` rendering they always had.
    if isinstance(common_reward_factor, float) and common_reward_factor.is_integer():
        factor_text = str(int(common_reward_factor))
    else:
        factor_text = str(common_reward_factor)

    return prefix + factor_text.replace(".", "_")

In [None]:
# Hyper-parameters shared by every training cell in this notebook; the dict is
# handed to ExperimentRunner through its ``training_params`` argument.
# NOTE(review): the key names look like RLlib PPO config keys — confirm.
training_params = dict(
    lr=5e-5,
    gamma=0.99,
    train_batch_size=50000,
    sgd_minibatch_size=512,
    clip_param=0.15,
    num_sgd_iter=10,
    # Network settings are nested under their own "model" key.
    model=dict(
        fcnet_hiddens=[256, 256],
        fcnet_activation="relu",
    ),
)

In [None]:
# SSD mitigation with a fixed common battery reward factor (here: 0.0).
common_reward_factor = 0.0

# Keyword settings for the runner, collected in one mapping and unpacked below.
runner_settings = dict(
    results_dir="results/dilemma_runs",
    pricing_mode="tou",
    plot_rewards=True,
    trading_phases=0,
    pv_efficiency=0.5,
    testing_mode="testing",
    training_params=training_params,
    eval_interval=4,
    log_plotting=False,
    num_cpus=16,
    common_reward_factor=common_reward_factor,
    common_battery_type="default",
)
ssd_mitigation = ExperimentRunner([27, 27, 27], **runner_settings)

# The experiment name is derived from the factor via get_experiment_name.
ssd_mitigation.run_multiple_training_runs(
    {'few_months': 10},
    get_experiment_name(common_reward_factor),
)

In [None]:
# Fixed common battery reward factor: 1.0.
common_reward_factor = 1.0

# Keyword settings for the runner, collected in one mapping and unpacked below.
runner_settings = dict(
    results_dir="results/dilemma_runs",
    pricing_mode="tou",
    plot_rewards=True,
    trading_phases=0,
    pv_efficiency=0.5,
    testing_mode="testing",
    training_params=training_params,
    eval_interval=4,
    log_plotting=False,
    num_cpus=16,
    common_reward_factor=common_reward_factor,
    common_battery_type="default",
)
ssd_mitigation = ExperimentRunner([27, 27, 27], **runner_settings)

# The experiment name is derived from the factor via get_experiment_name.
ssd_mitigation.run_multiple_training_runs(
    {'few_months': 10},
    get_experiment_name(common_reward_factor),
)

In [None]:
# Fixed common battery reward factor: 2.5.
common_reward_factor = 2.5

# Keyword settings for the runner, collected in one mapping and unpacked below.
runner_settings = dict(
    results_dir="results/dilemma_runs",
    pricing_mode="tou",
    plot_rewards=True,
    trading_phases=0,
    pv_efficiency=0.5,
    testing_mode="testing",
    training_params=training_params,
    eval_interval=4,
    log_plotting=False,
    num_cpus=16,
    common_reward_factor=common_reward_factor,
    common_battery_type="default",
)
ssd_mitigation = ExperimentRunner([27, 27, 27], **runner_settings)

# The experiment name is derived from the factor via get_experiment_name.
ssd_mitigation.run_multiple_training_runs(
    {'few_months': 10},
    get_experiment_name(common_reward_factor),
)

In [None]:
# Fixed common battery reward factor: 5.0.
common_reward_factor = 5.0

# Keyword settings for the runner, collected in one mapping and unpacked below.
runner_settings = dict(
    results_dir="results/dilemma_runs",
    pricing_mode="tou",
    plot_rewards=True,
    trading_phases=0,
    pv_efficiency=0.5,
    testing_mode="testing",
    training_params=training_params,
    eval_interval=4,
    log_plotting=False,
    num_cpus=16,
    common_reward_factor=common_reward_factor,
    common_battery_type="default",
)
ssd_mitigation = ExperimentRunner([27, 27, 27], **runner_settings)

# The experiment name is derived from the factor via get_experiment_name.
ssd_mitigation.run_multiple_training_runs(
    {'few_months': 10},
    get_experiment_name(common_reward_factor),
)

In [None]:
# Fixed common battery reward factor: 10.0.
common_reward_factor = 10.0

# Keyword settings for the runner, collected in one mapping and unpacked below.
runner_settings = dict(
    results_dir="results/dilemma_runs",
    pricing_mode="tou",
    plot_rewards=True,
    trading_phases=0,
    pv_efficiency=0.5,
    testing_mode="testing",
    training_params=training_params,
    eval_interval=4,
    log_plotting=False,
    num_cpus=16,
    common_reward_factor=common_reward_factor,
    common_battery_type="default",
)
ssd_mitigation = ExperimentRunner([27, 27, 27], **runner_settings)

# The experiment name is derived from the factor via get_experiment_name.
ssd_mitigation.run_multiple_training_runs(
    {'few_months': 10},
    get_experiment_name(common_reward_factor),
)

In [None]:
# Fixed common battery reward factor: 25.0.
common_reward_factor = 25.0

# Keyword settings for the runner, collected in one mapping and unpacked below.
runner_settings = dict(
    results_dir="results/dilemma_runs",
    pricing_mode="tou",
    plot_rewards=True,
    trading_phases=0,
    pv_efficiency=0.5,
    testing_mode="testing",
    training_params=training_params,
    eval_interval=4,
    log_plotting=False,
    num_cpus=16,
    common_reward_factor=common_reward_factor,
    common_battery_type="default",
)
ssd_mitigation = ExperimentRunner([27, 27, 27], **runner_settings)

# The experiment name is derived from the factor via get_experiment_name.
ssd_mitigation.run_multiple_training_runs(
    {'few_months': 10},
    get_experiment_name(common_reward_factor),
)

In [None]:
# Fixed common battery reward factor: 50.0.
common_reward_factor = 50.0

# Keyword settings for the runner, collected in one mapping and unpacked below.
runner_settings = dict(
    results_dir="results/dilemma_runs",
    pricing_mode="tou",
    plot_rewards=True,
    trading_phases=0,
    pv_efficiency=0.5,
    testing_mode="testing",
    training_params=training_params,
    eval_interval=4,
    log_plotting=False,
    num_cpus=16,
    common_reward_factor=common_reward_factor,
    common_battery_type="default",
)
ssd_mitigation = ExperimentRunner([27, 27, 27], **runner_settings)

# The experiment name is derived from the factor via get_experiment_name.
ssd_mitigation.run_multiple_training_runs(
    {'few_months': 10},
    get_experiment_name(common_reward_factor),
)

In [None]:
results_dir = "dilemma_runs"

# (run-group prefix, label) pairs, one per fixed reward factor, kept side by
# side so the two lists passed to the plotting helper cannot drift apart.
prefix_label_pairs = [
    ("dilemma_0_run", "0"),
    ("dilemma_1_run", "1"),
    ("dilemma_2_5_run", "2.5"),
    ("dilemma_5_run", "5"),
    ("dilemma_10_run", "10"),
    ("dilemma_25_run", "25"),
    ("dilemma_50_run", "50"),
]
run_group_prefixes = [prefix for prefix, _ in prefix_label_pairs]
group_labels = [label for _, label in prefix_label_pairs]

# Reads from results/<results_dir>/train and saves the figure next to it.
read_and_plot_multiple_final_eval_rewards_grouped(
    "results/{}/train".format(results_dir),
    run_group_prefixes,
    group_labels,
    save_path="results/{}/eval_rewards_{}.png".format(results_dir, results_dir),
    common_battery_reward_mode=True,
    limit_y=True,
)

In [None]:
# Plotting rewards
# Directory the fixed-factor training cells above were configured to use as
# their results_dir.
root_dir = 'results/dilemma_runs'

# Results for the fixed-factor runs, consumed by the analysis and box-plot
# cells that follow (presumably keyed by coefficient — confirm against
# get_results_by_coeff).
results_by_coeff = get_results_by_coeff(root_dir)

In [None]:
# Pairwise baseline analysis on the results loaded from 'results/dilemma_runs'.
# The call is the cell's last expression, so any returned value is shown as the
# cell output rather than stored.
perform_pairwise_analysis_baseline(results_by_coeff)

In [None]:
# Box plot of the fixed-factor results, written to the plots/ folder.
# sort_numerically=True is requested explicitly for this plot.
plot_and_save_rewards_by_coeff(
    results_by_coeff,
    plot_type='boxplot',
    file_name="plots/mitigation_fixed_coefficient_0_5_pv_boxplot.png",
    title=r"Final Evaluation Costs for Different $\beta_s$ Values, $\eta_{\text{pv}} = 0.5$",
    xlabel=r'$\beta_s$',
    sort_numerically=True,
)

In [None]:
# Energy plots for the run group with prefix "dilemma_0_run".
# NOTE(review): the first argument presumably names the folder under results/,
# and the prefix must match the names the training cells produced — confirm.
run_energy_plotting("dilemma_runs", group_prefix="dilemma_0_run")

In [None]:
# Energy plots for the run group with prefix "dilemma_10_run".
# NOTE(review): the first argument presumably names the folder under results/,
# and the prefix must match the names the training cells produced — confirm.
run_energy_plotting("dilemma_runs", group_prefix="dilemma_10_run")

In [None]:
# Energy plots for the run group with prefix "dilemma_25_run".
# NOTE(review): the first argument presumably names the folder under results/,
# and the prefix must match the names the training cells produced — confirm.
run_energy_plotting("dilemma_runs", group_prefix="dilemma_25_run")

In [None]:
# SSD mitigation with a price-based common battery reward factor (here: 0.0).
common_reward_factor = 0.0

# Keyword settings for the runner; price_based_common_reward_factor=True is
# what distinguishes this section from the fixed-factor runs.
runner_settings = dict(
    results_dir="results/dilemma_runs_price_based",
    pricing_mode="tou",
    plot_rewards=True,
    trading_phases=0,
    pv_efficiency=0.5,
    testing_mode="testing",
    training_params=training_params,
    eval_interval=4,
    log_plotting=False,
    num_cpus=16,
    common_reward_factor=common_reward_factor,
    price_based_common_reward_factor=True,
    common_battery_type="default",
)
ssd_mitigation = ExperimentRunner([27, 27, 27], **runner_settings)

# Passing True selects the "dilemma_price_based_" experiment-name prefix.
ssd_mitigation.run_multiple_training_runs(
    {'few_months': 10},
    get_experiment_name(common_reward_factor, True),
)

In [None]:
# Price-based common battery reward factor: 0.1.
common_reward_factor = 0.1

# Keyword settings for the runner; price_based_common_reward_factor=True is
# what distinguishes this section from the fixed-factor runs.
runner_settings = dict(
    results_dir="results/dilemma_runs_price_based",
    pricing_mode="tou",
    plot_rewards=True,
    trading_phases=0,
    pv_efficiency=0.5,
    testing_mode="testing",
    training_params=training_params,
    eval_interval=4,
    log_plotting=False,
    num_cpus=16,
    common_reward_factor=common_reward_factor,
    price_based_common_reward_factor=True,
    common_battery_type="default",
)
ssd_mitigation = ExperimentRunner([27, 27, 27], **runner_settings)

# Passing True selects the "dilemma_price_based_" experiment-name prefix.
ssd_mitigation.run_multiple_training_runs(
    {'few_months': 10},
    get_experiment_name(common_reward_factor, True),
)

In [None]:
# SSD mitigation with a price-based common battery reward factor (here: 0.25).
common_reward_factor = 0.25

# Keyword settings for the runner; price_based_common_reward_factor=True is
# what distinguishes this section from the fixed-factor runs.
runner_settings = dict(
    results_dir="results/dilemma_runs_price_based",
    pricing_mode="tou",
    plot_rewards=True,
    trading_phases=0,
    pv_efficiency=0.5,
    testing_mode="testing",
    training_params=training_params,
    eval_interval=4,
    log_plotting=False,
    num_cpus=16,
    common_reward_factor=common_reward_factor,
    price_based_common_reward_factor=True,
    common_battery_type="default",
)
ssd_mitigation = ExperimentRunner([27, 27, 27], **runner_settings)

# Passing True selects the "dilemma_price_based_" experiment-name prefix.
ssd_mitigation.run_multiple_training_runs(
    {'few_months': 10},
    get_experiment_name(common_reward_factor, True),
)

In [None]:
# SSD mitigation with a price-based common battery reward factor (here: 0.5).
common_reward_factor = 0.5

# Keyword settings for the runner; price_based_common_reward_factor=True is
# what distinguishes this section from the fixed-factor runs.
runner_settings = dict(
    results_dir="results/dilemma_runs_price_based",
    pricing_mode="tou",
    plot_rewards=True,
    trading_phases=0,
    pv_efficiency=0.5,
    testing_mode="testing",
    training_params=training_params,
    eval_interval=4,
    log_plotting=False,
    num_cpus=16,
    common_reward_factor=common_reward_factor,
    price_based_common_reward_factor=True,
    common_battery_type="default",
)
ssd_mitigation = ExperimentRunner([27, 27, 27], **runner_settings)

# Passing True selects the "dilemma_price_based_" experiment-name prefix.
ssd_mitigation.run_multiple_training_runs(
    {'few_months': 10},
    get_experiment_name(common_reward_factor, True),
)

In [None]:
# Load the price-based results for the analysis and box plot that follow.
# The price-based training cells are configured with
# results_dir="results/dilemma_runs_price_based", so read from that same
# directory; the fixed-factor section likewise uses one path for both training
# and loading. The previous value, 'results/dilemma_price_based', is not a
# directory any cell in this notebook writes to.
root_dir = 'results/dilemma_runs_price_based'

# NOTE: this rebinds results_by_coeff, replacing the fixed-factor results.
results_by_coeff = get_results_by_coeff(root_dir)

In [None]:
# Pairwise baseline analysis on the price-based results loaded in the previous
# cell; the return value is kept in analysis_results (no later use is visible
# in this notebook).
analysis_results = perform_pairwise_analysis_baseline(results_by_coeff)

In [None]:
# Box plot of the price-based results, written to the plots/ folder.
# NOTE(review): unlike the fixed-coefficient box plot, sort_numerically is not
# passed here — confirm the helper's default ordering is what is wanted.
plot_and_save_rewards_by_coeff(
    results_by_coeff,
    plot_type='boxplot',
    file_name="plots/mitigation_price_coefficient_0_5_pv_boxplot.png",
    title=r"Final Evaluation Costs for Different $\lambda_s$ Values, $\eta_{\text{pv}} = 0.5$",
    xlabel=r'$\lambda_s$',
)