In [175]:
import pandas as pd

In [176]:
# Clean the benchmark log: copy it to a new file keeping only the first
# N_LOG_COLUMNS fields of every row (any further fields are discarded).
import csv

N_LOG_COLUMNS = 9  # number of leading fields kept per row

# The csv module requires files passed to csv.reader / csv.writer to be opened
# with newline=''. Without it the writer produces '\r\r\n' row endings on
# Windows and newlines embedded in quoted fields are not read back correctly.
with open('benchmark_log.csv', 'r', newline='') as fin, \
        open('benchmark_log_cleaned.csv', 'w', newline='') as fout:
    # Stream the rows straight through instead of collecting them in a list;
    # this also avoids binding `data` to a list before the next cell rebinds
    # it to a DataFrame.
    csv.writer(fout).writerows(row[:N_LOG_COLUMNS] for row in csv.reader(fin))

In [177]:
# Load the cleaned log. `train_steps` is not used in this analysis, and the
# stored `total_cost_format` is discarded so it can be regenerated below.
data = (
    pd.read_csv('benchmark_log_cleaned.csv')
    .drop(columns=["train_steps", "total_cost_format"])
)

# Total cost as a string in scientific notation with four decimals.
data["total_cost_format"] = data["total_cost"].apply(lambda cost: f"{cost:.4e}")

# Replace each microgrid label by an integer ID, numbered in order of first
# appearance in the log (0, 1, 2, ...).
unique_microgrids = data["microgrid"].unique()
microgrid_map = dict(zip(unique_microgrids, range(len(unique_microgrids))))
data["microgrid"] = data["microgrid"].map(microgrid_map)

data.head()

Unnamed: 0,config_id,total_cost,microgrid,agent,policy_act,policy_net_arch,learning_rate,total_cost_format
0,0,53062.168702,0,SB3Agent DQN,ReLU,"[64, 64]",0.0001,53062.0
1,1,55578.578584,0,SB3Agent DQN,ReLU,"[64, 64]",0.0005,55579.0
2,2,57543.274309,0,SB3Agent DQN,ReLU,"[64, 64]",0.001,57543.0
3,3,55934.748791,0,SB3Agent DQN,ReLU,"[128, 128]",0.0001,55935.0
4,4,84487.924273,0,SB3Agent DQN,ReLU,"[128, 128]",0.0005,84488.0


In [178]:
# Heuristic baseline cost of each microgrid, keyed by microgrid ID.
# Keying by ID (instead of relying on the position of the heuristic rows in the
# log) keeps the baseline correct whatever order those rows appear in.
# When a microgrid has several heuristic rows, the first one is used.
heuristic_rows = data[data["agent"] == "BasicAgent heuristics"].drop_duplicates(subset=["microgrid"])
baseline_by_microgrid = heuristic_rows.set_index("microgrid")["total_cost"].sort_index()

# Plain list of baseline costs ordered by microgrid ID, kept under its original
# name for the cells that iterate over it.
heuristic_res = baseline_by_microgrid.tolist()

# From here on `data` only holds the learned agents.
# NOTE: this rebinding makes the cell non-idempotent -- re-running it without
# re-running the loading cell finds no heuristic rows and fails the check below.
data = data[data["agent"] != "BasicAgent heuristics"].copy()

# Fail loudly instead of producing misaligned or missing baselines.
missing_baselines = sorted(set(data["microgrid"]) - set(baseline_by_microgrid.index))
if missing_baselines:
    raise ValueError(f"No heuristic baseline found for microgrid IDs: {missing_baselines}")


def add_baseline(row):
    """Return the heuristic baseline cost for the microgrid of `row`."""
    return baseline_by_microgrid.loc[row["microgrid"]]


def absolute_improvement(row):
    """Return baseline cost minus the row's total cost (positive = cheaper than the heuristic)."""
    return baseline_by_microgrid.loc[row["microgrid"]] - row["total_cost"]


def add_percentage_improvement(row):
    """Return the row's cost saving relative to the heuristic baseline, in percent."""
    heuristic_cost = baseline_by_microgrid.loc[row["microgrid"]]
    return (heuristic_cost - row["total_cost"]) / heuristic_cost * 100


# Column-wise equivalents of the row helpers above (same arithmetic, no
# Python-level loop over rows).
data["baseline"] = data["microgrid"].map(baseline_by_microgrid)
data["absolute_improvement"] = data["baseline"] - data["total_cost"]
data["percentage_improvement"] = data["absolute_improvement"] / data["baseline"] * 100

data.head()

Unnamed: 0,config_id,total_cost,microgrid,agent,policy_act,policy_net_arch,learning_rate,total_cost_format,baseline,absolute_improvement,percentage_improvement
0,0,53062.168702,0,SB3Agent DQN,ReLU,"[64, 64]",0.0001,53062.0,187183.490996,134121.322294,71.652324
1,1,55578.578584,0,SB3Agent DQN,ReLU,"[64, 64]",0.0005,55579.0,187183.490996,131604.912412,70.30797
2,2,57543.274309,0,SB3Agent DQN,ReLU,"[64, 64]",0.001,57543.0,187183.490996,129640.216687,69.25836
3,3,55934.748791,0,SB3Agent DQN,ReLU,"[128, 128]",0.0001,55935.0,187183.490996,131248.742206,70.117691
4,4,84487.924273,0,SB3Agent DQN,ReLU,"[128, 128]",0.0005,84488.0,187183.490996,102695.566723,54.863581


In [179]:
# Heuristic baseline costs in scientific notation (four decimals), one per microgrid.
[format(cost, ".4e") for cost in heuristic_res]

['1.8718e+05',
 '2.3694e+07',
 '3.6006e+07',
 '3.5932e+07',
 '3.2445e+07',
 '1.1351e+08',
 '4.6285e+07',
 '1.5447e+07',
 '9.7355e+06',
 '3.5711e+06']

In [180]:
# Five cheapest runs overall, ranked by raw total cost (lowest first).
# NOTE(review): raw cost is not normalised per microgrid, and the baselines
# listed above span several orders of magnitude, so this ranking is likely to
# surface only the cheapest microgrid -- confirm that is the intent.
data.sort_values(by="total_cost", ascending=True).head(n=5)

Unnamed: 0,config_id,total_cost,microgrid,agent,policy_act,policy_net_arch,learning_rate,total_cost_format,baseline,absolute_improvement,percentage_improvement
34,34,51143.508873,0,SB3Agent PPO,Tanh,"[128, 128]",0.0005,51144.0,187183.490996,136039.982124,72.67734
18,18,51143.508873,0,SB3Agent A2C,Tanh,"[64, 64]",0.0001,51144.0,187183.490996,136039.982124,72.67734
17,17,51143.508873,0,SB3Agent A2C,ReLU,"[128, 128]",0.001,51144.0,187183.490996,136039.982124,72.67734
21,21,51143.508873,0,SB3Agent A2C,Tanh,"[128, 128]",0.0001,51144.0,187183.490996,136039.982124,72.67734
15,15,51143.508873,0,SB3Agent A2C,ReLU,"[128, 128]",0.0001,51144.0,187183.490996,136039.982124,72.67734


In [181]:
# Five best runs overall, ranked by percentage improvement over the heuristic
# baseline (largest improvement first).
data.sort_values(by="percentage_improvement", ascending=False).head(n=5)

Unnamed: 0,config_id,total_cost,microgrid,agent,policy_act,policy_net_arch,learning_rate,total_cost_format,baseline,absolute_improvement,percentage_improvement
202,202,4073472.0,5,SB3Agent DQN,Tanh,"[64, 64]",0.0005,4073500.0,113508200.0,109434700.0,96.411296
230,230,4073472.0,5,SB3Agent PPO,Tanh,"[128, 128]",0.001,4073500.0,113508200.0,109434700.0,96.411296
229,229,4073472.0,5,SB3Agent PPO,Tanh,"[128, 128]",0.0005,4073500.0,113508200.0,109434700.0,96.411296
227,227,4073472.0,5,SB3Agent PPO,Tanh,"[64, 64]",0.001,4073500.0,113508200.0,109434700.0,96.411296
226,226,4073472.0,5,SB3Agent PPO,Tanh,"[64, 64]",0.0005,4073500.0,113508200.0,109434700.0,96.411296


In [165]:
# Print, for every microgrid ID, the single run with the lowest total cost.
for microgrid in range(len(unique_microgrids)):
    grid_rows = data.loc[data["microgrid"] == microgrid]
    best_config = grid_rows.sort_values(by="total_cost").iloc[:1]
    # Header, table and a blank separator line, each on its own line.
    print(
        f"Best config for microgrid {microgrid}:",
        best_config.to_string(index=False),
        "",
        sep="\n",
    )

Best config for microgrid 0:
 config_id   total_cost  microgrid        agent policy_act policy_net_arch  learning_rate total_cost_format      baseline  percentage_improvement
        17 51143.508873          0 SB3Agent A2C       ReLU      [128, 128]          0.001        5.1144e+04 187183.490996                72.67734

Best config for microgrid 1:
 config_id   total_cost  microgrid        agent policy_act policy_net_arch  learning_rate total_cost_format     baseline  percentage_improvement
        39 3.646774e+06          1 SB3Agent DQN       ReLU        [64, 64]         0.0001        3.6468e+06 2.369357e+07               84.608591

Best config for microgrid 2:
 config_id   total_cost  microgrid        agent policy_act policy_net_arch  learning_rate total_cost_format     baseline  percentage_improvement
        78 1.750680e+06          2 SB3Agent DQN       ReLU        [64, 64]         0.0001        1.7507e+06 3.600570e+07               95.137769

Best config for microgrid 3:
 config_i

In [166]:
# Mean total cost of each full configuration (agent, activation, architecture,
# learning rate), averaged over all rows sharing that configuration, cheapest
# first.
# The metric column is selected explicitly rather than isolated by dropping
# every other column: the result no longer depends on the exact set of columns
# currently present in `data`.
# NOTE(review): the average mixes rows from different microgrids whose cost
# scales differ widely, so it is presumably dominated by the most expensive
# ones -- confirm this is the intended comparison.
(
    data.groupby(["agent", "policy_act", "policy_net_arch", "learning_rate"])[["total_cost"]]
    .mean()
    .sort_values("total_cost")
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,total_cost
agent,policy_act,policy_net_arch,learning_rate,Unnamed: 4_level_1
SB3Agent PPO,Tanh,"[64, 64]",0.001,2403480.0
SB3Agent PPO,Tanh,"[128, 128]",0.001,2403480.0
SB3Agent PPO,Tanh,"[128, 128]",0.0005,2403480.0
SB3Agent PPO,Tanh,"[64, 64]",0.0005,2403480.0
SB3Agent A2C,Tanh,"[128, 128]",0.001,2403480.0
SB3Agent PPO,Tanh,"[64, 64]",0.0001,2403504.0
SB3Agent PPO,Tanh,"[128, 128]",0.0001,2403504.0
SB3Agent A2C,Tanh,"[64, 64]",0.001,2403517.0
SB3Agent A2C,Tanh,"[128, 128]",0.0005,2403517.0
SB3Agent DQN,Tanh,"[128, 128]",0.0001,2403525.0


In [167]:
# Mean total cost per (agent, activation, architecture), averaged over learning
# rates and all other rows sharing those three settings, cheapest first.
# The metric column is selected explicitly rather than isolated by dropping
# every other column: the result no longer depends on the exact set of columns
# currently present in `data`.
(
    data.groupby(["agent", "policy_act", "policy_net_arch"])[["total_cost"]]
    .mean()
    .sort_values("total_cost")
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,total_cost
agent,policy_act,policy_net_arch,Unnamed: 3_level_1
SB3Agent PPO,Tanh,"[128, 128]",2403488.0
SB3Agent PPO,Tanh,"[64, 64]",2403488.0
SB3Agent A2C,Tanh,"[128, 128]",2403540.0
SB3Agent A2C,Tanh,"[64, 64]",2403588.0
SB3Agent DQN,Tanh,"[64, 64]",2494487.0
SB3Agent DQN,Tanh,"[128, 128]",2507807.0
SB3Agent DQN,ReLU,"[128, 128]",2778062.0
SB3Agent DQN,ReLU,"[64, 64]",2994549.0
SB3Agent PPO,ReLU,"[64, 64]",14565960.0
SB3Agent PPO,ReLU,"[128, 128]",40630240.0


In [188]:
# Mean and standard deviation of the percentage improvement over the heuristic
# baseline for each full configuration, best mean first.
# Named aggregation already picks its source column, so no columns need to be
# dropped beforehand (the former drop list raised KeyError whenever one of the
# listed columns was absent).
res = (
    data.groupby(["agent", "policy_act", "policy_net_arch", "learning_rate"])
    .agg(
        mean_percentage_improvement=("percentage_improvement", "mean"),
        std_percentage_improvement=("percentage_improvement", "std"),
    )
    .sort_values("mean_percentage_improvement", ascending=False)
)

# Show only configurations that beat the heuristic on average.
res[res["mean_percentage_improvement"] > 0]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,mean_percentage_improvement,std_percentage_improvement
agent,policy_act,policy_net_arch,learning_rate,Unnamed: 4_level_1,Unnamed: 5_level_1
SB3Agent PPO,Tanh,"[64, 64]",0.001,82.75392,17.239815
SB3Agent PPO,Tanh,"[64, 64]",0.0005,82.75392,17.239815
SB3Agent PPO,Tanh,"[128, 128]",0.001,82.75392,17.239815
SB3Agent PPO,Tanh,"[128, 128]",0.0005,82.75392,17.239815
SB3Agent A2C,Tanh,"[128, 128]",0.001,82.75392,17.239815
SB3Agent PPO,Tanh,"[128, 128]",0.0001,82.753899,17.239797
SB3Agent PPO,Tanh,"[64, 64]",0.0001,82.753899,17.239797
SB3Agent A2C,Tanh,"[128, 128]",0.0005,82.753812,17.239745
SB3Agent A2C,Tanh,"[64, 64]",0.001,82.753812,17.239745
SB3Agent A2C,Tanh,"[128, 128]",0.0001,82.753515,17.239499


In [189]:
# Mean and standard deviation of the percentage improvement over the heuristic
# baseline per (agent, activation, architecture), pooled over learning rates,
# best mean first.
# Named aggregation already picks its source column, so no columns need to be
# dropped beforehand (the former drop list raised KeyError whenever one of the
# listed columns was absent).
res = (
    data.groupby(["agent", "policy_act", "policy_net_arch"])
    .agg(
        mean_percentage_improvement=("percentage_improvement", "mean"),
        std_percentage_improvement=("percentage_improvement", "std"),
    )
    .sort_values("mean_percentage_improvement", ascending=False)
)

# Show only configurations that beat the heuristic on average.
res[res["mean_percentage_improvement"] > 0]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,mean_percentage_improvement,std_percentage_improvement
agent,policy_act,policy_net_arch,Unnamed: 3_level_1,Unnamed: 4_level_1
SB3Agent PPO,Tanh,"[128, 128]",82.753913,16.634714
SB3Agent PPO,Tanh,"[64, 64]",82.753913,16.634714
SB3Agent A2C,Tanh,"[128, 128]",82.753749,16.634595
SB3Agent A2C,Tanh,"[64, 64]",82.753614,16.634493
SB3Agent DQN,Tanh,"[64, 64]",82.41354,16.617307
SB3Agent DQN,Tanh,"[128, 128]",82.307943,16.787641
SB3Agent DQN,ReLU,"[128, 128]",80.582333,17.538421
SB3Agent DQN,ReLU,"[64, 64]",80.314488,18.051209
SB3Agent PPO,ReLU,"[64, 64]",50.720356,49.075062
