In [None]:
%load_ext autoreload
%autoreload 2
import numpy as np
from evaluation import evaluate_heursitic_grid
from policies.heuristics import TCBMHeuristicAgent
from environments.road_env import RoadEnvironment
from environments.config.environment_presets import smallest_environment_dict
# Grid of heuristic parameters to sweep: thresholds start at 0 and advance in
# steps of 0.05 (stopping before 1.05); inspection intervals run from 1 to 20.
parameter_dict = dict(
    threshold=np.arange(0, 1.05, 0.05),
    inspection_interval=np.arange(1, 21),
)

environment = RoadEnvironment(**smallest_environment_dict)

# Within this cell these two values are only used to label the output file.
threshold_area = 0.5
inspection_interval_area = 10

number_of_episodes = 10000

# Output path without extension; ".csv" is appended for the call below.
filename = f"results/TCBM_heuristic_grid_search_{threshold_area:0.2f}_{inspection_interval_area}_{number_of_episodes}"

# Run the grid search.
# NOTE(review): presumably evaluates every parameter combination and writes
# the table to `result_path` -- confirm against the `evaluation` module.
grid_search_options = dict(
    environment=environment,
    heuristic_class=TCBMHeuristicAgent,
    parameter_dict=parameter_dict,
    number_of_episodes=number_of_episodes,
    number_of_processes=32,
    result_path=f"{filename}.csv",
    overwrite=True,
)
results = evaluate_heursitic_grid(**grid_search_options)

# Bare expression so the notebook displays the result.
results
import pandas as pd

# Load a saved grid-search table.
# NOTE(review): this path is hard-coded and differs from the one the grid
# search cell writes ("..._0.50_10_10000") -- confirm it is the intended run.
filename = "results/TCBM_heuristic_grid_search_0.13_10_10000"
results = pd.read_csv(f"{filename}.csv")

# Rank every configuration by mean reward and keep the top row as a one-row
# DataFrame so that it prints as a table below.
ranked = results.sort_values("reward_mean", ascending=False)
best = ranked.head(1)
best_reward_mean = best["reward_mean"].values[0]
best_reward_std = best["reward_std"].values[0]
best_episode_count = best["episodes"].values[0]
# Standard error of the mean: std / sqrt(number of episodes).
best_reward_std_error = best_reward_std / np.sqrt(best_episode_count)

print("Best parameters:")
print(best)
print("\t with std error: {:.2f}".format(best_reward_std_error))

# Lower bound: one standard error below the best mean reward.
std_error_reward_range = best_reward_mean - best_reward_std_error

# Every configuration whose mean reward reaches that bound, best first.
within_one_std_error = results["reward_mean"] >= std_error_reward_range
best_results = results[within_one_std_error].sort_values("reward_mean", ascending=False)
best_results
# Reload the results table so this cell can be run on its own.
results = pd.read_csv(f"{filename}.csv")

# plot results
import seaborn as sns
import matplotlib.pyplot as plt

sns.set_theme(style="whitegrid")

# Mean reward as a function of threshold, one line per inspection interval.
fig, ax = plt.subplots(figsize=(10, 6))
sns.lineplot(
    data=results,
    x="threshold",
    y="reward_mean",
    hue="inspection_interval",
    ax=ax,
    # The parameter grid defines 20 inspection intervals (1..20). A 10-colour
    # palette such as "tab10" cycles, giving two lines the same colour, so a
    # 20-colour qualitative palette is used instead.
    # Smaller alternatives: "tab10", "Set1", "Set2", "Set3", "Paired".
    palette="tab20",
)
ax.set_title("TCBM Heuristic Grid Search")
ax.set_ylabel("Mean Reward")

# Save before show(): the original order is kept deliberately.
plt.savefig(f"{filename}.png", dpi=300)
plt.show()

# Heatmap of the same grid: thresholds on rows, inspection intervals on columns.
from matplotlib.colors import LogNorm

# Clean up the two axis columns before pivoting: thresholds are rounded to
# 5 decimals and inspection intervals are cast to int.
axis_ready = results.assign(
    threshold=results["threshold"].round(5),
    inspection_interval=results["inspection_interval"].astype(int),
)
reward_grid = axis_ready.pivot(
    index="threshold", columns="inspection_interval", values="reward_mean"
)

# Flip the sign so mean rewards are shown as costs.
# NOTE(review): LogNorm needs strictly positive values, so this assumes all
# mean rewards are negative -- confirm for the loaded run.
results = -reward_grid

fig, ax = plt.subplots(figsize=(10, 6))
sns.heatmap(results, ax=ax, norm=LogNorm())
ax.set_title("TCBM Heuristic Grid Search (Log Scale)")
ax.set_xlabel("Inspection Interval")
ax.set_ylabel("Threshold")

# The heatmap mesh is the first collection on the axes; label its colorbar.
heatmap_colorbar = ax.collections[0].colorbar
heatmap_colorbar.set_label("Average Cost")

plt.savefig(f"{filename}_heatmap.png", dpi=300)
plt.show()
