# **Introduction**

This notebook runs hyperparameter sweeps over the agents implemented so far (GLIE Monte Carlo, SARSA(0), and SARSA(λ)) on the FrozenLake environment.

# **Import Packages**

This section imports the necessary packages.

In [None]:
# Import these packages:
import gymnasium as gym
import numpy as np
from tqdm import tqdm 
import matplotlib.pyplot as plt

import json
import os
import itertools
from frozen_lake_classes import GLIE_MC_Agent, SARSA_0_Agent, SARSA_L_Agent

# **Environment Definition**

This section defines the relevant objects and functions to perform the hyperparameter sweep.

### **Sweep Parameters**

In [None]:
# Hyperparameter sweep: for every agent type, train one agent per combination
# of grid values, evaluate it, and write the metrics to
# sweep_results/<model_type>/<params>.json.

# agent types to sweep, and the class that implements each one:
model_types = ["GLIE_MC", "SARSA_0", "SARSA_L"]
agent_classes = {
    "GLIE_MC": GLIE_MC_Agent,
    "SARSA_0": SARSA_0_Agent,
    "SARSA_L": SARSA_L_Agent,
}

# number of training episodes per run (passed to agent.GPI below); the
# epsilon_decay grid values are expressed as multiples of 1 / train_length:
train_length = int(2.5e6)

for model_type in model_types:

    # fail fast on an unrecognised name instead of silently reusing the agent
    # left over from a previous iteration:
    if model_type not in agent_classes:
        raise ValueError(f"unknown model_type: {model_type!r}")
    agent_class = agent_classes[model_type]

    # set the environment (a fresh one per model type):
    env = gym.make("FrozenLake-v1", is_slippery=True, render_mode='rgb_array')

    try:
        # must first define the grid of values to be examined:
        grid = {
            "gamma": [0.90, 0.99],
            "epsilon_decay": [0.5/train_length, 0.75/train_length, 2/train_length],
        }

        # every agent except GLIE_MC also sweeps the step size alpha, and
        # SARSA_L additionally sweeps lamb (λ):
        if model_type != "GLIE_MC":
            grid["alpha"] = [0.05, 0.1, 0.25, 0.5]
            if model_type == "SARSA_L":
                grid["lamb"] = [0.8, 0.9, 0.95]

        # get cartesian product of all combinations, as one dict per run:
        keys, values = zip(*grid.items())
        combos = [dict(zip(keys, v)) for v in itertools.product(*values)]

        # initialize results (reset here, so it only ever holds the runs of
        # the current model type):
        results = []

        # output directory:
        out_dir = f"sweep_results/{model_type}"
        os.makedirs(out_dir, exist_ok=True)

        for params in combos:
            # create a filename and path, e.g. "gamma0.9_epsilon_decay2e-07.json":
            fname = "_".join(f"{k}{v}" for k, v in params.items()) + ".json"
            path = os.path.join(out_dir, fname)

            # need to make a model with those params:
            agent = agent_class(
                env,
                **params,
                es=False,
                rs=False,
                initial_epsilon=1.0,
                final_epsilon=0.1,
            )

            # train the model (the return value is not used in this cell):
            q = agent.GPI(num_episodes=train_length)

            # evaluate:
            success = agent.success_rate(num_episodes=1000)
            avg_return = agent.average_return(num_episodes=1000)
            avg_length = agent.average_length(num_episodes=1000)

            # metrics dict:
            metrics = {
                "success_rate": success,
                "avg_return": avg_return,
                "avg_length": avg_length,
            }

            # store results:
            result = {
                "params": params,
                "metrics": metrics,
            }

            results.append(result)

            # dump results right away so finished runs survive an interruption:
            with open(path, "w", encoding="utf-8") as f:
                json.dump(result, f, indent=2)
    finally:
        # release the environment's resources even if a run fails part-way:
        env.close()