# **Introduction**

This notebook is used to load and sort the results of the hyperparameter sweep.

# **Import Packages**

This section imports the packages used throughout the notebook.

In [1]:
# import these:
import numpy as np
import os
import json
import matplotlib.pyplot as plt
import pandas as pd

# **Loading the Data**

This section loads the data based on which model the user would like to evaluate.

In [2]:
# models that have sweep results available:
models = ["GLIE_MC", "SARSA_0", "SARSA_L", "Q_LEARN"]

# pick the model to evaluate by name (clearer than a positional index into `models`):
model = "Q_LEARN"

# fail fast on a typo instead of silently pointing at a non-existent results folder:
if model not in models:
    raise ValueError(f"unknown model {model!r}; expected one of {models}")

# get the path to the results (resolved relative to the current working directory):
sweep_path = os.path.join(os.getcwd(), "sweep_results", model)

With the path, we can now load each file:

In [3]:
# columns to extract for each model, as (output column, key inside the JSON "params" block).
# note that the SARSA(lambda) sweep files store lambda under the key "lamb".
param_keys = {
    "GLIE_MC": [("gamma", "gamma"), ("epsilon_decay", "epsilon_decay")],
    "SARSA_0": [("gamma", "gamma"), ("epsilon_decay", "epsilon_decay"), ("alpha", "alpha")],
    "SARSA_L": [("gamma", "gamma"), ("epsilon_decay", "epsilon_decay"), ("alpha", "alpha"), ("lambda", "lamb")],
    "Q_LEARN": [("gamma", "gamma"), ("epsilon_decay", "epsilon_decay"), ("alpha", "alpha")],
}

# metrics are shared by every model and read from the JSON "metrics" block:
metric_keys = ["success_rate", "avg_return", "avg_length"]

# an unsupported model would otherwise surface as a confusing error inside the loop:
if model not in param_keys:
    raise ValueError(f"no extraction schema defined for model {model!r}")

# initialize results list:
results = []

# os.listdir returns entries in arbitrary order; sorting keeps the
# model numbering assigned below identical from one run to the next:
for file in sorted(os.listdir(sweep_path)):
    # consolidate into one path:
    file_path = os.path.join(sweep_path, file)

    # skip sub-directories and hidden entries (e.g. .ipynb_checkpoints, .DS_Store):
    if file.startswith(".") or not os.path.isfile(file_path):
        continue

    # open:
    with open(file_path, 'r') as f:
        data = json.load(f)

    # flatten the JSON: training params first, then training metrics:
    extracted_data = {column : data["params"][key] for column, key in param_keys[model]}
    extracted_data.update({metric : data["metrics"][metric] for metric in metric_keys})

    # append to list:
    results.append(extracted_data)

# an empty folder would otherwise only fail later, when sorting on a missing column:
if not results:
    raise FileNotFoundError(f"no sweep result files found in {sweep_path}")

# turn into a pandas df:
results_df = pd.DataFrame(results)

# insert an identifier for models (1-based, following the sorted file order):
results_df.insert(0, 'model_name', [f'{model}_model_{index + 1}' for index in range(len(results_df))])

Save consolidated results into a .csv:

In [4]:
# directory that holds the consolidated .csv files (one path component per argument,
# so no separator is hard-coded):
consolidated_dir = os.path.join(os.getcwd(), "sweep_results", "consolidated_results")

# create the directory on first use; to_csv does not create missing parent folders:
os.makedirs(consolidated_dir, exist_ok = True)

# make consolidated file path:
consolidated_file_path = os.path.join(consolidated_dir, model + "_results.csv")

# write the runs ordered from highest to lowest success rate, without the index column:
results_df.sort_values(by = "success_rate", ascending = False).to_csv(consolidated_file_path, index = False)

# **Visualizing the Results**

This section sorts the consolidated results by the various metrics.

Sort the consolidated results by the highest success rate:

In [5]:
# order every run from highest to lowest success rate, then show the ten leaders:
ranked_by_success = results_df.sort_values(by = "success_rate", ascending = False)
ranked_by_success.head(10)

Unnamed: 0,model_name,gamma,epsilon_decay,alpha,success_rate,avg_return,avg_length
21,Q_LEARN_model_22,0.99,7.5e-07,0.1,74.38,0.739,44.982
12,Q_LEARN_model_13,0.99,2e-06,0.05,74.37,0.738,44.275
13,Q_LEARN_model_14,0.99,2e-06,0.1,74.3,0.74,44.57
20,Q_LEARN_model_21,0.99,7.5e-07,0.05,74.2,0.738,44.443
14,Q_LEARN_model_15,0.99,2e-06,0.25,73.96,0.747,44.254
15,Q_LEARN_model_16,0.99,2e-06,0.5,73.79,0.746,44.182
17,Q_LEARN_model_18,0.99,5e-07,0.1,73.52,0.742,44.313
1,Q_LEARN_model_2,0.95,2e-06,0.1,73.23,0.735,41.4
4,Q_LEARN_model_5,0.95,5e-07,0.05,73.04,0.73,41.633
0,Q_LEARN_model_1,0.95,2e-06,0.05,72.96,0.728,41.414


Sort the consolidated results by the highest average return:

In [6]:
# order every run from highest to lowest average return, then show the ten leaders:
ranked_by_return = results_df.sort_values(by = "avg_return", ascending = False)
ranked_by_return.head(10)

Unnamed: 0,model_name,gamma,epsilon_decay,alpha,success_rate,avg_return,avg_length
14,Q_LEARN_model_15,0.99,2e-06,0.25,73.96,0.747,44.254
15,Q_LEARN_model_16,0.99,2e-06,0.5,73.79,0.746,44.182
17,Q_LEARN_model_18,0.99,5e-07,0.1,73.52,0.742,44.313
13,Q_LEARN_model_14,0.99,2e-06,0.1,74.3,0.74,44.57
21,Q_LEARN_model_22,0.99,7.5e-07,0.1,74.38,0.739,44.982
12,Q_LEARN_model_13,0.99,2e-06,0.05,74.37,0.738,44.275
20,Q_LEARN_model_21,0.99,7.5e-07,0.05,74.2,0.738,44.443
1,Q_LEARN_model_2,0.95,2e-06,0.1,73.23,0.735,41.4
23,Q_LEARN_model_24,0.99,7.5e-07,0.5,72.1,0.735,41.598
8,Q_LEARN_model_9,0.95,7.5e-07,0.05,72.44,0.73,41.412


Sort the consolidated results by the lowest average episode length:

In [7]:
# order every run from shortest to longest average episode, then show the first ten:
ranked_by_length = results_df.sort_values(by = "avg_length", ascending = True)
ranked_by_length.head(10)

Unnamed: 0,model_name,gamma,epsilon_decay,alpha,success_rate,avg_return,avg_length
30,Q_LEARN_model_31,0.9,5e-07,0.25,12.84,0.118,8.582
34,Q_LEARN_model_35,0.9,7.5e-07,0.25,19.46,0.196,11.616
26,Q_LEARN_model_27,0.9,2e-06,0.25,10.44,0.101,20.16
25,Q_LEARN_model_26,0.9,2e-06,0.1,28.16,0.293,23.003
29,Q_LEARN_model_30,0.9,5e-07,0.1,32.24,0.329,27.675
10,Q_LEARN_model_11,0.95,7.5e-07,0.25,33.32,0.324,27.82
24,Q_LEARN_model_25,0.9,2e-06,0.05,44.1,0.444,30.369
31,Q_LEARN_model_32,0.9,5e-07,0.5,48.32,0.487,33.799
7,Q_LEARN_model_8,0.95,5e-07,0.5,48.31,0.486,34.081
35,Q_LEARN_model_36,0.9,7.5e-07,0.5,50.02,0.498,34.117


Sort the consolidated results by the highest success rate, breaking ties first by the highest average return and then by the lowest average episode length:

In [8]:
# sort keys, in priority order, paired with their direction:
# success rate and average return descending, average episode length ascending.
sort_columns = ["success_rate", "avg_return", "avg_length"]
sort_ascending = [False, False, True]

# show only the ten best runs, matching the other ranking cells instead of dumping the whole frame:
results_df.sort_values(by = sort_columns, ascending = sort_ascending).head(10)

Unnamed: 0,model_name,gamma,epsilon_decay,alpha,success_rate,avg_return,avg_length
21,Q_LEARN_model_22,0.99,7.5e-07,0.1,74.38,0.739,44.982
12,Q_LEARN_model_13,0.99,2e-06,0.05,74.37,0.738,44.275
13,Q_LEARN_model_14,0.99,2e-06,0.1,74.3,0.74,44.57
20,Q_LEARN_model_21,0.99,7.5e-07,0.05,74.2,0.738,44.443
14,Q_LEARN_model_15,0.99,2e-06,0.25,73.96,0.747,44.254
15,Q_LEARN_model_16,0.99,2e-06,0.5,73.79,0.746,44.182
17,Q_LEARN_model_18,0.99,5e-07,0.1,73.52,0.742,44.313
1,Q_LEARN_model_2,0.95,2e-06,0.1,73.23,0.735,41.4
4,Q_LEARN_model_5,0.95,5e-07,0.05,73.04,0.73,41.633
0,Q_LEARN_model_1,0.95,2e-06,0.05,72.96,0.728,41.414
