# **Introduction**

This notebook is used to load and sort the results of the hyperparameter sweep.

# **Import Packages**

This section imports the necessary packages.

In [2]:
# import these:
import numpy as np
import os
import json
import matplotlib.pyplot as plt
import pandas as pd

# **Loading the Data**

This section loads the data based on which model the user would like to evaluate.

In [3]:
# available models, and the one whose sweep we want to inspect:
models = ["GLIE_MC", "SARSA_0", "SARSA_L"]
model = models[2]

# each model's sweep results sit in their own folder under ./sweep_results:
sweep_root = os.path.join(os.getcwd(), "sweep_results")
sweep_path = os.path.join(sweep_root, model)

With the path defined, we can now load each result file and flatten it into one row:

In [4]:
# column name -> key under data["params"] for the hyperparameters each model sweeps
# (the JSON stores lambda under "lamb"; the dataframe column is named "lambda"):
param_keys_by_model = {
    "GLIE_MC" : {"gamma" : "gamma", "epsilon_decay" : "epsilon_decay"},
    "SARSA_0" : {"gamma" : "gamma", "epsilon_decay" : "epsilon_decay", "alpha" : "alpha"},
    "SARSA_L" : {"gamma" : "gamma", "epsilon_decay" : "epsilon_decay", "alpha" : "alpha", "lambda" : "lamb"},
}

# metrics read from data["metrics"] for every model:
metric_keys = ["success_rate", "avg_return", "avg_length"]

# an unknown model fails here with a KeyError instead of silently reusing stale data:
param_keys = param_keys_by_model[model]

# initialize results list:
results = []

# os.listdir returns entries in arbitrary order, so sort them to keep the
# model_name numbering below identical on every run:
for file in sorted(os.listdir(sweep_path)):
    # consolidate into one path:
    file_path = os.path.join(sweep_path, file)

    # skip hidden entries and sub-directories (e.g. .ipynb_checkpoints, .DS_Store):
    if file.startswith(".") or not os.path.isfile(file_path):
        continue

    # open:
    with open(file_path, 'r', encoding = "utf-8") as f:
        data = json.load(f)

    # flatten the JSON: training params first, then training metrics:
    extracted_data = {column : data["params"][key] for column, key in param_keys.items()}
    extracted_data.update({metric : data["metrics"][metric] for metric in metric_keys})

    # append to list:
    results.append(extracted_data)

# stop early with a clear message rather than failing later on a missing column:
if not results:
    raise FileNotFoundError(f"no result files found in {sweep_path}")

# turn into a pandas df:
results_df = pd.DataFrame(results)

# insert an identifier for models (1-based, following the sorted file order):
results_df.insert(0, 'model_name', [f'{model}_model_{index + 1}' for index in range(len(results_df))])

Save consolidated results into a .csv:

In [5]:
# make consolidated file path (one path component per argument, so the
# separator is correct on every platform):
consolidated_dir = os.path.join(os.getcwd(), "sweep_results", "consolidated_results")
consolidated_file_path = os.path.join(consolidated_dir, model + "_results.csv")

# to_csv does not create missing folders, so make sure the target folder exists:
os.makedirs(consolidated_dir, exist_ok = True)

# write the rows sorted by success rate (best first), without the index column:
results_df.sort_values(by = "success_rate", ascending = False).to_csv(consolidated_file_path, index = False)

# **Visualizing the Results**

This section sorts the consolidated results by the various metrics.

Sort the consolidated results by the highest success rate:

In [6]:
# show the ten configurations with the highest success rate:
(
    results_df
    .sort_values(by = "success_rate", ascending = False)
    .head(10)
)

Unnamed: 0,model_name,gamma,epsilon_decay,alpha,lambda,success_rate,avg_return,avg_length
16,SARSA_L_model_17,0.99,4e-06,0.1,0.9,50.4,0.515,34.067
12,SARSA_L_model_13,0.99,4e-06,0.05,0.8,44.5,0.433,30.454
0,SARSA_L_model_1,0.99,2e-06,0.05,0.8,23.5,0.229,19.303
18,SARSA_L_model_19,0.9,2e-06,0.05,0.8,22.4,0.218,20.037
21,SARSA_L_model_22,0.9,2e-06,0.1,0.8,21.0,0.21,18.386
26,SARSA_L_model_27,0.9,1e-06,0.05,0.95,20.8,0.193,19.094
33,SARSA_L_model_34,0.9,4e-06,0.1,0.8,20.5,0.196,15.896
9,SARSA_L_model_10,0.99,1e-06,0.1,0.8,19.1,0.198,17.632
15,SARSA_L_model_16,0.99,4e-06,0.1,0.8,18.2,0.17,18.109
29,SARSA_L_model_30,0.9,1e-06,0.1,0.95,17.6,0.176,41.413


Sort the consolidated results by the highest average return:

In [8]:
# show the ten configurations with the highest average return:
(
    results_df
    .sort_values(by = "avg_return", ascending = False)
    .head(10)
)

Unnamed: 0,model_name,gamma,epsilon_decay,alpha,lambda,success_rate,avg_return,avg_length
16,SARSA_L_model_17,0.99,4e-06,0.1,0.9,50.4,0.515,34.067
12,SARSA_L_model_13,0.99,4e-06,0.05,0.8,44.5,0.433,30.454
0,SARSA_L_model_1,0.99,2e-06,0.05,0.8,23.5,0.229,19.303
18,SARSA_L_model_19,0.9,2e-06,0.05,0.8,22.4,0.218,20.037
21,SARSA_L_model_22,0.9,2e-06,0.1,0.8,21.0,0.21,18.386
9,SARSA_L_model_10,0.99,1e-06,0.1,0.8,19.1,0.198,17.632
33,SARSA_L_model_34,0.9,4e-06,0.1,0.8,20.5,0.196,15.896
26,SARSA_L_model_27,0.9,1e-06,0.05,0.95,20.8,0.193,19.094
29,SARSA_L_model_30,0.9,1e-06,0.1,0.95,17.6,0.176,41.413
15,SARSA_L_model_16,0.99,4e-06,0.1,0.8,18.2,0.17,18.109


Sort the consolidated results by the lowest average episode length:

In [7]:
# show the ten configurations with the shortest average episode length:
(
    results_df
    .sort_values(by = "avg_length", ascending = True)
    .head(10)
)

Unnamed: 0,model_name,gamma,epsilon_decay,alpha,lambda,success_rate,avg_return,avg_length
35,SARSA_L_model_36,0.9,4e-06,0.1,0.95,3.4,0.041,5.501
8,SARSA_L_model_9,0.99,1e-06,0.05,0.95,0.0,0.0,5.949
1,SARSA_L_model_2,0.99,2e-06,0.05,0.9,2.7,0.018,6.136
6,SARSA_L_model_7,0.99,1e-06,0.05,0.8,9.1,0.077,6.599
27,SARSA_L_model_28,0.9,1e-06,0.1,0.8,4.5,0.059,6.643
10,SARSA_L_model_11,0.99,1e-06,0.1,0.9,6.1,0.081,7.711
11,SARSA_L_model_12,0.99,1e-06,0.1,0.95,4.8,0.048,7.821
32,SARSA_L_model_33,0.9,4e-06,0.05,0.95,3.6,0.044,8.0
13,SARSA_L_model_14,0.99,4e-06,0.05,0.9,2.1,0.012,8.039
7,SARSA_L_model_8,0.99,1e-06,0.05,0.9,7.0,0.066,8.532


Sort the consolidated results by the highest success rate, the highest average return, and the lowest average episode length:

In [9]:
# highest success rate first; ties broken by higher average return,
# then by shorter average episode length:
results_df.sort_values(
    by = ["success_rate", "avg_return", "avg_length"],
    ascending = [False, False, True],
)

Unnamed: 0,model_name,gamma,epsilon_decay,alpha,lambda,success_rate,avg_return,avg_length
16,SARSA_L_model_17,0.99,4e-06,0.1,0.9,50.4,0.515,34.067
12,SARSA_L_model_13,0.99,4e-06,0.05,0.8,44.5,0.433,30.454
0,SARSA_L_model_1,0.99,2e-06,0.05,0.8,23.5,0.229,19.303
18,SARSA_L_model_19,0.9,2e-06,0.05,0.8,22.4,0.218,20.037
21,SARSA_L_model_22,0.9,2e-06,0.1,0.8,21.0,0.21,18.386
26,SARSA_L_model_27,0.9,1e-06,0.05,0.95,20.8,0.193,19.094
33,SARSA_L_model_34,0.9,4e-06,0.1,0.8,20.5,0.196,15.896
9,SARSA_L_model_10,0.99,1e-06,0.1,0.8,19.1,0.198,17.632
15,SARSA_L_model_16,0.99,4e-06,0.1,0.8,18.2,0.17,18.109
29,SARSA_L_model_30,0.9,1e-06,0.1,0.95,17.6,0.176,41.413
