In [1]:
import os
import json
import glob
import itertools

if os.getcwd().endswith("src"):
    os.chdir("..")

sweep_config = "configs/sweep_configs/sweep.json"
path = "models/sweep"

with open(sweep_config, "r") as f:
    config = json.load(f)

keys, values = config.keys(), config.values()
hyperparameter_settings = list(itertools.product(*values))

print(f"{len(hyperparameter_settings)}")

36


In [2]:
all_experiments = {}
for folder in os.listdir(path):
    path_folder = os.path.join(path, folder)
    experiments = []
    for i in range(len(hyperparameter_settings)):
        if not os.path.exists(os.path.join(path_folder, f"model_{i}.pt.log")):
            print(f"Folder {folder=} missing starting from index {i=}")
            break
        sweep_parameters = hyperparameter_settings[i]
        with open(os.path.join(path_folder, f"model_{i}.pt.log")) as f:
            log_file = f.read().strip()
            lines = log_file.split("\n")
            key = "(hyperparameters)"
            idx_with_hyperparameter = [i for i, n in enumerate(lines) if n.startswith(key)]
            last_idx = idx_with_hyperparameter[-1]
            line_with_hyperparameters = lines[last_idx]
            relevant_hyperparameters = json.loads(line_with_hyperparameters[len(key):])
            
            cur_line_idx = last_idx + 1
            outputs = []
            while cur_line_idx < len(lines) and (line := lines[cur_line_idx]).startswith(">>"):
                contents = line[2:-2]
                epoch, loss, metrics = contents.split("|")
                epoch, loss, metrics = int(epoch), float(loss), json.loads(metrics)
                assert epoch == len(outputs)
                outputs.append((epoch, loss, metrics))
                cur_line_idx += 1
        experiments.append([sweep_parameters, outputs])
    all_experiments[folder] = experiments
            

Folder folder='2_user_id_anime_id_title' missing starting from index i=14
Folder folder='2_custom' missing starting from index i=14


In [7]:
from typing import List, Tuple


def determine_best_hyperparameter_settings(experiments: List[Tuple[dict, List[Tuple[int, float, dict]]]]):
    ranks = []
    for i, (sweep_parameters, outputs) in enumerate(experiments):
        max_ndcg = max([x[2]["ndcg"] for x in outputs])
        ranks.append((max_ndcg, i, sweep_parameters))
    ranks.sort(reverse=True)
    for i in range(10):
        best = ranks[i]
        best_ndcg, best_i, best_sweep_parameters = best
        print(f"{best_ndcg=} {best_i=} {best_sweep_parameters=}")
for k, v in all_experiments.items():
    print(f"=== {k} ===")
    determine_best_hyperparameter_settings(v)
    print()
        

=== 1_user_embedder_base_512 ===
best_ndcg=0.09522052000169692 best_i=25 best_sweep_parameters=(200, 0.01, 5)
best_ndcg=0.09464787587210495 best_i=13 best_sweep_parameters=(50, 0.01, 5)
best_ndcg=0.09323580072974788 best_i=12 best_sweep_parameters=(50, 0.01, 1)
best_ndcg=0.09322916654295134 best_i=24 best_sweep_parameters=(200, 0.01, 1)
best_ndcg=0.08863356070929279 best_i=30 best_sweep_parameters=(200, 0.005, 1)
best_ndcg=0.08568558557035728 best_i=0 best_sweep_parameters=(10, 0.01, 1)
best_ndcg=0.08065541335283255 best_i=18 best_sweep_parameters=(50, 0.005, 1)
best_ndcg=0.08046870458990199 best_i=14 best_sweep_parameters=(50, 0.01, 10)
best_ndcg=0.08007733971972934 best_i=19 best_sweep_parameters=(50, 0.005, 5)
best_ndcg=0.0799381711898058 best_i=1 best_sweep_parameters=(10, 0.01, 5)

=== 2_user_id_anime_id_title ===
best_ndcg=0.06854937176454562 best_i=1 best_sweep_parameters=(10, 0.01, 5)
best_ndcg=0.06750946842433218 best_i=12 best_sweep_parameters=(50, 0.01, 1)
best_ndcg=0.067141