In [1]:
from pathlib import Path
import os
import re

In [2]:
def read_last_line_from_file(path: Path):
    """Return the last line of the file at ``path`` as a decoded string.

    The file is opened in binary mode and scanned backwards from its end,
    one byte at a time, until a newline is found; the line that follows that
    newline is returned. Scanning starts two bytes before the end so that a
    single trailing newline does not count as the line separator.

    Args:
        path: File to read.

    Returns:
        The last line, including its trailing newline if the file has one.
        An empty file yields an empty string.
    """
    with path.open("rb") as handle:
        try:
            handle.seek(-2, os.SEEK_END)
            while True:
                if handle.read(1) == b"\n":
                    break
                # Step back over the byte just read plus one more.
                handle.seek(-2, os.SEEK_CUR)
        except OSError:
            # Seeking before the start of the file fails: the file holds a
            # single line (or is shorter than two bytes), so read from the top.
            handle.seek(0)
        return handle.readline().decode()

def read_first_line_from_file(path: Path):
    """Return the first line of the text file at ``path``.

    Args:
        path: File to read (opened in text mode with the default encoding).

    Returns:
        The first line, including its trailing newline if present; an empty
        string for an empty file.
    """
    with path.open("r") as handle:
        first_line = handle.readline()
    return first_line

In [3]:
class HyperparameterRun:
    """One run directory of a hyperparameter search.

    The directory is addressed together with the names of a log file and a
    model file inside it. The log file is parsed lazily by the methods below;
    nothing is read at construction time.
    """

    def __init__(
        self,
        run_dir: Path,
        log_file_name: str = "out.log",
        model_name: str = "model.pt",
    ):
        """Validate and store the run location.

        Args:
            run_dir: Directory of the run.
            log_file_name: Name of the log file inside ``run_dir``.
            model_name: Name of the model file inside ``run_dir``.

        Raises:
            ValueError: If ``run_dir`` does not exist or is not a directory.
        """
        if not run_dir.exists():
            raise ValueError(f"run_dir `{run_dir}` does not exist")
        if not run_dir.is_dir():
            raise ValueError(f"run_dir `{run_dir}` is not a directory")
        self.run_dir = run_dir
        self.log_file_name = log_file_name
        self.model_name = model_name

    def _log_path(self) -> Path:
        """Return the path of the log file inside the run directory."""
        return self.run_dir / self.log_file_name

    def model_path(self):
        """Return the path of the model file inside the run directory.

        The path is only composed; its existence is not checked.
        """
        return self.run_dir / self.model_name

    def validation_ndcg_20(self):
        """Parse the NDCG@20 value from the last line of the log file.

        The last line must contain a fragment of the form
        ``NDCG@20': '<number>'``.

        Returns:
            The matched number as a float.

        Raises:
            Exception: If the last line contains no such fragment.
        """
        log_path = self._log_path()
        last_line = read_last_line_from_file(log_path)

        ndcg_20_match = re.search(r"NDCG@20':\s*'([\d.]+)'", last_line)
        if ndcg_20_match is None:
            raise Exception(f"NDCG@20 not found in last line of log file `{log_path}`.")

        return float(ndcg_20_match.group(1))

    def run_args(self):
        """Parse the ``key=value`` pairs logged on the first line of the log.

        Everything after the first occurrence of ``Namespace`` on that line is
        taken, surrounding parentheses are stripped, and ``key=value`` pairs
        are extracted (presumably the repr of an ``argparse.Namespace`` --
        confirm against the training script).

        Values are returned as raw strings: single-quoted values keep their
        quotes, and an unquoted value ends at the first comma.

        Returns:
            Dict mapping argument name to its raw string value.

        Raises:
            IndexError: If the first line does not contain ``Namespace``.
        """
        first_line = read_first_line_from_file(self._log_path())
        first_line = (
            first_line.split("Namespace", maxsplit=1)[1].strip().lstrip("(").rstrip(")")
        )
        # A value is either a single-quoted string or a run of non-comma characters.
        pattern = re.compile(r"(\w+)=((?:'[^']*')|(?:[^,]+))")
        return dict(pattern.findall(first_line))

    def hyperparameters(self, hyperparameters: list[str]):
        """Return the subset of ``run_args()`` whose keys are requested.

        Args:
            hyperparameters: Argument names to keep.

        Returns:
            Dict of the requested names that were found in the log, in the
            order they appear in the log; missing names are silently omitted.
        """
        run_args = self.run_args()
        return {key: value for key, value in run_args.items() if key in hyperparameters}


class HyperparameterSearchRun:
    """A hyperparameter search: a directory with one sub-directory per run."""

    def __init__(
        self,
        search_dir: Path,
        log_file_name: str = "out.log",
        model_name: str = "model.pt",
    ):
        """Store the search location and the per-run file names.

        Args:
            search_dir: Directory whose sub-directories are the runs.
            log_file_name: Name of the log file inside every run directory.
            model_name: Name of the model file inside every run directory.
        """
        self.search_dir = search_dir
        self.log_file_name = log_file_name
        self.model_name = model_name

    def _get_runs(self) -> list[HyperparameterRun]:
        """Build a ``HyperparameterRun`` for every sub-directory of the search.

        The configured log and model file names are forwarded to each run so
        that non-default names are honoured.
        """
        return [
            HyperparameterRun(
                item,
                log_file_name=self.log_file_name,
                model_name=self.model_name,
            )
            for item in self.search_dir.iterdir()
            if item.is_dir()
        ]

    def number_of_runs(self) -> int:
        """Return the number of sub-directories in the search directory."""
        return sum(1 for item in self.search_dir.iterdir() if item.is_dir())

    def best_run(self):
        """Return the directory of the run with the highest NDCG@20.

        Returns:
            The ``run_dir`` of the best run, or ``None`` when the search
            directory contains no run sub-directories. On a tie the run
            encountered first wins; the directory iteration order is not
            sorted here.

        Raises:
            Exception: Propagated from ``validation_ndcg_20`` when a run's log
                has no NDCG@20 value on its last line.
        """
        best_run_dir = None
        # Parsed scores contain only digits and dots, so -1 is below any score.
        best_ndcg_20 = -1

        for run in self._get_runs():
            new_ndcg_20 = run.validation_ndcg_20()
            if new_ndcg_20 > best_ndcg_20:
                best_ndcg_20 = new_ndcg_20
                best_run_dir = run.run_dir

        return best_run_dir


In [4]:
def best_hparams(hparam_search_dir: Path, hparams: list[str]):
    """Print a summary of the best run of a hyperparameter search.

    Prints the number of runs, the best run's directory, the requested
    hyperparameters of that run and the path of its model file.

    Args:
        hparam_search_dir: Directory holding one sub-directory per run.
        hparams: Names of the logged arguments to report.

    Raises:
        ValueError: If the search directory contains no run sub-directories.
    """
    search = HyperparameterSearchRun(hparam_search_dir)
    number_of_runs = search.number_of_runs()
    best_run_dir = search.best_run()
    if best_run_dir is None:
        # Without this guard the constructor below fails with an opaque
        # AttributeError on None.
        raise ValueError(f"No run directories found in `{hparam_search_dir}`.")
    best_run = HyperparameterRun(best_run_dir)

    print(f"Number of runs: {number_of_runs}")
    print(f"Best run: {best_run.run_dir}")
    print("Best hyperparameters:", best_run.hyperparameters(hparams))
    print("Best Model path:", best_run.model_path())

In [5]:
# Root directory (relative path) under which each search lives in its own
# sub-directory, e.g. "BERT4Rec_LastFM".
HPARAM_SEARCH_DIR = Path("hparam_search")

In [6]:
# Argument names reported for every model; model-specific names are appended
# to this base list where needed.
HYPERPARAMETERS = ["lr", "num_attention_heads"]

In [7]:
# Per-model lists of argument names to report: the shared base names plus any
# model-specific extras. Each list is a fresh copy of the base list.
BERT4Rec_hyperparameters = [*HYPERPARAMETERS, "mask_ratio"]
DuoRec_hyperparameters = list(HYPERPARAMETERS)
SASRec_hyperparameters = list(HYPERPARAMETERS)
FEARec_hyperparameters = list(HYPERPARAMETERS)
BSARec_hyperparameters = [*HYPERPARAMETERS, "c", "alpha"]

## LastFM

In [8]:
# Summarise the best run under hparam_search/BERT4Rec_LastFM.
best_hparams(HPARAM_SEARCH_DIR / "BERT4Rec_LastFM", BERT4Rec_hyperparameters)

Number of runs: 18
Best run: hparam_search/BERT4Rec_LastFM/12373134_5
Best hyperparameters: {'lr': '0.001', 'num_attention_heads': '2', 'mask_ratio': '0.4'}
Best Model path: hparam_search/BERT4Rec_LastFM/12373134_5/model.pt


In [9]:
# Summarise the best run under hparam_search/DuoRec_LastFM.
best_hparams(HPARAM_SEARCH_DIR / "DuoRec_LastFM", DuoRec_hyperparameters)

Number of runs: 6
Best run: hparam_search/DuoRec_LastFM/12364640_3
Best hyperparameters: {'lr': '0.001', 'num_attention_heads': '4'}
Best Model path: hparam_search/DuoRec_LastFM/12364640_3/model.pt


In [10]:
# Summarise the best run under hparam_search/SASRec_LastFM.
best_hparams(HPARAM_SEARCH_DIR / "SASRec_LastFM", SASRec_hyperparameters)

Number of runs: 6
Best run: hparam_search/SASRec_LastFM/12364634_3
Best hyperparameters: {'lr': '0.001', 'num_attention_heads': '4'}
Best Model path: hparam_search/SASRec_LastFM/12364634_3/model.pt


In [11]:
# Summarise the best run under hparam_search/FEARec_LastFM.
best_hparams(HPARAM_SEARCH_DIR / "FEARec_LastFM", FEARec_hyperparameters)

Number of runs: 6
Best run: hparam_search/FEARec_LastFM/12364666_3
Best hyperparameters: {'lr': '0.001', 'num_attention_heads': '4'}
Best Model path: hparam_search/FEARec_LastFM/12364666_3/model.pt


## Diginetica

In [12]:
# Summarise the best run under hparam_search/BERT4Rec_Diginetica.
best_hparams(HPARAM_SEARCH_DIR / "BERT4Rec_Diginetica", BERT4Rec_hyperparameters)

Number of runs: 18
Best run: hparam_search/BERT4Rec_Diginetica/12362289_7
Best hyperparameters: {'lr': '0.001', 'num_attention_heads': '4', 'mask_ratio': '0.2'}
Best Model path: hparam_search/BERT4Rec_Diginetica/12362289_7/model.pt


In [13]:
# Summarise the best run under hparam_search/DuoRec_Diginetica.
best_hparams(HPARAM_SEARCH_DIR / "DuoRec_Diginetica", DuoRec_hyperparameters)

Number of runs: 6
Best run: hparam_search/DuoRec_Diginetica/12364443_3
Best hyperparameters: {'lr': '0.001', 'num_attention_heads': '4'}
Best Model path: hparam_search/DuoRec_Diginetica/12364443_3/model.pt


In [14]:
# Summarise the best run under hparam_search/SASRec_Diginetica.
best_hparams(HPARAM_SEARCH_DIR / "SASRec_Diginetica", SASRec_hyperparameters)

Number of runs: 6
Best run: hparam_search/SASRec_Diginetica/12362200_4
Best hyperparameters: {'lr': '0.0005', 'num_attention_heads': '1'}
Best Model path: hparam_search/SASRec_Diginetica/12362200_4/model.pt


In [15]:
# Summarise the best run under hparam_search/FEARec_Diginetica.
best_hparams(HPARAM_SEARCH_DIR / "FEARec_Diginetica", FEARec_hyperparameters)

Number of runs: 6
Best run: hparam_search/FEARec_Diginetica/12373668_1
Best hyperparameters: {'lr': '0.001', 'num_attention_heads': '1'}
Best Model path: hparam_search/FEARec_Diginetica/12373668_1/model.pt


In [16]:
# Summarise the best run under hparam_search/BSARec_Diginetica (also reports "c" and "alpha").
best_hparams(HPARAM_SEARCH_DIR / "BSARec_Diginetica", BSARec_hyperparameters)

Number of runs: 150
Best run: hparam_search/BSARec_Diginetica/12367757_143
Best hyperparameters: {'lr': '0.0005', 'num_attention_heads': '4', 'c': '7', 'alpha': '0.5'}
Best Model path: hparam_search/BSARec_Diginetica/12367757_143/model.pt
