In [256]:
from pathlib import Path
import os
import re

In [257]:
def read_last_line_from_file(path: Path):
    """Return the last line of ``path`` decoded with the default codec (UTF-8).

    The file is scanned backwards one byte at a time, starting just before the
    final byte, until a newline is found; the text after that newline is the
    last line.  The final byte itself is never inspected, so a single trailing
    newline stays attached to the returned line.  When no earlier newline
    exists (empty, one-byte, or one-line file) the first line is returned.
    """
    with path.open("rb") as stream:
        size = stream.seek(0, os.SEEK_END)

        # Offset at which the last line begins; 0 unless a newline is found.
        line_start = 0
        for candidate in range(size - 2, -1, -1):
            stream.seek(candidate)
            if stream.read(1) == b"\n":
                line_start = candidate + 1
                break

        stream.seek(line_start)
        return stream.readline().decode()

def read_first_line_from_file(path: Path) -> str:
    """Return the first line of ``path`` (including its newline, if any).

    The file is decoded as UTF-8 explicitly so the result does not depend on
    the platform's default encoding and matches ``read_last_line_from_file``,
    which decodes the same file's bytes as UTF-8.  An empty file yields ``""``.
    """
    with path.open("r", encoding="utf-8") as f:
        return f.readline()

In [258]:
class HyperparameterRun:
    """One run directory produced by a hyperparameter search.

    The run's log file is read in two places: its first line is expected to
    contain a ``Namespace(key=value, ...)`` dump of the run arguments, and its
    last line is expected to contain an ``'NDCG@20': '<number>'`` entry.
    """

    # Matches e.g. ``'NDCG@20': '0.1234'`` and captures the number.
    _NDCG_20_PATTERN = re.compile(r"NDCG@20':\s*'([\d.]+)'")
    # The word characters directly in front of an ``=`` sign (the argument name).
    _TRAILING_KEY_PATTERN = re.compile(r"\w+$")

    def __init__(
        self,
        run_dir: Path,
        log_file_name: str = "out.log",
        model_name: str = "model.pt",
    ):
        """Validate and store the run directory and file names.

        Args:
            run_dir: Directory of the run; a plain string is accepted too.
            log_file_name: Name of the log file inside ``run_dir``.
            model_name: Name of the model file inside ``run_dir``.

        Raises:
            ValueError: If ``run_dir`` does not exist or is not a directory.
        """
        run_dir = Path(run_dir)
        if not run_dir.exists():
            raise ValueError(f"run_dir `{run_dir}` does not exist")
        if not run_dir.is_dir():
            raise ValueError(f"run_dir `{run_dir}` is not a directory")
        self.run_dir = run_dir
        self.log_file_name = log_file_name
        self.model_name = model_name

    def _log_path(self) -> Path:
        """Return the path of this run's log file."""
        return self.run_dir / self.log_file_name

    def model_path(self):
        """Return the path of this run's model file (not checked for existence)."""
        return self.run_dir / self.model_name

    def validation_ndcg_20(self):
        """Return the NDCG@20 value found in the last line of the log file.

        Raises:
            ValueError: If the last line has no ``'NDCG@20': '<number>'`` entry.
        """
        log_path = self._log_path()
        last_line = read_last_line_from_file(log_path)

        ndcg_20_match = self._NDCG_20_PATTERN.search(last_line)
        if ndcg_20_match is None:
            # ValueError is still an Exception, so broad handlers keep working.
            raise ValueError(
                f"NDCG@20 not found in last line of log file `{log_path}`."
            )

        return float(ndcg_20_match.group(1))

    @staticmethod
    def _split_top_level_args(text: str) -> list[str]:
        """Split the inside of ``(a=1, b=[2, 3])`` on top-level commas only.

        Commas inside brackets, parentheses, braces or quoted strings do not
        split.  Scanning stops at the parenthesis that closes the argument list.
        """
        if text.startswith("("):
            text = text[1:]

        pieces = []
        current = []
        depth = 0  # nesting level of (), [] and {} inside the argument list
        quote = None  # the quote character of the string we are inside, if any
        escaped = False  # True right after a backslash inside a string

        for char in text:
            if quote is not None:
                if escaped:
                    escaped = False
                elif char == "\\":
                    escaped = True
                elif char == quote:
                    quote = None
            elif char in "'\"":
                quote = char
            elif char in "([{":
                depth += 1
            elif char in ")]}":
                if depth == 0:
                    # This closes the Namespace(...) list itself; ignore the rest.
                    break
                depth -= 1
            elif char == "," and depth == 0:
                pieces.append("".join(current))
                current = []
                continue
            current.append(char)

        pieces.append("".join(current))
        return pieces

    def run_args(self):
        """Parse the ``Namespace(...)`` dump on the first log line.

        Returns:
            A dict mapping each argument name to the text of its value exactly
            as written in the log (values are not converted; quoted strings
            keep their quotes).

        Raises:
            ValueError: If the first line does not contain ``Namespace``.
        """
        log_path = self._log_path()
        first_line = read_first_line_from_file(log_path)

        _, marker, tail = first_line.partition("Namespace")
        if not marker:
            raise ValueError(
                f"No `Namespace(...)` dump found in first line of log file `{log_path}`."
            )

        args = {}
        for piece in self._split_top_level_args(tail.strip()):
            raw_key, equals, raw_value = piece.partition("=")
            key_match = self._TRAILING_KEY_PATTERN.search(raw_key)
            value = raw_value.strip()
            # Skip fragments that are not a well-formed ``name=value`` pair.
            if equals and key_match and value:
                args[key_match.group()] = value

        return args

    def hyperparameters(self, hyperparameters: list[str]):
        """Return the subset of ``run_args()`` whose names are in ``hyperparameters``.

        Names that are not present in the run arguments are silently omitted.
        """
        run_args = self.run_args()
        return {key: value for key, value in run_args.items() if key in hyperparameters}


class HyperparameterSearchRun:
    """A hyperparameter search: a directory with one sub-directory per run."""

    def __init__(
        self,
        search_dir: Path,
        log_file_name: str = "out.log",
        model_name: str = "model.pt",
    ):
        """Store the search directory and the file names used by every run.

        Args:
            search_dir: Directory containing the run sub-directories.
            log_file_name: Name of the log file inside each run directory.
            model_name: Name of the model file inside each run directory.
        """
        self.search_dir = Path(search_dir)
        self.log_file_name = log_file_name
        self.model_name = model_name

    def _get_runs(self) -> list["HyperparameterRun"]:
        """Return one ``HyperparameterRun`` per sub-directory of ``search_dir``.

        The configured ``log_file_name`` and ``model_name`` are forwarded to
        each run.  Entries are sorted so the result (and tie-breaking in
        ``best_run``) does not depend on the file system's listing order.
        Non-directory entries are ignored.
        """
        return [
            HyperparameterRun(
                item,
                log_file_name=self.log_file_name,
                model_name=self.model_name,
            )
            for item in sorted(self.search_dir.iterdir())
            if item.is_dir()
        ]

    def best_run(self):
        """Return the directory of the run with the highest validation NDCG@20.

        Ties go to the run whose directory sorts first.  Returns ``None`` when
        the search directory contains no run sub-directories.  Errors raised
        while reading a run's metric are propagated unchanged.
        """
        best_run_dir = None
        best_ndcg_20 = float("-inf")

        for run in self._get_runs():
            new_ndcg_20 = run.validation_ndcg_20()
            if new_ndcg_20 > best_ndcg_20:
                best_ndcg_20 = new_ndcg_20
                best_run_dir = run.run_dir

        return best_run_dir


In [259]:
def best_hparams(hparam_search_dir: Path, hparams: list[str]):
    """Print the best run of a hyperparameter search and its key settings.

    The best run is the one ``HyperparameterSearchRun.best_run`` selects.
    Three lines are printed: the run directory, the requested hyperparameters
    as parsed from the run's log, and the path of the run's model file.

    Args:
        hparam_search_dir: Directory containing one sub-directory per run.
        hparams: Names of the run arguments to report.

    Raises:
        ValueError: If the search directory contains no runs.
    """
    search = HyperparameterSearchRun(hparam_search_dir)
    best_run_dir = search.best_run()
    if best_run_dir is None:
        # Without this guard the failure surfaces later as an opaque
        # AttributeError on ``None`` inside HyperparameterRun.__init__.
        raise ValueError(
            f"No runs found in hyperparameter search directory `{hparam_search_dir}`"
        )
    best_run = HyperparameterRun(best_run_dir)

    print(f"Best run: {best_run.run_dir}")
    print("Hyperparameters:", best_run.hyperparameters(hparams))
    print("Model path:", best_run.model_path())

In [260]:
# Root directory of all hyperparameter searches; relative, so it is resolved
# against the working directory the notebook is run from.
HPARAM_SEARCH_DIR = Path("src") / "hparam_search"

## BERT4Rec

In [261]:
# Names of the run arguments to report for BERT4Rec runs; passed to
# best_hparams, which looks them up in the arguments parsed from each run's log.
BERT4Rec_hyperparameters = ["mask_ratio"]

### LastFM

In [262]:
# Print the best run found under the "BERT4Rec_LastFM" search directory.
best_hparams(HPARAM_SEARCH_DIR / "BERT4Rec_LastFM", BERT4Rec_hyperparameters)

Best run: src/hparam_search/BERT4Rec_LastFM/Jun-11-2025_23-02-32
Hyperparameters: {'mask_ratio': '0.2'}
Model path: src/hparam_search/BERT4Rec_LastFM/Jun-11-2025_23-02-32/model.pt
