In [None]:
import sys
import mlflow
from dotenv import load_dotenv
import optuna

sys.path.append("..")
from herec.reader import *
from herec.utils import hyParamSuggester

## Arguments

In [None]:
# Experiment selectors: the model to tune and the dataset to tune it on.
MODEL, DATASET = "MF", "ML100K"
# Per-model YAML settings file (later handed to hyParamSuggester).
SETTING_PATH = "../setting/{}.yaml".format(MODEL)

## Dataset

In [None]:
# Lazy factories: a reader class is only looked up when its entry is called,
# so only the reader for the selected dataset is ever resolved and built.
_reader_factories = {
    "ML100K": lambda: ML100K(),
    "ML1M": lambda: ML1M(),
    "ML10M": lambda: ML10M(),
    "ML25M": lambda: ML25M(),
}

# Reject unknown dataset names before attempting to build anything.
if DATASET not in _reader_factories:
    raise Exception("Invalid DATASET NAME")

reader = _reader_factories[DATASET]()

## Setup MLFlow

In [None]:
# Pull environment variables from the project-level .env file.
load_dotenv("../.env")

# One MLflow experiment per (model, dataset) pair: reuse it when it already
# exists, otherwise create it and keep the freshly issued id.
EXPERIMENT_NAME = "HeRec-{}-{}".format(MODEL, DATASET)
experiment = mlflow.get_experiment_by_name(EXPERIMENT_NAME)
experiment_id = (
    experiment.experiment_id
    if experiment is not None
    else mlflow.create_experiment(name=EXPERIMENT_NAME)
)

# Report the experiment name and id in the cell output.
print("実験名:", EXPERIMENT_NAME)
print("実験ID:", experiment_id)

## Training

In [None]:
# Data loader class used for training, aliased so later cells stay generic.
from herec.loader import ratingLoader as targetLoader

# Trainer class used for training, aliased for the same reason.
from herec.trainer import ratingTrainer as targetTrainer

# Hyper-parameter suggester built from the per-model settings file.
suggester = hyParamSuggester(SETTING_PATH)

In [None]:
def objective(trial):
    """Run one Optuna trial and return its best validation loss.

    Opens an MLflow run under ``experiment_id``, logs the hyper-parameters
    suggested for ``trial``, builds the model selected by ``MODEL``, fits it
    on the ``TRAIN`` and ``VALID`` subsets of ``reader.df_SUBSET``, and logs
    the score of the trainer's best parameters on ``VALID`` as the
    ``BEST_VALID_LOSS`` metric.

    Args:
        trial: Trial object supplied by Optuna; forwarded unchanged to
            ``suggester.suggest_hyparam``.

    Returns:
        The value returned by ``trainer.score`` for the trainer's best
        parameters on the ``VALID`` subset.

    Raises:
        ValueError: If ``MODEL`` names a model this function cannot build.
    """

    # Resolve the model class before opening an MLflow run. Previously an
    # unsupported MODEL left `model` unbound, so the trial crashed with an
    # UnboundLocalError only after a run had been created and params logged.
    if MODEL == "MF":
        from herec.model import MF as model_class
    else:
        raise ValueError(f"Unsupported MODEL: {MODEL!r}")

    with mlflow.start_run(experiment_id=experiment_id) as run:

        # Get Hyper-parameter Setting
        hyparam = suggester.suggest_hyparam(trial)

        # Save Hyper-parameter to MLFlow (flat params plus the full dict as JSON)
        mlflow.log_params(hyparam["model"])
        mlflow.log_params(hyparam["trainer"])
        mlflow.log_dict(hyparam, "params.json")

        # Define Model
        model = model_class(
            user_num=reader.user_num,
            item_num=reader.item_num,
            **hyparam["model"]
        )

        # Train
        trainer = targetTrainer(
            model=model,
            dataLoader=targetLoader,
            run=run,
            ckpt_dir="../checkpoint/",
            verbose=1,
            **hyparam["trainer"]
        )
        trainer.fit(reader.df_SUBSET["TRAIN"], reader.df_SUBSET["VALID"])
        # NOTE(review): presumably releases training-time caches before
        # scoring -- confirm against herec.trainer.
        trainer.clear_cache()

        # Get Best Validation Loss
        best_valid_loss = trainer.score(trainer.get_best_params(), reader.df_SUBSET["VALID"])

        # Save Best Valid. Loss to MLFlow
        mlflow.log_metric("BEST_VALID_LOSS", best_valid_loss)

    return best_valid_loss

# Hyper-parameter search (labelled "TPE" by the author; presumably relies on
# Optuna's default sampler -- confirm against the installed Optuna version).
study = optuna.create_study()
study.optimize(func=objective, n_trials=100)