In [None]:
import json
from pathlib import Path

import numpy as np
import optuna
from apopfail.model import clean, get_pipeline
from apopfail.occ import split_data_subset
from apopfail.utils.loading import load_data
from pyod.models.lunar import LUNAR
from sklearn.metrics import average_precision_score
from sklearn.preprocessing import MinMaxScaler, StandardScaler

In [None]:
# Load the dataset; the second value returned by load_data is not used here.
X, _, y = load_data(root="..")
# Run the project's cleaning step on features and labels together.
X, y = clean(X, y)
# Disabled preprocessing step, kept for reference:
# X = get_pipeline(reducer="passthrough", scaler=StandardScaler()).fit_transform(X)
# Cast the features to float32, then separate rows by label
# (label 0 -> X_normal, label 1 -> X_abnormal).
X = X.astype(np.float32)
X_normal, X_abnormal = X.loc[y == 0], X.loc[y == 1]

In [None]:
# Build the train/test data (plus test labels) from the normal and abnormal subsets.
# NOTE(review): `test` / `test_labels` are passed to the tuning objective below as
# validation data, so scores obtained on them are optimistically biased — confirm
# that a separate hold-out set is used for the final evaluation.
train, test, test_labels = split_data_subset(X_normal, X_abnormal)

In [None]:
def get_lunar_param_space(trial):
    """Sample one LUNAR hyperparameter configuration from an Optuna trial.

    Every Optuna parameter is registered under the same name as the LUNAR
    keyword argument it feeds, so ``study.best_params`` can be read back
    without guessing. The scaler is sampled as a string (``"minmax"`` or
    ``"standard"``) because Optuna expects primitive categorical choices and
    the best parameters are later written to JSON; the returned dict contains
    a freshly constructed scaler instance for that name.

    Args:
        trial: Object exposing ``suggest_int``, ``suggest_float`` and
            ``suggest_categorical`` (an ``optuna.trial.Trial``).

    Returns:
        dict: Keyword arguments for ``LUNAR(**params)`` with keys
        ``n_neighbours``, ``scaler``, ``epsilon``, ``proportion``,
        ``n_epochs``, ``lr`` and ``wd``.
    """
    # Classes, not instances: a new scaler is built per trial so that no fitted
    # state is shared between trials.
    scaler_factories = {"minmax": MinMaxScaler, "standard": StandardScaler}

    # Suggestions are issued in the same order as the keys of the result.
    n_neighbours = trial.suggest_int("n_neighbours", 1, 2)
    scaler_name = trial.suggest_categorical("scaler", list(scaler_factories))

    param_space = {
        "n_neighbours": n_neighbours,
        "scaler": scaler_factories[scaler_name](),
        "epsilon": trial.suggest_float("epsilon", 0.1, 0.9),
        "proportion": trial.suggest_float("proportion", 0.1, 1),
        "n_epochs": trial.suggest_int("n_epochs", 100, 1000),
        "lr": trial.suggest_float("lr", 0.001, 0.1),
        "wd": trial.suggest_float("wd", 0.1, 0.9),
    }
    return param_space


def get_lunar_objective(train, X_val, y_val):
    """Build an Optuna objective that fits LUNAR on ``train`` and scores it on validation data.

    Args:
        train: Data passed to ``model.fit`` for every trial.
        X_val: Validation samples scored by the fitted model.
        y_val: Ground-truth labels for ``X_val``, passed as ``y_true`` to
            ``average_precision_score``.

    Returns:
        callable: ``objective(trial)`` returning the average-precision score
        of the configuration sampled for that trial.
    """

    def objective(trial):
        """Fit one sampled LUNAR configuration and return its validation average precision."""
        param_space = get_lunar_param_space(trial)
        clf = LUNAR(**param_space)
        model = get_pipeline(clf=clf)
        model.fit(train)

        # Average precision is a ranking metric: hard 0/1 predictions collapse the
        # precision-recall curve to a single threshold. Prefer continuous scores
        # when the pipeline exposes them and fall back to labels otherwise.
        # NOTE(review): assumes higher decision_function values mean "more
        # anomalous", matching the positive class in y_val — confirm for the
        # object returned by get_pipeline.
        if hasattr(model, "decision_function"):
            y_score = model.decision_function(X_val)
        else:
            y_score = model.predict(X_val)

        return average_precision_score(y_val, y_score)

    return objective

In [None]:
study_name = "lunar"
N_TRIALS = 100
TIMEOUT_SECONDS = 1 * 60 * 60  # stop after one hour even if trials remain

objective = get_lunar_objective(train, test, test_labels)

study = optuna.create_study(
    study_name=study_name,
    direction="maximize",  # the objective returns average precision: higher is better
    # pruner=optuna.pruners.MedianPruner(n_warmup_steps=20),
)

study.optimize(
    objective,
    n_trials=N_TRIALS,
    show_progress_bar=True,
    timeout=TIMEOUT_SECONDS,
)

best_params = study.best_params
print(f"Study completed with best score: {study.best_value:.4f}")

# Persist the best configuration. Create the output directory first so a long
# tuning run is not lost to a missing folder, and stringify any value that JSON
# cannot encode natively instead of raising after the study has finished.
output_path = Path("../output") / f"{study_name}_best_params.json"
output_path.parent.mkdir(parents=True, exist_ok=True)
with output_path.open("w") as f:
    json.dump(best_params, f, indent=4, default=str)