In [None]:
import sys

sys.path.append('..')

import optuna
from sklearn import datasets
from sklearn.svm import SVC

# DEID libraries
import gojo
from gojo import core

In [None]:
# Load the Wine dataset that ships with scikit-learn.
wine_dt = datasets.load_wine()

# Feature matrix plus a binary one-vs-rest target: samples whose original
# label equals 1 become the positive class (1), every other sample becomes 0.
# The class names can be inspected through wine_dt['target_names'].
X = wine_dt['data']
is_class_one = wine_dt['target'] == 1
y = is_class_one.astype(int)

In [None]:
# Baseline: cross-validate the SVC with fixed hyperparameters (no tuning).
baseline_model = core.SklearnModelWrapper(
    SVC,
    kernel='poly',
    degree=1,
    coef0=0.0,
    cache_size=1000,
    class_weight='balanced',
)
baseline_cv = gojo.util.getCrossValObj(
    cv=5, stratified=True, loocv=False, random_state=1997
)
cv_report = core.evalCrossVal(
    X=X,
    y=y,
    model=baseline_model,
    cv=baseline_cv,
    verbose=True,
    save_train_preds=True,
    save_models=False,
    n_jobs=1,
)

# Score the report with the default binary-classification metrics
# (bin_threshold=0.5) and display the 'test' entry.
binary_metrics = core.getDefaultMetrics('binary_classification', bin_threshold=0.5)
scores = cv_report.getScores(binary_metrics)
scores['test']

In [None]:
# Hyperparameter search space: parameter name -> (sampling method name, bounds).
# NOTE(review): the method names look like optuna Trial methods — confirm
# against the gojo documentation.
search_space = dict(
    degree=('suggest_int', (1, 10)),
    coef0=('suggest_float', (0.0, 100.0)),
)

# Model handed to the HPO routine; `degree` and `coef0` are also the two
# parameters listed in the search space above.
model = core.SklearnModelWrapper(
    SVC,
    kernel='poly',
    degree=1,
    coef0=0.0,
    cache_size=1000,
    class_weight='balanced',
)

In [None]:
# Perform the HPO to optimize the model hyperparameters inside a nested
# cross-validation, maximizing the F1 score (minimization=False).
cv_report = core.evalCrossValNestedHPO(
    X=X,
    y=y,
    model=model,
    search_space=search_space,
    outer_cv=gojo.util.getCrossValObj(cv=5, stratified=True, loocv=False, random_state=1997),
    inner_cv=gojo.util.getCrossValObj(cv=5, stratified=True, loocv=False, random_state=1997),
    # Seed the sampler as well: the CV splitters are already seeded, but an
    # unseeded TPESampler makes the search differ on every run.
    # NOTE(review): with n_jobs > 1 the exact trial order may still vary
    # depending on how gojo parallelizes the search — confirm.
    hpo_sampler=optuna.samplers.TPESampler(n_startup_trials=100, seed=1997),
    hpo_n_trials=200,
    minimization=False,
    metrics=core.getDefaultMetrics('binary_classification', bin_threshold=0.5),
    objective_metric='f1_score',
    verbose=1,
    save_train_preds=True,
    save_models=True,
    n_jobs=15
)

In [None]:
# Score the nested-CV report with the default binary-classification metrics
# (bin_threshold=0.5) and display the 'test' entry.
binary_metrics = core.getDefaultMetrics('binary_classification', bin_threshold=0.5)
scores = cv_report.getScores(binary_metrics)
scores['test']

In [None]:
# Plot the 'value' entry of the first HPO history record.
# NOTE(review): presumably the objective value per trial for the first
# outer fold — confirm against the gojo documentation.
first_hpo_history = cv_report.metadata['hpo_history'][0]
first_hpo_history['value'].plot()

In [None]:
# test a custom aggregation function as optimization objective
def adocMetric(_scores):
    """Collapse train/test F1 scores into one penalized objective value.

    The result is the mean test F1 minus two penalties:
      * twice the absolute gap between the mean test and mean train F1;
      * twice the standard deviation of the test F1.

    Parameters
    ----------
    _scores : mapping
        Must provide ``_scores['test']['f1_score']`` and
        ``_scores['train']['f1_score']``, each exposing ``.mean()`` (and
        ``.std()`` for the test entry).
        NOTE(review): presumably pandas DataFrames with one row per fold —
        confirm against the ``agg_function`` contract of gojo.

    Returns
    -------
    The penalized score (higher is better when used with minimization=False).
    """
    test_f1 = _scores['test']['f1_score']
    train_f1 = _scores['train']['f1_score']

    mean_test_f1 = test_f1.mean()
    mean_train_f1 = train_f1.mean()

    # Penalize the train/test gap and unstable test performance.
    gap_penalty = 2 * abs(mean_test_f1 - mean_train_f1)
    spread_penalty = 2 * test_f1.std()

    return mean_test_f1 - (gap_penalty + spread_penalty)

# Same nested-CV HPO as before, but the per-fold scores are aggregated with
# the custom `adocMetric` function, whose value is maximized
# (minimization=False).
cv_report2 = core.evalCrossValNestedHPO(
    X=X,
    y=y,
    model=model,
    search_space=search_space,
    outer_cv=gojo.util.getCrossValObj(cv=5, stratified=True, loocv=False, random_state=1997),
    inner_cv=gojo.util.getCrossValObj(cv=5, stratified=True, loocv=False, random_state=1997),
    # Seed the sampler as well: the CV splitters are already seeded, but an
    # unseeded TPESampler makes the search differ on every run.
    # NOTE(review): with n_jobs > 1 the exact trial order may still vary
    # depending on how gojo parallelizes the search — confirm.
    hpo_sampler=optuna.samplers.TPESampler(n_startup_trials=100, seed=1997),
    hpo_n_trials=200,
    minimization=False,
    metrics=core.getDefaultMetrics('binary_classification', bin_threshold=0.5),
    objective_metric='f1_score',
    agg_function=adocMetric,
    verbose=-1,
    save_train_preds=True,
    save_models=True,
    n_jobs=15
)

In [None]:
# Score the custom-objective report with the default binary-classification
# metrics (bin_threshold=0.5) and display the 'test' entry.
binary_metrics = core.getDefaultMetrics('binary_classification', bin_threshold=0.5)
scores2 = cv_report2.getScores(binary_metrics)
scores2['test']

In [None]:
# Plot the 'value' entry of the first HPO history record of the
# custom-objective run.
# NOTE(review): presumably the aggregated objective per trial for the first
# outer fold — confirm against the gojo documentation.
first_hpo_history2 = cv_report2.metadata['hpo_history'][0]
first_hpo_history2['value'].plot()

In [None]:
# Display the 'hpo_best_params' metadata entry of the custom-objective run (cv_report2).
cv_report2.metadata['hpo_best_params']

In [None]:
# Display the 'hpo_best_params' metadata entry of the plain f1_score run (cv_report), for comparison.
cv_report.metadata['hpo_best_params']