# Introduction

This notebook contains a minimalist template for logging nested hyperparameter tuning runs in Databricks using hyperopt and mlflow. 

In [None]:
# Location of the pickle that keeps the best params in one separate file.
# A dedicated file can be helpful when the nested tuning design has multiple layers;
# for simpler designs, mlflow.search_runs() can search all runs under one experiment_id.
model_best_params_path = f"{project_dbfs_path}/HM_best_params.pkl"

# Search space, split into model hyperparameters and fit-time arguments
hyp_params = {
    'l1_ratio': hp.choice('l1_ratio', np.arange(0, 1.0, 0.1)),
}
fit_params = {
    'early_stopping_rounds': hp.choice('early_stopping_rounds', [10, 20]),
}
space = {'hyp_params': hyp_params, 'fit_params': fit_params}

# Trial history for the search; SparkTrials() can be used instead to speed things up
trials = Trials()

# set up the tuning
def tune_model(space, model, X, y, max_evals, patience):
    """Run a hyperopt search and log every evaluation as a nested MLflow run.

    Each evaluation trains ``model`` with one sampled configuration, logs the
    model, its parameters and its mean cross-validated AUC to a nested MLflow
    run, and reports ``1 - auc`` as the loss to minimise. The best assignment
    returned by ``fmin`` is pickled to the module-level
    ``model_best_params_path`` and returned.

    The search history is recorded in the module-level ``trials`` object.

    Args:
        space: Hyperopt search space with ``'hyp_params'`` and
            ``'fit_params'`` entries.
        model: Object providing ``train(X, y, **params)`` and
            ``cross_validate(X, y)``; the latter must return a mapping with
            an ``'auc'`` entry.
        X: Training features, passed to ``model`` unchanged.
        y: Training targets, passed to ``model`` unchanged.
        max_evals: Maximum number of evaluations handed to ``fmin``.
        patience: Argument given to ``no_progress_loss`` to build the
            early-stopping function.

    Returns:
        The ``best`` value returned by ``fmin``. The caller resolves it to
        concrete parameter values with ``space_eval(space, best)``.
    """

    def loss_function(sampled):
        """Train and score one sampled configuration; return the hyperopt result dict."""
        # nested=True attaches this evaluation to the parent run opened below
        with mlflow.start_run(nested=True):

            # configure the run
            hyp_params = sampled['hyp_params']
            fit_params = sampled['fit_params']
            model.train(X, y, **hyp_params, **fit_params)

            # get model performance (could be a custom function)
            cv_results = model.cross_validate(X, y)

            # log model, params and metrics for comparison
            mlflow.sklearn.log_model(model, "model")
            auc = np.mean(cv_results['auc'])
            mlflow.log_metric('cv_auc', auc)
            mlflow.log_params(hyp_params)
            mlflow.log_params(fit_params)

            # fmin minimises, so turn "maximise auc" into "minimise 1 - auc"
            loss = 1 - auc

        return {'loss': loss, 'status': STATUS_OK}

    # all experiments will be nested under the run_name
    with mlflow.start_run(run_name='Model turning'):
        best = fmin(
            fn=loss_function,
            space=space,
            algo=tpe.suggest,
            max_evals=max_evals,
            trials=trials,
            rstate=np.random.default_rng(10),  # fixed seed for reproducible sampling
            early_stop_fn=no_progress_loss(patience),
        )

    # use a context manager so the file is flushed and closed deterministically
    with open(model_best_params_path, 'wb') as best_params_file:
        pickle.dump(best, best_params_file)

    return best

# configure then run the tuning
max_evals = 400  # upper bound on the number of evaluations
patience = 50    # passed to no_progress_loss inside tune_model
tune_model(space, model, X, y, max_evals, patience)

# retrieve the best params from tuning
# The 'rb' mode belongs to open(), not pickle.load(); pickle needs a binary file object.
# NOTE: only unpickle files written by this notebook; pickle is unsafe on untrusted data.
with open(model_best_params_path, 'rb') as best_params_file:
    best = pickle.load(best_params_file)
best_space = space_eval(space, best)

# utilize the best params to train the best model
model.train(X, y, **best_space['hyp_params'], **best_space['fit_params'])