### Base script that shows how to use Hyperopt to search for optimal parameters for a model
### Hyperopt documentation: http://hyperopt.github.io/hyperopt/

In [6]:
from hyperopt import fmin, tpe, hp, Trials, STATUS_OK

from multiprocessing import cpu_count

In [7]:
# Leave two cores free for the rest of the system, but never drop below one
# worker: on a 2-core machine `cpu_count() - 2` would be 0, which joblib
# rejects as an `n_jobs` value, and on a 1-core machine it would be -1, which
# joblib interprets as "use every core".
allowed_cpus = max(1, cpu_count() - 2)

In [9]:
def cv_splitter(df, nr_of_splits=3, initial_train_months=6, step_months=2, validation_months=2):
    """Build expanding-window train/validation folds from a time-indexed frame.

    Every fold trains on the rows from the start of the data up to a cut-off
    date and validates on the rows in the period directly after it. The
    cut-off moves forward by ``step_months`` for each successive fold.

    Parameters
    ----------
    df : pandas.DataFrame
        Data whose index holds the timestamps. Only ``df.index`` is used.
        The nearest-match lookup assumes a sorted, unique index -- TODO confirm
        for the data passed in.
    nr_of_splits : int, default 3
        Number of (train, validation) folds to create.
    initial_train_months : int, default 6
        Length of the training window of the first fold, in months.
    step_months : int, default 2
        Number of months the training window grows per fold.
    validation_months : int, default 2
        Length of every validation window, in months.

    Returns
    -------
    list of tuple of numpy.ndarray
        One ``(train_index, validation_index)`` pair per fold, holding
        positional (integer) row indices.

    Notes
    -----
    Window boundaries are mapped to the row whose timestamp is nearest to the
    boundary date. A boundary that lies beyond the end of the data presumably
    resolves to the last row, which would shrink or empty that validation
    fold -- verify that the data covers all requested windows.
    """
    index = df.index
    train_start = index.min()

    def nearest_position(timestamp):
        # `Index.get_loc(..., method='nearest')` was removed in pandas 2.0;
        # `get_indexer` is the supported way to do a nearest-match lookup.
        return int(index.get_indexer([timestamp], method='nearest')[0])

    splits = []
    for nr in range(nr_of_splits):
        train_end = train_start + pd.DateOffset(months=initial_train_months + nr * step_months)
        valid_end = train_end + pd.DateOffset(months=validation_months)

        # Validation starts exactly where training stops, so a single
        # position serves as both boundaries.
        train_stop = nearest_position(train_end)
        valid_stop = nearest_position(valid_end)

        splits.append((np.arange(train_stop), np.arange(train_stop, valid_stop)))

    return splits

### Define the objective function that hyperopt uses to find the optimal parameters

In [1]:
def get_pipeline(params):
    """Return a scaler + ElasticNet pipeline configured with ``params``.

    ``params`` is unpacked as keyword arguments into ``ElasticNet``.
    """
    steps = (
        # NOTE (original author): don't use this scaler, use the normal float64 scaler!!
        StandardScaler_Float32(),
        # tol=0.01 keeps model fitting fast
        ElasticNet(tol=0.01, **params),
    )
    return make_pipeline(*steps)

def objective(params):
    """Score one hyperparameter sample for hyperopt.

    Cross-validates the pipeline built from the sampled ``alpha`` and
    ``l1_ratio`` on the module-level ``X_train`` / ``y_train`` using the
    module-level ``splits``, and reports the mean per-fold RMSE as the loss.

    Returns a dict with the keys ``'loss'`` and ``'status'``.
    """
    print(params)

    # Pass only the two ElasticNet settings on to the model.
    model_params = {name: params[name] for name in ('alpha', 'l1_ratio')}
    model = get_pipeline(model_params)

    # The scorer returns negated MSE per fold; flip the sign to get plain MSE.
    fold_mse = -cross_val_score(
        model,
        X_train,
        y_train,
        cv=splits,
        scoring='neg_mean_squared_error',
        n_jobs=allowed_cpus,
    )

    fold_rmse = np.sqrt(fold_mse)
    loss = fold_rmse.mean()
    print(loss, fold_rmse, '\n')
    print('\n\n')

    return {'loss': loss, 'status': STATUS_OK}

### Define the space that hyperopt should search. These are priors.
### I use a loguniform distribution for alpha, because I want lower values to have a larger chance of being chosen in the space

In [4]:
# Search space (priors) for the two ElasticNet settings.
# hp.loguniform takes its bounds in log space: exp(-4.6) ~ 0.01 and
# exp(2.35) ~ 10.5, so alpha is drawn from roughly [0.01, 10.5].
space = dict(
    alpha=hp.loguniform('alpha', low=-4.6, high=2.35),
    l1_ratio=hp.uniform('l1_ratio', low=0.02, high=1.0),
)

### Start finding the optimal parameters.
- Results are written to Trials() object
- max_evals defines the number of combinations that should be tried

In [None]:
# hyperopt stores every evaluated trial in this object
trials = Trials()

# cross-validation folds, built once and read by objective() on every trial
splits = cv_splitter(X_train)

# run the search: minimise the loss returned by objective() over `space`
best = fmin(
    objective,  # function to optimize
    space,
    algo=tpe.suggest,  # optimization algorithm, hyperopt will select its parameters automatically
    max_evals=30,  # maximum number of iterations
    trials=trials,  # logging
    verbose=0,
)

### Make a nice dataframe from the hyperopt trials to be able to check results

In [None]:
# One row per trial: the sampled parameter values plus the loss as 'score'.
trial_results = pd.DataFrame({**trials.vals, 'score': trials.losses()})

# Per-trial duration, keyed by trial id: refresh time minus book time.
trial_timings = {
    trial['tid']: [trial['refresh_time'] - trial['book_time']]
    for trial in trials.trials
}
fitting_times = pd.DataFrame.from_dict(
    trial_timings,
    orient='index',
    columns=['fitting_time'],
)

# Attach the timings on the index and list the best (lowest) scores first.
trial_results = trial_results.join(fitting_times)
trial_results = trial_results.sort_values(by='score')

### Plot trial results, works for 2 parameters of ElasticNet

In [None]:
# Plot trial results: one point per trial at (alpha, l1_ratio), coloured by
# score. Works for the 2 parameters of ElasticNet.
sns.set(style='white', font_scale=1.5)

ax = trial_results.plot(
    x='alpha',
    y='l1_ratio',
    c='score',
    cmap=cm.coolwarm_r,
    kind='scatter',
    logx=True,
    figsize=(10, 7),
)

# Label every point with its rounded score. The label text is passed
# positionally because the `s=` keyword of `annotate` was renamed to `text`
# in matplotlib 3.3 and later removed; the positional form works on all
# versions. Iterating over the columns directly also avoids integer-label
# lookups on the (sorted) index.
for point_alpha, point_l1_ratio, point_score in zip(
    trial_results['alpha'], trial_results['l1_ratio'], trial_results['score']
):
    ax.annotate(
        str(round(point_score, 2)),
        xy=(point_alpha, point_l1_ratio),
        alpha=0.4,
        fontsize=12,
    )

# The trailing semicolon suppresses the notebook's echo of the return value.
ax.set(title=f"{model_choices['pred_target']} Hyperopt grid search: test_rmse");