# Regression models

In this notebook we show how the tuning library can be called for any regression predictor implementing our `fit` and `evaluate` methods.
The tuning process can be customized through a number of settings, most importantly the set of parameters to explore and the evaluation method. The latter defaults to a train-test split for timing reasons (since GridSearch is a computationally expensive procedure). However, more robust results are likely with `method='CV'`.

In [None]:
import pandas as pd
import numpy as np

import sys
sys.path.append('..')

from regressors import RidgePredictor, LassoPredictor, RandomForestPredictor
from tuning import tune, bayesian_optimization

In [None]:
# Dimensions of the synthetic data set used to demonstrate the tuner.
rows = 100
columns = 10

# Seed NumPy's global generator so that re-running the notebook from a fresh
# kernel produces the same random data every time.
SEED = 42
np.random.seed(SEED)

# Synthetic targets: `rows` integers drawn from {1, 2} (the upper bound 3 is
# exclusive), kept as a plain Python list of ints.
train_y = np.random.randint(1, 3, size=rows).tolist()

# Synthetic feature matrices, rounded to two decimals.
# NOTE(review): train_x is scaled by 1000 while test_x is scaled by 100 —
# confirm that the differing scales are intentional.
train_x = pd.DataFrame(np.random.rand(rows, columns) * 1000).round(2)
test_x = pd.DataFrame(np.random.rand(rows, columns) * 100).round(2)

# Define output file
write_to = '../data/tuning.txt'

In [None]:
# Candidate tree depths (3 through 9) to explore for the random forest.
param_grid = {'max_depth': list(range(3, 10))}

# Run the tuner over the grid; the call returns a pair that is unpacked into
# the best parameter set and its score.
best_params, best_score = tune(
    RandomForestPredictor,
    train_x,
    train_y,
    param_grid,
    silent=False,
    persist=False,
    write_to=write_to,
)
# NOTE(review): the message says "log loss" although this notebook covers
# regression models — confirm which metric tune() actually reports.
print(f"Optimal parameters achieve log loss = {best_score}")

In [None]:
# 15 log-spaced alpha values between 1e-10 and 1e-2.
# normalize is fixed to True; otherwise the data would have to be
# preprocessed before fitting the model.
param_grid = dict(
    alpha=np.logspace(-10, -2, 15),
    normalize=[True],
)

# Run the tuner over the grid; the call returns a pair that is unpacked into
# the best parameter set and its score.
best_params, best_score = tune(
    RidgePredictor,
    train_x,
    train_y,
    param_grid,
    silent=False,
    persist=False,
    write_to=write_to,
)
# NOTE(review): the message says "log loss" although this notebook covers
# regression models — confirm which metric tune() actually reports.
print(f"Optimal parameters achieve log loss = {best_score}")

In [None]:
# 15 log-spaced alpha values between 1e-10 and 1e-2.
# normalize is fixed to False here.
# NOTE(review): the original remark read "only if the data is not normalized";
# presumably normalization should be enabled only for un-normalized input —
# confirm the intended setting.
param_grid = dict(
    alpha=np.logspace(-10, -2, 15),
    normalize=[False],
)

# Run the tuner over the grid; the call returns a pair that is unpacked into
# the best parameter set and its score.
best_params, best_score = tune(
    LassoPredictor,
    train_x,
    train_y,
    param_grid,
    silent=False,
    persist=False,
    write_to=write_to,
)
# NOTE(review): the message says "log loss" although this notebook covers
# regression models — confirm which metric tune() actually reports.
print(f"Optimal parameters achieve log loss = {best_score}")