# Regression models

In this notebook we show how the tuning library can be called for any regression predictor implementing our `fit` and `evaluate` methods.
The tuning process can be configured through a number of settings, most importantly the set of parameters to be explored and the method of evaluation. The latter is by default set to a train-test split for timing reasons (since GridSearch is a computationally expensive procedure). However, more robust results are likely when using `method='CV'`.

In [None]:
import pandas as pd
import numpy as np

import sys
sys.path.append('..')

from regressors import RidgePredictor, LassoPredictor, RandomForestPredictor
from tuning import tune, bayesian_optimization
from preprocessing import keep_intersection_of_columns

In [None]:
# Text file that is handed to the tuning calls through their `write_to` argument.
write_to = '../data/tuning.txt'

# Read the aggregated train and test tables from CSV.
aggregated_train = pd.read_csv(filepath_or_buffer='../data/aggregated_train.csv')
aggregated_test = pd.read_csv(filepath_or_buffer='../data/aggregated_test.csv')

In [None]:
# Build the feature matrices. Each frame gets a fresh 0..n-1 index before it is
# handed to the helper (which, per its name, presumably keeps only the columns
# present in both frames -- confirm against `preprocessing`).
train_x, test_x = keep_intersection_of_columns(
    aggregated_train.reset_index(drop=True),
    aggregated_test.reset_index(drop=True),
)

# Target on a log scale. log1p(x) is log(x + 1) computed without losing
# precision when x is close to zero. The column is selected before the index
# reset so that only this one Series is copied instead of the whole frame.
train_y = np.log1p(aggregated_train["target_sum"].reset_index(drop=True))

# Missing feature values are replaced by 0 in both splits.
train_x = train_x.fillna(0)
test_x = test_x.fillna(0)

In [None]:
# Candidate values for the random forest: tree depths 3 through 7.
param_grid = dict(max_depth=list(range(3, 8)))

# Tune with cross-validation; progress is not silenced and `write_to` is
# forwarded to the tuning call.
best_params, best_score = tune(
    RandomForestPredictor,
    train_x,
    train_y,
    param_grid,
    silent=False,
    persist=False,
    write_to=write_to,
    method='CV',
)
print("Optimal parameters achieve RMSE = {}".format(best_score))

In [None]:
# Candidate values for ridge regression.
param_grid = dict(
    # 15 regularisation strengths, log-spaced from 1e-10 to 1e-2.
    alpha=np.logspace(-10, -2, num=15),
    # Pinned to True. Original note: "only false if the data is not normalized!!"
    # NOTE(review): that wording looks inverted (False would fit data that is
    # already normalized) -- confirm the intent.
    normalize=[True],
)

# Tune with cross-validation; progress is not silenced and `write_to` is
# forwarded to the tuning call.
best_params, best_score = tune(
    RidgePredictor,
    train_x,
    train_y,
    param_grid,
    silent=False,
    persist=False,
    write_to=write_to,
    method='CV',
)
print("Optimal parameters achieve RMSE = {}".format(best_score))

In [None]:
# Candidate values for lasso regression.
param_grid = dict(
    # 15 regularisation strengths, log-spaced from 1e-10 to 1e-2.
    alpha=np.logspace(-10, -2, num=15),
    # Pinned to True. Original note: "only false if the data is not normalized!!"
    # NOTE(review): that wording looks inverted (False would fit data that is
    # already normalized) -- confirm the intent.
    normalize=[True],
)

# Tune with cross-validation; progress is not silenced and `write_to` is
# forwarded to the tuning call.
best_params, best_score = tune(
    LassoPredictor,
    train_x,
    train_y,
    param_grid,
    silent=False,
    persist=False,
    write_to=write_to,
    method='CV',
)
print("Optimal parameters achieve RMSE = {}".format(best_score))