# Wind prediction - Second assignment

## Authors

David Moreno Maldonado 100441714     
Inés Fernández Campos 100443936

## 0. Preliminaries

In [1]:
# Import some libraries
import os
import numpy as np              
import pandas as pd
import matplotlib.pyplot as plt 

import sys
import time
import math

from sklearn.experimental import enable_iterative_imputer
from sklearn import preprocessing, impute, model_selection, metrics, neighbors, ensemble, feature_selection
from sklearn.pipeline import Pipeline
import optuna
import optuna.visualization as ov

os.getcwd()

'/Users/roni/Desktop/master/2nd quarter/big data intelligence/assignments/assignment_2'

In [2]:
#MAIN PARAMETERS FOR THE ASSIGNMENT
budget = 20
random_state = 3
verbose = 0
n_jobs = 1

The "wind_pickle" file contains data in a binary format called "Pickle". Pickle data loads faster than text data.

In [3]:
data = pd.read_pickle('wind_pickle.pickle')

You can visualize the attributes in the dataset. Very important, the output attribute (i.e. the value to be predicted, **energy**, is the first attribute). **Steps** represents the hours in advance of the forecast. We will not use this variable here.

In [4]:
# The dataset contains 5937 instances and 556 attributes (including the outcome to be predicted)
print(data.shape)
#data.columns.values.tolist() 

(5937, 556)


In [5]:
#-1 for training, 0 for validation, 1 for testing
year_to_part = {
    2005: -1,
    2006: -1,
    2007: 0,
    2008: 0, 
    2009: 1,
    2010: 1
}
data['partition'] = data['year'].apply(lambda x: year_to_part[x])

We now remove the columns that cannot be used for training the models from the DataFrame

In [6]:
# Steps, month, day, hour, year should be removed, they cannot be used for training the models
to_remove = ['steps', 'month', 'year', 'day', 'hour']
for m in to_remove: data = data.drop(m, 1)

In [7]:
from numpy.random import randint

# we add na values at random
my_NIA = 100443936 + 100441714
np.random.seed(my_NIA)

how_many_nas = round(data.shape[0]*data.shape[1]*0.05)
print('Lets put '+str(how_many_nas)+' missing values \n')
x_locations = randint(0, data.shape[0], size=how_many_nas)
y_locations = randint(1, data.shape[1]-2, size=how_many_nas)

for i in range(len(x_locations)):
    data.iat[x_locations[i], y_locations[i]] = np.nan
    
data.to_pickle('wind_pickle_with_nan.pickle')

Lets put 163861 missing values 



From this point on, the file wind_pickle_with_nan should be used.

In [8]:
data = pd.read_pickle('wind_pickle_with_nan.pickle')
data.shape

(5937, 552)

## Input missing data

In [9]:
print(data.isnull().values.any())
input_cols = data.columns.difference(['energy', 'partition'])
x = data[input_cols]
#Iterative imputer (takes too long)
'''iter_imp = impute.IterativeImputer(random_state=random_state, 
                                   initial_strategy='median', 
                                   max_iter=3,
                                   verbose=verbose)
no_nan = iter_imp.fit_transform(x)'''

#KNN imputer(takes too long)
'''knn_imp = impute.KNNImputer(weights='distance')
no_nan = knn_imp.fit_transform(x)'''

#Simple imputer
simp_imp = impute.SimpleImputer(strategy='median',
                               verbose=2)
no_nan = simp_imp.fit_transform(x)

data[input_cols] = pd.DataFrame(data=no_nan)
print(data.isnull().values.any())

True
False


## Scaling

In [10]:
scaler = preprocessing.StandardScaler().fit(data[input_cols]) 
data[input_cols] = scaler.transform(data[input_cols])

## Data split
We are going to use train/test for model evaluation (outer) and train/validation for hyperparameter tuning (inner), as follows:     
1. Train partition: the first two years of data. Given that there are 6 years worth of data, we will use the first 2/6 of the instances for training.     
2. Validation partition: the second two years of data. 
3. Test partition: the remaining data    


In [11]:
#-1 for training, 0 for validation, 1 for testing
test = data[data['partition'] == 1]
train = data[data['partition'] == -1]
val = data[data['partition'] == 0]

del test['partition']
del train['partition']

y_test = test['energy']
x_test = test[test.columns.difference(['energy'])]

y_train = train['energy']
x_train = train[train.columns.difference(['energy'])]


y_val = val['energy']
x_val = val[train.columns.difference(['energy'])]

# 1. MODEL SELECTION AND HYPER-PARAMETER TUNING

In [12]:
#Dataframes with all the information of each model
summary = {
    'knn': pd.DataFrame(columns=['Time (sec)', 'Score (RMSE)', 'N. neighbors', 'Weights', 'P']),
    'random_forest': pd.DataFrame(columns=['Time (sec)', 'Score (RMSE)', 'Min. samples split', 'Criterion', 'Max. depth', 'N. estimators','Max. features']),
    'gradient_boosting': pd.DataFrame(columns=['Time (sec)', 'Score (RMSE)'])
}


## 1.1 KNN

### 1.1.1 Default hyper-parameters

In [14]:
np.random.seed(random_state)
knn_default = neighbors.KNeighborsRegressor()

start_time = time.time()
scores = -model_selection.cross_val_score(knn_default, x_train, y_train,
                                          scoring='neg_root_mean_squared_error',
                                          cv=tr_val_partition,
                                          verbose=verbose) 
start_time = time.time()
knn_default = knn_default.fit(x_train, y_train)
y_val_pred = knn_default.predict(x_val)
score = math.sqrt(metrics.mean_squared_error(y_val, y_val_pred))
end_time = time.time()

summary['knn'] = summary['knn'].append(pd.Series({
    'Time (sec)': '{:.4f}'.format(end_time - start_time), 
    'Score (RMSE)': score, 
    'N. neighbors': 5, 
    'Weights': 'uniform', 
    'P': 2
    }, 
    name='default'))

### 1.1.2 Hyper-parameter tunning (OPTUNA)

In [None]:
min_n_neigbors = 1
max_n_neigbors = 16

In [15]:
np.random.seed(random_state)
def knn_objective(trial):
    n_neighbors = trial.suggest_int('n_neighbors', min_n_neigbors, max_n_neigbors)
    weights = trial.suggest_categorical('weights', ['uniform','distance'])
    p = trial.suggest_categorical('p', [1, 2])

    clf = neighbors.KNeighborsRegressor(
        n_neighbors=n_neighbors,
        weights=weights,
        p=p)
    
    clf = clf.fit(x_train, y_train)
    y_val_pred = clf.predict(x_val)
    return math.sqrt(metrics.mean_squared_error(y_val, y_val_pred))

knn_optuna = optuna.create_study(direction='minimize')
start_time = time.time()
knn_optuna.optimize(knn_objective, n_trials=budget)
end_time = time.time()

summary['knn'] = summary['knn'].append(pd.Series({
    'Time (sec)': '{:.4f}'.format(end_time - start_time), 
    'Score (RMSE)': knn_optuna.best_value, 
    'N. neighbors': knn_optuna.best_params['n_neighbors'], 
    'Weights': knn_optuna.best_params['weights'], 
    'P': knn_optuna.best_params['p']
    }, 
    name='optuna'))

[32m[I 2021-01-13 09:42:49,538][0m A new study created in memory with name: no-name-a8bca65c-cfc6-4259-bafe-8f395c6645e4[0m
[32m[I 2021-01-13 09:42:49,635][0m Trial 0 finished with value: 435.96737369410965 and parameters: {'n_neighbors': 16, 'weights': 'uniform', 'p': 2}. Best is trial 0 with value: 435.96737369410965.[0m
[32m[I 2021-01-13 09:42:51,987][0m Trial 1 finished with value: 437.64527002872245 and parameters: {'n_neighbors': 6, 'weights': 'uniform', 'p': 1}. Best is trial 0 with value: 435.96737369410965.[0m
[32m[I 2021-01-13 09:42:52,075][0m Trial 2 finished with value: 438.83757546832885 and parameters: {'n_neighbors': 8, 'weights': 'uniform', 'p': 2}. Best is trial 0 with value: 435.96737369410965.[0m
[32m[I 2021-01-13 09:42:54,402][0m Trial 3 finished with value: 426.6970286295103 and parameters: {'n_neighbors': 13, 'weights': 'distance', 'p': 1}. Best is trial 3 with value: 426.6970286295103.[0m
[32m[I 2021-01-13 09:42:54,479][0m Trial 4 finished with v

## 1.2 Random Forest

### 1.2.1 Default hyper-parameters

In [16]:
np.random.seed(random_state)
rf_default = ensemble.RandomForestRegressor(random_state=random_state, verbose=verbose, n_jobs=n_jobs)

start_time = time.time()
rf_default = rf_default.fit(x_train, y_train)
y_val_pred = rf_default.predict(x_val)
score =  math.sqrt(metrics.mean_squared_error(y_val, y_val_pred))
end_time = time.time()

summary['random_forest'] = summary['random_forest'].append(pd.Series({
    'Time (sec)': '{:.4f}'.format(end_time - start_time), 
    'Score (RMSE)': scores.mean(),
    'Min. samples split': 2, 
    'Criterion': 'mse', 
    'Max. depth': 'None',
    'N. estimators': 100,
    'Max. features': 1
    },
    name='default'))

### 1.2.2 Hyper-parameter tunning (OPTUNA)

In [None]:
min_max_depth = 2
max_max_depth = 50
min_n_estimators = 50
max_n_estimators = 200

In [47]:
np.random.seed(random_state)
def random_forest_objective(trial):
    min_samples_split = trial.suggest_uniform('min_samples_split', 0+sys.float_info.min, 1)
    criterion = trial.suggest_categorical('criterion', ['mse','mae'])
    max_depth = trial.suggest_int('max_depth', min_max_depth, max_max_depth)
    n_estimators = trial.suggest_int('n_estimators', min_n_estimators, max_n_estimators)
    max_features = trial.suggest_categorical('max_features', ['auto', 'sqrt', 'log2'])

    clf = ensemble.RandomForestRegressor(
        random_state=random_state,
        min_samples_split=min_samples_split,
        criterion=criterion,
        max_depth=max_depth,
        n_estimators=n_estimators,
        max_features=max_features
        )

    clf = clf.fit(x_train, y_train)
    y_val_pred = clf.predict(x_val)
    return math.sqrt(metrics.mean_squared_error(y_val, y_val_pred))

rf_optuna = optuna.create_study(direction='minimize')
start_time = time.time()
rf_optuna.optimize(random_forest_objective, n_trials=budget, n_jobs=n_jobs)
end_time = time.time()

summary['random_forest'] = summary['random_forest'].append(pd.Series({
    'Time (sec)': '{:.4f}'.format(end_time - start_time), 
    'Score (RMSE)': rf_optuna.best_value,
    'Min. samples split': rf_optuna.best_params['min_samples_split'], 
    'Criterion': rf_optuna.best_params['criterion'], 
    'Max. depth': rf_optuna.best_params['max_depth'],
    'N. estimators': rf_optuna.best_params['n_estimators'],
    'Max. features': rf_optuna.best_params['max_features']
    },
    name='optuna'))

[32m[I 2021-01-13 11:39:52,943][0m A new study created in memory with name: no-name-76fa9773-98e2-419b-b3e9-014342aa65b8[0m
[32m[I 2021-01-13 11:39:54,026][0m Trial 0 finished with value: 417.3797320560538 and parameters: {'min_samples_split': 0.25350059739092334, 'criterion': 'mse', 'n_estimators': 157, 'max_features': 'log2'}. Best is trial 0 with value: 417.3797320560538.[0m
[32m[I 2021-01-13 11:40:36,417][0m Trial 1 finished with value: 698.9371249690789 and parameters: {'min_samples_split': 0.6408643943729035, 'criterion': 'mae', 'n_estimators': 193, 'max_features': 'auto'}. Best is trial 0 with value: 417.3797320560538.[0m
[32m[I 2021-01-13 11:40:41,102][0m Trial 2 finished with value: 531.076475910281 and parameters: {'min_samples_split': 0.6105777961377941, 'criterion': 'mse', 'n_estimators': 72, 'max_features': 'auto'}. Best is trial 0 with value: 417.3797320560538.[0m
[32m[I 2021-01-13 11:40:42,455][0m Trial 3 finished with value: 400.7199654855606 and parameter

## 1.3 Gradient Boosting

### 1.3.1 Default hyper-parameters

In [18]:
# implementation using sklearn
np.random.seed(random_state)
gb_sk_def = ensemble.GradientBoostingRegressor(random_state=random_state, verbose=verbose)

start_time = time.time()
gb_sk_def = gb_sk_def.fit(x_train, y_train)
y_val_pred = gb_sk_def.predict(x_val)
score =  math.sqrt(metrics.mean_squared_error(y_val, y_val_pred))
end_time = time.time()

summary['gradient_boosting'] = summary['gradient_boosting'].append(pd.Series({
    'Time (sec)': '{:.4f}'.format(end_time - start_time), 
    'Score (RMSE)': score,
    'Learning rate': 0.1,
    'N. estimators': 100,
    'Criterion': 'friedman_mse', 
    'Min. samples split': 2, 
    'Min. samples leaf': 1,
    'Max. depth': 3,
    'Max. leaf nodes': 'None'
    },
    name='default'))

In [None]:
# implementation using xgboost
import xgboost as xgb

dtrain = xgb.DMatrix(mat_x_train, label=mat_y_train)
dtest = xgb.DMatrix(mat_x_test, label=mat_y_test)

model = xgb.XGBRegressor(objective='reg:squarederror')

start_time = time.time()
model = model.fit(x_train, y_train)
y_val_pred = model.predict(x_val)
score = math.sqrt(metrics.mean_squared_error(y_val, y_val_pred))
end_time = time.time()

summary['gradient_boosting'] = summary['gradient_boosting'].append(pd.Series({
    'Time (sec)': '{:.4f}'.format(end_time - start_time), 
    'Score (RMSE)': score,
    'N. estimators': model.get_params()['n_estimators']
    },
    name='default'))

### 1.3.2 Hyper-parameter tunning

In [19]:
#print(dir(dtrain))
#print('\n', dir(model))
min_max_leaf_nodes = 2
max_max_leaf_nodes = 20
min_min_samples_leaf = 1
max_min_samples_leaf = 10

In [20]:
# hyperparam tuning for sklearn ensemble.GradientBoostingRegressor
np.random.seed(random_state)

def gradboosting_objective(trial):  
    gb_sk_opt = None
    some = 0
    
    if some == 1:
        learning_rate = trial.suggest_uniform('learning_rate', 0+sys.float_info.min, 1)
        n_estimators = trial.suggest_int('n_estimators', min_n_estimators, max_n_estimators)
        criterion = trial.suggest_categorical('criterion', ['mse','friedman_mse', 'mae'])
        min_samples_split = trial.suggest_uniform('min_samples_split', 0+sys.float_info.min, 1)
        min_samples_leaf = trial.suggest_int('min_samples_split',min_min_samples_leaf, max_min_samples_leaf)
        max_depth = trial.suggest_int('max_depth', min_max_depth, max_max_depth)
        max_leaf_nodes = trial.suggest_int('max_depth', min_max_leaf_nodes, max_max_leaf_nodes)
        
        
        clf = ensemble.GradientBoostingRegressor(learning_rate=learning_rate, 
                                                   n_estimators=n_estimators,
                                                   criterion=criterion,
                                                   min_samples_split=min_samples_split,
                                                   min_samples_leaf=min_samples_leaf,
                                                   max_depth=max_depth,
                                                   max_leaf_nodes=max_leaf_nodes,
                                                   random_state=random_state,
                                                   verbose=verbose)
    
    else:
        learning_rate = trial.suggest_uniform('learning_rate', 0+sys.float_info.min, 1)
        n_estimators = trial.suggest_int('n_estimators', min_n_estimators, max_n_estimators)
        #criterion = trial.suggest_categorical('criterion', ['mse','friedman_mse', 'mae'])
        min_samples_split = trial.suggest_uniform('min_samples_split', 0+sys.float_info.min, 1)
        #min_samples_leaf = trial.suggest_int('min_samples_split',min_min_samples_leaf, max_min_samples_leaf)
        max_depth = trial.suggest_int('max_depth', min_max_depth, max_max_depth)
        #max_leaf_nodes = trial.suggest_int('max_depth', min_max_leaf_nodes, max_max_leaf_nodes)
        
        
        clf = ensemble.GradientBoostingRegressor(learning_rate=learning_rate, 
                                                   n_estimators=n_estimators,
                                                   #criterion=criterion,
                                                   min_samples_split=min_samples_split,
                                                  # min_samples_leaf=min_samples_leaf,
                                                   max_depth=max_depth,
                                                  # max_leaf_nodes=max_leaf_nodes,
                                                   random_state=random_state,
                                                   verbose=verbose)
    
        
    
    clf = clf.fit(x_train, y_train)
    y_val_pred = clf.predict(x_val)
    return math.sqrt(metrics.mean_squared_error(y_val, y_val_pred))

gb_optuna = optuna.create_study(direction='minimize')
start_time = time.time()
gb_optuna.optimize(gradboosting_objective, n_trials=budget)
end_time = time.time()

summary['gradient_boosting'] = summary['gradient_boosting'].append(pd.Series({
    'Time (sec)': '{:.4f}'.format(end_time - start_time), 
    'Score (RMSE)': gb_optuna.best_value,
    'Learning rate': gb_optuna.best_params['learning_rate'],
    'N. estimators': gb_optuna.best_params['n_estimators'],
    'Criterion': 'friedman_mse', 
    'Min. samples split': gb_optuna.best_params['min_samples_split'], 
    #'Min. samples leaf': gb_optuna.best_params['min_samples_leaf'],
    'Min. samples leaf': 1,
    'Max. depth': gb_optuna.best_params['max_depth'],
    'Max. leaf nodes': 'None'
    #'Max. leaf nodes': gb_optuna.best_params['max_leaf_nodes']
    },
    name='optuna'))

[32m[I 2021-01-13 09:50:34,141][0m A new study created in memory with name: no-name-b1d1519a-4e2a-4230-a1a2-75d2f55f5597[0m
[32m[I 2021-01-13 09:51:23,541][0m Trial 0 finished with value: 516.1392210643189 and parameters: {'learning_rate': 0.924201322701786, 'n_estimators': 127, 'min_samples_split': 0.14264499434583455, 'max_depth': 4}. Best is trial 0 with value: 516.1392210643189.[0m
[32m[I 2021-01-13 09:51:34,293][0m Trial 1 finished with value: 397.64320467369106 and parameters: {'learning_rate': 0.24880511526807125, 'n_estimators': 90, 'min_samples_split': 0.992062606787323, 'max_depth': 19}. Best is trial 1 with value: 397.64320467369106.[0m
[32m[I 2021-01-13 09:52:00,096][0m Trial 2 finished with value: 380.711099231849 and parameters: {'learning_rate': 0.11769016943457755, 'n_estimators': 67, 'min_samples_split': 0.6939913120059997, 'max_depth': 12}. Best is trial 2 with value: 380.711099231849.[0m
[32m[I 2021-01-13 09:52:19,810][0m Trial 3 finished with value: 55

In [21]:
summary['knn']

Unnamed: 0,Time (sec),Score (RMSE),N. neighbors,Weights,P
default,0.1176,455.123868,5,uniform,2
optuna,33.7278,424.95488,11,distance,1


In [48]:
summary['random_forest']

Unnamed: 0,Time (sec),Score (RMSE),Min. samples split,Criterion,Max. depth,N. estimators,Max. features
default,82.5335,375.560721,2.0,mse,,100,1
optuna,233.6654,374.129312,0.00872,mse,,171,0.667976
optuna,1031.8257,373.977008,0.00742,mae,,101,sqrt


In [23]:
summary['gradient_boosting']

Unnamed: 0,Time (sec),Score (RMSE),Criterion,Learning rate,Max. depth,Max. leaf nodes,Min. samples leaf,Min. samples split,N. estimators
default,30.6806,389.357849,friedman_mse,0.1,3.0,,1.0,2.0,100.0
optuna,1896.7269,374.685337,friedman_mse,0.031102,17.0,,1.0,0.316966,143.0


In [22]:
#Dummy regressor(mean)
math.sqrt(metrics.mean_squared_error(y_val, [y_val.mean() for i in range(len(y_val))]))

666.6691142412726

[32m[I 2021-01-13 13:26:39,766][0m Trial 1 finished with value: 423.6779507465516 and parameters: {'k': 347, 'min_samples_split': 0.37053292247586933, 'criterion': 'mae', 'max_depth': 12, 'n_estimators': 465, 'max_features': 0.5351454204911513}. Best is trial 5 with value: 394.7416180729391.[0m


# 2. ATTRIBUTE SELECTION

## 2.1 Select from all attributes

**Are all 550 input attributes actually necessary in order to get a good model? Is it possible to have an accurate model that uses fewer than 550 variables? How many?**

For this question we will be using the best model we had in previous section and now include the parameter for select only certain attributes.

In [13]:
min_max_depth = 2
max_max_depth = 32
min_n_estimators = 50
max_n_estimators = 200
min_n_k = 10

In [None]:
np.random.seed(random_state)
def random_forest_objective_attr(trial):
    k = trial.suggest_int('k', min_n_k, x.shape[1])
    min_samples_split = trial.suggest_uniform('min_samples_split', 0+sys.float_info.min, 1)
    criterion = trial.suggest_categorical('criterion', ['mse','mae'])
    max_depth = trial.suggest_int('max_depth', min_max_depth, max_max_depth, log=True)
    n_estimators = trial.suggest_int('n_estimators', min_n_estimators, max_n_estimators)
    max_features = trial.suggest_categorical('max_features', ['auto', 'sqrt', 'log2'])

    clf = Pipeline([
      ('feature_selection', feature_selection.SelectKBest(feature_selection.f_regression, k=k)),
      ('regression', ensemble.RandomForestRegressor(
          random_state=random_state,
          min_samples_split=min_samples_split,
          criterion=criterion,
          max_depth=max_depth,
          n_estimators=n_estimators,
          max_features=max_features
      ))
    ])

    clf = clf.fit(x_train, y_train)
    y_val_pred = clf.predict(x_val)
    return math.sqrt(metrics.mean_squared_error(y_val, y_val_pred))

rf_attr_optuna = optuna.create_study(direction='minimize')
start_time = time.time()
rf_attr_optuna.optimize(random_forest_objective_attr, n_trials=budget, n_jobs=n_jobs)
end_time = time.time()
print(end_time-start_time)

[32m[I 2021-01-13 13:53:57,632][0m A new study created in memory with name: no-name-17af4ec0-df08-4dd8-b45e-e16164dfde4b[0m
[32m[I 2021-01-13 13:53:57,769][0m Trial 0 finished with value: 668.4779174312936 and parameters: {'k': 192, 'min_samples_split': 0.7390228289606785, 'criterion': 'mse', 'max_depth': 7, 'n_estimators': 86, 'max_features': 'sqrt'}. Best is trial 0 with value: 668.4779174312936.[0m
[32m[I 2021-01-13 13:53:58,136][0m Trial 1 finished with value: 719.4778527314924 and parameters: {'k': 65, 'min_samples_split': 0.6694258631419047, 'criterion': 'mae', 'max_depth': 27, 'n_estimators': 150, 'max_features': 'log2'}. Best is trial 0 with value: 668.4779174312936.[0m
[32m[I 2021-01-13 13:53:58,540][0m Trial 2 finished with value: 719.5022353639617 and parameters: {'k': 42, 'min_samples_split': 0.9501910351635974, 'criterion': 'mae', 'max_depth': 13, 'n_estimators': 176, 'max_features': 'log2'}. Best is trial 0 with value: 668.4779174312936.[0m
[32m[I 2021-01-13 

In [None]:
#TODO: Conclusions
print(rf_attr_optuna.best_params, rf_attr_optuna.best_value)

## 2.2 Use only Sotavento attributes
**Is it enough to use only the attributes for the actual Sotavento location? (13th location in the grid)**

We will select only Sotavento attributes and use the best model in previous section to train a model.

In [24]:
sot_attr = []
for attr in x_train.columns:
    if int(attr.split('.')[-1]) == 13:
        sot_attr.append(attr)

x_train_sot = x_train[sot_attr]
x_val_sot = x_val[sot_attr]
x_test_sot = x_test[sot_attr]
print(x_train_sot.shape,x_val_sot.shape,x_test_sot.shape)

(2528, 22) (1299, 22) (2110, 22)


In [17]:
np.random.seed(random_state)
def random_forest_sot_objective(trial):
    min_samples_split = trial.suggest_uniform('min_samples_split', 0+sys.float_info.min, 1)
    criterion = trial.suggest_categorical('criterion', ['mse','mae'])
    max_depth = trial.suggest_int('max_depth', min_max_depth, max_max_depth)
    n_estimators = trial.suggest_int('n_estimators', min_n_estimators, max_n_estimators)
    max_features = trial.suggest_categorical('max_features', ['auto', 'sqrt', 'log2'])

    clf = ensemble.RandomForestRegressor(
        random_state=random_state,
        min_samples_split=min_samples_split,
        criterion=criterion,
        max_depth=max_depth,
        n_estimators=n_estimators,
        max_features=max_features
        )

    clf = clf.fit(x_train, y_train)
    y_val_pred = clf.predict(x_val)
    return math.sqrt(metrics.mean_squared_error(y_val, y_val_pred))

rf_sot_optuna = optuna.create_study(direction='minimize')
start_time = time.time()
rf_sot_optuna.optimize(random_forest_sot_objective, n_trials=budget, n_jobs=n_jobs)
end_time = time.time()

[32m[I 2021-01-13 13:41:40,158][0m A new study created in memory with name: no-name-47d55393-09de-4246-a862-7ba94ba8ee42[0m
[32m[I 2021-01-13 13:41:40,344][0m Trial 1 finished with value: 668.3057723890951 and parameters: {'min_samples_split': 0.7773231353617598, 'criterion': 'mse', 'max_depth': 16, 'n_estimators': 81, 'max_features': 'sqrt'}. Best is trial 1 with value: 668.3057723890951.[0m
[32m[I 2021-01-13 13:41:41,298][0m Trial 0 finished with value: 415.3216110107275 and parameters: {'min_samples_split': 0.20071034246198016, 'criterion': 'mse', 'max_depth': 22, 'n_estimators': 157, 'max_features': 'log2'}. Best is trial 0 with value: 415.3216110107275.[0m
[32m[I 2021-01-13 13:41:41,419][0m Trial 4 finished with value: 668.3204020271736 and parameters: {'min_samples_split': 0.7537659986365126, 'criterion': 'mse', 'max_depth': 8, 'n_estimators': 108, 'max_features': 'log2'}. Best is trial 0 with value: 415.3216110107275.[0m
[32m[I 2021-01-13 13:41:42,324][0m Trial 2 f

KeyboardInterrupt: 

In [None]:
#TODO: Conclusions
print(rf_sot_optuna.best_params, rf_sot_optuna.best_value)