# Considerações iniciais

Para fazer um stacking de qualidade, precisamos que nossos modelos tenham diversidade entre si; ou seja, variando os hiperparâmetros, devemos fazer com que eles sejam diferentes uns dos outros. Neste projeto, irei utilizar otimização bayesiana (Bayesian optimization).

# Detalhe: todos os algoritmos foram executados e suas previsões foram salvas para utilização nos próximos notebooks

# Importações 

In [None]:
import pandas as pd
import numpy as np
import joblib as jb
from sklearn.metrics import mean_squared_error
from lightgbm import LGBMRegressor
from skopt import gp_minimize
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from lightgbm import LGBMRegressor

# Leitura e separação das bases em X e Y

In [None]:
# Load the three splits: level-0 training, level-1 training and validation.
train_lvl0 = pd.read_csv('train_lvl0.csv')
train_lvl1 = pd.read_csv('train_lvl1.csv')
val = pd.read_csv("valid.csv")

# For every split the target is the 'Sales' column and the features are
# all remaining columns except 'Date'.
Y_train0 = train_lvl0['Sales']
X_train0 = train_lvl0.drop(columns=['Date', 'Sales'])

Y_train1 = train_lvl1['Sales']
X_train1 = train_lvl1.drop(columns=['Date', 'Sales'])

Y_val = val['Sales']
X_val = val.drop(columns=['Date', 'Sales'])

# LGBMRegressor

In [None]:
def tune_lgbm(params):
    """Objective for ``gp_minimize``: fit one LightGBM configuration and score it.

    ``params`` holds, in order, ``num_leaves``, ``min_data_in_leaf``,
    ``learning_rate`` and ``n_estimators``. The model is fitted on the
    level-0 training split. Its predictions for the level-1 training split
    and for the validation split are dumped with joblib under
    ``./preds_train1/`` and ``./preds_val/``; the file names encode the
    hyperparameters.

    Returns the RMSE of the predictions on the level-1 training split.
    """
    num_leaves, min_data_in_leaf, learning_rate, n_estimators = params

    regressor = LGBMRegressor(
        num_leaves=num_leaves,
        min_data_in_leaf=min_data_in_leaf,
        learning_rate=learning_rate,
        n_estimators=n_estimators,
        random_state=0,
    )
    regressor.fit(X_train0, Y_train0)

    # Level-1 training predictions: saved to disk and used for the score.
    train1_preds = regressor.predict(X_train1)
    jb.dump(train1_preds, "./preds_train1/lgbm_{}_{}_{}_{}.pkl.z".format(*params))
    rmse = np.sqrt(mean_squared_error(Y_train1, train1_preds))

    # Validation predictions: saved to disk only, they do not affect the score.
    val_preds = regressor.predict(X_val)
    jb.dump(val_preds, "./preds_val/lgbm_{}_{}_{}_{}.pkl.z".format(*params))

    print(params, rmse)
    print()

    return rmse

# Search space for tune_lgbm, listed in the order the objective unpacks it.
space = [
    (2, 500),      # num_leaves
    (1, 100),      # min_data_in_leaf
    (1e-4, 1e-1),  # learning_rate
    (50, 150),     # n_estimators
]

# 20 evaluations of the objective, with a fixed seed for the optimiser.
res = gp_minimize(func=tune_lgbm, dimensions=space, n_calls=20, random_state=0, verbose=0)

# RandomForestRegressor

In [None]:
def tune_rf(params):
    """Objective for ``gp_minimize``: fit one random-forest configuration and score it.

    ``params`` holds, in order, ``n_estimators``, ``max_depth`` and
    ``min_samples_leaf``. The model is fitted on the level-0 training split.
    Its predictions for the level-1 training split and for the validation
    split are dumped with joblib under ``./preds_train1/`` and
    ``./preds_val/``; the file names encode the hyperparameters.

    Returns the RMSE of the predictions on the level-1 training split.
    """
    n_estimators, max_depth, min_samples_leaf = params
    mdl = RandomForestRegressor(n_estimators=n_estimators,
                                max_depth=max_depth,
                                min_samples_leaf=min_samples_leaf,
                                n_jobs=-1,
                                # Fixed seed so the saved predictions and the
                                # score seen by the optimiser are reproducible,
                                # matching the seeded LightGBM objective.
                                random_state=0)

    mdl.fit(X_train0, Y_train0)

    # Level-1 training predictions: saved to disk and used for the score.
    p = mdl.predict(X_train1)
    model_name_train1 = "./preds_train1/rf_{}_{}_{}.pkl.z".format(n_estimators, max_depth, min_samples_leaf)
    jb.dump(p, model_name_train1)

    metric = np.sqrt(mean_squared_error(Y_train1, p))

    # Validation predictions: saved to disk only, they do not affect the score.
    p = mdl.predict(X_val)
    model_name_val1 = "./preds_val/rf_{}_{}_{}.pkl.z".format(n_estimators, max_depth, min_samples_leaf)
    jb.dump(p, model_name_val1)

    print(params, metric)
    print()
    return metric
# Search space for tune_rf, listed in the order the objective unpacks it.
space = [
    (100, 200),  # n_estimators
    (1, 20),     # max_depth
    (1, 100),    # min_samples_leaf
]

# 20 evaluations of the objective, with a fixed seed for the optimiser.
res = gp_minimize(func=tune_rf, dimensions=space, n_calls=20, random_state=0, verbose=0)

# KNeighborsRegressor

In [None]:
def tune_knn(params):
    """Objective for ``gp_minimize``: fit one k-NN configuration and score it.

    ``params`` holds, in order, ``n_neighbors`` and the distance ``metric``
    name. The model is fitted on the level-0 training split. Its predictions
    for the level-1 training split and for the validation split are dumped
    with joblib under ``./preds_train1/`` and ``./preds_val/``; the file
    names encode the hyperparameters.

    Returns the RMSE of the predictions on the level-1 training split.
    """
    n_neighbors, distance = params

    regressor = KNeighborsRegressor(
        n_neighbors=n_neighbors,
        metric=distance,
        n_jobs=-1,
    )
    regressor.fit(X_train0, Y_train0)

    # Level-1 training predictions: saved to disk and used for the score.
    train1_preds = regressor.predict(X_train1)
    jb.dump(train1_preds, "./preds_train1/knn_{}_{}.pkl.z".format(*params))
    rmse = np.sqrt(mean_squared_error(Y_train1, train1_preds))

    # Validation predictions: saved to disk only, they do not affect the score.
    val_preds = regressor.predict(X_val)
    jb.dump(val_preds, "./preds_val/knn_{}_{}.pkl.z".format(*params))

    print(params, rmse)
    print()
    return rmse

# Search space for tune_knn, listed in the order the objective unpacks it.
# NOTE(review): tune_knn does not set `p`, so 'minkowski' presumably uses the
# scikit-learn default p=2 and matches 'euclidean' -- confirm the duplicate
# option is intended.
space = [
    (1, 50),                                              # n_neighbors
    ['euclidean', 'manhattan', 'chebyshev', 'minkowski'],  # metric
]

# 30 evaluations of the objective, with a fixed seed for the optimiser.
res = gp_minimize(func=tune_knn, dimensions=space, n_calls=30, random_state=0, verbose=0)

# SVR

In [None]:
def tune_svm(params):
    """Objective for ``gp_minimize``: fit one SVR configuration and score it.

    ``params`` holds, in order, the ``kernel`` name and the regularisation
    parameter ``C``. The model is fitted on the level-0 training split. Its
    predictions for the level-1 training split and for the validation split
    are dumped with joblib under ``./preds_train1/`` and ``./preds_val/``;
    the file names encode the hyperparameters.

    Returns the RMSE of the predictions on the level-1 training split.
    """
    kernel, C = params

    regressor = SVR(kernel=kernel, C=C)
    regressor.fit(X_train0, Y_train0)

    # Level-1 training predictions: saved to disk and used for the score.
    train1_preds = regressor.predict(X_train1)
    jb.dump(train1_preds, "./preds_train1/svm_{}_{}.pkl.z".format(*params))
    rmse = np.sqrt(mean_squared_error(Y_train1, train1_preds))

    # Validation predictions: saved to disk only, they do not affect the score.
    val_preds = regressor.predict(X_val)
    jb.dump(val_preds, "./preds_val/svm_{}_{}.pkl.z".format(*params))

    print(params, rmse)
    print()
    return rmse
# Search space for tune_svm, listed in the order the objective unpacks it.
space = [
    ['sigmoid', 'rbf'],  # kernel
    (0.1, 10),           # C
]

# 20 evaluations of the objective, with a fixed seed for the optimiser.
res = gp_minimize(func=tune_svm, dimensions=space, n_calls=20, random_state=0, verbose=0)

Explicando o funcionamento da otimização:
Em `space` temos os parâmetros a serem tunados, definidos em intervalos: (1,10) significa que as otimizações irão de 1 a 10 (1 2 3 4 5 6 7 8 9 10). Porém, diferentemente do grid search, não se trata de um algoritmo de força bruta.

De forma análoga, para os parâmetros categóricos, como 'sigmoid' e 'rbf', as opções serão testadas uma de cada vez.