# Imports

In [32]:
import pandas as pd
import numpy as np
import optuna
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import plotly

from sklearn.tree import DecisionTreeRegressor
from sklearn.preprocessing import PolynomialFeatures
from sklearn.ensemble import RandomForestRegressor

from sklearn.linear_model import LinearRegression, Lasso, Ridge, ElasticNet

# Helper Functions

In [3]:
def calculate_metrics(y_true, y_pred, model_name):
    # Calcular R2
    r2 = r2_score(y_true, y_pred)
    
    # Calcular MSE
    mse = mean_squared_error(y_true, y_pred)
    
    # Calcular RMSE
    rmse = np.sqrt(mse)
    
    # Calcular MAE
    mae = mean_absolute_error(y_true, y_pred)
    
    # Calcular MAPE
    y_true_arr, y_pred_arr = np.array(y_true), np.array(y_pred)
    mape = np.mean(np.abs((y_true_arr - y_pred_arr) / y_true_arr)) * 100
    
    return {
        "model_name": model_name,
        "R2": r2,
        "MSE": mse,
        "RMSE": rmse,
        "MAE": mae,
        "MAPE": mape
    }


# Data load

In [4]:
# Dados de Treinamento
X_train = pd.read_csv('../../data/Regressao/X_training.csv')
y_train = pd.read_csv('../../data/Regressao/y_training.csv').values.ravel()

# Dados de Test
X_test = pd.read_csv('../../data/Regressao/X_test.csv')
y_test = pd.read_csv('../../data/Regressao/y_test.csv').values.ravel()

# Dados de Validacao
X_val = pd.read_csv('../../data/Regressao/X_validation.csv')
y_val = pd.read_csv('../../data/Regressao/y_val.csv').values.ravel()

# Machine Learning

## Decision Tree Regression

In [4]:
def dt_fine_tuning(trial):
    max_depth = trial.suggest_int('max_depth', 2,100)
    dt_model = DecisionTreeRegressor(max_depth=max_depth)
    dt_model.fit(X_train, y_train)
    y_pred = dt_model.predict(X_val)

    return calculate_metrics(y_val, y_pred, 'Decision Tree')['RMSE']


In [5]:
study = optuna.create_study(direction='minimize')
study.optimize(dt_fine_tuning, n_trials=10)

[I 2023-09-15 11:47:41,527] A new study created in memory with name: no-name-facd8f19-ee71-422d-85b0-a2f0f76e5b0b
[I 2023-09-15 11:47:41,745] Trial 0 finished with value: 21.5852938990622 and parameters: {'max_depth': 9}. Best is trial 0 with value: 21.5852938990622.
[I 2023-09-15 11:47:42,264] Trial 1 finished with value: 24.934630354098868 and parameters: {'max_depth': 41}. Best is trial 0 with value: 21.5852938990622.
[I 2023-09-15 11:47:42,560] Trial 2 finished with value: 22.782650821623626 and parameters: {'max_depth': 13}. Best is trial 0 with value: 21.5852938990622.
[I 2023-09-15 11:47:43,066] Trial 3 finished with value: 24.79908434654332 and parameters: {'max_depth': 97}. Best is trial 0 with value: 21.5852938990622.
[I 2023-09-15 11:47:43,542] Trial 4 finished with value: 25.03519607593713 and parameters: {'max_depth': 95}. Best is trial 0 with value: 21.5852938990622.
[I 2023-09-15 11:47:43,592] Trial 5 finished with value: 21.43718691701056 and parameters: {'max_depth': 2

In [6]:
optuna.visualization.plot_optimization_history(study)

ValueError: Mime type rendering requires nbformat>=4.2.0 but it is not installed

In [7]:
dt_best_params = study.best_params
dt_best_params

{'max_depth': 2}

## Random Forest Regressor

In [8]:
def rf_fine_tuning(trial):
    n_estimators = trial.suggest_int('n_estimators', 10,200)
    max_depth = trial.suggest_int('max_depth', 2,50)
    rf_model = RandomForestRegressor(n_estimators = n_estimators, max_depth=max_depth)
    rf_model.fit(X_train, y_train)
    y_pred = rf_model.predict(X_val)

    return calculate_metrics(y_val, y_pred, 'Random Forest')['RMSE']

In [9]:
study = optuna.create_study(direction='minimize')
study.optimize(rf_fine_tuning, n_trials=10)

[I 2023-09-15 11:47:53,975] A new study created in memory with name: no-name-3f977cfc-cebd-468a-bea6-6cc5157ad1b4
[I 2023-09-15 11:48:58,448] Trial 0 finished with value: 17.863900419140624 and parameters: {'n_estimators': 182, 'max_depth': 19}. Best is trial 0 with value: 17.863900419140624.
[I 2023-09-15 11:49:54,995] Trial 1 finished with value: 17.8108138603054 and parameters: {'n_estimators': 160, 'max_depth': 27}. Best is trial 1 with value: 17.8108138603054.
[I 2023-09-15 11:50:03,864] Trial 2 finished with value: 18.318664781176345 and parameters: {'n_estimators': 22, 'max_depth': 36}. Best is trial 1 with value: 17.8108138603054.
[I 2023-09-15 11:50:09,390] Trial 3 finished with value: 20.017552198844534 and parameters: {'n_estimators': 37, 'max_depth': 8}. Best is trial 1 with value: 17.8108138603054.
[I 2023-09-15 11:50:41,722] Trial 4 finished with value: 18.14341479038604 and parameters: {'n_estimators': 105, 'max_depth': 16}. Best is trial 1 with value: 17.8108138603054.


In [11]:
optuna.visualization.plot_optimization_history(study)

ValueError: Mime type rendering requires nbformat>=4.2.0 but it is not installed

In [12]:
rf_best_params = study.best_params
rf_best_params

{'n_estimators': 189, 'max_depth': 25}

## Polinomial Regression

In [5]:
# poly = PolynomialFeatures(degree=5)
# poly_features = poly.fit_transform(X_train)
# X_poly_val = poly.transform(X_val)

# model = LinearRegression()
# model.fit(poly_features, y_train)
# y_pred = model.predict(X_poly_val)


In [6]:
def pol_reg_fine_tuning(trial):
    degree = trial.suggest_int('degree', 2,4)

    poly = PolynomialFeatures(degree=degree)
    poly_features = poly.fit_transform(X_train)
    X_poly_val = poly.transform(X_val)

    model = LinearRegression()
    model.fit(poly_features, y_train)
    y_pred = model.predict(X_poly_val)

    return calculate_metrics(y_val, y_pred, 'Polinomial Regression')['RMSE']


In [7]:
study = optuna.create_study(direction='minimize')
study.optimize(pol_reg_fine_tuning, n_trials=5)

[I 2023-09-15 12:00:45,699] A new study created in memory with name: no-name-584912b3-687b-46e4-afc1-dfdf2ba8f7ea
[I 2023-09-15 12:00:59,743] Trial 0 finished with value: 222.76611247058707 and parameters: {'degree': 4}. Best is trial 0 with value: 222.76611247058707.
[I 2023-09-15 12:01:00,508] Trial 1 finished with value: 22.367973841803234 and parameters: {'degree': 3}. Best is trial 1 with value: 22.367973841803234.
[I 2023-09-15 12:01:16,058] Trial 2 finished with value: 222.76611247058707 and parameters: {'degree': 4}. Best is trial 1 with value: 22.367973841803234.
[I 2023-09-15 12:01:17,088] Trial 3 finished with value: 22.367973841803234 and parameters: {'degree': 3}. Best is trial 1 with value: 22.367973841803234.
[I 2023-09-15 12:01:17,218] Trial 4 finished with value: 21.113223900467585 and parameters: {'degree': 2}. Best is trial 4 with value: 21.113223900467585.


In [8]:
optuna.visualization.plot_optimization_history(study)

ValueError: Mime type rendering requires nbformat>=4.2.0 but it is not installed

In [9]:
pol_best_params = study.best_params
pol_best_params

{'degree': 2}

[I 2023-08-24 06:21:34,065] A new study created in memory with name: no-name-4c779134-470e-451c-9609-0cb0533dea81
[I 2023-08-24 06:21:34,958] Trial 0 finished with value: 22.367973841803234 and parameters: {'degree': 3}. Best is trial 0 with value: 22.367973841803234.


## Lasso

In [18]:
def lasso_fine_tuning(trial):
    #degree = trial.suggest_int('degree', 2,4)
    alpha = trial.suggest_float('alpha', 0.1,5.0)
    #l1_ratio = trial.suggest_int('l1_ratio', 1,10)
    max_iter = trial.suggest_int('max_iter', 500,3000)

    model_lasso = Lasso(alpha = alpha, max_iter = max_iter)
    model_lasso.fit(X_train, y_train)

    y_pred = model_lasso.predict(X_val)

    return calculate_metrics(y_val, y_pred, 'Lasso')['RMSE']


In [24]:
study = optuna.create_study(direction='minimize')
study.optimize(lasso_fine_tuning, n_trials=500)

[I 2023-09-15 16:27:52,809] A new study created in memory with name: no-name-1fe036a2-c348-4aa2-a796-75847ef35c1d
[I 2023-09-15 16:27:52,838] Trial 0 finished with value: 21.85204694359998 and parameters: {'alpha': 2.1827841976698124, 'max_iter': 1835}. Best is trial 0 with value: 21.85204694359998.
[I 2023-09-15 16:27:52,866] Trial 1 finished with value: 21.74480004476041 and parameters: {'alpha': 0.6187283815064416, 'max_iter': 2113}. Best is trial 1 with value: 21.74480004476041.
[I 2023-09-15 16:27:52,887] Trial 2 finished with value: 21.740634917239536 and parameters: {'alpha': 0.5433346752133993, 'max_iter': 2611}. Best is trial 2 with value: 21.740634917239536.
[I 2023-09-15 16:27:52,917] Trial 3 finished with value: 21.700524944194182 and parameters: {'alpha': 0.38884373991018306, 'max_iter': 1893}. Best is trial 3 with value: 21.700524944194182.
[I 2023-09-15 16:27:52,936] Trial 4 finished with value: 21.85204694359998 and parameters: {'alpha': 4.984512431584904, 'max_iter': 2

In [25]:
optuna.visualization.plot_optimization_history(study)

ValueError: Mime type rendering requires nbformat>=4.2.0 but it is not installed

In [26]:
lasso_best_params = study.best_params
lasso_best_params

{'alpha': 0.10002352788737827, 'max_iter': 1461}

## Ridge

In [28]:
def ridge_fine_tuning(trial):
    #degree = trial.suggest_int('degree', 2,4)
    alpha = trial.suggest_float('alpha', 0.1,5.0)
    #l1_ratio = trial.suggest_int('l1_ratio', 1,10)
    max_iter = trial.suggest_int('max_iter', 500,3000)

    model_ridge = Ridge(alpha = alpha, max_iter = max_iter)
    model_ridge.fit(X_train, y_train)

    y_pred = model_ridge.predict(X_val)

    return calculate_metrics(y_val, y_pred, 'Lasso')['RMSE']


In [29]:
study = optuna.create_study(direction='minimize')
study.optimize(ridge_fine_tuning, n_trials=500)

[I 2023-09-15 16:30:58,971] A new study created in memory with name: no-name-10430c13-8a2d-4a2b-a13e-ab4c6e40500a
[I 2023-09-15 16:30:59,001] Trial 0 finished with value: 21.411296991184592 and parameters: {'alpha': 2.4437515069180598, 'max_iter': 572}. Best is trial 0 with value: 21.411296991184592.
[I 2023-09-15 16:30:59,011] Trial 1 finished with value: 21.41131302412556 and parameters: {'alpha': 1.8586991001410298, 'max_iter': 2134}. Best is trial 0 with value: 21.411296991184592.
[I 2023-09-15 16:30:59,022] Trial 2 finished with value: 21.411309099719315 and parameters: {'alpha': 1.9959405853722365, 'max_iter': 2017}. Best is trial 0 with value: 21.411296991184592.
[I 2023-09-15 16:30:59,033] Trial 3 finished with value: 21.411320941048217 and parameters: {'alpha': 1.5919907531105149, 'max_iter': 1043}. Best is trial 0 with value: 21.411296991184592.
[I 2023-09-15 16:30:59,051] Trial 4 finished with value: 21.41134702477322 and parameters: {'alpha': 0.7923208123421324, 'max_iter':

In [30]:
optuna.visualization.plot_optimization_history(study)

ValueError: Mime type rendering requires nbformat>=4.2.0 but it is not installed

In [31]:
ridge_best_params = study.best_params
ridge_best_params

{'alpha': 4.999937147342942, 'max_iter': 950}

## Elastic Net

In [36]:
def elasticnet_fine_tuning(trial):
    #degree = trial.suggest_int('degree', 2,4)
    alpha = trial.suggest_float('alpha', 0.1,5.0)
    l1_ratio = trial.suggest_int('l1_ratio', 0,1)
    max_iter = trial.suggest_int('max_iter', 500,3000)

    model_elasticnet = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, max_iter=max_iter)
    model_elasticnet.fit(X_train, y_train)

    y_pred = model_elasticnet.predict(X_val)

    return calculate_metrics(y_val, y_pred, 'Lasso')['RMSE']


In [38]:
study = optuna.create_study(direction='minimize')
study.optimize(elasticnet_fine_tuning, n_trials=10)

[I 2023-09-15 16:34:41,018] A new study created in memory with name: no-name-73c9a7b3-936b-436d-90a0-ff6ae7d5620e
[I 2023-09-15 16:34:41,042] Trial 0 finished with value: 21.78830570776083 and parameters: {'alpha': 1.3780331480595023, 'l1_ratio': 1, 'max_iter': 1630}. Best is trial 0 with value: 21.78830570776083.
[I 2023-09-15 16:34:41,060] Trial 1 finished with value: 21.85204694359998 and parameters: {'alpha': 4.236388501836851, 'l1_ratio': 1, 'max_iter': 931}. Best is trial 0 with value: 21.78830570776083.

Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.510e+06, tolerance: 5.042e+02 Linear regression models with null weight for the l1 regularization term are more efficiently fitted using one of the solvers implemented in sklearn.linear_model.Ridge/RidgeCV instead.

[I 2023-09-15 16:34:45,115] Trial 2 finished with value: 21.769345310453748 and parameters: {'alpha'

In [39]:
optuna.visualization.plot_optimization_history(study)

ValueError: Mime type rendering requires nbformat>=4.2.0 but it is not installed

In [40]:
elastic_best_params = study.best_params
elastic_best_params

{'alpha': 0.11463029885210174, 'l1_ratio': 1, 'max_iter': 2885}