# Imports

In [7]:
import pandas as pd
import numpy as np
import optuna
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import plotly

from sklearn.tree import DecisionTreeRegressor
from sklearn.preprocessing import PolynomialFeatures
from sklearn.ensemble import RandomForestRegressor

from sklearn.linear_model import LinearRegression, Lasso, Ridge, ElasticNet


import logging

# Definir o nível de log para WARNING
optuna.logging.set_verbosity(optuna.logging.WARNING)

import warnings
from sklearn.exceptions import ConvergenceWarning

warnings.filterwarnings("ignore", category=ConvergenceWarning)


# Helper Functions

In [8]:
def calculate_metrics(y_true, y_pred, model_name):
    # Calcular R2
    r2 = r2_score(y_true, y_pred)
    
    # Calcular MSE
    mse = mean_squared_error(y_true, y_pred)
    
    # Calcular RMSE
    rmse = np.sqrt(mse)
    
    # Calcular MAE
    mae = mean_absolute_error(y_true, y_pred)
    
    # Calcular MAPE
    y_true_arr, y_pred_arr = np.array(y_true), np.array(y_pred)
    mape = np.mean(np.abs((y_true_arr - y_pred_arr) / y_true_arr)) * 100
    
    return {
        "model_name": model_name,
        "R2": r2,
        "MSE": mse,
        "RMSE": rmse,
        "MAE": mae,
        "MAPE": mape
    }


# Data load

In [9]:
# Dados de Treinamento
X_train = pd.read_csv('../../data/Regressao/X_training.csv')
y_train = pd.read_csv('../../data/Regressao/y_training.csv').values.ravel()

# Dados de Test
X_test = pd.read_csv('../../data/Regressao/X_test.csv')
y_test = pd.read_csv('../../data/Regressao/y_test.csv').values.ravel()

# Dados de Validacao
X_val = pd.read_csv('../../data/Regressao/X_validation.csv')
y_val = pd.read_csv('../../data/Regressao/y_val.csv').values.ravel()

# Machine Learning

## Decision Tree Regression

In [10]:
def dt_fine_tuning(trial):
    max_depth = trial.suggest_int('max_depth', 2,100)
    dt_model = DecisionTreeRegressor(max_depth=max_depth)
    dt_model.fit(X_train, y_train)
    y_pred = dt_model.predict(X_val)

    return calculate_metrics(y_val, y_pred, 'Decision Tree')['RMSE']


In [11]:
study = optuna.create_study(direction='minimize')
study.optimize(dt_fine_tuning, n_trials=10)

In [12]:
optuna.visualization.plot_optimization_history(study)

In [13]:
dt_best_params = study.best_params
dt_best_params

{'max_depth': 3}

## Random Forest Regressor

In [14]:
def rf_fine_tuning(trial):
    n_estimators = trial.suggest_int('n_estimators', 10,200)
    max_depth = trial.suggest_int('max_depth', 2,50)
    rf_model = RandomForestRegressor(n_estimators = n_estimators, max_depth=max_depth)
    rf_model.fit(X_train, y_train)
    y_pred = rf_model.predict(X_val)

    return calculate_metrics(y_val, y_pred, 'Random Forest')['RMSE']

In [15]:
study = optuna.create_study(direction='minimize')
study.optimize(rf_fine_tuning, n_trials=10)

In [16]:
optuna.visualization.plot_optimization_history(study)

In [17]:
rf_best_params = study.best_params
rf_best_params

{'n_estimators': 102, 'max_depth': 45}

## Polinomial Regression

In [18]:
# poly = PolynomialFeatures(degree=5)
# poly_features = poly.fit_transform(X_train)
# X_poly_val = poly.transform(X_val)

# model = LinearRegression()
# model.fit(poly_features, y_train)
# y_pred = model.predict(X_poly_val)


In [19]:
def pol_reg_fine_tuning(trial):
    degree = trial.suggest_int('degree', 2,4)

    poly = PolynomialFeatures(degree=degree)
    poly_features = poly.fit_transform(X_train)
    X_poly_val = poly.transform(X_val)

    model = LinearRegression()
    model.fit(poly_features, y_train)
    y_pred = model.predict(X_poly_val)

    return calculate_metrics(y_val, y_pred, 'Polinomial Regression')['RMSE']


In [20]:
study = optuna.create_study(direction='minimize')
study.optimize(pol_reg_fine_tuning, n_trials=5)

In [21]:
optuna.visualization.plot_optimization_history(study)

In [22]:
pol_best_params = study.best_params
pol_best_params

{'degree': 3}

## Lasso

In [23]:
def lasso_fine_tuning(trial):
    #degree = trial.suggest_int('degree', 2,4)
    alpha = trial.suggest_float('alpha', 0.1,5.0)
    #l1_ratio = trial.suggest_int('l1_ratio', 1,10)
    max_iter = trial.suggest_int('max_iter', 500,3000)

    model_lasso = Lasso(alpha = alpha, max_iter = max_iter)
    model_lasso.fit(X_train, y_train)

    y_pred = model_lasso.predict(X_val)

    return calculate_metrics(y_val, y_pred, 'Lasso')['RMSE']


In [24]:
study = optuna.create_study(direction='minimize')
study.optimize(lasso_fine_tuning, n_trials=500)

In [25]:
optuna.visualization.plot_optimization_history(study)

In [26]:
lasso_best_params = study.best_params
lasso_best_params

{'alpha': 0.10005528026157884, 'max_iter': 1685}

## Ridge

In [27]:
def ridge_fine_tuning(trial):
    #degree = trial.suggest_int('degree', 2,4)
    alpha = trial.suggest_float('alpha', 0.1,5.0)
    #l1_ratio = trial.suggest_int('l1_ratio', 1,10)
    max_iter = trial.suggest_int('max_iter', 500,3000)

    model_ridge = Ridge(alpha = alpha, max_iter = max_iter)
    model_ridge.fit(X_train, y_train)

    y_pred = model_ridge.predict(X_val)

    return calculate_metrics(y_val, y_pred, 'Lasso')['RMSE']


In [28]:
study = optuna.create_study(direction='minimize')
study.optimize(ridge_fine_tuning, n_trials=500)

In [29]:
optuna.visualization.plot_optimization_history(study)

In [30]:
ridge_best_params = study.best_params
ridge_best_params

{'alpha': 4.999992096236858, 'max_iter': 2242}

## Elastic Net

In [31]:
def elasticnet_fine_tuning(trial):
    #degree = trial.suggest_int('degree', 2,4)
    alpha = trial.suggest_float('alpha', 0.1,5.0)
    l1_ratio = trial.suggest_int('l1_ratio', 0,1)
    max_iter = trial.suggest_int('max_iter', 500,3000)

    model_elasticnet = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, max_iter=max_iter)
    model_elasticnet.fit(X_train, y_train)

    y_pred = model_elasticnet.predict(X_val)

    return calculate_metrics(y_val, y_pred, 'Lasso')['RMSE']


In [32]:
study = optuna.create_study(direction='minimize')
study.optimize(elasticnet_fine_tuning, n_trials=10)

In [33]:
optuna.visualization.plot_optimization_history(study)

In [34]:
elastic_best_params = study.best_params
elastic_best_params

{'alpha': 0.33926880895565015, 'l1_ratio': 1, 'max_iter': 1510}