# Imports

In [19]:
import pandas as pd
import numpy as np
import optuna
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score


from sklearn.tree import DecisionTreeRegressor

  from .autonotebook import tqdm as notebook_tqdm


# Helper Functions

In [9]:
def calculate_metrics(y_true, y_pred, model_name):
    # Calcular R2
    r2 = r2_score(y_true, y_pred)
    
    # Calcular MSE
    mse = mean_squared_error(y_true, y_pred)
    
    # Calcular RMSE
    rmse = np.sqrt(mse)
    
    # Calcular MAE
    mae = mean_absolute_error(y_true, y_pred)
    
    # Calcular MAPE
    y_true_arr, y_pred_arr = np.array(y_true), np.array(y_pred)
    mape = np.mean(np.abs((y_true_arr - y_pred_arr) / y_true_arr)) * 100
    
    return {
        "model_name": model_name,
        "R2": r2,
        "MSE": mse,
        "RMSE": rmse,
        "MAE": mae,
        "MAPE": mape
    }


# Data load

In [3]:
# Dados de Treinamento
X_train = pd.read_csv('../../data/Regressao/X_training.csv')
y_train = pd.read_csv('../../data/Regressao/y_training.csv').values.ravel()

# Dados de Test
X_test = pd.read_csv('../../data/Regressao/X_test.csv')
y_test = pd.read_csv('../../data/Regressao/y_test.csv').values.ravel()

# Dados de Validacao
X_val = pd.read_csv('../../data/Regressao/X_validation.csv')
y_val = pd.read_csv('../../data/Regressao/y_val.csv').values.ravel()

# Machine Learning

## Decision Tree Regression

In [32]:
def dt_fine_tuning(trial):
    max_depth = trial.suggest_int('max_depth', 2,100)
    dt_model = DecisionTreeRegressor(max_depth=max_depth)
    dt_model.fit(X_train, y_train)
    y_pred = dt_model.predict(X_val)

    return calculate_metrics(y_val, y_pred, 'Decision Tree')['RMSE']


In [33]:
study = optuna.create_study(direction='maximize')
study.optimize(dt_fine_tuning, n_trials=100)

[I 2023-08-24 05:36:38,834] A new study created in memory with name: no-name-33748551-5264-4f0e-a0df-14a4bc6816d2
[I 2023-08-24 05:36:39,004] Trial 0 finished with value: 21.199652147130326 and parameters: {'max_depth': 7}. Best is trial 0 with value: 21.199652147130326.
[I 2023-08-24 05:36:39,446] Trial 1 finished with value: 24.874275027223135 and parameters: {'max_depth': 84}. Best is trial 1 with value: 24.874275027223135.
[I 2023-08-24 05:36:39,916] Trial 2 finished with value: 24.918837234266007 and parameters: {'max_depth': 53}. Best is trial 2 with value: 24.918837234266007.
[I 2023-08-24 05:36:40,315] Trial 3 finished with value: 24.806287901580383 and parameters: {'max_depth': 86}. Best is trial 2 with value: 24.918837234266007.
[I 2023-08-24 05:36:40,645] Trial 4 finished with value: 24.201393233533107 and parameters: {'max_depth': 18}. Best is trial 2 with value: 24.918837234266007.
[I 2023-08-24 05:36:41,028] Trial 5 finished with value: 24.749732725993702 and parameters: 

In [34]:
optuna.visualization.plot_optimization_history(study)

ValueError: Mime type rendering requires nbformat>=4.2.0 but it is not installed

In [35]:
dt_best_params = study.best_params
dt_best_params

{'max_depth': 97}