# Imports

In [1]:
import pandas as pd
import numpy as np
import optuna
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score


from sklearn.tree import DecisionTreeRegressor
from sklearn.preprocessing import PolynomialFeatures
from sklearn.ensemble import RandomForestRegressor

  from .autonotebook import tqdm as notebook_tqdm


# Helper Functions

In [9]:
def calculate_metrics(y_true, y_pred, model_name):
    # Calcular R2
    r2 = r2_score(y_true, y_pred)
    
    # Calcular MSE
    mse = mean_squared_error(y_true, y_pred)
    
    # Calcular RMSE
    rmse = np.sqrt(mse)
    
    # Calcular MAE
    mae = mean_absolute_error(y_true, y_pred)
    
    # Calcular MAPE
    y_true_arr, y_pred_arr = np.array(y_true), np.array(y_pred)
    mape = np.mean(np.abs((y_true_arr - y_pred_arr) / y_true_arr)) * 100
    
    return {
        "model_name": model_name,
        "R2": r2,
        "MSE": mse,
        "RMSE": rmse,
        "MAE": mae,
        "MAPE": mape
    }


# Data load

In [6]:
# Dados de Treinamento
X_train = pd.read_csv('../../data/Regressao/X_training.csv')
y_train = pd.read_csv('../../data/Regressao/y_training.csv').values.ravel()

# Dados de Test
X_test = pd.read_csv('../../data/Regressao/X_test.csv')
y_test = pd.read_csv('../../data/Regressao/y_test.csv').values.ravel()

# Dados de Validacao
X_val = pd.read_csv('../../data/Regressao/X_validation.csv')
y_val = pd.read_csv('../../data/Regressao/y_val.csv').values.ravel()

# Machine Learning

## Decision Tree Regression

In [32]:
def dt_fine_tuning(trial):
    max_depth = trial.suggest_int('max_depth', 2,100)
    dt_model = DecisionTreeRegressor(max_depth=max_depth)
    dt_model.fit(X_train, y_train)
    y_pred = dt_model.predict(X_val)

    return calculate_metrics(y_val, y_pred, 'Decision Tree')['RMSE']


In [36]:
study = optuna.create_study(direction='minimize')
study.optimize(dt_fine_tuning, n_trials=100)

[I 2023-08-24 05:44:42,897] A new study created in memory with name: no-name-457c4841-a3b0-41d4-a7bf-3fbc3939b19d


[I 2023-08-24 05:44:43,335] Trial 0 finished with value: 24.81939354806063 and parameters: {'max_depth': 90}. Best is trial 0 with value: 24.81939354806063.
[I 2023-08-24 05:44:43,736] Trial 1 finished with value: 24.775432690699933 and parameters: {'max_depth': 59}. Best is trial 1 with value: 24.775432690699933.
[I 2023-08-24 05:44:44,133] Trial 2 finished with value: 24.96646246658271 and parameters: {'max_depth': 71}. Best is trial 1 with value: 24.775432690699933.
[I 2023-08-24 05:44:44,557] Trial 3 finished with value: 24.955350143904226 and parameters: {'max_depth': 37}. Best is trial 1 with value: 24.775432690699933.
[I 2023-08-24 05:44:44,964] Trial 4 finished with value: 25.11935903561836 and parameters: {'max_depth': 30}. Best is trial 1 with value: 24.775432690699933.
[I 2023-08-24 05:44:45,347] Trial 5 finished with value: 24.581984630523106 and parameters: {'max_depth': 22}. Best is trial 5 with value: 24.581984630523106.
[I 2023-08-24 05:44:45,532] Trial 6 finished with 

In [37]:
optuna.visualization.plot_optimization_history(study)

ValueError: Mime type rendering requires nbformat>=4.2.0 but it is not installed

In [38]:
dt_best_params = study.best_params
dt_best_params

{'max_depth': 5}

## Random Forest Regressor

In [7]:
def rf_fine_tuning(trial):
    n_estimators = trial.suggest_int('n_estimators', 10,200)
    max_depth = trial.suggest_int('max_depth', 2,50)
    rf_model = RandomForestRegressor(n_estimators = n_estimators, max_depth=max_depth)
    rf_model.fit(X_train, y_train)
    y_pred = rf_model.predict(X_val)

    return calculate_metrics(y_val, y_pred, 'Random Forest')['RMSE']

In [10]:
study = optuna.create_study(direction='minimize')
study.optimize(rf_fine_tuning, n_trials=100)

[I 2023-08-30 08:16:08,273] A new study created in memory with name: no-name-495f3be9-9c82-43cc-a834-98216c468498
[I 2023-08-30 08:16:21,915] Trial 0 finished with value: 18.164772892443366 and parameters: {'n_estimators': 23, 'max_depth': 50}. Best is trial 0 with value: 18.164772892443366.
[I 2023-08-30 08:18:01,661] Trial 1 finished with value: 17.730473967826537 and parameters: {'n_estimators': 183, 'max_depth': 45}. Best is trial 1 with value: 17.730473967826537.
[I 2023-08-30 08:18:51,196] Trial 2 finished with value: 17.82874999027615 and parameters: {'n_estimators': 93, 'max_depth': 38}. Best is trial 1 with value: 17.730473967826537.
[I 2023-08-30 08:18:57,748] Trial 3 finished with value: 18.481848359077937 and parameters: {'n_estimators': 14, 'max_depth': 44}. Best is trial 1 with value: 17.730473967826537.
[I 2023-08-30 08:19:43,608] Trial 4 finished with value: 17.929504135880197 and parameters: {'n_estimators': 98, 'max_depth': 38}. Best is trial 1 with value: 17.73047396

In [11]:
optuna.visualization.plot_optimization_history(study)

ValueError: Mime type rendering requires nbformat>=4.2.0 but it is not installed

In [12]:
rf_best_params = study.best_params
rf_best_params

{'n_estimators': 183, 'max_depth': 45}

## Polinomial Regression

In [61]:
# poly = PolynomialFeatures(degree=2)
# poly_features = poly.fit_transform(X_train)
# X_poly_val = poly.transform(X_val)

# model = LinearRegression()
# model.fit(poly_features, y_train)
# y_pred = model.predict(X_poly_val)


In [None]:
# def pol_reg_fine_tuning(trial):
#     degree = trial.suggest_int('degree', 2,5)

#     poly = PolynomialFeatures(degree=degree)
#     poly_features = poly.fit_transform(X_train)
#     X_poly_val = poly.transform(X_val)

#     model = LinearRegression()
#     model.fit(poly_features, y_train)
#     y_pred = model.predict(X_poly_val)

#     return calculate_metrics(y_val, y_pred, 'Polinomial Regression')['RMSE']


In [None]:
# study = optuna.create_study(direction='minimize')
# study.optimize(pol_reg_fine_tuning, n_trials=10)

[I 2023-08-24 06:21:34,065] A new study created in memory with name: no-name-4c779134-470e-451c-9609-0cb0533dea81
[I 2023-08-24 06:21:34,958] Trial 0 finished with value: 22.367973841803234 and parameters: {'degree': 3}. Best is trial 0 with value: 22.367973841803234.
