In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression, Lasso, Ridge, ElasticNet
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, mean_absolute_percentage_error
from tabulate import tabulate

In [2]:
# Load the pre-split feature (X) and target (y) CSV files, one pair per data split.
X_train, y_train = pd.read_csv('X_training.csv'), pd.read_csv('y_training.csv')
# NOTE(review): the validation target file is 'y_val.csv' while the features use
# 'X_validation.csv' - confirm the naming mismatch is intentional.
X_validation, y_validation = pd.read_csv('X_validation.csv'), pd.read_csv('y_val.csv')
X_test, y_test = pd.read_csv('X_test.csv'), pd.read_csv('y_test.csv')

In [3]:
# Convert every split from DataFrame to NumPy array.
# Feature matrices keep their shape as produced by np.array.
X_train, X_validation, X_test = (
    np.array(frame) for frame in (X_train, X_validation, X_test)
)

# Targets are additionally flattened to 1-D with ravel().
y_train, y_validation, y_test = (
    np.array(frame).ravel() for frame in (y_train, y_validation, y_test)
)

In [6]:
# Compute the regression metrics for one model on one data split
def calculate_regression_metrics(y_true, y_pred, model_name):
    """Build one results-table row for a model.

    Parameters
    ----------
    y_true : array-like
        Ground-truth target values.
    y_pred : array-like
        Predicted target values, aligned with ``y_true``.
    model_name : str
        Label placed in the first position of the returned row.

    Returns
    -------
    list
        ``[model_name, R2, MSE, RMSE, MAE, MAPE]``. RMSE is the square root
        of MSE; the other values come straight from the scikit-learn metric
        functions.
    """
    score_r2 = r2_score(y_true, y_pred)
    squared_error = mean_squared_error(y_true, y_pred)
    return [
        model_name,
        score_r2,
        squared_error,
        np.sqrt(squared_error),
        mean_absolute_error(y_true, y_pred),
        mean_absolute_percentage_error(y_true, y_pred),
    ]

# Accumulators for the per-model metric rows of each split.
# These are plain lists; they are turned into DataFrames further down.
train_results = []
validation_results = []
test_results = []

# Fixed seed for the stochastic estimators, so that re-running the notebook
# reproduces the same metrics for the tree-based models.
RANDOM_STATE = 42

# Baseline models to evaluate, keyed by the label shown in the results tables
models = {
    'Decision Tree Regressor': DecisionTreeRegressor(random_state=RANDOM_STATE),
    'Random Forest Regressor': RandomForestRegressor(random_state=RANDOM_STATE),
    'Linear Regression': LinearRegression(),
    'Lasso Regression': Lasso(),
    'Ridge Regression': Ridge(),
    'ElasticNet Regression': ElasticNet()
}

# Fit one estimator and record its metrics on every data split
def evaluate_model(model, model_name):
    """Train ``model`` on the training split and log metrics for all splits.

    The estimator is fitted on the notebook-level ``X_train`` / ``y_train``
    and then used to predict the training, validation and test features.
    One metrics row per split is appended to ``train_results``,
    ``validation_results`` and ``test_results`` respectively.

    Parameters
    ----------
    model : estimator
        Object exposing ``fit(X, y)`` and ``predict(X)``.
    model_name : str
        Label stored in each metrics row.
    """
    model.fit(X_train, y_train)

    # Predict on all three splits before recording anything, so a failing
    # prediction leaves the result lists untouched.
    predictions = [
        model.predict(features)
        for features in (X_train, X_validation, X_test)
    ]
    targets = (y_train, y_validation, y_test)
    sinks = (train_results, validation_results, test_results)

    for sink, y_true, y_pred in zip(sinks, targets, predictions):
        sink.append(calculate_regression_metrics(y_true, y_pred, model_name))

# Train and evaluate every baseline model, in the insertion order of `models`.
# Note: `name` and `model` remain bound at notebook scope after the loop ends
# (they hold the last entry of `models`).
for name, model in models.items():
    evaluate_model(model, name)

# Train and evaluate the linear models on polynomially expanded features
def evaluate_poly_models(degree):
    """Fit four linear models on polynomial features and log their metrics.

    The polynomial expansion is fitted on the notebook-level ``X_train`` only
    and then applied unchanged to ``X_validation`` and ``X_test``. For each
    model, one metrics row per split is appended to ``train_results``,
    ``validation_results`` and ``test_results``.

    The row labels do not include ``degree``, so calling this function for
    several degrees yields rows with identical names.

    Parameters
    ----------
    degree : int
        Degree passed to ``PolynomialFeatures``.
    """
    expander = PolynomialFeatures(degree=degree)
    train_poly = expander.fit_transform(X_train)
    validation_poly, test_poly = (
        expander.transform(split) for split in (X_validation, X_test)
    )

    # (label, estimator) pairs, evaluated in this order
    candidates = (
        ('Polynomial Linear Regressor', LinearRegression()),
        ('Polynomial Ridge', Ridge()),
        ('Polynomial Lasso', Lasso()),
        ('Polynomial ElasticNet', ElasticNet()),
    )

    for label, estimator in candidates:
        estimator.fit(train_poly, y_train)

        # All predictions are computed before any row is recorded.
        fitted = [
            estimator.predict(design)
            for design in (train_poly, validation_poly, test_poly)
        ]

        for sink, y_true, y_pred in zip(
            (train_results, validation_results, test_results),
            (y_train, y_validation, y_test),
            fitted,
        ):
            sink.append(calculate_regression_metrics(y_true, y_pred, label))

# Evaluate the polynomial models
degree = 2
evaluate_poly_models(degree)

# Assemble one DataFrame per split from the accumulated metric rows
headers = ["Nome do Algoritmo", "R2", "MSE", "RMSE", "MAE", "MAPE"]
train_df, validation_df, test_df = (
    pd.DataFrame(rows, columns=headers)
    for rows in (train_results, validation_results, test_results)
)

# Print each split's results as a grid-style table, preceded by its title
for title, frame in (
    ("Performance sobre os dados de treinamento", train_df),
    ("\nPerformance sobre os dados de validação", validation_df),
    ("\nPerformance sobre os dados de teste", test_df),
):
    print(title)
    print(tabulate(frame, headers='keys', tablefmt='grid'))


Performance sobre os dados de treinamento
+----+-----------------------------+------------+----------+----------+-----------+-----------+
|    | Nome do Algoritmo           |         R2 |      MSE |     RMSE |       MAE |      MAPE |
|  0 | Decision Tree Regressor     | 0.991757   |   3.9404 |  1.98504 |  0.214099 | 0.0826279 |
+----+-----------------------------+------------+----------+----------+-----------+-----------+
|  1 | Random Forest Regressor     | 0.90312    |  46.3101 |  6.80515 |  4.85127  | 2.54656   |
+----+-----------------------------+------------+----------+----------+-----------+-----------+
|  2 | Linear Regression           | 0.0460583  | 455.996  | 21.3541  | 16.9982   | 8.65319   |
+----+-----------------------------+------------+----------+----------+-----------+-----------+
|  3 | Lasso Regression            | 0.00740091 | 474.475  | 21.7824  | 17.3055   | 8.7367    |
+----+-----------------------------+------------+----------+----------+-----------+-----------