In [1]:
# Import de bibliotecas 
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LinearRegression, Lasso, Ridge, ElasticNet
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
import numpy as np
from tabulate import tabulate

In [2]:
# Carregamento dos dados dos arquivos CSV
X_df = pd.read_csv('../dataset/X_training.csv')
y_df = pd.read_csv('../dataset/y_training.csv')

# Separaração das features (X) e os labels (y)
X = X_df.values
y = y_df.iloc[:, 0].values


In [3]:
# Divisão dos dados (Treino, Validação e Teste)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Dividir os dados de treinamento em treinamento e validação
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=42)

In [4]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

In [5]:
# Está função faz o cáculo do R2, MSE, RMSE, MAE e MAPE

# Está função calcula MAPE com verificação para zeros
def mean_absolute_percentage_error(y_true, y_pred):
    y_true = np.where(y_true == 0, np.finfo(float).eps, y_true)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

# Função para calcular métricas
def metrics_calculation(y_true, y_pred):
    r2 = r2_score(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_true, y_pred)
    mape = mean_absolute_percentage_error(y_true, y_pred)
    return r2, mse, rmse, mae, mape


In [6]:
# Está função avalia e treina os  modelos
def train_and_evaluate_model(model, X_train, y_train, X_val, y_val, X_test, y_test):
    # Treino
    model.fit(X_train, y_train)
    
    # Previsões
    y_pred_train = model.predict(X_train)
    y_pred_val = model.predict(X_val)
    y_pred_test = model.predict(X_test)
    
    # Cálculo das métricas
    metrics_train = metrics_calculation(y_train, y_pred_train)
    metrics_validation = metrics_calculation(y_val, y_pred_val)
    metrics_test = metrics_calculation(y_test, y_pred_test)
    
    return metrics_train, metrics_validation, metrics_test


In [7]:
# Inicializando os modelos
models = {
    "Linear Regression": LinearRegression(),
    "Decision Tree Regressor": DecisionTreeRegressor(),
    "Random Forest Regressor": RandomForestRegressor(),
    "Polynomial Regression": make_pipeline(PolynomialFeatures(degree=2), LinearRegression()),
    "Linear Regression Lasso": Lasso(),
    "Linear Regression Ridge": Ridge(),
    "Linear Regression Elastic Net": ElasticNet(),
    "Polynomial Regression Lasso": make_pipeline(PolynomialFeatures(degree=2), Lasso()),
    "Polynomial Regression Ridge": make_pipeline(PolynomialFeatures(degree=2), Ridge()),
    "Polynomial Regression Elastic Net": make_pipeline(PolynomialFeatures(degree=2), ElasticNet())
}

# Dicionário para armazenar os resultados
results = {}

# Treinar e avaliar cada modelo
for name, model in models.items():
    # Treinamento e avaliação do modelo
    metrics_train, metrics_validation, metrics_test = train_and_evaluate_model(model, X_train, y_train, X_val, y_val, X_test, y_test)
    
    # Armazenando os resultados no dicionário
    results[name] = {
        "train": metrics_train,
        "validation": metrics_validation,
        "test": metrics_test
    }

In [9]:
# Organização dos resultados nas tabelas

# Está função para cria um DataFrame contendo os resultados
def create_df_results(results, type):
    data = {
        "Algoritmo": [],
        "R2": [],
        "MSE": [],
        "RMSE": [],
        "MAE": [],
        "MAPE": []
    }
    for name, metrics in results.items():
        data["Algoritmo"].append(name)
        data["R2"].append(metrics[type][0])
        data["MSE"].append(metrics[type][1])
        data["RMSE"].append(metrics[type][2])
        data["MAE"].append(metrics[type][3])
        data["MAPE"].append(metrics[type][4])
    return pd.DataFrame(data)

# Ajustar a configuração para exibir todas as linhas do DataFrame
pd.set_option('display.max_rows', None)

# Tabelas de resultados
table_train = create_df_results(results, "train")  
table_validation = create_df_results(results, "validation") 
table_test = create_df_results(results, "test") 

In [None]:
table_train


Unnamed: 0,Algoritmo,R2,MSE,RMSE,MAE,MAPE
0,Linear Regression,0.555842,0.108955,0.330084,0.24792,5.99073e+16
1,Decision Tree Regressor,1.0,0.0,0.0,0.0,56.85031
2,Random Forest Regressor,0.982471,0.0043,0.065574,0.023596,5467171000000000.0
3,Polynomial Regression,0.752098,0.060812,0.246601,0.17732,3.927776e+16
4,Linear Regression Lasso,0.0,0.245307,0.495285,0.490615,1.104766e+17
5,Linear Regression Ridge,0.555842,0.108955,0.330084,0.247922,5.99075e+16
6,Linear Regression Elastic Net,0.0,0.245307,0.495285,0.490615,1.104766e+17
7,Polynomial Regression Lasso,0.0,0.245307,0.495285,0.490615,1.104766e+17
8,Polynomial Regression Ridge,0.752098,0.060812,0.246601,0.177319,3.927729e+16
9,Polynomial Regression Elastic Net,0.0,0.245307,0.495285,0.490615,1.104766e+17


In [11]:
table_validation

Unnamed: 0,Algoritmo,R2,MSE,RMSE,MAE,MAPE
0,Linear Regression,0.54807,0.111217,0.333492,0.249915,5.954174e+16
1,Decision Tree Regressor,0.766047,0.057574,0.239946,0.057574,1.279379e+16
2,Random Forest Regressor,0.874149,0.030971,0.175986,0.063976,1.427687e+16
3,Polynomial Regression,0.74614,0.062473,0.249947,0.179253,3.93748e+16
4,Linear Regression Lasso,-0.000146,0.246129,0.496114,0.491437,1.093109e+17
5,Linear Regression Ridge,0.54807,0.111217,0.333492,0.249916,5.954191e+16
6,Linear Regression Elastic Net,-0.000146,0.246129,0.496114,0.491437,1.093109e+17
7,Polynomial Regression Lasso,-0.000146,0.246129,0.496114,0.491437,1.093109e+17
8,Polynomial Regression Ridge,0.74614,0.062473,0.249946,0.179252,3.937429e+16
9,Polynomial Regression Elastic Net,-0.000146,0.246129,0.496114,0.491437,1.093109e+17


In [12]:
table_test

Unnamed: 0,Algoritmo,R2,MSE,RMSE,MAE,MAPE
0,Linear Regression,0.551402,0.110255,0.332047,0.249421,5.922815e+16
1,Decision Tree Regressor,0.756488,0.05985,0.244642,0.05985,1.375643e+16
2,Random Forest Regressor,0.874482,0.030849,0.17564,0.063525,1.418092e+16
3,Polynomial Regression,0.749046,0.061679,0.248352,0.1788,3.875213e+16
4,Linear Regression Lasso,-5e-05,0.245789,0.495771,0.491096,1.097932e+17
5,Linear Regression Ridge,0.551403,0.110255,0.332046,0.249423,5.922831e+16
6,Linear Regression Elastic Net,-5e-05,0.245789,0.495771,0.491096,1.097932e+17
7,Polynomial Regression Lasso,-5e-05,0.245789,0.495771,0.491096,1.097932e+17
8,Polynomial Regression Ridge,0.749047,0.061678,0.248351,0.178799,3.875153e+16
9,Polynomial Regression Elastic Net,-5e-05,0.245789,0.495771,0.491096,1.097932e+17
