In [63]:
import pandas as pd
import numpy as np
import plotly.express as ex

# Load the pre-split training and test sets; both paths are relative.
train_path = "../datasets/train_energy_data.csv"
test_path = "../datasets/test_energy_data.csv"
train_df = pd.read_csv(train_path)
test_df = pd.read_csv(test_path)

In [25]:
# Preview the first five training rows to check column names and value formats.
train_df.head(n=5)

Unnamed: 0,Building Type,Square Footage,Number of Occupants,Appliances Used,Average Temperature,Day of Week,Energy Consumption
0,Residential,7063,76,10,29.84,Weekday,2713.95
1,Commercial,44372,66,45,16.72,Weekday,5744.99
2,Industrial,19255,37,17,14.3,Weekend,4101.24
3,Residential,13265,14,41,32.82,Weekday,3009.14
4,Commercial,13375,26,18,11.92,Weekday,3279.17


In [19]:
# Report the (rows, columns) shape of each split.
train_shape_msg = f'Train dataframe shape: {train_df.shape}'
test_shape_msg = f'Test dataframe shape: {test_df.shape}'
display(train_shape_msg, test_shape_msg)

'Train dataframe shape: (1000, 7)'

'Test dataframe shape: (100, 7)'

In [23]:
# Count the missing values in each column of the training set.
train_df.isnull().sum()

Building Type          0
Square Footage         0
Number of Occupants    0
Appliances Used        0
Average Temperature    0
Day of Week            0
Energy Consumption     0
dtype: int64

In [27]:
# Separate the regression target from the feature columns in both splits.
target_col = 'Energy Consumption'
X_train, y_train = train_df.drop(columns=target_col), train_df[target_col]
X_test, y_test = test_df.drop(columns=target_col), test_df[target_col]

In [55]:
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer


def build_preprocessor(X):
    """Build an unfitted ColumnTransformer from the column dtypes of ``X``.

    ``int64``/``float64`` columns are routed to a ``StandardScaler`` and
    ``object``/``category`` columns to a ``OneHotEncoder`` configured with
    ``drop='first'`` and ``handle_unknown='ignore'``.

    Args:
        X: DataFrame whose column dtypes decide the routing. It is only
            inspected here; nothing is fitted on it.

    Returns:
        A ``ColumnTransformer`` with two steps named ``'num'`` and ``'categ'``.
    """
    # NOTE(review): columns of any other dtype (e.g. bool, int32) land in
    # neither list; their fate depends on ColumnTransformer's `remainder`
    # option, which is left at its default here -- confirm that is intended.
    numeric_features = list(X.select_dtypes(include=['int64', 'float64']).columns)
    categorical_features = list(X.select_dtypes(include=['object', 'category']).columns)

    scaler = StandardScaler()
    # NOTE(review): with drop='first' plus handle_unknown='ignore', a category
    # unseen during fit is presumably encoded the same as the dropped first
    # level -- verify against the installed scikit-learn version.
    encoder = OneHotEncoder(drop='first', handle_unknown='ignore')

    return ColumnTransformer(transformers=[
        ('num', scaler, numeric_features),
        ('categ', encoder, categorical_features),
    ])


# Column groups are inferred from the training features.
preprocessor = build_preprocessor(X_train)

In [62]:
from sklearn.pipeline import Pipeline
from sklearn.linear_model import ElasticNetCV, LassoCV, RidgeCV
from sklearn.metrics import mean_absolute_error, r2_score

# Candidate linear regressors; each one picks its own hyper-parameters from
# the grids below using cv=5.
modelos = {
    'RidgeCV': RidgeCV(alphas=[0.1, 1.0, 10.0, 100.0], cv=5),
    'LassoCV': LassoCV(
        alphas=[0.0001, 0.001, 0.01, 0.1, 1.0],
        cv=5,
        max_iter=10000,
    ),
    'ElasticNetCV': ElasticNetCV(
        l1_ratio=[0.1, 0.3, 0.5, 0.7, 0.9, 1.0],
        alphas=[0.001, 0.01, 0.1, 1.0, 10.0],
        cv=5,
        max_iter=10000,
    ),
}

# One record per model; turned into a DataFrame in a later cell.
results = []

for name, modelo in modelos.items():
    # Every pipeline reuses the same module-level `preprocessor` object,
    # which is therefore refit on each iteration.
    pipeline = Pipeline(steps=[
        ('preprocessing', preprocessor),
        ('regressor', modelo),
    ])

    # Fit on the training split, then score on the held-out test split.
    target_pred = pipeline.fit(X_train, y_train).predict(X_test)
    mae = mean_absolute_error(y_test, target_pred)
    r2 = r2_score(y_test, target_pred)

    # The pipeline step is the very object stored in `modelos`, so the
    # selected hyper-parameters can be read from it after fitting.
    fitted_regressor = pipeline.named_steps['regressor']
    best_alpha = fitted_regressor.alpha_
    # Only ElasticNetCV is given an l1_ratio grid; the other models record None.
    best_l1_ratio = getattr(fitted_regressor, 'l1_ratio_', None)

    results.append({
        'Modelo': name,
        'MAE': mae,
        'R2': r2,
        'Alpha': best_alpha,
        'L1_ratio': best_l1_ratio,
    })

    print(f"--- {name} ---")
    print("Melhor alpha:", best_alpha)
    if hasattr(fitted_regressor, 'l1_ratio_'):
        print("Melhor l1_ratio:", fitted_regressor.l1_ratio_)
    print("MAE:", mae)
    print("R²:", r2)
    print()

--- RidgeCV ---
Melhor alpha: 0.1
MAE: 0.12031228396629558
R²: 0.9999999714633484

--- LassoCV ---
Melhor alpha: 0.0001
MAE: 0.013602866714459197
R²: 0.9999999996306028

--- ElasticNetCV ---
Melhor alpha: 0.001
Melhor l1_ratio: 1.0
MAE: 0.013492622774606389
R²: 0.9999999996364866



In [65]:
# Gather the per-model records into a single table and display it.
df_results = pd.DataFrame(data=results)
df_results

Unnamed: 0,Modelo,MAE,R2,Alpha,L1_ratio
0,RidgeCV,0.120312,1.0,0.1,
1,LassoCV,0.013603,1.0,0.0001,
2,ElasticNetCV,0.013493,1.0,0.001,1.0


In [91]:
# Grouped bar chart: one group per model, one bar per metric.
# The title passed here is replaced by the update_layout call below.
fig = ex.bar(
    df_results,
    x="Modelo",
    y=["R2", "MAE"],
    barmode="group",
    title="Comparação de Métricas",
)

layout_options = dict(
    title="Comparação de Métricas dos Modelos",
    title_x=0.5,                     # centre the title
    yaxis_title="Valor da Métrica",  # y-axis label
    legend_title="Métrica",          # legend heading
    template="plotly_dark",
    bargap=0.3,
    height=500,
)
fig.update_layout(**layout_options)

# Label every bar with its value: R2 is shown with 10 decimals, any other
# metric with 3.
for trace in fig.data:
    decimals = 10 if trace.name == "R2" else 3
    trace.text = [f"{val:.{decimals}f}" for val in trace.y]
    trace.textposition = 'outside'

fig.show()