In [2]:
import joblib
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, root_mean_squared_error, r2_score

In [3]:
# Load the train/test feature matrices and target vectors from their joblib files.
# NOTE(review): joblib.load unpickles arbitrary objects — only use files from a trusted source.
X_train, X_test, y_train, y_test = (
    joblib.load(artifact)
    for artifact in (
        "X_train_processed.pkl",
        "X_test_processed.pkl",
        "y_train.pkl",
        "y_test.pkl",
    )
)


In [6]:
import numpy as np
import scipy.sparse as sp

# Helper that prints an object's shape together with a label for its container kind
def show_shape(name, obj):
    """Print ``obj.shape`` prefixed by ``name`` and followed by a kind label.

    The label is "sparse matrix" for SciPy sparse containers, "ndarray" for
    NumPy arrays, and the object's class name for anything else (for example
    a pandas Series or DataFrame). Objects that expose no ``shape`` attribute
    get a fallback message naming their type instead.

    Args:
        name: Text shown before the shape.
        obj: Object to inspect.
    """
    if sp.issparse(obj):
        kind = "sparse matrix"
    elif isinstance(obj, np.ndarray):
        kind = "ndarray"
    else:
        kind = type(obj).__name__
        # Anything without a shape cannot be reported; say so and stop here.
        if not hasattr(obj, "shape"):
            print(f"{name}: tipo {kind}, sem atributo shape")
            return
    print(f"{name}: {obj.shape} ({kind})")

# Report the shape of every loaded split, features first and then targets
for split_label, split_obj in (
    ("X_train", X_train),
    ("X_test", X_test),
    ("y_train", y_train),
    ("y_test", y_test),
):
    show_shape(split_label, split_obj)


X_train: (186180, 35) (sparse matrix)
X_test: (46545, 35) (sparse matrix)
y_train: (186180,) (Series)
y_test: (46545,) (Series)


In [7]:
# Candidate regressors keyed by the display name used in the report below.
models = {
    'LinearRegression': LinearRegression(),
    # 'RandomForest': RandomForestRegressor(random_state=42)
}

# Maps each model name to its test-set metrics.
results = {}

for name, model in models.items():
    # Fit on the training split, then predict on the test split.
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Compute the three test-set metrics together; the keys are the ones stored in `results`.
    scores = {
        'R²': r2_score(y_test, y_pred),
        'RMSE': root_mean_squared_error(y_test, y_pred),
        'MAE': mean_absolute_error(y_test, y_pred),
    }
    r2, rmse, mae = scores['R²'], scores['RMSE'], scores['MAE']

    results[name] = scores
    print(f"Resultados para {name}: R²={r2:.4f}, RMSE={rmse:.4f}, MAE={mae:.4f}")


Resultados para LinearRegression: R²=0.7227, RMSE=9.6081, MAE=7.2692
