In [1]:
import pandas as pd
from sklearn.model_selection import GridSearchCV, cross_val_score
from sklearn.linear_model import Ridge
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.metrics import mean_squared_error
from math import sqrt


In [2]:
# Read the dataset from its CSV file into a DataFrame.
df = pd.read_csv(filepath_or_buffer="./data/Vehicle_Price_Cleaned_prepared.csv")


In [3]:
# Target vector is the "price" column; the feature matrix is every other column.
y = df["price"]
X = df.drop(columns="price")


In [4]:
# Split the feature columns by dtype so each group gets its own transformer.
# Selecting on the generic "number" kind (rather than only int64/float64) and
# treating every remaining column as categorical guarantees that each column of
# X lands in exactly one list. With the narrower object/int64/float64 filters,
# columns of other dtypes (bool, category, int32, float32, ...) matched neither
# list and would be silently discarded by a ColumnTransformer left at its
# default remainder="drop".
num_cols = X.select_dtypes(include="number").columns.tolist()
cat_cols = X.select_dtypes(exclude="number").columns.tolist()



In [5]:
# Preprocessing: standardise the numeric columns and one-hot encode the
# categorical ones. Categories not seen during fit are ignored at transform
# time, and the encoder emits a dense array instead of a sparse matrix.
preprocessor = ColumnTransformer(
    transformers=[
        ("num", StandardScaler(), num_cols),
        (
            "cat",
            OneHotEncoder(sparse_output=False, handle_unknown="ignore"),
            cat_cols,
        ),
    ]
)



In [6]:
# Base pipeline: preprocessing followed by a Ridge regressor with default
# settings. The step names ("preprocess", "model") are the prefixes used when
# addressing nested hyper-parameters, e.g. "model__alpha".
pipeline = Pipeline(
    steps=[
        ("preprocess", preprocessor),
        ("model", Ridge()),
    ]
)



In [7]:
# Candidate values of alpha for the "model" (Ridge) step of the pipeline.
param_grid = {"model__alpha": [0.01, 0.1, 1.0, 10.0, 100.0]}

# Cross-validated grid search (cv=5) scored with negated RMSE, so that a
# higher score means a lower error.
grid_search = GridSearchCV(
    estimator=pipeline,
    param_grid=param_grid,
    cv=5,
    scoring="neg_root_mean_squared_error",
    verbose=1,
)

# Fit on the complete dataset; no separate hold-out split is made in this cell.
grid_search.fit(X, y)

# Results: the scorer returns negated RMSE, so flip the sign to report a
# positive error value.
best_alpha = grid_search.best_params_["model__alpha"]
best_score = -grid_search.best_score_

print(
    f"Mejor alpha: {best_alpha}",
    f"Mejor RMSE (CV): {best_score:.2f}",
    sep="\n",
)

Fitting 5 folds for each of 5 candidates, totalling 25 fits
Mejor alpha: 1.0
Mejor RMSE (CV): 9035.48
