In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
import xgboost as xgb
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [4]:
# Read the raw vehicle listings from disk.
df = pd.read_csv("../data/vehicles.csv")

# One-hot encode the frame; drop_first=True discards the first level of every
# encoded column so the dummy columns are not perfectly collinear.
df_encoded = pd.get_dummies(df, drop_first=True)

# The target is the "price" column; every other encoded column is a feature.
y = df_encoded["price"]
X = df_encoded.drop(columns="price")

# Hold out 30% of the rows for evaluation (fixed seed keeps the split repeatable).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Standardize the features. The scaling statistics are learned from the
# training split only and then reused on the test split.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Candidate regressors to compare, keyed by the label used when reporting.
# Insertion order is the order in which they are trained and printed.
models = {}
models["Decision Tree"] = DecisionTreeRegressor(random_state=42)
# Marked as the winner of the comparison by the notebook author.
models["Random Forest"] = RandomForestRegressor(n_estimators=100, random_state=42)
models["Gradient Boosting"] = GradientBoostingRegressor(n_estimators=100, random_state=42)
models["XGBoost"] = xgb.XGBRegressor(n_estimators=100, random_state=42)

# Fit every candidate on the scaled training split, then report its
# R², MAE and RMSE on the scaled test split.
for name in models:
    model = models[name]
    # fit() returns the estimator itself, so prediction can be chained.
    preds = model.fit(X_train_scaled, y_train).predict(X_test_scaled)

    r2 = r2_score(y_test, preds)
    mae = mean_absolute_error(y_test, preds)
    # RMSE is the square root of the mean squared error.
    rmse = np.sqrt(mean_squared_error(y_test, preds))

    print(
        f"\n-- {name} -- ",
        f"R²: {r2:.3f}",
        f"MAE: {mae:.2f}",
        f"RMSE: {rmse:.2f}",
        sep="\n",
    )



-- Decision Tree -- 
R²: 0.714
MAE: 3409.48
RMSE: 6448.26

-- Random Forest -- 
R²: 0.835
MAE: 2777.78
RMSE: 4888.08

-- Gradient Boosting -- 
R²: 0.719
MAE: 4176.82
RMSE: 6386.58

-- XGBoost -- 
R²: 0.801
MAE: 3433.85
RMSE: 5371.43
