In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import (
    mean_absolute_error, mean_squared_error, r2_score, median_absolute_error
)
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
from sklearn.linear_model import Ridge

print("Current working directory:", os.getcwd())

Current working directory: /Users/matiaslein/Desktop/Proyecto-final-ML/entrenamientos_df_intermedio


In [13]:


# Dataset
df = pd.read_csv('../datasetintermedio.csv')
y = df['Precio_usd']
X = df.drop(columns=['Precio_usd'])

# Manejo de NaN
X = X.fillna(X.median())

# Split
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [14]:

# Modelos
modelos = {
    'LinearRegression': make_pipeline(StandardScaler(), LinearRegression()),
    'Ridge': make_pipeline(StandardScaler(), Ridge(alpha=1.0)),
    'Lasso': make_pipeline(StandardScaler(), Lasso(alpha=0.5, max_iter=10000))
}

# Evaluación
for nombre, modelo in modelos.items():
    modelo.fit(X_train, y_train)
    y_pred = modelo.predict(X_test)

    mae = mean_absolute_error(y_test, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)
    mape = np.mean(np.abs((y_test - y_pred) / y_test)) * 100
    medae = median_absolute_error(y_test, y_pred)

    print(f"\n📊 Modelo: {nombre}")
    print(f"MAE:    ${mae:,.0f}")
    print(f"MedAE:  ${medae:,.0f}")
    print(f"RMSE:   ${rmse:,.0f}")
    print(f"R²:     {r2:.3f}")
    print(f"MAPE:   {mape:.2f}%")

    # Mostrar top 5 coeficientes
    coef_model = modelo.named_steps[nombre.lower()]
    coefs = coef_model.coef_
    top = pd.Series(coefs, index=X.columns).sort_values(ascending=False)
    print("Top + features:", list(top.head(3).index))
    print("Top - features:", list(top.tail(3).index))


📊 Modelo: LinearRegression
MAE:    $276,476
MedAE:  $3,661
RMSE:   $16,333,009
R²:     -602310.945
MAPE:   1635.31%
Top + features: ['Año', 'Cilindrada', 'Marca_BMW']
Top - features: ['Marca_Renault', 'Transmision_Automática', 'Transmision_Manual']

📊 Modelo: Ridge
MAE:    $276,388
MedAE:  $3,661
RMSE:   $16,327,762
R²:     -601924.045
MAPE:   1634.79%
Top + features: ['Año', 'Cilindrada', 'Marca_BMW']
Top - features: ['Marca_Renault', 'Transmision_Automática', 'Transmision_Manual']

📊 Modelo: Lasso
MAE:    $276,417
MedAE:  $3,656
RMSE:   $16,329,535
R²:     -602054.812
MAPE:   1634.96%
Top + features: ['Año', 'Cilindrada', 'Marca_BMW']
Top - features: ['Marca_Renault', 'Transmision_Automática', 'Transmision_Manual']


In [19]:
print(df['Precio_usd'].describe())

count     18254.000000
mean      27852.262131
std       20317.490113
min         102.145923
25%       16309.012876
50%       23690.987124
75%       32698.819742
max      610000.000000
Name: Precio_usd, dtype: float64
