In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
import pandas as pd
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the modelling dataset from CSV (path is relative to the working directory).
# NOTE(review): the file name suggests the data is already cleaned — confirm.
df = pd.read_csv('datos_limpios_modelo.csv')

# 'precio_contado' is the regression target; every other column is a predictor.
y = df['precio_contado']
X = df.drop('precio_contado', axis=1)

print(f"Shape de X: {X.shape}, Shape de y: {y.shape}")

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the
# partition reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)

# Report the resulting partition sizes, one line per pair.
print(
    f"X_train: {X_train.shape}, X_test: {X_test.shape}",
    f"y_train: {y_train.shape}, y_test: {y_test.shape}",
    sep="\n",
)

In [None]:
# Candidate regressors, keyed by the display name used in the report below.
# Seeds are fixed on the estimators that accept `random_state`.
modelos = {
    'Linear Regression': LinearRegression(),
    'Random Forest': RandomForestRegressor(n_estimators=100, random_state=42),
    'Gradient Boosting': GradientBoostingRegressor(n_estimators=100, random_state=42),
    'AdaBoost': AdaBoostRegressor(n_estimators=50, random_state=42),
    'K-Nearest Neighbors': KNeighborsRegressor(n_neighbors=3),
}

# name -> {'MAE', 'MSE', 'RMSE', 'R2'} measured on the held-out test split.
results = {}

for name, model in modelos.items():
    # Fit on the training split and score on the test split only.
    y_pred = model.fit(X_train, y_train).predict(X_test)

    mse = mean_squared_error(y_test, y_pred)
    mae, rmse, r2 = (
        mean_absolute_error(y_test, y_pred),
        np.sqrt(mse),  # RMSE is derived from the MSE rather than recomputed
        r2_score(y_test, y_pred),
    )

    results[name] = dict(MAE=mae, MSE=mse, RMSE=rmse, R2=r2)
    print(f"{name} -> MAE: {mae:.2f}, MSE: {mse:.2f}, RMSE: {rmse:.2f}, R2: {r2:.4f}")


In [None]:
# Final model: a linear regression refit on the complete dataset (train + test).
# A fresh estimator is built from the evaluated model's hyperparameters instead
# of refitting the instance stored in `modelos`: that instance is the one the
# hold-out metrics in `results` were computed from, and refitting it in place
# would silently replace it with a model that has already seen the test rows.
mejor_modelo = LinearRegression(**modelos['Linear Regression'].get_params())
mejor_modelo.fit(X, y)

In [None]:
# Rank the fitted linear model's coefficients by absolute magnitude and plot
# them as a horizontal bar chart (blue = positive, red = zero or negative).
# NOTE(review): raw coefficients are only comparable across features that share
# a scale — confirm the predictors were standardized before reading this chart
# as feature importance.
features = X.columns
coeficientes = mejor_modelo.coef_

df_importancia = pd.DataFrame({
    'feature': features,
    'coefficient': coeficientes
})

# One ordering pass: largest |coefficient| first. (A previous sort by signed
# value was immediately overridden by this one, so it has been dropped.)
orden_abs = df_importancia['coefficient'].abs().sort_values(ascending=False).index
df_importancia = df_importancia.reindex(orden_abs)

# Colour encodes the sign of each coefficient.
colores = ['tab:blue' if c > 0 else 'tab:red' for c in df_importancia['coefficient']]

fig, ax = plt.subplots(figsize=(10, 8))
bars = ax.barh(df_importancia['feature'], df_importancia['coefficient'], color=colores)
ax.set_xlabel('Coeficiente')
ax.set_title('Importancia de características - Coeficientes del Modelo Lineal')
ax.invert_yaxis()  # barh draws the first row at the bottom; flip so the largest is on top
plt.show()