In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
import numpy as np


In [16]:
# Load the preprocessed train and test tables from the processed-data folder.
# The two reads are independent of each other.
train = pd.read_csv("../data/processed/train_preprocessed.csv")
test = pd.read_csv("../data/processed/test_preprocessed.csv")

In [17]:
# Separate the regression target from the predictors.
# `drop` returns a new frame, so `train` itself is left untouched.
y = train['SalePrice']
X = train.drop(columns='SalePrice')

In [18]:
# Hold out 20% of the rows as a validation split; the fixed seed keeps the
# partition reproducible across runs.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [19]:
# Candidate regressors and the hyper-parameter grid searched for each one.
# Each entry maps a display name to:
#   'model'  - an unfitted estimator instance
#   'params' - the grid passed to GridSearchCV (an empty dict means a single
#              candidate using the estimator's defaults)
models = {
    'LinearRegression': {
        'model': LinearRegression(),
        'params': {}
    },
    'Ridge': {
        'model': Ridge(),
        'params': {'alpha': [0.1, 1, 10]}
    },
    'Lasso': {
        # The default max_iter (1000) let coordinate descent stop before
        # converging on these data (ConvergenceWarning on most fits), so the
        # iteration budget is raised.
        'model': Lasso(max_iter=10_000),
        'params': {'alpha': [0.01, 0.1, 1]}
    },
    'RandomForest': {
        # Seeded so the ensemble is reproducible.
        'model': RandomForestRegressor(random_state=42),
        'params': {'n_estimators': [100, 200, 300], 'max_depth': [10, 20, None]}
    },
    'GradientBoosting': {
        # Seeded so the ensemble is reproducible.
        'model': GradientBoostingRegressor(random_state=42),
        'params': {'n_estimators': [100, 200, 300], 'learning_rate': [0.05, 0.1, 0.2]}
    },
    'SVR': {
        'model': SVR(),
        'params': {'C': [0.1, 1, 10], 'kernel': ['linear', 'rbf']}
    }
}


In [20]:
# Accumulator for per-model evaluation records; starts out empty.
results = list()


In [21]:
# Tune every candidate with a 5-fold grid search on the training split, then
# score the best estimator found on the held-out validation split.
# One summary record per model is stored in `results`.
results.clear()  # start empty so re-running this cell does not duplicate rows

for model_name, config in models.items():
    print(f"Evaluando modelo: {model_name}")

    # GridSearchCV configuration: candidates are ranked by negated MSE
    # (scikit-learn scorers follow a "higher is better" convention).
    grid_search = GridSearchCV(
        estimator=config['model'],
        param_grid=config['params'],
        scoring='neg_mean_squared_error',
        cv=5,
        verbose=1,
    )
    grid_search.fit(X_train, y_train)

    # Best model and its performance on the validation split.
    best_model = grid_search.best_estimator_
    best_params = grid_search.best_params_
    y_pred = best_model.predict(X_val)
    mse = mean_squared_error(y_val, y_pred)
    rmse = np.sqrt(mse)

    # Keep the outcome; otherwise only the last model's values would survive
    # the loop and `results` would stay empty.
    results.append({
        'model': model_name,
        'best_params': best_params,
        'mse': mse,
        'rmse': rmse,
    })

Evaluando modelo: LinearRegression
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Evaluando modelo: Ridge
Fitting 5 folds for each of 3 candidates, totalling 15 fits
Evaluando modelo: Lasso
Fitting 5 folds for each of 3 candidates, totalling 15 fits


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


Evaluando modelo: RandomForest
Fitting 5 folds for each of 9 candidates, totalling 45 fits
Evaluando modelo: GradientBoosting
Fitting 5 folds for each of 9 candidates, totalling 45 fits
Evaluando modelo: SVR
Fitting 5 folds for each of 6 candidates, totalling 30 fits
