## Modelo 3

In [77]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.ensemble import RandomForestRegressor, HistGradientBoostingRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.compose import ColumnTransformer
from sklearn.metrics import mean_squared_error, r2_score

In [14]:
df = pd.read_csv('dataset6.csv')
df.head()


Unnamed: 0,temperatura,sensacion_termica,humedad,velocidad_viento,total_alquileres,temporada_1,temporada_2,temporada_3,temporada_4,anio_0,...,dia_semana_3.0,dia_semana_4.0,dia_semana_5.0,dia_semana_6.0,dia_trabajo_0,dia_trabajo_1,clima_1,clima_2,clima_3,clima_4
0,0.24,0.2879,0.81,0.0,16.0,1.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0
1,0.22,0.2727,0.8,0.0,40.0,1.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0
2,0.22,0.2727,0.8,0.0,32.0,1.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0
3,0.24,0.2879,0.75,0.0,13.0,1.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0
4,0.24,0.2879,0.75,0.0,1.0,1.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0


In [15]:
# Separar características (X) y etiqueta (y)
X = df.drop(columns=["total_alquileres"])
y = df["total_alquileres"]

In [16]:
y_log = np.log1p(y)

In [24]:
# División en entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X, y_log, test_size=0.2, random_state=42)

In [83]:
numeric_cols = ['temperatura', 'sensacion_termica', 'humedad', 'velocidad_viento']
ohe_columns = [col for col in X.columns if col not in numeric_cols]

In [84]:
# Suponiendo que 'numeric_cols' son las columnas numéricas originales
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numeric_cols),  # Escalar numéricas
        ('cat', 'passthrough', ohe_columns)       # No tocar las dummies
    ])

In [85]:
X_train_scaled = preprocessor.fit_transform(X_train)
X_test_scaled = preprocessor.fit_transform(X_test)

In [25]:
# Escalar características para modelos sensibles a la escala
'''
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)\
'''

In [46]:
def evaluar_modelo(modelo, X_train, X_test, y_train, y_test):
    modelo.fit(X_train, y_train)
    predicciones = modelo.predict(X_test)
    rmse = np.sqrt(mean_squared_error(y_test, predicciones))
    r2 = r2_score(y_test, predicciones)
    return rmse, r2, modelo

In [27]:
def modelo_lineal(X_train, X_test, y_train, y_test):
    lr = LinearRegression()
    return evaluar_modelo(lr, X_train, X_test, y_train, y_test)

In [94]:
def modelo_lasso(X_train, X_test, y_train, y_test, alphas=[0.01, 0.1, 1.0]):
    grid = GridSearchCV(Lasso(max_iter=10000), {'alpha': alphas}, cv=3, scoring='neg_root_mean_squared_error')
    grid.fit(X_train, y_train)
    return evaluar_modelo(grid.best_estimator_, X_train, X_test, y_train, y_test)

In [61]:
def modelo_elasticnet(X_train, X_test, y_train, y_test, params=None):
    if params is None:
        params = {'alpha': [0.01, 0.1, 1.0], 'l1_ratio': [0.1, 0.5, 0.9]}
    grid = GridSearchCV(ElasticNet(max_iter=10000), params, cv=3, scoring='neg_root_mean_squared_error')
    grid.fit(X_train, y_train)
    return evaluar_modelo(grid.best_estimator_, X_train, X_test, y_train, y_test)

In [96]:
def modelo_ridge(X_train, X_test, y_train, y_test, alphas=[0.1, 1.0, 10]):
    grid = GridSearchCV(Ridge(), {'alpha': alphas}, cv=3, scoring='neg_root_mean_squared_error')
    grid.fit(X_train, y_train)
    best_model = grid.best_estimator_
    return evaluar_modelo(best_model, X_train, X_test, y_train, y_test)

In [42]:
def modelo_gradient_boosting(X_train, X_test, y_train, y_test, n_iter=10):
    param_dist = {
        'n_estimators': [100, 200],
        'learning_rate': [0.01, 0.1],
        'max_depth': [3, 5, 10]
    }
    gb = RandomizedSearchCV(GradientBoostingRegressor(), param_dist, n_iter=n_iter, cv=3,
                            scoring='neg_root_mean_squared_error', n_jobs=-1, random_state=42)
    gb.fit(X_train, y_train)
    return evaluar_modelo(gb.best_estimator_, X_train, X_test, y_train, y_test)

In [50]:
def modelo_random_forest(X_train, X_test, y_train, y_test, n_iter=10):
    param_dist = {
        'n_estimators': [100, 200],
        'max_depth': [10, 20, None],
        'min_samples_split': [2, 5],
        'min_samples_leaf': [1, 2]
    }
    rf = RandomizedSearchCV(RandomForestRegressor(random_state=42), param_dist, n_iter=n_iter, cv=3,
                            scoring='neg_root_mean_squared_error', n_jobs=-1, random_state=42)
    rf.fit(X_train, y_train)
    return evaluar_modelo(rf.best_estimator_, X_train, X_test, y_train, y_test)

In [56]:
def modelo_gradient_boosting_hibrido(X_train, X_test, y_train, y_test,
                                     param_dist, n_iter=20, cv=3):
    # Paso 1: RandomizedSearchCV (exploración)
    random_search = RandomizedSearchCV(
        GradientBoostingRegressor(),
        param_distributions=param_dist,
        n_iter=n_iter,
        cv=cv,
        scoring='neg_root_mean_squared_error',
        n_jobs=-1,
        random_state=42
    )
    random_search.fit(X_train, y_train)

    best_params = random_search.best_params_

    # Paso 2: GridSearchCV (explotación)
    param_grid = {
        'n_estimators': list(set([
            max(10, best_params['n_estimators'] - 50),  # evitando valores negativos
            best_params['n_estimators'],
            best_params['n_estimators'] + 50
        ])),
        'max_depth': list(set([
            max(1, best_params['max_depth'] - 1),
            best_params['max_depth'],
            best_params['max_depth'] + 1
        ])),
        'learning_rate': sorted(list(set([
            round(best_params['learning_rate'] * 0.5, 3),
            best_params['learning_rate'],
            round(best_params['learning_rate'] * 1.5, 3)
        ])))
    }

    grid_search = GridSearchCV(
        GradientBoostingRegressor(subsample=best_params['subsample']),
        param_grid=param_grid,
        cv=cv,
        scoring='neg_root_mean_squared_error',
        n_jobs=-1
    )
    grid_search.fit(X_train, y_train)

    return evaluar_modelo(grid_search.best_estimator_, X_train, X_test, y_train, y_test)

In [89]:
# Llamada de ejemplo
rmse_lr, r2_lr, modelo_lr = modelo_lineal(X_train_scaled, X_test_scaled, y_train, y_test)
rmse_ridge, r2_ridge, modelo_ridge = modelo_ridge(X_train_scaled, X_test_scaled, y_train, y_test)
rmse_lasso, r2_lasso, modelo_lasso = modelo_lasso(X_train_scaled, X_test_scaled, y_train, y_test)

In [90]:
print("Resultados de los Modelos:")
print(f"Linear Regression     -> RMSE: {rmse_lr:.2f}, R²: {r2_lr:.2f}")
print(f"Ridge Regression      -> RMSE: {rmse_ridge:.2f}, R²: {r2_ridge:.2f}")
print(f"Lasso Regression      -> RMSE: {rmse_lasso:.2f}, R²: {r2_lasso:.2f}")

Resultados de los Modelos:
Linear Regression     -> RMSE: 0.60, R²: 0.82
Ridge Regression      -> RMSE: 0.60, R²: 0.82
Lasso Regression      -> RMSE: 0.64, R²: 0.79


In [47]:
rmse_gb, r2_gb, modelo_gb = modelo_gradient_boosting(X_train, X_test, y_train, y_test)

In [48]:
print(f"Gradient Boosting       -> RMSE: {rmse_gb:.2f}, R²: {r2_gb:.2f}")

Gradient Boosting       -> RMSE: 0.33, R²: 0.94


In [51]:
rmse_rf, r2_rf, modelo_rf = modelo_random_forest(X_train, X_test, y_train, y_test)

In [54]:
print(f"Random Forest         -> RMSE: {rmse_rf:.2f}, R²: {r2_rf:.2f}")

Random Forest         -> RMSE: 0.34, R²: 0.94


In [57]:
param_dist_gb = {
    'n_estimators': [50, 100, 200, 300],
    'max_depth': [3, 5, 7, 10],
    'learning_rate': [0.01, 0.05, 0.1, 0.2],
    'subsample': [0.6, 0.8, 1.0]
}

rmse_gb, r2_gb, modelo_gb = modelo_gradient_boosting_hibrido(X_train, X_test, y_train, y_test, param_dist_gb)

In [59]:
print(f"Gradient Boosting (Híbrido) -> RMSE: {rmse_gb:.2f}, R²: {r2_gb:.2f}")
print("Mejores hiperparámetros Gradient Boosting:", modelo_gb.get_params())

Gradient Boosting (Híbrido) -> RMSE: 0.32, R²: 0.95
Mejores hiperparámetros Gradient Boosting: {'alpha': 0.9, 'ccp_alpha': 0.0, 'criterion': 'friedman_mse', 'init': None, 'learning_rate': 0.2, 'loss': 'squared_error', 'max_depth': 4, 'max_features': None, 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 350, 'n_iter_no_change': None, 'random_state': None, 'subsample': 0.6, 'tol': 0.0001, 'validation_fraction': 0.1, 'verbose': 0, 'warm_start': False}


Quizas un random forest hibrido

Vamos a hacer que X no tenga escalamiento ya que sus variables estan todas normalizadas de 0 a 1

In [97]:
rmse_lr, r2_lr, modelo_lr = modelo_lineal(X_train, X_test, y_train, y_test)
rmse_ridge, r2_ridge, modelo_ridge = modelo_ridge(X_train, X_test, y_train, y_test)
rmse_lasso, r2_lasso, modelo_lasso = modelo_lasso(X_train, X_test, y_train, y_test)

In [98]:
print("Resultados de los Modelos:")
print(f"Linear Regression     -> RMSE: {rmse_lr:.2f}, R²: {r2_lr:.2f}")
print(f"Ridge Regression      -> RMSE: {rmse_ridge:.2f}, R²: {r2_ridge:.2f}")
print(f"Lasso Regression      -> RMSE: {rmse_lasso:.2f}, R²: {r2_lasso:.2f}")

Resultados de los Modelos:
Linear Regression     -> RMSE: 0.60, R²: 0.82
Ridge Regression      -> RMSE: 0.60, R²: 0.82
Lasso Regression      -> RMSE: 0.64, R²: 0.79
