In [4]:
pip install optuna



In [5]:
pip install -U scikit-learn




In [7]:
pip install pandas numpy holidays lightgbm scikit-learn joblib azure-identity azure-ai-ml




In [None]:
import os
import json
import joblib
import pandas as pd
import numpy as np
import holidays
from lightgbm import LGBMRegressor
from sklearn.preprocessing import MinMaxScaler
from azure.identity import DeviceCodeCredential
from azure.ai.ml import MLClient
from azure.ai.ml.entities import Model

df = (
    pd.read_csv("/content/ocupacion_diaria_hotel.csv", parse_dates=["fecha"])
      .iloc[4:]                             
)
df = df[df["fecha"] <= "2021-01-01"].reset_index(drop=True)
df["dow"]        = df["fecha"].dt.dayofweek
df["dow_sin"]    = np.sin(2 * np.pi * df["dow"]   / 7)
df["dow_cos"]    = np.cos(2 * np.pi * df["dow"]   / 7)
df["month"]      = df["fecha"].dt.month
df["month_sin"]  = np.sin(2 * np.pi * df["month"] / 12)
df["month_cos"]  = np.cos(2 * np.pi * df["month"] / 12)
df["es_pandemia"]         = (df["fecha"] >= "2020-03-01").astype(int)
df["es_viernes_o_sabado"] = df["dow"].isin([4, 5]).astype(int)
df["temporada_alta"]      = df["month"].isin([6, 7, 11]).astype(int)
mx_holidays               = holidays.MX(years=[2019, 2020])
df["es_festivo"]          = df["fecha"].isin(mx_holidays).astype(int)

for lag in (1, 7, 14):
    df[f"lag_{lag}"] = df["personas"].shift(lag)
df["rolling_mean_7"] = df["personas"].shift(1).rolling(7).mean()

df = df.dropna().reset_index(drop=True)
features_to_scale = [
    "personas",
    "lag_1", "lag_7", "lag_14", "rolling_mean_7",
    "dow_sin", "dow_cos", "month_sin", "month_cos"
]
scaler = MinMaxScaler()
df[features_to_scale] = scaler.fit_transform(df[features_to_scale])
scaler_params = {
    "feature_range": list(scaler.feature_range), 
    "data_min": scaler.data_min_.tolist(),
    "data_max": scaler.data_max_.tolist(),
    "data_range": scaler.data_range_.tolist(),
    "scale": scaler.scale_.tolist(),
    "min": scaler.min_.tolist(),
    "n_features_in": int(scaler.n_features_in_),
}
feature_cols = [
    "lag_1", "lag_7", "lag_14", "rolling_mean_7",
    "dow_sin", "dow_cos", "month_sin", "month_cos",
    "temporada_alta", "es_festivo",
    "es_viernes_o_sabado", "es_pandemia"
]
target_col = "personas"
FECHA_TEST = pd.to_datetime("2020-08-10")
mask_train = df["fecha"] < FECHA_TEST
X_train = df.loc[mask_train, feature_cols]
y_train = df.loc[mask_train, target_col]
print(f"Tamaño de entrenamiento: {X_train.shape}")
best_params = {
    "n_estimators": 310,
    "max_depth": 7,
    "learning_rate": 0.064,
    "subsample": 0.83,
    "colsample_bytree": 0.89,
    "random_state": 42,
    "n_jobs": -1,
}
model = LGBMRegressor(**best_params)
model.fit(X_train, y_train)
os.makedirs("outputs", exist_ok=True)
joblib.dump(model, "outputs/lgb_model.pkl")
with open("outputs/scaler_params.json", "w") as f:
    json.dump(scaler_params, f)
with open("outputs/feature_cols.json", "w") as f:
    json.dump(feature_cols, f)

print("Artefactos guardados en ./outputs:"
      "\n   • lgb_model.pkl"
      "\n   • scaler_params.json"
      "\n   • feature_cols.json")

cred = DeviceCodeCredential()
ml_client = MLClient(
    credential=cred,
    subscription_id="PEDIR_A_HIBRAN",
    resource_group_name="azure-for-students",
    workspace_name="rar-solutions-ml-workspace"
)

registered_model = ml_client.models.create_or_update(
    Model(
        name        = "hotel-lightgbm-occupancy",
        path        = "outputs",   
        type        = "custom_model",
        description = "LightGBM + MinMaxScaler (parámetros en JSON) para forecast ocupación hotelera"
    )
)
print(f"Modelo registrado en Azure ML: {registered_model.name}, versión {registered_model.version}")

  df["es_festivo"]          = df["fecha"].isin(mx_holidays).astype(int)


Tamaño de entrenamiento: (530, 12)
✅ Artefactos guardados en ./outputs:
   • lgb_model.pkl
   • scaler_params.json
   • feature_cols.json
To sign in, use a web browser to open the page https://microsoft.com/devicelogin and enter the code BFTPWVQRF to authenticate.


[32mUploading outputs (0.45 MBs): 100%|██████████| 451115/451115 [00:00<00:00, 4203640.97it/s]
[39m



✅ Modelo registrado en Azure ML: hotel-lightgbm-occupancy, versión 3
