In [2]:
!pip install lightgbm pandas scikit-learn joblib matplotlib seaborn

Collecting lightgbm
  Downloading lightgbm-4.6.0-py3-none-macosx_12_0_arm64.whl.metadata (17 kB)
Downloading lightgbm-4.6.0-py3-none-macosx_12_0_arm64.whl (1.6 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.6/1.6 MB 7.3 MB/s eta 0:00:00
Installing collected packages: lightgbm
Successfully installed lightgbm-4.6.0


In [22]:
import pandas as pd
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import joblib
import os

# Train a LightGBM regressor on the merged training table, report the RMSE on
# a held-out split, and persist the fitted booster to disk.

# --- Load data -------------------------------------------------------------
train_merged = pd.read_csv("data/train_merged.csv")

TARGET = "Weekly_Sales"

# Every column except the target and "Date" is used as a feature.
cols_to_drop = ["Date"]
X = train_merged.drop(columns=[TARGET] + cols_to_drop)
y = train_merged[TARGET]

# One-hot encode whatever non-numeric feature columns remain, since the
# frame is handed to LightGBM as-is below.
non_numeric = X.select_dtypes(include=["object", "bool", "category"]).columns.tolist()
if non_numeric:
    X = pd.get_dummies(X, columns=non_numeric)

# --- Train / validation split ----------------------------------------------
# NOTE(review): this is a random row-level split. The table has a "Date"
# column, so if rows are weekly observations a time-based hold-out may give a
# more honest error estimate - confirm with the data owner.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

dtrain = lgb.Dataset(X_train, label=y_train)
# Validation data is declared with the training set as its reference, as the
# LightGBM docs recommend for validation Datasets.
dval = lgb.Dataset(X_val, label=y_val, reference=dtrain)

# --- Model -------------------------------------------------------------------
params = {
    "objective": "regression",
    "metric": "rmse",
    "boosting_type": "gbdt",
    "verbosity": -1,
    "learning_rate": 0.05,
    "num_leaves": 31,
    "seed": 42
}

# No early-stopping callback is registered, so all 1000 boosting rounds are
# always trained.
model = lgb.train(
    params,
    dtrain,
    valid_sets=[dtrain, dval],
    valid_names=["train", "val"],
    num_boost_round=1000,
)

# --- Evaluate ----------------------------------------------------------------
val_preds = model.predict(X_val)
# Take the square root of the MSE explicitly instead of passing
# `squared=False`: that keyword is deprecated/removed in recent scikit-learn
# releases, while this form works on every version.
rmse = mean_squared_error(y_val, val_preds) ** 0.5
print(f"Validation RMSE: {rmse:.4f}\n")

# --- Persist -----------------------------------------------------------------
# `model_path` was previously undefined in this cell (it only existed as
# leftover kernel state), which made the cell fail on a fresh kernel.
model_path = "model_registry/lightgbm_model.pkl"
os.makedirs(os.path.dirname(model_path), exist_ok=True)
joblib.dump(model, model_path)
print(f"Model saved to: {model_path}")

Validation RMSE: 5779.2921

Model saved to: model_registry/lightgbm_model.pkl


