In [4]:
import pandas as pd 
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, VotingRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import joblib

# =======================
# LOAD DATA
# =======================
df = pd.read_csv("C:/Users/HP/Downloads/Demand_forecast.csv")

# =======================
# DATE PROCESSING
# =======================
df["Date"] = pd.to_datetime(df["Date"])
df["Day"] = df["Date"].dt.day
df["Month"] = df["Date"].dt.month
df["Year"] = df["Date"].dt.year
df = df.drop(columns=["Date"])

# =======================
# REMOVE SEASONALITY COMPLETELY
# =======================
if "Seasonality" in df.columns:
    df = df.drop(columns=["Seasonality"])

# =======================
# LABEL ENCODE CATEGORICAL
# =======================
categorical_cols = [
    "Store ID", "Product ID", "Category", "Region",
    "Weather Condition"
]  # Seasonality removed

encoders = {}
for col in categorical_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col].astype(str))
    encoders[col] = le

joblib.dump(encoders, "encoders.pkl")

# =======================
# TRAIN/TEST SPLIT
# =======================
target = "Demand Forecast"
y = df[target]
X = df.drop(columns=[target])

joblib.dump(list(X.columns), "model_columns.pkl")

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# =======================
# ENSEMBLE MODEL
# =======================
rf = RandomForestRegressor(
    n_estimators=180,
    max_depth=14,
    random_state=42
)

gb = GradientBoostingRegressor(
    n_estimators=200,
    learning_rate=0.05,
    max_depth=3,
    random_state=42
)

model = VotingRegressor([("rf", rf), ("gb", gb)])
model.fit(X_train, y_train)

# =======================
# METRICS
# =======================
y_pred = model.predict(X_test)

print("\n===== ENSEMBLE MODEL METRICS =====")
print("MAE :", mean_absolute_error(y_test, y_pred))
print("MSE :", mean_squared_error(y_test, y_pred))
print("RMSE:", np.sqrt(mean_squared_error(y_test, y_pred)))
print("R² Score:", r2_score(y_test, y_pred))

# =======================
# SAVE MODEL
# =======================
joblib.dump(model, "model.pkl")
print("\nModel + Columns + Encoders saved successfully.")


  cache_array = _maybe_cache(arg, format, cache, convert_listlike)
  cache_array = _maybe_cache(arg, format, cache, convert_listlike)
  cache_array = _maybe_cache(arg, format, cache, convert_listlike)
  cache_array = _maybe_cache(arg, format, cache, convert_listlike)
  cache_array = _maybe_cache(arg, format, cache, convert_listlike)
  cache_array = _maybe_cache(arg, format, cache, convert_listlike)
  cache_array = _maybe_cache(arg, format, cache, convert_listlike)
  cache_array = _maybe_cache(arg, format, cache, convert_listlike)
  cache_array = _maybe_cache(arg, format, cache, convert_listlike)
  cache_array = _maybe_cache(arg, format, cache, convert_listlike)
  cache_array = _maybe_cache(arg, format, cache, convert_listlike)
  cache_array = _maybe_cache(arg, format, cache, convert_listlike)
  cache_array = _maybe_cache(arg, format, cache, convert_listlike)
  cache_array = _maybe_cache(arg, format, cache, convert_listlike)
  cache_array = _maybe_cache(arg, format, cache, convert_listl


===== ENSEMBLE MODEL METRICS =====
MAE : 7.494077002691941
MSE : 77.35559249895836
RMSE: 8.795202811701294
R² Score: 0.9931642714480778

Model + Columns + Encoders saved successfully.
