In [33]:
import joblib
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler

In [35]:
# Read the raw retail inventory CSV into a DataFrame.
# The path is relative, so the file must sit in the notebook's working directory.
file_path = "retail_store_inventory.csv"
df = pd.read_csv(filepath_or_buffer=file_path)

In [39]:
# Preview the first five rows (five is the default for head()).
df.head()

Unnamed: 0,Date,Store ID,Product ID,Category,Region,Inventory Level,Units Sold,Units Ordered,Demand Forecast,Price,Discount,Weather Condition,Holiday/Promotion,Competitor Pricing,Seasonality
0,2022-01-01,S001,P0001,Groceries,North,231,127,55,135.47,33.5,20,Rainy,0,29.69,Autumn
1,2022-01-01,S001,P0002,Toys,South,204,150,66,144.04,63.01,20,Sunny,0,66.16,Autumn
2,2022-01-01,S001,P0003,Toys,West,102,65,51,74.02,27.99,10,Sunny,1,31.32,Summer
3,2022-01-01,S001,P0004,Toys,North,469,61,164,62.18,32.72,10,Cloudy,1,34.74,Autumn
4,2022-01-01,S001,P0005,Electronics,East,166,14,135,9.26,73.64,0,Sunny,0,68.95,Summer


In [41]:
# Derive Year / Month / Day calendar features from Date, then drop the raw column.
# The guard makes the cell safe to re-run: after the first run "Date" no longer
# exists, and an unguarded df["Date"] lookup would raise KeyError.
if "Date" in df.columns:
    parsed_dates = pd.to_datetime(df["Date"])
    # assign(...).drop(...) builds a new frame instead of mutating in place;
    # the new columns are appended in the same order as before.
    df = df.assign(
        Year=parsed_dates.dt.year,
        Month=parsed_dates.dt.month,
        Day=parsed_dates.dt.day,
    ).drop(columns=["Date"])

In [43]:
# Columns that will be one-hot encoded.
categorical_features = [
    "Store ID",
    "Product ID",
    "Category",
    "Region",
    "Weather Condition",
    "Seasonality",
]

# Columns that will be standardized. Year / Month / Day are the calendar
# parts derived from the original Date column.
# NOTE(review): "Holiday/Promotion" appears in the data preview but is in
# neither list, so it never reaches the model -- confirm this is intentional.
numerical_features = [
    "Inventory Level",
    "Units Sold",
    "Units Ordered",
    "Price",
    "Discount",
    "Competitor Pricing",
    "Year",
    "Month",
    "Day",
]

In [65]:
# One-hot encode the categorical columns into a dense array.
# handle_unknown="ignore" makes categories unseen at fit time encode as all zeros
# instead of raising when the saved encoder is reused later.
# NOTE(review): the encoder is fit on the full frame, before the train/test
# split further down -- consider fitting on the training rows only.
ohe = OneHotEncoder(sparse_output=False, handle_unknown="ignore")
ohe.fit(df[categorical_features])
categorical_encoded = ohe.transform(df[categorical_features])

In [67]:
# Standardize the numerical columns to zero mean and unit variance.
# NOTE(review): the scaler is fit on the full frame, before the train/test
# split further down, so test-row statistics influence the scaling --
# consider fitting on the training rows only.
scaler = StandardScaler().fit(df[numerical_features])
numerical_scaled = scaler.transform(df[numerical_features])


In [69]:
# Sanity check: (rows, one-hot columns) produced by the encoder.
categorical_encoded.shape

(73100, 42)

In [73]:
# Sanity check: (rows, scaled numeric columns); the row count should match the encoded array.
numerical_scaled.shape

(73100, 9)

In [75]:
# Build the design matrix: scaled numeric columns first, then the one-hot
# columns, stacked side by side (column-wise).
# numpy is imported in the notebook's top import cell rather than here.
X = np.hstack((numerical_scaled, categorical_encoded))

# Regression target. to_numpy() is the pandas-recommended replacement for .values.
y = df["Demand Forecast"].to_numpy()


In [77]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [79]:
# Fit a 100-tree random forest regressor on the training split.
# random_state is fixed so repeated runs build the same forest.
model = RandomForestRegressor(
    random_state=42,
    n_estimators=100,
)
model.fit(X_train, y_train)

In [81]:
# Score the fitted model on the held-out rows.
y_pred = model.predict(X_test)

# Mean absolute error and mean squared error between actual and predicted targets.
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)

print(f"MAE: {mae}, MSE: {mse}")

MAE: 7.579870827633379, MSE: 78.42998693564638


In [83]:
# Persist the trained model together with the fitted encoder and scaler so the
# same preprocessing can be re-applied at inference time.
# Files are written to the current working directory.
joblib.dump(value=model, filename="retail_demand_model.pkl")
joblib.dump(value=ohe, filename="ohe_encoder.pkl")
joblib.dump(value=scaler, filename="scaler.pkl")

['scaler.pkl']