## Forecasting

In [2]:
import pandas as pd
import numpy as np

In [4]:
df = pd.read_csv('Sales_Data.csv')

## Preprocessing

In [5]:
# Convert date to datetime and sort

df['date'] = pd.to_datetime(df['date'])
df = df.sort_values('date')

In [6]:
# Aggregate Demand by Date

daily_demand = df.groupby('date')['demand_quantity'].sum().reset_index()

# Machine Learning Models

## Feature Engineering

In [7]:
# ML models don’t understand time by default
# Unlike models like ARIMA or Prophet, XGBoost and LSTM need time-encoded patterns manually. These features help them "see" the demand behavior over time.

df_ml = daily_demand.copy()
df_ml['day_of_week'] = df_ml['date'].dt.dayofweek
df_ml['month'] = df_ml['date'].dt.month
df_ml['lag_1'] = df_ml['demand_quantity'].shift(1)
df_ml['rolling_7'] = df_ml['demand_quantity'].rolling(7).mean()
df_ml = df_ml.dropna()

## Gradient Boosting (XGBoost)

In [8]:
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error

In [9]:
from sklearn.model_selection import train_test_split

# Step 1: Define features and target
features = ['lag_1', 'rolling_7', 'day_of_week', 'month']
target = 'demand_quantity'

X = df_ml[features]
y = df_ml[target]

# Step 2: Split the data (e.g., 80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)  # no shuffle for time series

# For time series data, you should not shuffle the rows. You want the model to learn from the past and predict the future

In [10]:
model = XGBRegressor()

In [11]:
model.fit(X_train,y_train)

In [12]:
preds = model.predict(X_test)

In [13]:
from sklearn.metrics import mean_squared_error, r2_score

# Actual values
y_true = y_test.values
y_pred = preds

# Metrics
XGB_mae = mean_squared_error(y_true, y_pred)
XGB_rmse = np.sqrt(XGB_mae)
XGB_r2 = r2_score(y_true, y_pred)

# Print results
print("XGBoost Model Evaluation:")
print(f"MAE: {XGB_mae:.2f}")
print(f"RMSE: {XGB_rmse:.2f}")
print(f"R² Score: {XGB_r2:.2f}")

XGBoost Model Evaluation:
MAE: 1941291069.51
RMSE: 44060.08
R² Score: -0.10


In [14]:
df_result = pd.DataFrame([{
    'Model': 'XGB',   
    'MAE': XGB_mae,   
    'RMSE': XGB_rmse, 
    'R2': XGB_r2      
}])

df_result.to_csv('XGB_result.csv', index=False)

## Build Forecast DataFrame (Inventory optimization)

In [17]:
forecast_df = X_test.copy()
forecast_df['predicted_demand'] = y_pred

# If you have product_ID and date columns available from the original df:
forecast_df['product_ID'] = df.loc[X_test.index, 'product_ID']
forecast_df['date'] = df.loc[X_test.index, 'date']

# Reorder
forecast_df = forecast_df[['date', 'product_ID', 'predicted_demand']]
forecast_df = forecast_df.sort_values(by=['product_ID', 'date'])

In [18]:
forecast_df.to_csv("ml_forecast_demand.csv", index=False)

In [19]:
from scipy.stats import norm

In [20]:
# Load your forecast file
df = pd.read_csv("ml_forecast_demand.csv")


In [21]:
# Set parameters
lead_time = 7  # days
z_value = norm.ppf(0.95)  # service level = 95%


In [22]:
# Group forecast by product
inventory_rules = []

for product_id, group in df.groupby("product_ID"):
    group = group.sort_values("date")
    
    # Demand over lead time
    lead_time_demand = group['predicted_demand'].head(lead_time)
    
    avg_demand = lead_time_demand.mean()
    std_demand = lead_time_demand.std()
    
    # ROP = avg_demand * lead_time + safety_stock
    safety_stock = z_value * std_demand
    rop = avg_demand * lead_time + safety_stock
    
    inventory_rules.append({
        "product_ID": product_id,
        "avg_demand": avg_demand,
        "std_dev": std_demand,
        "safety_stock": safety_stock,
        "ROP": rop
    })


In [23]:
# Convert to DataFrame
inventory_df = pd.DataFrame(inventory_rules)
inventory_df.to_csv("inventory_rules_from_ml.csv", index=False)