 # Project Overview

This capstone project implements a data-driven system to optimize inventory management. It replaces static, rule-based reordering policies with a dynamic, predictive engine that forecasts future demand and calculates the optimal Reorder Point (ROP) and Reorder Quantity (ROQ) for each product.

The goal is to minimize total inventory costs by balancing the reduction of costly stockouts (lost sales) against the minimization of expensive holding costs (storage, capital tied up).

# 1. Import Libraries

In [3]:
# Install Statsmodels
! pip install statsmodels

Defaulting to user installation because normal site-packages is not writeable



[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: C:\Python313\python.exe -m pip install --upgrade pip


In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as stats

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error, explained_variance_score, mean_absolute_percentage_error
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.feature_selection import RFE
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR, LinearSVR



  from scipy.stats import gaussian_kde


# 2. Load Data

In [23]:
# Read the retail inventory CSV (expected next to the notebook) and preview the first rows
inventory_csv = 'retail_store_inventory.csv'
data = pd.read_csv(inventory_csv)
data.head(n=5)


Unnamed: 0,Date,Store ID,Product ID,Category,Region,Inventory Level,Units Sold,Units Ordered,Demand Forecast,Price,Discount,Weather Condition,Holiday/Promotion,Competitor Pricing,Seasonality
0,2022-01-01,S001,P0001,Groceries,North,231,127,55,135.47,33.5,20,Rainy,0,29.69,Autumn
1,2022-01-01,S001,P0002,Toys,South,204,150,66,144.04,63.01,20,Sunny,0,66.16,Autumn
2,2022-01-01,S001,P0003,Toys,West,102,65,51,74.02,27.99,10,Sunny,1,31.32,Summer
3,2022-01-01,S001,P0004,Toys,North,469,61,164,62.18,32.72,10,Cloudy,1,34.74,Autumn
4,2022-01-01,S001,P0005,Electronics,East,166,14,135,9.26,73.64,0,Sunny,0,68.95,Summer


In [24]:
# Check the column names, non-null counts, and dtypes of the loaded frame
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 73100 entries, 0 to 73099
Data columns (total 15 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Date                73100 non-null  object 
 1   Store ID            73100 non-null  object 
 2   Product ID          73100 non-null  object 
 3   Category            73100 non-null  object 
 4   Region              73100 non-null  object 
 5   Inventory Level     73100 non-null  int64  
 6   Units Sold          73100 non-null  int64  
 7   Units Ordered       73100 non-null  int64  
 8   Demand Forecast     73100 non-null  float64
 9   Price               73100 non-null  float64
 10  Discount            73100 non-null  int64  
 11  Weather Condition   73100 non-null  object 
 12  Holiday/Promotion   73100 non-null  int64  
 13  Competitor Pricing  73100 non-null  float64
 14  Seasonality         73100 non-null  object 
dtypes: float64(3), int64(5), object(7)
memory usage: 8.4+

- No missing values: every one of the 15 columns has 73,100 non-null entries.

In [27]:
# List the column labels of `data`.
# NOTE(review): the saved output below already contains 'SKU_Compound_ID', which is only
# created in Section 3, so this cell was executed after that one (out of order).
# On a fresh top-to-bottom run the output will not include that column.
data.columns

Index(['Date', 'Store ID', 'Product ID', 'Category', 'Region',
       'Inventory Level', 'Units Sold', 'Units Ordered', 'Demand Forecast',
       'Price', 'Discount', 'Weather Condition', 'Holiday/Promotion',
       'Competitor Pricing', 'Seasonality', 'SKU_Compound_ID'],
      dtype='object')

# 3. Data Preparation and Feature Engineering

In [25]:
# Parse 'Date' as datetime, then build 'SKU_Compound_ID' as "<Store ID>_<Product ID>"
# so each store/product pair can be handled as its own series
data['Date'] = pd.to_datetime(data['Date'])
data['SKU_Compound_ID'] = data['Store ID'].str.cat(data['Product ID'], sep='_')

In [26]:
# Order rows by SKU and then by date so that each SKU's history is contiguous and chronological
sku_date_keys = ['SKU_Compound_ID', 'Date']
data.sort_values(by=sku_date_keys, inplace=True)

In [2]:
# Define a helper function to shift the series before calculating rolling features
# This prevents data leakage by ensuring that the rolling calculations do not include the current day's data
def create_rolling_feature(series, window, func):
    """Apply a rolling statistic over the previous `window` rows, excluding the current row.

    Parameters
    ----------
    series : pandas.Series or pandas SeriesGroupBy
        Values to summarise. When a grouped series is passed, the shift and the
        rolling window are both evaluated inside each group, so a window never
        mixes rows from different groups regardless of row order.
    window : int
        Number of prior rows in each window.
    func : callable
        Reducer applied to each window; it receives a NumPy array (raw=True).

    Returns
    -------
    pandas.Series
        Rolling statistic aligned to the index of the input. Rows without
        `window` prior observations (within their group, if grouped) are NaN.
    """
    def _shift_then_roll(values):
        # shift(1) drops the current row from its own window (no leakage)
        return values.shift(1).rolling(window=window).apply(func, raw=True)

    if isinstance(series, pd.Series):
        return _shift_then_roll(series)

    # Grouped input: transform runs the helper per group and re-aligns the
    # result to the original index, keeping windows inside group boundaries.
    return series.transform(_shift_then_roll)

In [None]:
# Lagged Demand: units sold 7, 14 and 30 rows earlier within the same SKU
units_sold_by_sku = data.groupby('SKU_Compound_ID')['Units Sold']
for lag_days in (7, 14, 30):
    data[f'demand_lag_{lag_days}'] = units_sold_by_sku.shift(lag_days)


In [None]:
# Rolling Mean and Std Dev Features (7-row window per SKU, current row excluded)
# Note: np.std computes the population standard deviation (ddof=0)
for stat_label, stat_func in (('Mean', np.mean), ('Std', np.std)):
    data[f'Rolling{stat_label}_7'] = create_rolling_feature(
        data.groupby('SKU_Compound_ID')['Units Sold'], 7, stat_func
    )

In [30]:
# Rolling mean and std dev of units sold per SKU over 14- and 30-row windows
# (current row excluded by the helper). np.std is the population std (ddof=0).
for window_days in (14, 30):
    for stat_label, stat_func in (('Mean', np.mean), ('Std', np.std)):
        data[f'Rolling{stat_label}_{window_days}'] = create_rolling_feature(
            data.groupby('SKU_Compound_ID')['Units Sold'], window_days, stat_func
        )