Retail Inventory and Pricing Forecasting Project

In [1]:
import pandas as pd 
import numpy as np 
from sklearn.model_selection import train_test_split, GridSearchCV 
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score 
from sklearn.preprocessing import LabelEncoder, StandardScaler 
import xgboost as xgb 
import joblib

1. Load Data

In [3]:
df = pd.read_csv("retail_store_inventory.csv")

2. Preprocessing

In [6]:
df['Date'] = pd.to_datetime(df['Date']) 
df.sort_values(['Store ID', 'Product ID', 'Date'], inplace=True)

In [9]:
#Encode categorical columns
cat_cols = ['Store ID', 'Product ID', 'Category', 'Region', 'Weather Condition', 'Seasonality'] 
le_dict = {} 
for col in cat_cols: 
    le = LabelEncoder() 
    df[col] = le.fit_transform(df[col]) 
    le_dict[col] = le

3. Feature Engineering

In [12]:
df['day_of_week'] = df['Date'].dt.dayofweek 
df['month'] = df['Date'].dt.month 
df['year'] = df['Date'].dt.year

In [14]:
def create_lag_features(group): 
    group['lag_7'] = group['Units Sold'].shift(7) 
    group['lag_14'] = group['Units Sold'].shift(14) 
    group['rolling_mean_7'] = group['Units Sold'].shift(1).rolling(window=7).mean() 
    group['rolling_std_7'] = group['Units Sold'].shift(1).rolling(window=7).std() 
    return group

df = df.groupby(['Store ID', 'Product ID']).apply(create_lag_features) 
df.dropna(inplace=True)

  df = df.groupby(['Store ID', 'Product ID']).apply(create_lag_features)


4. Feature Selection

In [17]:
features = [ 'Store ID', 'Product ID', 'Category', 'Region', 'Inventory Level', 'Units Ordered', 'Demand Forecast', 'Price', 'Discount', 'Weather Condition', 'Holiday/Promotion', 'Competitor Pricing', 'Seasonality', 'day_of_week', 'month', 'year', 'lag_7', 'lag_14', 'rolling_mean_7', 'rolling_std_7' ] 
X = df[features] 
y = df['Units Sold']

5. Train-Test Split

In [20]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

6. Scaling

In [23]:
scaler = StandardScaler() 
X_train_scaled = scaler.fit_transform(X_train) 
X_test_scaled = scaler.transform(X_test)

7. Model Training with GridSearchCV

In [26]:
params = { 'n_estimators': [100, 150], 'max_depth': [3, 5], 'learning_rate': [0.1, 0.05] } 
model = xgb.XGBRegressor(random_state=42) 
gs = GridSearchCV(model, params, cv=3, scoring='neg_root_mean_squared_error') 
gs.fit(X_train_scaled, y_train)

In [36]:
best_model = gs.best_estimator_

8. Evaluation

In [39]:
y_pred = best_model.predict(X_test_scaled) 
print("RMSE:", mean_squared_error(y_test, y_pred, squared=False)) 
print("MAE:", mean_absolute_error(y_test, y_pred)) 
print("R2:", r2_score(y_test, y_pred))


RMSE: 8.374999632734818
MAE: 7.160161323876561
R2: 0.9941677451133728




9. Save Model and Scaler

In [42]:
joblib.dump(best_model, "xgb_inventory_model.pkl")

['xgb_inventory_model.pkl']

In [44]:
joblib.dump(scaler, "inventory_scaler.pkl")

['inventory_scaler.pkl']

In [46]:
joblib.dump(le_dict, "le_dict.pkl")

['le_dict.pkl']

10. Inventory Management & Dynamic Pricing Logic

In [49]:
def calculate_inventory_recommendation(predicted_sales, lead_time_days=7, safety_buffer=0.15): 
        safety_stock = predicted_sales * safety_buffer 
        reorder_point = predicted_sales * lead_time_days + safety_stock 
        recommended_stock = predicted_sales + safety_stock 
        return { 
            'Predicted Sales': round(predicted_sales, 2), 
            'Safety Stock': round(safety_stock, 2), 
            'Recommended Stock Level': round(recommended_stock, 2), 
            'Reorder Point': round(reorder_point, 2) }

In [51]:
def dynamic_pricing_strategy(predicted_sales, base_price, inventory_level, competitor_price, elasticity=0.1): 
    demand_gap = predicted_sales - inventory_level 
    price_adjustment = elasticity * (demand_gap / predicted_sales) 
    competitor_adjustment = (competitor_price - base_price) * 0.2 
    new_price = base_price + base_price * price_adjustment + competitor_adjustment 
    return round(max(new_price, 0.01), 2)


In [59]:
def predict_units_sold(input_data): 
    input_df = pd.DataFrame([input_data]) 
    for col in cat_cols:
        value=input_df[col].iloc[0]
        encoder=le_dict[col]
        if value not in encoder.classes_:
            encoder.classes_=np.append(encoder.classes_,value)
        input_df[col]=encoder.transform([value])
        
    input_df['Date'] = pd.to_datetime(input_df['Date']) 
    input_df['day_of_week'] = input_df['Date'].dt.dayofweek 
    input_df['month'] = input_df['Date'].dt.month 
    input_df['year'] = input_df['Date'].dt.year
    input_df=input_df.drop(columns=['Date'])
        
    input_df = input_df[features]
    input_scaled=scaler.transform(input_df)
        
    predicted_sales = best_model.predict(input_scaled)[0]

    recommendation = calculate_inventory_recommendation(predicted_sales)
    new_price = dynamic_pricing_strategy(predicted_sales, input_data['Price'], input_data['Inventory Level'], input_data['Competitor Pricing'])

    recommendation['Suggested Price'] = new_price
    return recommendation

In [61]:
if __name__ == "__main__":
    print("Enter input values:")
    
    input_data = {
        'Store ID': input("Store ID (e.g., Store_01): "),
        'Product ID': input("Product ID (e.g., P001): "),
        'Category': input("Category (e.g., Snacks): "),
        'Region': input("Region (e.g., East): "),
        'Inventory Level': float(input("Inventory Level: ")),
        'Units Ordered': float(input("Units Ordered: ")),
        'Demand Forecast': float(input("Demand Forecast: ")),
        'Price': float(input("Price: ")),
        'Discount': float(input("Discount: ")),
        'Weather Condition': input("Weather Condition (e.g., Sunny): "),
        'Holiday/Promotion': int(input("Holiday/Promotion (0 or 1): ")),
        'Competitor Pricing': float(input("Competitor Pricing: ")),
        'Seasonality': input("Seasonality (e.g., Spring): "),
        'Date': input("Date (YYYY-MM-DD): "),
        'lag_7': float(input("Lag 7-day Units Sold: ")),
        'lag_14': float(input("Lag 14-day Units Sold: ")),
        'rolling_mean_7': float(input("Rolling Mean (7d): ")),
        'rolling_std_7': float(input("Rolling Std Dev (7d): "))
    }

    result = predict_units_sold(input_data)
    
    print("\n--- Inventory & Pricing Recommendation ---")
    for key, value in result.items():
        print(f"{key}: {value}")

Enter input values:


Store ID (e.g., Store_01):  Store_01
Product ID (e.g., P001):  P001
Category (e.g., Snacks):  Snacks
Region (e.g., East):  East
Inventory Level:  90
Units Ordered:  100
Demand Forecast:  110
Price:  1.99
Discount:  0.1
Weather Condition (e.g., Sunny):  Sunny
Holiday/Promotion (0 or 1):  1
Competitor Pricing:  2.09
Seasonality (e.g., Spring):  Spring
Date (YYYY-MM-DD):  2025-05-10
Lag 7-day Units Sold:  105
Lag 14-day Units Sold:  100
Rolling Mean (7d):  107
Rolling Std Dev (7d):  3.2



--- Inventory & Pricing Recommendation ---
Predicted Sales: 94.30000305175781
Safety Stock: 14.15
Recommended Stock Level: 108.45
Reorder Point: 674.27
Suggested Price: 2.02
