# 1. Data Generation
This notebook generates synthetic sales and SKU metadata for the Smart Inventory Replenishment System.

In [1]:
# Set up
import numpy as np, pandas as pd, os
np.random.seed(42)

In [2]:
# Generate synthetic sales data
n_days = 180
end_date = pd.Timestamp.today().floor('D')
start_date = end_date - pd.Timedelta(days=n_days-1)
dates = pd.date_range(start=start_date, end=end_date)
sku_ids = [f'SKU_{i:02d}' for i in range(1, 11)]
data_rows = []
intermittent_skus = ['SKU_02', 'SKU_07']
for sku in sku_ids:
    base_daily = np.random.randint(5, 201)
    for date in dates:
        dow = date.weekday()
        if np.random.rand() < 0.5:
            weekly_factor = 1.0 + np.random.uniform(0.10, 0.20) if dow >= 5 else 1.0
        else:
            weekly_factor = 1.0
        pct_trend = np.random.uniform(-0.05, 0.05)
        days_from_start = (date - start_date).days
        trend_multiplier = 1 + pct_trend * (days_from_start / n_days)
        on_promo = 1 if np.random.rand() < 0.10 else 0
        promo_multiplier = 1.0 + np.random.uniform(0.30, 1.00) if on_promo else 1.0
        if sku in intermittent_skus:
            base = max(1, int(base_daily * 0.1))
            if np.random.rand() < 0.85:
                lam = 0.0
            else:
                lam = base * weekly_factor * trend_multiplier * promo_multiplier
            units = np.random.poisson(lam=max(0.1, lam))
            if np.random.rand() < 0.02:
                units += np.random.randint(10, 80)
        else:
            lam = base_daily * weekly_factor * trend_multiplier * promo_multiplier
            units = np.random.poisson(lam=max(0.1, lam))
        unit_cost = round(float(np.random.uniform(20, 500)), 2)
        base_price = round(unit_cost * 1.5, 2)
        price = round(base_price * (1 + np.random.uniform(-0.02, 0.02)), 2)
        avg_views = int(max(1, units * np.random.randint(5, 20)))
        views = int(max(0, np.random.poisson(lam=max(1, avg_views))))
        data_rows.append({
            'date': date.strftime('%Y-%m-%d'),
            'sku_id': sku,
            'units_sold': int(units),
            'price': float(price),
            'on_promo': int(on_promo),
            'views': int(views)
        })
sales_df = pd.DataFrame(data_rows)
os.makedirs('../data', exist_ok=True)
sales_df.to_csv('../data/synthetic_sales.csv', index=False)
sales_df.head(10)

Unnamed: 0,date,sku_id,units_sold,price,on_promo,views
0,2025-02-23,SKU_01,110,101.81,0,999
1,2025-02-24,SKU_01,97,160.78,0,972
2,2025-02-25,SKU_01,90,311.27,0,1089
3,2025-02-26,SKU_01,101,521.39,0,1121
4,2025-02-27,SKU_01,115,180.21,0,2044
5,2025-02-28,SKU_01,124,307.03,1,1124
6,2025-03-01,SKU_01,95,333.43,0,1161
7,2025-03-02,SKU_01,92,475.53,0,994
8,2025-03-03,SKU_01,106,549.21,0,1453
9,2025-03-04,SKU_01,105,674.3,0,1786


In [3]:
# Generate SKU metadata
categories = ['Skincare', 'Makeup', 'Haircare', 'Supplements']
metadata_rows = []
for sku in sku_ids:
    metadata_rows.append({
        'sku_id': sku,
        'category': np.random.choice(categories),
        'lead_time_days': int(np.random.randint(3, 15)),
        'init_stock': int(np.random.randint(50, 1001)),
        'unit_cost': round(float(np.random.uniform(20, 500)), 2),
        'holding_cost_per_unit_per_year': round(0.1 * np.random.uniform(20, 500), 2),
        'ordering_cost': 500
    })
meta_df = pd.DataFrame(metadata_rows)
meta_df.to_csv('../data/sku_metadata.csv', index=False)
meta_df.head(10)

Unnamed: 0,sku_id,category,lead_time_days,init_stock,unit_cost,holding_cost_per_unit_per_year,ordering_cost
0,SKU_01,Supplements,4,575,27.55,15.91,500
1,SKU_02,Makeup,9,309,473.15,47.5,500
2,SKU_03,Supplements,14,909,323.21,29.69,500
3,SKU_04,Haircare,12,626,474.27,43.49,500
4,SKU_05,Supplements,4,295,45.66,16.8,500
5,SKU_06,Makeup,11,727,345.56,44.45,500
6,SKU_07,Skincare,6,330,415.82,49.37,500
7,SKU_08,Haircare,7,441,259.67,35.69,500
8,SKU_09,Makeup,6,558,227.22,4.99,500
9,SKU_10,Haircare,10,825,191.53,35.4,500
