# 1. Data Generation
This notebook generates synthetic sales and SKU metadata for the Smart Inventory Replenishment System.

In [1]:
# Set up
import numpy as np, pandas as pd, os
np.random.seed(42)

In [4]:
# Generate enhanced synthetic sales data for realism and scale
n_days = 365 * 2  # 2 years of daily data
end_date = pd.Timestamp.today().floor('D')
start_date = end_date - pd.Timedelta(days=n_days-1)
dates = pd.date_range(start=start_date, end=end_date)
sku_ids = [f'SKU_{i:02d}' for i in range(1, 21)]  # 20 SKUs for more variety
data_rows = []
intermittent_skus = ['SKU_02', 'SKU_07', 'SKU_15']
seasonal_skus = ['SKU_03', 'SKU_10', 'SKU_18']
for sku in sku_ids:
    base_daily = np.random.randint(10, 300)
    # Assign seasonality amplitude and phase for some SKUs
    season_amp = np.random.uniform(0.1, 0.4) if sku in seasonal_skus else 0.0
    season_phase = np.random.randint(0, 365)
    for date in dates:
        dow = date.weekday()
        # Weekly pattern: weekends higher for some SKUs
        if np.random.rand() < 0.5:
            weekly_factor = 1.0 + np.random.uniform(0.10, 0.30) if dow >= 5 else 1.0
        else:
            weekly_factor = 1.0
        # Yearly seasonality (e.g., holidays, summer, etc.)
        day_of_year = date.timetuple().tm_yday
        seasonality = 1.0 + season_amp * np.sin(2 * np.pi * (day_of_year + season_phase) / 365)
        # Trend: slow increase or decrease
        pct_trend = np.random.uniform(-0.03, 0.08)
        days_from_start = (date - start_date).days
        trend_multiplier = 1 + pct_trend * (days_from_start / n_days)
        # Promotions: more frequent for some SKUs
        promo_prob = 0.15 if sku in ['SKU_01', 'SKU_05', 'SKU_10'] else 0.08
        on_promo = 1 if np.random.rand() < promo_prob else 0
        promo_multiplier = 1.0 + np.random.uniform(0.25, 1.2) if on_promo else 1.0
        # Intermittent demand logic
        if sku in intermittent_skus:
            base = max(1, int(base_daily * 0.12))
            if np.random.rand() < 0.90:
                lam = 0.0
            else:
                lam = base * weekly_factor * seasonality * trend_multiplier * promo_multiplier
            units = np.random.poisson(lam=max(0.1, lam))
            if np.random.rand() < 0.03:
                units += np.random.randint(10, 120)  # occasional spikes
        else:
            lam = base_daily * weekly_factor * seasonality * trend_multiplier * promo_multiplier
            units = np.random.poisson(lam=max(0.1, lam))
            # Occasional outlier/spike
            if np.random.rand() < 0.01:
                units += np.random.randint(50, 200)
        # Price and cost logic
        unit_cost = round(float(np.random.uniform(20, 800)), 2)
        base_price = round(unit_cost * np.random.uniform(1.3, 1.7), 2)
        price = round(base_price * (1 + np.random.uniform(-0.03, 0.03)), 2)
        avg_views = int(max(1, units * np.random.randint(8, 30)))
        views = int(max(0, np.random.poisson(lam=max(1, avg_views))))
        data_rows.append({
            'date': date.strftime('%Y-%m-%d'),
            'sku_id': sku,
            'units_sold': int(units),
            'price': float(price),
            'on_promo': int(on_promo),
            'views': int(views)
        })
sales_df = pd.DataFrame(data_rows)
os.makedirs('../data', exist_ok=True)
sales_df.to_csv('../data/synthetic_sales.csv', index=False)
sales_df.head(10)

Unnamed: 0,date,sku_id,units_sold,price,on_promo,views
0,2023-08-28,SKU_01,410,1086.39,1,5741
1,2023-08-29,SKU_01,258,825.41,0,2327
2,2023-08-30,SKU_01,271,289.37,0,3222
3,2023-08-31,SKU_01,262,1033.81,0,3095
4,2023-09-01,SKU_01,253,838.93,0,7348
5,2023-09-02,SKU_01,249,862.97,0,5997
6,2023-09-03,SKU_01,264,915.35,0,6442
7,2023-09-04,SKU_01,234,1258.03,0,5749
8,2023-09-05,SKU_01,251,495.36,0,5179
9,2023-09-06,SKU_01,274,488.84,0,2423


In [5]:
# Generate SKU metadata
categories = ['Skincare', 'Makeup', 'Haircare', 'Supplements']
metadata_rows = []
for sku in sku_ids:
    metadata_rows.append({
        'sku_id': sku,
        'category': np.random.choice(categories),
        'lead_time_days': int(np.random.randint(3, 15)),
        'init_stock': int(np.random.randint(50, 1001)),
        'unit_cost': round(float(np.random.uniform(20, 500)), 2),
        'holding_cost_per_unit_per_year': round(0.1 * np.random.uniform(20, 500), 2),
        'ordering_cost': 500
    })
meta_df = pd.DataFrame(metadata_rows)
meta_df.to_csv('../data/sku_metadata.csv', index=False)
meta_df.head(10)

Unnamed: 0,sku_id,category,lead_time_days,init_stock,unit_cost,holding_cost_per_unit_per_year,ordering_cost
0,SKU_01,Skincare,9,857,240.1,41.26,500
1,SKU_02,Makeup,8,157,118.8,2.58,500
2,SKU_03,Skincare,6,103,205.09,12.68,500
3,SKU_04,Skincare,9,641,76.78,14.77,500
4,SKU_05,Haircare,7,136,214.56,22.02,500
5,SKU_06,Supplements,9,130,135.29,26.33,500
6,SKU_07,Skincare,4,589,319.84,31.16,500
7,SKU_08,Makeup,11,392,358.44,35.97,500
8,SKU_09,Skincare,8,929,385.01,23.94,500
9,SKU_10,Supplements,13,748,224.5,2.04,500
