In [1]:
from pathlib import Path

import numpy as np
import pandas as pd

# Seed the legacy global NumPy RNG so every run regenerates the same dataset.
np.random.seed(42)

# Simulation calendar: one row per SKU per day over two full years.
dates = pd.date_range(start="2024-01-01", end="2025-12-31", freq="D")

# Product catalogue.
n_skus = 50
skus = [f"SKU_{i}" for i in range(1, n_skus + 1)]
categories = ["Electronics", "Fashion", "Home", "Beauty", "Sports"]

# Calendar-driven terms depend only on the date, never on the SKU or on any
# random draw, so they are evaluated once per day and reused for every SKU:
#   * weekend lift  : +10 when weekday() >= 5 (Saturday / Sunday)
#   * year-end lift : +20 in November and December
#   * holiday flag  : 1 on Jan 1st and Dec 25th, else 0
day_effects = [
    (
        day,
        10 if day.weekday() >= 5 else 0,
        20 if day.month in [11, 12] else 0,
        1 if day.strftime("%m-%d") in ["01-01", "12-25"] else 0,
    )
    for day in dates
]


def _simulate_sku(sku_id):
    """Yield one ``[date, sku, category, price, promotion, holiday, demand]``
    row per calendar day for a single SKU.

    Random draws come from the global ``np.random`` state in a fixed order:
    base demand, category and trend end-point once per SKU, then promotion
    flag, price and noise once per day. The order matters because changing
    it would change the generated values for a given seed.
    """
    base_level = np.random.randint(20, 100)
    sku_category = np.random.choice(categories)
    # Linear upward drift from 0 to a random end-point across the whole period.
    trend_path = np.linspace(0, np.random.randint(5, 20), len(dates))

    for step, (day, weekend_lift, year_end_lift, is_holiday) in enumerate(day_effects):
        # Per-day random inputs: 10% promotion chance, uniform price, Gaussian noise.
        on_promo = np.random.binomial(1, 0.1)
        unit_price = np.random.uniform(10, 100)
        shock = np.random.normal(0, 5)

        promo_lift = 25 if on_promo == 1 else 0
        holiday_lift = 40 if is_holiday == 1 else 0

        # Additive demand model with a linear price penalty of 0.3 per price unit.
        raw_demand = (
            base_level
            + trend_path[step]
            + weekend_lift
            + year_end_lift
            + promo_lift
            + holiday_lift
            - 0.3 * unit_price
            + shock
        )

        yield [
            day,
            sku_id,
            sku_category,
            round(unit_price, 2),
            on_promo,
            is_holiday,
            max(0, round(raw_demand)),  # demand is a non-negative integer
        ]


# SKUs are simulated strictly one after another, so the RNG stream is consumed
# SKU by SKU, day by day.
data = [row for sku in skus for row in _simulate_sku(sku)]

df = pd.DataFrame(
    data,
    columns=["date", "sku_id", "category", "price", "promotion", "holiday", "demand"],
)

df.head()


Unnamed: 0,date,sku_id,category,price,promotion,holiday,demand
0,2024-01-01,SKU_1,Sports,63.88,0,1,91
1,2024-01-02,SKU_1,Sports,87.96,0,0,43
2,2024-01-03,SKU_1,Sports,73.73,0,0,47
3,2024-01-04,SKU_1,Sports,26.51,0,0,66
4,2024-01-05,SKU_1,Sports,57.23,0,0,45


In [2]:
# Persist the synthetic dataset as CSV (without the positional index column).
# The parent folder is created first because DataFrame.to_csv raises OSError
# when the target directory does not exist, which would break a run in an
# environment where ../data has not been created yet.
output_path = Path("../data/retail_demand_data.csv")
output_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(output_path, index=False)

