# 01 Data

We generate a demand-like time series with:
- trend
- seasonality
- noise
- occasional shocks

This creates realistic conditions to test stability and tail risk.


In [3]:
import numpy as np
import pandas as pd
from pathlib import Path

# Output directory for processed artifacts; created on demand.
PROC = Path("../data/processed")
PROC.mkdir(parents=True, exist_ok=True)

# Fixed seed so the synthetic series is reproducible. The order of the random
# draws below (noise -> shock positions -> shock signs) determines the result.
np.random.seed(11)

N_DAYS = 1200
dates = pd.date_range("2020-01-01", periods=N_DAYS, freq="D")

# Components: linear trend from 80 to 130, a sinusoid of amplitude 12 driven
# by day-of-year (period 365), and Gaussian noise with standard deviation 6.
trend = np.linspace(80, 130, N_DAYS)
day_of_year = dates.dayofyear.to_numpy()
season = 12 * np.sin(2 * np.pi * day_of_year / 365)
noise = np.random.normal(0, 6, N_DAYS)

y = trend + season + noise

# Shocks: 10 distinct days drawn from positions 200..1099, each shifted by
# +25 or -25 for that single day.
shock_idx = np.random.choice(np.arange(200, 1100), size=10, replace=False)
shock_sizes = np.random.choice([25, -25], size=shock_idx.size)
y[shock_idx] = y[shock_idx] + shock_sizes

# Flag every row within 3 days (inclusive) of a shock, clipped to the valid
# row range of the series.
window_rows = np.clip(shock_idx[:, None] + np.arange(-3, 4), 0, N_DAYS - 1)
in_shock_window = np.zeros(N_DAYS, dtype=bool)
in_shock_window[window_rows.ravel()] = True

df = pd.DataFrame(
    {
        "date": dates,
        "y": y,
        "is_shock_window": in_shock_window,
    }
)

df.to_parquet(PROC / "series.parquet", index=False)
df.head()


Unnamed: 0,date,y,is_shock_window
0,2020-01-01,90.703289,False
1,2020-01-02,78.738323,False
2,2020-01-03,77.795448,False
3,2020-01-04,65.030822,False
4,2020-01-05,81.148675,False
