### Intro

#### This notebook creates simulated data that can be used to build a marketing mix model (MMM)

In [1]:
#pulling the packages that I need
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

### Creating the simulated data

In [3]:
# Set seed for reproducibility.
# This seeds NumPy's global legacy RNG, which every later np.random.* call in this
# notebook draws from. The generated numbers therefore only reproduce when the
# cells are run top to bottom, once each, after a fresh kernel restart.
np.random.seed(42)

In [9]:
# -----------------------------
# 1) 4-4-5 calendar helpers
# -----------------------------
def generate_445_weeks(start_date: str, years: int = 2):
    """
    Lay out consecutive weekly periods on a 4-4-5 fiscal calendar.

    Every fiscal quarter holds three fiscal months of 4, 4 and 5 weeks, so one
    fiscal year is always 52 weeks long.

    Args:
        start_date: start of the first week, formatted as "%Y-%m-%d".
        years: number of 52-week fiscal years to generate.

    Returns:
        A tuple (dates, fiscal_month, fiscal_quarter) of equal-length lists
        (52 * years entries each):
            dates: week-start datetimes, one week apart, beginning at start_date
            fiscal_month: fiscal month number (1..12), restarting every year
            fiscal_quarter: fiscal quarter number (1..4), restarting every year
    """
    first_week = datetime.strptime(start_date, "%Y-%m-%d")
    weeks_in_month = [4, 4, 5] * 4  # twelve fiscal months
    weeks_in_year = sum(weeks_in_month)  # 52

    # Fiscal month label for each week of a single fiscal year, in order.
    month_of_week = [
        month_no
        for month_no, week_count in enumerate(weeks_in_month, start=1)
        for _ in range(week_count)
    ]

    dates, fiscal_month, fiscal_quarter = [], [], []
    for year_no in range(years):
        year_offset = year_no * weeks_in_year
        for week_no, month_no in enumerate(month_of_week):
            dates.append(first_week + timedelta(weeks=year_offset + week_no))
            fiscal_month.append(month_no)
            # Months 1-3 -> Q1, 4-6 -> Q2, 7-9 -> Q3, 10-12 -> Q4
            fiscal_quarter.append((month_no - 1) // 3 + 1)

    return dates, fiscal_month, fiscal_quarter

In [11]:
# -----------------------------
# 2) Adstock (used internally to generate targets only)
# -----------------------------
def adstock(series: pd.Series, decay: float) -> pd.Series:
    """
    Apply a geometric carry-over (adstock) transform to a series.

    The first output equals the first input; every later output is the current
    input plus `decay` times the previous output.

    Args:
        series: input values, in time order.
        decay: fraction of the previous adstocked value carried into the next step.

    Returns:
        A float Series with the same index as `series`.
    """
    raw = series.values
    total = series.shape[0]
    carried = np.empty(total, dtype=float)
    if total:
        # Nothing to carry over into the first period.
        carried[0] = raw[0]
        for pos in range(1, total):
            carried[pos] = raw[pos] + decay * carried[pos - 1]
    return pd.Series(carried, index=series.index)

In [13]:
# -----------------------------
# 3) Build base time index
# -----------------------------
# Common US/Canadian retail 4-4-5 start (Sunday): 2023-01-29
# Two fiscal years of weekly periods; the three lists are aligned element by element.
dates, fmonths, fquarters = generate_445_weeks(start_date="2023-01-29", years=2)
# Row count reused by every later cell to size its random draws.
n = len(dates)  # 104 weeks

In [15]:
# One row per fiscal week; later cells add columns to this frame in place.
df = pd.DataFrame({
    "week": dates,
    "fiscal_month": fmonths,
    "fiscal_quarter": fquarters
})

In [17]:
# Preview the first weeks to sanity-check the calendar columns
df.head()

Unnamed: 0,week,fiscal_month,fiscal_quarter
0,2023-01-29,1,1
1,2023-02-05,1,1
2,2023-02-12,1,1
3,2023-02-19,1,1
4,2023-02-26,2,1


In [19]:
# Seasonality signals (repeat each 52-week year)
# t is the 0-based week counter; t % 52 restarts the phase at the first week of
# each fiscal year, so both fiscal years share the same seasonal shape.
t = np.arange(n)
yearly = np.sin(2 * np.pi * (t % 52) / 52)            # one big annual wave
semi   = np.sin(4 * np.pi * (t % 52) / 52)            # two bumps per year
# Blend: 70% annual wave, 30% semi-annual wave. These are plain NumPy arrays
# (not DataFrame columns) and are reused by later cells.
seasonal = 0.7 * yearly + 0.3 * semi

In [21]:
# Bake season, keyed by fiscal month number:
#   2 - Valentine's Day, 4 - Easter, 10 - Halloween,
#   11 - American Thanksgiving, 12 - Christmas / New Year's Eve.
# The month list must match the holidays documented above: Halloween is month 10
# (the list used to contain 7, which corresponds to no documented baking holiday).
# bake_season feeds the spend, channel and site-visit columns built in later cells,
# so changing this list changes the generated dataset.
# NOTE(review): these are 4-4-5 fiscal month numbers, which do not line up exactly
# with calendar months — confirm the intended holiday-to-fiscal-month mapping.
bake_season = df["fiscal_month"].isin([2, 4, 10, 11, 12]).astype(int)
df["bake_season"] = bake_season
df["holiday_flag"] = df["fiscal_month"].isin([11, 12]).astype(int)  # heavy holiday demand
# Independent uniform draw per week from the global RNG; True when below 0.20.
df["promo_flag"] = (np.random.rand(n) < 0.20).astype(int)           # ~20% of weeks on promo

In [25]:
# Preview the first weeks with the bake-season, holiday and promo flags added
df.head()

Unnamed: 0,week,fiscal_month,fiscal_quarter,bake_season,holiday_flag,promo_flag
0,2023-01-29,1,1,0,0,0
1,2023-02-05,1,1,0,0,0
2,2023-02-12,1,1,0,0,0
3,2023-02-19,1,1,0,0,0
4,2023-02-26,2,1,1,0,1


In [29]:
# -----------------------------
# 4) Simulate RAW marketing spends (no adstock columns saved)
# -----------------------------
def make_spend(base_low, base_high, seasonal_scale):
    """
    Simulate one weekly spend series of length `n`.

    The series is a uniform baseline drawn from [base_low, base_high) plus
    `seasonal_scale` times the shared `seasonal` signal perturbed by
    Gaussian noise (standard deviation 0.3).

    Reads the notebook-level `n` and `seasonal`, and draws from the global
    NumPy RNG (uniform first, then normal).
    """
    baseline = np.random.uniform(base_low, base_high, size=n)
    seasonal_jitter = np.random.normal(0, 0.3, n)
    return baseline + seasonal_scale * (seasonal + seasonal_jitter)

# Each call consumes the global RNG stream, so the order of these lines affects
# the generated values. Arguments are (baseline low, baseline high, seasonal scale).
# The paid channels get a fixed uplift in bake-season weeks; email is instead
# lifted in promo weeks.
# NOTE(review): the spend unit (e.g. thousands of dollars) is not stated anywhere — confirm.
df["search_spend"]     = make_spend(8, 16, 3.0)   + 1.0 * bake_season
df["social_spend"]     = make_spend(6, 14, 3.0)   + 1.5 * bake_season
df["influencer_spend"] = make_spend(4, 12, 2.5)   + 2.0 * bake_season
df["display_spend"]    = make_spend(5, 12, 2.0)   + 0.5 * bake_season
df["email_spend"]      = make_spend(3,  8, 1.0)   + 0.8 * df["promo_flag"]

In [31]:
# Preview the first weeks with the raw spend columns added
df.head()

Unnamed: 0,week,fiscal_month,fiscal_quarter,bake_season,holiday_flag,promo_flag,search_spend,social_spend,influencer_spend,display_spend,email_spend
0,2023-01-29,1,1,0,0,0,16.939729,8.490115,11.124747,7.952858,5.164115
1,2023-02-05,1,1,0,0,0,10.889299,5.984467,7.273857,12.197289,6.858013
2,2023-02-12,1,1,0,0,0,11.131704,10.207141,9.61893,8.140817,4.092867
3,2023-02-19,1,1,0,0,0,15.976788,10.937152,6.532009,8.212603,4.223407
4,2023-02-26,2,1,1,0,1,11.669776,10.882036,8.323506,11.817972,6.338957


In [33]:
# Preview the last weeks to confirm the spends cover the full two-year range
df.tail()

Unnamed: 0,week,fiscal_month,fiscal_quarter,bake_season,holiday_flag,promo_flag,search_spend,social_spend,influencer_spend,display_spend,email_spend
99,2024-12-22,12,4,1,1,1,13.718172,12.643135,7.391065,10.588295,6.64531
100,2024-12-29,12,4,1,1,1,13.275048,11.763272,5.369685,8.711431,4.241065
101,2025-01-05,12,4,1,1,0,7.095193,5.963418,9.446899,4.545262,6.760721
102,2025-01-12,12,4,1,1,0,9.290396,6.795408,6.782621,6.499564,7.122291
103,2025-01-19,12,4,1,1,0,14.536674,13.971313,9.018337,9.851129,5.57172


In [35]:
# Floor every spend column at zero so no simulated week carries negative spend
for c in ("search_spend", "social_spend", "influencer_spend", "display_spend", "email_spend"):
    df[c] = df[c].clip(lower=0)

In [41]:
# Re-check the first weeks after flooring the spends at zero
df.head()

Unnamed: 0,week,fiscal_month,fiscal_quarter,bake_season,holiday_flag,promo_flag,search_spend,social_spend,influencer_spend,display_spend,email_spend
0,2023-01-29,1,1,0,0,0,16.939729,8.490115,11.124747,7.952858,5.164115
1,2023-02-05,1,1,0,0,0,10.889299,5.984467,7.273857,12.197289,6.858013
2,2023-02-12,1,1,0,0,0,11.131704,10.207141,9.61893,8.140817,4.092867
3,2023-02-19,1,1,0,0,0,15.976788,10.937152,6.532009,8.212603,4.223407
4,2023-02-26,2,1,1,0,1,11.669776,10.882036,8.323506,11.817972,6.338957


In [43]:
# Re-check the last weeks after flooring the spends at zero
df.tail()

Unnamed: 0,week,fiscal_month,fiscal_quarter,bake_season,holiday_flag,promo_flag,search_spend,social_spend,influencer_spend,display_spend,email_spend
99,2024-12-22,12,4,1,1,1,13.718172,12.643135,7.391065,10.588295,6.64531
100,2024-12-29,12,4,1,1,1,13.275048,11.763272,5.369685,8.711431,4.241065
101,2025-01-05,12,4,1,1,0,7.095193,5.963418,9.446899,4.545262,6.760721
102,2025-01-12,12,4,1,1,0,9.290396,6.795408,6.782621,6.499564,7.122291
103,2025-01-19,12,4,1,1,0,14.536674,13.971313,9.018337,9.851129,5.57172


In [45]:
# -----------------------------
# 5) Non-marketing channels (owned/earned/other revenue sources)
# -----------------------------
# Organic search visits influenced by seasonal demand + social & influencer (soft correlation)
# Built as: constant base of 800, the shared seasonal signal, linear lifts from
# social and influencer spend, fixed uplifts in promo and bake-season weeks,
# and Gaussian noise (sd 40) drawn from the global RNG.
df["organic_search_visits"] = (
    800 + 200 * seasonal
    + 8.0 * df["social_spend"]
    + 6.0 * df["influencer_spend"]
    + 40  * df["promo_flag"]
    + 80  * df["bake_season"]
    + np.random.normal(0, 40, n)
)

In [47]:
# Preview the first weeks with organic_search_visits added
df.head()

Unnamed: 0,week,fiscal_month,fiscal_quarter,bake_season,holiday_flag,promo_flag,search_spend,social_spend,influencer_spend,display_spend,email_spend,organic_search_visits
0,2023-01-29,1,1,0,0,0,16.939729,8.490115,11.124747,7.952858,5.164115,976.651511
1,2023-02-05,1,1,0,0,0,10.889299,5.984467,7.273857,12.197289,6.858013,901.343544
2,2023-02-12,1,1,0,0,0,11.131704,10.207141,9.61893,8.140817,4.092867,1053.454051
3,2023-02-19,1,1,0,0,0,15.976788,10.937152,6.532009,8.212603,4.223407,1024.025298
4,2023-02-26,2,1,1,0,1,11.669776,10.882036,8.323506,11.817972,6.338957,1254.448032


In [49]:
# Check column dtypes and confirm that no column contains nulls
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 104 entries, 0 to 103
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   week                   104 non-null    datetime64[ns]
 1   fiscal_month           104 non-null    int64         
 2   fiscal_quarter         104 non-null    int64         
 3   bake_season            104 non-null    int32         
 4   holiday_flag           104 non-null    int32         
 5   promo_flag             104 non-null    int32         
 6   search_spend           104 non-null    float64       
 7   social_spend           104 non-null    float64       
 8   influencer_spend       104 non-null    float64       
 9   display_spend          104 non-null    float64       
 10  email_spend            104 non-null    float64       
 11  organic_search_visits  104 non-null    float64       
dtypes: datetime64[ns](1), float64(6), int32(3), int64(2)
memory usag

In [51]:
# Affiliate sales: related to influencer + email spend, with fixed uplifts in
# bake-season and promo weeks. The continuous `seasonal` signal is not used here;
# the only seasonal effect is the bake_season flag.
# Gaussian noise (sd 120) is drawn from the global RNG.
df["affiliate_sales"] = (
    1200 + 30 * df["influencer_spend"] + 20 * df["email_spend"]
    + 150 * df["bake_season"]
    + 100 * df["promo_flag"]
    + np.random.normal(0, 120, n)
)

In [53]:
# Preview the first weeks with affiliate_sales added
df.head()

Unnamed: 0,week,fiscal_month,fiscal_quarter,bake_season,holiday_flag,promo_flag,search_spend,social_spend,influencer_spend,display_spend,email_spend,organic_search_visits,affiliate_sales
0,2023-01-29,1,1,0,0,0,16.939729,8.490115,11.124747,7.952858,5.164115,976.651511,1741.432845
1,2023-02-05,1,1,0,0,0,10.889299,5.984467,7.273857,12.197289,6.858013,901.343544,1614.857791
2,2023-02-12,1,1,0,0,0,11.131704,10.207141,9.61893,8.140817,4.092867,1053.454051,1588.475493
3,2023-02-19,1,1,0,0,0,15.976788,10.937152,6.532009,8.212603,4.223407,1024.025298,1524.223713
4,2023-02-26,2,1,1,0,1,11.669776,10.882036,8.323506,11.817972,6.338957,1254.448032,2114.894186


In [55]:
# Marketplace sales (e.g., Amazon/Etsy): driven by seasonal demand and holidays.
# The formula has no price term; price_index only enters the sales target later.
# Uses the raw yearly/semi waves with their own weights rather than the blended
# `seasonal` signal. Gaussian noise (sd 180) is drawn from the global RNG.
df["marketplace_sales"] = (
    2000 + 350 * yearly + 200 * semi
    + 250 * df["holiday_flag"]
    + np.random.normal(0, 180, n)
)

In [57]:
# Preview the first weeks with marketplace_sales added
df.head()

Unnamed: 0,week,fiscal_month,fiscal_quarter,bake_season,holiday_flag,promo_flag,search_spend,social_spend,influencer_spend,display_spend,email_spend,organic_search_visits,affiliate_sales,marketplace_sales
0,2023-01-29,1,1,0,0,0,16.939729,8.490115,11.124747,7.952858,5.164115,976.651511,1741.432845,1878.771688
1,2023-02-05,1,1,0,0,0,10.889299,5.984467,7.273857,12.197289,6.858013,901.343544,1614.857791,2140.26992
2,2023-02-12,1,1,0,0,0,11.131704,10.207141,9.61893,8.140817,4.092867,1053.454051,1588.475493,2026.342647
3,2023-02-19,1,1,0,0,0,15.976788,10.937152,6.532009,8.212603,4.223407,1024.025298,1524.223713,2642.863085
4,2023-02-26,2,1,1,0,1,11.669776,10.882036,8.323506,11.817972,6.338957,1254.448032,2114.894186,2113.482168


In [59]:
# Events / popups sales: show up strongly in bake seasons
# Base level 100, more than doubled (+120) in bake-season weeks, plus Gaussian
# noise (sd 35) drawn from the global RNG.
df["events_popups"] = (
    100 + 120 * df["bake_season"]
    + np.random.normal(0, 35, n)
)

In [61]:
# Preview the first weeks with events_popups added
df.head()

Unnamed: 0,week,fiscal_month,fiscal_quarter,bake_season,holiday_flag,promo_flag,search_spend,social_spend,influencer_spend,display_spend,email_spend,organic_search_visits,affiliate_sales,marketplace_sales,events_popups
0,2023-01-29,1,1,0,0,0,16.939729,8.490115,11.124747,7.952858,5.164115,976.651511,1741.432845,1878.771688,101.982747
1,2023-02-05,1,1,0,0,0,10.889299,5.984467,7.273857,12.197289,6.858013,901.343544,1614.857791,2140.26992,118.539246
2,2023-02-12,1,1,0,0,0,11.131704,10.207141,9.61893,8.140817,4.092867,1053.454051,1588.475493,2026.342647,97.532543
3,2023-02-19,1,1,0,0,0,15.976788,10.937152,6.532009,8.212603,4.223407,1024.025298,1524.223713,2642.863085,117.027558
4,2023-02-26,2,1,1,0,1,11.669776,10.882036,8.323506,11.817972,6.338957,1254.448032,2114.894186,2113.482168,222.256605


In [63]:
# Floor the non-marketing channel columns at zero (noise must not push them negative)
for c in ("organic_search_visits", "affiliate_sales", "marketplace_sales", "events_popups"):
    df[c] = df[c].clip(lower=0)

In [65]:
# Re-check the first weeks after flooring the non-marketing channels at zero
df.head()

Unnamed: 0,week,fiscal_month,fiscal_quarter,bake_season,holiday_flag,promo_flag,search_spend,social_spend,influencer_spend,display_spend,email_spend,organic_search_visits,affiliate_sales,marketplace_sales,events_popups
0,2023-01-29,1,1,0,0,0,16.939729,8.490115,11.124747,7.952858,5.164115,976.651511,1741.432845,1878.771688,101.982747
1,2023-02-05,1,1,0,0,0,10.889299,5.984467,7.273857,12.197289,6.858013,901.343544,1614.857791,2140.26992,118.539246
2,2023-02-12,1,1,0,0,0,11.131704,10.207141,9.61893,8.140817,4.092867,1053.454051,1588.475493,2026.342647,97.532543
3,2023-02-19,1,1,0,0,0,15.976788,10.937152,6.532009,8.212603,4.223407,1024.025298,1524.223713,2642.863085,117.027558
4,2023-02-26,2,1,1,0,1,11.669776,10.882036,8.323506,11.817972,6.338957,1254.448032,2114.894186,2113.482168,222.256605


In [75]:
# -----------------------------
# 6) Control variables (non-marketing factors)
# -----------------------------
# Price index: around 1.0 (lower during promo/holiday)
# Gaussian noise (sd 0.03) around 1.0, shifted down by 0.03 in promo weeks and
# by 0.02 in holiday weeks (both apply when a week is promo and holiday).
price_base = 1.0 + 0.03 * np.random.randn(n) - 0.03 * df["promo_flag"] - 0.02 * df["holiday_flag"]
# Hard bounds keep the index inside [0.85, 1.10].
df["price_index"] = np.clip(price_base, 0.85, 1.10)

In [77]:
# Preview the first weeks with price_index added.
# NOTE(review): the saved output of this cell already shows inventory_level,
# weather_index and economic_index, which are only created in later cells — the
# cells were executed out of order. Re-run from a fresh kernel to refresh the
# outputs; the random draws will then follow the top-to-bottom order.
df.head()

Unnamed: 0,week,fiscal_month,fiscal_quarter,bake_season,holiday_flag,promo_flag,search_spend,social_spend,influencer_spend,display_spend,email_spend,organic_search_visits,affiliate_sales,marketplace_sales,events_popups,price_index,inventory_level,weather_index,economic_index
0,2023-01-29,1,1,0,0,0,16.939729,8.490115,11.124747,7.952858,5.164115,976.651511,1741.432845,1878.771688,101.982747,0.996144,0.895558,-0.193065,98.946318
1,2023-02-05,1,1,0,0,0,10.889299,5.984467,7.273857,12.197289,6.858013,901.343544,1614.857791,2140.26992,118.539246,0.943545,0.864019,0.453632,98.951615
2,2023-02-12,1,1,0,0,0,11.131704,10.207141,9.61893,8.140817,4.092867,1053.454051,1588.475493,2026.342647,97.532543,0.983538,0.927044,0.186746,100.989143
3,2023-02-19,1,1,0,0,0,15.976788,10.937152,6.532009,8.212603,4.223407,1024.025298,1524.223713,2642.863085,117.027558,1.002785,0.831578,0.304431,101.768866
4,2023-02-26,2,1,1,0,1,11.669776,10.882036,8.323506,11.817972,6.338957,1254.448032,2114.894186,2113.482168,222.256605,0.974796,0.936158,0.271131,99.973221


In [79]:
# Preview the last weeks with price_index added.
# NOTE(review): the saved output already shows columns created in later cells
# (inventory_level, weather_index, economic_index) — executed out of order.
df.tail()

Unnamed: 0,week,fiscal_month,fiscal_quarter,bake_season,holiday_flag,promo_flag,search_spend,social_spend,influencer_spend,display_spend,email_spend,organic_search_visits,affiliate_sales,marketplace_sales,events_popups,price_index,inventory_level,weather_index,economic_index
99,2024-12-22,12,4,1,1,1,13.718172,12.643135,7.391065,10.588295,6.64531,887.72481,1721.489531,1628.043568,237.774251,0.952939,0.954051,-0.786473,102.283834
100,2024-12-29,12,4,1,1,1,13.275048,11.763272,5.369685,8.711431,4.241065,906.933254,1609.702987,1731.029657,238.840363,0.959242,0.893393,-0.563494,100.798021
101,2025-01-05,12,4,1,1,0,7.095193,5.963418,9.446899,4.545262,6.760721,971.517947,1876.012305,1938.323311,257.537757,0.968251,0.932639,-0.51648,102.069725
102,2025-01-12,12,4,1,1,0,9.290396,6.795408,6.782621,6.499564,7.122291,906.044111,1660.530498,1963.582687,207.226654,0.988074,0.914237,-0.047196,101.947353
103,2025-01-19,12,4,1,1,0,14.536674,13.971313,9.018337,9.851129,5.57172,1023.34377,1881.713565,2126.294195,190.627662,0.969704,0.847682,-0.629537,101.791883


In [69]:
# Inventory level: share of target (penalize some holiday weeks to simulate stock pressure)
# Gaussian noise (sd 0.05) around 0.92, lowered by 0.03 in every holiday week,
# then bounded to [0.70, 1.05].
df["inventory_level"] = np.clip(0.92 + 0.05 * np.random.randn(n) - 0.03 * df["holiday_flag"], 0.70, 1.05)

In [81]:
# Preview the first weeks with inventory_level added.
# NOTE(review): the saved output already shows weather_index and economic_index,
# which are only created in later cells — executed out of order.
df.head()

Unnamed: 0,week,fiscal_month,fiscal_quarter,bake_season,holiday_flag,promo_flag,search_spend,social_spend,influencer_spend,display_spend,email_spend,organic_search_visits,affiliate_sales,marketplace_sales,events_popups,price_index,inventory_level,weather_index,economic_index
0,2023-01-29,1,1,0,0,0,16.939729,8.490115,11.124747,7.952858,5.164115,976.651511,1741.432845,1878.771688,101.982747,0.996144,0.895558,-0.193065,98.946318
1,2023-02-05,1,1,0,0,0,10.889299,5.984467,7.273857,12.197289,6.858013,901.343544,1614.857791,2140.26992,118.539246,0.943545,0.864019,0.453632,98.951615
2,2023-02-12,1,1,0,0,0,11.131704,10.207141,9.61893,8.140817,4.092867,1053.454051,1588.475493,2026.342647,97.532543,0.983538,0.927044,0.186746,100.989143
3,2023-02-19,1,1,0,0,0,15.976788,10.937152,6.532009,8.212603,4.223407,1024.025298,1524.223713,2642.863085,117.027558,1.002785,0.831578,0.304431,101.768866
4,2023-02-26,2,1,1,0,1,11.669776,10.882036,8.323506,11.817972,6.338957,1254.448032,2114.894186,2113.482168,222.256605,0.974796,0.936158,0.271131,99.973221


In [83]:
# Preview the last weeks with inventory_level added.
# NOTE(review): the saved output already shows weather_index and economic_index,
# which are only created in later cells — executed out of order.
df.tail()

Unnamed: 0,week,fiscal_month,fiscal_quarter,bake_season,holiday_flag,promo_flag,search_spend,social_spend,influencer_spend,display_spend,email_spend,organic_search_visits,affiliate_sales,marketplace_sales,events_popups,price_index,inventory_level,weather_index,economic_index
99,2024-12-22,12,4,1,1,1,13.718172,12.643135,7.391065,10.588295,6.64531,887.72481,1721.489531,1628.043568,237.774251,0.952939,0.954051,-0.786473,102.283834
100,2024-12-29,12,4,1,1,1,13.275048,11.763272,5.369685,8.711431,4.241065,906.933254,1609.702987,1731.029657,238.840363,0.959242,0.893393,-0.563494,100.798021
101,2025-01-05,12,4,1,1,0,7.095193,5.963418,9.446899,4.545262,6.760721,971.517947,1876.012305,1938.323311,257.537757,0.968251,0.932639,-0.51648,102.069725
102,2025-01-12,12,4,1,1,0,9.290396,6.795408,6.782621,6.499564,7.122291,906.044111,1660.530498,1963.582687,207.226654,0.988074,0.914237,-0.047196,101.947353
103,2025-01-19,12,4,1,1,0,14.536674,13.971313,9.018337,9.851129,5.57172,1023.34377,1881.713565,2126.294195,190.627662,0.969704,0.847682,-0.629537,101.791883


In [71]:
# Weather index: colder (positive for baking) to warmer (negative); center ~ 0
# Follows the annual sine wave (amplitude 1.2) plus Gaussian noise (sd 0.3).
# NOTE(review): `yearly` peaks 13 weeks into each fiscal year (around the end of
# April for a late-January start) and bottoms out 39 weeks in, so the positive
# values fall in spring rather than winter — confirm the intended sign/phase.
df["weather_index"] = 1.2 * yearly + 0.3 * np.random.randn(n)

In [85]:
# Preview the first weeks with weather_index added.
# NOTE(review): the saved output already shows economic_index, which is only
# created in a later cell — executed out of order.
df.head()

Unnamed: 0,week,fiscal_month,fiscal_quarter,bake_season,holiday_flag,promo_flag,search_spend,social_spend,influencer_spend,display_spend,email_spend,organic_search_visits,affiliate_sales,marketplace_sales,events_popups,price_index,inventory_level,weather_index,economic_index
0,2023-01-29,1,1,0,0,0,16.939729,8.490115,11.124747,7.952858,5.164115,976.651511,1741.432845,1878.771688,101.982747,0.996144,0.895558,-0.193065,98.946318
1,2023-02-05,1,1,0,0,0,10.889299,5.984467,7.273857,12.197289,6.858013,901.343544,1614.857791,2140.26992,118.539246,0.943545,0.864019,0.453632,98.951615
2,2023-02-12,1,1,0,0,0,11.131704,10.207141,9.61893,8.140817,4.092867,1053.454051,1588.475493,2026.342647,97.532543,0.983538,0.927044,0.186746,100.989143
3,2023-02-19,1,1,0,0,0,15.976788,10.937152,6.532009,8.212603,4.223407,1024.025298,1524.223713,2642.863085,117.027558,1.002785,0.831578,0.304431,101.768866
4,2023-02-26,2,1,1,0,1,11.669776,10.882036,8.323506,11.817972,6.338957,1254.448032,2114.894186,2113.482168,222.256605,0.974796,0.936158,0.271131,99.973221


In [87]:
# Preview the last weeks with weather_index added.
# NOTE(review): the saved output already shows economic_index, which is only
# created in a later cell — executed out of order.
df.tail()

Unnamed: 0,week,fiscal_month,fiscal_quarter,bake_season,holiday_flag,promo_flag,search_spend,social_spend,influencer_spend,display_spend,email_spend,organic_search_visits,affiliate_sales,marketplace_sales,events_popups,price_index,inventory_level,weather_index,economic_index
99,2024-12-22,12,4,1,1,1,13.718172,12.643135,7.391065,10.588295,6.64531,887.72481,1721.489531,1628.043568,237.774251,0.952939,0.954051,-0.786473,102.283834
100,2024-12-29,12,4,1,1,1,13.275048,11.763272,5.369685,8.711431,4.241065,906.933254,1609.702987,1731.029657,238.840363,0.959242,0.893393,-0.563494,100.798021
101,2025-01-05,12,4,1,1,0,7.095193,5.963418,9.446899,4.545262,6.760721,971.517947,1876.012305,1938.323311,257.537757,0.968251,0.932639,-0.51648,102.069725
102,2025-01-12,12,4,1,1,0,9.290396,6.795408,6.782621,6.499564,7.122291,906.044111,1660.530498,1963.582687,207.226654,0.988074,0.914237,-0.047196,101.947353
103,2025-01-19,12,4,1,1,0,14.536674,13.971313,9.018337,9.851129,5.57172,1023.34377,1881.713565,2126.294195,190.627662,0.969704,0.847682,-0.629537,101.791883


In [73]:
# Economic index: consumer confidence 95..105 with slow drift
# NOTE(review): the 95..105 range is not enforced — values are 100 plus a linear
# 0 -> 2 trend plus unit-variance Gaussian noise, with no clipping.
eco_trend = np.linspace(0, 2, n)  # slight improvement over two years
df["economic_index"] = 100 + eco_trend + 1.0 * np.random.randn(n)

In [89]:
# Preview the first weeks with all four control variables in place
df.head()

Unnamed: 0,week,fiscal_month,fiscal_quarter,bake_season,holiday_flag,promo_flag,search_spend,social_spend,influencer_spend,display_spend,email_spend,organic_search_visits,affiliate_sales,marketplace_sales,events_popups,price_index,inventory_level,weather_index,economic_index
0,2023-01-29,1,1,0,0,0,16.939729,8.490115,11.124747,7.952858,5.164115,976.651511,1741.432845,1878.771688,101.982747,0.996144,0.895558,-0.193065,98.946318
1,2023-02-05,1,1,0,0,0,10.889299,5.984467,7.273857,12.197289,6.858013,901.343544,1614.857791,2140.26992,118.539246,0.943545,0.864019,0.453632,98.951615
2,2023-02-12,1,1,0,0,0,11.131704,10.207141,9.61893,8.140817,4.092867,1053.454051,1588.475493,2026.342647,97.532543,0.983538,0.927044,0.186746,100.989143
3,2023-02-19,1,1,0,0,0,15.976788,10.937152,6.532009,8.212603,4.223407,1024.025298,1524.223713,2642.863085,117.027558,1.002785,0.831578,0.304431,101.768866
4,2023-02-26,2,1,1,0,1,11.669776,10.882036,8.323506,11.817972,6.338957,1254.448032,2114.894186,2113.482168,222.256605,0.974796,0.936158,0.271131,99.973221


In [91]:
# Preview the last weeks with all four control variables in place
df.tail()

Unnamed: 0,week,fiscal_month,fiscal_quarter,bake_season,holiday_flag,promo_flag,search_spend,social_spend,influencer_spend,display_spend,email_spend,organic_search_visits,affiliate_sales,marketplace_sales,events_popups,price_index,inventory_level,weather_index,economic_index
99,2024-12-22,12,4,1,1,1,13.718172,12.643135,7.391065,10.588295,6.64531,887.72481,1721.489531,1628.043568,237.774251,0.952939,0.954051,-0.786473,102.283834
100,2024-12-29,12,4,1,1,1,13.275048,11.763272,5.369685,8.711431,4.241065,906.933254,1609.702987,1731.029657,238.840363,0.959242,0.893393,-0.563494,100.798021
101,2025-01-05,12,4,1,1,0,7.095193,5.963418,9.446899,4.545262,6.760721,971.517947,1876.012305,1938.323311,257.537757,0.968251,0.932639,-0.51648,102.069725
102,2025-01-12,12,4,1,1,0,9.290396,6.795408,6.782621,6.499564,7.122291,906.044111,1660.530498,1963.582687,207.226654,0.988074,0.914237,-0.047196,101.947353
103,2025-01-19,12,4,1,1,0,14.536674,13.971313,9.018337,9.851129,5.57172,1023.34377,1881.713565,2126.294195,190.627662,0.969704,0.847682,-0.629537,101.791883


In [93]:
# Site visits organic baseline proxy (separate from organic_search_visits if desired)
# Half of organic_search_visits flows into site visits, so the two columns are
# correlated by construction. Seasonal, promo and bake-season effects are added
# on top, plus Gaussian noise (sd 80) drawn from the global RNG.
df["site_visits"] = (
    1500 + 250 * seasonal
    + 0.5 * df["organic_search_visits"]
    + 120 * df["promo_flag"]
    + 180 * df["bake_season"]
    + np.random.normal(0, 80, n)
)

In [95]:
# Preview the first weeks with site_visits added
df.head()

Unnamed: 0,week,fiscal_month,fiscal_quarter,bake_season,holiday_flag,promo_flag,search_spend,social_spend,influencer_spend,display_spend,email_spend,organic_search_visits,affiliate_sales,marketplace_sales,events_popups,price_index,inventory_level,weather_index,economic_index,site_visits
0,2023-01-29,1,1,0,0,0,16.939729,8.490115,11.124747,7.952858,5.164115,976.651511,1741.432845,1878.771688,101.982747,0.996144,0.895558,-0.193065,98.946318,2038.051507
1,2023-02-05,1,1,0,0,0,10.889299,5.984467,7.273857,12.197289,6.858013,901.343544,1614.857791,2140.26992,118.539246,0.943545,0.864019,0.453632,98.951615,1960.145539
2,2023-02-12,1,1,0,0,0,11.131704,10.207141,9.61893,8.140817,4.092867,1053.454051,1588.475493,2026.342647,97.532543,0.983538,0.927044,0.186746,100.989143,2133.629536
3,2023-02-19,1,1,0,0,0,15.976788,10.937152,6.532009,8.212603,4.223407,1024.025298,1524.223713,2642.863085,117.027558,1.002785,0.831578,0.304431,101.768866,2121.461679
4,2023-02-26,2,1,1,0,1,11.669776,10.882036,8.323506,11.817972,6.338957,1254.448032,2114.894186,2113.482168,222.256605,0.974796,0.936158,0.271131,99.973221,2660.358381


In [97]:
# Preview the last weeks with site_visits added
df.tail()

Unnamed: 0,week,fiscal_month,fiscal_quarter,bake_season,holiday_flag,promo_flag,search_spend,social_spend,influencer_spend,display_spend,email_spend,organic_search_visits,affiliate_sales,marketplace_sales,events_popups,price_index,inventory_level,weather_index,economic_index,site_visits
99,2024-12-22,12,4,1,1,1,13.718172,12.643135,7.391065,10.588295,6.64531,887.72481,1721.489531,1628.043568,237.774251,0.952939,0.954051,-0.786473,102.283834,2086.513246
100,2024-12-29,12,4,1,1,1,13.275048,11.763272,5.369685,8.711431,4.241065,906.933254,1609.702987,1731.029657,238.840363,0.959242,0.893393,-0.563494,100.798021,2044.622687
101,2025-01-05,12,4,1,1,0,7.095193,5.963418,9.446899,4.545262,6.760721,971.517947,1876.012305,1938.323311,257.537757,0.968251,0.932639,-0.51648,102.069725,2143.651372
102,2025-01-12,12,4,1,1,0,9.290396,6.795408,6.782621,6.499564,7.122291,906.044111,1660.530498,1963.582687,207.226654,0.988074,0.914237,-0.047196,101.947353,2056.304141
103,2025-01-19,12,4,1,1,0,14.536674,13.971313,9.018337,9.851129,5.57172,1023.34377,1881.713565,2126.294195,190.627662,0.969704,0.847682,-0.629537,101.791883,2151.885266


In [99]:
# -----------------------------
# 7) Generate targets (using INTERNAL adstock only; not exported)
# -----------------------------
# Internal adstocked versions for outcome generation
# Kept as standalone Series (never written to df), so they are not exported.
# A larger decay carries more of the previous weeks' effect forward; influencer
# has the longest memory and search the shortest. email_spend is not adstocked
# and enters the targets as raw spend.
search_a     = adstock(df["search_spend"],     decay=0.25)
social_a     = adstock(df["social_spend"],     decay=0.35)
influencer_a = adstock(df["influencer_spend"], decay=0.60)
display_a    = adstock(df["display_spend"],    decay=0.40)

In [101]:
# Sales (short-term) — marketing + non-marketing + controls + noise + positive baseline
# The coefficients below are the ground-truth effects that a model fitted on the
# exported data would try to recover.
sales = (
    # paid media (adstocked, except email which is raw spend)
    0.55 * search_a
  + 0.30 * social_a
  + 0.40 * influencer_a
  + 0.20 * df["email_spend"]
  + 0.15 * display_a

  # non-marketing channels
  + 0.0025 * df["affiliate_sales"]
  + 0.0012 * df["marketplace_sales"]
  + 0.0040 * df["events_popups"]
  + 0.0080 * df["organic_search_visits"]

  # price and stock pressure; 1.05 is the upper clip bound of inventory_level,
  # so the shortfall term is never negative
  - 900  * df["price_index"]          # higher price reduces sales
  - 600  * (1.05 - df["inventory_level"])  # stock shortfall hurts sales

  # traffic, calendar flags and external controls
  + 0.06 * df["site_visits"]
  + 220  * df["promo_flag"]
  + 160  * df["holiday_flag"]
  + 18   * df["weather_index"]
  + 3.0  * (df["economic_index"] - 100)

  # Gaussian noise (sd 110) drawn from the global RNG
  + np.random.normal(0, 110, n)
  + 2200  # positive baseline to avoid negatives
)
# Floor at zero as a safeguard in case noise still pushes a week negative.
df["sales"] = np.clip(sales, a_min=0, a_max=None)

In [103]:
# Preview the first weeks with the sales target added
df.head()

Unnamed: 0,week,fiscal_month,fiscal_quarter,bake_season,holiday_flag,promo_flag,search_spend,social_spend,influencer_spend,display_spend,...,organic_search_visits,affiliate_sales,marketplace_sales,events_popups,price_index,inventory_level,weather_index,economic_index,site_visits,sales
0,2023-01-29,1,1,0,0,0,16.939729,8.490115,11.124747,7.952858,...,976.651511,1741.432845,1878.771688,101.982747,0.996144,0.895558,-0.193065,98.946318,2038.051507,1323.752273
1,2023-02-05,1,1,0,0,0,10.889299,5.984467,7.273857,12.197289,...,901.343544,1614.857791,2140.26992,118.539246,0.943545,0.864019,0.453632,98.951615,1960.145539,1413.474685
2,2023-02-12,1,1,0,0,0,11.131704,10.207141,9.61893,8.140817,...,1053.454051,1588.475493,2026.342647,97.532543,0.983538,0.927044,0.186746,100.989143,2133.629536,1503.732368
3,2023-02-19,1,1,0,0,0,15.976788,10.937152,6.532009,8.212603,...,1024.025298,1524.223713,2642.863085,117.027558,1.002785,0.831578,0.304431,101.768866,2121.461679,1250.155883
4,2023-02-26,2,1,1,0,1,11.669776,10.882036,8.323506,11.817972,...,1254.448032,2114.894186,2113.482168,222.256605,0.974796,0.936158,0.271131,99.973221,2660.358381,1610.55041


In [105]:
# Brand (long-term) — more brand-building drivers + softer controls
# Compared with the sales formula, social and influencer weigh more and search
# less; price_index, inventory_level, site_visits and promo_flag do not enter.
brand = (
    # paid media (adstocked, except email which is raw spend)
    0.25 * search_a
  + 0.55 * social_a
  + 0.75 * influencer_a
  + 0.15 * display_a
  + 0.10 * df["email_spend"]

  # non-marketing channels
  + 0.50 * df["events_popups"]
  + 0.015 * df["affiliate_sales"]
  + 0.012 * df["marketplace_sales"]
  + 0.020 * df["organic_search_visits"]

  # calendar flag and external controls
  + 60   * df["holiday_flag"]
  + 14   * df["weather_index"]
  + 1.5  * (df["economic_index"] - 100)

  # Gaussian noise (sd 24) drawn from the global RNG
  + np.random.normal(0, 24, n)
  + 120  # baseline brand activity
)
# Floor at zero as a safeguard against negative values.
df["brand_mentions"] = np.clip(brand, a_min=0, a_max=None)

In [109]:
# Preview the first weeks with the brand_mentions target added
df.head()

Unnamed: 0,week,fiscal_month,fiscal_quarter,bake_season,holiday_flag,promo_flag,search_spend,social_spend,influencer_spend,display_spend,...,affiliate_sales,marketplace_sales,events_popups,price_index,inventory_level,weather_index,economic_index,site_visits,sales,brand_mentions
0,2023-01-29,1,1,0,0,0,16.939729,8.490115,11.124747,7.952858,...,1741.432845,1878.771688,101.982747,0.996144,0.895558,-0.193065,98.946318,2038.051507,1323.752273,254.366396
1,2023-02-05,1,1,0,0,0,10.889299,5.984467,7.273857,12.197289,...,1614.857791,2140.26992,118.539246,0.943545,0.864019,0.453632,98.951615,1960.145539,1413.474685,279.678652
2,2023-02-12,1,1,0,0,0,11.131704,10.207141,9.61893,8.140817,...,1588.475493,2026.342647,97.532543,0.983538,0.927044,0.186746,100.989143,2133.629536,1503.732368,238.943182
3,2023-02-19,1,1,0,0,0,15.976788,10.937152,6.532009,8.212603,...,1524.223713,2642.863085,117.027558,1.002785,0.831578,0.304431,101.768866,2121.461679,1250.155883,274.710063
4,2023-02-26,2,1,1,0,1,11.669776,10.882036,8.323506,11.817972,...,2114.894186,2113.482168,222.256605,0.974796,0.936158,0.271131,99.973221,2660.358381,1610.55041,338.473035


In [111]:
# -----------------------------
# 8) Final formatting & export (RAW dataset only)
# -----------------------------
# Round selected columns for readability
# Every continuous column is rounded to 2 decimals in place; the calendar and
# flag columns are integers and are left untouched. Rounding happens after the
# targets were generated, so the exported drivers are slightly coarser than the
# values used to build sales and brand_mentions.
round_cols = [
    "search_spend","social_spend","influencer_spend","display_spend","email_spend",
    "organic_search_visits","affiliate_sales","marketplace_sales","events_popups",
    "price_index","inventory_level","weather_index","economic_index","site_visits",
    "sales","brand_mentions"
]
df[round_cols] = df[round_cols].round(2)

In [113]:
# Order columns (RAW only; no adstock columns exported)
# The list names every column created above, so this step only reorders the
# frame; it also moves fiscal_quarter ahead of fiscal_month and promo_flag
# ahead of holiday_flag.
cols_order = [
    "week","fiscal_quarter","fiscal_month","bake_season","promo_flag","holiday_flag",
    # marketing spends
    "search_spend","social_spend","influencer_spend","display_spend","email_spend",
    # non-marketing channels
    "organic_search_visits","affiliate_sales","marketplace_sales","events_popups",
    # controls
    "price_index","inventory_level","weather_index","economic_index","site_visits",
    # targets
    "sales","brand_mentions"
]
df = df[cols_order]


In [117]:
# Inspect the first 25 weeks of the final, rounded and reordered dataset
df.head(25)

Unnamed: 0,week,fiscal_quarter,fiscal_month,bake_season,promo_flag,holiday_flag,search_spend,social_spend,influencer_spend,display_spend,...,affiliate_sales,marketplace_sales,events_popups,price_index,inventory_level,weather_index,economic_index,site_visits,sales,brand_mentions
0,2023-01-29,1,1,0,0,0,16.94,8.49,11.12,7.95,...,1741.43,1878.77,101.98,1.0,0.9,-0.19,98.95,2038.05,1323.75,254.37
1,2023-02-05,1,1,0,0,0,10.89,5.98,7.27,12.2,...,1614.86,2140.27,118.54,0.94,0.86,0.45,98.95,1960.15,1413.47,279.68
2,2023-02-12,1,1,0,0,0,11.13,10.21,9.62,8.14,...,1588.48,2026.34,97.53,0.98,0.93,0.19,100.99,2133.63,1503.73,238.94
3,2023-02-19,1,1,0,0,0,15.98,10.94,6.53,8.21,...,1524.22,2642.86,117.03,1.0,0.83,0.3,101.77,2121.46,1250.16,274.71
4,2023-02-26,1,2,1,1,0,11.67,10.88,8.32,11.82,...,2114.89,2113.48,222.26,0.97,0.94,0.27,99.97,2660.36,1610.55,338.47
5,2023-03-05,1,2,1,1,0,12.36,14.33,7.73,10.23,...,1806.37,2441.59,150.86,0.94,0.91,0.81,99.93,2559.99,1670.99,311.81
6,2023-03-12,1,2,1,1,0,14.65,13.75,10.01,8.27,...,1893.49,2544.71,187.12,1.01,0.9,1.41,100.19,2504.59,1496.7,384.6
7,2023-03-19,1,2,1,0,0,12.02,14.75,9.85,9.84,...,1872.7,2535.0,214.96,0.97,0.84,0.58,101.3,2601.08,1324.47,370.19
8,2023-03-26,1,3,0,0,0,18.87,13.63,8.18,7.82,...,1661.03,2441.7,57.66,1.03,0.95,0.99,99.23,2193.41,1352.94,247.6
9,2023-04-02,1,3,0,0,0,17.44,8.06,6.03,7.38,...,1611.9,2451.14,121.0,1.01,0.89,1.49,100.41,2191.7,1493.96,313.6


In [123]:
# Save to CSV / Excel
# Both files are written to the current working directory and overwrite any
# existing file of the same name; index=False leaves the row index out.
# NOTE(review): to_excel needs an Excel writer engine (e.g. openpyxl) to be
# installed — confirm it is available in the target environment.
df.to_csv("MMM_Simulated_Data.csv", index=False)
df.to_excel("MMM_Simulated_Data.xlsx", sheet_name="MMM_Simulated_Data", index=False)