In [None]:
from utils.synthesis_data import create_ads_spend, create_baseline, define_basic_parameters, generate_media
from utils.synthesis_data import generate_noisy_cvr, pivot_to_mmm_format, apply_adstock, apply_diminishing_returns, calculate_conversions, generate_final_df
from utils.plot_helpers import plot_baseline_sales, plot_ads_spend, plot_channel_transforms, plot_actual_vs_self_conversions
import os

# Define Basic Parameters

In [2]:
# --- Simulation scope ---------------------------------------------------
years = 2  # years of daily data to generate

# Channels, grouped by the activity metric they are measured in.
channels_impressions = [
    "Programmatic",
    "Google.SEM",
    "TikTok",
    "SEO.Non.Brand",
    "Facebook",
    "CRM",
    "Affiliates",
]
channels_sessions = ["Direct", "Unassigned"]
# NOTE(review): channels_clicks is assigned here but is never passed to
# define_basic_parameters below, so editing it has no effect on the run.
# Presumably it should be forwarded if click channels are ever added —
# confirm the callee's parameter name first.
channels_clicks = []

frequency_of_campaigns = 1  # presumably campaign length in days — TODO confirm

# One true conversion rate per channel (7 impressions + 2 sessions = 9).
# The per-channel labels assume the order "impressions channels, then
# sessions channels"; the saved CVR outputs further down are consistent
# with that mapping — verify against define_basic_parameters.
true_cvr = [
    0.001,  # Programmatic
    0.01,   # Google.SEM
    0.003,  # TikTok
    0.002,  # SEO.Non.Brand
    0.005,  # Facebook
    0.004,  # CRM
    0.006,  # Affiliates
    0.007,  # Direct
    0.003,  # Unassigned
]
revenue_per_conv = 1.0
start_date = "2020/1/1"

# Bundle everything into the setup dictionary that every later step takes
# as `setup_variables`.
basic_parameters = define_basic_parameters(
    years=years,
    channels_impressions=channels_impressions,
    channels_sessions=channels_sessions,
    frequency_of_campaigns=frequency_of_campaigns,
    true_cvr=true_cvr,
    revenue_per_conv=revenue_per_conv,
    start_date=start_date,
)

***** Defining Basic Parameters *****
| Parameter                 | Value                                                                      |
|---------------------------|----------------------------------------------------------------------------|
| Years of Data to generate | 2                                                                          |
| Channels (impressions)    | Programmatic, Google.SEM, TikTok, SEO.Non.Brand, Facebook, CRM, Affiliates |
| Channels (clicks)         |                                                                            |
| Channels (sessions)       | Direct, Unassigned                                                         |
| Campaign frequency        | 1                                                                          |
| True CVRs                 | 0.001, 0.01, 0.003, 0.002, 0.005, 0.004, 0.006, 0.007, 0.003               |
| Revenue per conversion    | 1.0                                                                        |

In [3]:
# Display the setup dictionary returned by define_basic_parameters.
basic_parameters

{'years': 2,
 'channels_impressions': ['Programmatic',
  'Google.SEM',
  'TikTok',
  'SEO.Non.Brand',
  'Facebook',
  'CRM',
  'Affiliates'],
 'channels_clicks': [],
 'channels_sessions': ['Direct', 'Unassigned'],
 'frequency_of_campaigns': 1,
 'true_cvr': [0.001, 0.01, 0.003, 0.002, 0.005, 0.004, 0.006, 0.007, 0.003],
 'revenue_per_conv': 1.0,
 'start_date': datetime.date(2020, 1, 1)}

# Simulate Daily Baseline Sales

Tips on Picking Parameters:

* How do I select `base_p` and `trend_p`? To reflect your business scenario, look at your own business's historical data or at aggregate data on your industry's growth over time. You can also vary these values to generate a variety of data sets and see how MMMs react to changes in `base_p` and `trend_p`.

* How do I pick `temp_coef_mean`? Experiment with this and the other variables. The larger this value is, the more impact seasonality will have on your data.

In [4]:
# Simulate the daily baseline (non-media) sales series.
# The per-argument notes describe what the saved descriptive statistics show;
# the exact formulas live in create_baseline — confirm there before relying
# on them.
df_baseline = create_baseline(
    setup_variables=basic_parameters,
    base_p=500_000,         # `base` column is constant at this value
    trend_p=1.8,            # `trend` reaches base_p * trend_p (900,000) on the last day
    temp_var=8,             # `temp` stays within roughly +/- this value
    temp_coef_mean=50_000,  # presumably mean of the temp -> seasonality coefficient
    temp_coef_sd=5_000,     # presumably sd of that coefficient
    error_std=100_000,      # `error` column has a std of about this size
)

Generating baseline sales: Done!

Descriptive statistics:
                count          mean            std            min  \
day             730.0  3.655000e+02     210.877136       1.000000   
baseline_sales  730.0  9.445375e+05  310535.331716  241521.724210   
base            730.0  5.000000e+05       0.000000  500000.000000   
trend           730.0  4.506164e+05  259985.510661    1232.876712   
temp            730.0 -1.557354e-16       5.660733      -7.999926   
seasonality     730.0 -2.041255e-11  278009.892180 -392892.338046   
error           730.0 -6.078926e+03  101354.503137 -321927.467797   

                          25%           50%           75%           max  
day                183.250000  3.655000e+02  5.477500e+02  7.300000e+02  
baseline_sales  702570.437912  9.403506e+05  1.153976e+06  1.748361e+06  
base            500000.000000  5.000000e+05  5.000000e+05  5.000000e+05  
trend           225924.657534  4.506164e+05  6.753082e+05  9.000000e+05  
temp               

In [5]:
# Display the baseline frame (pandas truncates the middle rows when rendering).
df_baseline

Unnamed: 0,day,baseline_sales,base,trend,temp,seasonality,error
0,1,3.984478e+05,500000,1232.876712,1.377068e-01,6.763058e+03,-109548.170386
1,2,3.356428e+05,500000,2465.753425,2.753729e-01,1.352411e+04,-180347.100448
2,3,5.709778e+05,500000,3698.630137,4.129573e-01,2.028116e+04,46997.973390
3,4,7.100948e+05,500000,4931.506849,5.504194e-01,2.703220e+04,178131.086853
4,5,5.427833e+05,500000,6164.383562,6.877184e-01,3.377522e+04,2843.669473
...,...,...,...,...,...,...,...
725,726,1.177988e+06,500000,895068.493151,-5.504194e-01,-2.703220e+04,-190048.598301
726,727,1.306904e+06,500000,896301.369863,-4.129573e-01,-2.028116e+04,-69115.723153
727,728,1.492467e+06,500000,897534.246575,-2.753729e-01,-1.352411e+04,108456.423738
728,729,1.435489e+06,500000,898767.123288,-1.377068e-01,-6.763058e+03,43485.343675


In [6]:
# Plot the simulated baseline sales.
# figsize is passed as (900, 600) — presumably (width, height) in pixels; confirm in plot_helpers.
plot_baseline_sales(df_baseline, figsize=(900, 600))

# Generate Ad Spend

In [7]:
# Per-channel (min, max) bounds for the share of campaign spend.
# NOTE(review): the upper bounds add up to 1.48, so the shares drawn for the
# seven paid channels can exceed 1 in total. In the saved outputs the first
# day's shares sum to about 1.25 and 'Unassigned' is allotted 0 rather than a
# positive remainder — confirm how create_ads_spend normalises or caps them.
min_max_proportion = {
    "Programmatic": (0.45, 0.55),
    "Google.SEM": (0.15, 0.25),
    "TikTok": (0.10, 0.20),
    "SEO.Non.Brand": (0.05, 0.15),
    "Facebook": (0.05, 0.15),
    "CRM": (0.05, 0.10),
    "Affiliates": (0.02, 0.08),
    "Direct": (0, 0),  # pinned to zero: no spend is allocated to Direct
    # last channel 'Unassigned' will get remainder
}

# Simulate spend per day and channel.
# campaign_spend_mean / campaign_spend_std presumably parameterise the
# distribution of total campaign spend — TODO confirm in create_ads_spend.
df_spend = create_ads_spend(
    setup_variables=basic_parameters,
    campaign_spend_mean=329000,
    campaign_spend_std=100000,
    min_max_proportion=min_max_proportion
)

# Preview the first rows (one row per day and channel).
df_spend.head()

Simulated ad spend: Done!


Unnamed: 0,day,channel,channel_type,total_campaign_spend,channel_prop_spend,spend_channel
0,2020-01-01,Programmatic,impressions,370406.724785,0.533678,197677.848024
1,2020-01-01,Google.SEM,impressions,407019.722762,0.207778,84569.682064
2,2020-01-01,TikTok,impressions,235569.394834,0.136442,32141.610091
3,2020-01-01,SEO.Non.Brand,impressions,279732.459336,0.05578,15603.35841
4,2020-01-01,Facebook,impressions,340963.993652,0.146112,49818.98604


In [8]:
# Per-channel spend over time.
# chart_type="line" is passed explicitly here; the original note marks it as the default.
plot_ads_spend(df_spend, by="channel", chart_type="line", figsize=(900, 600))

In [9]:
# Pie chart of spend by channel
plot_ads_spend(df_spend, by="channel", chart_type="pie", figsize=(900, 600))

In [10]:
# Total spend (by="total" instead of the per-channel breakdown)
plot_ads_spend(df_spend, by="total", figsize=(900, 600))

# Generate Media Variables

Tips on Picking Parameters:

* How do I pick `true_cpm` and `true_cpc`? We recommend using historical data as a starting point: work out the average CPMs and CPCs of campaigns you have run in the past. You can also vary these values to see how MMMs react to the changes.

In [11]:
# True cost per mille (CPM) for each impressions channel.
# Sessions channels carry None; in the saved output their true_cpm and
# noisy_cpm columns are NaN.
true_cpm = {
    "Programmatic": 2,
    "Google.SEM": 20,
    "TikTok": 10,
    "SEO.Non.Brand": 5,
    "Facebook": 15,
    "CRM": 30,
    "Affiliates": 8,
    "Direct": None,  # Direct does not have CPM
    "Unassigned": None  # Unassigned does not have CPM
}

# Example CPC configuration, left disabled: this notebook defines no click
# channels, so an empty dict is passed as true_cpc below.
# true_cpc = {
#     "Facebook": 0.25,
#     "CRM": 0.50
# }

# Mean of the per-channel cost noise.
# presumably additive noise on the true CPM (saved output: true_cpm 2.0 ->
# noisy_cpm ~2.01 for Programmatic) — TODO confirm in generate_media.
mean_noisy = {
    "Programmatic": 0.01,
    "Google.SEM": 0.05,
    "TikTok": 0.03,
    "Facebook": 0.04,
    "CRM": 0.05,
    "SEO.Non.Brand": 0.02,
    "Direct": 0.06,
    "Unassigned": 0.05,
    "Affiliates": 0.02
}

# Standard deviation of the same per-channel noise.
std_noisy = {
    "Programmatic": 0.005,
    "Google.SEM": 0.02,
    "TikTok": 0.01,
    "Facebook": 0.02,
    "CRM": 0.03,
    "SEO.Non.Brand": 0.01,
    "Direct": 0.03,
    "Unassigned": 0.02,
    "Affiliates": 0.01
}

# --------------------------
# Define session configs
# --------------------------
# One entry per sessions channel. The keys suggest a base level, a yearly
# trend, weekly/annual seasonality strengths and a noise coefficient of
# variation — presumably interpreted that way by generate_media; verify there.
sessions_config = {
    "Direct": {
        "base_sessions": 10000,
        "trend_per_year": 0.05,
        "weekly_strength": 0.12,
        "annual_strength": 0.15,
        "noise_cv": 0.08
    },
    "Unassigned": {
        "base_sessions": 5000,
        "trend_per_year": 0.03,
        "weekly_strength": 0.10,
        "annual_strength": 0.12,
        "noise_cv": 0.06
    }
}

# Run
# Derives the media activity columns (impressions / sessions per channel)
# from the spend frame and the settings above.
df_media = generate_media(
    setup_variables=basic_parameters,
    df_ads_spends=df_spend,
    true_cpm=true_cpm,
    true_cpc={},
    mean_noisy=mean_noisy,
    std_noisy=std_noisy,
    sessions_config=sessions_config
)

Simulating media variables: Done!


In [12]:
# List the columns of the frame returned by generate_media.
df_media.columns

Index(['day', 'channel', 'channel_type', 'total_campaign_spend',
       'channel_prop_spend', 'spend_channel', 'campaign_id', 'true_cpm',
       'noisy_cpm', 'true_cpc', 'noisy_cpc', 'lifetime_impressions',
       'lifetime_clicks', 'lifetime_sessions',
       'impressions_Programmatic_after_running_day_1',
       'impressions_Google.SEM_after_running_day_1',
       'impressions_TikTok_after_running_day_1',
       'impressions_SEO.Non.Brand_after_running_day_1',
       'impressions_Facebook_after_running_day_1',
       'impressions_CRM_after_running_day_1',
       'impressions_Affiliates_after_running_day_1',
       'sessions_Direct_after_running_day_1',
       'sessions_Unassigned_after_running_day_1',
       'spend_Programmatic_after_running_day_1',
       'spend_Google.SEM_after_running_day_1',
       'spend_TikTok_after_running_day_1',
       'spend_SEO.Non.Brand_after_running_day_1',
       'spend_Facebook_after_running_day_1', 'spend_CRM_after_running_day_1',
       'spend_Affili

# Generate Noisy CVRs

In [13]:
# Optional noise (defaults to 0 if omitted)
# "SEO.Non.Brand" and "Affiliates" are omitted from both dicts; in the saved
# output their CVR columns stay constant at 0.002 and 0.006 respectively.

# Mean of the noise applied to each channel's true CVR.
mean_noisy_cvr = {
    "Programmatic": 0.002,
    "Google.SEM":  0.01,
    "TikTok":      0.003,
    "Facebook":    0.0015,
    "CRM":         0.0002,
    "Direct":      0.004,
    "Unassigned":  0.0055,
}
# Standard deviation of that noise.
# NOTE(review): for TikTok and CRM the std is as large as or larger than the
# mean — check how generate_noisy_cvr treats draws that would push a CVR
# below zero.
std_noisy_cvr = {
    "Programmatic": 0.001,
    "Google.SEM":   0.002,
    "TikTok":       0.003,
    "Facebook":     0.001,
    "CRM":          0.002,
    "Direct":       0.001,
    "Unassigned":   0.0015,
}

# Add the noisy conversion-rate columns to the media frame.
df_ads = generate_noisy_cvr(
    setup_variables=basic_parameters,
    df_media=df_media,
    mean_noisy_cvr=mean_noisy_cvr,
    std_noisy_cvr=std_noisy_cvr
)

You have completed running step 4: Simulating conversion rates.


In [14]:
# Keep only columns whose name contains "_cvr_" (the per-channel noisy CVR
# columns; the plain "noisy_cvr" column does not match this pattern).
df_ads.filter(regex=r"_cvr_")

Unnamed: 0,noisy_cvr_Affiliates_after_running_day_1,noisy_cvr_Google.SEM_after_running_day_1,noisy_cvr_Direct_after_running_day_1,noisy_cvr_CRM_after_running_day_1,noisy_cvr_TikTok_after_running_day_1,noisy_cvr_Unassigned_after_running_day_1,noisy_cvr_Programmatic_after_running_day_1,noisy_cvr_SEO.Non.Brand_after_running_day_1,noisy_cvr_Facebook_after_running_day_1
0,0.000,0.000000,0.000000,0.000000,0.000000,0.000000,0.002767,0.000,0.000000
1,0.000,0.020023,0.000000,0.000000,0.000000,0.000000,0.000000,0.000,0.000000
2,0.000,0.000000,0.000000,0.000000,0.008312,0.000000,0.000000,0.000,0.000000
3,0.000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.002,0.000000
4,0.000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000,0.007732
...,...,...,...,...,...,...,...,...,...
6565,0.000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000,0.005501
6566,0.000,0.000000,0.000000,0.007329,0.000000,0.000000,0.000000,0.000,0.000000
6567,0.006,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000,0.000000
6568,0.000,0.000000,0.013647,0.000000,0.000000,0.000000,0.000000,0.000,0.000000


In [15]:
# Preview the first 10 rows (one row per day and channel).
df_ads.head(10)

Unnamed: 0,day,channel,channel_type,total_campaign_spend,channel_prop_spend,spend_channel,campaign_id,true_cpm,noisy_cpm,true_cpc,...,noisy_cvr,noisy_cvr_Affiliates_after_running_day_1,noisy_cvr_Google.SEM_after_running_day_1,noisy_cvr_Direct_after_running_day_1,noisy_cvr_CRM_after_running_day_1,noisy_cvr_TikTok_after_running_day_1,noisy_cvr_Unassigned_after_running_day_1,noisy_cvr_Programmatic_after_running_day_1,noisy_cvr_SEO.Non.Brand_after_running_day_1,noisy_cvr_Facebook_after_running_day_1
0,2020-01-01,Programmatic,impressions,370406.724785,0.533678,197677.848024,1,2.0,2.013484,,...,0.002767,0.0,0.0,0.0,0.0,0.0,0.0,0.002767,0.0,0.0
1,2020-01-01,Google.SEM,impressions,407019.722762,0.207778,84569.682064,1,20.0,20.02423,,...,0.020023,0.0,0.020023,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2020-01-01,TikTok,impressions,235569.394834,0.136442,32141.610091,1,10.0,10.016581,,...,0.008312,0.0,0.0,0.0,0.0,0.008312,0.0,0.0,0.0,0.0
3,2020-01-01,SEO.Non.Brand,impressions,279732.459336,0.05578,15603.35841,1,5.0,5.033589,,...,0.002,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.002,0.0
4,2020-01-01,Facebook,impressions,340963.993652,0.146112,49818.98604,1,15.0,15.037572,,...,0.007732,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.007732
5,2020-01-01,CRM,impressions,252188.129776,0.098897,24940.688736,1,30.0,30.08485,,...,0.004909,0.0,0.0,0.0,0.004909,0.0,0.0,0.0,0.0,0.0
6,2020-01-01,Affiliates,impressions,236622.863351,0.071126,16830.074553,1,8.0,8.031338,,...,0.006,0.006,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,2020-01-01,Direct,sessions,301795.880273,0.0,0.0,1,,,,...,0.009922,0.0,0.0,0.009922,0.0,0.0,0.0,0.0,0.0,0.0
8,2020-01-01,Unassigned,sessions,467356.716017,0.0,0.0,1,,,,...,0.008876,0.0,0.0,0.0,0.0,0.0,0.008876,0.0,0.0,0.0
9,2020-01-02,Programmatic,impressions,414583.65495,0.511287,211971.204481,2,2.0,2.019996,,...,0.001063,0.0,0.0,0.0,0.0,0.0,0.0,0.001063,0.0,0.0


In [16]:
# List every column now present after the CVR step.
df_ads.columns

Index(['day', 'channel', 'channel_type', 'total_campaign_spend',
       'channel_prop_spend', 'spend_channel', 'campaign_id', 'true_cpm',
       'noisy_cpm', 'true_cpc', 'noisy_cpc', 'lifetime_impressions',
       'lifetime_clicks', 'lifetime_sessions',
       'impressions_Programmatic_after_running_day_1',
       'impressions_Google.SEM_after_running_day_1',
       'impressions_TikTok_after_running_day_1',
       'impressions_SEO.Non.Brand_after_running_day_1',
       'impressions_Facebook_after_running_day_1',
       'impressions_CRM_after_running_day_1',
       'impressions_Affiliates_after_running_day_1',
       'sessions_Direct_after_running_day_1',
       'sessions_Unassigned_after_running_day_1',
       'spend_Programmatic_after_running_day_1',
       'spend_Google.SEM_after_running_day_1',
       'spend_TikTok_after_running_day_1',
       'spend_SEO.Non.Brand_after_running_day_1',
       'spend_Facebook_after_running_day_1', 'spend_CRM_after_running_day_1',
       'spend_Affili

In [17]:
# Show the first 20 rows of the activity columns, i.e. those whose names
# start with "impressions_" or "sessions_".
df_ads.filter(regex=r"^(impressions_|sessions_)").head(20)

Unnamed: 0,impressions_Programmatic_after_running_day_1,impressions_Google.SEM_after_running_day_1,impressions_TikTok_after_running_day_1,impressions_SEO.Non.Brand_after_running_day_1,impressions_Facebook_after_running_day_1,impressions_CRM_after_running_day_1,impressions_Affiliates_after_running_day_1,sessions_Direct_after_running_day_1,sessions_Unassigned_after_running_day_1
0,98177030.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,4223367.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,3208841.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,3099847.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,3312967.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,829011.576901,0.0,0.0,0.0
6,0.0,0.0,0.0,0.0,0.0,0.0,2095550.0,0.0,0.0
7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,11180.279536,0.0
8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10488.01393
9,104936500.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# Transforming Media Variables

## Pivoting the table to an MMM format

In [18]:
# Reshape the long frame (one row per day and channel) into the wide MMM
# layout: one row per DATE with per-channel activity, spend and CVR columns.
df_before_mmm = pivot_to_mmm_format(
    setup_variables=basic_parameters,
    df_ads=df_ads
)

Pivoting the data frame to an MMM format: Done!


In [19]:
# Preview the first 20 days of the pivoted frame.
df_before_mmm.head(20)

Unnamed: 0,DATE,sum_n_Programmatic_imps_this_day,sum_n_Google.SEM_imps_this_day,sum_n_TikTok_imps_this_day,sum_n_SEO.Non.Brand_imps_this_day,sum_n_Facebook_imps_this_day,sum_n_CRM_imps_this_day,sum_n_Affiliates_imps_this_day,sum_n_Direct_sessions_this_day,sum_n_Unassigned_sessions_this_day,...,sum_spend_Unassigned_this_day,cvr_Programmatic_this_day,cvr_Google.SEM_this_day,cvr_TikTok_this_day,cvr_SEO.Non.Brand_this_day,cvr_Facebook_this_day,cvr_CRM_this_day,cvr_Affiliates_this_day,cvr_Direct_this_day,cvr_Unassigned_this_day
0,2020-01-01,98177030.0,4223367.0,3208841.0,3099847.0,3312967.0,829011.6,2095550.0,11180.279536,10488.01393,...,0.0,0.002767,0.020023,0.008312,0.002,0.007732,0.004909,0.006,0.009922,0.008876
1,2020-01-02,104936500.0,3031687.0,5474178.0,10187370.0,1201029.0,527540.5,4133869.0,10862.407928,11055.946695,...,0.0,0.001063,0.02281,0.004469,0.002,0.005744,0.004965,0.006,0.010302,0.007101
2,2020-01-03,87636250.0,3369535.0,6673298.0,9031184.0,3195861.0,291377.7,3469899.0,9261.949139,8896.421211,...,0.0,0.002519,0.020804,0.004393,0.002,0.007642,0.004947,0.006,0.011196,0.007778
3,2020-01-04,100592800.0,3704550.0,4292544.0,4497090.0,3897955.0,1127681.0,966751.1,9105.321811,8088.033502,...,0.0,0.003868,0.020468,0.004816,0.002,0.007527,0.004899,0.006,0.009901,0.007911
4,2020-01-05,92927590.0,3720350.0,4246725.0,4954724.0,1745126.0,675167.3,783057.2,11086.472263,10820.813942,...,0.0,0.001767,0.022558,0.005369,0.002,0.006585,0.001652,0.006,0.009799,0.007394
5,2020-01-06,32159960.0,2959688.0,2471449.0,2414977.0,1831050.0,568050.8,2518604.0,9614.402455,9478.789075,...,0.0,0.004491,0.020298,0.005259,0.002,0.005532,0.00573,0.006,0.010469,0.008782
6,2020-01-07,47054220.0,2194270.0,6460828.0,5272175.0,1937690.0,636726.8,2874995.0,10454.627842,10788.489685,...,0.0,0.002727,0.021737,0.004469,0.002,0.005502,0.006767,0.006,0.012253,0.009646
7,2020-01-08,71151190.0,4646109.0,5392101.0,5208660.0,1194115.0,696250.8,2227190.0,10467.233013,11012.015584,...,0.0,0.001637,0.020489,0.005825,0.002,0.005321,0.005676,0.006,0.008959,0.01115
8,2020-01-09,84287860.0,3972476.0,6497781.0,7648466.0,1936221.0,648249.4,1675897.0,11420.518327,11259.616827,...,0.0,0.002351,0.024048,0.005332,0.002,0.007039,0.004267,0.006,0.011425,0.010514
9,2020-01-10,88627510.0,3260216.0,4334795.0,2992169.0,1004135.0,1060994.0,937481.3,10199.806228,9886.352491,...,0.0,0.003779,0.016472,0.00183,0.002,0.004527,0.003371,0.006,0.011291,0.010461


In [20]:
# List the columns of the pivoted frame.
df_before_mmm.columns

Index(['DATE', 'sum_n_Programmatic_imps_this_day',
       'sum_n_Google.SEM_imps_this_day', 'sum_n_TikTok_imps_this_day',
       'sum_n_SEO.Non.Brand_imps_this_day', 'sum_n_Facebook_imps_this_day',
       'sum_n_CRM_imps_this_day', 'sum_n_Affiliates_imps_this_day',
       'sum_n_Direct_sessions_this_day', 'sum_n_Unassigned_sessions_this_day',
       'sum_spend_Programmatic_this_day', 'sum_spend_Google.SEM_this_day',
       'sum_spend_TikTok_this_day', 'sum_spend_SEO.Non.Brand_this_day',
       'sum_spend_Facebook_this_day', 'sum_spend_CRM_this_day',
       'sum_spend_Affiliates_this_day', 'sum_spend_Direct_this_day',
       'sum_spend_Unassigned_this_day', 'cvr_Programmatic_this_day',
       'cvr_Google.SEM_this_day', 'cvr_TikTok_this_day',
       'cvr_SEO.Non.Brand_this_day', 'cvr_Facebook_this_day',
       'cvr_CRM_this_day', 'cvr_Affiliates_this_day', 'cvr_Direct_this_day',
       'cvr_Unassigned_this_day'],
      dtype='object')

## Apply Adstock

In [21]:
# Per-channel carry-over (decay) rate for the adstock step.
# The saved output matches the recurrence
#   adstocked[t] = activity[t] + lambda * adstocked[t-1]
# e.g. Programmatic on day 2: 104,936,500 + 0.1 * 98,177,030 ~= 114,754,200.
true_lambda_decay = {
    "Programmatic": 0.1,
    "Google.SEM": 0.2,
    "TikTok": 0.3,
    "SEO.Non.Brand": 0.15,
    "Facebook": 0.25,
    "CRM": 0.1,
    "Affiliates": 0.05,
    "Direct": 0.2,
    "Unassigned": 0.1
}

# Adds one "<activity column>_adstocked" column per channel.
df_adstock = apply_adstock(
    setup_variables=basic_parameters,
    df_daily=df_before_mmm,
    true_lambda_decay=true_lambda_decay
)

Applied geometric adstock to all channels: Done!


In [22]:
# Preview the adstocked frame; the new "*_adstocked" columns are on the right.
df_adstock.head()

Unnamed: 0,DATE,sum_n_Programmatic_imps_this_day,sum_n_Google.SEM_imps_this_day,sum_n_TikTok_imps_this_day,sum_n_SEO.Non.Brand_imps_this_day,sum_n_Facebook_imps_this_day,sum_n_CRM_imps_this_day,sum_n_Affiliates_imps_this_day,sum_n_Direct_sessions_this_day,sum_n_Unassigned_sessions_this_day,...,cvr_Unassigned_this_day,sum_n_Programmatic_imps_this_day_adstocked,sum_n_Google.SEM_imps_this_day_adstocked,sum_n_TikTok_imps_this_day_adstocked,sum_n_SEO.Non.Brand_imps_this_day_adstocked,sum_n_Facebook_imps_this_day_adstocked,sum_n_CRM_imps_this_day_adstocked,sum_n_Affiliates_imps_this_day_adstocked,sum_n_Direct_sessions_this_day_adstocked,sum_n_Unassigned_sessions_this_day_adstocked
0,2020-01-01,98177030.0,4223367.0,3208841.0,3099847.0,3312967.0,829011.6,2095550.0,11180.279536,10488.01393,...,0.008876,98177030.0,4223367.0,3208841.0,3099847.0,3312967.0,829011.6,2095550.0,11180.279536,10488.01393
1,2020-01-02,104936500.0,3031687.0,5474178.0,10187370.0,1201029.0,527540.5,4133869.0,10862.407928,11055.946695,...,0.007101,114754200.0,3876361.0,6436830.0,10652350.0,2029271.0,610441.6,4238647.0,13098.463836,12104.748088
2,2020-01-03,87636250.0,3369535.0,6673298.0,9031184.0,3195861.0,291377.7,3469899.0,9261.949139,8896.421211,...,0.007778,99111660.0,4144807.0,8604347.0,10629040.0,3703179.0,352421.9,3681831.0,11881.641906,10106.89602
3,2020-01-04,100592800.0,3704550.0,4292544.0,4497090.0,3897955.0,1127681.0,966751.1,9105.321811,8088.033502,...,0.007911,110504000.0,4533512.0,6873848.0,6091445.0,4823749.0,1162924.0,1150843.0,11481.650192,9098.723104
4,2020-01-05,92927590.0,3720350.0,4246725.0,4954724.0,1745126.0,675167.3,783057.2,11086.472263,10820.813942,...,0.007394,103978000.0,4627053.0,6308879.0,5868441.0,2951064.0,791459.7,840599.4,13382.802301,11730.686253


In [23]:
# List the columns after the adstock step.
df_adstock.columns

Index(['DATE', 'sum_n_Programmatic_imps_this_day',
       'sum_n_Google.SEM_imps_this_day', 'sum_n_TikTok_imps_this_day',
       'sum_n_SEO.Non.Brand_imps_this_day', 'sum_n_Facebook_imps_this_day',
       'sum_n_CRM_imps_this_day', 'sum_n_Affiliates_imps_this_day',
       'sum_n_Direct_sessions_this_day', 'sum_n_Unassigned_sessions_this_day',
       'sum_spend_Programmatic_this_day', 'sum_spend_Google.SEM_this_day',
       'sum_spend_TikTok_this_day', 'sum_spend_SEO.Non.Brand_this_day',
       'sum_spend_Facebook_this_day', 'sum_spend_CRM_this_day',
       'sum_spend_Affiliates_this_day', 'sum_spend_Direct_this_day',
       'sum_spend_Unassigned_this_day', 'cvr_Programmatic_this_day',
       'cvr_Google.SEM_this_day', 'cvr_TikTok_this_day',
       'cvr_SEO.Non.Brand_this_day', 'cvr_Facebook_this_day',
       'cvr_CRM_this_day', 'cvr_Affiliates_this_day', 'cvr_Direct_this_day',
       'cvr_Unassigned_this_day', 'sum_n_Programmatic_imps_this_day_adstocked',
       'sum_n_Google.SEM_imps_

## Applying Diminishing Returns to Media Variables

In [24]:
# Saturation (diminishing-returns) parameters.
# presumably the shape (alpha) and inflection (gamma) of a Hill-type curve —
# TODO confirm in apply_diminishing_returns.
alpha = 2.0  # same alpha for all channels
gamma = {
    "Programmatic": 0.2, "Google.SEM": 0.2, "TikTok": 0.25,
    "SEO.Non.Brand": 0.25, "Facebook": 0.2, "CRM": 0.2, "Affiliates": 0.2,
    "Direct": 0.2, "Unassigned": 0.2
}

# Adds one "<activity column>_adstocked_decay_diminishing" column per channel.
# NOTE(review): `df_dimissing` looks like a misspelling of "diminishing"; the
# name is kept because later cells reference it.
df_dimissing = apply_diminishing_returns(
    setup_variables=basic_parameters,
    df_adstock=df_adstock,
    alpha_saturation=alpha,
    gamma_saturation=gamma,   # can also pass a single float for all channels
    x_marginal=1e6            # constant marginal factor; pass None for no marginal effect
)

Apply diminishing marginal returns: Done!


In [25]:
# Preview the frame with the diminishing-returns columns appended.
df_dimissing.head()

Unnamed: 0,DATE,sum_n_Programmatic_imps_this_day,sum_n_Google.SEM_imps_this_day,sum_n_TikTok_imps_this_day,sum_n_SEO.Non.Brand_imps_this_day,sum_n_Facebook_imps_this_day,sum_n_CRM_imps_this_day,sum_n_Affiliates_imps_this_day,sum_n_Direct_sessions_this_day,sum_n_Unassigned_sessions_this_day,...,sum_n_Unassigned_sessions_this_day_adstocked,sum_n_Programmatic_imps_this_day_adstocked_decay_diminishing,sum_n_Google.SEM_imps_this_day_adstocked_decay_diminishing,sum_n_TikTok_imps_this_day_adstocked_decay_diminishing,sum_n_SEO.Non.Brand_imps_this_day_adstocked_decay_diminishing,sum_n_Facebook_imps_this_day_adstocked_decay_diminishing,sum_n_CRM_imps_this_day_adstocked_decay_diminishing,sum_n_Affiliates_imps_this_day_adstocked_decay_diminishing,sum_n_Direct_sessions_this_day_adstocked_decay_diminishing,sum_n_Unassigned_sessions_this_day_adstocked_decay_diminishing
0,2020-01-01,98177030.0,4223367.0,3208841.0,3099847.0,3312967.0,829011.6,2095550.0,11180.279536,10488.01393,...,10488.01393,29477.692007,734527.069615,131882.981509,82796.227069,760142.0,604943.636479,670758.5,11179.081995,10487.223854
1,2020-01-02,104936500.0,3031687.0,5474178.0,10187370.0,1201029.0,527540.5,4133869.0,10862.407928,11055.946695,...,12104.748088,34454.980478,674175.777418,264552.987326,284521.828701,465605.0,445449.473672,1356736.0,13097.060835,12103.836222
2,2020-01-03,87636250.0,3369535.0,6673298.0,9031184.0,3195861.0,291377.7,3469899.0,9261.949139,8896.421211,...,10106.89602,29758.315214,720863.818525,353637.681188,283899.181547,849674.0,257168.152911,1178506.0,11880.369241,10106.134654
3,2020-01-04,100592800.0,3704550.0,4292544.0,4497090.0,3897955.0,1127681.0,966751.1,9105.321811,8088.033502,...,9098.723104,33178.854743,788467.256866,282514.378811,162701.140087,1106783.0,848604.772137,368369.8,11480.420371,9098.037685
4,2020-01-05,92927590.0,3720350.0,4246725.0,4954724.0,1745126.0,675167.3,783057.2,11086.472263,10820.813942,...,11730.686253,31219.429723,804735.908485,259294.224354,156744.745493,677105.2,577541.380792,269065.0,13381.368844,11729.802565


In [26]:
# For a sessions channel ("Direct" is listed in channels_sessions)
plot_channel_transforms(df_dimissing, channel="Direct", figsize=(900, 600))


# Calculating Conversions

In [27]:
# Show the daily per-channel CVR columns (names starting with "cvr_").
df_dimissing.filter(regex=r"^cvr_")

Unnamed: 0,cvr_Programmatic_this_day,cvr_Google.SEM_this_day,cvr_TikTok_this_day,cvr_SEO.Non.Brand_this_day,cvr_Facebook_this_day,cvr_CRM_this_day,cvr_Affiliates_this_day,cvr_Direct_this_day,cvr_Unassigned_this_day
0,0.002767,0.020023,0.008312,0.002,0.007732,0.004909,0.006,0.009922,0.008876
1,0.001063,0.022810,0.004469,0.002,0.005744,0.004965,0.006,0.010302,0.007101
2,0.002519,0.020804,0.004393,0.002,0.007642,0.004947,0.006,0.011196,0.007778
3,0.003868,0.020468,0.004816,0.002,0.007527,0.004899,0.006,0.009901,0.007911
4,0.001767,0.022558,0.005369,0.002,0.006585,0.001652,0.006,0.009799,0.007394
...,...,...,...,...,...,...,...,...,...
725,0.002170,0.021860,0.002839,0.002,0.006588,0.005409,0.006,0.009822,0.009911
726,0.005060,0.021237,0.007523,0.002,0.003982,0.002181,0.006,0.012011,0.010445
727,0.002435,0.022151,0.007217,0.002,0.008360,0.002896,0.006,0.011239,0.006119
728,0.003733,0.019294,0.005403,0.002,0.005871,0.007623,0.006,0.011143,0.009667


In [28]:
# Adds one "conv_<channel>" column per channel.
# In the saved output each value equals the channel's diminishing-returns
# activity times its daily CVR (e.g. Programmatic day 1: 29,477.69 * 0.002767 ~= 81.56).
df_conversion = calculate_conversions(setup_variables=basic_parameters, df_daily=df_dimissing)

Calculated conversions for all channel types: Done!


In [29]:
# Display the frame with the per-channel conversion columns appended.
df_conversion

Unnamed: 0,DATE,sum_n_Programmatic_imps_this_day,sum_n_Google.SEM_imps_this_day,sum_n_TikTok_imps_this_day,sum_n_SEO.Non.Brand_imps_this_day,sum_n_Facebook_imps_this_day,sum_n_CRM_imps_this_day,sum_n_Affiliates_imps_this_day,sum_n_Direct_sessions_this_day,sum_n_Unassigned_sessions_this_day,...,sum_n_Unassigned_sessions_this_day_adstocked_decay_diminishing,conv_Programmatic,conv_Google.SEM,conv_TikTok,conv_SEO.Non.Brand,conv_Facebook,conv_CRM,conv_Affiliates,conv_Direct,conv_Unassigned
0,2020-01-01,9.817703e+07,4.223367e+06,3.208841e+06,3.099847e+06,3.312967e+06,8.290116e+05,2.095550e+06,11180.279536,10488.013930,...,10487.223854,81.555629,14707.442920,1096.260673,165.592454,5877.157117,2969.909599,4024.551264,110.919230,93.086148
1,2020-01-02,1.049365e+08,3.031687e+06,5.474178e+06,1.018737e+07,1.201029e+06,5.275405e+05,4.133869e+06,10862.407928,11055.946695,...,12103.836222,36.608790,15377.910327,1182.166828,569.043657,2674.608591,2211.686281,8140.415698,134.919508,85.947889
2,2020-01-03,8.763625e+07,3.369535e+06,6.673298e+06,9.031184e+06,3.195861e+06,2.913777e+05,3.469899e+06,9261.949139,8896.421211,...,10106.134654,74.968046,14996.839272,1553.455212,567.798363,6493.363265,1272.105214,7071.038447,133.013821,78.606058
3,2020-01-04,1.005928e+08,3.704550e+06,4.292544e+06,4.497090e+06,3.897955e+06,1.127681e+06,9.667511e+05,9105.321811,8088.033502,...,9098.037685,128.341704,16138.624033,1360.475939,325.402280,8330.232110,4157.620967,2210.218936,113.664104,71.975094
4,2020-01-05,9.292759e+07,3.720350e+06,4.246725e+06,4.954724e+06,1.745126e+06,6.751673e+05,7.830572e+05,11086.472263,10820.813942,...,11729.802565,55.176790,18152.836357,1392.114772,313.489491,4458.496139,954.195214,1614.389846,131.120897,86.728935
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
725,2021-12-26,1.223219e+08,5.762467e+06,4.816180e+06,5.316548e+06,1.317262e+06,1.334633e+06,1.497134e+06,10617.984556,10660.547494,...,11641.341519,84.480254,24387.891253,737.453354,340.841220,3179.264172,5510.162696,2970.672615,126.192538,115.372543
726,2021-12-27,6.559665e+07,5.425804e+06,5.479275e+06,7.718482e+06,3.570370e+06,1.211314e+06,3.244986e+06,9873.110566,10047.296461,...,11210.673737,119.345214,24779.182047,2280.441290,463.444001,3742.952648,2150.194221,6380.602433,149.441392,117.090491
727,2021-12-28,8.441329e+07,1.816529e+06,2.251220e+06,6.084051e+06,3.386591e+06,9.609858e+05,1.345652e+06,10575.717575,12727.989672,...,13848.098230,67.447922,12166.993249,1323.973649,394.523857,8460.616756,2315.930564,2903.385318,146.808986,84.739594
728,2021-12-29,6.654396e+07,3.978526e+06,5.008014e+06,3.813559e+06,1.663686e+06,5.734300e+05,3.748901e+06,9104.701490,10843.813784,...,12227.806728,84.921894,15470.352262,1409.379253,262.897176,3726.346988,3799.278344,7345.017505,130.554450,118.211905


# Generate Final DataFrame

Add self-claim ratio for each channel

In [36]:
# Channels whose self-reported ("self_*") figures should differ from the
# actual ones. Channels not listed here report self_* equal to actual_* in
# the saved output.
# NOTE(review): the meaning of each tuple is defined inside generate_final_df —
# presumably a (low, high) range for the claim ratio; confirm before tuning.
self_claim_config = {
    "Facebook": (-0.1, 0.2),
    "Google.SEM": (-0.05, 0.15),
    "Direct": (0.0, 0.05)
}

# Combine the media conversions with the baseline into the final dataset.
# A fixed seed is passed so the self-claim simulation is repeatable.
final_df = generate_final_df(
    setup_variables=basic_parameters,
    df_daily=df_conversion,
    df_baseline=df_baseline,
    self_claim_config=self_claim_config,
    seed=42
)

Generate final dataframe with self-claim simulation (conv scaled from activity) — Done!


In [37]:
# List the columns of the final dataset (actual_* / self_* pairs per channel).
final_df.columns

Index(['DATE', 'actual_impressions_Programmatic',
       'self_impressions_Programmatic', 'actual_impressions_Google.SEM',
       'self_impressions_Google.SEM', 'actual_impressions_TikTok',
       'self_impressions_TikTok', 'actual_impressions_SEO.Non.Brand',
       'self_impressions_SEO.Non.Brand', 'actual_impressions_Facebook',
       'self_impressions_Facebook', 'actual_impressions_CRM',
       'self_impressions_CRM', 'actual_impressions_Affiliates',
       'self_impressions_Affiliates', 'actual_sessions_Direct',
       'self_sessions_Direct', 'actual_sessions_Unassigned',
       'self_sessions_Unassigned', 'actual_spend_Programmatic',
       'self_spend_Programmatic', 'actual_spend_Google.SEM',
       'self_spend_Google.SEM', 'actual_spend_TikTok', 'self_spend_TikTok',
       'actual_spend_SEO.Non.Brand', 'self_spend_SEO.Non.Brand',
       'actual_spend_Facebook', 'self_spend_Facebook', 'actual_spend_CRM',
       'self_spend_CRM', 'actual_spend_Affiliates', 'self_spend_Affiliates',

In [38]:
# Show every column whose name contains "conv_": the actual/self conversion
# pairs per channel plus total_conv_from_ads.
final_df.filter(regex=r"conv_")

Unnamed: 0,actual_conv_Programmatic,self_conv_Programmatic,actual_conv_Google.SEM,self_conv_Google.SEM,actual_conv_TikTok,self_conv_TikTok,actual_conv_SEO.Non.Brand,self_conv_SEO.Non.Brand,actual_conv_Facebook,self_conv_Facebook,actual_conv_CRM,self_conv_CRM,actual_conv_Affiliates,self_conv_Affiliates,actual_conv_Direct,self_conv_Direct,actual_conv_Unassigned,self_conv_Unassigned,total_conv_from_ads
0,81.555629,81.555629,14707.442920,13454.720823,1096.260673,1096.260673,165.592454,165.592454,5877.157117,0.0,2969.909599,2969.909599,4024.551264,4024.551264,110.919230,232.983970,93.086148,93.086148,29126.475035
1,36.608790,36.608790,15377.910327,13999.577670,1182.166828,1182.166828,569.043657,569.043657,2674.608591,0.0,2211.686281,2211.686281,8140.415698,8140.415698,134.919508,284.320020,85.947889,85.947889,30413.307571
2,74.968046,74.968046,14996.839272,13694.919822,1553.455212,1553.455212,567.798363,567.798363,6493.363265,0.0,1272.105214,1272.105214,7071.038447,7071.038447,133.013821,278.670876,78.606058,78.606058,32241.187699
3,128.341704,128.341704,16138.624033,14725.865591,1360.475939,1360.475939,325.402280,325.402280,8330.232110,0.0,4157.620967,4157.620967,2210.218936,2210.218936,113.664104,236.179733,71.975094,71.975094,32836.555166
4,55.176790,55.176790,18152.836357,16631.798487,1392.114772,1392.114772,313.489491,313.489491,4458.496139,0.0,954.195214,954.195214,1614.389846,1614.389846,131.120897,272.327350,86.728935,86.728935,27158.548441
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
725,84.480254,84.480254,24387.891253,22243.060778,737.453354,737.453354,340.841220,340.841220,3179.264172,0.0,5510.162696,5510.162696,2970.672615,2970.672615,126.192538,267.599921,115.372543,115.372543,37452.330645
726,119.345214,119.345214,24779.182047,22853.942182,2280.441290,2280.441290,463.444001,463.444001,3742.952648,0.0,2150.194221,2150.194221,6380.602433,6380.602433,149.441392,314.071869,117.090491,117.090491,40182.693737
727,67.447922,67.447922,12166.993249,11116.568292,1323.973649,1323.973649,394.523857,394.523857,8460.616756,0.0,2315.930564,2315.930564,2903.385318,2903.385318,146.808986,308.876181,84.739594,84.739594,27864.419894
728,84.921894,84.921894,15470.352262,14191.412182,1409.379253,1409.379253,262.897176,262.897176,3726.346988,0.0,3799.278344,3799.278344,7345.017505,7345.017505,130.554450,271.876396,118.211905,118.211905,32346.959777


In [33]:
# Display the final dataset.
# NOTE(review): the saved output of this cell has execution count 33, which is
# older than the generate_final_df run (count 36). It shows
# total_conv_from_ads = 0.0, whereas the "conv_" view reports about 29k for the
# same day. Restart the kernel and run all cells to refresh it.
final_df

Unnamed: 0,DATE,actual_impressions_Programmatic,self_impressions_Programmatic,actual_impressions_Google.SEM,self_impressions_Google.SEM,actual_impressions_TikTok,self_impressions_TikTok,actual_impressions_SEO.Non.Brand,self_impressions_SEO.Non.Brand,actual_impressions_Facebook,...,actual_spend_Affiliates,self_spend_Affiliates,actual_spend_Direct,self_spend_Direct,actual_spend_Unassigned,self_spend_Unassigned,total_conv_from_ads,revenue_from_ads,baseline_revenue,total_revenue
0,2020-01-01,9.817703e+07,9.817703e+07,4.223367e+06,3.863638e+06,3.208841e+06,3.208841e+06,3.099847e+06,3.099847e+06,3.312967e+06,...,16830.074553,16830.074553,0.0,0.0,0.0,0.0,0.0,0.0,3.984478e+05,3.984478e+05
1,2020-01-02,1.049365e+08,1.049365e+08,3.031687e+06,2.759955e+06,5.474178e+06,5.474178e+06,1.018737e+07,1.018737e+07,1.201029e+06,...,33131.055699,33131.055699,0.0,0.0,0.0,0.0,0.0,0.0,3.356428e+05,3.356428e+05
2,2020-01-03,8.763625e+07,8.763625e+07,3.369535e+06,3.077015e+06,6.673298e+06,6.673298e+06,9.031184e+06,9.031184e+06,3.195861e+06,...,27793.417301,27793.417301,0.0,0.0,0.0,0.0,0.0,0.0,5.709778e+05,5.709778e+05
3,2020-01-04,1.005928e+08,1.005928e+08,3.704550e+06,3.380258e+06,4.292544e+06,4.292544e+06,4.497090e+06,4.497090e+06,3.897955e+06,...,7753.840392,7753.840392,0.0,0.0,0.0,0.0,0.0,0.0,7.100948e+05,7.100948e+05
4,2020-01-05,9.292759e+07,9.292759e+07,3.720350e+06,3.408620e+06,4.246725e+06,4.246725e+06,4.954724e+06,4.954724e+06,1.745126e+06,...,6280.136341,6280.136341,0.0,0.0,0.0,0.0,0.0,0.0,5.427833e+05,5.427833e+05
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
725,2021-12-26,1.223219e+08,1.223219e+08,5.762467e+06,5.255678e+06,4.816180e+06,4.816180e+06,5.316548e+06,5.316548e+06,1.317262e+06,...,12018.849236,12018.849236,0.0,0.0,0.0,0.0,0.0,0.0,1.177988e+06,1.177988e+06
726,2021-12-27,6.559665e+07,6.559665e+07,5.425804e+06,5.004241e+06,5.479275e+06,5.479275e+06,7.718482e+06,7.718482e+06,3.570370e+06,...,26117.804071,26117.804071,0.0,0.0,0.0,0.0,0.0,0.0,1.306904e+06,1.306904e+06
727,2021-12-28,8.441329e+07,8.441329e+07,1.816529e+06,1.659701e+06,2.251220e+06,2.251220e+06,6.084051e+06,6.084051e+06,3.386591e+06,...,10779.980369,10779.980369,0.0,0.0,0.0,0.0,0.0,0.0,1.492467e+06,1.492467e+06
728,2021-12-29,6.654396e+07,6.654396e+07,3.978526e+06,3.649620e+06,5.008014e+06,5.008014e+06,3.813559e+06,3.813559e+06,1.663686e+06,...,30084.210350,30084.210350,0.0,0.0,0.0,0.0,0.0,0.0,1.435489e+06,1.435489e+06


In [None]:
# Compare actual and self-reported conversions.
# NOTE(review): this cell has no execution count in the saved notebook, so it
# was not run in the saved session.
plot_actual_vs_self_conversions(final_df)


# Save data

In [34]:
# Write the final simulated dataset to <output_dir>/mmm_data.csv.
output_dir = "output"
output_file = os.path.join(output_dir, "mmm_data.csv")

# exist_ok=True creates the directory only when it is missing and avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(output_dir, exist_ok=True)

# index=False: the row index is positional only, so it is left out of the CSV.
final_df.to_csv(output_file, index=False)
print(f"Saved file to: {output_file}")

Saved file to: output/mmm_data.csv
