In [1]:
from utils.synthesis_data import create_ads_spend, create_baseline, define_basic_parameters, generate_media
from utils.synthesis_data import generate_noisy_cvr, pivot_to_mmm_format, apply_adstock, apply_diminishing_returns, calculate_conversions, generate_final_df
from utils.plot_helpers import plot_baseline_sales, plot_ads_spend, plot_channel_transforms, plot_actual_vs_self_conversions
import os

# Define Basic Parameters

In [2]:
# Simulation horizon: two years of data starting on 1 January 2020.
years = 2
start_date = "2020/1/1"
frequency_of_campaigns = 1  # presumably one campaign per day -- TODO confirm

# Channels grouped by the activity metric they are measured in.
channels_impressions = [
    "Programmatic",
    "Google.SEM",
    "TikTok",
    "SEO.Non.Brand",
    "Facebook",
    "CRM",
    "Affiliates",
]
# No click-measured channels in this scenario. Note that this list is NOT
# passed to define_basic_parameters below; the returned dict still reports
# 'channels_clicks': [].
channels_clicks = []
channels_sessions = ["Direct", "Unassigned"]

# One ground-truth conversion rate per channel (9 values for 7 + 0 + 2 channels).
# Presumably ordered impressions -> clicks -> sessions channels -- TODO confirm
# against define_basic_parameters.
true_cvr = [0.001, 0.01, 0.003, 0.002, 0.005, 0.004, 0.006, 0.007, 0.003]
revenue_per_conv = 1.0

# Bundle everything into the setup dict consumed by every later step.
basic_parameters = define_basic_parameters(
    years=years,
    channels_impressions=channels_impressions,
    channels_sessions=channels_sessions,
    frequency_of_campaigns=frequency_of_campaigns,
    true_cvr=true_cvr,
    revenue_per_conv=revenue_per_conv,
    start_date=start_date,
)

***** Defining Basic Parameters *****
| Parameter                 | Value                                                                      |
|---------------------------|----------------------------------------------------------------------------|
| Years of Data to generate | 2                                                                          |
| Channels (impressions)    | Programmatic, Google.SEM, TikTok, SEO.Non.Brand, Facebook, CRM, Affiliates |
| Channels (clicks)         |                                                                            |
| Channels (sessions)       | Direct, Unassigned                                                         |
| Campaign frequency        | 1                                                                          |
| True CVRs                 | 0.001, 0.01, 0.003, 0.002, 0.005, 0.004, 0.006, 0.007, 0.003               |
| Revenue per conversion    | 1.0                                                                        |

In [3]:
basic_parameters

{'years': 2,
 'channels_impressions': ['Programmatic',
  'Google.SEM',
  'TikTok',
  'SEO.Non.Brand',
  'Facebook',
  'CRM',
  'Affiliates'],
 'channels_clicks': [],
 'channels_sessions': ['Direct', 'Unassigned'],
 'frequency_of_campaigns': 1,
 'true_cvr': [0.001, 0.01, 0.003, 0.002, 0.005, 0.004, 0.006, 0.007, 0.003],
 'revenue_per_conv': 1.0,
 'start_date': datetime.date(2020, 1, 1)}

# Simulate Daily Baseline Sales

Tips on Picking Parameters:

* How do you pick `base_p` and `trend_p`? To reflect your business scenario, look at your own business's historical data or at more aggregate data on your industry's growth over time. You can also vary these numbers to generate a variety of data sets and see how MMMs react to changes in `base_p` and `trend_p`.

* How do you pick `temp_coef_mean`? Experiment with this and the other variables. The larger this number is, the greater the impact seasonality will have on your data.

In [4]:
# Simulate daily baseline (non-media) sales. The notes on each argument are
# read off the descriptive statistics this call prints; the exact formulas
# live in utils.synthesis_data.create_baseline.
df_baseline = create_baseline(
    setup_variables=basic_parameters,
    base_p=500_000,         # constant "base" component (printed as 500000 every day)
    trend_p=1.8,            # printed "trend" tops out at 900000, i.e. base_p * trend_p
    temp_var=8,             # printed "temp" stays within roughly +/-8
    temp_coef_mean=50_000,  # larger values -> stronger seasonality
    temp_coef_sd=5_000,     # presumably the spread of the temperature coefficient -- TODO confirm
    error_std=100_000,      # printed "error" column has std of about 1e5
)

Generating baseline sales: Done!

Descriptive statistics:
                count          mean            std            min  \
day             730.0  3.655000e+02     210.877136       1.000000   
baseline_sales  730.0  9.485489e+05  286416.290773  280975.966072   
base            730.0  5.000000e+05       0.000000  500000.000000   
trend           730.0  4.506164e+05  259985.510661    1232.876712   
temp            730.0 -1.557354e-16       5.660733      -7.999926   
seasonality     730.0  0.000000e+00  222870.865023 -314968.127768   
error           730.0 -2.067537e+03  100064.462373 -329235.185016   

                          25%           50%           75%           max  
day                183.250000  3.655000e+02  5.477500e+02  7.300000e+02  
baseline_sales  719559.020376  9.531372e+05  1.168603e+06  1.665495e+06  
base            500000.000000  5.000000e+05  5.000000e+05  5.000000e+05  
trend           225924.657534  4.506164e+05  6.753082e+05  9.000000e+05  
temp               

In [6]:
plot_baseline_sales(df_baseline, figsize=(900, 600))

# Generate Ad Spend

In [39]:
# (min, max) share of a campaign's total spend that each channel may receive.
# NOTE(review): the sampled shares do not appear to be normalised. In the
# output for 2020-01-01 the seven paid channels' channel_prop_spend values sum
# to about 1.22, so the "remainder" left for 'Unassigned' is usually 0.
# NOTE(review): total_campaign_spend also differs between rows of the same
# day in the displayed output -- confirm this is intended in create_ads_spend.
min_max_proportion = {
    "Programmatic": (0.15, 0.20),
    "Google.SEM": (0.25, 0.55),
    "TikTok": (0.10, 0.20),
    "SEO.Non.Brand": (0.05, 0.15),
    "Facebook": (0.15, 0.35),
    "CRM": (0.05, 0.10),
    "Affiliates": (0.02, 0.08),
    "Direct": (0, 0),  # pins Direct's spend share to zero
    # last channel 'Unassigned' will get remainder
}

# Draw per-campaign total spend (mean 329k, std 100k) and split it by channel.
df_spend = create_ads_spend(
    setup_variables=basic_parameters,
    campaign_spend_mean=329000,
    campaign_spend_std=100000,
    min_max_proportion=min_max_proportion
)

# Preview: one row per (day, channel) in long format.
df_spend.head()

Simulated ad spend: Done!


Unnamed: 0,day,channel,channel_type,total_campaign_spend,channel_prop_spend,spend_channel
0,2020-01-01,Programmatic,impressions,423786.450689,0.164744,69816.246041
1,2020-01-01,Google.SEM,impressions,330376.793748,0.344226,113724.328341
2,2020-01-01,TikTok,impressions,308881.220922,0.183571,56701.495129
3,2020-01-01,SEO.Non.Brand,impressions,195784.601738,0.064021,12534.300947
4,2020-01-01,Facebook,impressions,239954.933343,0.344274,82610.183601


In [40]:
# Per-channel spend
# Line chart (default)
plot_ads_spend(df_spend, by="channel", chart_type="line", figsize=(900, 600))

In [41]:
# Pie chart of per-channel spend (chart_type="pie")
plot_ads_spend(df_spend, by="channel", chart_type="pie", figsize=(900, 600))

In [42]:
# Total spend
plot_ads_spend(df_spend, by="total", figsize=(900, 600))

# Generate Media Variables

Tips on Picking Parameters:

* How do you pick `true_cpm` and `true_cpc`? We recommend you use historical data as a starting point. Figure out the average CPMs and CPCs on campaigns that you've run in the past. You can also play around with these numbers to see how MMMs react upon changing these numbers.

In [43]:
# Ground-truth cost per thousand impressions (CPM) for each channel.
# The displayed output is consistent with
#   impressions = spend_channel / noisy_cpm * 1000
# (e.g. 69816 / 2.0095 * 1000 is about 34.7M for Programmatic on day 1).
true_cpm = {
    "Programmatic": 2,
    "Google.SEM": 20,
    "TikTok": 10,
    "SEO.Non.Brand": 5,
    "Facebook": 15,
    "CRM": 30,
    "Affiliates": 8,
    "Direct": None,  # Direct does not have CPM
    "Unassigned": None  # Unassigned does not have CPM
}

# Example cost-per-click config, kept for reference only. It is disabled
# because this scenario has no click-measured channels; an empty dict is
# passed as true_cpc below.
# true_cpc = {
#     "Facebook": 0.25,
#     "CRM": 0.50
# }

# Mean of the noise applied to each channel's true cost.
# The noisy_cpm values shown later (e.g. 2.0 -> 2.009463 for Programmatic)
# look like an additive draw in CPM units -- TODO confirm in generate_media.
# NOTE(review): Direct/Unassigned have no CPM; presumably their entries are
# used for session noise or ignored -- verify.
mean_noisy = {
    "Programmatic": 0.01,
    "Google.SEM": 0.05,
    "TikTok": 0.03,
    "Facebook": 0.04,
    "CRM": 0.05,
    "SEO.Non.Brand": 0.02,
    "Direct": 0.06,
    "Unassigned": 0.05,
    "Affiliates": 0.02
}

# Standard deviation of the same noise, keyed like mean_noisy.
std_noisy = {
    "Programmatic": 0.005,
    "Google.SEM": 0.02,
    "TikTok": 0.01,
    "Facebook": 0.02,
    "CRM": 0.03,
    "SEO.Non.Brand": 0.01,
    "Direct": 0.03,
    "Unassigned": 0.02,
    "Affiliates": 0.01
}

# --------------------------
# Define session configs
# --------------------------
# Per-channel settings for the session-measured channels.
# NOTE(review): field meanings are inferred from their names (base level,
# yearly trend, weekly/annual seasonality strength, noise coefficient of
# variation) -- confirm against generate_media.
sessions_config = {
    "Direct": {
        "base_sessions": 10000,
        "trend_per_year": 0.05,
        "weekly_strength": 0.12,
        "annual_strength": 0.15,
        "noise_cv": 0.08
    },
    "Unassigned": {
        "base_sessions": 5000,
        "trend_per_year": 0.03,
        "weekly_strength": 0.10,
        "annual_strength": 0.12,
        "noise_cv": 0.06
    }
}

# Run
# Turns spend into impressions/sessions per (day, channel) row.
df_media = generate_media(
    setup_variables=basic_parameters,
    df_ads_spends=df_spend,
    true_cpm=true_cpm,
    true_cpc={},
    mean_noisy=mean_noisy,
    std_noisy=std_noisy,
    sessions_config=sessions_config
)

Simulating media variables: Done!


In [44]:
df_media.columns

Index(['day', 'channel', 'channel_type', 'total_campaign_spend',
       'channel_prop_spend', 'spend_channel', 'campaign_id', 'true_cpm',
       'noisy_cpm', 'true_cpc', 'noisy_cpc', 'lifetime_impressions',
       'lifetime_clicks', 'lifetime_sessions',
       'impressions_Programmatic_after_running_day_1',
       'impressions_Google.SEM_after_running_day_1',
       'impressions_TikTok_after_running_day_1',
       'impressions_SEO.Non.Brand_after_running_day_1',
       'impressions_Facebook_after_running_day_1',
       'impressions_CRM_after_running_day_1',
       'impressions_Affiliates_after_running_day_1',
       'sessions_Direct_after_running_day_1',
       'sessions_Unassigned_after_running_day_1',
       'spend_Programmatic_after_running_day_1',
       'spend_Google.SEM_after_running_day_1',
       'spend_TikTok_after_running_day_1',
       'spend_SEO.Non.Brand_after_running_day_1',
       'spend_Facebook_after_running_day_1', 'spend_CRM_after_running_day_1',
       'spend_Affili

# Generate Noisy CVRs

In [45]:
# Optional noise (defaults to 0 if omitted)
# "SEO.Non.Brand" and "Affiliates" are not listed in either dict; in the
# output their noisy CVR stays constant at 0.002 and 0.006, i.e. their
# true_cvr values with no noise added.
# NOTE(review): these means are not zero-centred, so the simulated CVRs sit
# above true_cvr on average (e.g. Google.SEM: 0.01 true + 0.01 mean noise).
# The output also shows exact zeros (CRM), which suggests negative draws are
# clipped at 0 -- confirm in generate_noisy_cvr.

mean_noisy_cvr = {
    "Programmatic": 0.002,
    "Google.SEM":  0.01,
    "TikTok":      0.003,
    "Facebook":    0.0015,
    "CRM":         0.0002,
    "Direct":      0.004,
    "Unassigned":  0.0055,
}
# Standard deviation of the CVR noise, keyed like mean_noisy_cvr.
std_noisy_cvr = {
    "Programmatic": 0.001,
    "Google.SEM":   0.002,
    "TikTok":       0.003,
    "Facebook":     0.001,
    "CRM":          0.002,
    "Direct":       0.001,
    "Unassigned":   0.0015,
}

# Adds a noisy_cvr column plus one noisy_cvr_<channel>_... column per channel.
df_ads = generate_noisy_cvr(
    setup_variables=basic_parameters,
    df_media=df_media,
    mean_noisy_cvr=mean_noisy_cvr,
    std_noisy_cvr=std_noisy_cvr
)

You have completed running step 4: Simulating conversion rates.


In [46]:
df_ads.filter(regex=r"_cvr_")

Unnamed: 0,noisy_cvr_Affiliates_after_running_day_1,noisy_cvr_SEO.Non.Brand_after_running_day_1,noisy_cvr_Google.SEM_after_running_day_1,noisy_cvr_Unassigned_after_running_day_1,noisy_cvr_TikTok_after_running_day_1,noisy_cvr_Direct_after_running_day_1,noisy_cvr_CRM_after_running_day_1,noisy_cvr_Programmatic_after_running_day_1,noisy_cvr_Facebook_after_running_day_1
0,0.000,0.000,0.000000,0.000000,0.000000,0.000000,0.000000,0.002933,0.000000
1,0.000,0.000,0.021271,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2,0.000,0.000,0.000000,0.000000,0.006831,0.000000,0.000000,0.000000,0.000000
3,0.000,0.002,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
4,0.000,0.000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.005060
...,...,...,...,...,...,...,...,...,...
6565,0.000,0.000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.006594
6566,0.000,0.000,0.000000,0.000000,0.000000,0.000000,0.005558,0.000000,0.000000
6567,0.006,0.000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
6568,0.000,0.000,0.000000,0.000000,0.000000,0.010982,0.000000,0.000000,0.000000


In [47]:
df_ads.head(10)

Unnamed: 0,day,channel,channel_type,total_campaign_spend,channel_prop_spend,spend_channel,campaign_id,true_cpm,noisy_cpm,true_cpc,...,noisy_cvr,noisy_cvr_Affiliates_after_running_day_1,noisy_cvr_SEO.Non.Brand_after_running_day_1,noisy_cvr_Google.SEM_after_running_day_1,noisy_cvr_Unassigned_after_running_day_1,noisy_cvr_TikTok_after_running_day_1,noisy_cvr_Direct_after_running_day_1,noisy_cvr_CRM_after_running_day_1,noisy_cvr_Programmatic_after_running_day_1,noisy_cvr_Facebook_after_running_day_1
0,2020-01-01,Programmatic,impressions,423786.450689,0.164744,69816.246041,1,2.0,2.009463,,...,0.002933,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.002933,0.0
1,2020-01-01,Google.SEM,impressions,330376.793748,0.344226,113724.328341,1,20.0,20.095766,,...,0.021271,0.0,0.0,0.021271,0.0,0.0,0.0,0.0,0.0,0.0
2,2020-01-01,TikTok,impressions,308881.220922,0.183571,56701.495129,1,10.0,10.027108,,...,0.006831,0.0,0.0,0.0,0.0,0.006831,0.0,0.0,0.0,0.0
3,2020-01-01,SEO.Non.Brand,impressions,195784.601738,0.064021,12534.300947,1,5.0,5.013289,,...,0.002,0.0,0.002,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2020-01-01,Facebook,impressions,239954.933343,0.344274,82610.183601,1,15.0,15.021502,,...,0.00506,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00506
5,2020-01-01,CRM,impressions,419226.447718,0.08974,37621.433854,1,30.0,30.049083,,...,0.0019,0.0,0.0,0.0,0.0,0.0,0.0,0.0019,0.0,0.0
6,2020-01-01,Affiliates,impressions,146213.971993,0.024849,3633.300596,1,8.0,8.01012,,...,0.006,0.006,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,2020-01-01,Direct,sessions,319401.532771,0.0,0.0,1,,,,...,0.010037,0.0,0.0,0.0,0.0,0.0,0.010037,0.0,0.0,0.0
8,2020-01-01,Unassigned,sessions,262968.164201,0.0,0.0,1,,,,...,0.007873,0.0,0.0,0.0,0.007873,0.0,0.0,0.0,0.0,0.0
9,2020-01-02,Programmatic,impressions,299315.79595,0.165139,49428.733531,2,2.0,2.003231,,...,0.004294,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.004294,0.0


In [48]:
df_ads.columns

Index(['day', 'channel', 'channel_type', 'total_campaign_spend',
       'channel_prop_spend', 'spend_channel', 'campaign_id', 'true_cpm',
       'noisy_cpm', 'true_cpc', 'noisy_cpc', 'lifetime_impressions',
       'lifetime_clicks', 'lifetime_sessions',
       'impressions_Programmatic_after_running_day_1',
       'impressions_Google.SEM_after_running_day_1',
       'impressions_TikTok_after_running_day_1',
       'impressions_SEO.Non.Brand_after_running_day_1',
       'impressions_Facebook_after_running_day_1',
       'impressions_CRM_after_running_day_1',
       'impressions_Affiliates_after_running_day_1',
       'sessions_Direct_after_running_day_1',
       'sessions_Unassigned_after_running_day_1',
       'spend_Programmatic_after_running_day_1',
       'spend_Google.SEM_after_running_day_1',
       'spend_TikTok_after_running_day_1',
       'spend_SEO.Non.Brand_after_running_day_1',
       'spend_Facebook_after_running_day_1', 'spend_CRM_after_running_day_1',
       'spend_Affili

In [49]:
df_ads.filter(regex=r"^(impressions_|sessions_)").head(20)

Unnamed: 0,impressions_Programmatic_after_running_day_1,impressions_Google.SEM_after_running_day_1,impressions_TikTok_after_running_day_1,impressions_SEO.Non.Brand_after_running_day_1,impressions_Facebook_after_running_day_1,impressions_CRM_after_running_day_1,impressions_Affiliates_after_running_day_1,sessions_Direct_after_running_day_1,sessions_Unassigned_after_running_day_1
0,34743730.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,5659119.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,5654821.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,2500215.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,5499462.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,1251999.0,0.0,0.0,0.0
6,0.0,0.0,0.0,0.0,0.0,0.0,453588.8,0.0,0.0
7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,11595.747425,0.0
8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10490.66662
9,24674510.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# Transforming Media Variables

## Pivoting the table to an MMM format

In [50]:
# Reshape the long (day, channel) table into one row per DATE, with
# per-channel activity (sum_n_*), spend (sum_spend_*) and CVR (cvr_*) columns.
df_before_mmm = pivot_to_mmm_format(
    setup_variables=basic_parameters,
    df_ads=df_ads
)

Pivoting the data frame to an MMM format: Done!


In [51]:
df_before_mmm.head(20)

Unnamed: 0,DATE,sum_n_Programmatic_imps_this_day,sum_n_Google.SEM_imps_this_day,sum_n_TikTok_imps_this_day,sum_n_SEO.Non.Brand_imps_this_day,sum_n_Facebook_imps_this_day,sum_n_CRM_imps_this_day,sum_n_Affiliates_imps_this_day,sum_n_Direct_sessions_this_day,sum_n_Unassigned_sessions_this_day,...,sum_spend_Unassigned_this_day,cvr_Programmatic_this_day,cvr_Google.SEM_this_day,cvr_TikTok_this_day,cvr_SEO.Non.Brand_this_day,cvr_Facebook_this_day,cvr_CRM_this_day,cvr_Affiliates_this_day,cvr_Direct_this_day,cvr_Unassigned_this_day
0,2020-01-01,34743730.0,5659119.0,5654821.0,2500215.0,5499462.0,1251999.0,453588.8,11595.747425,10490.66662,...,0.0,0.002933,0.021271,0.006831,0.002,0.00506,0.0019,0.006,0.010037,0.007873
1,2020-01-02,24674510.0,7259378.0,5200713.0,7901119.0,4305324.0,1042125.0,2952186.0,11631.90039,9884.142587,...,0.0,0.004294,0.02019,0.000898,0.002,0.006877,0.006761,0.006,0.011229,0.01091
2,2020-01-03,22061280.0,3674833.0,3042234.0,4773498.0,5707378.0,641544.1,895717.0,10917.796677,8023.489039,...,0.0,0.002499,0.020061,0.005767,0.002,0.005568,0.003372,0.006,0.00894,0.007525
3,2020-01-04,23805380.0,3048467.0,4759823.0,5598131.0,2666085.0,914037.3,1741163.0,8880.5261,8373.874793,...,20870.009556,0.003415,0.019625,0.008345,0.002,0.005322,0.007349,0.006,0.009098,0.008682
4,2020-01-05,41616350.0,6458023.0,8822863.0,11073520.0,4737718.0,959275.6,1729475.0,10228.282925,7679.995451,...,0.0,0.003667,0.021851,0.006631,0.002,0.005691,0.003052,0.006,0.009796,0.008717
5,2020-01-06,22923350.0,1477637.0,2811217.0,9074476.0,2821642.0,471792.8,2798876.0,11209.016321,10561.233036,...,0.0,0.004456,0.021786,0.001477,0.002,0.005762,0.00431,0.006,0.010968,0.006978
6,2020-01-07,18621650.0,7000092.0,3611693.0,6375061.0,3170620.0,848082.5,3479469.0,10651.09935,11783.925991,...,0.0,0.003488,0.020994,0.004896,0.002,0.008061,0.006433,0.006,0.010435,0.009921
7,2020-01-08,21288600.0,6829025.0,4735285.0,7469114.0,4443435.0,960516.0,2741408.0,13177.099166,11280.950554,...,0.0,0.003356,0.023008,0.007362,0.002,0.006292,0.004688,0.006,0.01081,0.012035
8,2020-01-09,21147660.0,6568426.0,3123805.0,4323392.0,6390591.0,590925.3,1722812.0,11613.153523,10012.824719,...,0.0,0.001648,0.019311,0.006462,0.002,0.007698,0.004302,0.006,0.013118,0.009406
9,2020-01-10,19400340.0,2292169.0,6527652.0,4731047.0,7743996.0,796439.7,1869701.0,8698.176051,9883.950527,...,0.0,0.0025,0.020345,0.004193,0.002,0.007838,0.003719,0.006,0.010082,0.00924


In [52]:
df_before_mmm.columns

Index(['DATE', 'sum_n_Programmatic_imps_this_day',
       'sum_n_Google.SEM_imps_this_day', 'sum_n_TikTok_imps_this_day',
       'sum_n_SEO.Non.Brand_imps_this_day', 'sum_n_Facebook_imps_this_day',
       'sum_n_CRM_imps_this_day', 'sum_n_Affiliates_imps_this_day',
       'sum_n_Direct_sessions_this_day', 'sum_n_Unassigned_sessions_this_day',
       'sum_spend_Programmatic_this_day', 'sum_spend_Google.SEM_this_day',
       'sum_spend_TikTok_this_day', 'sum_spend_SEO.Non.Brand_this_day',
       'sum_spend_Facebook_this_day', 'sum_spend_CRM_this_day',
       'sum_spend_Affiliates_this_day', 'sum_spend_Direct_this_day',
       'sum_spend_Unassigned_this_day', 'cvr_Programmatic_this_day',
       'cvr_Google.SEM_this_day', 'cvr_TikTok_this_day',
       'cvr_SEO.Non.Brand_this_day', 'cvr_Facebook_this_day',
       'cvr_CRM_this_day', 'cvr_Affiliates_this_day', 'cvr_Direct_this_day',
       'cvr_Unassigned_this_day'],
      dtype='object')

## Apply Adstock

In [53]:
# Carry-over (decay) rate per channel for geometric adstock.
# The displayed output matches the recursion
#   adstocked[t] = x[t] + lambda * adstocked[t-1]
# e.g. Programmatic on day 2: 24674510 + 0.1 * 34743730 is about 28148880.
true_lambda_decay = {
    "Programmatic": 0.1,
    "Google.SEM": 0.2,
    "TikTok": 0.3,
    "SEO.Non.Brand": 0.15,
    "Facebook": 0.25,
    "CRM": 0.1,
    "Affiliates": 0.05,
    "Direct": 0.2,
    "Unassigned": 0.1
}

# Appends one *_adstocked column per activity column; originals are kept.
df_adstock = apply_adstock(
    setup_variables=basic_parameters,
    df_daily=df_before_mmm,
    true_lambda_decay=true_lambda_decay
)

Applied geometric adstock to all channels: Done!


In [54]:
df_adstock.head()

Unnamed: 0,DATE,sum_n_Programmatic_imps_this_day,sum_n_Google.SEM_imps_this_day,sum_n_TikTok_imps_this_day,sum_n_SEO.Non.Brand_imps_this_day,sum_n_Facebook_imps_this_day,sum_n_CRM_imps_this_day,sum_n_Affiliates_imps_this_day,sum_n_Direct_sessions_this_day,sum_n_Unassigned_sessions_this_day,...,cvr_Unassigned_this_day,sum_n_Programmatic_imps_this_day_adstocked,sum_n_Google.SEM_imps_this_day_adstocked,sum_n_TikTok_imps_this_day_adstocked,sum_n_SEO.Non.Brand_imps_this_day_adstocked,sum_n_Facebook_imps_this_day_adstocked,sum_n_CRM_imps_this_day_adstocked,sum_n_Affiliates_imps_this_day_adstocked,sum_n_Direct_sessions_this_day_adstocked,sum_n_Unassigned_sessions_this_day_adstocked
0,2020-01-01,34743730.0,5659119.0,5654821.0,2500215.0,5499462.0,1251999.0,453588.8,11595.747425,10490.66662,...,0.007873,34743730.0,5659119.0,5654821.0,2500215.0,5499462.0,1251999.0,453588.8,11595.747425,10490.66662
1,2020-01-02,24674510.0,7259378.0,5200713.0,7901119.0,4305324.0,1042125.0,2952186.0,11631.90039,9884.142587,...,0.01091,28148880.0,8391202.0,6897160.0,8276151.0,5680189.0,1167325.0,2974865.0,13951.049875,10933.209249
2,2020-01-03,22061280.0,3674833.0,3042234.0,4773498.0,5707378.0,641544.1,895717.0,10917.796677,8023.489039,...,0.007525,24876170.0,5353074.0,5111382.0,6014920.0,7127425.0,758276.6,1044460.0,13708.006652,9116.809963
3,2020-01-04,23805380.0,3048467.0,4759823.0,5598131.0,2666085.0,914037.3,1741163.0,8880.5261,8373.874793,...,0.008682,26293000.0,4119081.0,6293238.0,6500369.0,4447941.0,989865.0,1793386.0,11622.12743,9285.555789
4,2020-01-05,41616350.0,6458023.0,8822863.0,11073520.0,4737718.0,959275.6,1729475.0,10228.282925,7679.995451,...,0.008717,44245650.0,7281840.0,10710830.0,12048570.0,5849703.0,1058262.0,1819144.0,12552.708411,8608.55103


In [55]:
df_adstock.columns

Index(['DATE', 'sum_n_Programmatic_imps_this_day',
       'sum_n_Google.SEM_imps_this_day', 'sum_n_TikTok_imps_this_day',
       'sum_n_SEO.Non.Brand_imps_this_day', 'sum_n_Facebook_imps_this_day',
       'sum_n_CRM_imps_this_day', 'sum_n_Affiliates_imps_this_day',
       'sum_n_Direct_sessions_this_day', 'sum_n_Unassigned_sessions_this_day',
       'sum_spend_Programmatic_this_day', 'sum_spend_Google.SEM_this_day',
       'sum_spend_TikTok_this_day', 'sum_spend_SEO.Non.Brand_this_day',
       'sum_spend_Facebook_this_day', 'sum_spend_CRM_this_day',
       'sum_spend_Affiliates_this_day', 'sum_spend_Direct_this_day',
       'sum_spend_Unassigned_this_day', 'cvr_Programmatic_this_day',
       'cvr_Google.SEM_this_day', 'cvr_TikTok_this_day',
       'cvr_SEO.Non.Brand_this_day', 'cvr_Facebook_this_day',
       'cvr_CRM_this_day', 'cvr_Affiliates_this_day', 'cvr_Direct_this_day',
       'cvr_Unassigned_this_day', 'sum_n_Programmatic_imps_this_day_adstocked',
       'sum_n_Google.SEM_imps_

## Applying Diminishing Returns to Media Variables

In [56]:
# Saturation (diminishing-returns) settings applied to the adstocked columns.
# The exact curve is defined in utils.synthesis_data.apply_diminishing_returns.
# In the displayed output the large impression volumes are strongly compressed
# (e.g. 34.7M -> about 104k) while session volumes are almost unchanged.
alpha = 2.0  # same alpha for all channels
gamma = {
    "Programmatic": 0.2, "Google.SEM": 0.2, "TikTok": 0.25,
    "SEO.Non.Brand": 0.25, "Facebook": 0.2, "CRM": 0.2, "Affiliates": 0.2,
    "Direct": 0.2, "Unassigned": 0.2
}

# NOTE(review): "df_dimissing" is a misspelling of "diminishing"; the name is
# kept because later cells reference it.
# Appends one *_adstocked_decay_diminishing column per channel.
df_dimissing = apply_diminishing_returns(
    setup_variables=basic_parameters,
    df_adstock=df_adstock,
    alpha_saturation=alpha,
    gamma_saturation=gamma,   # can also pass a single float for all channels
    x_marginal=1e6            # constant marginal factor (here 1e6); pass None for no marginal effect
)

Apply diminishing marginal returns: Done!


In [57]:
df_dimissing.head()

Unnamed: 0,DATE,sum_n_Programmatic_imps_this_day,sum_n_Google.SEM_imps_this_day,sum_n_TikTok_imps_this_day,sum_n_SEO.Non.Brand_imps_this_day,sum_n_Facebook_imps_this_day,sum_n_CRM_imps_this_day,sum_n_Affiliates_imps_this_day,sum_n_Direct_sessions_this_day,sum_n_Unassigned_sessions_this_day,...,sum_n_Unassigned_sessions_this_day_adstocked,sum_n_Programmatic_imps_this_day_adstocked_decay_diminishing,sum_n_Google.SEM_imps_this_day_adstocked_decay_diminishing,sum_n_TikTok_imps_this_day_adstocked_decay_diminishing,sum_n_SEO.Non.Brand_imps_this_day_adstocked_decay_diminishing,sum_n_Facebook_imps_this_day_adstocked_decay_diminishing,sum_n_CRM_imps_this_day_adstocked_decay_diminishing,sum_n_Affiliates_imps_this_day_adstocked_decay_diminishing,sum_n_Direct_sessions_this_day_adstocked_decay_diminishing,sum_n_Unassigned_sessions_this_day_adstocked_decay_diminishing
0,2020-01-01,34743730.0,5659119.0,5654821.0,2500215.0,5499462.0,1251999.0,453588.8,11595.747425,10490.66662,...,10490.66662,104091.549289,186434.976011,191878.777712,111627.261778,260487.419779,922973.971669,160618.9,11594.505496,10489.82469
1,2020-01-02,24674510.0,7259378.0,5200713.0,7901119.0,4305324.0,1042125.0,2952186.0,11631.90039,9884.142587,...,10933.209249,84333.498772,276441.183706,234033.697195,369505.832344,269047.717891,860551.809805,1053420.0,13949.555688,10932.331802
2,2020-01-03,22061280.0,3674833.0,3042234.0,4773498.0,5707378.0,641544.1,895717.0,10917.796677,8023.489039,...,9116.809963,74528.513807,176352.56658,173438.878895,268548.517757,337597.460973,559001.475204,369850.5,13706.538495,9116.078292
3,2020-01-04,23805380.0,3048467.0,4759823.0,5598131.0,2666085.0,914037.3,1741163.0,8880.5261,8373.874793,...,9285.555789,78773.324141,135699.717754,213541.476302,290222.388928,210681.094586,729728.444424,635050.1,11620.882675,9284.810575
4,2020-01-05,41616350.0,6458023.0,8822863.0,11073520.0,4737718.0,959275.6,1729475.0,10228.282925,7679.995451,...,8608.55103,132559.11605,239894.167068,363438.893826,537933.275339,277076.924827,780150.794817,644171.3,12551.363989,8607.860149


In [68]:
# For an impressions channel
plot_channel_transforms(df_dimissing, channel="Facebook", figsize=(900, 600))


# Calculating Conversions

In [59]:
df_dimissing.filter(regex=r"^cvr_")

Unnamed: 0,cvr_Programmatic_this_day,cvr_Google.SEM_this_day,cvr_TikTok_this_day,cvr_SEO.Non.Brand_this_day,cvr_Facebook_this_day,cvr_CRM_this_day,cvr_Affiliates_this_day,cvr_Direct_this_day,cvr_Unassigned_this_day
0,0.002933,0.021271,0.006831,0.002,0.005060,0.001900,0.006,0.010037,0.007873
1,0.004294,0.020190,0.000898,0.002,0.006877,0.006761,0.006,0.011229,0.010910
2,0.002499,0.020061,0.005767,0.002,0.005568,0.003372,0.006,0.008940,0.007525
3,0.003415,0.019625,0.008345,0.002,0.005322,0.007349,0.006,0.009098,0.008682
4,0.003667,0.021851,0.006631,0.002,0.005691,0.003052,0.006,0.009796,0.008717
...,...,...,...,...,...,...,...,...,...
725,0.003264,0.019873,0.009577,0.002,0.006011,0.000000,0.006,0.010148,0.008956
726,0.004273,0.018019,0.005726,0.002,0.007962,0.007211,0.006,0.010442,0.008530
727,0.002459,0.017397,0.002847,0.002,0.006642,0.005850,0.006,0.011895,0.005246
728,0.003563,0.019712,0.002996,0.002,0.007128,0.004527,0.006,0.012151,0.009163


In [60]:
# Adds one conv_<channel> column per channel. The displayed values are
# consistent with conv = cvr_<channel>_this_day * the channel's
# *_adstocked_decay_diminishing volume (e.g. 0.002933 * 104091.5 is about 305.3).
df_conversion = calculate_conversions(setup_variables=basic_parameters, df_daily=df_dimissing)

Calculated conversions for all channel types: Done!


In [61]:
df_conversion

Unnamed: 0,DATE,sum_n_Programmatic_imps_this_day,sum_n_Google.SEM_imps_this_day,sum_n_TikTok_imps_this_day,sum_n_SEO.Non.Brand_imps_this_day,sum_n_Facebook_imps_this_day,sum_n_CRM_imps_this_day,sum_n_Affiliates_imps_this_day,sum_n_Direct_sessions_this_day,sum_n_Unassigned_sessions_this_day,...,sum_n_Unassigned_sessions_this_day_adstocked_decay_diminishing,conv_Programmatic,conv_Google.SEM,conv_TikTok,conv_SEO.Non.Brand,conv_Facebook,conv_CRM,conv_Affiliates,conv_Direct,conv_Unassigned
0,2020-01-01,3.474373e+07,5.659119e+06,5.654821e+06,2.500215e+06,5.499462e+06,1.251999e+06,4.535888e+05,11595.747425,10490.666620,...,10489.824690,305.349807,3965.709618,1310.663476,223.254524,1318.162509,1753.366764,963.713353,116.369227,82.584548
1,2020-01-02,2.467451e+07,7.259378e+06,5.200713e+06,7.901119e+06,4.305324e+06,1.042125e+06,2.952186e+06,11631.900390,9884.142587,...,10932.331802,362.132701,5581.351892,210.144582,739.011665,1850.264341,5817.935014,6320.520927,156.638858,119.273674
2,2020-01-03,2.206128e+07,3.674833e+06,3.042234e+06,4.773498e+06,5.707378e+06,6.415441e+05,8.957170e+05,10917.796677,8023.489039,...,9116.078292,186.221025,3537.883187,1000.259292,537.097036,1879.693317,1884.829455,2219.103119,122.538303,68.594232
3,2020-01-04,2.380538e+07,3.048467e+06,4.759823e+06,5.598131e+06,2.666085e+06,9.140373e+05,1.741163e+06,8880.526100,8373.874793,...,9284.810575,268.984140,2663.106951,1781.962838,580.444778,1121.201215,5362.545696,3810.300576,105.724587,80.610385
4,2020-01-05,4.161635e+07,6.458023e+06,8.822863e+06,1.107352e+07,4.737718e+06,9.592756e+05,1.729475e+06,10228.282925,7679.995451,...,8607.860149,486.146400,5241.932352,2409.797688,1075.866551,1576.873603,2381.168069,3865.028082,122.956208,75.036867
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
725,2021-12-26,2.012568e+07,6.742164e+06,5.623130e+06,9.008119e+06,5.888542e+06,1.294745e+06,7.539707e+05,8557.408492,9423.133242,...,10388.571020,220.197681,5138.210930,2553.721741,860.761835,2205.873978,0.000000,1980.580432,107.634446,93.041186
726,2021-12-27,2.894618e+07,7.052149e+06,4.791792e+06,3.610471e+06,6.185152e+06,3.911252e+05,1.819834e+06,9290.637382,10811.311808,...,11849.301246,399.434903,5118.040426,1388.987309,451.508152,3063.063075,2824.334190,3965.522948,119.156304,101.079100
727,2021-12-28,2.217193e+07,6.895407e+06,5.799029e+06,7.266563e+06,9.839861e+06,1.084049e+06,1.336684e+06,9763.710404,10023.047965,...,11207.173688,186.316043,4940.229942,767.339439,716.587612,3734.652041,4903.840047,3038.250043,143.272308,58.791977
728,2021-12-29,1.872012e+07,8.650792e+06,3.080736e+06,9.122573e+06,4.024105e+06,5.915909e+05,9.888391e+05,12143.983475,11169.383922,...,12289.204890,226.820272,6737.392484,555.386060,922.080347,2360.603693,2354.056829,2252.840617,176.813652,112.611847


# Generate Final DataFrame

Add self-claim ratio for each channel

In [62]:
channels_impressions, channels_sessions

(['Programmatic',
  'Google.SEM',
  'TikTok',
  'SEO.Non.Brand',
  'Facebook',
  'CRM',
  'Affiliates'],
 ['Direct', 'Unassigned'])

In [63]:
# Relative over-/under-claim of each channel's self-reported numbers.
# The displayed output is consistent with
#   self_* = actual_* * (1 + ratio) * daily noise
# so a positive ratio means the channel over-claims and a negative ratio means
# it under-claims (e.g. SEO.Non.Brand: 223.25 actual -> 178.62 self, about x0.8).
self_claim_config = {
    "Programmatic": 0.2,
    "TikTok": 0.3,
    "SEO.Non.Brand": -0.20,
    "CRM": 0.25,
    "Affiliates": -0.1,
    "Facebook": 0.05,
    "Google.SEM": 0.3,
    "Direct": -0.1,
    "Unassigned": 0.05
}


# Build the final frame with actual_* and self_* columns per channel.
# NOTE(review): this is the only step in the notebook given an explicit seed;
# unless the earlier helpers seed internally, re-running the notebook will
# produce different data.
final_df = generate_final_df(
    setup_variables=basic_parameters,
    df_daily=df_conversion,
    df_baseline=df_baseline,
    self_claim_config=self_claim_config,
    daily_noise_std=0.02,  # ±2% daily noise
    seed=42
)

Generate final dataframe with self-claim + daily noise — Done!


In [64]:
final_df.columns

Index(['DATE', 'actual_impressions_Programmatic',
       'self_impressions_Programmatic', 'actual_impressions_Google.SEM',
       'self_impressions_Google.SEM', 'actual_impressions_TikTok',
       'self_impressions_TikTok', 'actual_impressions_SEO.Non.Brand',
       'self_impressions_SEO.Non.Brand', 'actual_impressions_Facebook',
       'self_impressions_Facebook', 'actual_impressions_CRM',
       'self_impressions_CRM', 'actual_impressions_Affiliates',
       'self_impressions_Affiliates', 'actual_sessions_Direct',
       'self_sessions_Direct', 'actual_sessions_Unassigned',
       'self_sessions_Unassigned', 'actual_spend_Programmatic',
       'self_spend_Programmatic', 'actual_spend_Google.SEM',
       'self_spend_Google.SEM', 'actual_spend_TikTok', 'self_spend_TikTok',
       'actual_spend_SEO.Non.Brand', 'self_spend_SEO.Non.Brand',
       'actual_spend_Facebook', 'self_spend_Facebook', 'actual_spend_CRM',
       'self_spend_CRM', 'actual_spend_Affiliates', 'self_spend_Affiliates',

In [65]:
final_df.filter(regex=r"conv_")

Unnamed: 0,actual_conv_Programmatic,self_conv_Programmatic,actual_conv_Google.SEM,self_conv_Google.SEM,actual_conv_TikTok,self_conv_TikTok,actual_conv_SEO.Non.Brand,self_conv_SEO.Non.Brand,actual_conv_Facebook,self_conv_Facebook,actual_conv_CRM,self_conv_CRM,actual_conv_Affiliates,self_conv_Affiliates,actual_conv_Direct,self_conv_Direct,actual_conv_Unassigned,self_conv_Unassigned,total_conv_from_ads
0,305.349807,375.157910,3965.709618,5022.696308,1310.663476,1687.629931,223.254524,178.618192,1318.162509,1418.241348,1753.366764,2118.245453,963.713353,840.316844,116.369227,105.959308,82.584548,85.686959,10039.173825
1,362.132701,428.512018,5581.351892,7512.113500,210.144582,273.971539,739.011665,599.247968,1850.264341,1953.804106,5817.935014,7385.703716,6320.520927,5715.779887,156.638858,140.474601,119.273674,123.650565,21157.273654
2,186.221025,218.898387,3537.883187,4662.421711,1000.259292,1281.160736,537.097036,438.579210,1879.693317,1996.014586,1884.829455,2364.602098,2219.103119,2005.266153,122.538303,111.015421,68.594232,73.483650,11436.218965
3,268.984140,322.435696,2663.106951,3440.487790,1781.962838,2338.949875,580.444778,463.918314,1121.201215,1144.843580,5362.545696,6711.556668,3810.300576,3364.011382,105.724587,95.980385,80.610385,84.881462,15774.881165
4,486.146400,570.278673,5241.932352,6683.890923,2409.797688,3134.509375,1075.866551,874.034349,1576.873603,1719.647935,2381.168069,2930.593134,3865.028082,3380.039933,122.956208,111.270253,75.036867,78.361815,17234.805820
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
725,220.197681,262.299022,5138.210930,6523.263905,2553.721741,3265.523479,860.761835,707.493381,2205.873978,2329.373237,0.000000,0.000000,1980.580432,1842.770769,107.634446,95.259223,93.041186,96.115581,13160.022229
726,399.434903,465.555483,5118.040426,6759.434585,1388.987309,1863.505715,451.508152,369.628516,3063.063075,3083.543475,2824.334190,3379.237765,3965.522948,3569.359303,119.156304,107.616803,101.079100,106.591748,17431.126406
727,186.316043,225.749606,4940.229942,6209.493424,767.339439,964.948262,716.587612,585.482695,3734.652041,3828.248460,4903.840047,6145.683130,3038.250043,2755.636699,143.272308,130.356426,58.791977,61.647223,18489.279452
728,226.820272,272.917231,6737.392484,8804.025177,555.386060,727.139487,922.080347,720.104307,2360.603693,2460.585679,2354.056829,2959.770366,2252.840617,2015.452870,176.813652,159.936270,112.611847,116.491923,15698.605800


In [66]:
# Conversions
plot_actual_vs_self_conversions(final_df, key="conversion", figsize=(900, 600))

In [67]:
# Activity (impressions + clicks + sessions)
plot_actual_vs_self_conversions(final_df, key="activity", sort_by="self")

# Save data

In [70]:
# Destination for the generated dataset, relative to the notebook's working
# directory.
output_dir = "output"
output_file = os.path.join(output_dir, "mmm_data.csv")

# Create the directory if needed. exist_ok=True makes this a no-op when the
# directory already exists and avoids the race between a separate existence
# check and the creation call.
os.makedirs(output_dir, exist_ok=True)

# index=False: the DataFrame index carries no information, so it is not written.
final_df.to_csv(output_file, index=False)
print(f"Saved file to: {output_file}")

Saved file to: output/mmm_data.csv
