## Bayesian Model Setup


In [1]:
import pandas as pd
from google.colab import drive

drive.mount('/content/drive')

file_path = '/content/drive/My Drive/marketing_data.csv'
df = pd.read_csv(file_path)
print(df.columns)


Mounted at /content/drive
Index(['start_of_week', 'revenue', 'spend_channel_1', 'spend_channel_2',
       'spend_channel_3', 'spend_channel_4', 'spend_channel_5',
       'spend_channel_6', 'spend_channel_7', 'spend_channel_1_adstocked',
       'spend_channel_2_adstocked', 'spend_channel_3_adstocked',
       'spend_channel_4_adstocked', 'spend_channel_5_adstocked',
       'spend_channel_6_adstocked', 'spend_channel_7_adstocked'],
      dtype='object')


In [2]:
df.head()

Unnamed: 0,start_of_week,revenue,spend_channel_1,spend_channel_2,spend_channel_3,spend_channel_4,spend_channel_5,spend_channel_6,spend_channel_7,spend_channel_1_adstocked,spend_channel_2_adstocked,spend_channel_3_adstocked,spend_channel_4_adstocked,spend_channel_5_adstocked,spend_channel_6_adstocked,spend_channel_7_adstocked
0,2020-08-30,157906.75,2625.48,262.71,12954.12,3609.63,12955.29,12659.12,19379.79,2625.48,262.71,12954.12,3609.63,12955.29,12659.12,19379.79
1,2020-09-06,186425.68,2634.01,108.66,8760.28,4560.6,12747.7,12338.18,22473.45,2896.558,345.099,10055.692,7809.267,14043.229,14870.004,24411.429
2,2020-09-13,161607.39,2087.08,110.32,7155.42,4362.96,15015.41,10811.15,22596.05,2376.7358,420.9091,8160.9892,11391.3003,16419.7329,13785.1508,25037.1929
3,2020-09-20,180089.13,1690.7,52.79,15185.22,3883.41,15521.41,12890.22,24728.73,1928.37358,431.60819,16001.31892,14135.58027,17163.38329,15647.25016,27232.44929
4,2020-09-27,217793.98,1547.3,80.56,18524.05,4043.09,15793.74,12642.55,26515.48,1740.137358,469.007371,20124.181892,16765.112243,17510.078329,15772.000032,29238.724929


### Define the Model


In [6]:
import pymc as pm
import numpy as np
import pandas as pd

# Number of channels and data points
num_channels = 7
n_weeks = df.shape[0]

# Extracting adstocked spends and revenue
adstocked_spends = df[[f'spend_channel_{i+1}_adstocked' for i in range(num_channels)]].values
revenue = df['revenue'].values

# Trend over time
time_index = np.arange(n_weeks)

# Seasonality using Fourier terms (yearly pattern)
n_harmonics = 5
fourier_features = np.column_stack(
    [func(2 * np.pi * (i + 1) * time_index / 52) for i in range(n_harmonics) for func in (np.sin, np.cos)]
)

# Holiday indicators
month = pd.to_datetime(df['start_of_week']).dt.month
january, november, december = (month == 1).astype(int), (month == 11).astype(int), (month == 12).astype(int)

# PyMC model
with pm.Model() as model:
    baseline = pm.Normal('baseline', mu=np.mean(revenue), sigma=np.std(revenue))
    trend = pm.Normal('trend', mu=0, sigma=1)
    seasonal_coef = pm.Normal('seasonal_coef', mu=0, sigma=5, shape=fourier_features.shape[1])
    betas = pm.Normal('betas', mu=0, sigma=10, shape=num_channels)
    january_effect = pm.Normal('january_effect', mu=0, sigma=50000)
    november_effect = pm.Normal('november_effect', mu=0, sigma=40000)
    december_effect = pm.Normal('december_effect', mu=0, sigma=30000)
    sigma = pm.HalfNormal('sigma', sigma=10)

    # all components
    mu = (baseline +
          trend * time_index +
          pm.math.dot(fourier_features, seasonal_coef) +
          pm.math.dot(adstocked_spends, betas) +
          january_effect * january +
          november_effect * november +
          december_effect * december)

    # Likelihood
    likelihood = pm.Normal('revenue', mu=mu, sigma=sigma, observed=revenue)

    # Sampling
    trace = pm.sample(1000, tune=1000, return_inferencedata=True)


Output()

## Model Parameter Estimates

In [7]:
# arviz for analyzing the results
import arviz as az

# summary of the posterior distributions
summary = az.summary(trace, round_to=3)
print(summary)

# channel coefficients
channel_summary = summary.filter(like='betas', axis=0)
print("\nMarketing Channel Coefficients:")
print(channel_summary)

                       mean        sd     hdi_3%    hdi_97%  mcse_mean  \
baseline          74772.774  1056.956  72721.971  76639.619     25.288   
betas[0]             -4.988     0.174     -5.313     -4.664      0.004   
betas[1]             -1.837     0.118     -2.065     -1.627      0.003   
betas[2]              0.866     0.027      0.813      0.914      0.001   
betas[3]             -0.275     0.015     -0.304     -0.249      0.000   
betas[4]              1.541     0.044      1.453      1.621      0.001   
betas[5]              2.049     0.036      1.985      2.119      0.001   
betas[6]              1.244     0.019      1.209      1.279      0.000   
december_effect  -18399.081   771.048 -19835.036 -16946.991     15.203   
january_effect    31338.172   656.660  30108.371  32577.507     11.547   
november_effect   61006.340   679.765  59716.501  62238.180     12.589   
seasonal_coef[0]     -0.051     4.900     -9.217      8.981      0.075   
seasonal_coef[1]     -2.037     5.184 

### Creating a more readable table for the channel coefficients

In [8]:
import pandas as pd

channel_names = ['spend_channel_1', 'spend_channel_2', 'spend_channel_3',
                'spend_channel_4', 'spend_channel_5', 'spend_channel_6',
                'spend_channel_7']

channel_df = pd.DataFrame({
    'Channel': channel_names,
    'Coefficient': summary.filter(like='betas', axis=0)['mean'].values,
    'Lower Bound (3%)': summary.filter(like='betas', axis=0)['hdi_3%'].values,
    'Upper Bound (97%)': summary.filter(like='betas', axis=0)['hdi_97%'].values
})

# rating
def get_effectiveness(coef):
    if coef > 1:
        return "Very Positive"
    elif coef > 0.5:
        return "Positive"
    elif coef > 0:
        return "Slightly Positive"
    elif coef > -0.5:
        return "Slightly Negative"
    elif coef > -1:
        return "Negative"
    else:
        return "Very Negative"

channel_df['Impact'] = channel_df['Coefficient'].apply(get_effectiveness)

# Sorting by coefficient value
sorted_channels = channel_df.sort_values('Coefficient', ascending=False)
print(sorted_channels)

           Channel  Coefficient  Lower Bound (3%)  Upper Bound (97%)  \
5  spend_channel_6        2.049             1.985              2.119   
4  spend_channel_5        1.541             1.453              1.621   
6  spend_channel_7        1.244             1.209              1.279   
2  spend_channel_3        0.866             0.813              0.914   
3  spend_channel_4       -0.275            -0.304             -0.249   
1  spend_channel_2       -1.837            -2.065             -1.627   
0  spend_channel_1       -4.988            -5.313             -4.664   

              Impact  
5      Very Positive  
4      Very Positive  
6      Very Positive  
2           Positive  
3  Slightly Negative  
1      Very Negative  
0      Very Negative  


In [9]:
import pandas as pd

channel_names = ['Channel 1', 'Channel 2', 'Channel 3', 'Channel 4',
                'Channel 5', 'Channel 6', 'Channel 7']

channel_df = pd.DataFrame({
    'Channel': channel_names,
    'Coefficient': summary.filter(like='betas', axis=0)['mean'].values,
    'Lower Bound (3%)': summary.filter(like='betas', axis=0)['hdi_3%'].values,
    'Upper Bound (97%)': summary.filter(like='betas', axis=0)['hdi_97%'].values
})


def get_effectiveness(coef):
    if coef > 1:
        return "Very Positive"
    elif coef > 0.5:
        return "Positive"
    elif coef > 0:
        return "Slightly Positive"
    elif coef > -0.5:
        return "Slightly Negative"
    elif coef > -1:
        return "Negative"
    else:
        return "Very Negative"

# effectiveness function to 'Coefficient'
channel_df['Impact'] = channel_df['Coefficient'].apply(get_effectiveness)

sorted_channels = channel_df.sort_values('Coefficient', ascending=False)

styled_table = sorted_channels.style.format({
    'Coefficient': '{:.3f}',
    'Lower Bound (3%)': '{:.3f}',
    'Upper Bound (97%)': '{:.3f}'
}).set_table_styles([
    {'selector': 'th', 'props': [('background-color', '#4CAF50'),
                                ('color', 'white'),
                                ('font-weight', 'bold'),
                                ('font-size', '14px')]},
    {'selector': 'td', 'props': [('border', '1px solid black'),
                                 ('padding', '8px'),
                                 ('font-size', '12px'),
                                 ('color', 'black')]},
    {'selector': 'tr:nth-child(odd)', 'props': [('background-color', '#f2f2f2')]},
    {'selector': 'tr:nth-child(even)', 'props': [('background-color', 'white')]},
    {'selector': 'td:hover', 'props': [('background-color', '#e2e2e2')]},
])

styled_table.hide(axis="index")
styled_table


Channel,Coefficient,Lower Bound (3%),Upper Bound (97%),Impact
Channel 6,2.049,1.985,2.119,Very Positive
Channel 5,1.541,1.453,1.621,Very Positive
Channel 7,1.244,1.209,1.279,Very Positive
Channel 3,0.866,0.813,0.914,Positive
Channel 4,-0.275,-0.304,-0.249,Slightly Negative
Channel 2,-1.837,-2.065,-1.627,Very Negative
Channel 1,-4.988,-5.313,-4.664,Very Negative


### Checking model predictions against actual data

In [107]:
with model:
    posterior_pred = pm.sample_posterior_predictive(trace)

# Get predicted values
pred_mean = posterior_pred.posterior_predictive['revenue'].mean(dim=["chain", "draw"]).values

print(pred_mean[:10])


Output()

[141321.36758471 148646.11527389 150830.24374288 166737.68233409
 173692.09193037 199279.91050556 238391.62188311 203593.68023045
 157515.46363978 207410.33812473]


In [10]:
import plotly.graph_objects as go

fig = go.Figure()

fig.add_trace(go.Scatter(
    x=df['start_of_week'],
    y=df['revenue'],
    mode='markers',
    name='Actual Revenue',
    marker=dict(size=8, opacity=0.6, color='blue')
))

fig.update_layout(
    title='Actual Revenue Over Time',
    xaxis_title='Date',
    yaxis_title='Revenue',
    template='plotly_white',
    width=900,
    height=500
)

fig.show()


### Calculating R-squared to Evaluate Model Fit

In [19]:
ss_total = np.sum((df['revenue'] - np.mean(df['revenue']))**2)
ss_residual = np.sum((df['revenue'] - pred_mean)**2)
r_squared = 1 - (ss_residual / ss_total)

fig.update_layout(
    title=f'Model Fit: Actual vs Predicted Revenue (R² = {r_squared:.3f})',
    xaxis_title='Date',
    yaxis_title='Revenue',
    hovermode='x unified',
    template='plotly_white',
    width=900,
    height=500
)

fig.show()


### Calculating ROI for Each Channel

In [20]:
channel_names = ['Channel 1', 'Channel 2', 'Channel 3',
                'Channel 4', 'Channel 5', 'Channel 6', 'Channel 7']
original_channels = ['spend_channel_1', 'spend_channel_2', 'spend_channel_3',
                    'spend_channel_4', 'spend_channel_5', 'spend_channel_6', 'spend_channel_7']
adstocked_channels = [ch + '_adstocked' for ch in original_channels]

# Get coefficients
coefficients = trace.posterior['betas'].mean(dim=('chain', 'draw')).values

# Calculating total spend and contribution for each channel
total_spend = []
total_contribution = []

for i, channel in enumerate(original_channels):
    # Total spend
    spend = df[channel].sum()
    total_spend.append(spend)

    # Total contribution to revenue (coefficient * adstocked_spend)
    contribution = coefficients[i] * df[adstocked_channels[i]].sum()
    total_contribution.append(contribution)

# Calculate ROI (Return on Investment)
roi = [(contribution / spend) for contribution, spend in zip(total_contribution, total_spend)]

roi_df = pd.DataFrame({
    'Channel': channel_names,
    'Coefficient': coefficients,
    'Total Spend': total_spend,
    'Total Contribution': total_contribution,
    'ROI': roi
})

# Sort by ROI
roi_df = roi_df.sort_values('ROI', ascending=False)

print(roi_df)

     Channel  Coefficient  Total Spend  Total Contribution        ROI
5  Channel 6     2.048656    526624.70        1.348591e+06   2.560820
4  Channel 5     1.540841    891863.59        1.526341e+06   1.711407
6  Channel 7     1.243823   2880942.21        3.976875e+06   1.380408
2  Channel 3     0.866258   2028746.51        1.951532e+06   0.961940
3  Channel 4    -0.274560    719174.22       -1.821948e+06  -2.533388
0  Channel 1    -4.988227    129542.90       -7.179882e+05  -5.542474
1  Channel 2    -1.837124     35738.66       -5.520603e+05 -15.447147


In [21]:
import plotly.express as px
import pandas as pd

fig = px.bar(
    roi_df,
    x='Channel',
    y='ROI',
    text='ROI',
    color='ROI',
    color_continuous_scale=['grey', 'lightgreen', 'darkgreen'],
    title="ROI by Channel"
)

fig.update_traces(texttemplate='%{text:.2f}', textposition='outside')
fig.update_layout(
    yaxis_title="ROI",
    xaxis_title="Channel",
    coloraxis_showscale=False,
    template="plotly_white",
    height=600,
    width=900
)

fig.show()


#### Channel 6's spend is relatively low, but the revenue generated from that spend is high — leading to a high ROI. each euro spent is associated with a €2.56 profit

#### Channel 2 has the lowest ROI (-15.45) - each euro spent is associated with a €-15.45 loss

#### Total Contribution by Channel

In [22]:
fig = px.bar(
    roi_df,
    x='Channel',
    y='Total Contribution',
    text='Total Contribution',
    color='Total Contribution',
    color_continuous_scale=['grey', 'lightgreen', 'darkgreen'],
    title="Total Contribution by Channel"
)

fig.update_traces(texttemplate='%{text:.2f}', textposition='outside')
fig.update_layout(
    yaxis_title="Contribution (€)",
    xaxis_title="Channel",
    coloraxis_showscale=False,
    template="plotly_white",
    height=600,
    width=900
)

fig.show()


#### Channel 7 has the highest total revenue contribution €3.98M, indicating that it’s driving significant absolute revenue.

However, its ROI is 1.38, which suggests that while it's effective in generating revenue, the efficiency (revenue per € spent) is moderate compared to Channel 6

In [23]:
def calculate_mape(actual, predicted):
    mape = (abs(actual - predicted) / (actual + 1e-10)).mean() * 100
    return mape


In [24]:
actual = df['revenue'].values
print(f"Actual values: {actual[:5]}")


Actual values: [157906.75 186425.68 161607.39 180089.13 217793.98]


In [25]:
with model:
    posterior_pred = pm.sample_posterior_predictive(trace, var_names=['revenue'], random_seed=42)

print(posterior_pred.posterior_predictive['revenue'].shape)


Output()

(2, 1000, 104)


In [26]:
predicted = posterior_pred.posterior_predictive['revenue'].mean(dim=["chain", "draw"]).values
print(f"Predicted values: {predicted[:5]}")

Predicted values: [141434.02893623 148725.5620874  150740.06368335 166755.57977145
 173763.61844921]


In [27]:
mape = calculate_mape(actual, predicted)
print(f"MAPE: {mape:.2f}%")


MAPE: 17.35%


In [28]:
# Mean Absolute Error (MAE)
def calculate_mae(actual, predicted):
    mae = abs(actual - predicted).mean()
    return mae

actual = df['revenue'].values
predicted = posterior_pred.posterior_predictive['revenue'].mean(dim=["chain", "draw"]).values

mae = calculate_mae(actual, predicted)
print(f"MAE: €{mae:.2f}")

MAE: €23079.78


In [31]:
from plotly.subplots import make_subplots
# Extracting components from the model
baseline_mean = trace.posterior['baseline'].mean().values
trend_mean = trace.posterior['trend'].mean().values
seasonal_coef_mean = trace.posterior['seasonal_coef'].mean(dim=["chain", "draw"]).values

# Calculate components
trend_component = trend_mean * time_index
seasonal_component = np.dot(fourier_features, seasonal_coef_mean)
january_effect = trace.posterior['january_effect'].mean().values * january
november_effect = trace.posterior['november_effect'].mean().values * november
december_effect = trace.posterior['december_effect'].mean().values * december
event_component = january_effect + november_effect + december_effect

fig = make_subplots(rows=4, cols=1,
                    subplot_titles=("Total Revenue", "Trend Component",
                                   "Seasonal Component", "Special Event Effects"))

# total revenue
fig.add_trace(
    go.Scatter(x=df['start_of_week'], y=revenue, mode='lines+markers',
              name='Revenue', line=dict(color='blue')),
    row=1, col=1
)

# trend component
fig.add_trace(
    go.Scatter(x=df['start_of_week'], y=trend_component, mode='lines',
              name='Trend', line=dict(color='red')),
    row=2, col=1
)

# seasonal component
fig.add_trace(
    go.Scatter(x=df['start_of_week'], y=seasonal_component, mode='lines',
              name='Seasonality', line=dict(color='green')),
    row=3, col=1
)

# special event component
fig.add_trace(
    go.Scatter(x=df['start_of_week'], y=event_component, mode='lines',
              name='Special Events', line=dict(color='purple')),
    row=4, col=1
)

fig.update_layout(height=800, width=1000,
                 title_text="Revenue Decomposition: Trend, Seasonality, and Special Events",
                 showlegend=False)

fig.update_yaxes(title_text="Revenue (€)", row=1, col=1)
fig.update_yaxes(title_text="Trend (€)", row=2, col=1)
fig.update_yaxes(title_text="Seasonal Effect (€)", row=3, col=1)
fig.update_yaxes(title_text="Special Event Effect (€)", row=4, col=1)

fig.show()