In [None]:
# Install meridian: from PyPI @ latest release
!pip install --upgrade google-meridian[colab]



Collecting google-meridian[colab]
  Downloading google_meridian-1.0.0-py3-none-any.whl.metadata (21 kB)
Collecting arviz (from google-meridian[colab])
  Downloading arviz-0.20.0-py3-none-any.whl.metadata (8.8 kB)
Collecting altair<5,>=4.2.0 (from google-meridian[colab])
  Downloading altair-4.2.2-py3-none-any.whl.metadata (13 kB)
Collecting pandas<2,>=1.5.3 (from google-meridian[colab])
  Downloading pandas-1.5.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)
Collecting scipy<1.13,>=1.12.0 (from google-meridian[colab])
  Downloading scipy-1.12.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.4/60.4 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting tensorflow<2.17,>=2.16 (from google-meridian[colab])
  Downloading tensorflow-2.16.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.2 kB)
Collecting tf-keras<2.17,>=2.16 (from google

In [None]:
# prompt: git clone https://github.com/google/meridian.git

!git clone https://github.com/google/meridian.git


Cloning into 'meridian'...
remote: Enumerating objects: 2225, done.[K
remote: Counting objects: 100% (580/580), done.[K
remote: Compressing objects: 100% (232/232), done.[K
remote: Total 2225 (delta 411), reused 360 (delta 348), pack-reused 1645 (from 3)[K
Receiving objects: 100% (2225/2225), 7.38 MiB | 25.96 MiB/s, done.
Resolving deltas: 100% (1629/1629), done.


In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_probability as tfp
import arviz as az

import IPython

from meridian import constants
from meridian.data import load
from meridian.data import test_utils
from meridian.model import model
from meridian.model import spec
from meridian.model import prior_distribution
from meridian.analysis import optimizer
from meridian.analysis import analyzer
from meridian.analysis import visualizer
from meridian.analysis import summarizer
from meridian.analysis import formatter

# Report the runtime's hardware: RAM size plus the number of GPU and CPU
# devices that TensorFlow can see.
from psutil import virtual_memory

# `virtual_memory().total` is divided by 1e9 to express it in gigabytes.
# Note that this is the total figure, even though the message says "available".
ram_gb = virtual_memory().total / 1e9
print(f'Your runtime has {ram_gb:.1f} gigabytes of available RAM\n')

# Use the stable `tf.config.list_physical_devices` API instead of its
# `tf.config.experimental` alias.
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
print("Num CPUs Available: ", len(tf.config.list_physical_devices('CPU')))

Your runtime has 359.2 gigabytes of available RAM

Num GPUs Available:  0
Num CPUs Available:  1


In [None]:
# Tell the loader which CSV column(s) hold each kind of model input.
# Impression and spend columns are listed in the same channel order
# (Channel0 ... Channel4).
impression_columns = [
    'Channel0_impression',
    'Channel1_impression',
    'Channel2_impression',
    'Channel3_impression',
    'Channel4_impression',
]
spend_columns = [
    'Channel0_spend',
    'Channel1_spend',
    'Channel2_spend',
    'Channel3_spend',
    'Channel4_spend',
]

coord_to_columns = load.CoordToColumns(
    # Index columns.
    time='time',
    geo='geo',
    # Response and scaling columns.
    kpi='conversions',
    revenue_per_kpi='revenue_per_conversion',
    population='population',
    # Explanatory columns.
    controls=['GQV', 'Competitor_Sales'],
    media=impression_columns,
    media_spend=spend_columns,
    organic_media=['Organic_channel0_impression'],
    non_media_treatments=['Promo'],
)

In [None]:
# Map every impression column and every spend column to a shared channel label
# ('Channel_0' ... 'Channel_4'), so both columns of a channel carry one name.
correct_media_to_channel = {
    f'Channel{idx}_impression': f'Channel_{idx}' for idx in range(5)
}
correct_media_spend_to_channel = {
    f'Channel{idx}_spend': f'Channel_{idx}' for idx in range(5)
}

In [None]:
# Load the simulated geo-level CSV into Meridian's input-data object.
# The KPI column is `conversions`, hence kpi_type='non_revenue'; the column
# mapping and the two channel-name mappings come from the cells above.
simulated_csv_path = "/content/meridian/meridian/data/simulated_data/csv/geo_all_channels.csv"

loader = load.CsvDataLoader(
    csv_path=simulated_csv_path,
    coord_to_columns=coord_to_columns,
    kpi_type='non_revenue',
    media_spend_to_channel=correct_media_spend_to_channel,
    media_to_channel=correct_media_to_channel,
)
data = loader.load()

  if (constants.GEO) not in self.dataset.dims.keys():
  if constants.MEDIA_TIME not in self.dataset.dims.keys():


In [None]:
# Parameters of the LogNormal ROI prior shared by the media channels. They are
# passed as the distribution's `loc` and `scale`, i.e. they describe ROI on the
# log scale rather than the mean and standard deviation of ROI itself.
roi_mu = 0.2
roi_sigma = 0.9

roi_prior = tfp.distributions.LogNormal(
    loc=roi_mu,
    scale=roi_sigma,
    name=constants.ROI_M,
)
prior = prior_distribution.PriorDistribution(roi_m=roi_prior)

# Build the model specification around the custom prior, then the model itself
# from the loaded data.
model_spec = spec.ModelSpec(prior=prior)
mmm = model.Meridian(input_data=data, model_spec=model_spec)

In [None]:
from tqdm.notebook import tqdm  # Progress-bar widget for Jupyter notebooks.
import time

# Sampling configuration, named once so the progress-bar total below cannot
# drift away from the values actually passed to the sampler.
N_PRIOR_DRAWS = 500        # Number of samples generated from the prior distribution.
N_CHAINS = 5               # Independent Markov Chain Monte Carlo (MCMC) chains.
N_ADAPT = 500              # Initial samples used to tune the sampling algorithm.
N_BURNIN = 500             # Initial samples from each chain that are discarded.
N_KEEP = 1000              # Samples kept from each chain after burn-in; these
                           # represent the posterior and are used for inference.
PARALLEL_ITERATIONS = 100  # Forwarded unchanged to `sample_posterior`.

mmm.sample_prior(N_PRIOR_DRAWS)

# `sample_posterior` is called without any progress hook, so the bar cannot
# advance while sampling runs: it stays at 0 and is filled in a single step
# once the call returns. It marks completion and elapsed time only.
total_kept_draws = N_CHAINS * N_KEEP
with tqdm(total=total_kept_draws, desc="Training Progress") as pbar:
    mmm.sample_posterior(
        n_chains=N_CHAINS,
        n_adapt=N_ADAPT,
        n_burnin=N_BURNIN,
        n_keep=N_KEEP,
        parallel_iterations=PARALLEL_ITERATIONS,
    )
    pbar.update(total_kept_draws)

Training Progress:   0%|          | 0/5000 [00:00<?, ?it/s]



R-hat values close to 1.0 indicate convergence. An R-hat below 1.2 indicates approximate convergence and is a reasonable threshold for many problems.

In [None]:
# Wrap the sampled model in a diagnostics helper and draw its R-hat boxplot,
# the convergence check for the MCMC run.
model_diagnostics = visualizer.ModelDiagnostics(mmm)
model_diagnostics.plot_rhat_boxplot()

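Each parameter in the plot corresponds to a specific aspect of your MMM model. Here is a general breakdown of the parameters commonly encountered in such models. These are generic interpretations and may not match Meridian's own parameter definitions, so check the Meridian model specification before relying on them: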

Alpha ($\alpha$):

Represents the intercept of the model or the baseline contribution of the media channel (without interactions).
$\alpha_m$ and $\alpha_{om}$ might represent specific intercepts for different variables.

Beta ($\beta$):

Represents media elasticity. This measures the effect of media spend (e.g., TV, digital ads) on the outcome (e.g., sales).
$\beta_{gm}$, $\beta_{gom}$, $\beta_m$, etc., might represent elasticities for different media channels or groups.

Gamma ($\gamma$):

Represents carryover effects or saturation parameters.
These parameters are often used in adstock transformations, capturing how the impact of media decays over time (e.g., long-term brand-building effects).

Eta ($\eta$):

Often represents the Hill transformation parameters for diminishing returns in MMM.
It controls the saturation level and steepness of the diminishing returns curve.

Sigma ($\sigma$):

Represents the standard deviation of the model residuals, indicating the uncertainty in your predictions.

Mu ($\mu$):

Represents the mean of the response variable along with other contextual transformations.

Knot values:

Typically tied to spline-based models, representing flexible curves to capture nonlinear relationships (e.g., in spending vs. ROI).

Roi_m, Tau_g, Xi_c, Xi_n:

Likely represent channel-level ROI estimates (e.g., Return on Investment for media), time-dependent variables, or noise parameters depending on your specific model.


2. Assess the model's fit by comparing the expected revenue against the actual revenue.



In [None]:
mmm_summarizer = summarizer.Summarizer(mmm)

# Mount Google Drive so the HTML report can be written to, and read back from,
# the MyDrive folder.
from google.colab import drive
drive.mount('/content/drive')

# Output folder, report file name and the date range passed to the summary.
filepath = '/content/drive/MyDrive'
summary_filename = 'summary_output.html'
start_date = '2021-01-25'
end_date = '2024-01-15'
mmm_summarizer.output_model_results_summary(
    summary_filename, filepath, start_date, end_date
)

# Preview the saved two-page report inline; the path is assembled from the same
# folder and file name that were used for writing.
IPython.display.HTML(filename=f'{filepath}/{summary_filename}')


Mounted at /content/drive


Dataset,R-squared,MAPE,wMAPE
All Data,0.93,1%,1%


# Optimizing the budget

In [None]:
%%time
# Run the budget optimization on the fitted model, calling `optimize()` with
# no arguments. The cell is timed with `%%time`; the recorded run took about
# 13 minutes of wall time.
budget_optimizer = optimizer.BudgetOptimizer(mmm)
optimization_results = budget_optimizer.optimize()

CPU times: user 38min 3s, sys: 13min 2s, total: 51min 5s
Wall time: 12min 48s


In [None]:
# Write the optimization report to Google Drive, then preview it inline. The
# preview path is assembled from the same folder and file name used for writing.
filepath = '/content/drive/MyDrive'
optimization_filename = 'optimization_output.html'

optimization_results.output_optimization_summary(optimization_filename, filepath)
IPython.display.HTML(filename=f'{filepath}/{optimization_filename}')

Channel,Non-optimized spend,Optimized spend
Channel_3,40%,29%
Channel_4,22%,28%
Channel_0,18%,22%
Channel_1,14%,13%
Channel_2,6%,7%


In [None]:
# Persist the fitted model object to Google Drive as a pickle file.
file_path = '/content/drive/MyDrive/saved_mmm.pkl'
model.save_mmm(mmm, file_path)