# Google LightweightMMM
https://github.com/google/lightweight_mmm

In [7]:
# Install LightweightMMM into the kernel's environment.
# No version is pinned here, so the release that gets installed depends on
# when this cell is run.
%pip install lightweight_mmm

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


Note: make sure to restart runtime to use new versions of libraries

In [8]:
# Import the numerical, modelling and dataframe libraries used in this notebook
import jax.numpy as jnp
import numpyro
import pandas as pd
import numpy as np
# Ask numpyro for 2 host devices; the fit cell further down requests
# number_chains=2.
# NOTE(review): presumably this is what lets the two chains run in parallel
# on CPU — confirm against the numpyro documentation.
numpyro.set_host_device_count(2)

In [9]:
# Import the relevant modules of the library
from lightweight_mmm import lightweight_mmm
from lightweight_mmm import optimize_media
from lightweight_mmm import plot
from lightweight_mmm import preprocessing
from lightweight_mmm import utils

## Loading the data for modeling

In [10]:
# Sizing constants for the analysis.
# data_size feeds the train/test split point computed further down.
# NOTE(review): the loaded media array is displayed with 1473 rows, so
# data_size does not match the dataset length — confirm which value is intended.
data_size = 1103
# NOTE(review): these three look like leftovers from the simulated-data setup;
# their only use visible here is the commented-out simulate_dummy_data call.
n_media_channels, n_extra_features, n_geos = 3, 1, 2

In [11]:
# Load the modelling data from a CSV file instead of simulating it.
# The simulated-data call is kept below, commented out, for reference:
# media_data, extra_features, target, costs = utils.simulate_dummy_data(
#     data_size=data_size,
#     n_media_channels=n_media_channels,
#     n_extra_features=n_extra_features,
#     geos=n_geos)
# NOTE(review): absolute path — the file must already exist at this location
# (it looks like a Colab session path) before this cell runs.
df = pd.read_csv('/content/kind_costs.csv')

In [12]:
# Build the model inputs from the dataframe columns.

# Target KPI as a 1-D (time,) series. Selecting with a single label rather
# than a one-element list avoids an (n, 1) array, which would not line up with
# the 2-D (time, channel) media array of a national-level model.
target = jnp.asarray(df['clicks'].to_numpy())

# One column per media channel -> (time, 3).
# NOTE(review): 'sales' is used as a media input here while 'clicks' is the
# target — confirm that is intended.
media_data = jnp.asarray(df[['spend', 'impressions', 'sales']].to_numpy())

# One cost figure per media channel.
# NOTE(review): hard-coded values; presumably in the same order as the media
# columns above — confirm their source and units.
costs = jnp.array([176.36, .6005, 3.48])

# Additional non-media regressors -> (time, 2).
extra_features = jnp.asarray(df[['imp_spend', 'click_spend']].to_numpy())

In [13]:
# Inspect the media array: (number of rows, number of media columns)
media_data.shape

(1473, 3)

In [14]:
# Train/test split: rows before split_point are used for training, the
# remaining rows are held out.
# NOTE(review): split_point is derived from data_size rather than from the
# length of the loaded arrays, so the held-out part is whatever remains after
# that row — confirm the intended test size.
split_point = data_size - 373

# media data
media_data_train, media_data_test = (
    media_data[:split_point], media_data[split_point:])

# extra features
extra_features_train, extra_features_test = (
    extra_features[:split_point], extra_features[split_point:])

# target (only the training part is sliced in this cell)
target_train = target[:split_point]

In [15]:
# One scaler per input, each configured to divide by the mean.
media_scaler, extra_features_scaler, target_scaler, cost_scaler = (
    preprocessing.CustomScaler(divide_operation=jnp.mean) for _ in range(4))

# Fit every scaler on its training input and keep the scaled result under the
# same name. Because the inputs are overwritten, re-running this cell without
# re-running the cells that created them rescales already-scaled data.
costs = cost_scaler.fit_transform(costs)
target_train = target_scaler.fit_transform(target_train)
extra_features_train = extra_features_scaler.fit_transform(extra_features_train)
media_data_train = media_scaler.fit_transform(media_data_train)

In [None]:
# Train the media mix model (model_name="carryover") on the scaled training
# data, using 2 chains of 2000 warmup + 2000 sampling steps each.

# Fixed seed so the sampling run is reproducible across kernel restarts.
SEED = 105

mmm = lightweight_mmm.LightweightMMM(model_name="carryover")

# NOTE(review): newer lightweight_mmm releases appear to name the cost
# argument `media_prior` instead of `total_costs` — confirm against the
# installed version.
mmm.fit(
    media=media_data_train,
    total_costs=costs,
    target=target_train,
    extra_features=extra_features_train,
    number_warmup=2000,
    number_samples=2000,
    number_chains=2,
    seed=SEED)

  0%|          | 0/4000 [00:00<?, ?it/s]

  0%|          | 0/4000 [00:00<?, ?it/s]

In [None]:
# Print the fitted model's summary.
# Convergence check: the r_hat values should be < 1.1
mmm.print_summary()

In [None]:
# plot the posterior distributions of the media effects
# Display labels for the media channels.
# NOTE(review): the media array was built from the 'spend', 'impressions' and
# 'sales' columns — confirm these labels describe those columns, in that order.
channel_names = ['facebook', 'tiktok', 'google']
plot.plot_media_channel_posteriors(media_mix_model=mmm, channel_names=channel_names)

In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive.
# NOTE(review): no visible cell reads from the mounted drive (the CSV is read
# from /content/kind_costs.csv) — confirm this cell is still needed.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Check how well the model fits the training data.
# Passing the target scaler gives results on the original (unscaled) scale.
plot.plot_model_fit(mmm, target_scaler=target_scaler)

In [None]:
# Scale the held-out media and extra features with the scalers fitted on the
# training data, then predict on the unseen rows.
# The scaled arrays get their own names so the raw test arrays stay intact:
# the budget-optimisation section applies extra_features_scaler to
# extra_features_test itself, and overwriting that array here would make it a
# second scaling. Keeping the inputs untouched also makes this cell safe to
# re-run.
media_data_test_scaled = media_scaler.transform(media_data_test)
extra_features_test_scaled = extra_features_scaler.transform(extra_features_test)
new_predictions = mmm.predict(media=media_data_test_scaled,
                              extra_features=extra_features_test_scaled)
new_predictions.shape

In [None]:
# plot the prediction vs actual charts
# The held-out target rows are scaled with the target scaler that was fitted
# on the training target before being passed as the out-of-sample target.
target_test = target_scaler.transform(target[split_point:])
plot.plot_out_of_sample_model_fit(out_of_sample_predictions=new_predictions,
                                 out_of_sample_target=target_test)

In [None]:
# estimate media effects with their respective credibility intervals
# Returns two values, used below as the media effect and the ROI estimate;
# both the target scaler and the cost scaler are passed in.
media_effect, roi_hat = mmm.get_posterior_metrics(target_scaler=target_scaler, cost_scaler=cost_scaler)

In [None]:
# Bar chart of the estimated media effect, labelled "Media Effect"
plot.plot_bars_media_metrics(metric=media_effect, metric_name="Media Effect")

In [None]:
# Bar chart of the estimated ROI, labelled "ROI hat"
plot.plot_bars_media_metrics(metric=roi_hat, metric_name="ROI hat")

In [None]:
# Plot the response curves for the media channels.
# Both the target scaler and the media scaler are passed to the plot function.
plot.plot_response_curves(
    media_mix_model=mmm, target_scaler=target_scaler, media_scaler=media_scaler)

## Budget Optimization

In [None]:
# if you used impressions for media variables, this should be an array of average CPMs
# if you used spend then just put an array of 1s like we did here
# NOTE(review): the media array in this notebook was built from the 'spend',
# 'impressions' and 'sales' columns, so not every channel is a spend figure —
# confirm that a price of 1 is right for all of them.
prices = jnp.ones(mmm.n_media_channels)

In [None]:
# Optimisation horizon, in number of time periods.
n_time_periods = 10
# Budget: the per-channel mean of the full (unscaled) media array, summed over
# channels and multiplied by the number of periods.
budget = jnp.sum(jnp.mean(media_data, axis=0)) * n_time_periods
# Extra features for the horizon: the first n_time_periods held-out rows after
# scaling (these are actual rows, not averages).
# NOTE(review): the scaler is applied to extra_features_test here, so that
# array must still be unscaled when this cell runs.
extra_features_forecast = extra_features_scaler.transform(extra_features_test)[:n_time_periods]

In [None]:
# Run the budget optimisation for the chosen horizon, using the fitted model,
# the forecast extra features, the total budget and the per-channel prices.
# The media and target scalers are passed in as well.
solution = optimize_media.find_optimal_budgets(
    n_time_periods=n_time_periods,
    media_mix_model=mmm,
    extra_features=extra_features_forecast,
    budget=budget,
    prices=prices,
    media_scaler=media_scaler,
    target_scaler=target_scaler,)

In [None]:
# both values should be almost equal
# NOTE(review): this reads element 2 of the value returned by
# find_optimal_budgets — confirm against the installed lightweight_mmm version
# that this element is the optimised allocation rather than the starting one.
budget, jnp.sum(solution[2] * prices)

In [None]:
# Print each channel's share of the total allocation, as a percentage rounded
# to two decimals.
# The total is the same for every channel, so it is computed once up front
# instead of once per loop iteration.
total_allocation = jnp.sum(solution[2] * prices)
for idx, channel_allocation in enumerate(solution[2]):
    share = round(channel_allocation / total_allocation * 100, 2)
    print(channel_names[idx], ": ", share, "%")

In [None]:
# Pin matplotlib to 3.1.3.
# NOTE(review): this runs after all the plotting cells, so it can only affect
# a later run of the notebook — confirm whether it is still needed and, if so,
# whether it belongs next to the install cell at the top.
%pip install matplotlib==3.1.3