In [1]:
from __future__ import annotations

import os
import sys
from typing import Dict, List, Optional, Sequence, Tuple

import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_probability as tfp

# The local Meridian checkout is appended to sys.path before the meridian imports below.
# NOTE(review): for `import meridian...` to resolve through this entry, the directory on
# sys.path has to be the parent of the `meridian` package -- confirm that the trailing
# `meridian/meridian/` is intended (and that the package is not simply pip-installed).
home_dir = "/Users/mariappan.subramanian/Documents/"
sys.path.append(f'{home_dir}/repo/forked/meridian/meridian/')

from meridian.model.transformers import MediaTransformer, KpiTransformer
from meridian.mpa.mini_meridian_utils import create_xarray_and_tf_tensor

1. Define Config

In [2]:
# 1. Define Config
# Column names used to pull fields out of the model-ready data frame.
date_field = "WES"
geo_field = "Region"
kpi = "conversions"

# Per-channel media columns; both lists follow the same channel order
# (TV, Display, Video).
imp_variables = [
    "TV_I",
    "Display_I",
    "Video_I",
]
spend_variables = [
    "TV_AC",
    "Display_AC",
    "Video_AC",
]

In [3]:
# Read the raw model-ready data.
# The data directory defaults to the original local OneDrive location, so behaviour is
# unchanged when nothing is configured. Setting the MMM_DATA_DIR environment variable
# redirects the load to another folder without editing this cell.
data_dir = os.environ.get(
    "MMM_DATA_DIR",
    "/Users/mariappan.subramanian/OneDrive - The Trade Desk/MMM/Media Parameter Analysis/Dev/InteractionEffects/data",
)
input_file_path = os.path.join(data_dir, "testdata_Mazda_national_False_mdf.csv")
sample_data = pd.read_csv(input_file_path)

In [4]:
# Preview the first five rows of the loaded frame.
sample_data.head(n=5)

Unnamed: 0,WES,conversions,TV_I,Display_I,Video_I,TV_AC,Display_AC,Video_AC,Region,Population,revenue_per_conversion
0,2024-05-04,752401,137061,4862581,257424,1950.982979,15374.634872,2051.044776,CALIFORNIA,39431263,1.0
1,2024-05-04,518382,165933,2730622,147589,2357.05483,9873.101013,1182.903972,FLORIDA,23372215,1.0
2,2024-05-04,406187,92170,1748128,83139,1312.75306,6091.144472,675.737354,GEORGIA,11180878,1.0
3,2024-05-04,623733,73928,2544351,100274,1052.1615,8597.751056,817.732719,ILLINOIS,12710158,1.0
4,2024-05-04,180935,34391,946181,63956,491.882279,3197.289078,518.980663,MASSACHUSETTS,7136171,1.0


2. Create Data Tensors

In [62]:
# Build the raw tensors from the long-format frame, one helper call per column group.
# Shape notes repeat the author's annotations: G geos, T time periods, M media channels.

# media impressions and media spend -> (G, T, M) each
media_tensors = create_xarray_and_tf_tensor(
    sample_data, date_field, geo_field, imp_variables
)
media_spend_tensors = create_xarray_and_tf_tensor(
    sample_data, date_field, geo_field, spend_variables
)

# outcome: the helper yields (G, T, 1); drop the trailing singleton axis -> (G, T)
kpi_tensor = tf.squeeze(
    create_xarray_and_tf_tensor(sample_data, date_field, geo_field, [kpi]),
    axis=-1,
)

# population: the helper yields (G, T, 1); average over axes 1 and 2 -> (G,)
population_tensor = tf.reduce_mean(
    create_xarray_and_tf_tensor(sample_data, date_field, geo_field, ["Population"]),
    axis=[1, 2],
)

3. Scaling Logic

In [63]:
# Scaling pipeline: each transformer is constructed from the raw tensor plus the
# per-geo population, then applied to that same raw tensor via `forward`.
# MediaTransformer and KpiTransformer come from the notebook's top import cell, so
# this cell carries no import of its own.

# 1. media scaling (first, scale by population, and then scale by median of non-zero impressions)
media_transformer = MediaTransformer(media_tensors, population_tensor)
media_scaled = media_transformer.forward(media_tensors)

# 2. scale the kpi
kpi_transformer = KpiTransformer(kpi_tensor, population_tensor)
kpi_scaled = kpi_transformer.forward(kpi_tensor)



In [65]:
# Scaled KPI series at index 0 of the leading axis (the geo axis, per the (G, T) annotation).
kpi_scaled[0]

<tf.Tensor: shape=(61,), dtype=float32, numpy=
array([-1.1902621 , -1.3088555 , -1.2479244 , -1.129949  , -1.1154652 ,
       -1.2549164 , -1.2497667 , -1.2391946 , -1.2588258 , -1.321858  ,
       -1.2404668 , -1.2479244 , -1.1282021 , -1.1824524 , -1.2593505 ,
       -1.2918752 , -1.2409393 , -1.1900169 , -1.3629876 , -1.4569635 ,
       -1.3706791 , -1.355012  , -1.3554708 , -1.3606251 , -1.3019952 ,
       -1.3095825 , -1.4585719 , -1.4524635 , -1.327939  , -1.3667948 ,
       -1.441912  , -1.4242618 , -1.2818166 , -1.2327796 , -1.1885039 ,
       -1.0923995 , -1.2363892 , -1.1717032 , -1.1393943 , -1.242091  ,
       -1.2219827 , -1.2561793 , -1.1490984 , -1.1755967 , -1.084342  ,
       -1.0201718 , -1.0882446 , -0.97313595, -1.0397507 , -1.1138614 ,
       -1.2476268 , -1.3607183 , -1.3302517 , -1.4721993 , -1.3992083 ,
       -1.2734845 , -1.2789112 , -1.257749  , -1.4100393 , -1.3158815 ,
       -1.4364988 ], dtype=float32)>

In [66]:
# First five rows of scaled media at index 0 of the leading axis, all channels.
media_scaled[0, :5]

<tf.Tensor: shape=(5, 3), dtype=float32, numpy=
array([[0.13640535, 0.79866165, 0.11554014],
       [0.4176674 , 0.5373079 , 0.26535264],
       [0.482035  , 0.7153818 , 0.36691928],
       [0.4837627 , 0.87780654, 0.784325  ],
       [0.80171543, 0.94360584, 0.7013741 ]], dtype=float32)>