In [1]:
import pandas as pd
import numpy as np
from statsmodels.stats.stattools import durbin_watson
from statsmodels.regression.linear_model import OLS
from statsmodels.tools.tools import add_constant
from scipy.optimize import minimize
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import holidays
import pandas as pd
import numpy as np
import holidays

In [2]:
# Replace 'merged_data.csv' with the exact path to your file if it's not in the same directory
df = pd.read_csv("data/merged_data.csv")

In [3]:
def transform_and_split_data(df):
    """
    Transform data by pivoting on growth_driver_l5 and splitting periods

    Parameters:
    -----------
    df : pandas.DataFrame
        Input dataframe containing:
        - growth_driver_l5 (categories)
        - execution (values)
        - Starting Week
        - Sales Units
        - Average Price
    """
    # Create a copy
    df = df.copy()

    # Convert date
    df["Starting Week"] = pd.to_datetime(df["Starting Week"])

    # First create the basic pivot
    pivot_exec = df.pivot_table(
        index="Starting Week",
        columns="growth_driver_l5",
        values="execution",
        aggfunc="sum",
        fill_value=0,
    ).add_suffix("_execution")

    # Now create pre-2023 and post-2022 versions
    pivot_pre2023 = pivot_exec.copy()
    pivot_post2022 = pivot_exec.copy()

    # Set values to 0 for respective periods
    pivot_pre2023[pivot_pre2023.index.year >= 2023] = 0
    pivot_post2022[pivot_post2022.index.year < 2023] = 0

    # Rename columns to indicate period
    pivot_pre2023.columns = [col + "_pre2023" for col in pivot_pre2023.columns]
    pivot_post2022.columns = [col + "_post2022" for col in pivot_post2022.columns]

    # Combine the pivoted data
    result = pd.concat([pivot_pre2023, pivot_post2022], axis=1)

    # Add target variable (sum by week)
    weekly_units = df.groupby("Starting Week")[
        "UK L'Oreal Paris Haircare Total Online Sellout Units"
    ].sum()

    # Calculate and add average price
    weekly_value = df.groupby("Starting Week")[
        "UK L'Oreal Paris Haircare Total Online Sellout Value (in pound)"
    ].sum()
    weekly_units_for_price = df.groupby("Starting Week")[
        "UK L'Oreal Paris Haircare Total Online Sellout Units"
    ].sum()
    avg_price = (weekly_value / weekly_units_for_price).fillna(0)

    # Add to result
    result = result.reset_index()
    result = result.merge(
        pd.DataFrame(
            {
                "Starting Week": weekly_units.index,
                "UK L'Oreal Paris Haircare Total Online Sellout Units": weekly_units.values,
                "UK L'Oreal Paris Haircare Online Average Price (in pound)": avg_price.values,
            }
        ),
        on="Starting Week",
        how="left",
    )

    return result

In [4]:
result_df = transform_and_split_data(df)
print("Columns in transformed data:")
for col in result_df.columns:
    print(f"- {col}")

Columns in transformed data:
- Starting Week
- amazon_execution_pre2023
- amazon_retail_execution_pre2023
- bvod_execution_pre2023
- citrus_execution_pre2023
- criteo_execution_pre2023
- google_execution_pre2023
- google_video_execution_pre2023
- influencer_management_execution_pre2023
- linear_execution_pre2023
- meta_execution_pre2023
- meta_collab_ads_execution_pre2023
- pinterest_execution_pre2023
- tesco_execution_pre2023
- testers_and_merchandising_execution_pre2023
- the_hut_group_execution_pre2023
- tik_tok_execution_pre2023
- youtube_execution_pre2023
- amazon_execution_post2022
- amazon_retail_execution_post2022
- bvod_execution_post2022
- citrus_execution_post2022
- criteo_execution_post2022
- google_execution_post2022
- google_video_execution_post2022
- influencer_management_execution_post2022
- linear_execution_post2022
- meta_execution_post2022
- meta_collab_ads_execution_post2022
- pinterest_execution_post2022
- tesco_execution_post2022
- testers_and_merchandising_execut

In [5]:
result_df

Unnamed: 0,Starting Week,amazon_execution_pre2023,amazon_retail_execution_pre2023,bvod_execution_pre2023,citrus_execution_pre2023,criteo_execution_pre2023,google_execution_pre2023,google_video_execution_pre2023,influencer_management_execution_pre2023,linear_execution_pre2023,...,meta_execution_post2022,meta_collab_ads_execution_post2022,pinterest_execution_post2022,tesco_execution_post2022,testers_and_merchandising_execution_post2022,the_hut_group_execution_post2022,tik_tok_execution_post2022,youtube_execution_post2022,UK L'Oreal Paris Haircare Total Online Sellout Units,UK L'Oreal Paris Haircare Online Average Price (in pound)
0,2022-01-03,1468912.9,193246.3,0.00,0.0,156076.7,4067.7,0.0,537.216943,0.00,...,0.0,0.0,0.0,0.000,0.0,0.0,0.0,0.0,2125879.6,3.242071
1,2022-01-10,2474498.0,343567.9,0.00,0.0,161675.8,3716.7,0.0,0.000000,0.00,...,0.0,0.0,0.0,0.000,0.0,0.0,0.0,0.0,2457353.6,2.978819
2,2022-01-17,2691219.7,448357.0,0.00,0.0,408499.0,4264.0,0.0,0.000000,0.00,...,0.0,0.0,0.0,0.000,0.0,0.0,0.0,0.0,2266373.2,3.014039
3,2022-01-24,1940061.5,626307.5,815102.08,0.0,684980.4,4633.2,0.0,142432.121596,58.63,...,0.0,0.0,0.0,0.000,0.0,0.0,0.0,0.0,2209340.9,3.077403
4,2022-01-31,2349322.3,619993.4,679251.82,0.0,1074118.5,2134.6,0.0,95869.369418,100.49,...,0.0,0.0,0.0,0.000,0.0,0.0,0.0,0.0,2220604.1,3.306272
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99,2023-11-27,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.000000,0.00,...,0.0,0.0,0.0,3038705.800,0.0,0.0,0.0,0.0,2710942.0,3.771592
100,2023-12-04,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.000000,0.00,...,0.0,0.0,0.0,5438082.650,0.0,0.0,0.0,0.0,2556918.0,3.784533
101,2023-12-11,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.000000,0.00,...,0.0,0.0,0.0,5438082.650,193.7,0.0,0.0,0.0,2620150.0,3.836532
102,2023-12-18,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.000000,0.00,...,0.0,0.0,0.0,5438082.650,0.0,0.0,0.0,0.0,2353416.0,3.939211


In [6]:
import pandas as pd
import numpy as np
import holidays


def add_seasonality_features(df, date_col="Starting Week"):
    """Add seasonality features (month/week cyclical, any-day weekend indicator, any-day holiday indicator)."""

    df_new = df.copy()
    df_new[date_col] = pd.to_datetime(df_new[date_col])

    # Cyclical month
    df_new["month_sin"] = np.sin(2 * np.pi * df_new[date_col].dt.month / 12)
    df_new["month_cos"] = np.cos(2 * np.pi * df_new[date_col].dt.month / 12)

    # Cyclical ISO week number
    df_new["week_sin"] = np.sin(2 * np.pi * df_new[date_col].dt.isocalendar().week / 52)
    df_new["week_cos"] = np.cos(2 * np.pi * df_new[date_col].dt.isocalendar().week / 52)

    def any_day_in_week_is_holiday(start_date, holiday_calendar):
        for offset in range(7):
            test_date = start_date + pd.Timedelta(days=offset)
            if test_date in holiday_calendar:
                return 1
        return 0

    uk_holidays = holidays.country_holidays("GB")
    df_new["is_holiday"] = df_new[date_col].apply(
        lambda d: any_day_in_week_is_holiday(d, uk_holidays)
    )

    return df_new

In [7]:
result_df_transformed = add_seasonality_features(result_df, date_col="Starting Week")

In [8]:
result_df_transformed

Unnamed: 0,Starting Week,amazon_execution_pre2023,amazon_retail_execution_pre2023,bvod_execution_pre2023,citrus_execution_pre2023,criteo_execution_pre2023,google_execution_pre2023,google_video_execution_pre2023,influencer_management_execution_pre2023,linear_execution_pre2023,...,the_hut_group_execution_post2022,tik_tok_execution_post2022,youtube_execution_post2022,UK L'Oreal Paris Haircare Total Online Sellout Units,UK L'Oreal Paris Haircare Online Average Price (in pound),month_sin,month_cos,week_sin,week_cos,is_holiday
0,2022-01-03,1468912.9,193246.3,0.00,0.0,156076.7,4067.7,0.0,537.216943,0.00,...,0.0,0.0,0.0,2125879.6,3.242071,5.000000e-01,0.866025,0.120537,0.992709,1
1,2022-01-10,2474498.0,343567.9,0.00,0.0,161675.8,3716.7,0.0,0.000000,0.00,...,0.0,0.0,0.0,2457353.6,2.978819,5.000000e-01,0.866025,0.239316,0.970942,0
2,2022-01-17,2691219.7,448357.0,0.00,0.0,408499.0,4264.0,0.0,0.000000,0.00,...,0.0,0.0,0.0,2266373.2,3.014039,5.000000e-01,0.866025,0.354605,0.935016,0
3,2022-01-24,1940061.5,626307.5,815102.08,0.0,684980.4,4633.2,0.0,142432.121596,58.63,...,0.0,0.0,0.0,2209340.9,3.077403,5.000000e-01,0.866025,0.464723,0.885456,0
4,2022-01-31,2349322.3,619993.4,679251.82,0.0,1074118.5,2134.6,0.0,95869.369418,100.49,...,0.0,0.0,0.0,2220604.1,3.306272,5.000000e-01,0.866025,0.568065,0.822984,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99,2023-11-27,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.000000,0.00,...,0.0,0.0,0.0,2710942.0,3.771592,-5.000000e-01,0.866025,-0.464723,0.885456,0
100,2023-12-04,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.000000,0.00,...,0.0,0.0,0.0,2556918.0,3.784533,-2.449294e-16,1.000000,-0.354605,0.935016,0
101,2023-12-11,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.000000,0.00,...,0.0,0.0,0.0,2620150.0,3.836532,-2.449294e-16,1.000000,-0.239316,0.970942,0
102,2023-12-18,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.000000,0.00,...,0.0,0.0,0.0,2353416.0,3.939211,-2.449294e-16,1.000000,-0.120537,0.992709,0


In [9]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
from scipy.optimize import minimize
from statsmodels.stats.stattools import durbin_watson

# ---------------------------------------------------
# 1) Define your data and variable lists
# ---------------------------------------------------

# Name of your DataFrame:
df = result_df_transformed

# Target variable
target_col = "UK L'Oreal Paris Haircare Total Online Sellout Units"

# Marketing variables (to be forced ≥ 0)
marketing_vars = [
    "amazon_execution_pre2023",
    "amazon_retail_execution_pre2023",
    "bvod_execution_pre2023",
    "citrus_execution_pre2023",
    "criteo_execution_pre2023",
    "google_execution_pre2023",
    "google_video_execution_pre2023",
    "influencer_management_execution_pre2023",
    "linear_execution_pre2023",
    "meta_execution_pre2023",
    "meta_collab_ads_execution_pre2023",
    "pinterest_execution_pre2023",
    "tesco_execution_pre2023",
    "testers_and_merchandising_execution_pre2023",
    "the_hut_group_execution_pre2023",
    "tik_tok_execution_pre2023",
    "youtube_execution_pre2023",
    "amazon_execution_post2022",
    "amazon_retail_execution_post2022",
    "bvod_execution_post2022",
    "citrus_execution_post2022",
    "criteo_execution_post2022",
    "google_execution_post2022",
    "google_video_execution_post2022",
    "influencer_management_execution_post2022",
    "linear_execution_post2022",
    "meta_execution_post2022",
    "meta_collab_ads_execution_post2022",
    "pinterest_execution_post2022",
    "tesco_execution_post2022",
    "testers_and_merchandising_execution_post2022",
    "the_hut_group_execution_post2022",
    "tik_tok_execution_post2022",
    "youtube_execution_post2022",
]

# Seasonality (and other) variables (unconstrained)
seasonality_vars = [
    "UK L'Oreal Paris Haircare Online Average Price (in pound)",
    "month_sin",
    "month_cos",
    "is_holiday",
]

# Create the design matrix X and target y
X = df[marketing_vars + seasonality_vars].copy()
y = df[target_col].values

# Add a constant for the intercept
X = sm.add_constant(X)
# Now X columns will be: ['const'] + marketing_vars + seasonality_vars

# ---------------------------------------------------
# 2) Define the objective function
# ---------------------------------------------------


def objective(params, X, y):
    """Sum of squared residuals (SSE)."""
    residuals = y - X.dot(params)
    return np.sum(residuals**2)


# ---------------------------------------------------
# 3) Set up bounds for each parameter
# ---------------------------------------------------
# - Intercept: no constraint => (None, None)
# - Marketing coefficients: nonnegative => (0.0, None)
# - Seasonality variables: no constraint => (None, None)

n_marketing = len(marketing_vars)
n_seasonality = len(seasonality_vars)

bounds = []
# Intercept (1 parameter)
bounds.append((None, None))
# Marketing variables (≥ 0)
bounds.extend([(0.0, None)] * n_marketing)
# Seasonality variables (unconstrained)
bounds.extend([(None, None)] * n_seasonality)

# ---------------------------------------------------
# 4) Provide an initial guess (from unconstrained OLS)
# ---------------------------------------------------
ols_model = sm.OLS(y, X).fit()
init_params = ols_model.params.values

# ---------------------------------------------------
# 5) Run the constrained optimization
# ---------------------------------------------------
result = minimize(objective, init_params, args=(X, y), method="L-BFGS-B", bounds=bounds)

final_params = result.x  # The optimized coefficients

# ---------------------------------------------------
# 6) Evaluate fit: predictions, residuals, R^2, etc.
# ---------------------------------------------------
y_pred = X.dot(final_params)
residuals = y - y_pred

# R^2
ss_res = np.sum(residuals**2)
ss_tot = np.sum((y - np.mean(y)) ** 2)
r2 = 1 - ss_res / ss_tot

# Adjusted R^2
n = len(y)
p = X.shape[1] - 1  # subtract 1 for the intercept
adj_r2 = 1 - (1 - r2) * (n - 1) / (n - p - 1)

# Durbin-Watson
dw_stat = durbin_watson(residuals)

# ---------------------------------------------------
# 7) Summarize results
# ---------------------------------------------------
param_names = X.columns
coef_dict = dict(zip(param_names, final_params))

print("Constrained OLS Results (nonnegative marketing coefficients):")
print("--------------------------------------------------------------")
for name, val in coef_dict.items():
    print(f"{name:50s} = {val:10.6f}")

print("\nModel Performance:")
print(f"  R-squared           = {r2:.4f}")
print(f"  Adjusted R-squared  = {adj_r2:.4f}")
print(f"  Durbin-Watson       = {dw_stat:.4f}")

if result.success:
    print("\nOptimization converged successfully.")
else:
    print("\nOptimization did NOT converge.")

Constrained OLS Results (nonnegative marketing coefficients):
--------------------------------------------------------------
const                                              = 4092756.578043
amazon_execution_pre2023                           =   0.000000
amazon_retail_execution_pre2023                    =   0.000000
bvod_execution_pre2023                             =   0.000000
citrus_execution_pre2023                           =   0.000000
criteo_execution_pre2023                           =   0.000000
google_execution_pre2023                           =   0.000000
google_video_execution_pre2023                     =   0.218410
influencer_management_execution_pre2023            =   0.095494
linear_execution_pre2023                           =   0.000000
meta_execution_pre2023                             =   0.000000
meta_collab_ads_execution_pre2023                  =   0.011783
pinterest_execution_pre2023                        =   0.000131
tesco_execution_pre2023                

# 2. ROI

## 2.1 Incremental Revenue

In [10]:
marketing_vars

['amazon_execution_pre2023',
 'amazon_retail_execution_pre2023',
 'bvod_execution_pre2023',
 'citrus_execution_pre2023',
 'criteo_execution_pre2023',
 'google_execution_pre2023',
 'google_video_execution_pre2023',
 'influencer_management_execution_pre2023',
 'linear_execution_pre2023',
 'meta_execution_pre2023',
 'meta_collab_ads_execution_pre2023',
 'pinterest_execution_pre2023',
 'tesco_execution_pre2023',
 'testers_and_merchandising_execution_pre2023',
 'the_hut_group_execution_pre2023',
 'tik_tok_execution_pre2023',
 'youtube_execution_pre2023',
 'amazon_execution_post2022',
 'amazon_retail_execution_post2022',
 'bvod_execution_post2022',
 'citrus_execution_post2022',
 'criteo_execution_post2022',
 'google_execution_post2022',
 'google_video_execution_post2022',
 'influencer_management_execution_post2022',
 'linear_execution_post2022',
 'meta_execution_post2022',
 'meta_collab_ads_execution_post2022',
 'pinterest_execution_post2022',
 'tesco_execution_post2022',
 'testers_and_merch

In [11]:
# Multiply the coefficients by their corresponding columns for each row in the DataFrame
df_weighted_contribution = X * final_params
inc_rev = df_weighted_contribution[marketing_vars].sum()
inc_rev

amazon_execution_pre2023                        0.000000e+00
amazon_retail_execution_pre2023                 0.000000e+00
bvod_execution_pre2023                          0.000000e+00
citrus_execution_pre2023                        0.000000e+00
criteo_execution_pre2023                        0.000000e+00
google_execution_pre2023                        0.000000e+00
google_video_execution_pre2023                  1.563782e+06
influencer_management_execution_pre2023         2.276832e+05
linear_execution_pre2023                        0.000000e+00
meta_execution_pre2023                          0.000000e+00
meta_collab_ads_execution_pre2023               9.963807e+05
pinterest_execution_pre2023                     0.000000e+00
tesco_execution_pre2023                         0.000000e+00
testers_and_merchandising_execution_pre2023     0.000000e+00
the_hut_group_execution_pre2023                 0.000000e+00
tik_tok_execution_pre2023                       0.000000e+00
youtube_execution_pre202

In [None]:
# plot incremental 

## 2.2 Calculate Spend

In [12]:
spend = pd.read_csv("data/a&p_variables.csv")
spend

Unnamed: 0,growth_driver_l1,growth_driver_l2,growth_driver_l3,growth_driver_l4,growth_driver_l5,metric,Starting week,investment (in pound),execution
0,ap_consumer_facing,consumer_engagement,advocacy_media,influencer_management,influencer_management,engagements,2022-01-03,107.438017,537.216943
1,ap_consumer_facing,consumer_engagement,advocacy_media,influencer_management,influencer_management,engagements,2022-01-10,0.000000,0.000000
2,ap_consumer_facing,consumer_engagement,advocacy_media,influencer_management,influencer_management,engagements,2022-01-17,0.000000,0.000000
3,ap_consumer_facing,consumer_engagement,advocacy_media,influencer_management,influencer_management,engagements,2022-01-24,28484.999998,142432.121596
4,ap_consumer_facing,consumer_engagement,advocacy_media,influencer_management,influencer_management,engagements,2022-01-31,19172.915190,95869.369418
...,...,...,...,...,...,...,...,...,...
2023,ap_consumer_facing,shopper_experience,shopper_experience,testers_and_merchandising,testers_and_merchandising,units,2023-11-27,0.000000,0.000000
2024,ap_consumer_facing,shopper_experience,shopper_experience,testers_and_merchandising,testers_and_merchandising,units,2023-12-04,0.000000,0.000000
2025,ap_consumer_facing,shopper_experience,shopper_experience,testers_and_merchandising,testers_and_merchandising,units,2023-12-11,3381.443000,193.700000
2026,ap_consumer_facing,shopper_experience,shopper_experience,testers_and_merchandising,testers_and_merchandising,units,2023-12-18,0.000000,0.000000


In [13]:
spend_aggregated = spend.groupby(
    ["growth_driver_l5", "Starting week"],
    as_index=False,
)["investment (in pound)"].sum()

spend_aggregated

Unnamed: 0,growth_driver_l5,Starting week,investment (in pound)
0,amazon,2022-01-03,4245.110998
1,amazon,2022-01-10,6786.949015
2,amazon,2022-01-17,6653.399975
3,amazon,2022-01-24,4944.758000
4,amazon,2022-01-31,5821.348005
...,...,...,...
1763,youtube,2023-11-27,0.000000
1764,youtube,2023-12-04,0.000000
1765,youtube,2023-12-11,0.000000
1766,youtube,2023-12-18,0.000000


In [14]:
pivot_spend = spend_aggregated.pivot(
    index="Starting week", columns="growth_driver_l5", values="investment (in pound)"
)
spend_final = pivot_spend.reset_index()

spend_final

growth_driver_l5,Starting week,amazon,amazon_retail,bvod,citrus,criteo,google,google_video,influencer_management,linear,meta,meta_collab_ads,pinterest,tesco,testers_and_merchandising,the_hut_group,tik_tok,youtube
0,2022-01-03,4245.110998,613.431,0.000000,0.000,3053.209640,374.884674,0.0,107.438017,0.0,2322.307000,918.38565,0.0,0.000,5303.066526,0.0,0.0,0.0
1,2022-01-10,6786.949015,1065.064,0.000000,0.000,3387.968877,333.488566,0.0,0.000000,0.0,3375.190000,867.26952,0.0,0.000,4418.402838,0.0,0.0,0.0
2,2022-01-17,6653.399975,1354.821,0.000000,0.000,3045.849565,371.129451,0.0,0.000000,0.0,5137.405000,1019.38005,0.0,0.000,6301.752894,0.0,0.0,0.0
3,2022-01-24,4944.758000,2428.517,16158.513010,0.000,2990.311483,360.270269,0.0,28484.999998,169674.7,20303.140000,1101.75156,0.0,0.000,9379.862800,0.0,0.0,0.0
4,2022-01-31,5821.348005,2262.923,13465.429227,0.000,2979.300479,237.652458,0.0,19172.915190,292321.9,28768.727013,987.27993,0.0,0.000,16387.142720,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99,2023-11-27,10447.814000,6967.649,0.000000,10691.603,8660.379000,1410.916000,0.0,0.000000,0.0,0.000000,0.00000,0.0,21208.889,0.000000,0.0,0.0,0.0
100,2023-12-04,4372.979000,6961.994,0.000000,10878.283,5751.850000,1576.926000,0.0,0.000000,0.0,0.000000,0.00000,0.0,34548.917,0.000000,0.0,0.0,0.0
101,2023-12-11,838.552000,3631.017,0.000000,6374.641,7118.384000,1147.926000,0.0,0.000000,0.0,0.000000,0.00000,0.0,34548.917,3381.443000,0.0,0.0,0.0
102,2023-12-18,891.917000,0.000,0.000000,5420.545,8714.498000,1003.444000,0.0,0.000000,0.0,0.000000,0.00000,0.0,34548.917,0.000000,0.0,0.0,0.0


In [15]:
# Extract the year from 'Starting week'
spend_final["Year"] = pd.to_datetime(spend_final["Starting week"]).dt.year

# Create separate columns for 2022 and 2023 by prefixing variable names with the year
split_columns = [
    col
    for col in spend_final.columns
    if col not in ["growth_driver_l5", "Starting week", "Year"]
]
for col in split_columns:
    spend_final[f"{col}_2022"] = spend_final.apply(
        lambda x: x[col] if x["Year"] == 2022 else 0, axis=1
    )
    spend_final[f"{col}_2023"] = spend_final.apply(
        lambda x: x[col] if x["Year"] == 2023 else 0, axis=1
    )

# Drop intermediate 'Year' column if not needed
spend_final.drop(columns=["Year"] + split_columns, inplace=True)

In [144]:
spend_totals = spend_final.drop(columns="Starting week").sum()
spend_totals

growth_driver_l5
amazon_2022                       3.093380e+05
amazon_2023                       4.989289e+05
amazon_retail_2022                2.500960e+05
amazon_retail_2023                3.400397e+05
bvod_2022                         3.448057e+05
bvod_2023                         5.245864e+05
citrus_2022                       2.868698e+05
citrus_2023                       3.344385e+05
criteo_2022                       2.338545e+05
criteo_2023                       2.793985e+05
google_2022                       5.528622e+04
google_2023                       9.440443e+04
google_video_2022                 1.314585e+05
google_video_2023                 0.000000e+00
influencer_management_2022        4.768271e+05
influencer_management_2023        4.230657e+05
linear_2022                       4.985677e+06
linear_2023                       5.031694e+06
meta_2022                         6.567312e+05
meta_2023                         7.692505e+05
meta_collab_ads_2022              3.571685e

## 2.3 Calculate ROI

In [145]:
inc_rev = inc_rev.reset_index(name="incremental_rev")
inc_rev

Unnamed: 0,index,incremental_rev
0,amazon_execution_pre2023,0.0
1,amazon_retail_execution_pre2023,0.0
2,bvod_execution_pre2023,0.0
3,citrus_execution_pre2023,0.0
4,criteo_execution_pre2023,0.0
5,google_execution_pre2023,0.0
6,google_video_execution_pre2023,1555051.0
7,influencer_management_execution_pre2023,231677.3
8,linear_execution_pre2023,0.0
9,meta_execution_pre2023,0.0


In [146]:
spend_totals = spend_totals.reset_index(name="total_spend")
spend_totals

Unnamed: 0,growth_driver_l5,total_spend
0,amazon_2022,309338.0
1,amazon_2023,498928.9
2,amazon_retail_2022,250096.0
3,amazon_retail_2023,340039.7
4,bvod_2022,344805.7
5,bvod_2023,524586.4
6,citrus_2022,286869.8
7,citrus_2023,334438.5
8,criteo_2022,233854.5
9,criteo_2023,279398.5


In [147]:
roi = inc_rev["incremental_rev"] / spend_totals["total_spend"]
roi.index = spend_totals["growth_driver_l5"].values
roi

amazon_2022                        0.000000
amazon_2023                        0.000000
amazon_retail_2022                 0.000000
amazon_retail_2023                 0.000000
bvod_2022                          0.000000
bvod_2023                          0.000000
citrus_2022                        5.420755
citrus_2023                        0.692735
criteo_2022                        0.000000
criteo_2023                        0.000000
google_2022                       18.441184
google_2023                        0.000000
google_video_2022                  0.000000
google_video_2023                       NaN
influencer_management_2022         0.000000
influencer_management_2023         0.000000
linear_2022                        0.000000
linear_2023                        1.991887
meta_2022                         17.568862
meta_2023                          1.451993
meta_collab_ads_2022              20.962774
meta_collab_ads_2023               0.000000
pinterest_2022                  