In [1]:
import pandas as pd
import numpy as np
from time import time
from scipy.optimize import minimize, Bounds, LinearConstraint
from functools import partial

In [2]:
# Baseline volume and the requested growth, first as a fraction and then as
# an absolute volume.
current_volume = 75_300_000.0  # database
growth_ambition_perc = 0.1  # user_input
growth_ambition_volume = growth_ambition_perc * current_volume

In [3]:
# Read the "Media Input" sheet, drop rows that are entirely empty, index every
# row by "<aggregator_classification>_<lever>" and add an all-NaN
# `recommended` column.
data = (
    pd.read_excel("./test_data/KSA Pepsi.xlsx", sheet_name="Media Input", engine="openpyxl")
    .dropna(how="all")
    .assign(key=lambda frame: frame['aggregator_classification'].str.cat(frame['lever'], sep='_'))
    .set_index(['key'])
    .assign(recommended=np.nan)
)
# data = data.query("lever == 'digital'") # to run for single lever
data

FileNotFoundError: [Errno 2] No such file or directory: './test_data/KSA Pepsi.xlsx'

In [None]:
# do within opt functions
# Divide both per-unit columns by the '1_unit_metric_quantity' column.
for per_unit_column in ('cost_per_unit', 'effectiveness_per_unit'):
    data[per_unit_column] = data[per_unit_column] / data['1_unit_metric_quantity']

In [None]:
def cost(x, cost_per_unit, bounds=None):
    """Total cost of an allocation: the sum of ``x * cost_per_unit``.

    Parameters
    ----------
    x : array-like
        Quantity allocated to each element.
    cost_per_unit : array-like
        Cost of one unit of each element; multiplied element-wise with ``x``.
    bounds : object with ``lb`` and ``ub`` attributes, optional
        When given, every element of ``x`` must satisfy ``lb <= x <= ub``.
        ``lb`` / ``ub`` may be scalars or arrays that broadcast against ``x``.

    Returns
    -------
    float
        The summed cost, or the penalty value ``1e9`` when ``bounds`` is given
        and at least one element of ``x`` lies outside it.
    """
    if bounds is not None:
        point = np.asarray(x)
        # Compare with broadcasting instead of zip(): zip() stops at the
        # shortest input, so a scalar / length-1 bound would only ever check
        # the first element of x.
        within = (np.asarray(bounds.lb) <= point) & (point <= np.asarray(bounds.ub))
        if not np.all(within):
            return 1e9  # fix
    # Multiply first so pandas inputs still align on their index, then reduce
    # on the raw values so NaN propagates exactly as it did with builtin sum().
    return np.asarray(x * cost_per_unit).sum()

def volume_growth(x, effectiveness):
    """Return the sum of the element-wise product of ``x`` and ``effectiveness``."""
    contributions = x * effectiveness
    return sum(contributions)

def hessian(x, *args):
    """Return an all-zero square matrix with one row and column per element of ``x``.

    Any extra positional arguments are accepted and ignored.
    """
    size = x.shape[0]
    return np.zeros((size, size))

def make_linear_constraint_matrix(_data, totals_data, growth_volume=None):
    """Build the ``lb <= A @ x <= ub`` arrays for the allocation problem.

    The system has one row per distinct value of ``_data['lever']`` followed
    by one final growth row:

    * Lever row: for the elements belonging to that lever the coefficient is
      ``cost_per_unit`` where ``constrainted_metric == 'spend'`` and ``1``
      otherwise; every other element gets ``0``. The row's limits are the
      ``constraint_lower`` / ``constraint_upper`` of the first row of
      ``totals_data`` with the same ``lever``.
    * Growth row: the coefficients are ``effectiveness_per_unit``. The lower
      limit is ``growth_volume`` plus the volume produced by
      ``current_standing``; the upper limit is ``+inf``.

    Parameters
    ----------
    _data : pandas.DataFrame
        One row per decision variable. Columns read: ``lever``,
        ``constrainted_metric``, ``cost_per_unit``, ``effectiveness_per_unit``
        and ``current_standing``. The frame is not modified.
    totals_data : pandas.DataFrame
        Columns read: ``lever``, ``constraint_lower``, ``constraint_upper``.
    growth_volume : float, optional
        Required additional volume. Defaults to the notebook-level
        ``growth_ambition_volume`` so existing two-argument calls keep working.

    Returns
    -------
    dict
        ``{'lb': (n_constraint,), 'ub': (n_constraint,), 'A': (n_constraint, n_elements)}``
        as numpy arrays.

    Raises
    ------
    ValueError
        If ``totals_data`` has no row for one of the levers in ``_data``.
    """
    if growth_volume is None:
        growth_volume = growth_ambition_volume

    levers = _data['lever'].unique()
    n_constraint = len(levers) + 1
    lower = np.zeros((n_constraint,))
    upper = np.zeros((n_constraint,))
    A = np.zeros((n_constraint, _data.shape[0]))

    # Per-element coefficient used inside its own lever's row.
    weights = np.where(_data['constrainted_metric'] == 'spend', _data['cost_per_unit'], 1)

    for i, lever in enumerate(levers):
        in_lever = (_data['lever'] == lever).to_numpy()
        A[i, in_lever] = weights[in_lever]

        lever_totals = totals_data.query("lever == @lever")
        if lever_totals.empty:
            raise ValueError(f"totals_data has no row for lever {lever!r}")
        # .iloc[0] is explicitly positional; plain [0] on a string-labelled
        # index relies on a fallback that newer pandas versions reject.
        lower[i] = lever_totals['constraint_lower'].iloc[0]
        upper[i] = lever_totals['constraint_upper'].iloc[0]

    # Final row: total volume must reach the current volume plus the target.
    A[-1] = _data['effectiveness_per_unit'].to_numpy()
    current_volume_contribution = volume_growth(_data['current_standing'], _data['effectiveness_per_unit'])
    lower[-1] = growth_volume + current_volume_contribution
    upper[-1] = np.inf

    return {'lb': lower, 'ub': upper, 'A': A}

In [None]:
# Separate the 'total' rows from the remaining rows, then build the
# linear-constraint arrays from the two parts.
is_total_row = data['aggregation_level'].isin(['total'])
media_data = data[~is_total_row]
total_data = data[is_total_row]
M = make_linear_constraint_matrix(_data=media_data, totals_data=total_data)

In [None]:
# Box bounds per element, the linear constraints from M, a start point midway
# between each element's lower and upper bound, and the iteration cap.
bounds = Bounds(
    media_data['constraint_lower'].tolist(),
    media_data['constraint_upper'].tolist(),
    keep_feasible=True,
)
linear_constraints = LinearConstraint(M['A'], M['lb'], M['ub'])
x0 = ((media_data['constraint_upper'] + media_data['constraint_lower']) / 2).values
iterations = 100_000

In [None]:
stime = time()


def cost_gradient(x, cost_per_unit, bounds=None):
    """Exact gradient of the linear objective ``sum(x * cost_per_unit)``.

    The gradient with respect to ``x`` is ``cost_per_unit`` itself. ``x`` and
    ``bounds`` are accepted only because ``minimize`` calls ``jac`` with the
    same ``args`` as the objective.
    """
    return np.asarray(cost_per_unit, dtype=float)


# The objective is linear, so pass its exact gradient. Without `jac` the
# gradient is estimated by finite differences while gtol/xtol are 1e-12.
# The bounds are created with keep_feasible=True, so this gradient is used
# only where `cost` returns the plain linear cost, not its 1e9 penalty.
result = minimize(cost,
                  x0 = x0, args = (media_data['cost_per_unit'].values, bounds), bounds = bounds,
                  method = 'trust-constr', constraints = [linear_constraints],
                  jac = cost_gradient, hess = hessian,
                  options = {'maxiter': iterations, 'verbose': 1, 'gtol': 1e-12, 'xtol': 1e-12,
                             'factorization_method':'SVDFactorization'})

# result = differential_evolution(cost, 
#                                 args = (tv_data['cost_per_unit'].values,), bounds = bounds, constraints = constraints, tol=1e-6,
#                                 maxiter= iterations, workers=-1)

# print(f"Ran in {(time()-stime)/60:.1f}m")

In [None]:
# BUG (known issues):
# 1. Allocation is not happening according to mileage.
# 2. Results appear to be probabilistic (!?)

In [None]:
# Report the optimiser status, then the growth achieved as a percentage of
# current_volume and the change in cost relative to current_standing.
print(result.success, result.cg_stop_cond)

current_volume_contribution = volume_growth(media_data['current_standing'], media_data['effectiveness_per_unit'])
recommended_volume_contribution = volume_growth(result.x, media_data['effectiveness_per_unit'])
growth_achieved_pct = (recommended_volume_contribution - current_volume_contribution) / current_volume * 1e2
print(f"Growth Achieved {growth_achieved_pct:.2f}%")

recommended_cost = cost(result.x, media_data['cost_per_unit'])
current_cost = cost(media_data['current_standing'], media_data['cost_per_unit'])
print(f"From additional ${int(recommended_cost - current_cost):,}")

In [None]:
# Write the optimiser's allocation into `recommended`, matched on the row key.
# Assigning by label (instead of fillna, which only fills NaN) overwrites
# values left by an earlier run of this cell, so re-running it after a new
# optimisation does not keep stale recommendations.
recommended = pd.Series(index=media_data.index, data=result.x)
data.loc[recommended.index, 'recommended'] = recommended

In [None]:
# For each lever, store the sum of its non-total recommendations on the
# 'National_<lever>' row.
for lever in data['lever'].unique():
    lever_rows = data.query("lever == @lever and aggregation_level not in ['total']")
    data.loc['National_'+lever, 'recommended'] = lever_rows['recommended'].sum()

## do within opt functions
# Multiply both per-unit columns by the '1_unit_metric_quantity' column.
for per_unit_column in ('cost_per_unit', 'effectiveness_per_unit'):
    data[per_unit_column] = data[per_unit_column] * data['1_unit_metric_quantity']

In [None]:
# Write the frame to CSV; the index is not written.
data.to_csv(path_or_buf="./test_data/KSA Pepsi Out.csv", index=False)