In [1]:
import pandas as pd
import numpy as np
from time import time
import nevergrad as ng
from functools import partial

In [2]:
def _float_or_original(value):
    """Return ``float(value)`` if the conversion succeeds, otherwise ``value`` unchanged."""
    try:
        return float(value)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures are tolerated (text, None, pd.NA, oversized
        # ints, ...). Anything else, e.g. KeyboardInterrupt, must propagate.
        return value


def convert_df(data):
    """Return a copy of ``data`` with -1 replaced by NaN and values cast to float where possible.

    Parameters
    ----------
    data : pandas.DataFrame
        Input frame; it is copied and never modified.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same columns. Each cell holds ``float(cell)`` when
        that conversion works and the original cell value otherwise.
    """
    df = data.copy()
    df = df.replace(-1, np.nan)

    for column in df.columns:
        df[column] = [_float_or_original(value) for value in df[column]]

    return df

In [6]:
# Scenario selection: the country/brand pair used to filter the records in later cells.
country = "KSA"
brand = "Pepsi"
# Previous CSV source for the media records, kept for reference:
# media_records = pd.read_csv("./db/media_records_new_KSA.csv", encoding='latin').drop(columns=['id', ])
# Load the four input tables; the 'id' column is dropped from each one on load.
media_records = pd.read_excel("../db/media_records_new KSA refresh.xlsx", sheet_name="media_records_new", engine="openpyxl").drop(columns=['id', ])
execution_records = pd.read_csv("../db/distribution_records_new.csv", encoding='latin').drop(columns=['id', ])
scenario_media_details = pd.read_csv("../db/scenario_media_details.csv", encoding='latin').drop(columns=['id', ])
scenario_execution_details = pd.read_csv("../db/scenario_execution_details.csv", encoding='latin').drop(columns=['id', ])

In [7]:
# Keep every media record whose 'genre_platform' is neither 'anghami.com' nor 'Twitter'.
media_records = media_records[~media_records["genre_platform"].isin(['anghami.com', 'Twitter'])]

In [8]:
media_records_headers = media_records.columns.tolist()
execution_records_headers = execution_records.columns.tolist()
# Columns that exist only in the scenario tables. They are collected in file
# order (duplicates removed) instead of via a set difference, whose ordering is
# arbitrary and can change between kernel sessions, so the column layout of the
# frames built from these lists is reproducible.
scenario_media_headers = [
    column for column in dict.fromkeys(scenario_media_details.columns)
    if column not in media_records.columns
]
scenario_execution_headers = [
    column for column in dict.fromkeys(scenario_execution_details.columns)
    if column not in execution_records.columns
]

In [12]:
# For each record table: keep the rows of the selected country/brand, turn empty
# strings into NaN, let pandas pick the best dtypes, then append the scenario-only
# columns (added by reindex, so they start out empty).
df_constraints_media = (
    media_records
    .query("country == @country and brand == @brand")
    .replace('', np.nan)
    .convert_dtypes()
    .pipe(lambda frame: frame.reindex(columns=[*frame.columns, *scenario_media_headers]))
)
df_constraints_execution = (
    execution_records
    .query("country == @country and brand == @brand")
    .replace('', np.nan)
    .convert_dtypes()
    .pipe(lambda frame: frame.reindex(columns=[*frame.columns, *scenario_execution_headers]))
)

In [13]:
# Grand total over the numeric columns of the 'Total' pack rows.
# numeric_only=True makes the exclusion of the text columns (country, brand,
# pack_name, ...) explicit; without it recent pandas versions fail or
# concatenate strings when reducing over them.
# NOTE(review): this adds up *every* numeric column of those rows - confirm that
# all of them are volume figures.
current_volume = df_constraints_execution.query("pack_name == 'Total'").sum(numeric_only=True).sum()
# Target growth expressed as a fraction of the current volume, and in volume terms.
growth_ambition_perc = 0.02
growth_ambition_volume = current_volume*growth_ambition_perc

In [14]:
# Working copy of the media constraints: -1 becomes NaN and values are cast to
# float wherever possible (see convert_df).
data = convert_df(df_constraints_media)

In [15]:
# data = pd.read_excel("./test_data/KSA Pepsi.xlsx", sheet_name="Media Input", engine="openpyxl").dropna(how="all")
# data['key'] = data['aggregator_classification'].str.cat(data['lever'], sep='_')
# data = data.set_index(['key'])
# data['recommended'] = np.nan
# # data = data.query("lever == 'tv'") # to run for single lever
# data

In [16]:
# Earlier draft, kept for reference ("do within opt functions"):
# data['cost_per_unit'] = data['cost_per_unit']/data['1_unit_metric_quantity']
# data['effectiveness_per_unit'] = data['effectiveness_per_unit']/data['1_unit_metric_quantity']

# Derive per-metric columns by dividing the per-unit effectiveness and cost
# columns by 'one_unit_metric_quantity'.
data['current_effectiveness_per_metric'] = data['current_effectiveness_per_unit'].div(data['one_unit_metric_quantity'])
data['input_cost_per_metric'] = data['input_cost_per_unit'].div(data['one_unit_metric_quantity'])

In [17]:
def multiobjective(x, cost_per_unit, effectiveness):
    """Return the two losses ``[cost, -volume_growth]`` for the allocation ``x``.

    The growth term is negated so that both entries can be minimised.
    """
    total_cost = cost(x, cost_per_unit)
    negated_growth = -volume_growth(x, effectiveness)
    return [total_cost, negated_growth]

def cost(x, cost_per_unit):
    """Return the sum of the element-wise product of ``x`` and ``cost_per_unit``."""
    weighted = x*cost_per_unit
    return sum(weighted)

def volume_growth(x, effectiveness):
    """Return the sum of the element-wise product of ``x`` and ``effectiveness``."""
    weighted = x*effectiveness
    return sum(weighted)

In [19]:
# Split the working frame into the per-platform rows and the 'Total' rows.
# Explicit copies keep the two frames independent of `data`, so new columns can
# be assigned to them without pandas' SettingWithCopyWarning.
media_data = data.query("genre_platform not in ['Total']").copy()
total_data = data.query("genre_platform in ['Total']").copy()

# Sum of the 'current_volume' column over the 'Total' rows.
current_volume_contribution = total_data['current_volume'].sum()

In [None]:
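## TODO: change from here.
## NOTE(review): the cells below restart their execution counts and use column
## names such as 'cost_per_unit', 'effectiveness_per_unit' and 'recommended';
## verify that these exist in `data` before running them.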

In [7]:
def sum_constraint(x, media_data, total_data):
    """Check that the lever totals implied by allocation ``x`` stay within their upper bounds.

    Parameters
    ----------
    x : sequence of numbers
        One value per row of ``media_data``, in row order.
    media_data : pandas.DataFrame
        Needs the columns 'lever' and 'cost_per_unit'.
    total_data : pandas.DataFrame
        Needs the columns 'recommended', 'recommended_spend', 'constraint_upper'
        and 'constrainted_metric'. Missing 'recommended' / 'recommended_spend'
        values are filled from the per-lever sums of ``media_data``, matched on
        this frame's index.

    Returns
    -------
    bool
        True when every row of ``total_data`` satisfies its upper constraint.
        Rows whose 'constrainted_metric' is 'spend' are checked on
        'recommended_spend', all other rows on 'recommended'.

    Notes
    -----
    Neither input frame is modified. Filling the totals on the caller's frame
    would freeze them after the first call (``fillna`` only fills missing
    values), so every later candidate would be judged on stale totals.
    """
    media = media_data.copy()
    totals = total_data.copy()

    media['recommended'] = x
    media['recommended_spend'] = media['recommended']*media['cost_per_unit']

    lever_sums = media.groupby(['lever'])[['recommended', 'recommended_spend']].sum()
    totals['recommended'] = totals['recommended'].fillna(lever_sums['recommended'])
    totals['recommended_spend'] = totals['recommended_spend'].fillna(lever_sums['recommended_spend'])

    if totals.empty:
        # No total rows means there is nothing to violate.
        return True

    def within_upper(row):
        constrained_value = row['recommended_spend'] if row['constrainted_metric'] == 'spend' else row['recommended']
        return constrained_value <= row['constraint_upper']

    return bool(totals.apply(within_upper, axis=1).all())

In [8]:
# One bounded scalar decision variable per row of media_data, using that row's
# 'constraint_lower' / 'constraint_upper' values as bounds.
# NOTE(review): convert_df maps -1 to NaN, so confirm these two columns contain
# no missing values before they are used as bounds.
X = [
    ng.p.Scalar(lower=lower_bound, upper=upper_bound)
    for lower_bound, upper_bound in zip(media_data['constraint_lower'], media_data['constraint_upper'])
]
# Optimisation budget handed to the optimizers below.
iterations = 100

In [9]:
# Single-objective setup: the instrumentation carries the tuple of decision
# variables plus the cost-per-unit array as positional arguments.
# NOTE(review): the visible run cell calls `moptimizer`, not this optimizer.
parameters = ng.p.Instrumentation(ng.p.Tuple(*X), media_data['cost_per_unit'].values)
# Seed the parametrization before the optimizer is built.
parameters.random_state.seed(0)
optimizer = ng.optimizers.NGOpt(parametrization=parameters, budget=iterations, num_workers=8)
# Constraint 1: the volume from the candidate must reach the current contribution
# plus the growth ambition. x[0][0] is presumably the tuple of decision values
# (first positional argument) - confirm against the nevergrad docs.
optimizer.parametrization.register_cheap_constraint(lambda x: volume_growth(x[0][0], media_data['effectiveness_per_unit'].values) >=  growth_ambition_volume + current_volume_contribution)
# Constraint 2: the lever totals must respect their upper constraints (see sum_constraint).
optimizer.parametrization.register_cheap_constraint(lambda x: sum_constraint(x[0][0], media_data, total_data))



In [10]:
# Multi-objective setup: the instrumentation carries the decision variables plus
# the cost-per-unit and effectiveness-per-unit arrays, matching the positional
# signature of multiobjective(x, cost_per_unit, effectiveness).
mparameters = ng.p.Instrumentation(ng.p.Tuple(*X), media_data['cost_per_unit'].values, media_data['effectiveness_per_unit'].values)
# Seed the parametrization before the optimizer is built.
mparameters.random_state.seed(0)
moptimizer = ng.optimizers.CMA(parametrization=mparameters, budget=iterations, num_workers=1)
# Constraint: the plain sum of all decision values must not exceed the
# 'constraint_upper' of the 'National_tv' row.
# NOTE(review): the lever is hard-coded and values are summed across every row
# of media_data regardless of lever - confirm this is intended.
moptimizer.parametrization.register_cheap_constraint(lambda x: sum(x[0][0]) <=  total_data.loc['National_tv', 'constraint_upper'])



In [11]:
# Run the multi-objective optimisation and report the wall-clock time in minutes.
# NOTE(review): the output saved with this cell ends in KeyboardInterrupt, so
# `result` / `result_x` were not produced by that recorded run.
stime = time()
# Single-objective alternative, currently disabled:
# result = optimizer.minimize(cost, batch_mode=True, verbosity=0)
result = moptimizer.minimize(multiobjective, verbosity=0)
print(f"Ran in {(time()-stime)/60:.1f}m")
# First positional argument of the recommended instrumentation value.
result_x = result.value[0][0]

(5_w,11)-aCMA-ES (mu_w=3.4,w_1=42%) in dimension 12 (seed=nan, Mon Nov  8 19:19:30 2021)




KeyboardInterrupt: 

In [None]:
# Volume implied by the recommended allocation.
recommended_volume_contribution = volume_growth(result_x, media_data['effectiveness_per_unit'])
# Growth over the current contribution, as a percentage of current_volume.
print(f"Growth Achieved {(recommended_volume_contribution-current_volume_contribution)/current_volume*1e2:.2f}%")
# Extra cost of the recommendation compared with the 'current_standing' allocation.
print(f"From additional ${int(cost(result_x, media_data['cost_per_unit']) - cost(media_data['current_standing'], media_data['cost_per_unit'])):,}")

In [None]:
# Export the Pareto front: one row per candidate holding its two losses followed
# by the per-row spend (decision value times 'cost_per_unit').
# A comprehension is used so that the cell does not bind `x` or `_` at notebook
# level; rebinding `_` stops IPython from updating its last-output variable.
pf_data = [
    list(candidate.losses) + list(candidate[0].value[0]*media_data['cost_per_unit'])
    for candidate in moptimizer.pareto_front(size=100, subset='domain-covering')
]

pf = pd.DataFrame(data=pf_data, columns=['cost', 'volume_growth_neg'] + list(media_data.index))
# The second loss is the negated volume growth; flip the sign back.
pf['volume_growth'] = -pf['volume_growth_neg']
# Growth over the current contribution, as a fraction of current_volume.
pf['growth_perc'] = (pf['volume_growth'] - current_volume_contribution)/current_volume

pf.to_csv("./test_data/pfront.csv", index=False)

In [None]:
# Label the recommended values with media_data's index, then use them to fill
# the missing entries of data['recommended'] (existing values are kept).
recommended = pd.Series(result_x, index=media_data.index)
data['recommended'] = data['recommended'].fillna(recommended)

In [None]:
# For every lever, write the sum of 'recommended' over its rows whose
# 'aggregation_level' is not 'total' into the 'National_<lever>' row.
for lever in data['lever'].unique():
    lever_rows = data.query("lever == @lever and aggregation_level not in ['total']")
    data.loc['National_'+lever, 'recommended'] = lever_rows['recommended'].sum()

## do within opt functions
# NOTE(review): these two assignments overwrite their own inputs, so re-running
# the cell multiplies by '1_unit_metric_quantity' again.
data['cost_per_unit'] = data['cost_per_unit'].mul(data['1_unit_metric_quantity'])
data['effectiveness_per_unit'] = data['effectiveness_per_unit'].mul(data['1_unit_metric_quantity'])

In [None]:
# Display the working frame, including the filled-in 'recommended' column.
data

In [None]:
# data.to_csv("./test_data/KSA Pepsi Out_nevergrad.csv", index=False)