In [None]:
import pandas as pd
from scipy import stats
import os
import sys

In [None]:
def spotlift_group(X, y):
    """Sum the minute-level lift that falls inside each row's time window.

    Parameters
    ----------
    X : pandas.DataFrame
        One row per window. Must contain the columns ``'date_time'`` (window
        start) and ``'post_eff'`` (window end). Both values are passed to
        ``pd.date_range``, so strings and timestamps are accepted.
    y : pandas.DataFrame
        Minute-level data with a ``'date_time'`` column. The values that are
        summed are taken from the column at position 1.

    Returns
    -------
    pandas.DataFrame
        A copy of ``X`` with an extra (or overwritten) column ``'y'`` holding,
        per row, the sum of ``y``'s second column over all minutes between
        ``date_time`` and ``post_eff`` (both inclusive). ``X`` itself is not
        modified. Windows without any matching minute get 0.
    """
    df = X.copy()

    # Pull the two needed columns out once instead of re-slicing `y` per row.
    stamps = y['date_time']
    values = y.iloc[:, 1]

    totals = []
    # Walk the rows positionally: writing back through `df.loc[label]` would
    # hit every row sharing that label when the index contains duplicates.
    for start, end in zip(df['date_time'], df['post_eff']):
        window = pd.date_range(start=start, end=end, freq='min')
        totals.append(values[stamps.isin(window)].sum())

    # Assigning the whole list at once yields a numeric column rather than the
    # object-dtype column a `None` placeholder would leave behind.
    df['y'] = totals
    return df

In [None]:
# Path to the project's main directory; every input and output folder below is
# resolved relative to it. `sys.path[0]` can be an empty string (meaning
# "current directory"), so fall back to the working directory in that case
# instead of building paths that start at the filesystem root.
direct = sys.path[0] or os.getcwd()

# os.path.join keeps these paths valid on every platform, not just Windows.
dir_greedy = os.path.join(direct, 'spotlifts', 'Greedy')
dir_BSTS = os.path.join(direct, 'spotlifts', 'BSTS_4min')
dir_save = os.path.join(direct, 'results')

In [None]:
# Function for post-effect:
def post_effects(case):
    """Summarise the Greedy spotlifts of one case for every post-effect window.

    Every ``*.csv`` file in ``<dir_greedy>/<case>/All visits`` is expected to
    end in the window length in minutes (e.g. ``4.csv``, ``10.csv``). For each
    file the minute-level ``Spotlift`` column is read, aggregated per ad group
    with ``spotlift_group`` and condensed into four numbers.

    Parameters
    ----------
    case : str
        Sub-folder of ``dir_greedy`` (e.g. ``'NL_Web'``). A name starting with
        ``'NL'`` selects the ``X_nl_*`` matrices, anything else ``X_be_*``.

    Returns
    -------
    pandas.DataFrame
        One row per window, labelled ``'<minutes>min'`` and ordered by window
        length, with the columns ``'% neg. ad_group'``, ``'% neg. spotlifts'``,
        ``'positive'`` and ``'all'``. The table is also shown via ``display``.

    Raises
    ------
    ValueError
        If a CSV file name does not end in a positive integer.
    """
    dates = pd.date_range(start='2019-01-01 00:00:00', end ='2019-06-30 23:59:00', freq='min')
    folder = os.path.join(dir_greedy, case, 'All visits')
    country_tag = 'nl' if case[:2] == 'NL' else 'be'

    # Parse the window length from the full run of trailing digits of the file
    # name, so multi-digit windows (10, 20, ...) are handled without a special
    # case. Entries that are not CSV files are ignored.
    windows = []
    for filename in os.listdir(folder):
        stem, ext = os.path.splitext(filename)
        if ext.lower() != '.csv':
            continue
        digits = stem[len(stem.rstrip('0123456789')):]
        if not digits or int(digits) == 0:
            raise ValueError(
                'cannot read a positive post-effect length from file name %r' % filename)
        windows.append((int(digits), filename))
    # Numeric order; sorting the '<n>min' labels as strings would put
    # '10min' before '2min'.
    windows.sort()

    total_list = {}
    for post_eff, filename in windows:
        title = str(post_eff) + 'min'

        DataSet = pd.read_csv(os.path.join(folder, filename), usecols=['Spotlift'])
        DataSet['date_time'] = dates

        X_dir = os.path.join(direct, 'X_matrix', 'X_' + country_tag + '_' + title + '.csv')
        X_matrix = pd.read_csv(X_dir, index_col='ad_group')
        lift = spotlift_group(X_matrix, DataSet[['date_time', 'Spotlift']])

        spot = DataSet['Spotlift']
        n_nonzero = len(DataSet[spot != 0])
        n_groups = len(lift)

        # Share of non-zero minutes with a negative lift.
        neg_mins = len(DataSet[spot < 0]) / n_nonzero * 100 if n_nonzero else float('nan')
        # Share of ad groups whose summed lift is negative.
        neg_lifts = len(lift[lift['y'] < 0]) / n_groups * 100 if n_groups else float('nan')
        # Sums are divided by the window length in minutes.
        positive_lifts = spot[spot > 0].sum() / post_eff
        total_lifts = spot.sum() / post_eff

        total_list[title] = [neg_mins, neg_lifts, positive_lifts, total_lifts]

    # NOTE(review): the first value is a per-minute share and the second a
    # per-ad-group share, so the first two labels look swapped. They are left
    # unchanged because they end up in the exported tables; please confirm.
    result = pd.DataFrame.from_dict(total_list, orient='index',
                           columns = ['% neg. ad_group', '% neg. spotlifts', 'positive', 'all'])
    display(result)
    return result

In [None]:
# Table 5, 14-17
# Build the post-effect summary for every country/medium combination and
# export each one to '<dir_save>/post_eff_<case>.xlsx'.
post_eff_cases = ['NL_Web', 'NL_App', 'BE_Web', 'BE_App']
post_eff_tables = {case: post_effects(case) for case in post_eff_cases}

# Keep the individual names available for interactive inspection.
post_eff_NL_Web = post_eff_tables['NL_Web']
post_eff_NL_App = post_eff_tables['NL_App']
post_eff_BE_Web = post_eff_tables['BE_Web']
post_eff_BE_App = post_eff_tables['BE_App']

# os.path.join instead of a hard-coded backslash so the export also works
# outside Windows.
for case, table in post_eff_tables.items():
    table.to_excel(os.path.join(dir_save, 'post_eff_' + case + '.xlsx'))

In [None]:
# Table 6, 18-25
# BSTS 4 min ------------------------------------------------------------------------------------------------------------------
# One row per file in the BSTS folder: share of negative minutes plus median,
# MAD and sum of the positive minute-level lifts.
dates = pd.date_range(start='2019-01-01 00:00:00', end ='2019-06-30 23:59:00', freq='min')

folder = dir_BSTS
total_list = {}

# sorted() makes the row order of the table reproducible; os.listdir returns
# the entries in arbitrary order.
for filename in sorted(os.listdir(folder)):
    # Fixed character positions of the file name carry the metadata.
    country = filename[5:7]
    title = filename[5:17]

    file_path = os.path.join(folder, filename)

    DataSet = pd.read_csv(file_path, sep=';', usecols=['lift'])
    DataSet.columns = ['Spotlift']
    DataSet['date_time'] = dates

    spotlift = DataSet['Spotlift']
    positive = spotlift[spotlift > 0]
    n_valid = len(DataSet[spotlift.notna()])

    # Negative minutes as a percentage of all minutes that have a value.
    neg_mins = len(DataSet[spotlift < 0]) / n_valid * 100 if n_valid else float('nan')
    median_pos = positive.median()
    # `stats.median_absolute_deviation` was deprecated in SciPy 1.5 and is
    # absent from current releases. Its replacement divides by `scale` and
    # defaults to 1.0, so pass the reciprocal of the old default factor
    # (1.4826) to keep the numbers unchanged.
    if hasattr(stats, 'median_abs_deviation'):
        mad = stats.median_abs_deviation(positive, scale=1 / 1.4826)
    else:
        mad = stats.median_absolute_deviation(positive)
    sum_pos = positive.sum()

    total_list[title] = [neg_mins, median_pos, mad, sum_pos]

result = pd.DataFrame.from_dict(total_list, orient='index',
                       columns = ['% neg. mins', 'median', 'mad', 'sum pos spotlift'])
display(result)

result.to_excel(os.path.join(dir_save, 'cases_BSTS_4min.xlsx'))

In [None]:
# Greedy 4 min ----------------------------------------------------------------------------------------------------------------
# One row per country / medium / visit-type combination, read from
# '<dir_greedy>/<country>_<medium>/<visit type>/4.csv'.
countries = ['NL', 'BE']
mediums = ['App', 'Web']
cases = ['All visits', 'No paid visits', 'Direct and search visits', 'Direct visits']

# Defined here as well so the cell does not depend on an earlier cell having
# been run first.
dates = pd.date_range(start='2019-01-01 00:00:00', end ='2019-06-30 23:59:00', freq='min')

folder = dir_greedy
total_list = {}

for c in countries:
    for m in mediums:
        version = c + '_' + m
        for idx, ca in enumerate(cases):
            file_path = os.path.join(folder, version, ca, '4.csv')
            title = version + '_case' + str(idx+1)

            DataSet = pd.read_csv(file_path, usecols=['Spotlift'])
            DataSet['date_time'] = dates

            spotlift = DataSet['Spotlift']
            positive = spotlift[spotlift > 0]
            n_nonzero = len(DataSet[spotlift != 0])

            # Negative minutes as a percentage of all non-zero minutes.
            neg_mins = len(DataSet[spotlift < 0]) / n_nonzero * 100 if n_nonzero else float('nan')
            median_pos = positive.median()
            # `stats.median_absolute_deviation` was deprecated in SciPy 1.5
            # and is absent from current releases. Its replacement divides by
            # `scale` and defaults to 1.0, so pass the reciprocal of the old
            # default factor (1.4826) to keep the numbers unchanged.
            if hasattr(stats, 'median_abs_deviation'):
                mad = stats.median_abs_deviation(positive, scale=1 / 1.4826)
            else:
                mad = stats.median_absolute_deviation(positive)
            sum_pos = positive.sum()

            total_list[title] = [neg_mins, median_pos, mad, sum_pos]

result = pd.DataFrame.from_dict(total_list, orient='index',
                       columns = ['% neg. mins', 'median', 'mad', 'sum pos spotlift'])
display(result)
result.to_excel(os.path.join(dir_save, 'cases_GREEDY_4min.xlsx'))