# Imports

In [266]:
import pandas as pd
import numpy as np
import plotly.graph_objs as go
import plotly.express as px
import matplotlib.pyplot as plt
from sklearn.feature_selection import RFE
from sklearn.linear_model import LinearRegression
import statsmodels.formula.api as smf
import statsmodels.api as sm
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_percentage_error
from statsmodels.stats.outliers_influence import variance_inflation_factor
import seaborn as sns

In [267]:
df = pd.read_csv("ALL_DATA_COMBINED.csv")

# Convert 'week_start' to date-time

In [268]:
df['week_start'] = pd.to_datetime(df['week_start'], format='%d-%m-%Y')

# Month Level Flag

In [269]:
df['month'] = df['week_start'].dt.month
df_months = pd.get_dummies(df['month'],drop_first=True)
df = pd.concat([df,df_months],axis = 1, join='inner', ignore_index=False, keys=None)

# Week Level Flag

In [270]:
df['week_start'] = pd.to_datetime(df['week_start'])

def assign_week_numbers(group):
    group = group.sort_values('week_start')
    group['week_number'] = range(1, len(group) + 1)
    month_starts = group['week_start'].dt.to_period('M').unique().to_timestamp()
    for month_start in month_starts:
        mask = group['week_start'] >= month_start
        group.loc[mask, 'week_number'] -= group.loc[mask, 'week_number'].min() - 1
    return group

df = df.groupby(df['week_start'].dt.to_period('M')).apply(assign_week_numbers)
df_weeks = pd.get_dummies(df['week_number'],drop_first=True)

df = pd.concat([df,df_weeks],axis = 1, join='inner', ignore_index=False, keys=None)

# Dropping

In [271]:
df=df.drop(['week_number','month'],axis=1)

In [272]:
df.columns = df.columns.astype(str)

In [273]:
df.columns

Index(['week_start', 'digital_catchuptv_bledina_brand-equity_spends',
       'digital_dataretailers_bledina_brand-equity_spends',
       'digital_directbuying_bledina_brand-equity_spends',
       'digital_keywordtargeting_bledina_brand-equity_spends',
       'digital_nativeads_bledina_brand-equity_spends',
       'digital_programmatic_bledina_brand-equity_spends',
       'digital_sea_bledina_brand-equity_spends',
       'digital_social_bledina_brand-equity_spends',
       'press_equity_bledina_brand-equity_spends',
       'tv_equity_bledina_brand-equity_spends',
       'digital_dataretailers_bledina_brand-range_spends',
       'digital_directbuying_bledina_brand-range_spends',
       'digital_keywordtargeting_bledina_brand-range_spends',
       'digital_programmatic_bledina_brand-range_spends',
       'digital_sea_bledina_brand-range_spends',
       'digital_social_bledina_brand-range_spends',
       'press_product_bledina_brand-range_spends',
       'app_total_bledina_brand_visits', '

In [274]:
df

Unnamed: 0,week_start,digital_catchuptv_bledina_brand-equity_spends,digital_dataretailers_bledina_brand-equity_spends,digital_directbuying_bledina_brand-equity_spends,digital_keywordtargeting_bledina_brand-equity_spends,digital_nativeads_bledina_brand-equity_spends,digital_programmatic_bledina_brand-equity_spends,digital_sea_bledina_brand-equity_spends,digital_social_bledina_brand-equity_spends,press_equity_bledina_brand-equity_spends,...,7,8,9,10,11,12,2,3,4,5
0,2017-01-02,0.01,0.0,0.01,0.000000,2367.40,0.060000,16089.49,0.010000,0.0,...,0,0,0,0,0,0,0,0,0,0
1,2017-01-09,0.00,0.0,0.01,0.000000,2785.65,0.050000,11618.44,10034.990000,0.0,...,0,0,0,0,0,0,1,0,0,0
2,2017-01-16,0.00,0.0,0.01,0.000000,2708.31,0.080000,10388.53,6955.560000,0.0,...,0,0,0,0,0,0,0,1,0,0
3,2017-01-23,0.00,0.0,0.00,0.000000,2473.94,0.080000,9699.73,4630.010000,0.0,...,0,0,0,0,0,0,0,0,1,0
4,2017-01-30,0.00,0.0,0.01,0.000000,618.99,0.010000,6182.01,2911.880000,0.0,...,0,0,0,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
177,2020-05-25,0.00,0.0,0.00,6562.143500,0.00,5501.142043,2537.86,8923.720012,0.0,...,0,0,0,0,0,0,0,0,1,0
178,2020-06-01,0.00,0.0,0.00,0.031994,0.00,0.000000,2739.21,1075.980003,0.0,...,0,0,0,0,0,0,0,0,0,0
179,2020-06-08,0.00,0.0,0.00,0.000000,0.00,0.000000,2782.00,3742.977999,0.0,...,0,0,0,0,0,0,1,0,0,0
180,2020-06-15,0.00,0.0,0.00,0.000000,0.00,0.000000,2622.45,4007.615998,0.0,...,0,0,0,0,0,0,0,1,0,0


# Combining spends

In [275]:
def row_sum(row):
    return row.iloc[:].sum()

# TV Spends

In [276]:
tv_columns = [col for col in df.columns if 'tv' in col.lower() and 'digital' not in col.lower() and 'competition' not in col.lower()]

In [277]:
df['Bledina_total_tv_spends'] = df[tv_columns].apply(row_sum,axis=1)

In [278]:
df = df.drop(tv_columns,axis=1)

# Press spends

In [279]:
press_columns = [col for col in df.columns if 'press' in col.lower() and 'nestle' not in col.lower() and 'competition' not in col.lower()]

In [280]:
df['Bledina_total_press_spends'] = df[press_columns].apply(row_sum,axis=1)

In [281]:
df = df.drop(press_columns,axis=1)

# Dataretailer spends

In [282]:
dataretailers_columns = [col for col in df.columns if 'dataretailers' in col.lower() and 'nestle' not in col.lower() and 'competition' not in col.lower()]

In [283]:
df['Bledina_total_dataretailers_spends'] = df[dataretailers_columns].apply(row_sum,axis=1)

In [284]:
df = df.drop(dataretailers_columns,axis=1)

# Social spends

In [285]:
social_columns = [col for col in df.columns if 'social' in col.lower() and 'nestle' not in col.lower() and 'competition' not in col.lower()]

In [286]:
social_columns

['digital_social_bledina_brand-equity_spends',
 'digital_social_bledina_brand-range_spends',
 'digital_social_bledina_product_spends']

In [287]:
df['Bledina_total_social_spends'] = df[social_columns].apply(row_sum,axis=1)

In [288]:
df = df.drop(social_columns,axis=1)

# Promo spends

In [289]:
promo_columns = [col for col in df.columns if 'promo' in col.lower() and 'nestle' not in col.lower() and 'competition' not in col.lower()]

In [290]:
df['Bledina_total_promo_spends'] = df[promo_columns].apply(row_sum,axis=1)

In [291]:
df = df.drop(promo_columns,axis=1)

In [292]:
df

Unnamed: 0,week_start,digital_catchuptv_bledina_brand-equity_spends,digital_directbuying_bledina_brand-equity_spends,digital_keywordtargeting_bledina_brand-equity_spends,digital_nativeads_bledina_brand-equity_spends,digital_programmatic_bledina_brand-equity_spends,digital_sea_bledina_brand-equity_spends,digital_directbuying_bledina_brand-range_spends,digital_keywordtargeting_bledina_brand-range_spends,digital_programmatic_bledina_brand-range_spends,...,12,2,3,4,5,Bledina_total_tv_spends,Bledina_total_press_spends,Bledina_total_dataretailers_spends,Bledina_total_social_spends,Bledina_total_promo_spends
0,2017-01-02,0.01,0.01,0.000000,2367.40,0.060000,16089.49,0.0,0.000000,0.00,...,0,0,0,0,0,2369.390000,0.0,0.000,0.010000,7238.542484
1,2017-01-09,0.00,0.01,0.000000,2785.65,0.050000,11618.44,0.0,0.000000,0.00,...,0,1,0,0,0,2470.600000,0.0,0.000,10034.990000,20498.188230
2,2017-01-16,0.00,0.01,0.000000,2708.31,0.080000,10388.53,0.0,0.000000,0.01,...,0,0,1,0,0,2470.600000,0.0,0.000,6955.560000,104070.912100
3,2017-01-23,0.00,0.00,0.000000,2473.94,0.080000,9699.73,0.0,0.000000,0.01,...,0,0,0,1,0,2470.600000,0.0,0.000,4630.010000,67294.616930
4,2017-01-30,0.00,0.01,0.000000,618.99,0.010000,6182.01,0.0,0.000000,0.00,...,0,0,0,0,1,2463.250000,0.0,0.000,2911.880000,46532.729780
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
177,2020-05-25,0.00,0.00,6562.143500,0.00,5501.142043,2537.86,0.0,5513.550497,0.00,...,0,0,0,1,0,2777.054278,0.0,0.000,8923.720012,23682.980950
178,2020-06-01,0.00,0.00,0.031994,0.00,0.000000,2739.21,0.0,4681.485249,0.00,...,0,0,0,0,0,2347.751201,0.0,0.016,1075.980003,15803.450730
179,2020-06-08,0.00,0.00,0.000000,0.00,0.000000,2782.00,0.0,4148.221368,0.00,...,0,1,0,0,0,3132.570888,0.0,0.000,3742.977999,2392.235897
180,2020-06-15,0.00,0.00,0.000000,0.00,0.000000,2622.45,0.0,3691.624404,0.00,...,0,0,1,0,0,3132.570888,0.0,380.505,4007.615998,13390.429120


# Catchup TV spends

In [293]:
catchuptv_columns = [col for col in df.columns if 'catchuptv' in col.lower() and 'nestle' not in col.lower() and 'competition' not in col.lower()]

In [294]:
df['Bledina_total_catchuptv_spends'] = df[catchuptv_columns].apply(row_sum,axis=1)

In [295]:
catchuptv_columns

['digital_catchuptv_bledina_brand-equity_spends',
 'digital_catchuptv_bledina_product_spends']

In [296]:
df = df.drop(catchuptv_columns,axis=1)

# Programmatic spends

In [297]:
programmatic_columns = [col for col in df.columns if 'programmatic' in col.lower() and 'nestle' not in col.lower() and 'competition' not in col.lower()]

In [298]:
df['Bledina_total_programmatic_spends'] = df[programmatic_columns].apply(row_sum,axis=1)

In [299]:
df = df.drop(programmatic_columns,axis=1)

In [300]:
df

Unnamed: 0,week_start,digital_directbuying_bledina_brand-equity_spends,digital_keywordtargeting_bledina_brand-equity_spends,digital_nativeads_bledina_brand-equity_spends,digital_sea_bledina_brand-equity_spends,digital_directbuying_bledina_brand-range_spends,digital_keywordtargeting_bledina_brand-range_spends,digital_sea_bledina_brand-range_spends,app_total_bledina_brand_visits,crm_automatic_bledina_brand_emails,...,3,4,5,Bledina_total_tv_spends,Bledina_total_press_spends,Bledina_total_dataretailers_spends,Bledina_total_social_spends,Bledina_total_promo_spends,Bledina_total_catchuptv_spends,Bledina_total_programmatic_spends
0,2017-01-02,0.01,0.000000,2367.40,16089.49,0.0,0.000000,0.00,0,21274.0000,...,0,0,0,2369.390000,0.0,0.000,0.010000,7238.542484,0.01,0.060000
1,2017-01-09,0.01,0.000000,2785.65,11618.44,0.0,0.000000,0.00,0,41976.0000,...,0,0,0,2470.600000,0.0,0.000,10034.990000,20498.188230,0.00,0.050000
2,2017-01-16,0.01,0.000000,2708.31,10388.53,0.0,0.000000,0.00,0,24593.0000,...,1,0,0,2470.600000,0.0,0.000,6955.560000,104070.912100,0.00,0.090000
3,2017-01-23,0.00,0.000000,2473.94,9699.73,0.0,0.000000,0.00,0,6784.0000,...,0,1,0,2470.600000,0.0,0.000,4630.010000,67294.616930,0.00,0.090000
4,2017-01-30,0.01,0.000000,618.99,6182.01,0.0,0.000000,0.00,0,23766.0000,...,0,0,1,2463.250000,0.0,0.000,2911.880000,46532.729780,0.00,0.010000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
177,2020-05-25,0.00,6562.143500,0.00,2537.86,0.0,5513.550497,2521.41,19769,43550.0000,...,0,1,0,2777.054278,0.0,0.000,8923.720012,23682.980950,0.00,5501.142043
178,2020-06-01,0.00,0.031994,0.00,2739.21,0.0,4681.485249,2689.86,21325,210240.5329,...,0,0,0,2347.751201,0.0,0.016,1075.980003,15803.450730,0.00,0.000000
179,2020-06-08,0.00,0.000000,0.00,2782.00,0.0,4148.221368,2688.40,20683,210240.5329,...,0,0,0,3132.570888,0.0,0.000,3742.977999,2392.235897,0.00,0.000000
180,2020-06-15,0.00,0.000000,0.00,2622.45,0.0,3691.624404,2123.31,19906,210240.5329,...,1,0,0,3132.570888,0.0,380.505,4007.615998,13390.429120,0.00,0.000000


# Direct Spends

In [301]:
directbuying_columns = [col for col in df.columns if 'directbuying' in col.lower() and 'nestle' not in col.lower() and 'competition' not in col.lower()]

In [302]:
df['Bledina_total_directbuying_spends'] = df[directbuying_columns].apply(row_sum,axis=1)

In [303]:
df = df.drop(directbuying_columns,axis=1)

In [304]:
df.columns

Index(['week_start', 'digital_keywordtargeting_bledina_brand-equity_spends',
       'digital_nativeads_bledina_brand-equity_spends',
       'digital_sea_bledina_brand-equity_spends',
       'digital_keywordtargeting_bledina_brand-range_spends',
       'digital_sea_bledina_brand-range_spends',
       'app_total_bledina_brand_visits', 'crm_automatic_bledina_brand_emails',
       'crm_event_bledina_brand_emails', 'website_total_bledina_brand_visits',
       'competition_digital_competition-nestle_total_spends',
       'competition_digital_competition-others_total_spends',
       'competition_ooh_competition-nestle_total_spends',
       'competition_ooh_competition-others_total_spends',
       'competition_press_competition-nestle_total_spends',
       'competition_press_competition-others_total_spends',
       'competition_tv_competition-nestle_total_spends',
       'competition_tv_competition-others_total_spends',
       'macroeconomic_total_total_total_cci',
       'macroeconomic_total_

# Lag

In [305]:
import statsmodels.api as sm
def graph(x): 
    a=np.array(x) 
    b=np.array(df['Sales']) 
    c=sm.tsa.stattools.ccf(a, b, adjusted=False) 
    print(c[:10])

In [306]:
ad_feats = [ 'digital_keywordtargeting_bledina_brand-equity_spends',
       'digital_nativeads_bledina_brand-equity_spends',
       'digital_sea_bledina_brand-equity_spends',
       'digital_keywordtargeting_bledina_brand-range_spends',
       'digital_sea_bledina_brand-range_spends',
       'app_total_bledina_brand_visits', 'crm_automatic_bledina_brand_emails',
       'crm_event_bledina_brand_emails', 'website_total_bledina_brand_visits',
       'competition_digital_competition-nestle_total_spends',
       'competition_digital_competition-others_total_spends',
       'competition_ooh_competition-nestle_total_spends',
       'competition_ooh_competition-others_total_spends',
       'competition_press_competition-nestle_total_spends',
       'competition_press_competition-others_total_spends',
       'competition_tv_competition-nestle_total_spends',
       'competition_tv_competition-others_total_spends',
       'macroeconomic_total_total_total_cci',
       'macroeconomic_total_total_total_holiday',
       'macroeconomic_total_total_total_livebirths',
       'macroeconomic_total_total_total_lockdownflag',
       'macroeconomic_total_total_total_population',
       'macroeconomic_total_total_total_unemploymentrate',
       'macroeconomic_total_total_total_weeklycases',
       'macroeconomic_total_total_total_weeklydeath',
       'digital_keywordtargeting_bledina_product_spends',
       'retail_total_bledina_product_volume',
       'retail_total_bledina_product_dvm',
       'retail_total_bledina_product_price',
       'competition_retail_competition_nonorganic_dvm',
       'competition_retail_competition_nonorganic_price',
       'Bledina_total_tv_spends', 'Bledina_total_press_spends',
       'Bledina_total_dataretailers_spends', 'Bledina_total_social_spends',
       'Bledina_total_promo_spends', 'Bledina_total_catchuptv_spends',
       'Bledina_total_programmatic_spends',
       'Bledina_total_directbuying_spends']

In [307]:
for i in ad_feats:
    graph(df[i])
    print(">>",i)

[-0.10278589 -0.07751079 -0.0187978  -0.08654796 -0.14852569 -0.15808895
 -0.11657314 -0.10412928 -0.04270735  0.22555071]
>> digital_keywordtargeting_bledina_brand-equity_spends
[0.03137769 0.08198552 0.18875103 0.15861826 0.10529814 0.06937996
 0.09140405 0.08711082 0.10205379 0.09598324]
>> digital_nativeads_bledina_brand-equity_spends
[ 0.10654427  0.09202687  0.15114158  0.11857173  0.05120218 -0.0145781
 -0.054055   -0.08187611 -0.08439674 -0.06878255]
>> digital_sea_bledina_brand-equity_spends
[ 0.09381173  0.05645849  0.03365571 -0.02074966 -0.16012748 -0.20925312
 -0.12114521 -0.11860509 -0.08114447 -0.0255558 ]
>> digital_keywordtargeting_bledina_brand-range_spends
[-0.24801461 -0.25872614 -0.18467462 -0.13761355 -0.10370296 -0.06476786
 -0.00907971  0.01529251 -0.01021031 -0.04673333]
>> digital_sea_bledina_brand-range_spends
[-0.11155585 -0.12813055 -0.1070765  -0.10871283 -0.10124027 -0.073978
 -0.04438259 -0.0156144  -0.05533021 -0.0800616 ]
>> app_total_bledina_brand_vis