# Imports

In [557]:
import pandas as pd
import numpy as np
import plotly.graph_objs as go
import plotly.express as px
import matplotlib.pyplot as plt
from sklearn.feature_selection import RFE
from sklearn.linear_model import LinearRegression
import statsmodels.formula.api as smf
import statsmodels.api as sm
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_percentage_error
import seaborn as sns

In [558]:
df = pd.read_csv("ALL_DATA_COMBINED.csv")

In [559]:
df['week_start'].head(35)

0     02-01-2017
1     09-01-2017
2     16-01-2017
3     23-01-2017
4     30-01-2017
5     06-02-2017
6     13-02-2017
7     20-02-2017
8     27-02-2017
9     06-03-2017
10    13-03-2017
11    20-03-2017
12    27-03-2017
13    03-04-2017
14    10-04-2017
15    17-04-2017
16    24-04-2017
17    01-05-2017
18    08-05-2017
19    15-05-2017
20    22-05-2017
21    29-05-2017
22    05-06-2017
23    12-06-2017
24    19-06-2017
25    26-06-2017
26    03-07-2017
27    10-07-2017
28    17-07-2017
29    24-07-2017
30    31-07-2017
31    07-08-2017
32    14-08-2017
33    21-08-2017
34    28-08-2017
Name: week_start, dtype: object

# Conversion to datetime

In [560]:
df['week_start'] = pd.to_datetime(df['week_start'], format='%d-%m-%Y')

In [561]:
df['week_start'].head(35)

0    2017-01-02
1    2017-01-09
2    2017-01-16
3    2017-01-23
4    2017-01-30
5    2017-02-06
6    2017-02-13
7    2017-02-20
8    2017-02-27
9    2017-03-06
10   2017-03-13
11   2017-03-20
12   2017-03-27
13   2017-04-03
14   2017-04-10
15   2017-04-17
16   2017-04-24
17   2017-05-01
18   2017-05-08
19   2017-05-15
20   2017-05-22
21   2017-05-29
22   2017-06-05
23   2017-06-12
24   2017-06-19
25   2017-06-26
26   2017-07-03
27   2017-07-10
28   2017-07-17
29   2017-07-24
30   2017-07-31
31   2017-08-07
32   2017-08-14
33   2017-08-21
34   2017-08-28
Name: week_start, dtype: datetime64[ns]

In [562]:
df.shape

(182, 52)

# Month Level Flag

In [563]:
df['month'] = df['week_start'].dt.month

In [564]:
df_months = pd.get_dummies(df['month'],drop_first=True)

In [565]:
df = pd.concat([df,df_months],axis = 1, join='inner', ignore_index=False, keys=None)

# Week Level Flag

In [566]:
df['week_start'] = pd.to_datetime(df['week_start'])

In [567]:
def assign_week_numbers(group):
    group = group.sort_values('week_start')
    group['week_number'] = range(1, len(group) + 1)
    month_starts = group['week_start'].dt.to_period('M').unique().to_timestamp()
    for month_start in month_starts:
        mask = group['week_start'] >= month_start
        group.loc[mask, 'week_number'] -= group.loc[mask, 'week_number'].min() - 1
    return group

In [568]:
df = df.groupby(df['week_start'].dt.to_period('M')).apply(assign_week_numbers)

In [569]:
l=['week_start','week_number']

In [570]:
df[l].head(30)

Unnamed: 0,week_start,week_number
0,2017-01-02,1
1,2017-01-09,2
2,2017-01-16,3
3,2017-01-23,4
4,2017-01-30,5
5,2017-02-06,1
6,2017-02-13,2
7,2017-02-20,3
8,2017-02-27,4
9,2017-03-06,1


In [571]:
df_weeks = pd.get_dummies(df['week_number'],drop_first=True)

In [572]:
df = pd.concat([df,df_weeks],axis = 1, join='inner', ignore_index=False, keys=None)

# Dropping

In [573]:
df=df.drop(['week_number','month'],axis=1)

In [574]:
df.shape

(182, 67)

# AdStock

In [575]:
df.columns = df.columns.astype(str)

In [576]:
digital_columns = [col for col in df.columns if 'digital' in col.lower()]
Digital=df[digital_columns]

In [577]:
promo_columns = [col for col in df.columns if 'promo' in col.lower()]
Promo=df[promo_columns]

In [578]:
non_digital_columns= [col for col in df.columns if 'spends' in col.lower() and 'digital' not in col.lower() and 'competition' not in col.lower()]
non_digital_columns.remove('promo_total_bledina_bledina_spends')
Non_Digital=df[non_digital_columns]
Non_Digital

Unnamed: 0,press_equity_bledina_brand-equity_spends,tv_equity_bledina_brand-equity_spends,press_product_bledina_brand-range_spends,press_product_bledina_product_spends,tv_product_bledina_product_spends
0,0.0,2369.390000,0,0.0,0.0
1,0.0,2470.600000,0,0.0,0.0
2,0.0,2470.600000,0,0.0,0.0
3,0.0,2470.600000,0,0.0,0.0
4,0.0,2463.250000,0,0.0,0.0
...,...,...,...,...,...
177,0.0,2777.054278,0,0.0,0.0
178,0.0,2347.751201,0,0.0,0.0
179,0.0,3132.570888,0,0.0,0.0
180,0.0,3132.570888,0,0.0,0.0


In [579]:
def calculate_adstock(spend_data, decay_rate):
    adstock = []
    for i in range(len(spend_data)):
        if i == 0:
            adstock.append(spend_data[i])
        else:
            adstock_value = spend_data[i] + adstock[i-1] * decay_rate
            adstock.append(adstock_value)
    return adstock

In [580]:
decay_rate_Digital = 0.002104
decay_rate_Non_Digital = -0.001700
decay_rate_Promo = -0.001155

In [581]:
Adstock_df = pd.DataFrame()

In [582]:
for i in digital_columns:
    Adstock_df[i]= calculate_adstock(df[i], decay_rate_Digital)

In [583]:
for i in non_digital_columns:
    Adstock_df[i]= calculate_adstock(df[i], decay_rate_Non_Digital)

In [584]:
for i in promo_columns:
    Adstock_df[i]= calculate_adstock(df[i], decay_rate_Promo)

In [585]:
new_columns = {col: col + '_adstock' for col in Adstock_df.columns}
Adstock_df = Adstock_df.rename(columns=new_columns)

In [586]:
df = pd.concat([df, Adstock_df], axis=1)

# Columns as String

In [587]:
df.columns = df.columns.astype(str)

In [588]:
df.columns

Index(['week_start', 'digital_catchuptv_bledina_brand-equity_spends',
       'digital_dataretailers_bledina_brand-equity_spends',
       'digital_directbuying_bledina_brand-equity_spends',
       'digital_keywordtargeting_bledina_brand-equity_spends',
       'digital_nativeads_bledina_brand-equity_spends',
       'digital_programmatic_bledina_brand-equity_spends',
       'digital_sea_bledina_brand-equity_spends',
       'digital_social_bledina_brand-equity_spends',
       'press_equity_bledina_brand-equity_spends',
       'tv_equity_bledina_brand-equity_spends',
       'digital_dataretailers_bledina_brand-range_spends',
       'digital_directbuying_bledina_brand-range_spends',
       'digital_keywordtargeting_bledina_brand-range_spends',
       'digital_programmatic_bledina_brand-range_spends',
       'digital_sea_bledina_brand-range_spends',
       'digital_social_bledina_brand-range_spends',
       'press_product_bledina_brand-range_spends',
       'app_total_bledina_brand_visits', '

In [589]:
df.tail()

Unnamed: 0,week_start,digital_catchuptv_bledina_brand-equity_spends,digital_dataretailers_bledina_brand-equity_spends,digital_directbuying_bledina_brand-equity_spends,digital_keywordtargeting_bledina_brand-equity_spends,digital_nativeads_bledina_brand-equity_spends,digital_programmatic_bledina_brand-equity_spends,digital_sea_bledina_brand-equity_spends,digital_social_bledina_brand-equity_spends,press_equity_bledina_brand-equity_spends,...,digital_dataretailers_bledina_product_spends_adstock,digital_keywordtargeting_bledina_product_spends_adstock,digital_programmatic_bledina_product_spends_adstock,digital_social_bledina_product_spends_adstock,press_equity_bledina_brand-equity_spends_adstock,tv_equity_bledina_brand-equity_spends_adstock,press_product_bledina_brand-range_spends_adstock,press_product_bledina_product_spends_adstock,tv_product_bledina_product_spends_adstock,promo_total_bledina_bledina_spends_adstock
177,2020-05-25,0.0,0.0,0.0,6562.1435,0.0,5501.142043,2537.86,8923.720012,0.0,...,4.76556e-05,3.001346e-148,4.592549e-78,5.2939089999999995e-78,0.0,2774.357626,-2.798754e-66,-3.1858709999999998e-232,0.0,23619.839876
178,2020-06-01,0.0,0.0,0.0,0.031994,0.0,0.0,2739.21,1075.980003,0.0,...,1.002674e-07,6.314831e-151,9.662723e-81,1.113838e-80,0.0,2343.034793,4.757882e-69,5.415981e-235,0.0,15776.169815
179,2020-06-08,0.0,0.0,0.0,0.0,0.0,0.0,2782.0,3742.977999,0.0,...,2.109626e-10,1.328641e-153,2.033037e-83,2.343516e-83,0.0,3128.587729,-8.088399999999999e-72,-9.207168e-238,0.0,2374.014421
180,2020-06-15,0.0,0.0,0.0,0.0,0.0,0.0,2622.45,4007.615998,0.0,...,380.505,2.79546e-156,4.27751e-86,4.930758e-86,0.0,3127.252289,1.375028e-74,1.565219e-240,0.0,13387.687133
181,2020-06-22,0.0,0.0,0.0,0.031994,0.0,0.0,2422.14,3086.460004,0.0,...,4289.766,5.881647000000001e-159,8.99988e-89,1.037431e-88,0.0,2436.34492,-2.337548e-77,-2.6608720000000003e-243,0.0,23843.046561


# X and Y initialization

In [590]:
X=df.drop(['week_start','Sales','retail_total_bledina_product_volume',
           'retail_total_bledina_product_price',
           'macroeconomic_total_total_total_population',
           'macroeconomic_total_total_total_weeklycases',
           'macroeconomic_total_total_total_unemploymentrate',
           'crm_automatic_bledina_brand_emails',
           'digital_catchuptv_bledina_brand-equity_spends',
           'digital_dataretailers_bledina_brand-equity_spends',
           'digital_directbuying_bledina_brand-equity_spends',
           'digital_keywordtargeting_bledina_brand-equity_spends',
           'digital_nativeads_bledina_brand-equity_spends',
           'digital_programmatic_bledina_brand-equity_spends',
           'digital_sea_bledina_brand-equity_spends',
           'digital_social_bledina_brand-equity_spends',
           'press_equity_bledina_brand-equity_spends',
           'tv_equity_bledina_brand-equity_spends',
           'digital_dataretailers_bledina_brand-range_spends',
           'digital_directbuying_bledina_brand-range_spends',
           'digital_keywordtargeting_bledina_brand-range_spends',
           'digital_programmatic_bledina_brand-range_spends',
           'digital_sea_bledina_brand-range_spends',
           'digital_social_bledina_brand-range_spends',
           'press_product_bledina_brand-range_spends',
           'digital_catchuptv_bledina_product_spends',
           'digital_dataretailers_bledina_product_spends',
           'digital_keywordtargeting_bledina_product_spends',
           'digital_programmatic_bledina_product_spends',
           'digital_social_bledina_product_spends',
           'press_product_bledina_product_spends',
           'tv_product_bledina_product_spends',
           'promo_total_bledina_bledina_spends','competition_ooh_competition-nestle_total_spends',
       'competition_ooh_competition-others_total_spends',
       'competition_press_competition-nestle_total_spends',
       'competition_press_competition-others_total_spends',
       'competition_tv_competition-nestle_total_spends'],axis=1)
y=df['Sales']

In [591]:
O = df.drop(['week_start','Sales','retail_total_bledina_product_volume','retail_total_bledina_product_price','macroeconomic_total_total_total_population'],axis=1)

In [592]:
O.head()

Unnamed: 0,digital_catchuptv_bledina_brand-equity_spends,digital_dataretailers_bledina_brand-equity_spends,digital_directbuying_bledina_brand-equity_spends,digital_keywordtargeting_bledina_brand-equity_spends,digital_nativeads_bledina_brand-equity_spends,digital_programmatic_bledina_brand-equity_spends,digital_sea_bledina_brand-equity_spends,digital_social_bledina_brand-equity_spends,press_equity_bledina_brand-equity_spends,tv_equity_bledina_brand-equity_spends,...,digital_dataretailers_bledina_product_spends_adstock,digital_keywordtargeting_bledina_product_spends_adstock,digital_programmatic_bledina_product_spends_adstock,digital_social_bledina_product_spends_adstock,press_equity_bledina_brand-equity_spends_adstock,tv_equity_bledina_brand-equity_spends_adstock,press_product_bledina_brand-range_spends_adstock,press_product_bledina_product_spends_adstock,tv_product_bledina_product_spends_adstock,promo_total_bledina_bledina_spends_adstock
0,0.01,0.0,0.01,0.0,2367.4,0.06,16089.49,0.01,0.0,2369.39,...,0.0,0.0,0.0,0.0,0.0,2369.39,0.0,0.0,0.0,7238.542484
1,0.0,0.0,0.01,0.0,2785.65,0.05,11618.44,10034.99,0.0,2470.6,...,0.0,0.0,0.0,0.0,0.0,2466.572037,0.0,0.0,0.0,20489.827713
2,0.0,0.0,0.01,0.0,2708.31,0.08,10388.53,6955.56,0.0,2470.6,...,0.0,0.0,0.0,0.0,0.0,2466.406828,0.0,0.0,0.0,104047.246349
3,0.0,0.0,0.0,0.0,2473.94,0.08,9699.73,4630.01,0.0,2470.6,...,0.0,0.0,0.0,0.0,0.0,2466.407108,0.0,0.0,0.0,67174.44236
4,0.0,0.0,0.01,0.0,618.99,0.01,6182.01,2911.88,0.0,2463.25,...,0.0,0.0,0.0,0.0,0.0,2459.057108,0.0,0.0,0.0,46455.143299


In [593]:
Columns_names = X.columns.tolist()

# Scaling - MINMAX transfrom

In [594]:
X.columns = X.columns.astype(str)

In [595]:
scaler = MinMaxScaler()

In [596]:
scaler.fit(X)

In [597]:
X=scaler.transform(X)

In [598]:
X = pd.DataFrame(X)

In [599]:
X

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,46,47,48,49,50,51,52,53,54,55
0,0.000000,0.000000,1.000000,0.000000,0.084921,0.335121,0.600000,0.0,0.644009,0.0,...,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.001697,0.008957,0.001697,0.001697,0.00119,0.027287
1,0.000000,0.026748,0.709505,0.000000,0.084921,0.335121,0.600000,0.0,0.644009,0.0,...,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.001697,0.009305,0.001697,0.001697,0.00119,0.077355
2,0.000000,0.000000,0.633474,0.000000,0.084921,0.335121,0.600000,0.0,0.644009,0.0,...,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.001697,0.009304,0.001697,0.001697,0.00119,0.393066
3,0.000000,0.000000,0.511875,0.000000,0.084921,0.335121,0.600000,0.0,0.644009,0.0,...,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.001697,0.009304,0.001697,0.001697,0.00119,0.253747
4,0.000000,0.000000,0.313338,0.092657,0.046649,0.203037,0.635714,0.0,0.348068,0.0,...,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.001697,0.009278,0.001697,0.001697,0.00119,0.175462
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
177,0.587768,0.000000,0.394958,0.066757,0.060152,0.009851,0.250000,0.0,0.602864,1.0,...,1.706063e-09,1.483903e-151,9.764621e-83,1.147760e-82,0.001697,0.010406,0.001697,0.001697,0.00119,0.089182
178,0.634031,0.159759,0.434708,0.046458,0.021938,0.190081,0.250000,0.5,0.567898,1.0,...,3.589556e-12,3.122131e-154,2.054476e-85,2.414887e-85,0.001697,0.008863,0.001697,0.001697,0.00119,0.059545
179,0.614943,0.159759,0.506666,0.046458,0.021938,0.190081,0.250000,0.0,0.567898,1.0,...,7.552426e-15,6.568964e-157,4.322618e-88,5.080923e-88,0.001697,0.011673,0.001697,0.001697,0.00119,0.008907
180,0.591842,0.159759,0.441729,0.046458,0.021938,0.190081,0.250000,0.0,0.567898,1.0,...,1.362202e-02,1.382110e-159,9.094789e-91,1.069026e-90,0.001697,0.011668,0.001697,0.001697,0.00119,0.050521


In [600]:
X = X.rename(columns=dict(enumerate(Columns_names)))

In [601]:
X

Unnamed: 0,app_total_bledina_brand_visits,crm_event_bledina_brand_emails,website_total_bledina_brand_visits,competition_digital_competition-nestle_total_spends,competition_digital_competition-others_total_spends,competition_tv_competition-others_total_spends,macroeconomic_total_total_total_cci,macroeconomic_total_total_total_holiday,macroeconomic_total_total_total_livebirths,macroeconomic_total_total_total_lockdownflag,...,digital_dataretailers_bledina_product_spends_adstock,digital_keywordtargeting_bledina_product_spends_adstock,digital_programmatic_bledina_product_spends_adstock,digital_social_bledina_product_spends_adstock,press_equity_bledina_brand-equity_spends_adstock,tv_equity_bledina_brand-equity_spends_adstock,press_product_bledina_brand-range_spends_adstock,press_product_bledina_product_spends_adstock,tv_product_bledina_product_spends_adstock,promo_total_bledina_bledina_spends_adstock
0,0.000000,0.000000,1.000000,0.000000,0.084921,0.335121,0.600000,0.0,0.644009,0.0,...,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.001697,0.008957,0.001697,0.001697,0.00119,0.027287
1,0.000000,0.026748,0.709505,0.000000,0.084921,0.335121,0.600000,0.0,0.644009,0.0,...,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.001697,0.009305,0.001697,0.001697,0.00119,0.077355
2,0.000000,0.000000,0.633474,0.000000,0.084921,0.335121,0.600000,0.0,0.644009,0.0,...,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.001697,0.009304,0.001697,0.001697,0.00119,0.393066
3,0.000000,0.000000,0.511875,0.000000,0.084921,0.335121,0.600000,0.0,0.644009,0.0,...,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.001697,0.009304,0.001697,0.001697,0.00119,0.253747
4,0.000000,0.000000,0.313338,0.092657,0.046649,0.203037,0.635714,0.0,0.348068,0.0,...,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.001697,0.009278,0.001697,0.001697,0.00119,0.175462
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
177,0.587768,0.000000,0.394958,0.066757,0.060152,0.009851,0.250000,0.0,0.602864,1.0,...,1.706063e-09,1.483903e-151,9.764621e-83,1.147760e-82,0.001697,0.010406,0.001697,0.001697,0.00119,0.089182
178,0.634031,0.159759,0.434708,0.046458,0.021938,0.190081,0.250000,0.5,0.567898,1.0,...,3.589556e-12,3.122131e-154,2.054476e-85,2.414887e-85,0.001697,0.008863,0.001697,0.001697,0.00119,0.059545
179,0.614943,0.159759,0.506666,0.046458,0.021938,0.190081,0.250000,0.0,0.567898,1.0,...,7.552426e-15,6.568964e-157,4.322618e-88,5.080923e-88,0.001697,0.011673,0.001697,0.001697,0.00119,0.008907
180,0.591842,0.159759,0.441729,0.046458,0.021938,0.190081,0.250000,0.0,0.567898,1.0,...,1.362202e-02,1.382110e-159,9.094789e-91,1.069026e-90,0.001697,0.011668,0.001697,0.001697,0.00119,0.050521


# Test-Train Split

In [602]:
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3, random_state=42,shuffle=False)

In [603]:
# Create a linear regression model to use for feature selection
lr = LinearRegression()

# RFE features

In [604]:
rfe = RFE(estimator=lr,n_features_to_select=20, step=1)

rfe.fit(X_train,y_train)

# Get the list of selected features
selected_features = X.columns[rfe.support_]
selected_df = pd.DataFrame({'Selected Features': selected_features})

# Print the dataframe as a table
selected_df

Unnamed: 0,Selected Features
0,crm_event_bledina_brand_emails
1,website_total_bledina_brand_visits
2,competition_digital_competition-nestle_total_s...
3,competition_digital_competition-others_total_s...
4,macroeconomic_total_total_total_livebirths
5,retail_total_bledina_product_dvm
6,competition_retail_competition_nonorganic_price
7,2
8,3
9,11


In [605]:
selected_features

Index(['crm_event_bledina_brand_emails', 'website_total_bledina_brand_visits',
       'competition_digital_competition-nestle_total_spends',
       'competition_digital_competition-others_total_spends',
       'macroeconomic_total_total_total_livebirths',
       'retail_total_bledina_product_dvm',
       'competition_retail_competition_nonorganic_price', '2', '3', '11', '2',
       '3', '4', 'digital_sea_bledina_brand-equity_spends_adstock',
       'digital_programmatic_bledina_brand-range_spends_adstock',
       'digital_sea_bledina_brand-range_spends_adstock',
       'competition_digital_competition-nestle_total_spends_adstock',
       'competition_digital_competition-others_total_spends_adstock',
       'digital_keywordtargeting_bledina_product_spends_adstock',
       'promo_total_bledina_bledina_spends_adstock'],
      dtype='object')

In [606]:
n = selected_features.tolist()

In [607]:
n

['crm_event_bledina_brand_emails',
 'website_total_bledina_brand_visits',
 'competition_digital_competition-nestle_total_spends',
 'competition_digital_competition-others_total_spends',
 'macroeconomic_total_total_total_livebirths',
 'retail_total_bledina_product_dvm',
 'competition_retail_competition_nonorganic_price',
 '2',
 '3',
 '11',
 '2',
 '3',
 '4',
 'digital_sea_bledina_brand-equity_spends_adstock',
 'digital_programmatic_bledina_brand-range_spends_adstock',
 'digital_sea_bledina_brand-range_spends_adstock',
 'competition_digital_competition-nestle_total_spends_adstock',
 'competition_digital_competition-others_total_spends_adstock',
 'digital_keywordtargeting_bledina_product_spends_adstock',
 'promo_total_bledina_bledina_spends_adstock']

In [608]:
list(zip(X_train.columns,rfe.support_,rfe.ranking_))

[('app_total_bledina_brand_visits', False, 31),
 ('crm_event_bledina_brand_emails', True, 1),
 ('website_total_bledina_brand_visits', True, 1),
 ('competition_digital_competition-nestle_total_spends', True, 1),
 ('competition_digital_competition-others_total_spends', True, 1),
 ('competition_tv_competition-others_total_spends', False, 9),
 ('macroeconomic_total_total_total_cci', False, 29),
 ('macroeconomic_total_total_total_holiday', False, 14),
 ('macroeconomic_total_total_total_livebirths', True, 1),
 ('macroeconomic_total_total_total_lockdownflag', False, 35),
 ('macroeconomic_total_total_total_weeklydeath', False, 33),
 ('retail_total_bledina_product_dvm', True, 1),
 ('competition_retail_competition_nonorganic_dvm', False, 7),
 ('competition_retail_competition_nonorganic_price', True, 1),
 ('2', True, 1),
 ('3', True, 1),
 ('4', False, 3),
 ('5', False, 25),
 ('6', False, 5),
 ('7', False, 17),
 ('8', False, 16),
 ('9', False, 12),
 ('10', False, 11),
 ('11', True, 1),
 ('12', Fal

In [609]:
n = selected_features.tolist()

In [610]:
n

['crm_event_bledina_brand_emails',
 'website_total_bledina_brand_visits',
 'competition_digital_competition-nestle_total_spends',
 'competition_digital_competition-others_total_spends',
 'macroeconomic_total_total_total_livebirths',
 'retail_total_bledina_product_dvm',
 'competition_retail_competition_nonorganic_price',
 '2',
 '3',
 '11',
 '2',
 '3',
 '4',
 'digital_sea_bledina_brand-equity_spends_adstock',
 'digital_programmatic_bledina_brand-range_spends_adstock',
 'digital_sea_bledina_brand-range_spends_adstock',
 'competition_digital_competition-nestle_total_spends_adstock',
 'competition_digital_competition-others_total_spends_adstock',
 'digital_keywordtargeting_bledina_product_spends_adstock',
 'promo_total_bledina_bledina_spends_adstock']

In [611]:
X_train.columns

Index(['app_total_bledina_brand_visits', 'crm_event_bledina_brand_emails',
       'website_total_bledina_brand_visits',
       'competition_digital_competition-nestle_total_spends',
       'competition_digital_competition-others_total_spends',
       'competition_tv_competition-others_total_spends',
       'macroeconomic_total_total_total_cci',
       'macroeconomic_total_total_total_holiday',
       'macroeconomic_total_total_total_livebirths',
       'macroeconomic_total_total_total_lockdownflag',
       'macroeconomic_total_total_total_weeklydeath',
       'retail_total_bledina_product_dvm',
       'competition_retail_competition_nonorganic_dvm',
       'competition_retail_competition_nonorganic_price', '2', '3', '4', '5',
       '6', '7', '8', '9', '10', '11', '12', '2', '3', '4', '5',
       'digital_catchuptv_bledina_brand-equity_spends_adstock',
       'digital_dataretailers_bledina_brand-equity_spends_adstock',
       'digital_directbuying_bledina_brand-equity_spends_adstock',


In [612]:
X_train

Unnamed: 0,app_total_bledina_brand_visits,crm_event_bledina_brand_emails,website_total_bledina_brand_visits,competition_digital_competition-nestle_total_spends,competition_digital_competition-others_total_spends,competition_tv_competition-others_total_spends,macroeconomic_total_total_total_cci,macroeconomic_total_total_total_holiday,macroeconomic_total_total_total_livebirths,macroeconomic_total_total_total_lockdownflag,...,digital_dataretailers_bledina_product_spends_adstock,digital_keywordtargeting_bledina_product_spends_adstock,digital_programmatic_bledina_product_spends_adstock,digital_social_bledina_product_spends_adstock,press_equity_bledina_brand-equity_spends_adstock,tv_equity_bledina_brand-equity_spends_adstock,press_product_bledina_brand-range_spends_adstock,press_product_bledina_product_spends_adstock,tv_product_bledina_product_spends_adstock,promo_total_bledina_bledina_spends_adstock
0,0.0,0.000000,1.000000,0.000000,0.084921,0.335121,0.600000,0.0,0.644009,0.0,...,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.001697,0.008957,0.001697,0.001697,0.00119,0.027287
1,0.0,0.026748,0.709505,0.000000,0.084921,0.335121,0.600000,0.0,0.644009,0.0,...,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.001697,0.009305,0.001697,0.001697,0.00119,0.077355
2,0.0,0.000000,0.633474,0.000000,0.084921,0.335121,0.600000,0.0,0.644009,0.0,...,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.001697,0.009304,0.001697,0.001697,0.00119,0.393066
3,0.0,0.000000,0.511875,0.000000,0.084921,0.335121,0.600000,0.0,0.644009,0.0,...,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.001697,0.009304,0.001697,0.001697,0.00119,0.253747
4,0.0,0.000000,0.313338,0.092657,0.046649,0.203037,0.635714,0.0,0.348068,0.0,...,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.001697,0.009278,0.001697,0.001697,0.00119,0.175462
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
122,0.0,0.000000,0.319052,0.000000,0.219378,0.855345,0.550000,0.5,0.617935,0.0,...,3.947142e-07,2.534486e-04,1.653904e-04,1.188700e-05,0.001697,0.007169,0.001697,0.001697,0.00119,0.016496
123,0.0,0.000000,0.347662,0.000000,0.219378,0.855345,0.550000,0.0,0.617935,0.0,...,8.304787e-10,5.332559e-07,3.479815e-07,2.501024e-08,0.001697,0.008509,0.001697,0.001697,0.00119,0.008136
124,0.0,0.279544,0.305220,0.000000,0.219378,0.855345,0.550000,0.0,0.617935,0.0,...,1.747327e-12,1.121970e-09,7.321531e-10,5.262155e-11,0.001697,0.008507,0.001697,0.001697,0.00119,0.017267
125,0.0,0.000000,0.265300,0.000000,0.202664,0.896675,0.578571,0.5,0.611476,0.0,...,3.676376e-15,2.360626e-12,1.540450e-12,1.107157e-13,0.001697,0.008507,0.001697,0.001697,0.00119,0.013063


In [613]:
X_train = X_train[selected_features]

# Adding Values

In [614]:
X_train['constant']=1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_train['constant']=1


In [615]:
X_train

Unnamed: 0,crm_event_bledina_brand_emails,website_total_bledina_brand_visits,competition_digital_competition-nestle_total_spends,competition_digital_competition-others_total_spends,macroeconomic_total_total_total_livebirths,retail_total_bledina_product_dvm,competition_retail_competition_nonorganic_price,2,2.1,3,...,4,4.1,digital_sea_bledina_brand-equity_spends_adstock,digital_programmatic_bledina_brand-range_spends_adstock,digital_sea_bledina_brand-range_spends_adstock,competition_digital_competition-nestle_total_spends_adstock,competition_digital_competition-others_total_spends_adstock,digital_keywordtargeting_bledina_product_spends_adstock,promo_total_bledina_bledina_spends_adstock,constant
0,0.000000,1.000000,0.000000,0.084921,0.644009,1.000000,0.195811,0.0,0.0,0.0,...,0.0,0.0,1.000000,0.000000e+00,0.000000,0.000000e+00,0.084742,0.000000e+00,0.027287,1
1,0.026748,0.709505,0.000000,0.084921,0.644009,0.725928,0.193077,0.0,1.0,0.0,...,0.0,0.0,0.724218,0.000000e+00,0.000000,0.000000e+00,0.084920,0.000000e+00,0.077355,1
2,0.000000,0.633474,0.000000,0.084921,0.644009,0.805448,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.647196,7.344175e-07,0.000000,0.000000e+00,0.084921,0.000000e+00,0.393066,1
3,0.000000,0.511875,0.000000,0.084921,0.644009,0.765664,0.261707,0.0,0.0,0.0,...,0.0,1.0,0.604223,7.359627e-07,0.000000,0.000000e+00,0.084921,0.000000e+00,0.253747,1
4,0.000000,0.313338,0.092657,0.046649,0.348068,0.837492,0.393876,0.0,0.0,0.0,...,0.0,0.0,0.385498,1.548465e-09,0.000000,9.246200e-02,0.046729,0.000000e+00,0.175462,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
122,0.000000,0.319052,0.000000,0.219378,0.617935,0.372520,0.769788,0.0,0.0,0.0,...,0.0,0.0,0.340445,8.014969e-128,0.165340,7.720201e-52,0.219463,2.534486e-04,0.016496,1
123,0.000000,0.347662,0.000000,0.219378,0.617935,0.287074,0.619902,0.0,1.0,0.0,...,0.0,0.0,0.379291,1.686350e-130,0.142254,1.624330e-54,0.219379,5.332559e-07,0.008136,1
124,0.279544,0.305220,0.000000,0.219378,0.617935,0.149383,0.626127,0.0,0.0,0.0,...,0.0,0.0,0.365159,3.548080e-133,0.107421,3.417591e-57,0.219378,1.121970e-09,0.017267,1
125,0.000000,0.265300,0.000000,0.202664,0.611476,0.221048,0.658446,0.0,0.0,0.0,...,0.0,1.0,0.384565,7.465159e-136,0.098164,7.190611e-60,0.202699,2.360626e-12,0.013063,1


In [616]:
y_train = pd.DataFrame(y_train)

In [617]:
y_train

Unnamed: 0,Sales
0,6886165.0
1,5875465.0
2,5897865.0
3,5431465.0
4,5497440.0
...,...
122,4671190.0
123,4396190.0
124,4487690.0
125,4924492.0


In [618]:
model = sm.OLS(y_train, X_train).fit()

In [619]:
y_train

Unnamed: 0,Sales
0,6886165.0
1,5875465.0
2,5897865.0
3,5431465.0
4,5497440.0
...,...
122,4671190.0
123,4396190.0
124,4487690.0
125,4924492.0


In [620]:
X_train

Unnamed: 0,crm_event_bledina_brand_emails,website_total_bledina_brand_visits,competition_digital_competition-nestle_total_spends,competition_digital_competition-others_total_spends,macroeconomic_total_total_total_livebirths,retail_total_bledina_product_dvm,competition_retail_competition_nonorganic_price,2,2.1,3,...,4,4.1,digital_sea_bledina_brand-equity_spends_adstock,digital_programmatic_bledina_brand-range_spends_adstock,digital_sea_bledina_brand-range_spends_adstock,competition_digital_competition-nestle_total_spends_adstock,competition_digital_competition-others_total_spends_adstock,digital_keywordtargeting_bledina_product_spends_adstock,promo_total_bledina_bledina_spends_adstock,constant
0,0.000000,1.000000,0.000000,0.084921,0.644009,1.000000,0.195811,0.0,0.0,0.0,...,0.0,0.0,1.000000,0.000000e+00,0.000000,0.000000e+00,0.084742,0.000000e+00,0.027287,1
1,0.026748,0.709505,0.000000,0.084921,0.644009,0.725928,0.193077,0.0,1.0,0.0,...,0.0,0.0,0.724218,0.000000e+00,0.000000,0.000000e+00,0.084920,0.000000e+00,0.077355,1
2,0.000000,0.633474,0.000000,0.084921,0.644009,0.805448,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.647196,7.344175e-07,0.000000,0.000000e+00,0.084921,0.000000e+00,0.393066,1
3,0.000000,0.511875,0.000000,0.084921,0.644009,0.765664,0.261707,0.0,0.0,0.0,...,0.0,1.0,0.604223,7.359627e-07,0.000000,0.000000e+00,0.084921,0.000000e+00,0.253747,1
4,0.000000,0.313338,0.092657,0.046649,0.348068,0.837492,0.393876,0.0,0.0,0.0,...,0.0,0.0,0.385498,1.548465e-09,0.000000,9.246200e-02,0.046729,0.000000e+00,0.175462,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
122,0.000000,0.319052,0.000000,0.219378,0.617935,0.372520,0.769788,0.0,0.0,0.0,...,0.0,0.0,0.340445,8.014969e-128,0.165340,7.720201e-52,0.219463,2.534486e-04,0.016496,1
123,0.000000,0.347662,0.000000,0.219378,0.617935,0.287074,0.619902,0.0,1.0,0.0,...,0.0,0.0,0.379291,1.686350e-130,0.142254,1.624330e-54,0.219379,5.332559e-07,0.008136,1
124,0.279544,0.305220,0.000000,0.219378,0.617935,0.149383,0.626127,0.0,0.0,0.0,...,0.0,0.0,0.365159,3.548080e-133,0.107421,3.417591e-57,0.219378,1.121970e-09,0.017267,1
125,0.000000,0.265300,0.000000,0.202664,0.611476,0.221048,0.658446,0.0,0.0,0.0,...,0.0,1.0,0.384565,7.465159e-136,0.098164,7.190611e-60,0.202699,2.360626e-12,0.013063,1


In [621]:
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:                  Sales   R-squared:                       0.730
Model:                            OLS   Adj. R-squared:                  0.676
Method:                 Least Squares   F-statistic:                     13.50
Date:                Tue, 25 Apr 2023   Prob (F-statistic):           3.73e-21
Time:                        00:15:58   Log-Likelihood:                -1755.8
No. Observations:                 127   AIC:                             3556.
Df Residuals:                     105   BIC:                             3618.
Df Model:                          21                                         
Covariance Type:            nonrobust                                         
                                                                  coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------

In [622]:
X_train

Unnamed: 0,crm_event_bledina_brand_emails,website_total_bledina_brand_visits,competition_digital_competition-nestle_total_spends,competition_digital_competition-others_total_spends,macroeconomic_total_total_total_livebirths,retail_total_bledina_product_dvm,competition_retail_competition_nonorganic_price,2,2.1,3,...,4,4.1,digital_sea_bledina_brand-equity_spends_adstock,digital_programmatic_bledina_brand-range_spends_adstock,digital_sea_bledina_brand-range_spends_adstock,competition_digital_competition-nestle_total_spends_adstock,competition_digital_competition-others_total_spends_adstock,digital_keywordtargeting_bledina_product_spends_adstock,promo_total_bledina_bledina_spends_adstock,constant
0,0.000000,1.000000,0.000000,0.084921,0.644009,1.000000,0.195811,0.0,0.0,0.0,...,0.0,0.0,1.000000,0.000000e+00,0.000000,0.000000e+00,0.084742,0.000000e+00,0.027287,1
1,0.026748,0.709505,0.000000,0.084921,0.644009,0.725928,0.193077,0.0,1.0,0.0,...,0.0,0.0,0.724218,0.000000e+00,0.000000,0.000000e+00,0.084920,0.000000e+00,0.077355,1
2,0.000000,0.633474,0.000000,0.084921,0.644009,0.805448,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.647196,7.344175e-07,0.000000,0.000000e+00,0.084921,0.000000e+00,0.393066,1
3,0.000000,0.511875,0.000000,0.084921,0.644009,0.765664,0.261707,0.0,0.0,0.0,...,0.0,1.0,0.604223,7.359627e-07,0.000000,0.000000e+00,0.084921,0.000000e+00,0.253747,1
4,0.000000,0.313338,0.092657,0.046649,0.348068,0.837492,0.393876,0.0,0.0,0.0,...,0.0,0.0,0.385498,1.548465e-09,0.000000,9.246200e-02,0.046729,0.000000e+00,0.175462,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
122,0.000000,0.319052,0.000000,0.219378,0.617935,0.372520,0.769788,0.0,0.0,0.0,...,0.0,0.0,0.340445,8.014969e-128,0.165340,7.720201e-52,0.219463,2.534486e-04,0.016496,1
123,0.000000,0.347662,0.000000,0.219378,0.617935,0.287074,0.619902,0.0,1.0,0.0,...,0.0,0.0,0.379291,1.686350e-130,0.142254,1.624330e-54,0.219379,5.332559e-07,0.008136,1
124,0.279544,0.305220,0.000000,0.219378,0.617935,0.149383,0.626127,0.0,0.0,0.0,...,0.0,0.0,0.365159,3.548080e-133,0.107421,3.417591e-57,0.219378,1.121970e-09,0.017267,1
125,0.000000,0.265300,0.000000,0.202664,0.611476,0.221048,0.658446,0.0,0.0,0.0,...,0.0,1.0,0.384565,7.465159e-136,0.098164,7.190611e-60,0.202699,2.360626e-12,0.013063,1


In [623]:
y_train

Unnamed: 0,Sales
0,6886165.0
1,5875465.0
2,5897865.0
3,5431465.0
4,5497440.0
...,...
122,4671190.0
123,4396190.0
124,4487690.0
125,4924492.0


In [624]:
X_test = X_test[selected_features]

In [625]:
X_test['constant']=1

In [626]:
model2 = sm.OLS(y_test, X_test).fit()

In [627]:
print(model2.summary())

                            OLS Regression Results                            
Dep. Variable:                  Sales   R-squared:                       0.797
Model:                            OLS   Adj. R-squared:                  0.678
Method:                 Least Squares   F-statistic:                     6.672
Date:                Tue, 25 Apr 2023   Prob (F-statistic):           7.85e-07
Time:                        00:15:58   Log-Likelihood:                -760.07
No. Observations:                  55   AIC:                             1562.
Df Residuals:                      34   BIC:                             1604.
Df Model:                          20                                         
Covariance Type:            nonrobust                                         
                                                                  coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------