# Promotions

In [162]:
import pandas as pd
import numpy as np

In [163]:
# Load the raw promotions extract from its relative path in the data folder.
raw_promotions_csv = '../../data/01_raw/promotions.csv'
promotions = pd.read_csv(raw_promotions_csv)

## Exploratory analysis

In [164]:
# Peek at the first rows to see which columns and values the file holds.
promotions.head()

Unnamed: 0,prod_code,customer,location,category,time_var,promo_type
0,8684_05,ALDI,sansebastian,snack,2022-09-26T00:00:00.000+0000,
1,8684_05,ALDI,sansebastian,snack,2022-12-26T00:00:00.000+0000,-
2,8684_05,ALDI,sansebastian,snack,2023-04-03T00:00:00.000+0000,-
3,8684_05,ALDI,sansebastian,snack,2021-04-12T00:00:00.000+0000,3x2
4,8684_05,ALDI,sansebastian,snack,2021-11-22T00:00:00.000+0000,


In [165]:
# Inspect the dtype pandas inferred for each column.
promotions.dtypes

prod_code     object
customer      object
location      object
category      object
time_var      object
promo_type    object
dtype: object

In [188]:
# Convert the string timestamps in `time_var` into pandas datetimes.
time_var_parsed = pd.to_datetime(promotions['time_var'])
promotions["time_var"] = time_var_parsed

In [166]:
# (rows, columns) before duplicates are removed.
promotions.shape

(5370, 6)

In [167]:
# Remove rows that are exact copies of an earlier row, keeping the first one.
promotions = promotions.drop_duplicates(keep="first")

In [168]:
# Display the de-duplicated frame (pandas truncates the middle rows).
promotions

Unnamed: 0,prod_code,customer,location,category,time_var,promo_type
0,8684_05,ALDI,sansebastian,snack,2022-09-26T00:00:00.000+0000,
1,8684_05,ALDI,sansebastian,snack,2022-12-26T00:00:00.000+0000,-
2,8684_05,ALDI,sansebastian,snack,2023-04-03T00:00:00.000+0000,-
3,8684_05,ALDI,sansebastian,snack,2021-04-12T00:00:00.000+0000,3x2
4,8684_05,ALDI,sansebastian,snack,2021-11-22T00:00:00.000+0000,
...,...,...,...,...,...,...
5365,7196_04,aldi,Vitoria,beverage,2023-03-20T00:00:00.000+0000,-
5366,7196_04,aldi,Vitoria,beverage,2020-11-16T00:00:00.000+0000,-
5367,7196_04,aldi,Vitoria,beverage,2021-02-22T00:00:00.000+0000,-
5368,7196_04,aldi,Vitoria,beverage,2020-12-07T00:00:00.000+0000,-


## Checks

### customers

In [169]:
# List the distinct customer spellings to spot inconsistent casing.
promotions["customer"].unique()

array(['ALDI', 'lidl', 'LIDL', 'bm', 'EROSKI', 'aldi', 'Eroski', 'BM',
       'Mercadona', 'eroski'], dtype=object)

In [170]:
# Normalise retailer names to upper case so spelling variants such as
# 'aldi' / 'ALDI' or 'Eroski' / 'EROSKI' collapse into a single value.
# The vectorised `.str.upper()` replaces the per-value loop and, unlike
# calling `.upper()` on each unique value, leaves missing entries as NaN
# instead of raising AttributeError.
promotions["customer"] = promotions["customer"].str.upper()

In [171]:
# Confirm that each customer now appears under a single upper-case name.
promotions["customer"].unique()

array(['ALDI', 'LIDL', 'BM', 'EROSKI', 'MERCADONA'], dtype=object)

### locations

In [172]:
# List the distinct location values to check for duplicates or misspellings.
promotions["location"].unique()

array(['sansebastian', 'Vitoria', 'bilbao'], dtype=object)

### promo type

In [173]:
# List the distinct promo_type labels before cleaning.
promotions["promo_type"].unique()

array([nan, '-', '3x2', 'menos 50%', '10% descuento', '10% desc',
       'tres por dos', '20%', 'goodie'], dtype=object)

In [174]:
# Treat the '-' placeholder as a missing promo type.
dash_rows = promotions["promo_type"].eq('-')
promotions["promo_type"] = promotions["promo_type"].mask(dash_rows, np.nan)

In [175]:
# Map every free-text spelling onto one canonical code per promotion:
# "d<percent>%" for percentage discounts and "p3x2" for three-for-two.
# Labels that are not listed (e.g. 'goodie') and missing values are untouched.
promo_type_aliases = {
    '10% descuento': "d10%",
    "10% desc": "d10%",
    'menos 50%': "d50%",
    '20%': "d20%",
    'tres por dos': "p3x2",
    '3x2': "p3x2",
}
promotions["promo_type"] = promotions["promo_type"].replace(promo_type_aliases)

In [176]:
# Confirm that only the canonical promo_type codes (plus NaN) remain.
promotions["promo_type"].unique()

array([nan, 'p3x2', 'd50%', 'd10%', 'd20%', 'goodie'], dtype=object)

## Generate new features

In [177]:
# Build one 0/1 indicator column per distinct promo_type value, named after
# the value itself.
# NOTE: missing values are not skipped. A `!= np.nan` test cannot filter them
# because NaN compares unequal to everything, so a column literally named
# "nan" is created too. It only ever holds zeros and is dropped in a later
# cell.
for promo_value in promotions["promo_type"].unique():
    matches_value = promotions["promo_type"] == promo_value
    promotions[str(promo_value)] = matches_value.astype("int64")

In [178]:
# Keep only rows without a missing value in any column. Rows whose promo_type
# is NaN (no promotion) are removed here.
# NOTE(review): this also drops rows with NaN in other columns - confirm intended.
promotions = promotions.dropna(axis=0, how="any")

In [179]:
# Normalising customer and promo labels can make previously distinct rows
# identical, so de-duplicate again and keep the first occurrence.
promotions = promotions.drop_duplicates(keep="first")

In [180]:
# Flag each row by retailer group: 1 when the customer belongs to the listed
# set, 0 otherwise. Every customer flagged as Basque is also in the national list.
national_customers = ["BM","EROSKI","MERCADONA"]
basque_customers = ["BM","EROSKI"]
promotions["is_national"] = promotions["customer"].isin(national_customers).astype("int64")
promotions["is_basque"] = promotions["customer"].isin(basque_customers).astype("int64")

In [181]:
# Remove the "nan" indicator column that the feature loop creates for the
# missing promo type. `errors="ignore"` keeps this cell safe to re-run, or to
# run when that column was never created, instead of raising KeyError.
promotions = promotions.drop(columns="nan", errors="ignore")

In [182]:
# Collapse every percentage discount ("d10%", "d20%", "d50%", ...) into a
# single "disscount" bucket; all other promo types keep their own label.
# Discounts are recognised by the leading "d" of the cleaned promo_type code.
# The vectorised test replaces the per-value loop, and `na=False` treats
# missing or empty values as non-discounts rather than failing on `promo[0]`.
is_discount = promotions["promo_type"].str.startswith("d", na=False)
promotions["promo_type_aggregated"] = promotions["promo_type"].mask(is_discount, "disscount")

In [183]:
# Inspect the frame with the indicator, customer-group and aggregated columns.
promotions

Unnamed: 0,prod_code,customer,location,category,time_var,promo_type,p3x2,d50%,d10%,d20%,goodie,is_national,is_basque,promo_type_aggregated
3,8684_05,ALDI,sansebastian,snack,2021-04-12T00:00:00.000+0000,p3x2,1,0,0,0,0,0,0,p3x2
17,8684_05,ALDI,sansebastian,snack,2022-02-07T00:00:00.000+0000,d50%,0,1,0,0,0,0,0,disscount
21,8684_05,ALDI,sansebastian,snack,2021-05-03T00:00:00.000+0000,d10%,0,0,1,0,0,0,0,disscount
27,8684_05,ALDI,sansebastian,snack,2022-02-14T00:00:00.000+0000,d10%,0,0,1,0,0,0,0,disscount
35,8684_05,ALDI,sansebastian,snack,2021-03-08T00:00:00.000+0000,p3x2,1,0,0,0,0,0,0,p3x2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5325,7196_04,ALDI,Vitoria,beverage,2022-10-24T00:00:00.000+0000,p3x2,1,0,0,0,0,0,0,p3x2
5332,7196_04,ALDI,Vitoria,beverage,2022-05-30T00:00:00.000+0000,d10%,0,0,1,0,0,0,0,disscount
5338,7196_04,ALDI,Vitoria,beverage,2023-01-30T00:00:00.000+0000,d10%,0,0,1,0,0,0,0,disscount
5342,7196_04,ALDI,Vitoria,beverage,2022-05-23T00:00:00.000+0000,d50%,0,1,0,0,0,0,0,disscount


In [184]:
# List the final column names.
promotions.columns

Index(['prod_code', 'customer', 'location', 'category', 'time_var',
       'promo_type', 'p3x2', 'd50%', 'd10%', 'd20%', 'goodie', 'is_national',
       'is_basque', 'promo_type_aggregated'],
      dtype='object')

In [185]:
# Final look at the engineered promotions table.
promotions

Unnamed: 0,prod_code,customer,location,category,time_var,promo_type,p3x2,d50%,d10%,d20%,goodie,is_national,is_basque,promo_type_aggregated
3,8684_05,ALDI,sansebastian,snack,2021-04-12T00:00:00.000+0000,p3x2,1,0,0,0,0,0,0,p3x2
17,8684_05,ALDI,sansebastian,snack,2022-02-07T00:00:00.000+0000,d50%,0,1,0,0,0,0,0,disscount
21,8684_05,ALDI,sansebastian,snack,2021-05-03T00:00:00.000+0000,d10%,0,0,1,0,0,0,0,disscount
27,8684_05,ALDI,sansebastian,snack,2022-02-14T00:00:00.000+0000,d10%,0,0,1,0,0,0,0,disscount
35,8684_05,ALDI,sansebastian,snack,2021-03-08T00:00:00.000+0000,p3x2,1,0,0,0,0,0,0,p3x2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5325,7196_04,ALDI,Vitoria,beverage,2022-10-24T00:00:00.000+0000,p3x2,1,0,0,0,0,0,0,p3x2
5332,7196_04,ALDI,Vitoria,beverage,2022-05-30T00:00:00.000+0000,d10%,0,0,1,0,0,0,0,disscount
5338,7196_04,ALDI,Vitoria,beverage,2023-01-30T00:00:00.000+0000,d10%,0,0,1,0,0,0,0,disscount
5342,7196_04,ALDI,Vitoria,beverage,2022-05-23T00:00:00.000+0000,d50%,0,1,0,0,0,0,0,disscount
