In [None]:
# data analysis and wrangling
from datetime import datetime
import pandas as pd
import numpy as np
import statistics
from itertools import chain
# visualization
from IPython.core.display import HTML
from pandas.plotting import scatter_matrix
from statsmodels.graphics.tsaplots import plot_acf
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
from pprint import pprint
%matplotlib inline
import plotly.io as pio
pio.renderers.default='notebook'
#plotly
import plotly.io as pio
import plotly.express as px
from plotly.offline import download_plotlyjs,init_notebook_mode, plot, iplot
import plotly as py 
import plotly.graph_objs as go # plotly graphical object
# setting the general visualization style
sns.set_style('whitegrid')
# feature engineering
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
#Libraries for Statistical Models
import statsmodels.api as sm
# Silence every warning for the rest of the session.
# NOTE(review): this also hides deprecation warnings raised by the libraries
# imported above — consider narrowing the filter once the notebook is stable.
import warnings 
warnings.filterwarnings('ignore') 
# Echo the value of every top-level expression in a cell, not only the last
# one; cells below that contain several bare expressions rely on this.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [None]:
def _show_range_selector_line(rows, column, title):
    """Draw and show one line chart of ``column`` against ``date``, one line per ``channel``.

    The x-axis gets a range slider plus 1m / 6m / YTD / 1y / all selector buttons.
    """
    fig = px.line(rows, x='date', y=column, title=title,
                  color='channel', template="none")
    fig.update_xaxes(
        rangeslider_visible=True,
        rangeselector=dict(
            buttons=[
                dict(count=1, label="1m", step="month", stepmode="backward"),
                dict(count=6, label="6m", step="month", stepmode="backward"),
                dict(count=1, label="YTD", step="year", stepmode="todate"),
                dict(count=1, label="1y", step="year", stepmode="backward"),
                dict(step="all"),
            ]
        ),
    )
    fig.show()


def plotly(name, mode):
    """Show interactive per-channel line charts for a single product.

    Reads the notebook-level ``df_sku`` frame, keeps the rows whose
    ``full_name`` equals ``name`` and draws one Plotly Express line chart per
    metric (see ``_show_range_selector_line``).

    Parameters
    ----------
    name : str
        Value of the ``full_name`` column identifying the product to plot.
    mode : int
        ``1`` draws three charts, in this order: ``price_per_unit``, ``Sales``
        and ``units_sold``. Any other value draws only ``price_per_unit``.

    Returns
    -------
    None
        Each figure is displayed with ``fig.show()``.

    Notes
    -----
    The price chart is titled ``'Price_per_unit for <name>'`` in both modes.
    The earlier copy-pasted version used ``'Price_per_units for '`` in the
    single-chart branch although it plotted the same column.
    """
    # (column to plot, label used as the title prefix)
    charts = [('price_per_unit', 'Price_per_unit')]
    if mode == 1:
        charts += [('Sales', 'Sales'), ('units_sold', 'units_sold')]

    # Filter once; every chart for this product uses the same rows.
    product_rows = df_sku[df_sku['full_name'] == name]
    for column, label in charts:
        _show_range_selector_line(product_rows, column, label + ' for ' + name)


In [None]:
def plotly1(name, mode):
    """Show the ``price_per_unit`` history of one product under a custom title.

    Rows of the notebook-level ``df_sku`` frame whose ``full_name`` equals
    ``name`` are plotted against ``date`` with one line per ``channel``. The
    x-axis carries a range slider and 1m / 6m / YTD / 1y / all buttons.

    Parameters
    ----------
    name : str
        Value of the ``full_name`` column identifying the product.
    mode : str
        Despite its name, this value is used verbatim as the figure title.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.
    """
    product_rows = df_sku[df_sku['full_name'] == name]

    range_buttons = [
        {'count': 1, 'label': "1m", 'step': "month", 'stepmode': "backward"},
        {'count': 6, 'label': "6m", 'step': "month", 'stepmode': "backward"},
        {'count': 1, 'label': "YTD", 'step': "year", 'stepmode': "todate"},
        {'count': 1, 'label': "1y", 'step': "year", 'stepmode': "backward"},
        {'step': "all"},
    ]

    fig = px.line(
        product_rows,
        x='date',
        y='price_per_unit',
        title=mode,
        color='channel',
        template="none",
    )
    fig.update_xaxes(
        rangeslider_visible=True,
        rangeselector={'buttons': range_buttons},
    )
    fig.show()

In [None]:
def plot(data, full_name=None):
    """Draw a static seaborn line chart of ``price_per_unit`` over ``date`` for one product.

    One line is drawn per ``channel`` and the product name is used as the
    axes title. Note that this definition replaces the ``plot`` name imported
    from ``plotly.offline`` at the top of the notebook.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding the ``full_name``, ``date``, ``price_per_unit`` and
        ``channel`` columns. All plotted values now come from this frame; the
        earlier version took the row mask, ``y`` and ``hue`` from the global
        ``df_sku``, which only gave a consistent chart when ``data`` was
        ``df_sku`` itself.
    full_name : str, optional
        Product to plot (value of the ``full_name`` column). When omitted the
        function falls back to a notebook-level variable called ``name``,
        which is what the earlier version always read.

    Raises
    ------
    NameError
        If ``full_name`` is omitted and no notebook-level ``name`` exists.
    """
    # `name` is deliberately resolved as a global here: it keeps old
    # `plot(data)` calls working exactly as before (including the NameError).
    product = name if full_name is None else full_name

    product_rows = data[data['full_name'] == product]
    ax = sns.lineplot(data=product_rows, x='date', y='price_per_unit', hue='channel')
    ax.set_title(product)
    plt.show()

# Labeling: whether it is a promo, promo length, and promo frequency

In [None]:
# Render the whole `labels` frame as an HTML table.
# NOTE(review): `labels` is only assigned in a later cell (the read of
# promo_label_new.csv), so on a fresh kernel that cell must run before this one.
display(HTML(labels.to_html()))

In [None]:
# Number of distinct product names (`full_name`) present in `labels`.
# NOTE(review): like the cell above, this needs `labels` from the loading cell further down.
labels['full_name'].drop_duplicates().count()

In [None]:
# Load the imputed SKU data and the promo labels; the labels file carries a
# stray 'Unnamed: 0' column that is dropped straight away.
df_sku = pd.read_csv("/project/data_cleaning/imputed2.csv", index_col=0)
labels = pd.read_csv(
    "/project/data_for_models/promo_label_new.csv"
).drop(columns=['Unnamed: 0'])

# Keep only the products that have a label, then attach the labels to each
# row by (full_name, channel). A left join keeps every remaining SKU row.
df_sku = df_sku.loc[df_sku['full_name'].isin(labels['full_name'])]
df_labels = df_sku.merge(labels, how='left', on=['full_name', 'channel'])

# Export for inspection in Excel and show the merged frame.
df_labels.to_excel("/project/data_for_models/df_labels.xlsx", sheet_name='Sheet1')
df_labels

In [None]:
# mode 1: show the price_per_unit, Sales and units_sold charts for this SKU.
plotly('BSCTS RCH T MCVTS RCH T PLN 300 GM SNGL',1)

In [None]:
# Figure 1 is titled as the McVitie's Rich Tea Plain 300g single pack, so the
# Rich Tea SKU is plotted here. The call previously passed
# 'JFF CKS DRK CHCLT & ORNG 244 GM SNGL', which does not match this title.
plotly1('BSCTS RCH T MCVTS RCH T PLN 300 GM SNGL',
        'Figure 1: Price Movement for McVitie\'s Rich Tea Plain 300g Single Pack')

In [None]:
# Load the promo-labelled frame and discard the volume columns and the
# intermediate labelling columns (threshold/change flags and the per-promo
# cumulative counters Cum.promo1 .. Cum.promo23).
df_labeled = pd.read_csv(
    "/project/data_for_models/df_promo_labeled_new.csv", index_col=0
).drop(columns=[
    'units_sold', 'kg_sold', 'treshold', 'change', '2nd', 'Break',
    'Cum.promo1', 'Cum.promo2', 'Cum.promo3', 'Cum.promo4', 'Cum.promo5',
    'Cum.promo6', 'Cum.promo7', 'Cum.promo8', 'Cum.promo9', 'Cum.promo10',
    'Cum.promo11', 'Cum.promo12', 'Cum.promo13', 'Cum.promo14', 'Cum.promo15',
    'Cum.promo16', 'Cum.promo17', 'Cum.promo18', 'Cum.promo19', 'Cum.promo20',
    'Cum.promo21', 'Cum.promo22', 'Cum.promo23',
])

# Final column order. `date` stays as read from the CSV (the datetime
# conversion was disabled in this cell).
column_names = [
    "full_name", "brand", "company", 'Subsegment', 'flavour', 'pack_type',
    'weight', 'channel', 'retailer', 'format',
    'date', 'Year', 'WeekOfYear', 'Month',
    'Sales', 'distribution', 'price_per_kg', 'price_per_unit',
    'promo', 'Cum.promo.total',
]
df_labeled = df_labeled.reindex(columns=column_names)

# Quick summary, then export for inspection in Excel.
df_labeled.describe()
df_labeled.info()
df_labeled.to_excel("/project/data_for_models/df_labeled_new.xlsx", sheet_name='Sheet1')

# NOTE(review): this displays the de-duplicated `df_labels` frame from the
# earlier merge cell (not `df_labeled`) and does not store the result —
# confirm which frame was intended.
df_labels.drop_duplicates()

# promo depth

In [None]:
# Attach the promo-depth labels to every row of `df_labeled` (built in the
# previous cell), matching on product and channel, and export the combined
# table for inspection in Excel.
promo_label_depth = pd.read_csv(
    "/project/data_for_models/promo_label_depth_new.csv", index_col=0
)
depth = df_labeled.merge(promo_label_depth, how='left', on=['full_name', 'channel'])
depth.to_excel("/project/data_for_models/depth.xlsx")

In [None]:
# Load the depth table and drop the manual-labelling helper columns
# (break dates and the intermediate base columns).
depth_final = pd.read_csv(
    "/project/data_for_models/depth_new.csv", index_col=0
).drop(columns=[
    'mannual', 'base0', 'break_total',
    'date_break1', 'base1',
    'date_break2', 'base2',
    'date_break3', 'base3',
    'date_break4', 'base4',
])
depth_final['date'] = pd.to_datetime(depth_final['date'])

# Promo depth = relative gap between the observed unit price and base_price1.
# It is written only on rows where promo == 1; other rows are not touched.
depth_final.loc[depth_final['promo'] == 1, 'Promo_depth'] = (
    (depth_final['price_per_unit'] - depth_final['base_price1'])
    / depth_final['base_price1']
)

# Persist the final modelling table (de-duplication was disabled in this cell).
depth_final.to_csv("/project/data_for_models/df_final.csv")

In [None]:
# Summarise the final frame: column dtypes / non-null counts first, then
# descriptive statistics.
depth_final.info()
depth_final.describe()