In [1282]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.graph_objs as go
import plotly.express as px
from scipy.stats import zscore
from sklearn.preprocessing import StandardScaler,MinMaxScaler


import warnings
warnings.filterwarnings("ignore")

In [1283]:
df = pd.read_csv("Output/Df_new.csv")
print(df.shape)

(420212, 16)


In [1284]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 420212 entries, 0 to 420211
Data columns (total 16 columns):
 #   Column        Non-Null Count   Dtype  
---  ------        --------------   -----  
 0   Store         420212 non-null  int64  
 1   Dept          420212 non-null  int64  
 2   Date          420212 non-null  object 
 3   Weekly_Sales  420212 non-null  float64
 4   IsHoliday     420212 non-null  bool   
 5   Temperature   420212 non-null  float64
 6   Fuel_Price    420212 non-null  float64
 7   MarkDown1     420212 non-null  float64
 8   MarkDown2     420212 non-null  float64
 9   MarkDown3     420212 non-null  float64
 10  MarkDown4     420212 non-null  float64
 11  MarkDown5     420212 non-null  float64
 12  CPI           420212 non-null  float64
 13  Unemployment  420212 non-null  float64
 14  Type          420212 non-null  object 
 15  Size          420212 non-null  int64  
dtypes: bool(1), float64(10), int64(3), object(2)
memory usage: 48.5+ MB


Lojas e Departamentos

In [1285]:
import plotly.graph_objects as go

# Criar o gráfico de linha
fig = go.Figure()

avg_sales_per_store_department = df.groupby(['Date'])['Weekly_Sales'].mean().reset_index()

fig.add_trace(go.Scatter(x=avg_sales_per_store_department['Date'], y=avg_sales_per_store_department['Weekly_Sales'], mode='lines', name='Quantidade de Vendas da Semana'))
fig.add_trace(go.Scatter(x=avg_sales_per_store_department['Date'], y=avg_sales_per_store_department['Weekly_Sales'], mode='markers', name='Semana', marker=dict(color='red', size=8)))

# Adicionar título e rótulos dos eixos
fig.update_layout(title='Media do Volume de Vendas Semanais ao Longo do Tempo',
                   xaxis_title='Data',
                   yaxis_title='Vendas Semanais')

fig.update_layout(
    xaxis=dict(
        rangeselector=dict(
            buttons=list([
                dict(count=30,
                     label="1M",
                     step="day",
                     stepmode="todate"),
                dict(step="all")
            ])
        ),
        rangeslider=dict(
            visible=True
        ),
        type="date"
    )
)


fig.show()

In [1286]:
import plotly.graph_objects as go

# Criar o gráfico de linha
fig = go.Figure()

avg_sales_per_store = df.groupby(['Store'])['Weekly_Sales'].mean().reset_index()

# Use the hovertext kw argument for hover text
fig = go.Figure(data=[go.Bar(x=avg_sales_per_store['Store'], y=avg_sales_per_store['Weekly_Sales'], name='Volume Medio de Vendas por Loja')])

media_vendas = avg_sales_per_store['Weekly_Sales'].mean()
fig.add_hline(y=media_vendas, line_dash='dash', line_color='red', annotation_text=f'Média: {media_vendas:.2f}', annotation_position='bottom right')


fig.update_layout(title_text='Volume Medio de Vendas por Loja', xaxis_title='Numero da Loja',
                   yaxis_title='Volume Medio de Vendas')

fig.show()

In [1287]:
Melhores_Lojas = avg_sales_per_store.loc[avg_sales_per_store['Weekly_Sales']>= media_vendas]

In [1288]:
Melhores_Lojas.nunique()

Store           19
Weekly_Sales    19
dtype: int64

In [1289]:
fig = go.Figure(data=[go.Table(
    header=dict(values=list(Melhores_Lojas.columns),  
                align='left'),
    cells=dict(values=[Melhores_Lojas['Store'], Melhores_Lojas['Weekly_Sales']],
               align='left'))
])

# Definir os botões de seleção para ordenar as lojas
dropdown_buttons = [
    dict(label='Menor para Maior',
         method='restyle',
         args=[{'cells.values[0]': [Melhores_Lojas['Store'].iloc[Melhores_Lojas['Weekly_Sales'].argsort()]],
                'cells.values[1]': [Melhores_Lojas['Weekly_Sales'].sort_values()]}]),
    dict(label='Maior para Menor',
         method='restyle',
         args=[{'cells.values[0]': [Melhores_Lojas['Store'].iloc[Melhores_Lojas['Weekly_Sales'].argsort()[::-1]]],
                'cells.values[1]': [Melhores_Lojas['Weekly_Sales'].sort_values(ascending=False)]}])
]

# Adicionar botões de seleção ao layout
fig.update_layout(
    updatemenus=[
        dict(
            buttons=dropdown_buttons,
            direction='down',
            pad={'r': 10, 't': 10},
            showactive=True,
            x=0.87,
            xanchor='left',
            y=1.2,
            yanchor='top'
        ),
    ]
)

# Adicionar título e rótulos dos eixos
fig.update_layout(title_text='Lojas com Maior Volume Médio de Vendas', xaxis_title='Número da Loja',
                   yaxis_title='Volume Médio de Vendas')

# Exibir o gráfico
fig.show()

In [1290]:
Stores = Melhores_Lojas['Store'].unique()

In [1291]:
Stores

array([ 1,  2,  4,  6, 10, 11, 13, 14, 18, 19, 20, 23, 24, 27, 28, 31, 32,
       39, 41])

In [1292]:
# Extrair todas as linhas que correspondem às lojas na lista
Stores_info = df.loc[df['Store'].isin(Stores)]

In [1293]:
Stores_info.shape

(192433, 16)

In [1294]:
Stores_info['Store'].unique()

array([ 1,  2,  4,  6, 10, 11, 13, 14, 18, 19, 20, 23, 24, 27, 28, 31, 32,
       39, 41])

In [1295]:
Stores_info.columns

Index(['Store', 'Dept', 'Date', 'Weekly_Sales', 'IsHoliday', 'Temperature',
       'Fuel_Price', 'MarkDown1', 'MarkDown2', 'MarkDown3', 'MarkDown4',
       'MarkDown5', 'CPI', 'Unemployment', 'Type', 'Size'],
      dtype='object')

In [1296]:
avg_sales_per_store_department = Stores_info.groupby(['Store','Dept'])['Weekly_Sales'].mean().reset_index()

In [1297]:
avg_sales_per_store_department

Unnamed: 0,Store,Dept,Weekly_Sales
0,1,1,22513.322937
1,1,2,46102.090420
2,1,3,13150.478042
3,1,4,36964.154476
4,1,5,24257.941119
...,...,...,...
1465,41,95,88666.468392
1466,41,96,2883.492238
1467,41,97,19789.219231
1468,41,98,9371.531608


In [1298]:
avg_sales_per_store_department.nunique()

Store             19
Dept              80
Weekly_Sales    1468
dtype: int64

In [1299]:
avg_sales_per_store_department['Store'].unique()

array([ 1,  2,  4,  6, 10, 11, 13, 14, 18, 19, 20, 23, 24, 27, 28, 31, 32,
       39, 41])

In [1300]:
lojas_unicas = avg_sales_per_store_department['Store'].unique()

fig = go.Figure(data=[go.Table(
    header=dict(values=list(avg_sales_per_store_department.columns),  
                align='left'),
    cells=dict(values=[avg_sales_per_store_department['Store'],avg_sales_per_store_department['Dept'], avg_sales_per_store_department['Weekly_Sales']],
               align='left'))
])

# Definir os botões de seleção para ordenar as lojas
dropdown_buttons = [
    dict(label='Menor para Maior',
         method='restyle',
         args=[{'cells.values[0]': [avg_sales_per_store_department['Store'].iloc[avg_sales_per_store_department['Weekly_Sales'].argsort()]],
                'cells.values[1]': [avg_sales_per_store_department['Dept'].iloc[avg_sales_per_store_department['Weekly_Sales'].argsort()]],
                'cells.values[2]': [avg_sales_per_store_department['Weekly_Sales'].sort_values()]}]),
    dict(label='Maior para Menor',
         method='restyle',
         args=[{'cells.values[0]': [avg_sales_per_store_department['Store'].iloc[avg_sales_per_store_department['Weekly_Sales'].argsort()[::-1]]],
                'cells.values[1]': [avg_sales_per_store_department['Dept'].iloc[avg_sales_per_store_department['Weekly_Sales'].argsort()[::-1]]],
                'cells.values[2]': [avg_sales_per_store_department['Weekly_Sales'].sort_values(ascending=False)]}])
]

# Adicionar botões de seleção ao layout
fig.update_layout(
    updatemenus=[
        dict(
            buttons=dropdown_buttons,
            direction='down',
            pad={'r': 10, 't': 10},
            showactive=True,
            x=0.87,
            xanchor='left',
            y=1.2,
            yanchor='top'
        ),
        
    ]
)


# Exibir o gráfico
fig.show()

In [1301]:
import plotly.graph_objects as go

# Supondo que você tenha um DataFrame chamado 'avg_sales_per_store_department'

# Lista de lojas únicas
lojas_unicas = avg_sales_per_store_department['Store'].unique()

# Criar a tabela
fig = go.Figure(data=[go.Table(
    header=dict(values=list(avg_sales_per_store_department.columns),  
                align='left'),
    cells=dict(values=[avg_sales_per_store_department['Store'], avg_sales_per_store_department['Dept'], avg_sales_per_store_department['Weekly_Sales']],
               align='left'))
])

# Criar botões de seleção para cada loja
buttons = []
for loja in lojas_unicas:
    button = dict(label=str(loja),
                  method='update',
                  args=[{'cells.values[0]': [avg_sales_per_store_department[avg_sales_per_store_department['Store'] == loja]['Store']],
                         'cells.values[1]': [avg_sales_per_store_department[avg_sales_per_store_department['Store'] == loja]['Dept']],
                         'cells.values[2]': [avg_sales_per_store_department[avg_sales_per_store_department['Store'] == loja]['Weekly_Sales']]}])
    buttons.append(button)

# Adicionar o botão "Selecione uma loja" e os botões de seleção para cada loja
buttons.insert(0, dict(label='Selecione uma loja',
                       method='update',
                       args=[{'cells.values[0]': [avg_sales_per_store_department['Store']],
                              'cells.values[1]': [avg_sales_per_store_department['Dept']],
                              'cells.values[2]': [avg_sales_per_store_department['Weekly_Sales']]}]))

# Adicionar botões de seleção ao layout
fig.update_layout(
    updatemenus=[dict(
            buttons=buttons,
            direction='down',
            pad={'r': 10, 't': 10},
            showactive=True,
            x=0.86,
            xanchor='left',
            y=1.2,
            yanchor='top'
        )]
)

# Exibir o gráfico
fig.show()


In [1302]:
import plotly.express as px
fig = px.scatter(avg_sales_per_store_department, y=avg_sales_per_store_department['Weekly_Sales'], x=avg_sales_per_store_department['Store'], color='Weekly_Sales',hover_name='Dept',
                 labels={'Weekly_Sales': 'Vendas Semanais', 'Dept': 'Departamento', 'Store': 'Loja'})

media_vendas = avg_sales_per_store_department['Weekly_Sales'].mean()
fig.add_hline(y=media_vendas, line_dash='dash', line_color='red', annotation_text=f'Média: {media_vendas:.2f}', annotation_position='bottom right')

# Exibir o gráfico
fig.show()

Feriado

In [1303]:
avg_sales_per_holiday = df.groupby(['IsHoliday'])['Weekly_Sales'].mean().reset_index()

In [1304]:
avg_sales_per_holiday

Unnamed: 0,IsHoliday,Weekly_Sales
0,False,15952.816352
1,True,17094.300918


In [1305]:
import plotly.graph_objects as go

# Criar o gráfico de linha
fig = go.Figure()

# Use the hovertext kw argument for hover text
fig = go.Figure(data=[go.Bar(x=avg_sales_per_holiday['IsHoliday'], y=avg_sales_per_holiday['Weekly_Sales'])])

fig.show()

In [1306]:
df_holiday = df.loc[df['IsHoliday']==True]
df_holiday['Date'].unique() 

array(['2010-02-12', '2010-09-10', '2010-11-26', '2010-12-31',
       '2011-02-11', '2011-09-09', '2011-11-25', '2011-12-30',
       '2012-02-10', '2012-09-07'], dtype=object)

In [1307]:
df_holiday = df.loc[df['IsHoliday']==True]
df_holiday['Date'].nunique() 

10

In [1308]:
df_not_holiday = df.loc[df['IsHoliday']==False]
df_not_holiday['Date'].nunique() 

133

There are 4 holiday values such as;

Super Bowl: 12-Feb-10, 11-Feb-11, 10-Feb-12, 8-Feb-13

Labor Day: 10-Sep-10, 9-Sep-11, 7-Sep-12, 6-Sep-13

Thanksgiving: 26-Nov-10, 25-Nov-11, 23-Nov-12, 29-Nov-13

Christmas: 31-Dec-10, 30-Dec-11, 28-Dec-12, 27-Dec-13

After the 07-Sep-2012 holidays are in test set for prediction. 

In [1309]:
super_bowl_dates = ["2010-02-12", "2011-02-11", "2012-02-10"]
df['SuperBowl'] = df['Date'].isin(super_bowl_dates)

In [1310]:
Labor_day_dates = ["2010-09-10", "2011-10-09", "2012-02-07"]
df['LaborDay'] = df['Date'].isin(Labor_day_dates)

In [1311]:
Thanksgiving_dates = ["2010-11-26", "2011-11-25", "2012-11-23"]
df['Thanksgiving'] = df['Date'].isin(Thanksgiving_dates)

In [1312]:
Christmas_dates = ["2010-12-31", "2011-12-30", "2012-12-28"]
df['Christmas'] = df['Date'].isin(Christmas_dates)

In [1313]:
df['SuperBowl'].value_counts()

SuperBowl
False    411339
True       8873
Name: count, dtype: int64

In [1314]:
df['LaborDay'].value_counts()

LaborDay
False    417287
True       2925
Name: count, dtype: int64

In [1315]:
df['Thanksgiving'].value_counts()

Thanksgiving
False    414266
True       5946
Name: count, dtype: int64

In [1316]:
df['Christmas'].value_counts()

Christmas
False    414303
True       5909
Name: count, dtype: int64

In [1317]:
avg_sales_per_SuperBowl = df.groupby(['SuperBowl'])['Weekly_Sales'].mean().reset_index()

In [1318]:
avg_sales_per_LaborDay = df.groupby(['LaborDay'])['Weekly_Sales'].mean().reset_index()

In [1319]:
avg_sales_per_Thanksgiving = df.groupby(['Thanksgiving'])['Weekly_Sales'].mean().reset_index()

In [1320]:
avg_sales_per_Christmas = df.groupby(['Christmas'])['Weekly_Sales'].mean().reset_index()

In [1321]:
avg_sales_per_SuperBowl

Unnamed: 0,SuperBowl,Weekly_Sales
0,False,16024.795449
1,True,16418.777595


In [1322]:
avg_sales_per_LaborDay

Unnamed: 0,LaborDay,Weekly_Sales
0,False,16036.13755
1,True,15601.852533


In [1323]:
avg_sales_per_Thanksgiving

Unnamed: 0,Thanksgiving,Weekly_Sales
0,False,15943.601687
1,True,22269.601768


In [1324]:
avg_sales_per_Christmas

Unnamed: 0,Christmas,Weekly_Sales
0,False,16053.053218
1,True,14635.139843


In [1325]:
import plotly.graph_objects as go

# Criar o gráfico de linha
fig = go.Figure()

# Use the hovertext kw argument for hover text
fig = go.Figure(data=[go.Bar(x=avg_sales_per_SuperBowl['SuperBowl'], y=avg_sales_per_SuperBowl['Weekly_Sales'])])

fig.show()

In [1326]:
import plotly.graph_objects as go

# Criar o gráfico de linha
fig = go.Figure()

# Use the hovertext kw argument for hover text
fig = go.Figure(data=[go.Bar(x=avg_sales_per_LaborDay['LaborDay'], y=avg_sales_per_LaborDay['Weekly_Sales'])])

fig.show()

In [1327]:
import plotly.graph_objects as go

# Criar o gráfico de linha
fig = go.Figure()

# Use the hovertext kw argument for hover text
fig = go.Figure(data=[go.Bar(x=avg_sales_per_Thanksgiving['Thanksgiving'], y=avg_sales_per_Thanksgiving['Weekly_Sales'])])

fig.show()

In [1328]:
import plotly.graph_objects as go

# Criar o gráfico de linha
fig = go.Figure()

# Use the hovertext kw argument for hover text
fig = go.Figure(data=[go.Bar(x=avg_sales_per_Christmas['Christmas'], y=avg_sales_per_Christmas['Weekly_Sales'])])

fig.show()

It is shown that for the graphs, Labor Day and Christmas do not increase weekly average sales. There is positive effect on sales in Super bowl, but the highest difference is in the Thanksgiving. I think, people generally prefer to buy Christmas gifts 1-2 weeks before Christmas, so it does not change sales in the Christmas week. And, there is Black Friday sales in the Thanksgiving week.

Type Effect on Holidays

In [1329]:
df.columns

Index(['Store', 'Dept', 'Date', 'Weekly_Sales', 'IsHoliday', 'Temperature',
       'Fuel_Price', 'MarkDown1', 'MarkDown2', 'MarkDown3', 'MarkDown4',
       'MarkDown5', 'CPI', 'Unemployment', 'Type', 'Size', 'SuperBowl',
       'LaborDay', 'Thanksgiving', 'Christmas'],
      dtype='object')

In [1330]:
df['Type'].value_counts()

Type
A    214961
B    162787
C     42464
Name: count, dtype: int64

In [1331]:
avg_sales_per_type_superbowl = df.groupby(['Type','SuperBowl'])['Weekly_Sales'].mean().reset_index()

In [1332]:
avg_sales_per_type_laborday = df.groupby(['Type','LaborDay'])['Weekly_Sales'].mean().reset_index()

In [1333]:
avg_sales_per_type_thanksgiving = df.groupby(['Type','Thanksgiving'])['Weekly_Sales'].mean().reset_index()

In [1334]:
avg_sales_per_type_christman = df.groupby(['Type','Christmas'])['Weekly_Sales'].mean().reset_index()

In [1335]:
import plotly.express as px
fig = px.scatter(avg_sales_per_type_superbowl, x=avg_sales_per_type_superbowl['Type'], y=avg_sales_per_type_superbowl['Weekly_Sales'], color=avg_sales_per_type_superbowl['SuperBowl'])
fig.show()

In [1336]:
import plotly.express as px
fig = px.scatter(avg_sales_per_type_laborday, x=avg_sales_per_type_laborday['Type'], y=avg_sales_per_type_laborday['Weekly_Sales'], color=avg_sales_per_type_laborday['LaborDay'])
fig.show()

In [1337]:
import plotly.express as px
fig = px.scatter(avg_sales_per_type_thanksgiving, x=avg_sales_per_type_thanksgiving['Type'], y=avg_sales_per_type_thanksgiving['Weekly_Sales'], color=avg_sales_per_type_thanksgiving['Thanksgiving'])
fig.show()

In [1338]:
avg_sales_per_type = df.groupby(['Type'])['Weekly_Sales'].mean().reset_index()

In [1339]:
avg_sales_per_type.value_counts()

Type  Weekly_Sales
A     20148.108162    1
B     12290.549297    1
C     9549.454168     1
Name: count, dtype: int64

In [1340]:
Total_sum = avg_sales_per_type['Weekly_Sales'].sum()

In [1341]:
# Calculate the percentage of each store type
store_type_percentage = (avg_sales_per_type['Weekly_Sales'] / Total_sum) * 100

In [1342]:
store_type_percentage

0    47.985269
1    29.271498
2    22.743233
Name: Weekly_Sales, dtype: float64

In [1343]:
labels = ['A','B','C']

In [1344]:
fig = px.pie(store_type_percentage, values = store_type_percentage, names = labels, title='Porcentagem de vendas de cada tipo de loja')
fig.show()

In [1345]:
df["Date"] = pd.to_datetime(df["Date"])  # convert to datetime
df['week'] = df['Date'].dt.isocalendar().week
df['month'] = df['Date'].dt.month 
df['year'] = df['Date'].dt.year

In [1346]:
avg_sales_per_week = df.groupby(['week','year'])['Weekly_Sales'].mean().reset_index()

In [1347]:
avg_sales_per_month = df.groupby(['month','year'])['Weekly_Sales'].mean().reset_index()

In [1348]:
avg_sales_per_year = df.groupby(['year'])['Weekly_Sales'].mean().reset_index()

In [1349]:
fig = px.line(avg_sales_per_week, x=avg_sales_per_week['week'], y=avg_sales_per_week['Weekly_Sales'], text=avg_sales_per_week['week'], color=avg_sales_per_week['year'])
fig.update_traces(textposition="bottom right")
fig.show()

In [1350]:
fig = px.line(avg_sales_per_month, x=avg_sales_per_month['month'], y=avg_sales_per_month['Weekly_Sales'], text=avg_sales_per_month['month'], color=avg_sales_per_month['year'])
fig.update_traces(textposition="bottom right")
fig.show()

Fuel Price, CPI , Unemployment , Temperature Effects

In [1351]:
df.columns

Index(['Store', 'Dept', 'Date', 'Weekly_Sales', 'IsHoliday', 'Temperature',
       'Fuel_Price', 'MarkDown1', 'MarkDown2', 'MarkDown3', 'MarkDown4',
       'MarkDown5', 'CPI', 'Unemployment', 'Type', 'Size', 'SuperBowl',
       'LaborDay', 'Thanksgiving', 'Christmas', 'week', 'month', 'year'],
      dtype='object')

In [1352]:
avg_sales_per_fuelprice = df.groupby(['Fuel_Price'])['Weekly_Sales'].mean().reset_index()

In [1353]:
fig = px.line(avg_sales_per_fuelprice, x=avg_sales_per_fuelprice['Fuel_Price'], y=avg_sales_per_fuelprice['Weekly_Sales'])
fig.update_traces(textposition="bottom right")
fig.show()

In [1354]:
avg_sales_per_cpi = df.groupby(['CPI'])['Weekly_Sales'].mean().reset_index()

In [1355]:
fig = px.line(avg_sales_per_cpi, x=avg_sales_per_cpi['CPI'], y=avg_sales_per_cpi['Weekly_Sales'])
fig.update_traces(textposition="bottom right")
fig.show()

In [1356]:
avg_sales_per_Unemployment = df.groupby(['Unemployment'])['Weekly_Sales'].mean().reset_index()

In [1357]:
fig = px.line(avg_sales_per_Unemployment, x=avg_sales_per_Unemployment['Unemployment'], y=avg_sales_per_Unemployment['Weekly_Sales'])
fig.update_traces(textposition="bottom right")
fig.show()

In [1358]:
avg_sales_per_temp = df.groupby(['Temperature'])['Weekly_Sales'].mean().reset_index()

In [1359]:
fig = px.line(avg_sales_per_temp, x=avg_sales_per_temp['Temperature'], y=avg_sales_per_temp['Weekly_Sales'])
fig.update_traces(textposition="bottom right")
fig.show()