In [1]:
#import the needed libraries
import pandas as pd 
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split

**Data Collection**

In [9]:
# Read the Excel workbook, discard the 'Appels EGP' column and use the
# 'Date' column as the index. Built as one chain so re-running the cell
# always starts from the file rather than mutating an existing frame.
file="../Service Clients Darty 2017-2021.xlsx"
df = (
    pd.read_excel(file)
    .drop(columns=['Appels EGP'])
    .set_index('Date')
)
# Show the first and last rows of the indexed frame.
df.head(),df.tail()

(            Appels
 Date              
 2017-01-01     0.0
 2017-01-02  5774.0
 2017-01-03  5744.0
 2017-01-04  5833.0
 2017-01-05  5588.0,
             Appels
 Date              
 2021-12-27     NaN
 2021-12-28     NaN
 2021-12-29     NaN
 2021-12-30     NaN
 2021-12-31     NaN)

In [10]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
df.describe() 

Unnamed: 0,Appels
count,1734.0
mean,5461.132065
std,2635.572188
min,0.0
25%,4211.75
50%,5804.5
75%,6899.75
max,16049.0


**Split the data (train: first 3 years, 2017–2019; test: 2020 onwards)**


In [12]:
# Chronological hold-out: rows dated strictly before the cutoff go to the
# training set, rows dated on or after it go to the test set.
split_date = '2020-01-01'
train_df = df.loc[df.index < split_date]
test_df = df.loc[df.index >= split_date]
# Show the rows on either side of the cutoff.
train_df.tail(),test_df.head()

(            Appels
 Date              
 2019-12-27  5968.0
 2019-12-28  3952.0
 2019-12-29   868.0
 2019-12-30  5520.0
 2019-12-31  4051.0,
             Appels
 Date              
 2020-01-01     0.0
 2020-01-02  5979.0
 2020-01-03  5690.0
 2020-01-04  3834.0
 2020-01-05   687.0)

In [13]:
def resample_df(df, freq):
    """
    Aggregate a time-indexed dataframe into fixed-frequency bins by summing.

    args:
    - df: dataframe to resample
    - freq: string giving the bin frequency, e.g. 'Y', 'W', 'M'

    returns: dataframe with one row per bin holding the sum of each column
    """
    resampler = df.resample(freq)
    totals = resampler.sum()
    return totals

In [69]:
def selectPeriod(df,period):
    """
    Extract the period label of every row from the datetime index.

    args:
    - df: dataframe (or series) indexed by a DatetimeIndex
    - period: periodicity code, one of
        'Y' -> calendar year
        'M' -> month number
        'W' -> ISO week number

    returns: pandas Index of integers, one label per row of df

    raises: ValueError if period is not 'Y', 'M' or 'W'
    """
    if period == 'Y':
        return df.index.year
    if period == 'M':
        return df.index.month
    if period == 'W':
        # DatetimeIndex.week was deprecated in pandas 1.1 and removed in 2.0;
        # isocalendar().week is the documented replacement. It returns a
        # UInt32 Series, so convert it back to a plain integer Index to keep
        # the same kind of return value as the other branches.
        iso_week = df.index.isocalendar().week
        return pd.Index(iso_week.to_numpy(dtype='int64'), name=df.index.name)
    # Previously an unknown code fell through and returned None, which only
    # failed later (and obscurely) in the caller.
    raise ValueError(
        "Unsupported period %r: expected 'Y', 'M' or 'W'" % (period,)
    )


In [88]:
def selectSeason(df,season):
    """
    Extract the season label of every row from the datetime index.

    args:
    - df: dataframe (or series) indexed by a DatetimeIndex
    - season: seasonality code, one of
        'M' -> month number
        'W' -> ISO week number
        'D' -> day of the week (Monday=0 ... Sunday=6)

    returns: pandas Index of integers, one label per row of df

    raises: ValueError if season is not 'M', 'W' or 'D'
    """
    if season == 'M':
        return df.index.month
    if season == 'W':
        # DatetimeIndex.week was deprecated in pandas 1.1 and removed in 2.0;
        # isocalendar().week is the documented replacement. It returns a
        # UInt32 Series, so convert it back to a plain integer Index to keep
        # the same kind of return value as the other branches.
        iso_week = df.index.isocalendar().week
        return pd.Index(iso_week.to_numpy(dtype='int64'), name=df.index.name)
    if season == 'D':
        return df.index.weekday
    # Previously an unknown code fell through and returned None, which only
    # failed later (and obscurely) in the caller.
    raise ValueError(
        "Unsupported season %r: expected 'M', 'W' or 'D'" % (season,)
    )

In [91]:
def getBuysBallot(df,period='W',season='D'):
    """
    Transform a time-indexed df into a Buys-Ballot table.

    The rows of df are grouped by period label (see selectPeriod) and, inside
    each period, summed by season label (see selectSeason).

    args:
    - df: series or single-column dataframe indexed by a DatetimeIndex
    - period: periodicity code forwarded to selectPeriod ('Y', 'M' or 'W')
    - season: seasonality code forwarded to selectSeason ('M', 'W' or 'D')

    returns: dataframe with one row per period (labelled with str(period
    value), in ascending period order) and one column per season value;
    a season that does not occur in a given period is NaN in that row

    raises: ValueError if df has more than one value column
    """
    selected_period = selectPeriod(df,period)

    # Sort the distinct labels: iterating a bare set gives no guaranteed
    # order, so the row order of the table was not deterministic.
    season_totals = {}
    for period_value in sorted(set(selected_period)):
        in_period = selected_period == period_value
        period_rows = df[in_period]
        totals = period_rows.groupby(selectSeason(period_rows,season)).sum()
        if isinstance(totals, pd.DataFrame):
            if totals.shape[1] != 1:
                raise ValueError(
                    "getBuysBallot expects a single value column, got %d"
                    % totals.shape[1]
                )
            totals = totals.iloc[:, 0]
        season_totals[str(period_value)] = totals

    if not season_totals:
        # Empty input: same result as before (an empty dataframe).
        return pd.DataFrame()

    # Build the table in one go. Assigning column by column to an empty
    # dataframe aligned every period on the season index of the FIRST period,
    # silently dropping seasons that only show up in later periods; concat
    # takes the union of all season labels instead.
    df_buysBallot = pd.concat(season_totals, axis=1, sort=True).T

    return df_buysBallot

In [78]:
def compute_mean(df,period='Y',season='M'):
    """
    Compute the mean of each period of the Buys-Ballot table of a dataframe

    args:
    - df: dataframe to analyse, indexed by a DatetimeIndex
    - period: string that gives the period ie. 'Y','M','W'
    - season: string that gives the season ie. 'M','W','D'

    returns: series holding, for every period (row of the Buys-Ballot
    table), the mean over its seasons
    """
    # The table is no longer printed here: that was leftover debug output
    # (compute_std never printed it) and it dumped the whole table on every call.
    buys_ballot = getBuysBallot(df,period,season)
    return buys_ballot.mean(axis=1)
    


In [79]:
def compute_std(df,period='Y',season='M'):
    """
    Standard deviation of each period of the Buys-Ballot table of a dataframe

    args:
    - df: dataframe to analyse, indexed by a DatetimeIndex
    - period: string that gives the period ie. 'Y','M','W'
    - season: string that gives the season ie. 'M','W','D'

    returns: series holding, for every period (row of the Buys-Ballot
    table), the standard deviation over its seasons
    """
    return getBuysBallot(df, period, season).std(axis=1)

In [93]:
# Buys-Ballot table of the training set with the default arguments
# (period='W', season='D'): one row per week number, one column per weekday.
refacto_df = getBuysBallot(train_df)
refacto_df

  return df.index.week


Date,0,1,2,3,4,5,6
1,14477.0,16762.0,18541.0,18166.0,18088.0,12775.0,2300.0
2,18319.0,17253.0,19934.0,19928.0,20433.0,15623.0,4063.0
3,22084.0,21171.0,22009.0,20505.0,20182.0,14903.0,3312.0
4,21356.0,20190.0,20772.0,18466.0,19482.0,14538.0,3356.0
5,21185.0,19226.0,19658.0,18438.0,18680.0,13828.0,2851.0
6,20470.0,19601.0,20230.0,19309.0,19349.0,14234.0,2652.0
7,21838.0,20480.0,20483.0,18653.0,18587.0,12461.0,2495.0
8,19399.0,18124.0,18316.0,16733.0,15846.0,10973.0,2222.0
9,17587.0,16448.0,16752.0,15803.0,15941.0,11373.0,2360.0
10,17746.0,16681.0,17394.0,15713.0,16107.0,11175.0,2317.0


In [None]:
def plot_decomposition_type(mean_df,std_df):
    """
    Plot the seasonal means against the seasonal standard deviations.

    Both curves are drawn on the same figure, using the index of mean_df as
    the x axis for both of them.

    args:
    - mean_df: mean values, indexed by period (e.g. the output of compute_mean)
    - std_df: std values, assumed to be in the same order as mean_df
      (e.g. the output of compute_std called with the same arguments)

    returns: None; the interactive figure is displayed with fig.show()
    """
    # Imported here because the plotting code below relies on plotly's
    # graph_objects API under the name `go`; without it the function raises
    # NameError on a fresh kernel.
    import plotly.graph_objects as go

    fig = go.Figure()
    # Solid line: seasonal means.
    fig.add_trace(go.Scatter(x=mean_df.index, y=mean_df,name="Seasonal mean",line = dict(color='black', width=2)))
    # Dashed line: seasonal standard deviations, plotted on the same x values.
    fig.add_trace(go.Scatter(x=mean_df.index, y=std_df,name="Seasonal standard deviation",line = dict(color='black', width=1, dash='dash')))
    fig.update_layout(title='Relationship between the seasonal means and the seasonal pooled standard deviations',
                    xaxis_title='Period',
                    yaxis_title='Number of calls')

    fig.show()