In [5]:
#import the needed libraries
import pandas as pd 
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
import plotly.graph_objects as go

# Cleaning

In [3]:
#import the dataset
file = "Service Clients Darty 2017-2021.xlsx"

# Number of leading rows kept for training; the remaining rows form the test set.
# 1095 = 3 * 365 -- presumably three years of daily records (TODO confirm).
TRAIN_ROWS = 1095

# Load the workbook, delete the columns that are useless for this study, then
# drop incomplete rows (from the 1/10/21 the column "appels" is empty).
df = (
    pd.read_excel(file)
    .drop(columns=["Unnamed: 4", "Unnamed: 3", "Appels EGP"])
    .dropna()
)

#check the type of the columns "appels"
# (a bare `df.dtypes` in the middle of a cell is never displayed, so print it)
print(df.dtypes)

#split the data by row position and set the date as index;
# set_index returns a new frame, so `df` itself is left untouched
train_df = df.iloc[:TRAIN_ROWS].set_index('Date')
test_df = df.iloc[TRAIN_ROWS:].set_index('Date')

# Definition of the functions

In [4]:
def resample_df(df, freq):
    """
    Aggregate a dataframe into buckets of the requested frequency.

    args:
    - df: dataframe to resample
    - freq: frequency string handed to ``df.resample`` ie. 'Y','W','M'

    returns: dataframe holding, for every bucket, the sum of each column
    """
    buckets = df.resample(freq)
    totals = buckets.sum()
    return totals

In [10]:
def compute_mean_std(df,period,season):

    """
    Compute the seasonal means and the seasonal standard deviations for a sample based on a periodicity and a seasonality

    The series is aggregated to the `season` frequency, then cut into calendar
    periods (one column per actual year / year-month / week, one row per
    position of the season inside its period). Each row is then summarised
    across the periods.

    args:
    - df : dataframe representing a time serie; it is indexed by dates and the
           values are read from its 'Appels' column
    - period : string that gives the periodicity, one of 'Y','W','M'
    - season : string that gives the seasonality ie. 'W','M','D'
               ('D' uses df as provided, anything else is passed to resample_df)

    returns:
    - a tuple (season_mean, pooled_std):
        * season_mean : Series, mean of each season position across the periods
        * pooled_std  : list, sample standard deviation (n-1 denominator) of
                        each season position across the periods; NaN when only
                        one period is available
    - the string "cannot compute" when `period` is not 'Y', 'M' or 'W'

    """

    #resample the df
    if season!='D':
        sample_df=resample_df(df, season)
    else :
        sample_df=df

    if period not in ('Y','M','W'):
        return "cannot compute"

    #group the df by actual calendar periods. Grouping on the period (and not
    #on the bare month / week number) keeps e.g. January 2017 and January 2018
    #in separate columns instead of concatenating them into one.
    period_keys=sample_df.index.to_period(period)

    #aggregate in a dictionary the periods, each one re-indexed by the
    #position of the season inside the period (0, 1, 2, ...)
    params = {
        f"{name}": group["Appels"].reset_index(drop=True)
        for name, group in sample_df.groupby(period_keys)
    }

    #keep the periods holding at least 2 values, then only the season
    #positions that are present in every remaining period
    params_df=pd.DataFrame(params)
    params_df=params_df.dropna(axis=1,thresh=2).dropna(axis=0)

    #compute the season means; the number of periods comes from the data
    #itself rather than from a hard-coded "three years" assumption
    season_mean=params_df.mean(axis=1)

    #compute the pooled Standard deviation over the same cleaned columns
    pooled_std=params_df.std(axis=1,ddof=1)

    return(season_mean,pooled_std.tolist())

In [None]:

def plot_decomposition_type(mean_df,std_df):
    """
    Relationship between the seasonal means and the seasonal standard deviations

    Draws both curves on one plotly figure and displays it.

    args:
    - mean_df: seasonal means; its index is used as the x values of both curves
    - std_df: seasonal standard deviations, plotted against the index of mean_df

    returns: nothing, the figure is shown with fig.show()
    """

    # both traces share the x axis so the two curves can be compared directly
    x_values = mean_df.index
    mean_line = dict(color='black', width=2)
    std_line = dict(color='black', width=1, dash='dash')

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=x_values, y=mean_df,name="Seasonal mean",line=mean_line))
    fig.add_trace(go.Scatter(x=x_values, y=std_df,name="Seasonal standard deviation",line=std_line))
    fig.update_layout(title='Relationship between the seasonal means and the seasonal pooled standard deviations',
                    xaxis_title='Period',
                    yaxis_title='Number of calls')

    fig.show()