# Time Series Analysis Project : Forecast Post-Sale Customer Service Calls

**`Study of model type`**

## **`Data Collection`**

In [1]:
#import the needed libraries
import pandas as pd 
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
import plotly.graph_objects as go

In [2]:
#import the dataset
file="./Service Clients Darty 2017-2021.xlsx"
df = pd.read_excel(file)
df.drop(columns=['Appels EGP'],inplace=True)
df.set_index('Date',inplace=True)
df.head(),df.tail()

(            Appels
 Date              
 2017-01-01     0.0
 2017-01-02  5774.0
 2017-01-03  5744.0
 2017-01-04  5833.0
 2017-01-05  5588.0,
             Appels
 Date              
 2021-12-27     NaN
 2021-12-28     NaN
 2021-12-29     NaN
 2021-12-30     NaN
 2021-12-31     NaN)

In [3]:
df.describe() 

Unnamed: 0,Appels
count,1734.0
mean,5461.132065
std,2635.572188
min,0.0
25%,4211.75
50%,5804.5
75%,6899.75
max,16049.0


**Split the data (first 3 years)**


In [4]:
train_df = df[df.index<'2020-01-01']
test_df = df[df.index>='2020-01-01']
train_df.tail(),test_df.head()

(            Appels
 Date              
 2019-12-27  5968.0
 2019-12-28  3952.0
 2019-12-29   868.0
 2019-12-30  5520.0
 2019-12-31  4051.0,
             Appels
 Date              
 2020-01-01     0.0
 2020-01-02  5979.0
 2020-01-03  5690.0
 2020-01-04  3834.0
 2020-01-05   687.0)

## **`Useful Functions`**

In [5]:
def resample_df(df, freq):
    """
    Resample a dataframe to a given frequency
    args: 
    - df: dataframe to resample
    - freq: string that gives the frequency ie. 'Y','W','M'
    
    returns: resampled dataframe
    """
    return df.resample(freq).sum()

In [6]:
def selectPeriod(df,period):
    """
    Select a periodicity of a dataframe
    args: 
    - df: dataframe studied
    
    returns: periodicity studied
    """
    if period == 'Y':
        return df.index.year
    elif period == 'M':
        return df.index.month
    elif period == 'W':
        return df.index.isocalendar().week


In [7]:
def selectSeason(df,season):
    """
    Select a season of a dataframe
    args: 
    - df: dataframe studied
    
    returns: season studied
    """
    if season == 'M':
        return df.index.month
    elif season == 'W':
        return df.index.isocalendar().week
    elif season == 'WD':
        return df.index.weekday
    elif season == 'D':
        return df.index.day_of_year

In [8]:
def getBuysBallot(df,period='Y',season='M'):
    """
    Transform a df into a buys ballot table
    args: 
    - df: dataframe to resample
    
    returns: formatted df
    """

    
    df_buysBallot = pd.DataFrame()
    selected_period = selectPeriod(df,period)
    periods = set(selected_period)
    for period in periods:
        period_condition = selected_period == period
        selected_season = selectSeason(df[period_condition],season)
        df_buysBallot[str(period)] = df[period_condition].groupby(selected_season).sum()
    df_buysBallot=df_buysBallot.T
    
    return df_buysBallot

In [27]:
def getBuysBallot2(df,period_chosen='Y',season='M'):
    """
    Transform a df into a buys ballot table
    args: 
    - df: dataframe to resample
    
    returns: formatted df
    """

    
    df_buysBallot = pd.DataFrame()
    selected_period = selectPeriod(df,period_chosen)
    periods = set(selected_period)
    for period in periods:
        period_condition = selected_period == period
        selected_season = selectSeason(df[period_condition],season)
        if period_chosen == "M":
            df_buysBallot[str(period)] = df[period_condition].groupby(selected_season).sum()
        
        else:
            df_buysBallot[str(period)] = df[period_condition].groupby(selected_season).sum()
    df_buysBallot=df_buysBallot.T
    
    return df_buysBallot

In [68]:
df_week = train_df.resample('W').sum()
df_week.head()

Unnamed: 0_level_0,Appels
Date,Unnamed: 1_level_1
2017-01-01,0.0
2017-01-08,33095.0
2017-01-15,37485.0
2017-01-22,40932.0
2017-01-29,38141.0


In [70]:
df_study = train_df.copy()
df_study = df_study.resample('W').sum()
df_study['Month'] = df_study.index.to_period('M')
df_study

Unnamed: 0_level_0,Appels,Month
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2017-01-01,0.0,2017-01
2017-01-08,33095.0,2017-01
2017-01-15,37485.0,2017-01
2017-01-22,40932.0,2017-01
2017-01-29,38141.0,2017-01
...,...,...
2019-12-08,56602.0,2019-12
2019-12-15,55792.0,2019-12
2019-12-22,57201.0,2019-12
2019-12-29,30637.0,2019-12


In [80]:
df_study_2017 = df_study[df_study.index.year==2017]
df_study_2017.reset_index(inplace=True)
df_study_2017.drop(columns=['Date'],inplace=True)
df_study_2017.set_index('Month',inplace=True)
df_study_2017



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0_level_0,Appels
Month,Unnamed: 1_level_1
2017-01,0.0
2017-01,33095.0
2017-01,37485.0
2017-01,40932.0
2017-01,38141.0
2017-02,37787.0
2017-02,37408.0
2017-02,34647.0
2017-02,33088.0
2017-03,32603.0


In [57]:
test_refacto = train_df.groupby([train_df.index.month,train_df.index.week]).sum()
test_refacto.index.names = ['month','week']
test_refacto
#test_refacto.unstack(-1)
#test_refacto.reset_index(level="week",inplace=True)
#test_refacto


weekofyear and week have been deprecated, please use DatetimeIndex.isocalendar().week instead, which returns a Series. To exactly reproduce the behavior of week and weekofyear and return an Index, you may call pd.Int64Index(idx.isocalendar().week)



Unnamed: 0_level_0,Unnamed: 1_level_0,Appels
month,week,Unnamed: 2_level_1
1,1,88355.0
1,2,115553.0
1,3,124166.0
1,4,118160.0
1,5,60037.0
...,...,...
12,48,25532.0
12,49,155350.0
12,50,156601.0
12,51,165190.0


In [9]:
def compute_mean(df,period='Y',season='M'):
    """
    Compute the mean of a dataframe
    args: 
    - df: dataframe to resample
    - period: string that gives the period ie. 'Y','W','M'
    - season: string that gives the season ie. 'Y','W','M'
    
    returns: mean of the dataframe
    """
    df = getBuysBallot(df,period,season)
    return df.mean(axis=0)                 
    


In [10]:
def compute_std(df,period='Y',season='M'):
    """
    Compute the standard deviation of a dataframe
    args: 
    - df: dataframe to resample
    - period: string that gives the period ie. 'Y','W','M'
    - season: string that gives the season ie. 'Y','W','M'
    
    returns: standard deviation of the dataframe
    """
    df = getBuysBallot(df,period,season)
    return df.std(axis=0)

In [11]:
def plot_decomposition_type(mean_df,std_df):
    """
    Relationship between the seasonal means and the seasonal standard deviations
    args: 
    - mean_df: dataframe with means values
    - std_df: dataframe with std values
    
    returns: plot of the seasonal means and the pooled standard deviations
    """

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=mean_df.index, y=mean_df,name="Seasonal mean",line = dict(color='black', width=2)))
    fig.add_trace(go.Scatter(x=mean_df.index, y=std_df,name="Seasonal standard deviation",line = dict(color='black', width=1, dash='dash')))
    fig.update_layout(title='Relationship between the seasonal means and the seasonal pooled standard deviations',
                    xaxis_title='Period',
                    yaxis_title='NUmber of calls')

    fig.show()

## **`Results`**

### **`Period: Year & Season: Month`**

In [12]:
getBuysBallot(train_df,"Y","M")

Date,1,2,3,4,5,6,7,8,9,10,11,12
2017,163082.0,140966.0,141573.0,114999.0,130337.0,152602.0,193753.0,179083.0,178460.0,165064.0,188319.0,221464.0
2018,173209.0,160203.0,160186.0,142896.0,127959.0,138021.0,166649.0,151997.0,144416.0,150877.0,194987.0,171215.0
2019,169980.0,134671.0,117593.0,115688.0,126345.0,139291.0,186758.0,149692.0,142779.0,144144.0,144316.0,211975.0


In [13]:
mean_df = compute_mean(train_df,"Y","M")
std_df = compute_std(train_df,"Y","M")

plot_decomposition_type(mean_df,std_df)
std_df.corr(mean_df)

0.49294984342916687

### **`Period: Year & Season: Week`**

In [14]:
getBuysBallot(train_df,"Y","W")

week,1,2,3,4,5,6,7,8,9,10,...,43,44,45,46,47,48,49,50,51,52
2017,33095.0,37485.0,40932.0,38141.0,37787.0,37408.0,34647.0,33088.0,32603.0,32187.0,...,36703.0,37815.0,39095.0,38704.0,47032.0,56641.0,54597.0,56352.0,60650.0,32595.0
2018,34963.0,39047.0,41375.0,40782.0,38256.0,41803.0,44037.0,37663.0,36122.0,36343.0,...,30416.0,27475.0,32864.0,33209.0,59552.0,64270.0,44151.0,44457.0,47339.0,25995.0
2019,33051.0,39021.0,41859.0,39237.0,37823.0,36634.0,36313.0,30862.0,27539.0,28603.0,...,30626.0,26612.0,31887.0,30344.0,31540.0,46840.0,56602.0,55792.0,57201.0,30637.0


In [15]:
mean_df = compute_mean(train_df,"Y","W")
std_df = compute_std(train_df,"Y","W")

plot_decomposition_type(mean_df,std_df)
std_df.corr(mean_df)

0.49601796476619214

### **`Period: Year & Season: Day`**

In [16]:
getBuysBallot(train_df,"Y","D")

Date,1,2,3,4,5,6,7,8,9,10,...,356,357,358,359,360,361,362,363,364,365
2017,0.0,5774.0,5744.0,5833.0,5588.0,5236.0,4135.0,785.0,5789.0,5581.0,...,9710.0,6901.0,1699.0,0.0,6958.0,7357.0,6779.0,6291.0,4383.0,827.0
2018,0.0,6967.0,6468.0,6446.0,6422.0,4604.0,873.0,6426.0,5919.0,6888.0,...,4960.0,1816.0,4566.0,0.0,5729.0,5806.0,5455.0,3647.0,792.0,3183.0
2019,0.0,6240.0,6132.0,6430.0,4036.0,642.0,6104.0,5753.0,6550.0,6785.0,...,2011.0,7796.0,5705.0,0.0,6348.0,5968.0,3952.0,868.0,5520.0,4051.0


In [17]:
mean_df = compute_mean(train_df,"Y","D")
std_df = compute_std(train_df,"Y","D")

plot_decomposition_type(mean_df,std_df)
std_df.corr(mean_df)

-0.34295429848293985

### **`Period: Month & Season: Week Day`**

In [18]:
getBuysBallot(train_df,"M","WD")

Date,0,1,2,3,4,5,6
1,88718.0,90551.0,94610.0,83337.0,78185.0,57839.0,13031.0
2,79294.0,74653.0,76305.0,71410.0,72462.0,51496.0,10220.0
3,70210.0,65381.0,72676.0,72714.0,77413.0,51104.0,9854.0
4,65254.0,70335.0,64264.0,60581.0,59347.0,44071.0,9731.0
5,60768.0,69202.0,74613.0,65208.0,65220.0,41866.0,7764.0
6,66754.0,70978.0,73617.0,76212.0,80535.0,52398.0,9420.0
7,113244.0,102701.0,95646.0,87650.0,78174.0,57179.0,12566.0
8,77905.0,79188.0,88597.0,92780.0,84397.0,49445.0,8460.0
9,87162.0,76402.0,76299.0,74841.0,80382.0,58622.0,11947.0
10,87885.0,87889.0,82784.0,74914.0,69381.0,46610.0,10622.0


In [19]:
mean_df = compute_mean(train_df,"M","WD")
std_df = compute_std(train_df,"M","WD")

plot_decomposition_type(mean_df,std_df)
std_df.corr(mean_df)

0.8888694911737144

### **`Period: Month & Season: Day`**

In [20]:
getBuysBallot(train_df,"W","WD")

Date,0,1,2,3,4,5,6
1,14477.0,16762.0,18541.0,18166.0,18088.0,12775.0,2300.0
2,18319.0,17253.0,19934.0,19928.0,20433.0,15623.0,4063.0
3,22084.0,21171.0,22009.0,20505.0,20182.0,14903.0,3312.0
4,21356.0,20190.0,20772.0,18466.0,19482.0,14538.0,3356.0
5,21185.0,19226.0,19658.0,18438.0,18680.0,13828.0,2851.0
6,20470.0,19601.0,20230.0,19309.0,19349.0,14234.0,2652.0
7,21838.0,20480.0,20483.0,18653.0,18587.0,12461.0,2495.0
8,19399.0,18124.0,18316.0,16733.0,15846.0,10973.0,2222.0
9,17587.0,16448.0,16752.0,15803.0,15941.0,11373.0,2360.0
10,17746.0,16681.0,17394.0,15713.0,16107.0,11175.0,2317.0


In [21]:
mean_df = compute_mean(train_df,"M","WD")
std_df = compute_std(train_df,"M","WD")

plot_decomposition_type(mean_df,std_df)
std_df.corr(mean_df)

0.8888694911737144

### **`Correlation`**

In [22]:
corr_dic = {}
corr_df = pd.DataFrame()

for period in ['Y','M','W']:
    for season in ['M','W','WD','D']:
        mean_df = compute_mean(train_df,period,season)
        std_df = compute_std(train_df,period,season)
        corr_dic[f"P:{period}&S:{season}"] = std_df.corr(mean_df)

for key in corr_dic.keys():
    corr_df[key] = [corr_dic[key]]
corr_df.rename(index={0: 'correlation'},inplace=True)
corr_df

Unnamed: 0,P:Y&S:M,P:Y&S:W,P:Y&S:WD,P:Y&S:D,P:M&S:M,P:M&S:W,P:M&S:WD,P:M&S:D,P:W&S:M,P:W&S:W,P:W&S:WD,P:W&S:D
correlation,0.49295,0.496018,0.73968,-0.342954,,-0.939555,0.888869,,1.0,,0.971735,0.498822
