# La tendance

La tendance est une méthode de calcul réalisée sur des séries temporelles.

Qu'est-ce qu'une série temporelle ?
<br>C'est tout simplement la collecte de données dans le temps à une même fréquence, fréquence qui peut être plus ou moins régulière (tous les jours, 3 fois par semaine, etc.).

Le calcul de la tendance sur une série temporelle permet donc de décrire l'évolution (à la hausse ou à la baisse) de cette série sur le long terme. En d'autres mots, la tendance permet de lisser notre collecte de données brutes dans le temps, communément appelée le signal brut.

In [1]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go

## 1. Nos données

In [2]:
# Load the dataset from 'data.csv' (path relative to the working directory).
data = pd.read_csv('data.csv')
# Notebook display: preview the first rows (head() defaults to five).
data.head()

Unnamed: 0,nomSeance,dateSeance,sportif,duree,monotonie,animationpeda,intensitemax,intensitemoy,energie,sensations,...,vigueur,sommeil,bienetresocial,bienetre,bienetrefamilial,recuperer,fatigue,viepersonnelle,vieprofessionnelle,viegroupe
0,Seance collective,2020-06-29,Sportif_10,70,1.1,1.0,5.3,4.3,1,1,...,1.0,1.0,1,1.0,1.0,1,5.0,1,1.0,1.0
1,Seance collective,2020-06-29,Sportif_12,70,3.5,3.3,1.0,2.7,1,1,...,2.6,2.9,1,1.8,1.7,1,2.6,1,1.0,2.0
2,Seance collective,2020-06-29,Sportif_13,70,2.7,1.3,1.0,4.6,1,1,...,3.8,7.6,1,1.0,1.0,1,4.1,1,1.0,1.0
3,Seance collective,2020-06-29,Sportif_14,70,3.2,1.0,1.0,2.1,1,1,...,2.0,6.3,1,1.0,1.2,1,8.6,1,1.0,1.2
4,Seance collective,2020-06-29,Sportif_15,70,4.3,1.1,4.8,4.5,1,1,...,1.0,1.0,1,1.0,1.0,1,3.8,1,1.0,1.0


In [3]:
# Working frame for a single athlete: keep the rows of 'Sportif_26', retain only
# the first row found for each session date, renumber the index from 0 and
# detach the result from `data`.
data_s = (
    data.loc[data['sportif'] == 'Sportif_26']
    .drop_duplicates(subset='dateSeance')
    .reset_index(drop=True)
    .copy()
)

## 2. Nos données brutes

In [4]:
# Raw fatigue ratings as one bar per session date; sessions rated exactly 3.9
# are drawn in red, every other bar in grey.
fig = go.Figure(
    data=[
        go.Bar(
            name='Fatigue',
            x=data_s['dateSeance'],
            y=data_s['fatigue'],
            opacity=0.6,
            marker=dict(
                color=np.where(data_s['fatigue'] == 3.9, 'red', 'grey').tolist(),
                line=dict(color='blue', width=0.3),
            ),
        )
    ]
)
fig

### 2.1 Différence ?

In [5]:
# Row-to-row change in fatigue: each rating minus the rating of the previous row
# (the first row has no predecessor, so its value is NaN).
data_s['fatigue_diff'] = data_s['fatigue'] - data_s['fatigue'].shift(1)

In [6]:
# Bars of the row-to-row fatigue differences. The colour still keys off the raw
# rating, so the bars belonging to sessions rated exactly 3.9 remain red.
fig = go.Figure(
    data=[
        go.Bar(
            name='Fatigue',
            x=data_s['dateSeance'],
            y=data_s['fatigue_diff'],
            opacity=0.6,
            marker=dict(
                color=['red' if rating == 3.9 else 'grey' for rating in data_s['fatigue']],
                line=dict(color='blue', width=0.3),
            ),
        )
    ]
)
fig

### 2.2 % différence ?

In [7]:
# Percentage change of fatigue relative to the previous row, rounded to a whole
# number of percent.
data_s['fatigue_pct'] = (data_s['fatigue'].pct_change() * 100).round(0)

In [8]:
# Bars of the percentage change in fatigue; bars are coloured from the raw
# rating (red where it equals 3.9, grey otherwise).
fig = go.Figure(
    data=[
        go.Bar(
            name='Fatigue',
            x=data_s['dateSeance'],
            y=data_s['fatigue_pct'],
            opacity=0.6,
            marker=dict(
                color=np.where(data_s['fatigue'] == 3.9, 'red', 'grey').tolist(),
                line=dict(color='blue', width=0.3),
            ),
        )
    ]
)
fig

## 3. Calculs de tendance

### 3.1 Cumulative ?

In [9]:
# Cumulative trend: the 0.5 quantile of all ratings from the first row up to and
# including the current one (expanding window, at least one observation).
data_s['fatigue_cumul'] = data_s['fatigue'].expanding(min_periods=1).quantile(0.5)

In [10]:
# Raw fatigue bars (3.9 highlighted in red) with the cumulative trend drawn on
# top as a thin red spline.
fatigue_bars = go.Bar(
    name='Fatigue',
    x=data_s['dateSeance'],
    y=data_s['fatigue'],
    opacity=0.6,
    marker=dict(
        color=['red' if rating == 3.9 else 'grey' for rating in data_s['fatigue']],
        line=dict(color='blue', width=0.3),
    ),
)
cumulative_trend = go.Scatter(
    name='Tendance',
    x=data_s['dateSeance'],
    y=data_s['fatigue_cumul'],
    mode='lines',
    line=dict(shape='spline', color='red', width=1.3),
)

fig = go.Figure(data=[fatigue_bars, cumulative_trend])
# Fixed 0-10 y axis; hover shows every trace at the hovered x position.
fig.update_layout(yaxis=dict(range=[0, 10]), hovermode='x')
fig

### 3.2 Glissante ?

#### 3.2.1 Arithmétique

In [11]:
# Rolling trend: the 0.5 quantile over a window of 28 rows (rows, not calendar
# days). No min_periods is given, so rows without 28 observations yield NaN.
data_s['fatigue_arth'] = data_s['fatigue'].rolling(window=28).quantile(0.5)

In [12]:
# Raw fatigue bars (3.9 highlighted in red) overlaid with the 28-row rolling
# trend as a thin red spline.
fatigue_bars = go.Bar(
    name='Fatigue',
    x=data_s['dateSeance'],
    y=data_s['fatigue'],
    opacity=0.6,
    marker=dict(
        color=np.where(data_s['fatigue'] == 3.9, 'red', 'grey').tolist(),
        line=dict(color='blue', width=0.3),
    ),
)
rolling_trend = go.Scatter(
    name='Tendance',
    x=data_s['dateSeance'],
    y=data_s['fatigue_arth'],
    mode='lines',
    line=dict(shape='spline', color='red', width=1.3),
)

fig = go.Figure(data=[fatigue_bars, rolling_trend])
# Fixed 0-10 y axis; hover shows every trace at the hovered x position.
fig.update_layout(yaxis=dict(range=[0, 10]), hovermode='x')
fig

#### 3.2.2 Exponentielle

In [13]:
# Two exponentially weighted means (span=28), differing only in their input:
#   - 'ewb': the 0.5 quantile over a one-row window, i.e. each row on its own;
#   - 'ewc': the 0.5 quantile over a 28-row window.
single_row_quantile = data_s['fatigue'].rolling(window=1).quantile(0.5)
window_quantile = data_s['fatigue'].rolling(window=28).quantile(0.5)

data_s['fatigue_ewb'] = single_row_quantile.ewm(span=28).mean()
data_s['fatigue_ewc'] = window_quantile.ewm(span=28).mean()

In [14]:
# Raw fatigue bars (3.9 highlighted in red) with both exponentially weighted
# trends: 'fatigue_ewb' in red and 'fatigue_ewc' in green.
trend_specs = [
    ('fatigue_ewb', 'red', 'Tendance brute'),
    ('fatigue_ewc', 'green', 'Tendance cours'),
]

fig = go.Figure()
fig.add_trace(
    go.Bar(
        name='Fatigue',
        x=data_s['dateSeance'],
        y=data_s['fatigue'],
        opacity=0.6,
        marker=dict(
            color=['red' if rating == 3.9 else 'grey' for rating in data_s['fatigue']],
            line=dict(color='blue', width=0.3),
        ),
    )
)
for column, color, label in trend_specs:
    fig.add_trace(
        go.Scatter(
            name=label,
            x=data_s['dateSeance'],
            y=data_s[column],
            mode='lines',
            line=dict(shape='spline', color=color, width=1.3),
        )
    )
# Fixed 0-10 y axis; hover shows every trace at the hovered x position.
fig.update_layout(yaxis=dict(range=[0, 10]), hovermode='x')
fig

## 4. Variations

### 4.1 Différence à la tendance ?

In [15]:
# Deviation of each raw rating from the 'fatigue_ewc' trend (raw minus trend).
data_s['fatigue_diff_tdc'] = data_s['fatigue'].sub(data_s['fatigue_ewc'])

In [16]:
# Bars of the deviation from the trend; the colour keys off the raw rating
# (red where it equals 3.9, grey otherwise).
fig = go.Figure(
    data=[
        go.Bar(
            name='Différence à la tdc',
            x=data_s['dateSeance'],
            y=data_s['fatigue_diff_tdc'],
            opacity=0.6,
            marker=dict(
                color=np.where(data_s['fatigue'] == 3.9, 'red', 'grey').tolist(),
                line=dict(color='blue', width=0.3),
            ),
        )
    ]
)
fig.update_layout(hovermode='x')
fig

In [19]:
# Constant reference-level columns: every row of 'fatigue_zm<k>' holds -k and
# every row of 'fatigue_zp<k>' holds +k ('m' for minus, 'p' for plus).
for z in [-0.5, -1, -2, -3, 0.5, 1, 2, 3, 4]:
    prefix = 'zm' if z < 0 else 'zp'
    data_s[f'fatigue_{prefix}{abs(z)}'] = z

In [20]:
# Deviation-from-trend bars together with the constant reference levels stored
# in the 'fatigue_zm*' / 'fatigue_zp*' columns, one coloured line per level.
level_colors = {
    'zm3': 'lightblue',
    'zm2': 'violet',
    'zm1': 'darkblue',
    'zm0.5': 'green',
    'zp0.5': 'green',
    'zp1': 'yellow',
    'zp2': 'orange',
    'zp3': 'red',
}

fig = go.Figure(
    data=[
        go.Bar(
            name='Différence à la tdc',
            x=data_s['dateSeance'],
            y=data_s['fatigue_diff_tdc'],
            opacity=0.6,
            marker=dict(
                color=['red' if rating == 3.9 else 'grey' for rating in data_s['fatigue']],
                line=dict(color='blue', width=0.3),
            ),
        )
    ]
)
fig.add_traces(
    [
        go.Scatter(
            name=level,
            x=data_s['dateSeance'],
            y=data_s[f'fatigue_{level}'],
            mode='lines',
            line=dict(shape='spline', color=color, width=1.5),
        )
        for level, color in level_colors.items()
    ]
)
fig.update_layout(hovermode='x')
fig

### 4.3 Bornes de la tendance ?

#### 4.3.1 Cumulatif ?

In [21]:
# Running bounds of the trend: lowest and highest 'fatigue_ewc' value observed
# from the first row up to the current one.
ewc_expanding = data_s['fatigue_ewc'].expanding()
data_s['fatigue_ewc_min'] = ewc_expanding.min()
data_s['fatigue_ewc_max'] = ewc_expanding.max()

In [22]:
# Raw fatigue bars (3.9 highlighted in red) with the trend (thick violet line)
# framed by its running minimum (blue) and running maximum (red).
overlay_specs = [
    ('fatigue_ewc', 'Tendance cours', dict(shape='spline', color='violet', width=3)),
    ('fatigue_ewc_min', 'Min. Tendance', dict(shape='spline', color='blue')),
    ('fatigue_ewc_max', 'Max. Tendance', dict(shape='spline', color='red')),
]

fig = go.Figure()
fig.add_trace(
    go.Bar(
        name='Fatigue',
        x=data_s['dateSeance'],
        y=data_s['fatigue'],
        opacity=0.6,
        marker=dict(
            color=np.where(data_s['fatigue'] == 3.9, 'red', 'grey').tolist(),
            line=dict(color='blue', width=0.3),
        ),
    )
)
for column, label, line_style in overlay_specs:
    fig.add_trace(
        go.Scatter(
            name=label,
            x=data_s['dateSeance'],
            y=data_s[column],
            mode='lines',
            line=line_style,
        )
    )
# Fixed 0-10 y axis; hover shows every trace at the hovered x position.
fig.update_layout(yaxis=dict(range=[0, 10]), hovermode='x')
fig

#### 4.3.2 Glissant ?

In [23]:
# Rolling bounds of the trend: lowest and highest 'fatigue_ewc' value within the
# last 90 rows; min_periods=1 produces a value as soon as one observation exists.
ewc_rolling = data_s['fatigue_ewc'].rolling(window=90, min_periods=1)
data_s['fatigue_ewc_rmin'] = ewc_rolling.min()
data_s['fatigue_ewc_rmax'] = ewc_rolling.max()

In [24]:
# Raw fatigue bars (3.9 highlighted in red) with the trend (thick violet line)
# framed by its 90-row rolling minimum (blue) and rolling maximum (red).
fatigue_bars = go.Bar(
    name='Fatigue',
    x=data_s['dateSeance'],
    y=data_s['fatigue'],
    opacity=0.6,
    marker=dict(
        color=['red' if rating == 3.9 else 'grey' for rating in data_s['fatigue']],
        line=dict(color='blue', width=0.3),
    ),
)
trend_line = go.Scatter(
    name='Tendance cours',
    x=data_s['dateSeance'],
    y=data_s['fatigue_ewc'],
    mode='lines',
    line=dict(shape='spline', color='violet', width=3),
)
lower_bound = go.Scatter(
    name='Min. Tendance',
    x=data_s['dateSeance'],
    y=data_s['fatigue_ewc_rmin'],
    mode='lines',
    line=dict(shape='spline', color='blue'),
)
upper_bound = go.Scatter(
    name='Max. Tendance',
    x=data_s['dateSeance'],
    y=data_s['fatigue_ewc_rmax'],
    mode='lines',
    line=dict(shape='spline', color='red'),
)

fig = go.Figure(data=[fatigue_bars, trend_line, lower_bound, upper_bound])
# Fixed 0-10 y axis; hover shows every trace at the hovered x position.
fig.update_layout(yaxis=dict(range=[0, 10]), hovermode='x')
fig

### 4.4 Variations "exponentielles" ?

In [25]:
# One smoothed curve per decile q/10 (0.2, 0.4, 0.5, 0.6, 0.8): the rolling
# quantile over 28 rows (available from the first row thanks to min_periods=1),
# then an exponentially weighted mean with span=28.
for q in (2, 4, 5, 6, 8):
    rolling_quantile = data_s['fatigue'].rolling(window=28, min_periods=1).quantile(q / 10)
    data_s[f'fatigue_ewq{q}'] = rolling_quantile.ewm(span=28).mean()

In [26]:
# Raw fatigue bars (3.9 highlighted in red), the smoothed 0.5-quantile curve as
# a thick violet line, and the 0.2 / 0.4 / 0.6 / 0.8 curves as thin lines.
quantile_colors = {'q2': 'blue', 'q4': 'green', 'q6': 'orange', 'q8': 'red'}

fig = go.Figure(
    data=[
        go.Bar(
            name='Fatigue',
            x=data_s['dateSeance'],
            y=data_s['fatigue'],
            opacity=0.6,
            marker=dict(
                color=np.where(data_s['fatigue'] == 3.9, 'red', 'grey').tolist(),
                line=dict(color='blue', width=0.3),
            ),
        ),
        go.Scatter(
            name='Tendance cours',
            x=data_s['dateSeance'],
            y=data_s['fatigue_ewq5'],
            mode='lines',
            line=dict(shape='spline', color='violet', width=3),
        ),
    ]
)
fig.add_traces(
    [
        go.Scatter(
            name=label,
            x=data_s['dateSeance'],
            y=data_s[f'fatigue_ew{label}'],
            mode='lines',
            line=dict(shape='spline', color=color, width=0.8),
        )
        for label, color in quantile_colors.items()
    ]
)
# Fixed 0-10 y axis; hover shows every trace at the hovered x position.
fig.update_layout(yaxis=dict(range=[0, 10]), hovermode='x')
fig

#### 4.4.1 Effets immédiats vs. cumulés différés

In [27]:
# Short-horizon counterpart of the 28-row curves: for each decile q/10, the
# rolling quantile over 7 rows (min_periods=1) followed by an exponentially
# weighted mean with span=7.
for q in (2, 4, 5, 6, 8):
    rolling_quantile = data_s['fatigue'].rolling(window=7, min_periods=1).quantile(q / 10)
    data_s[f'fatigue_ei{q}'] = rolling_quantile.ewm(span=7).mean()

In [28]:
# Raw fatigue bars (3.9 highlighted in red), the 7-row smoothed 0.5-quantile
# curve as a thick violet line, and the other 7-row quantile curves as thin lines.
effect_colors = {'ei2': 'blue', 'ei4': 'green', 'ei6': 'orange', 'ei8': 'red'}

fig = go.Figure()
fig.add_trace(
    go.Bar(
        name='Fatigue',
        x=data_s['dateSeance'],
        y=data_s['fatigue'],
        opacity=0.6,
        marker=dict(
            color=['red' if rating == 3.9 else 'grey' for rating in data_s['fatigue']],
            line=dict(color='blue', width=0.3),
        ),
    )
)
fig.add_trace(
    go.Scatter(
        name='Effets immédiats',
        x=data_s['dateSeance'],
        y=data_s['fatigue_ei5'],
        mode='lines',
        line=dict(shape='spline', color='violet', width=3),
    )
)
for label, color in effect_colors.items():
    fig.add_trace(
        go.Scatter(
            name=label,
            x=data_s['dateSeance'],
            y=data_s[f'fatigue_{label}'],
            mode='lines',
            line=dict(shape='spline', color=color, width=0.8),
        )
    )
# Fixed 0-10 y axis; hover shows every trace at the hovered x position.
fig.update_layout(yaxis=dict(range=[0, 10]), hovermode='x')
fig

In [29]:
# Side-by-side comparison on the raw fatigue bars: the 28-row smoothed median
# ('fatigue_ewq5', green) versus the 7-row smoothed median ('fatigue_ei5', red).
comparison_specs = [
    ('fatigue_ewq5', 'green', 'Tendance cours'),
    ('fatigue_ei5', 'red', 'Effets immédiats'),
]

fig = go.Figure(
    data=[
        go.Bar(
            name='Fatigue',
            x=data_s['dateSeance'],
            y=data_s['fatigue'],
            opacity=0.6,
            marker=dict(
                color=np.where(data_s['fatigue'] == 3.9, 'red', 'grey').tolist(),
                line=dict(color='blue', width=0.3),
            ),
        )
    ]
)
for column, color, label in comparison_specs:
    fig.add_trace(
        go.Scatter(
            name=label,
            x=data_s['dateSeance'],
            y=data_s[column],
            mode='lines',
            line=dict(shape='spline', color=color, width=3),
        )
    )
# Fixed 0-10 y axis; hover shows every trace at the hovered x position.
fig.update_layout(yaxis=dict(range=[0, 10]), hovermode='x')
fig

## 5. Approche collective

### 5.1 Boxplots + IQR

In [30]:
# The 20 athletes with the most rows in the dataset.
list_sp = data['sportif'].value_counts().head(20).index.tolist()

# Group frame: rows of those athletes, one row per (athlete, session date),
# then the first 20 rows of each athlete.
# copy() is applied last so that data_sp is an independent frame rather than the
# filtered selection returned by groupby().head(); assigning a new column to
# such a selection can raise SettingWithCopyWarning on pandas versions without
# copy-on-write.
data_sp = (
    data[data['sportif'].isin(list_sp)]
    .drop_duplicates(subset=['sportif', 'dateSeance'])
    .reset_index(drop=True)
    .groupby('sportif')
    .head(20)
    .copy()
)

In [31]:
def _interquartile_range(values):
    """Return the spread between the 0.75 and 0.25 quantiles of *values*."""
    upper = values.quantile(.75)
    lower = values.quantile(.25)
    return upper - lower


# Per-date dispersion of the group: every row receives the interquartile range
# of the fatigue ratings recorded on the same session date.
data_sp['fatigue_iqr'] = data_sp.groupby('dateSeance')['fatigue'].transform(_interquartile_range)

In [32]:
# Group view: one box of fatigue ratings per session date, with the per-date
# interquartile range drawn as a spline line on the same axes.
rating_boxes = go.Box(
    name='Saisies Fatigue',
    x=data_sp['dateSeance'],
    y=data_sp['fatigue'],
)
iqr_line = go.Scatter(
    name='IQR',
    x=data_sp['dateSeance'],
    y=data_sp['fatigue_iqr'],
    mode='lines',
    line=dict(shape='spline'),
)

fig = go.Figure(data=[rating_boxes, iqr_line])
# y axis fixed to [-1, 11]; hover shows every trace at the hovered x position.
fig.update_layout(yaxis=dict(range=[-1, 11]), hovermode='x')
fig

### 5.2 Approche statistique

#### 5.2.1 Fréquentiste

In [33]:
import scipy.stats as stats

In [34]:
# Independent copy of the group frame indexed by athlete, so that per-date
# fatigue Series can later be aligned athlete by athlete.
data_stat = data_sp.copy().set_index('sportif')

In [35]:
# Significance threshold shared by the two-sided and the one-sided tests.
ALPHA = 0.05


def _wilcoxon_sign(before, after, alpha=ALPHA):
    """Classify the paired change from *before* to *after* as '>', '<' or '='.

    A two-sided Wilcoxon signed-rank test on ``after - before`` decides whether
    there is any change; if so, the one-sided tests decide its direction.

    Args:
        before: paired ratings on the reference date.
        after: paired ratings on the compared date (same order as *before*).
        alpha: significance threshold applied to every p-value.

    Returns:
        '>' when *after* is significantly greater than *before*, '<' when it is
        significantly smaller, '=' in every other case (including a p-value
        equal to alpha or NaN), so a result is always defined.
    """
    differences = after.to_numpy() - before.to_numpy()
    # With no non-zero difference there is nothing to rank.
    # NOTE(review): depending on the SciPy version, wilcoxon() raises or returns
    # NaN in that situation - confirm against the installed version.
    if not (differences != 0).any():
        return '='

    _, p_two_sided = stats.wilcoxon(after, before)
    # Written as "not <" so that a NaN p-value also falls into '='.
    if not p_two_sided < alpha:
        return '='

    _, p_greater = stats.wilcoxon(after, before, alternative='greater')
    if p_greater < alpha:
        return '>'
    _, p_less = stats.wilcoxon(after, before, alternative='less')
    if p_less < alpha:
        return '<'
    return '='


# Compare each session date with the last date that could be tested, pairing
# the ratings athlete by athlete (both Series are indexed by athlete).
session_dates = data_stat['dateSeance'].unique()
signs = {}

start_date = session_dates[0]

for d in session_dates[1:]:
    date_0 = data_stat.loc[data_stat['dateSeance'] == start_date, 'fatigue']
    date_1 = data_stat.loc[data_stat['dateSeance'] == d, 'fatigue']

    # Keep only the athletes who have a rating on both dates.
    wilco_test = pd.DataFrame({'Date': date_0, 'Date+1': date_1}).dropna(axis='index')

    # Fewer than two pairs: skip this date and keep the current reference date.
    if len(wilco_test) < 2:
        continue

    signs[d] = _wilcoxon_sign(wilco_test['Date'], wilco_test['Date+1'])
    # The date just tested becomes the reference for the next comparison.
    start_date = d

# One-row table (index 0) with one column per tested date, built in one go
# instead of inserting columns one by one.
wilco_df = pd.DataFrame(signs, index=[0])


Sample size too small for normal approximation.


Sample size too small for normal approximation.



In [37]:
# Notebook display: the one-row table of signs, one column per tested session date.
wilco_df

Unnamed: 0,2020-06-30,2020-07-01,2020-07-02,2020-07-03,2020-07-04,2020-07-06,2020-07-07,2020-07-08,2020-07-09,2020-07-13,...,2020-07-24,2020-07-25,2020-07-27,2020-07-28,2020-07-29,2020-07-30,2020-07-31,2020-08-01,2020-08-04,2020-08-05
0,=,>,<,=,=,<,=,>,<,=,...,=,=,=,=,=,=,=,=,=,=


#### 5.2.2 Bayésienne

In [38]:
# Paired sample for two hand-picked consecutive session dates.
start_date, date = '2020-07-01', '2020-07-02'

# Fatigue ratings on each date, indexed by athlete.
date_0 = data_stat.loc[data_stat['dateSeance'].eq(start_date), 'fatigue'].copy()
date_1 = data_stat.loc[data_stat['dateSeance'].eq(date), 'fatigue'].copy()

# Align both dates on the athlete index and keep only complete pairs.
bayes_test = pd.DataFrame({'Date': date_0, 'Date+1': date_1}).dropna(axis=0, how='any')

In [39]:
# Export the paired sample to 'bayes_test.csv' in the working directory.
# index=False leaves the athlete index out of the file: only the 'Date' and
# 'Date+1' columns are written.
bayes_test.to_csv('bayes_test.csv', index=False)