# Missing RPE Data

On some game dates we have no RPE data. However, we are mainly concerned with the acute/chronic ratio, which we can still compute for those dates from the daily loads recorded on the preceding days (days without a recorded session are treated as zero load).

In [25]:
import pandas as pd
import numpy as np
from factor_analyzer import FactorAnalyzer
from factor_analyzer.factor_analyzer import calculate_bartlett_sphericity, calculate_kmo
from scipy.stats import pointbiserialr
import matplotlib.pyplot as plt

In [26]:
# Fix the global NumPy seed, then load the pre-processed session-level RPE data.
np.random.seed(5151)
rpe_df = pd.read_csv('./processed_data/processed_rpe.csv')
# End on a bare expression so the preview uses the notebook's rich table
# display, like the other preview cells, instead of wrapped plain text.
rpe_df.head()


   Unnamed: 0        Date  PlayerID  Training SessionType  Duration  RPE  \
0           0  2017-08-01        15         1    Strength      60.0  4.0   
1           1  2017-08-01         1         1       Speed      60.0  3.0   
2           2  2017-08-01         1         1    Strength      90.0  4.0   
3           3  2017-08-01         3         1       Speed      45.0  5.0   
4           4  2017-08-01         3         1    Strength      90.0  5.0   

   SessionLoad  DailyLoad  AcuteChronicRatio  ObjectiveRating  FocusRating  \
0        240.0      300.0               4.29              6.0          7.0   
1        180.0      540.0               4.29              0.0          0.0   
2        360.0      540.0               4.29              0.0          0.0   
3        225.0      675.0               4.29              7.0          7.0   
4        450.0      675.0               4.29              7.0          7.0   

   BestOutOfMyself  AcuteLoad  ChronicLoad  
0              3.0      42.86

## Drop Duplicate Dates

This occurs when there are multiple training sessions on the same date. We only care about the daily load so we can drop the duplicate sessions.

In [27]:
# Keep only the per-day load columns; session-level columns are not needed
# once we work at daily granularity.
keep_cols = ['Date', 'PlayerID', 'DailyLoad', 'AcuteChronicRatio', 'AcuteLoad', 'ChronicLoad']
rpe_df = rpe_df[keep_cols].copy()
rpe_df.head()

Unnamed: 0,Date,PlayerID,DailyLoad,AcuteChronicRatio,AcuteLoad,ChronicLoad
0,2017-08-01,15,300.0,4.29,42.86,10.0
1,2017-08-01,1,540.0,4.29,77.14,18.0
2,2017-08-01,1,540.0,4.29,77.14,18.0
3,2017-08-01,3,675.0,4.29,96.43,22.5
4,2017-08-01,3,675.0,4.29,96.43,22.5


In [28]:
# Rows that are identical across all remaining columns come from multiple
# sessions on the same day; keep the first occurrence of each.
rpe_df = rpe_df.drop_duplicates(keep='first')

## Fill In Missing RPE Data

In [29]:
# Show the rows whose acute/chronic ratio is missing.
rpe_df.loc[rpe_df['AcuteChronicRatio'].isna()]

Unnamed: 0,Date,PlayerID,DailyLoad,AcuteChronicRatio,AcuteLoad,ChronicLoad
265,2017-08-13,6,0.0,,0.0,0.0


This row should not be in the dataset: it is the first entry for this player and they are not training (the daily load is 0), so no acute/chronic ratio is available for it.

In [30]:
# Keep only rows that have an acute/chronic ratio.
# The explicit .copy() makes the result an independent frame, so the column
# assignment in the following cell (rpe_df['Date'] = ...) does not raise
# pandas' SettingWithCopyWarning on a boolean-mask slice.
rpe_df = rpe_df[rpe_df['AcuteChronicRatio'].notnull()].copy()

In [31]:
# Parse the date strings into datetimes; the date range and the time-based
# rolling windows built in later cells operate on this column.
rpe_df['Date'] = rpe_df['Date'].pipe(pd.to_datetime)

Add a row for every (date, player) combination that is missing from the RPE data, filling the new rows with zeros:

In [32]:
# Build the full calendar (one entry per day between the first and last
# recorded date) crossed with every player seen in the data.
# Series.min()/.max() are vectorized and skip NaT, unlike the Python builtins,
# which iterate element by element.
dates = pd.date_range(start=rpe_df['Date'].min(), end=rpe_df['Date'].max())
players = rpe_df['PlayerID'].unique()
idx = pd.MultiIndex.from_product((dates, players), names=['Date', 'PlayerID'])

# Reindex onto the full grid. Rows created for missing (date, player) pairs get
# 0 in every column, i.e. a daily load of zero; the acute/chronic columns are
# recomputed from DailyLoad in the cells that follow.
rpe_df = (
    rpe_df.set_index(['Date', 'PlayerID'])
    .reindex(idx, fill_value=0)
    .reset_index()
    .sort_values(by=['Date', 'PlayerID'])
)



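Use rolling windows to recompute the acute load (sum of the daily load over the past 7 days, divided by 7) and the chronic load (sum over the past 30 days, divided by 30):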

In [33]:
# One row per (date, player) with that day's load.
temp = rpe_df[['Date', 'PlayerID', 'DailyLoad']].drop_duplicates()

# Acute load: per-player trailing 7-day sum of DailyLoad, divided by 7.
# The divisor is always 7, even when the window holds fewer than 7 days at
# the start of a player's series.
past7Days = (
    temp.groupby('PlayerID')
    .rolling('7d', on='Date')['DailyLoad']
    .sum()
    .reset_index()
)
past7Days['newAcuteLoad'] = past7Days['DailyLoad'].div(7.).round(2)
past7Days = past7Days.drop(columns='DailyLoad')

rpe_df = rpe_df.merge(past7Days, how='left', on=['Date', 'PlayerID'])

In [34]:
# Chronic load: per-player trailing 30-day sum of DailyLoad, divided by 30.
# The divisor is always 30, regardless of how many days the window holds.
past30Days = (
    temp.groupby('PlayerID')
    .rolling('30d', on='Date')['DailyLoad']
    .sum()
    .reset_index()
)
past30Days['newChronicLoad'] = past30Days['DailyLoad'].div(30.).round(2)
past30Days = past30Days.drop(columns='DailyLoad')

rpe_df = rpe_df.merge(past30Days, how="left", on=["Date", "PlayerID"])

In [35]:
# Swap the original load columns for the freshly computed rolling values.
rpe_df = (
    rpe_df
    .drop(columns=['ChronicLoad', 'AcuteLoad'])
    .rename(columns={'newChronicLoad': 'ChronicLoad', 'newAcuteLoad': 'AcuteLoad'})
)
# Recompute the ratio from the (already rounded) acute and chronic loads.
rpe_df['AcuteChronicRatio'] = rpe_df['AcuteLoad'].div(rpe_df['ChronicLoad']).round(2)
# Spot-check the result for a single player.
rpe_df[rpe_df['PlayerID'] == 1]

Unnamed: 0,Date,PlayerID,DailyLoad,AcuteChronicRatio,AcuteLoad,ChronicLoad
0,2017-08-01,1,540.0,4.29,77.14,18.00
17,2017-08-02,1,0.0,4.29,77.14,18.00
34,2017-08-03,1,0.0,4.29,77.14,18.00
51,2017-08-04,1,0.0,4.29,77.14,18.00
68,2017-08-05,1,0.0,4.29,77.14,18.00
85,2017-08-06,1,0.0,4.29,77.14,18.00
102,2017-08-07,1,720.0,4.29,180.00,42.00
119,2017-08-08,1,0.0,2.45,102.86,42.00
136,2017-08-09,1,0.0,2.45,102.86,42.00
153,2017-08-10,1,0.0,2.45,102.86,42.00


In [38]:
# Load the per-game GPS summary and discard the saved index column.
gps_df = (
    pd.read_csv('./processed_data/processed_gps.csv')
    .drop(columns=['Unnamed: 0'])
)
gps_df.head()


Unnamed: 0,Date,GameID,Outcome,TeamPoints,PlayerID,MaxSpeedInGame,MaxAccelImpulseInGame
0,2017-11-30,1,W,19,2,7.284728,4.423615
1,2017-11-30,1,W,19,3,6.852436,4.888893
2,2017-11-30,1,W,19,4,7.267784,5.694449
3,2017-11-30,1,W,19,6,6.338894,4.777782
4,2017-11-30,1,W,19,7,7.065283,4.091273


In [39]:
# Parse the game dates so they share a dtype with rpe_df['Date'] for the join.
gps_df['Date'] = pd.to_datetime(gps_df['Date'])

# Attach each player's training-load metrics for the day of the game.
load_cols = ['Date', 'PlayerID', 'AcuteLoad', 'ChronicLoad', 'AcuteChronicRatio']
train_performance = pd.merge(
    gps_df,
    rpe_df[load_cols],
    how='left',
    on=['Date', 'PlayerID'],
).drop_duplicates()
print(len(train_performance))
train_performance.head()

443


Unnamed: 0,Date,GameID,Outcome,TeamPoints,PlayerID,MaxSpeedInGame,MaxAccelImpulseInGame,AcuteLoad,ChronicLoad,AcuteChronicRatio
0,2017-11-30,1,W,19,2,7.284728,4.423615,206.57,371.2,0.56
1,2017-11-30,1,W,19,3,6.852436,4.888893,407.86,454.67,0.9
2,2017-11-30,1,W,19,4,7.267784,5.694449,236.0,406.9,0.58
3,2017-11-30,1,W,19,6,6.338894,4.777782,297.29,415.87,0.71
4,2017-11-30,1,W,19,7,7.065283,4.091273,237.57,476.6,0.5


In [41]:
# Load the processed wellness scores, discard the saved index column and
# parse the dates so they can be joined against the game data.
wellness_df = (
    pd.read_csv('./processed_data/processed_wellness.csv')
    .drop(columns=['Unnamed: 0'])
)
wellness_df['Date'] = pd.to_datetime(wellness_df['Date'])
wellness_df.head()

Unnamed: 0,Date,PlayerID,MonitoringScore,Pain,Illness,Nutrition,wellness
0,2018-07-21,1,-1.450204,-0.364611,-0.301008,0.712604,-1.40322
1,2018-07-21,2,0.170622,2.742646,-0.301008,0.712604,3.324864
2,2018-07-21,3,0.170622,-0.364611,-0.301008,0.712604,0.217606
3,2018-07-21,4,-0.909929,-0.364611,-0.301008,0.712604,-0.862945
4,2018-07-21,5,0.440759,-0.364611,-0.301008,-1.217589,-1.442449


In [42]:
# Left-join the wellness scores so every game row is kept even when no
# wellness entry exists for that player and date.
merged_df = pd.merge(
    train_performance,
    wellness_df,
    how='left',
    on=['Date', 'PlayerID'],
).drop_duplicates()
print(len(merged_df))
merged_df.head()

443


Unnamed: 0,Date,GameID,Outcome,TeamPoints,PlayerID,MaxSpeedInGame,MaxAccelImpulseInGame,AcuteLoad,ChronicLoad,AcuteChronicRatio,MonitoringScore,Pain,Illness,Nutrition,wellness
0,2017-11-30,1,W,19,2,7.284728,4.423615,206.57,371.2,0.56,0.981035,-0.364611,-0.301008,-1.217589,-0.902173
1,2017-11-30,1,W,19,3,6.852436,4.888893,407.86,454.67,0.9,1.251172,-0.364611,-0.301008,0.712604,1.298157
2,2017-11-30,1,W,19,4,7.267784,5.694449,236.0,406.9,0.58,0.710897,-0.364611,-0.301008,0.712604,0.757881
3,2017-11-30,1,W,19,6,6.338894,4.777782,297.29,415.87,0.71,0.440759,-0.364611,-0.301008,0.712604,0.487744
4,2017-11-30,1,W,19,7,7.065283,4.091273,237.57,476.6,0.5,1.251172,-0.364611,-0.301008,0.712604,1.298157


In [45]:
# Game rows with no matching wellness entry (left join produced NaN).
# Left as a bare expression so the frame renders as a rich table instead of
# the line-wrapped plain text that print() produces for wide frames.
merged_df[merged_df['MonitoringScore'].isnull()]

          Date  GameID Outcome  TeamPoints  PlayerID  MaxSpeedInGame  \
47  2017-12-01       4       W          24        14        7.454450   
73  2017-12-01       6       L           5        14        1.360418   
75  2018-01-26       7       W          24         2        6.712505   
86  2018-01-26       8       W          24         2        6.517366   
97  2018-01-26       9       W          19         2        6.688894   
123 2018-01-27      11       L           0         5        6.930006   
135 2018-01-28      12       W          40         5        7.824213   
140 2018-01-28      12       W          40        12        4.815745   
238 2018-04-22      21       L          19         5        7.736117   
250 2018-04-22      22       W          33         5        7.466117   
385 2018-06-10      34       W          17         1        7.955562   
389 2018-06-10      34       W          17         5        8.062506   

     MaxAccelImpulseInGame  AcuteLoad  ChronicLoad  AcuteChroni

In [46]:
# Persist the merged game / load / wellness table for downstream notebooks.
# to_csv's default also writes the row index as the first column.
merged_df.to_csv(path_or_buf='./processed_data/merged_df.csv')