In [47]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime, timedelta

%matplotlib inline

In [2]:
# Load the four raw CSV tables (RPE, games, GPS, wellness) in one pass;
# the order of the paths matches the order of the names on the left.
rpe_df, games_df, gps_df, well_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../data/rpe.csv",
        "../data/games.csv",
        "../data/gps.csv",
        "../data/wellness.csv",
    )
)

In [3]:
'''
    Insights: 
                1. The data for DailyLoad, AcuteLoad, ChronicLoad and AcuteChronicRatio are missing for players when the session
                    type is Combat
    
    Cleaning:
                1. Dropped rows with N/A values in place
                2. Converted the ordinal BestOutOfMyself answers to numerical values (1, 0.5, 0)
    
    # groupby player id, Normalize from (0,1) over axis 0 for each variable
    # Can also use session type, and training as groupby variables
    
    Convert BestOutOfMyself from ordinal to numerical.
    fillna with a negative value. Can also do a missing-value plot to see under which conditions the data are missing 
    (i.e. MCAR / MAR etc.). # R
    
    Date, Player id with well 
    
'''

def BOM_num(x):
    """Map a BestOutOfMyself answer to a numeric score.

    'Absolutely' -> 1, 'Somewhat' -> 0.5, and every other value
    (for example 'Not at all' or a missing value) -> 0.
    """
    if x == 'Absolutely':
        return 1
    if x == 'Somewhat':
        return 0.5
    # Anything that is not one of the two answers above scores zero.
    return 0

# Keep a real copy of the frame as it is before cleaning. A plain
# `rpe_df2 = rpe_df` only binds a second name to the same object, so the
# in-place dropna below would strip the rows from the "backup" as well.
rpe_df2 = rpe_df.copy()

# Drop every row with at least one missing value (mutates rpe_df in place).
rpe_df.dropna(inplace=True)

# Min-max normalise each remaining column to the 0-1 range within every
# (PlayerID, Training, SessionType) group.
# NOTE(review): when all values in a group are equal (e.g. a one-row group)
# max == min, the division is 0/0 and the result is NaN -- confirm that this
# is acceptable downstream.
group_keys = ['PlayerID', 'Training', 'SessionType']
rpe_gb = (
    rpe_df
    .drop(['Date', 'BestOutOfMyself'], axis=1)
    .groupby(group_keys)
    .transform(lambda x: (x - x.min()) / (x.max() - x.min()))
)

# The transformed frame has no Date or grouping-key columns; re-attach them
# (aligned on the shared index).
for col in ['Date'] + group_keys:
    rpe_gb[col] = rpe_df[col]

# Convert the ordinal BestOutOfMyself answers to numbers.
rpe_gb['BestOutOfMyself'] = rpe_df['BestOutOfMyself'].apply(BOM_num)

# dropna left gaps in the index; renumber it and discard the old labels.
rpe_gb = rpe_gb.reset_index(drop=True)
rpe_gb.head()

Unnamed: 0,Duration,RPE,SessionLoad,DailyLoad,AcuteLoad,ChronicLoad,AcuteChronicRatio,ObjectiveRating,FocusRating,Date,PlayerID,Training,SessionType,BestOutOfMyself
0,0.862069,0.625,0.424,0.446786,0.219261,0.225017,0.272727,0.5,1.0,2018-07-20,11,Yes,Game,1.0
1,1.0,0.555556,0.551122,0.20122,0.19283,0.12948,0.22619,0.666667,0.5,2018-07-19,11,Yes,Skills,1.0
2,0.666667,0.285714,0.347826,0.142857,0.102597,0.274728,0.098361,0.5,1.0,2018-07-19,16,Yes,Skills,1.0
3,1.0,0.555556,0.551122,0.20122,0.237091,0.32636,0.220238,0.666667,0.5,2018-07-17,11,Yes,Skills,1.0
4,0.0,0.5,0.5,0.076923,0.0,0.67313,0.0,0.5,1.0,2018-07-17,15,Yes,Skills,1.0


In [21]:
# Read the RPE CSV again into a separate frame.
rpe_df_new = pd.read_csv("../data/rpe.csv")

# Acute Load is the avg daily session load over the past 7 days
# Chronic Load is the avg daily session load over the past 30 days
#
# Can create pseudo loads?

# Show only the rows whose AcuteLoad is missing.
missing_acute = rpe_df_new['AcuteLoad'].isna()
rpe_df_new[missing_acute]

Unnamed: 0,Date,PlayerID,Training,SessionType,Duration,RPE,SessionLoad,DailyLoad,AcuteLoad,ChronicLoad,AcuteChronicRatio,ObjectiveRating,FocusRating,BestOutOfMyself
1,2018-07-21,13,Yes,Game,29.0,7.0,203.0,,,,,,,Not at all
2,2018-07-21,13,Yes,Game,27.0,9.0,243.0,,,,,,,Not at all
4,2018-07-20,11,Yes,Game,36.0,8.0,288.0,,,,,9.0,10.0,Absolutely
5,2018-07-20,11,Yes,Mobility/Recovery,15.0,2.0,30.0,,,,,9.0,10.0,Absolutely
7,2018-07-20,13,Yes,Game,24.0,6.0,144.0,,,,,,,Not at all
8,2018-07-20,13,Yes,Mobility/Recovery,10.0,0.0,0.0,,,,,,,Not at all
12,2018-07-19,6,Yes,Strength,30.0,4.0,120.0,,,,,8.0,8.0,
13,2018-07-19,6,Yes,Combat,20.0,4.0,80.0,,,,,8.0,8.0,
18,2018-07-19,13,Yes,Mobility/Recovery,30.0,0.0,0.0,,,,,,,Not at all
30,2018-07-17,1,Yes,Mobility/Recovery,30.0,0.0,15.0,,,,,,,


In [26]:
# Summarise the columns (dtype and non-null count) to see how much of each column is missing.
rpe_df_new.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8860 entries, 0 to 8859
Data columns (total 14 columns):
Date                 8860 non-null object
PlayerID             8860 non-null int64
Training             8860 non-null object
SessionType          7621 non-null object
Duration             7621 non-null float64
RPE                  7621 non-null float64
SessionLoad          7621 non-null float64
DailyLoad            3149 non-null float64
AcuteLoad            4349 non-null float64
ChronicLoad          4383 non-null float64
AcuteChronicRatio    4349 non-null float64
ObjectiveRating      4724 non-null float64
FocusRating          4751 non-null float64
BestOutOfMyself      3019 non-null object
dtypes: float64(9), int64(1), object(4)
memory usage: 969.1+ KB


In [81]:
# Parse the Date strings into datetimes so date arithmetic works.
rpe_df_new['Date'] = pd.to_datetime(rpe_df_new['Date'])

# For every row, the date 7 and 30 days before its own Date
# (the starts of the acute and chronic look-back windows).
time_7 = rpe_df_new['Date'] - timedelta(days=7)
time_30 = rpe_df_new['Date'] - timedelta(days=30)

# Store both window starts next to the data they belong to.
rpe_df_new['Date_7'], rpe_df_new['Date_30'] = time_7, time_30

# Peek at the new columns (not rendered unless this is the cell's last expression).
rpe_df_new.head()

'''
rpe_df_new[(rpe_df_new["PlayerID"]==1) & 
           (rpe_df_new["SessionType"]=="Game") &
           (rpe_df_new["Training"]==True) &
           (rpe_df_new['Date']>rpe_df_new['Date_7']) &
           (rpe_df_new['Date']>rpe_df_new['Date_7'])
          ]['DailyLoad'].mean()
'''

def pseudo7(x, frame=None):
    """Pseudo acute load: mean DailyLoad over the 7-day window ending at a row's Date.

    Parameters
    ----------
    x : row-like (e.g. one row of ``rpe_df_new``)
        Must provide ``PlayerID``, ``SessionType``, ``Training``, ``Date``
        and ``Date_7`` (the start of the look-back window).
    frame : DataFrame, optional
        Table to search. Defaults to the module-level ``rpe_df_new``, so
        ``rpe_df_new.apply(pseudo7, axis=1)`` works unchanged.

    Returns
    -------
    float
        Mean ``DailyLoad`` of the rows in ``frame`` that have the same
        player, session type and training value as ``x`` and whose ``Date``
        falls inside the window. NaN when no row matches (including when
        ``SessionType`` is missing, since NaN never compares equal) or when
        every matching ``DailyLoad`` is missing.
    """
    if frame is None:
        frame = rpe_df_new

    # Compare against the values of *this* row. The earlier version compared
    # the Date column with the Date_7 column (always true) and Training with
    # the boolean True (never true for string values such as 'Yes'), and it
    # discarded the result instead of returning it.
    # NOTE(review): the window is taken as (Date_7, Date] -- confirm the
    # intended bounds.
    in_window = (
        (frame["PlayerID"] == x["PlayerID"])
        & (frame["SessionType"] == x["SessionType"])
        & (frame["Training"] == x["Training"])
        & (frame["Date"] > x["Date_7"])
        & (frame["Date"] <= x["Date"])
    )
    return frame.loc[in_window, "DailyLoad"].mean()
    
    

nan

In [None]:
# Print the (rows, columns) shape of the games table, then preview its first rows.
print(games_df.shape)
games_df.head()

In [None]:
# Print the (rows, columns) shape of the GPS table, then preview its first rows.
print(gps_df.shape)
gps_df.head()

In [None]:
# Print the (rows, columns) shape of the wellness table, then preview its first rows.
print(well_df.shape)
well_df.head()

In [None]:
# Display the wellness table itself.
# NOTE(review): this renders the frame directly rather than a small slice; the
# previous cell already previews it with head() -- consider removing this cell.
well_df