### Process Fitness Data

##### Preprocess Data:
* Extract recorded activities
* Create uids
* Create max RMs

##### Pull Data

In [2]:
from pydrive.drive import GoogleDrive
from pydrive.auth import GoogleAuth
from pandleau import *

gauth = GoogleAuth()
drive = GoogleDrive(gauth)
drive?

You are using the Extract API 2.0, please save the output as .hyper format


[1;31mType:[0m           GoogleDrive
[1;31mString form:[0m    <pydrive.drive.GoogleDrive object at 0x000002A8B373CB08>
[1;31mFile:[0m           c:\python37\lib\site-packages\pydrive\drive.py
[1;31mDocstring:[0m      Main Google Drive class.
[1;31mInit docstring:[0m
Create an instance of GoogleDrive.

:param auth: authorized GoogleAuth instance.
:type auth: pydrive.auth.GoogleAuth.


In [3]:
mimetypes = {
    # Drive Document files as MS Word files.
    'application/vnd.google-apps.document': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',

    # Drive Sheets files as MS Excel files.
    'application/vnd.google-apps.spreadsheet': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'

    # etc.
}

file_list = drive.ListFile({'q': "'root' in parents and trashed=false"}).GetList()

for file1 in file_list:
    
    if file1['title'] == "FY20 H1 Workout Tracker" :
        
        download_mimetype = None
        if file1['mimeType'] in mimetypes:
            download_mimetype = mimetypes[file1['mimeType']]

        
        file1.GetContentFile("tmp_"+file1['title']+".xlsx", mimetype=download_mimetype)

Your browser has been opened to visit:

    https://accounts.google.com/o/oauth2/auth?client_id=874659251868-8fdkk74gtuje4j65gvjosq2gcu447hga.apps.googleusercontent.com&redirect_uri=http%3A%2F%2Flocalhost%3A8080%2F&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive&access_type=offline&response_type=code

Authentication successful.


In [6]:
import pandas as pd
import datetime
import pprint

print("SYSTEM INIT: {0}".format(
    datetime.datetime.strftime(datetime.datetime.now(), '%D %r')
))

file = "tmp_FY20 H1 Workout Tracker.xlsx"

df = pd.read_excel(file, skiprows=3)
print("\n",df.shape)
df.head(3)

SYSTEM INIT: 11/21/19 10:50:34 PM

 (101, 13)


Unnamed: 0,Rotation,1RM,Workout,Exercise,Week,Sets,Reps,% 1RM (pace if running),Actual Lift,Date,Time,Bodyweight,Notes
0,3.0,65.17,1.0,Shoulder Press,1.0,3.0,5,"65, 75, 85",4x55,2019-10-24,14:00:00,88.7,
1,,,,Shoulder Press,2.0,3.0,3,"70, 80, 90",3x57.5,2019-10-31,19:30:00,90.0,
2,,,,Shoulder Press,3.0,3.0,5 / 3 / 1,"75, 85, 95",3x60,2019-11-09,16:15:00,90.1,


#### Preprocessing

In [8]:
# split out powerlifting, accessory, and running rows
recorded_exercise = [
    'Shoulder Press',
    'Bench Press',
    'Deadlifts',
    'Squat',
    'Run'
]
df['map'] = df['Exercise'].apply(
    lambda x: True if x in recorded_exercise and pd.notnull(x) else False
)
rec_df = df[df['map']==True].drop('map', axis=1)
print("Total exercises:",len(rec_df))


# drop unfilled nan rows
rec_df = rec_df.dropna(
    how='any',
    subset=['Date']
)
print("Total recorded exercises:",len(rec_df))
print(rec_df['Exercise'].value_counts())

Total exercises: 63
Total recorded exercises: 61
Run               14
Squat             12
Shoulder Press    12
Deadlifts         12
Bench Press       11
Name: Exercise, dtype: int64


In [9]:
# propogate Rotation, and Workout down, create new sub_Workout column -> have a 3 digit code to identify any workout eg 2_1.2 
rec_df[['Rotation', 'Workout']] = rec_df[['Rotation', 'Workout']].fillna(method='ffill')

def make_uid(row) : 
    return str(int(row['Rotation'])) + "_" + str(row['Workout']) + "." + str(int(row['Week']))
rec_df['uid'] = rec_df.apply(
    make_uid, axis=1
)

rec_df.set_index('uid', inplace=True)

# calculate theoritcal_1RM from Actual lift
def make_1rm(r) :
    
    if r['Exercise'] != 'Run':
        x = r['Actual Lift']
        r, w = x.split('x')[0], x.split('x')[1]
        return float(w)*(1 + (int(r)/30))
    
    return None # find a better proxy

rec_df['calculated_1RM'] = rec_df.apply(
    make_1rm, axis=1
)

# process runnning data
rec_df['Distance'], rec_df['Pace'], rec_df['% 1RM'] = None, None, None
def make_running(r) :
    
    if r['Exercise'] == 'Run' :
        pace_numeric = r['% 1RM (pace if running)'].hour + r['% 1RM (pace if running)'].minute * 0.01
        return float(r['Actual Lift'].split("km")[0]), r['% 1RM (pace if running)'], pace_numeric, None, None
    else :
        return None, None, None, r['Actual Lift'], r['% 1RM (pace if running)']
    
rec_df[['Distance', 'Pace', 'Pace Numeric', 'Actual Lift', '% 1RM']] = rec_df.apply(make_running, axis=1, result_type="expand")
rec_df = rec_df.drop('% 1RM (pace if running)', axis=1)

rec_df.head(2)

Unnamed: 0_level_0,Rotation,1RM,Workout,Exercise,Week,Sets,Reps,Actual Lift,Date,Time,Bodyweight,Notes,calculated_1RM,Distance,Pace,% 1RM,Pace Numeric
uid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
3_1.0.1,3.0,65.17,1.0,Shoulder Press,1.0,3.0,5,4x55,2019-10-24,14:00:00,88.7,,62.333333,,,"65, 75, 85",
3_1.0.2,3.0,,1.0,Shoulder Press,2.0,3.0,3,3x57.5,2019-10-31,19:30:00,90.0,,63.25,,,"70, 80, 90",


#### Write out

In [10]:
rec_df.to_excel("Workout_Tracker_Processed.xlsx", index=False)
print("SYSTEM COMPLETE: {0}".format(
    datetime.datetime.strftime(datetime.datetime.now(), '%D %r')
))

SYSTEM COMPLETE: 11/21/19 10:51:08 PM


In [13]:
# not a valid solution?
'''
def make_conversion(df) :
    
    print(df.shape)
    tmp_df = pandleau(df)

    # remove if file exists, write out new file
    out_file = os.path.join("Workout_Tracker_Processed.hyper")
    
    if os.path.isfile(out_file) :
        os.remove(out_file)

    tmp_df.to_tableau(out_file, add_index=False)

    print("PYTHON: {0} conversion complete")
    print("PYTHON: {0}".format(datetime.datetime.now().strftime("%d/%m/%Y %H:%M")))

def clean_logs() :

    # clean up log files
    log_path = os.path.join(os.getcwd(), "logs")

    log_files = [
        f for f in os.listdir("./")
        if os.path.isfile(os.path.join("./", f)) and ".log" in f
        or "hyper_db_" in f
    ]

    for file in log_files :
        os.rename(file, os.path.join(log_path, file))

    print("\nPYTHON: Directory cleaned")
    print("PYTHON: {0}".format(datetime.datetime.now().strftime("%d/%m/%Y %H:%M")))

make_conversion(rec_df)
clean_logs()
'''

'\ndef make_conversion(df) :\n    \n    print(df.shape)\n    tmp_df = pandleau(df)\n\n    # remove if file exists, write out new file\n    out_file = os.path.join("Workout_Tracker_Processed.hyper")\n    \n    if os.path.isfile(out_file) :\n        os.remove(out_file)\n\n    tmp_df.to_tableau(out_file, add_index=False)\n\n    print("PYTHON: {0} conversion complete")\n    print("PYTHON: {0}".format(datetime.datetime.now().strftime("%d/%m/%Y %H:%M")))\n\ndef clean_logs() :\n\n    # clean up log files\n    log_path = os.path.join(os.getcwd(), "logs")\n\n    log_files = [\n        f for f in os.listdir("./")\n        if os.path.isfile(os.path.join("./", f)) and ".log" in f\n        or "hyper_db_" in f\n    ]\n\n    for file in log_files :\n        os.rename(file, os.path.join(log_path, file))\n\n    print("\nPYTHON: Directory cleaned")\n    print("PYTHON: {0}".format(datetime.datetime.now().strftime("%d/%m/%Y %H:%M")))\n\nmake_conversion(rec_df)\nclean_logs()\n'