In [1]:
import xml.etree.ElementTree as ET
import pandas as pd
import datetime 
import matplotlib.pyplot as plt
import seaborn as sns
import os as os
import plotly.express as px
import time



plt.style.use("fivethirtyeight")
#https://www.python-engineer.com/posts/apple-health-data-python/#:~:text=different%20workout%20types-,Get%20the%20data,project%20and%20put%20the%20Export.


# create element tree object
location = '/Users/andrewspruce/Library/Mobile Documents/com~apple~CloudDocs/apple_health_export'
tree = ET.parse(f"{location}/export.xml") 
# for every health record, extract the attributes
root = tree.getroot()
record_list = [x.attrib for x in root.iter('Record')]


In [2]:
record_data = pd.DataFrame(record_list)

# proper type to dates
for col in ['creationDate', 'startDate', 'endDate']:
    record_data[col] = pd.to_datetime(record_data[col])

# value is numeric, NaN if fails
record_data['value'] = pd.to_numeric(record_data['value'], errors='coerce')

# some records do not measure anything, just count occurences
# filling with 1.0 (= one time) makes it easier to aggregate
record_data['value'] = record_data['value'].fillna(1.0)

# shorter observation names
record_data['type'] = record_data['type'].str.replace('HKQuantityTypeIdentifier', '')
record_data['type'] = record_data['type'].str.replace('HKCategoryTypeIdentifier', '')
#record_data['type'].unique()


In [3]:
workout_list = [x.attrib for x in root.iter('Workout')]




In [4]:
# create DataFrame
workout_data = pd.DataFrame(workout_list)
workout_data['workoutActivityType'] = workout_data['workoutActivityType'].str.replace('HKWorkoutActivityType', '')
workout_data = workout_data.rename({"workoutActivityType": "Type"}, axis=1)

# proper type to dates
for col in ['creationDate', 'startDate', 'endDate']:
    workout_data[col] = pd.to_datetime(workout_data[col])

#workout_data[(workout_data['Type'] == 'Running')]

In [5]:
activity_list = [x.attrib for x in root.iter('ActivitySummary')]

activity_data = pd.DataFrame(activity_list)

# convert string to numeric   

activity_data['activeEnergyBurned'] = pd.to_numeric(activity_data['activeEnergyBurned'])
activity_data['appleExerciseTime'] = pd.to_numeric(activity_data['appleExerciseTime'])
activity_data.tail()





Unnamed: 0,dateComponents,activeEnergyBurned,activeEnergyBurnedGoal,activeEnergyBurnedUnit,appleMoveTime,appleMoveTimeGoal,appleExerciseTime,appleExerciseTimeGoal,appleStandHours,appleStandHoursGoal
1451,2023-08-09,920.631,600,kcal,0,0,87,35,14,11
1452,2023-08-10,1185.18,600,kcal,0,0,132,35,14,11
1453,2023-08-11,1266.59,1100,kcal,0,0,153,60,14,10
1454,2023-08-12,1290.19,1100,kcal,0,0,236,60,11,10
1455,2023-08-13,547.906,1100,kcal,0,0,90,60,6,10


In [6]:


activity_data = activity_data[['dateComponents','activeEnergyBurned','appleExerciseTime']]


# data  where the index is the date
fig = px.line(activity_data, x='dateComponents', y=['activeEnergyBurned','appleExerciseTime'])

# Show plot 
fig.update_layout(xaxis_rangeslider_visible=True)
fig


activity_data = activity_data.rename(columns={'dateComponents':'Date','activeEnergyBurned':'Calories','appleExerciseTime':'Duration'})
activity_data = activity_data.assign(Source='Apple')

In [7]:
fig = px.scatter(activity_data, 
                x='Date', 
                y='Calories', 
                title='Calories Burned and Exercise Time over the years',
                size='Duration')
fig.update_layout(xaxis_rangeslider_visible=True)
fig

In [37]:
stravaData ='/users/andrewspruce/Downloads/export_11406294/activities.csv'
strava_df = pd.read_csv(stravaData)

for col in ['Activity Date']:
    strava_df[col] = pd.to_datetime(strava_df[col])

strava_df['Moving Time'].max()

    

14294.0

In [19]:
fig = px.scatter(strava_df, 
                x='Activity Date', 
                y='Calories',
                color='Activity Type',
                size='Moving Time',
                height=500)

fig.update_layout(xaxis_rangeslider_visible=True)

fig

In [12]:
strava_df = strava_df[['Activity Date','Calories','Moving Time']]
strava_df = strava_df.rename(columns={'Activity Date':'Date','Moving Time':'Duration'})
strava_df = strava_df.assign(Source='Strava')
strava_df['Duration'] = strava_df['Duration'] / 60


merged_df = pd.concat([activity_data,strava_df],axis=0)
merged_df

Unnamed: 0,Date,Calories,Duration,Source
0,2016-04-24,0.000000,0.000000,Apple
1,2016-04-25,0.008652,0.000000,Apple
2,2016-04-26,8.096350,0.000000,Apple
3,2016-04-27,346.106000,23.000000,Apple
4,2016-04-28,855.409000,74.000000,Apple
...,...,...,...,...
367,2021-10-24 08:29:11,468.000000,49.266667,Strava
368,2021-10-26 06:55:19,158.000000,27.600000,Strava
369,2021-10-25 17:47:29,416.000000,59.650000,Strava
370,2021-10-26 16:34:06,419.000000,70.833333,Strava


In [None]:
merged_df['Date'] = pd.to_datetime(merged_df['Date'],format='mixed')
merged_df


Unnamed: 0,Date,Calories,Duration,Source
0,2016-04-24 00:00:00,0.000000,0.000000,Apple
1,2016-04-25 00:00:00,0.008652,0.000000,Apple
2,2016-04-26 00:00:00,8.096350,0.000000,Apple
3,2016-04-27 00:00:00,346.106000,23.000000,Apple
4,2016-04-28 00:00:00,855.409000,74.000000,Apple
...,...,...,...,...
367,2021-10-24 08:29:11,468.000000,49.266667,Strava
368,2021-10-26 06:55:19,158.000000,27.600000,Strava
369,2021-10-25 17:47:29,416.000000,59.650000,Strava
370,2021-10-26 16:34:06,419.000000,70.833333,Strava


In [None]:
fig = px.scatter(merged_df, 
                x='Date', 
                y='Calories',
                size='Duration',
                color='Source',
                height=800)
fig.update_layout(xaxis_rangeslider_visible=True)
fig

In [None]:
import glob
import os
import json

data = {}

# Calories
calories_files= '/Users/andrewspruce/Downloads/MyFitbitData/AndrewSpruce/Physical Activity/calories*.json'
files = glob.glob(calories_files)

for f in files:
    d = {}
    #d['files'] = f
    dt= os.path.basename(f)\
                        .replace('.json','')\
                        .replace('calories-','')
    total=0.0
    with open(f) as listFile:
        entries=json.loads(listFile.read())
        for e in entries:
            dt, junk = e['dateTime'].split(' ')
            if dt not in data:
                data[dt] = {}
            if 'Calories' not in data[dt]:
                data[dt]['Calories'] = 0
            data[dt]['Calories'] += float(e['value'])


# Duration
activity_files= '/Users/andrewspruce/Downloads/MyFitbitData/AndrewSpruce/Physical Activity/time_in*.json'
files = glob.glob(activity_files)

for f in files:
    d = {}
    #d['files'] = f
    dt = os.path.basename(f)\
                        .replace('.json','')\
                        .replace('time_in_heart_rate_zones-','')
    total=0.0
    with open(f) as listFile:
        entries=json.loads(listFile.read())
        for e in entries:
            dt, junk = e['dateTime'].split(' ')
            val2 = float(e['value']['valuesInZones']['IN_DEFAULT_ZONE_2'])
            val3 = float(e['value']['valuesInZones']['IN_DEFAULT_ZONE_3'])
            total += (val2+val3)
            if dt not in data:
                data[dt] = {}
            if 'Duration' not in data[dt]:
                data[dt]['Duration'] = 0
            data[dt]['Duration']+= total

fb_df = pd.DataFrame(data)
fb_df = fb_df.T
fb_df['Date'] = fb_df.index
fb_df = fb_df.assign(Source='Fitbit')


fb_df.sort_values(by='Calories',ascending=False)


Unnamed: 0,Calories,Duration,Date,Source
04/02/17,5023.05,,04/02/17,Fitbit
03/26/17,4811.76,,03/26/17,Fitbit
09/29/18,4460.81,69.0,09/29/18,Fitbit
08/07/20,4302.35,126.0,08/07/20,Fitbit
03/17/19,4291.27,19.0,03/17/19,Fitbit
...,...,...,...,...
09/27/14,1828.80,,09/27/14,Fitbit
04/14/17,1716.48,,04/14/17,Fitbit
08/17/23,1667.90,,08/17/23,Fitbit
04/16/17,897.28,,04/16/17,Fitbit


In [None]:
merged_df = pd.concat([merged_df,fb_df],axis=0)

In [None]:
merged_df = merged_df.fillna(0)
merged_df

Unnamed: 0,Date,Calories,Duration,Source
0,2016-04-24 00:00:00,0.000000,0.0,Apple
1,2016-04-25 00:00:00,0.008652,0.0,Apple
2,2016-04-26 00:00:00,8.096350,0.0,Apple
3,2016-04-27 00:00:00,346.106000,23.0,Apple
4,2016-04-28 00:00:00,855.409000,74.0,Apple
...,...,...,...,...
05/23/18,05/23/18,3450.840000,19.0,Fitbit
05/24/18,05/24/18,3457.470000,4.0,Fitbit
05/25/18,05/25/18,3216.840000,4.0,Fitbit
05/26/18,05/26/18,3578.470000,6.0,Fitbit


In [None]:
fig = px.scatter(merged_df, 
                x='Date', 
                y='Calories',
                size='Duration',
                color='Source',
                height=800)
fig.update_layout(xaxis_rangeslider_visible=True)
fig