# Convert Apple Health Sleep Data to JSON

In [77]:
import pandas as pd
import json

# Load the Apple Watch sleep export (CSV) into a DataFrame
df = pd.read_csv(filepath_or_buffer='../../data/AW_sleep_data.csv')

# Preview the first five rows to check the columns that were read
df.head(n=5)

Unnamed: 0,Start Time,End Time,Category,Timestamp,Heart Rate,Source Name
0,2022-09-13 01:47:49,2022-09-13 01:59:19,Light/Core,2022-09-13 01:46:47,64.0,Apple Watch SE 2020
1,2022-09-13 01:59:19,2022-09-13 02:04:49,Deep,2022-09-13 01:59:47,62.0,Apple Watch SE 2020
2,2022-09-13 02:04:49,2022-09-13 02:12:49,Light/Core,2022-09-13 02:05:44,64.0,Apple Watch SE 2020
3,2022-09-13 02:12:49,2022-09-13 02:27:19,Deep,2022-09-13 02:13:49,65.0,Apple Watch SE 2020
4,2022-09-13 02:27:19,2022-09-13 02:35:49,Light/Core,2022-09-13 02:24:49,64.0,Apple Watch SE 2020


In [78]:
# Rename the raw CSV columns to the keys used in the exported records:
# 'Start Time' -> 'startdate', 'End Time' -> 'enddate', 'Category' -> 'value'.
# NOTE: this cell overwrites `df`, so it is not safe to re-run on its own
# (already-converted values would be mapped to NaN); re-run the CSV-loading
# cell first.
df = df.rename(columns={'Start Time': 'startdate', 'End Time': 'enddate', 'Category': 'value'})

# Lookup from the CSV category labels to HKCategoryValueSleepAnalysis* names
valueMap = {
    'REM': 'HKCategoryValueSleepAnalysisAsleepREM',
    'Light/Core': 'HKCategoryValueSleepAnalysisAsleepCore',
    'Deep': 'HKCategoryValueSleepAnalysisAsleepDeep',
    'Awake': 'HKCategoryValueSleepAnalysisAwake',
}

# Drop rows whose category is 'Unspecified'. `.copy()` detaches the filtered
# frame so the column assignments below do not raise SettingWithCopyWarning.
df = df[df['value'] != 'Unspecified'].copy()

# Convert category to value. Any category missing from valueMap becomes NaN.
df['value'] = df['value'].map(valueMap)

# Append a fixed '-0400' UTC offset, parse to datetimes, then shift every
# timestamp forward by one year.
# NOTE(review): the offset is hard-coded for every row regardless of date --
# confirm that ignoring daylight-saving changes is intended.
for col in ('startdate', 'enddate'):
    df[col] = pd.to_datetime(df[col] + '-0400') + pd.DateOffset(years=1)

# Choose time range. Both bounds are date-only strings, so the upper bound
# only admits records that start no later than 00:00:00 on 2024-04-13.
inRange = (df['startdate'] >= '2023-10-18') & (df['startdate'] <= '2024-04-13')

# Sort chronologically and rebuild the index as 0..n-1 in sorted order
# (required by any later code that looks rows up by integer label).
df = df[inRange].sort_values(by='startdate').reset_index(drop=True)

# Helper columns used for grouping records into nights:
#   hour / day          - hour of day and day of month of the start time
#   starttime / endtime - minutes since midnight of the start / end time
df['hour'] = df['startdate'].dt.hour
df['day'] = df['startdate'].dt.day
df['starttime'] = df['startdate'].dt.minute + df['startdate'].dt.hour * 60
df['endtime'] = df['enddate'].dt.minute + df['enddate'].dt.hour * 60

# Store the timestamps as strings so the records can be dumped to JSON
df['startdate'] = df['startdate'].astype(str)
df['enddate'] = df['enddate'].astype(str)

# Show everything except the last five rows
df.head(-5)

Unnamed: 0,startdate,enddate,value,Timestamp,Heart Rate,Source Name,hour,day,starttime,endtime
0,2023-10-18 01:01:01-04:00,2023-10-18 01:51:31-04:00,HKCategoryValueSleepAnalysisAsleepCore,2022-10-18 01:00:17,73.0,Apple Watch SE 2020,1,18,61,111
1,2023-10-18 01:51:31-04:00,2023-10-18 02:00:31-04:00,HKCategoryValueSleepAnalysisAsleepDeep,2022-10-18 01:52:09,65.0,Apple Watch SE 2020,1,18,111,120
2,2023-10-18 02:00:31-04:00,2023-10-18 02:01:31-04:00,HKCategoryValueSleepAnalysisAsleepCore,2022-10-18 01:59:42,68.0,Apple Watch SE 2020,2,18,120,121
3,2023-10-18 02:01:31-04:00,2023-10-18 02:02:01-04:00,HKCategoryValueSleepAnalysisAwake,2022-10-18 01:59:42,68.0,Apple Watch SE 2020,2,18,121,122
4,2023-10-18 02:02:01-04:00,2023-10-18 02:07:31-04:00,HKCategoryValueSleepAnalysisAsleepCore,2022-10-18 02:03:22,65.0,Apple Watch SE 2020,2,18,122,127
...,...,...,...,...,...,...,...,...,...,...
6113,2024-04-12 05:07:32-04:00,2024-04-12 05:21:02-04:00,HKCategoryValueSleepAnalysisAsleepCore,2023-04-12 05:06:54,61.0,Apple Watch SE 2020,5,12,307,321
6114,2024-04-12 05:21:02-04:00,2024-04-12 05:52:32-04:00,HKCategoryValueSleepAnalysisAsleepDeep,2023-04-12 05:21:10,61.0,Apple Watch SE 2020,5,12,321,352
6115,2024-04-12 05:52:32-04:00,2024-04-12 06:06:02-04:00,HKCategoryValueSleepAnalysisAsleepCore,2023-04-12 05:52:16,61.0,Apple Watch SE 2020,5,12,352,366
6116,2024-04-12 06:06:02-04:00,2024-04-12 06:20:02-04:00,HKCategoryValueSleepAnalysisAsleepREM,2023-04-12 06:07:05,60.0,Apple Watch SE 2020,6,12,366,380


In [79]:
# Group the chronologically sorted records into one list per night of sleep
# and save the result as JSON (a list of nights, each a list of record dicts).

# Pull what the loop needs into plain Python lists once, so the grouping is
# positional and does not depend on the DataFrame's index labels.
allRecords = df.to_dict('records')
startDates = df['startdate'].astype(str).str[:10].tolist()  # 'YYYY-MM-DD' part
startHours = df['hour'].tolist()

sleepCollection = []
oneDay = []
for i, oneRecord in enumerate(allRecords):
    if i > 0:
        # Previous record started before noon and this one starts on a
        # different calendar date (woke up in the morning, next sleep began
        # after midnight). The full date is compared, not just the day of the
        # month, so nights a month or more apart are never merged.
        dateChanged = startDates[i-1] != startDates[i] and startHours[i-1] < 12
        # Previous record started before 3pm and this one starts at 9pm or
        # later (woke up during the day, went to bed the same evening).
        eveningStart = startHours[i-1] < 15 and startHours[i] >= 21
        if dateChanged or eveningStart:
            sleepCollection.append(oneDay)  # close the previous night
            oneDay = []  # start a new night
    oneDay.append(oneRecord)

# Flush the final night (nothing to flush when df is empty)
if oneDay:
    sleepCollection.append(oneDay)

with open('../../data/Apple_Watch_Sleep_2.json', 'w') as f:
    json.dump(sleepCollection, f)

# Calculate average sleep duration over the last 14 days

In [69]:
# Load the per-night record groups from the exported JSON file
with open('../../data/Apple_Watch_Sleep_2.json', 'r') as f:
    sleepData = json.load(f)

# Keep the 14 most recent nights (fewer if the file holds fewer than 14)
sleepData = sleepData[-14:]
numDays = len(sleepData)
if numDays == 0:
    raise ValueError("No sleep data found in '../../data/Apple_Watch_Sleep_2.json'")


def _hours_minutes(totalMinutes):
    """Split a duration in minutes into whole (hours, minutes)."""
    return int(totalMinutes / 60), int(totalMinutes % 60)


# Minutes asleep per night (durationList) and across all nights (totalSleepTime)
totalSleepTime = 0
durationList = []
for oneDay in sleepData:
    todaySleepTime = 0
    for record in oneDay:
        # ignore awake time
        if record['value'] == 'HKCategoryValueSleepAnalysisAwake':
            continue
        endTime = pd.to_datetime(record['enddate'])
        startTime = pd.to_datetime(record['startdate'])
        # total_seconds() covers the whole interval; `.seconds` holds only the
        # sub-day component and would misreport intervals of a day or more.
        sleepTime = (endTime - startTime).total_seconds() / 60
        totalSleepTime += sleepTime
        todaySleepTime += sleepTime
    durationList.append(todaySleepTime)

# Average over the nights actually loaded rather than a hard-coded 14
averageSleepTime = totalSleepTime / numDays
avgHours, avgMinutes = _hours_minutes(averageSleepTime)
print(f"Average sleep time in the last {numDays} days: ", avgHours, " hours ", avgMinutes, " minutes")

# Shortest and longest night; built-ins avoid sentinel start values
minSleepTime = min(durationList)
maxSleepTime = max(durationList)

minHours, minMinutes = _hours_minutes(minSleepTime)
maxHours, maxMinutes = _hours_minutes(maxSleepTime)

print(f"Min sleep time in the last {numDays} days: ", minHours, " hours ", minMinutes, " minutes")
print(f"Max sleep time in the last {numDays} days: ", maxHours, " hours ", maxMinutes, " minutes")

Average sleep time in the last 14 days:  6  hours  51  minutes
Min sleep time in the last 14 days:  5  hours  39  minutes
Max sleep time in the last 14 days:  7  hours  35  minutes
