# Convert Apple Health Sleep Data to JSON

In [13]:
import pandas as pd
import json

# read file
df = pd.read_csv('../../data/Apple_Watch_Sleep.csv', sep=';')

df.head()

Unnamed: 0,type,sourcename,sourceversion,creationdate,startdate,enddate,value,HKTimeZone
0,HKCategoryTypeIdentifierSleepAnalysis,Zepp Life,202108031059,2021-08-26 20:15:20 -0400,2021-08-26 12:29:00 -0400,2021-08-26 12:51:59 -0400,HKCategoryValueSleepAnalysisAsleepUnspecified,
1,HKCategoryTypeIdentifierSleepAnalysis,Zepp Life,202108031059,2021-08-26 20:15:20 -0400,2021-08-26 12:52:00 -0400,2021-08-26 13:07:59 -0400,HKCategoryValueSleepAnalysisAsleepUnspecified,
2,HKCategoryTypeIdentifierSleepAnalysis,Zepp Life,202108031059,2021-08-26 20:15:20 -0400,2021-08-26 13:08:00 -0400,2021-08-26 15:28:59 -0400,HKCategoryValueSleepAnalysisAsleepUnspecified,
3,HKCategoryTypeIdentifierSleepAnalysis,Zepp Life,202108031059,2021-08-26 20:15:20 -0400,2021-08-26 15:29:00 -0400,2021-08-26 15:51:59 -0400,HKCategoryValueSleepAnalysisAsleepUnspecified,
4,HKCategoryTypeIdentifierSleepAnalysis,Zepp Life,202108031059,2021-08-26 20:15:20 -0400,2021-08-26 15:52:00 -0400,2021-08-26 16:21:59 -0400,HKCategoryValueSleepAnalysisAsleepUnspecified,


In [14]:
# Choose time range
df = df[(df['startdate'] >= '2023-10-18') & (df['startdate'] <= '2024-4-12')]
df = df.reset_index(drop=True)

# Sort by startdate
df = df.sort_values(by='startdate')
df = df.reset_index(drop=True) # VERY IMPORTANT IF YOU ARE GOING TO USE THE INDEX

df['startdate'] = pd.to_datetime(df['startdate'])
df['enddate'] = pd.to_datetime(df['enddate'])

# Group records
df['startdate'] = pd.to_datetime(df['startdate'])
df['hour'] = df['startdate'].dt.hour
df['day'] = df['startdate'].dt.day
df['starttime'] = df['startdate'].dt.minute + df['startdate'].dt.hour * 60
df['endtime'] = df['enddate'].dt.minute + df['enddate'].dt.hour * 60
df['creationdate'] = df['creationdate'].astype(str)
df['startdate'] = df['startdate'].astype(str)
df['enddate'] = df['enddate'].astype(str)

# Drop sourcename='GarField iPhone 13'
df = df[df['sourcename'] != 'GarField iPhone 13']
df = df.reset_index(drop=True)

# Drop value == HKCategoryValueSleepAnalysisInBed
df = df[df['value'] != 'HKCategoryValueSleepAnalysisInBed']
df = df.reset_index(drop=True)



df.head()

Unnamed: 0,type,sourcename,sourceversion,creationdate,startdate,enddate,value,HKTimeZone,hour,day,starttime,endtime
0,HKCategoryTypeIdentifierSleepAnalysis,GarField’s Apple Watch,10.0.1,2023-10-18 07:57:08 -0400,2023-10-18 00:55:54-04:00,2023-10-18 01:14:24-04:00,HKCategoryValueSleepAnalysisAsleepCore,America/New_York,0,18,55,74
1,HKCategoryTypeIdentifierSleepAnalysis,GarField’s Apple Watch,10.0.1,2023-10-18 07:57:08 -0400,2023-10-18 01:14:24-04:00,2023-10-18 01:51:24-04:00,HKCategoryValueSleepAnalysisAsleepDeep,America/New_York,1,18,74,111
2,HKCategoryTypeIdentifierSleepAnalysis,GarField’s Apple Watch,10.0.1,2023-10-18 07:57:08 -0400,2023-10-18 01:51:24-04:00,2023-10-18 01:52:24-04:00,HKCategoryValueSleepAnalysisAwake,America/New_York,1,18,111,112
3,HKCategoryTypeIdentifierSleepAnalysis,GarField’s Apple Watch,10.0.1,2023-10-18 07:57:08 -0400,2023-10-18 01:52:24-04:00,2023-10-18 01:58:24-04:00,HKCategoryValueSleepAnalysisAsleepCore,America/New_York,1,18,112,118
4,HKCategoryTypeIdentifierSleepAnalysis,GarField’s Apple Watch,10.0.1,2023-10-18 07:57:08 -0400,2023-10-18 01:58:24-04:00,2023-10-18 02:04:54-04:00,HKCategoryValueSleepAnalysisAsleepREM,America/New_York,1,18,118,124


In [8]:
# Save to json
sleepCollection = []
oneDay = []
for i in range(len(df)):
    if i>0 and df['day'][i-1] != df['day'][i] and df['hour'][i-1] < 12 : # Wake up before 12am and Sleep after 0am
        sleepCollection.append(oneDay)
        oneDay = []
    elif i>0 and df['hour'][i-1] < 12 and df['hour'][i] >= 22: # Wake up before 12am and sleep after 10pm
        sleepCollection.append(oneDay) # previous day
        oneDay = [] # new day
    oneRecord = df.iloc[i].to_dict()
    oneDay.append(oneRecord)


with open('../../data/Apple_Watch_Sleep.json', 'w') as f:
    json.dump(sleepCollection, f)

In [15]:
# generate mock data for every days' sleep
import random

startDay = pd.to_datetime(df['startdate'][0]).date()
endDay = pd.to_datetime(df['startdate'][len(df)-1]).date() - pd.Timedelta(days=1)

print("Start day: ", startDay)
print("End day: ", endDay)

mockData = []
for i in range((endDay - startDay).days):
    oneDay = {}
    oneDay['date'] = (startDay + pd.Timedelta(days=i)).strftime('%Y-%m-%d')
    oneDay['phone'] = random.randint(0, 1)
    oneDay['alcohol'] = random.randint(0, 1)
    oneDay['stress'] = random.randint(0, 10)
    mockData.append(oneDay)

with open('../../data/mockData.json', 'w') as f:
    json.dump(mockData, f)

Start day:  2023-10-18
End day:  2024-04-12
