In [1]:
import pandas as pd
import datetime
import os
import pytz
import json
print(pd.__version__)
utc = pytz.utc
AZtz = pytz.timezone("US/Arizona")

dataFileColumns = ["timestamp", "stage", "endTimestamp"]
outputFileName = "fitbitSleepStages.parquet.gzip"

#get list of files in current directroy
files = [f for f in os.listdir("./") if os.path.isfile(f)]

#if the output file isn't already made, make it
if not (outputFileName in files):
    print("making new df")
    emptydf = pd.DataFrame(columns = dataFileColumns)
    emptydf.set_index('timestamp')
    print(emptydf)
        
    #save as a parquet file
    emptydf.to_parquet(outputFileName, compression='gzip') 


df = pd.read_parquet(outputFileName)

print(df.head(100))


1.2.4
making new df
Empty DataFrame
Columns: [timestamp, stage, endTimestamp]
Index: []
Empty DataFrame
Columns: [timestamp, stage, endTimestamp]
Index: []


In [2]:
def fitbitSleepStageToStandard(stageName):
    if stageName == "restless":
        return 0
    if stageName == "asleep":
        return 1
    if stageName == "wake":
        return 0
    if stageName == "light":
        return 1
    if stageName == "deep":
        return 2
    if stageName == "rem":
        return 3


In [20]:
# there's a list of sleep objects for each sleep session
# we want start time and assign the phoenix timezone
#then there's a data list of sleep stage json objects
def addJSONdataTodf(jd, returnDF):
    for sleepSession in jd:
        #print(sleepSession['levels']['data'])
        #print(sleepSession)
        
        for sleepStage in sleepSession['levels']['data']:
            toadd = []
            stageTime = AZtz.localize(pd.to_datetime(sleepStage['dateTime'], format='%Y-%m-%dT%H:%M:%S.%f')).astimezone(utc)
            toadd.append(stageTime)
            toadd.append(fitbitSleepStageToStandard(sleepStage['level']))
            toadd.append(stageTime + datetime.timedelta(seconds=int(sleepStage['seconds'])))
            returnDF.loc[stageTime] = toadd
    return returnDF


In [21]:
datDir = "./fitbit/"

sleepFiles = os.listdir(datDir)

print(sleepFiles)
for sf in sleepFiles:
  with open(datDir + sf) as f:
    jsonData = json.load(f)
  df = addJSONdataTodf(jsonData,df)
  print("imported: " + sf)

['sleep-2020-06-11.json', 'sleep-2020-05-12.json', 'sleep-2020-07-11.json']
imported: sleep-2020-06-11.json
imported: sleep-2020-05-12.json
imported: sleep-2020-07-11.json


In [22]:
print(df.head(20))

                                          timestamp stage  \
index                                                       
2020-07-11 09:13:00+00:00 2020-07-11 09:13:00+00:00     1   
2020-07-11 09:15:00+00:00 2020-07-11 09:15:00+00:00     0   
2020-07-11 09:35:30+00:00 2020-07-11 09:35:30+00:00     1   
2020-07-11 09:54:30+00:00 2020-07-11 09:54:30+00:00     2   
2020-07-11 10:07:00+00:00 2020-07-11 10:07:00+00:00     1   
2020-07-11 10:31:30+00:00 2020-07-11 10:31:30+00:00     3   
2020-07-11 10:43:00+00:00 2020-07-11 10:43:00+00:00     1   
2020-07-11 10:56:00+00:00 2020-07-11 10:56:00+00:00     0   
2020-07-11 11:01:00+00:00 2020-07-11 11:01:00+00:00     1   
2020-07-11 11:09:30+00:00 2020-07-11 11:09:30+00:00     2   
2020-07-11 11:40:30+00:00 2020-07-11 11:40:30+00:00     1   
2020-07-11 11:55:00+00:00 2020-07-11 11:55:00+00:00     3   
2020-07-11 12:17:00+00:00 2020-07-11 12:17:00+00:00     1   
2020-07-11 13:19:00+00:00 2020-07-11 13:19:00+00:00     2   
2020-07-11 13:37:00+00:0

In [6]:
df.to_parquet(outputFileName, compression='gzip') 