In [8]:
import pandas as pd
import datetime
import os
import pytz
import json
print(pd.__version__)
utc = pytz.utc

datDir = "./oura/oura_2021-08-10T02-14-44.json"


dataFileColumns = ["timestamp", "stage", "endTimestamp"]

outputFileName = "ouraRingSleepStages.parquet.gzip"

#get list of files in current directroy
files = [f for f in os.listdir("./") if os.path.isfile(f)]


#if the output file isn't already made, make it
if not (outputFileName in files):
    print("making new df")
    emptydf = pd.DataFrame(columns = dataFileColumns)
    emptydf.set_index('timestamp',inplace=True)
    print(emptydf)
        
    #save as a parquet file
    emptydf.to_parquet(outputFileName, compression='gzip') 


df = pd.read_parquet(outputFileName)

print(df.head(100))

with open(datDir) as f:
  jsonData = json.load(f)


1.2.4
making new df
Empty DataFrame
Columns: [stage, endTimestamp]
Index: []
Empty DataFrame
Columns: [stage, endTimestamp]
Index: []


In [9]:
def ouraSleepStageToStandard(ouraNum):
    if ouraNum == 4: #awake
        return 0
    if ouraNum == 2: #light sleep
        return 2
    if ouraNum == 1: #deep sleep
        return 3
    if ouraNum == 3: #REM sleep
        return 4
    

In [10]:

#seperate out the list of sleep data
for SleepJSON in jsonData['sleep']:
    startTime = datetime.datetime.fromisoformat(SleepJSON['bedtime_start']).astimezone(utc)
    stageStartTime = startTime

    hypnogram_5min = SleepJSON['hypnogram_5min']

    toadd = []
    #toadd.append(stageStartTime)
    toadd.append(ouraSleepStageToStandard(int(hypnogram_5min[0])))

    for i in range(1, len(hypnogram_5min)):
        if(hypnogram_5min[i-1] != hypnogram_5min[i]):
            #save the current time as the end of the last stage
            toadd.append(startTime + datetime.timedelta(minutes=i*5))
            #add to the df
            df.loc[stageStartTime] = toadd
            #reset toadd
            toadd = []
            #set the current time to the start of the next one
            stageStartTime = startTime + datetime.timedelta(minutes=i*5)
            #toadd.append(stageStartTime)
            #set the lable of the next stage to the value of the current index

            toadd.append(ouraSleepStageToStandard(int(hypnogram_5min[i])))
    #set the last end time to the last toadd
    toadd.append(startTime + datetime.timedelta(minutes=len(hypnogram_5min)))
    #add it to the df
    df.loc[stageStartTime] = toadd

#sperate out the bedtime_start and the hypnogram_5min

#parse it out in a very similar way to the withings data

In [11]:
print(df)

                          stage              endTimestamp
timestamp                                                
2020-10-15 08:03:59+00:00     0 2020-10-15 08:08:59+00:00
2020-10-15 08:08:59+00:00     2 2020-10-15 08:13:59+00:00
2020-10-15 08:13:59+00:00     0 2020-10-15 08:18:59+00:00
2020-10-15 08:18:59+00:00     2 2020-10-15 08:28:59+00:00
2020-10-15 08:28:59+00:00     3 2020-10-15 08:43:59+00:00
...                         ...                       ...
2021-08-09 14:05:48+00:00     2 2021-08-09 14:55:48+00:00
2021-08-09 14:55:48+00:00     3 2021-08-09 15:00:48+00:00
2021-08-09 15:00:48+00:00     2 2021-08-09 15:25:48+00:00
2021-08-09 15:25:48+00:00     4 2021-08-09 15:40:48+00:00
2021-08-09 15:40:48+00:00     0 2021-08-09 10:18:48+00:00

[7881 rows x 2 columns]


In [12]:
for restfulJSON in jsonData['restful_periods']:
    startTime = datetime.datetime.fromisoformat(restfulJSON['bedtime_start']).astimezone(utc)
    endTime = datetime.datetime.fromisoformat(restfulJSON['bedtime_end']).astimezone(utc)
    df.loc[startTime] = [2,endTime]

In [13]:
print(df)

                          stage              endTimestamp
timestamp                                                
2020-10-15 08:03:59+00:00     0 2020-10-15 08:08:59+00:00
2020-10-15 08:08:59+00:00     2 2020-10-15 08:13:59+00:00
2020-10-15 08:13:59+00:00     0 2020-10-15 08:18:59+00:00
2020-10-15 08:18:59+00:00     2 2020-10-15 08:28:59+00:00
2020-10-15 08:28:59+00:00     3 2020-10-15 08:43:59+00:00
...                         ...                       ...
2021-08-04 00:15:03+00:00     2 2021-08-04 00:15:03+00:00
2021-08-05 22:01:01+00:00     2 2021-08-05 22:01:01+00:00
2021-08-07 20:43:32+00:00     2 2021-08-07 20:43:32+00:00
2021-08-08 20:27:00+00:00     2 2021-08-08 20:27:00+00:00
2021-08-09 07:38:01+00:00     2 2021-08-09 07:38:01+00:00

[8209 rows x 2 columns]


In [14]:
df.sort_index(inplace=True)
df.to_parquet(outputFileName, compression='gzip') 