In [56]:
import datetime as dt
import streamlit as st
import pandas as pd
import pymongo as mongo
import plotly.express as px
import plotly.graph_objs as go
import pprint as p

# # Options to be able to see all columns when printing
# pd.options.display.width= None
# pd.options.display.max_columns= None
# pd.set_option('display.max_rows', 3000)
# pd.set_option('display.max_columns', 3000)

# Fitbit user identifier; it matches the 'id' field of the stored documents shown in the
# cell outputs below. Not referenced by any function in the visible cells.
USER_UUID = "3cc4e2ee-8c2f-4c25-955b-fe7f6ffcbe44"
# Name of the MongoDB database and of the collection that connect_to_db() opens.
DB_NAME = "fitbit"
DATA_COLLECTION_NAME = "fitbitCollection"


def connect_to_db():
    """
    Opens a client to the MongoDB server on localhost:27017 and hands back the data collection.

    The database is selected by the DB_NAME module constant and the collection by
    DATA_COLLECTION_NAME.

    Returns the pymongo collection named DATA_COLLECTION_NAME.
    Raises Exception if that collection is not listed in the database.
    """
    database = mongo.MongoClient('localhost', 27017)[DB_NAME]

    # Guard clause: refuse to hand out a collection that does not exist yet.
    if DATA_COLLECTION_NAME not in database.list_collection_names():
        raise Exception(f"Collection {DATA_COLLECTION_NAME} does not exist.")

    return database[DATA_COLLECTION_NAME]


def to_datetime(date, time=""):
    """
    Turns a date into a datetime.datetime, optionally overriding the time of day.

    Inputs:
        - date <str, datetime.date>: An ISO-format string (parsed with
          datetime.fromisoformat) or a datetime.date. A datetime.datetime is also a
          datetime.date, so it is accepted too, with its time of day reset to midnight.
        - time <str>: Optional 'HH:MM:SS' string. When given, it replaces the time of
          day of the result.

    Raises ValueError if date is neither a string nor a datetime.date.
    """
    if isinstance(date, str):
        base = dt.datetime.fromisoformat(date)
    elif isinstance(date, dt.date):
        # Midnight of the given day
        base = dt.datetime.combine(date, dt.time.min)
    else:
        raise ValueError("Unsupported type for date. It should be either a string or a datetime.date object.")

    if time == "":
        return base

    clock = dt.datetime.strptime(time, '%H:%M:%S').time()
    return dt.datetime.combine(base, clock)


def check_dType(dType):
    """
    Validates that dType is one of the 'type' values stored in the collection.

    Inputs:
        - dType <str or None>: The 'type' value to validate. None means "no type
          filter" and is accepted without querying the database.

    Returns None. Raises ValueError if dType is not None and is not among the distinct
    'type' values of the collection returned by connect_to_db().
    """
    if dType is None:
        # Nothing to validate, so skip the database round trip entirely.
        return

    distinctTypes = connect_to_db().distinct("type")
    if dType not in distinctTypes:
        raise ValueError(f"dType needs to be one of {distinctTypes}.")


def load_data(dType=None, date=None):
    """
    Inputs:
        - dType <str>: The 'type' key we want to pull from MongoDB.
        - date <str, datetime>: The date we want the data of.

    Creates a query based on dType and date and returns its result (the cursor
    returned by the collection's find()). When date is given, documents are matched on
    'data.dateTime' being exactly to_datetime(date).

    Raises ValueError if both dType and date are None, if date has an unsupported type,
    or if dType is not one of the types stored in the collection.
    """
    # Fail fast on bad arguments before any database round trip.
    if date is None and dType is None:
        raise ValueError("One of dType or date must be specified.")

    myquery = {}
    if dType is not None:
        myquery['type'] = dType
    if date is not None:
        myquery['data.dateTime'] = to_datetime(date)

    fitbitCollection = connect_to_db()
    check_dType(dType)

    return fitbitCollection.find(myquery)


def add_datetime_columns(df):
    """
    Adds calendar breakdown columns derived from the "dateTime" column.

    The columns month, day, hour, minute, second, month_name and day_name are written
    into df itself (the frame is modified in place) and the same frame is returned.
    month_name and day_name are ordered categoricals (January..December and
    Monday..Sunday).

    Raises Exception if df has no "dateTime" column.
    """
    if "dateTime" not in df.columns:
        raise Exception("Need a dateTime column in df.")

    stamps = df["dateTime"].dt

    # Plain numeric components
    for part in ("month", "day", "hour", "minute", "second"):
        df[part] = getattr(stamps, part)

    # Month name, ordered by calendar position
    month_order = pd.CategoricalDtype(
        categories=['January', 'February', 'March', 'April',
                    'May', 'June', 'July', 'August',
                    'September', 'October', 'November', 'December'],
        ordered=True)
    df["month_name"] = stamps.month_name().astype(month_order)

    # Day name, ordered Monday through Sunday
    weekday_order = pd.CategoricalDtype(
        categories=['Monday', 'Tuesday', 'Wednesday',
                    'Thursday', 'Friday', 'Saturday', 'Sunday'],
        ordered=True)
    df["day_name"] = stamps.day_name().astype(weekday_order)

    return df


def get_df(dType=None, date=None, addDateTimeCols=False):
    """
    Loads data from MongoDB into a dataframe.

    Inputs:
        - dType <str>: The 'type' key to pull, passed through to load_data().
        - date <str, datetime>: The date to pull, passed through to load_data().
        - addDateTimeCols <bool>: When True, the frame is passed through
          add_datetime_columns() before being returned.

    Returns a DataFrame with one row per matching document, built from each document's
    'data' sub-dictionary. The columns are the keys of the first document's 'data'.

    Raises IndexError if the query matches no documents.
    """
    # Read the cursor exactly once. Indexing a pymongo cursor (cursor[0]) and then
    # iterating it runs the query against the server a second time.
    records = [doc['data'] for doc in load_data(dType=dType, date=date)]
    if not records:
        # Same exception type as indexing an empty cursor, with a usable message.
        raise IndexError(f"No documents found for dType={dType!r}, date={date!r}.")

    df = pd.DataFrame(records, columns=list(records[0]))
    if addDateTimeCols:
        df = add_datetime_columns(df)
    return df

# Module-level handle to the MongoDB collection where the data are stored; functions
# defined in later cells read it as a global.
fitbitCollection = connect_to_db()
# Day ('YYYY-MM-DD') used by the example queries and plots in the cells below.
testDate = '2023-03-30'

In [57]:


# Show what is stored for testDate under every distinct 'type' value in the collection.
distinctTypes = fitbitCollection.distinct("type")

for dType in distinctTypes:
    print(dType)
    # Materialise the query result so the documents themselves get pretty-printed
    query_result = list(load_data(dType=dType, date=testDate))
    p.pprint(query_result)
    # Separator line between types
    print('-'*70)

minutesFairlyActive
[{'_id': ObjectId('6454cb835e94bea6977ef8de'),
  'data': {'dateTime': datetime.datetime(2023, 3, 30, 0, 0), 'value': 19},
  'id': '3cc4e2ee-8c2f-4c25-955b-fe7f6ffcbe44',
  'type': 'minutesFairlyActive'}]
----------------------------------------------------------------------
minutesFairlyActive-intraday
[{'_id': ObjectId('6454cb835e94bea6977ef8df'),
  'data': {'dateTime': datetime.datetime(2023, 3, 30, 0, 0), 'value': 6},
  'id': '3cc4e2ee-8c2f-4c25-955b-fe7f6ffcbe44',
  'type': 'minutesFairlyActive-intraday'}]
----------------------------------------------------------------------
minutesLightlyActive
[{'_id': ObjectId('6454cb835e94bea6977ef87d'),
  'data': {'dateTime': datetime.datetime(2023, 3, 30, 0, 0), 'value': 311},
  'id': '3cc4e2ee-8c2f-4c25-955b-fe7f6ffcbe44',
  'type': 'minutesLightlyActive'}]
----------------------------------------------------------------------
minutesLightlyActive-intraday
[{'_id': ObjectId('6454cb835e94bea6977ef87e'),
  'data': {'dateTi

In [58]:

# Every stored document of one intraday type, across all dates. The type is spelled out
# because dTypes is only assigned in a later cell, so dTypes[0] fails on a fresh kernel.
list(load_data(dType='minutesSedentary-intraday'))

[{'_id': ObjectId('6454cb805e94bea6977ef0f8'),
  'id': '3cc4e2ee-8c2f-4c25-955b-fe7f6ffcbe44',
  'type': 'minutesSedentary-intraday',
  'data': {'dateTime': datetime.datetime(2023, 3, 29, 0, 0), 'value': 15}},
 {'_id': ObjectId('6454cb805e94bea6977ef0f9'),
  'id': '3cc4e2ee-8c2f-4c25-955b-fe7f6ffcbe44',
  'type': 'minutesSedentary-intraday',
  'data': {'dateTime': datetime.datetime(2023, 3, 29, 0, 15), 'value': 14}},
 {'_id': ObjectId('6454cb805e94bea6977ef0fa'),
  'id': '3cc4e2ee-8c2f-4c25-955b-fe7f6ffcbe44',
  'type': 'minutesSedentary-intraday',
  'data': {'dateTime': datetime.datetime(2023, 3, 29, 0, 30), 'value': 12}},
 {'_id': ObjectId('6454cb805e94bea6977ef0fb'),
  'id': '3cc4e2ee-8c2f-4c25-955b-fe7f6ffcbe44',
  'type': 'minutesSedentary-intraday',
  'data': {'dateTime': datetime.datetime(2023, 3, 29, 0, 45), 'value': 14}},
 {'_id': ObjectId('6454cb805e94bea6977ef0fc'),
  'id': '3cc4e2ee-8c2f-4c25-955b-fe7f6ffcbe44',
  'type': 'minutesSedentary-intraday',
  'data': {'dateTime': 

In [59]:
# Scratch check: build the 23:59 timestamp of testDate.
dateTimeEnd = dt.datetime.strptime(testDate, '%Y-%m-%d')
t = dt.time(hour=23, minute=59)
# Keep only the date part and attach the 23:59 time of day
dateTimeEnd = dt.datetime.combine(dateTimeEnd.date(), t)
dateTimeEnd

datetime.datetime(2023, 3, 30, 23, 59)

In [60]:
def get_activity_detail_timeseries(date, activityTypes=None, collection=None):
    """
    Builds a minute-by-minute activity-level time series for a single day.

    Inputs:
        - date <str>: The day to load, formatted as 'YYYY-MM-DD'.
        - activityTypes <list>: The 'type' values to query. Defaults to the
          module-level dTypes list.
        - collection: Object with a pymongo-style find(query) method. Defaults to the
          module-level fitbitCollection.

    For each type, the documents whose 'data.dateTime' lies between 00:00 and 23:59 of
    the given day are read. A document with value N (minutes) is expanded into N data
    points, one minute apart, starting at the document's dateTime. Documents with a
    non-positive value add no points.

    Returns a flat list of (dateTime, activityLevel) tuples, grouped by type in the
    order the types were given. activityLevel is the type name without the 'minutes'
    prefix and without the '-intraday' suffix (e.g. 'Sedentary').
    """
    if activityTypes is None:
        activityTypes = dTypes
    if collection is None:
        collection = fitbitCollection

    dateTimeStart = dt.datetime.strptime(date, '%Y-%m-%d')
    dateTimeEnd = dt.datetime.combine(dateTimeStart.date(), dt.time(hour=23, minute=59))

    activityTypeTimeseries = {}
    for dType in activityTypes:
        # Derive the level name from the queried type rather than from the returned
        # documents, so it is defined even when the query matches nothing.
        activityType = dType.split('-')[0].replace('minutes', '').strip()

        query = {
            'type': dType,
            'data.dateTime': {
                '$gte': dateTimeStart,
                '$lte': dateTimeEnd
            }
        }

        # Expand each document into one data point per minute spent in the level
        activityTimeseries = []
        for doc in collection.find(query):
            docData = doc['data']
            startTime = docData['dateTime']
            minutes = docData['value']
            activityTimeseries.extend(
                (startTime + dt.timedelta(minutes=offset), activityType)
                for offset in range(minutes)
            )
        activityTypeTimeseries[activityType] = activityTimeseries

    # Concatenate the per-level series into a single list
    fullActivityTimeseries = []
    for activityTimeseries in activityTypeTimeseries.values():
        fullActivityTimeseries += activityTimeseries

    return fullActivityTimeseries


def get_activity_timeseries_df(fullActivityTimeseries):
    """
    Turns a list of (dateTime, activityLevel) tuples into a dataframe for plotting.

    The rows are sorted by dateTime (the original row index is kept), the raw level
    names are replaced by display names, activityLevel becomes an ordered categorical
    (Sedentary < Lightly Active < Fairly Active < Very Active) and activityLevelNum
    holds the matching category code.

    Raises KeyError if a level name is not one of the four known raw names.
    """
    display_names = {
        "Sedentary": "Sedentary",
        "LightlyActive": "Lightly Active",
        "FairlyActive": "Fairly Active",
        "VeryActive": "Very Active"
    }
    level_dtype = pd.CategoricalDtype(
        categories=['Sedentary', 'Lightly Active', 'Fairly Active', 'Very Active'], ordered=True)

    frame = pd.DataFrame(fullActivityTimeseries, columns=["dateTime", "activityLevel"])
    frame = frame.sort_values(by='dateTime')

    # Raw level name -> display name -> ordered categorical
    renamed = frame["activityLevel"].apply(lambda raw: display_names[raw])
    frame["activityLevel"] = renamed
    frame['activityLevel'] = frame['activityLevel'].astype(level_dtype)

    # Numeric code of the activity level, used as the y value when plotting
    frame['activityLevelNum'] = frame['activityLevel'].cat.codes

    return frame


def plot_activity_level_timeseries(activity_timeseries_df):
    """
    Builds a Plotly scatter plot of the activity level over time.

    Expects the columns dateTime, activityLevel and activityLevelNum. The title shows
    the date of the first row of the frame. The figure is returned, not shown.
    """
    level_names = ['Sedentary', 'Lightly Active', 'Fairly Active', 'Very Active']
    palette = dict(zip(level_names, ['red', 'lightblue', 'blue', 'darkblue']))

    fig = px.scatter(
        activity_timeseries_df,
        x="dateTime",
        y="activityLevelNum",
        color="activityLevel",
        color_discrete_map=palette,
    )

    # Day shown in the title, taken from the first row
    first_timestamp = activity_timeseries_df['dateTime'].iloc[0]
    date = first_timestamp.strftime("%d %B %Y")

    # Label the numeric y axis with the level names instead of the category codes
    y_axis = {
        'title': 'Activity Level',
        'tickmode': 'array',
        'tickvals': [0, 1, 2, 3],
        'ticktext': level_names,
    }
    fig.update_layout(
        title=f'Activity Levels for {date}',
        xaxis={'title': 'Time'},
        yaxis=y_axis,
        plot_bgcolor='white',
        height=500,
    )

    return fig

In [61]:
date = testDate
# Intraday 'type' values, one per activity level; get_activity_detail_timeseries()
# reads this list as a global.
dTypes = ['minutesSedentary-intraday', 'minutesLightlyActive-intraday', 'minutesFairlyActive-intraday', 'minutesVeryActive-intraday']

# Pipeline: query and expand -> dataframe -> Plotly figure (the figure is only assigned here).
fullActivityTimeseries = get_activity_detail_timeseries(date)
activity_timeseries_df = get_activity_timeseries_df(fullActivityTimeseries)
fig = plot_activity_level_timeseries(activity_timeseries_df)


Unnamed: 0,dateTime,activityLevel,activityLevelNum
1100,2023-03-30 00:00:00,Lightly Active,1
1411,2023-03-30 00:00:00,Fairly Active,2
1430,2023-03-30 00:00:00,Very Active,3
1412,2023-03-30 00:01:00,Fairly Active,2
1431,2023-03-30 00:01:00,Very Active,3


In [31]:
# Number of expanded data points per activity level
activity_timeseries_df.activityLevel.value_counts()

Sedentary        15201
LightlyActive     2483
VeryActive         739
FairlyActive       297
Name: activityLevel, dtype: int64

In [3]:
def get_sleep_start_end(dates):
    """
    Input:
        > dates <list>: Contains the dates for which we want to gather data.

    Looks up, for every date, the 'sleep-startTime' and 'sleep-endTime' documents via
    load_data() and keeps the 'value' of the first match of each.

    Returns a list with one dictionary per date, holding the keys "date",
    "sleep-startTime" and "sleep-endTime".
    """
    boundary_types = ('sleep-startTime', 'sleep-endTime')

    results = []
    for day in dates:
        entry = {"date": day}
        for boundary in boundary_types:
            # Only the first matching document is used
            first_doc = load_data(dType=boundary, date=day)[0]
            entry[boundary] = first_doc['data']['value']
        results.append(entry)

    return results


def expand_time_series(query_result, step=10):
    """
    Inputs:
        - query_result: The result of querying the MongoDB to get the data we want.
          Any iterable of documents works; each document needs a 'data' dictionary
          with the keys 'level', 'dateTime' and 'value'.
        - step <int>: Spacing in seconds between two consecutive points of the
          expanded time series. Must be positive.

    Since the query_result contains information in the form
    (<time the sleep level was entered>, <sleep level>, <duration in the sleep level (sec)>)
    we use this information to get a proper time series of the form (<dateTime>, <sleep level>).
    The expansion takes place so that for each stage, we add a point every step seconds.
    Every document contributes at least one point, at the time the level was entered.

    Returns a list of (dateTime, level) tuples in the order of the input documents.
    Raises ValueError if step is not positive.
    """
    if step <= 0:
        raise ValueError("step must be a positive number of seconds.")

    # Construct sleep level time series
    sleepLevelTimeSeries = []
    for doc in query_result:
        docData = doc['data']
        # Sleep level
        level = docData['level']
        # Datetime when level was initially entered
        levelStart = docData['dateTime']
        # Total seconds spent in level
        totalSecInLevel = docData['value']

        # One point per full step spent in the level. The first point is always the
        # moment the level was entered, even when the level lasted less than a step.
        nPoints = max(int(totalSecInLevel / step), 1)
        for i in range(nPoints):
            dataPoint = (levelStart + dt.timedelta(seconds=i * step), level)
            sleepLevelTimeSeries.append(dataPoint)

    return sleepLevelTimeSeries


def create_sleep_level_ts_df(sleepLevelTimeSeries):
    """
    Builds the plotting dataframe from a list of (dateTime, sleep level) tuples.

    The raw level names (wake, rem, light, deep) are replaced by display names,
    sleepStage becomes an ordered categorical (Deep < Light < REM < Awake) and
    sleepStageNum holds the matching category code. The row order is left as given.

    Raises KeyError if a level name is not one of the four known raw names.
    """
    display_names = {
        "wake": "Awake",
        "rem": "REM",
        "light": "Light",
        "deep": "Deep"
    }
    stage_dtype = pd.CategoricalDtype(
        categories=['Deep', 'Light', 'REM', 'Awake'], ordered=True)

    frame = pd.DataFrame(sleepLevelTimeSeries, columns=["dateTime", "sleepStage"])

    # Raw level name -> display name -> ordered categorical
    renamed = frame["sleepStage"].apply(lambda raw: display_names[raw])
    frame["sleepStage"] = renamed
    frame['sleepStage'] = frame['sleepStage'].astype(stage_dtype)

    # Numeric code of the sleep stage, used as the y value when plotting
    frame['sleepStageNum'] = frame['sleepStage'].cat.codes

    return frame


def plot_sleep_level_time_series(sleepLevelTimeSeries_df):
    """
    Shows a Plotly scatter plot of the sleep stage over time.

    Expects the columns dateTime, sleepStage and sleepStageNum. The title shows the
    date of the first row of the frame. The figure is displayed with fig.show() and
    nothing is returned.
    """
    palette = {'Awake': 'red', 'REM': 'lightblue', 'Light': 'blue', 'Deep': 'darkblue'}

    fig = px.scatter(
        sleepLevelTimeSeries_df,
        x="dateTime",
        y="sleepStageNum",
        color="sleepStage",
        color_discrete_map=palette,
    )

    # Day shown in the title, taken from the first row
    first_timestamp = sleepLevelTimeSeries_df['dateTime'].iloc[0]
    date = first_timestamp.strftime("%d %B %Y")

    # Label the numeric y axis with the stage names instead of the category codes
    y_axis = {
        'title': 'Sleep Stage',
        'tickmode': 'array',
        'tickvals': [0, 1, 2, 3],
        'ticktext': ['Deep', 'Light', 'REM', 'Awake'],
    }
    fig.update_layout(
        title=f'Sleep Stages for {date}',
        xaxis={'title': 'Time'},
        yaxis=y_axis,
        plot_bgcolor='white',
        height=500,
    )

    fig.show()


def get_sleep_level_timeseries_data(sleepStartEnd_list):
    """
    Input:
        - sleepStartEnd_list <list>: List of dictionaries each containing three keys
                                     (date, sleep-startTime, sleep-endTime).

    Builds and plots, for each element of the input list, the time series of sleep
    levels. First, the MongoDB is queried (through the module-level fitbitCollection)
    for the 'sleepLevelsData-data' documents whose dateTime lies between the sleep
    start and end time. The data we get from Mongo contain the information as
    (<time the sleep level was entered>, <sleep level>,
    <duration in the sleep level (sec)>). For this reason, we then expand the time series
    with the expand_time_series() function, one point every 10 seconds.

    Returns a list with one sleep-level dataframe per input element, in input order.
    """

    dType = 'sleepLevelsData-data'
    sleepLevelTimeSeries_dfs = []
    # For each dictionary in sleepStartEnd_list (i.e. for each date)
    for sleepStartEnd in sleepStartEnd_list:
        # Create the query that will give us the time series data
        query = {
            'type': dType,
            'data.dateTime': {
                '$gte': sleepStartEnd['sleep-startTime'],
                '$lte': sleepStartEnd['sleep-endTime']
            }
        }

        # Get time series with time spent in each sleep level
        query_result = fitbitCollection.find(query)

        # Expand the time series
        sleepLevelTimeSeries = expand_time_series(query_result, step=10)
        # Create dataframe for expanded time series
        sleepLevelTimeSeries_df = create_sleep_level_ts_df(sleepLevelTimeSeries)

        # Plot the timeseries
        plot_sleep_level_time_series(sleepLevelTimeSeries_df)

        # Keep the frame so callers can inspect the data behind each plot
        sleepLevelTimeSeries_dfs.append(sleepLevelTimeSeries_df)

    return sleepLevelTimeSeries_dfs
        

date = testDate
dates = [date]
# Look up the sleep start/end values for each date, then plot the sleep stages in between.
sleepStartEnd_list = get_sleep_start_end(dates)
sleepStartEnd_list
get_sleep_level_timeseries_data(sleepStartEnd_list)

In [92]:
# NOTE(review): in the visible cells this name is only assigned inside
# get_sleep_level_timeseries_data(), so this cell appears to rely on kernel state
# left over from an earlier run — confirm it works after Restart & Run All.
sleepLevelTimeSeries_df

Unnamed: 0,dateTime,sleepStage,sleepStageNum
0,2023-04-21 01:48:00,Awake,3
1,2023-04-21 01:48:10,Awake,3
2,2023-04-21 01:48:20,Awake,3
3,2023-04-21 01:48:30,Awake,3
4,2023-04-21 01:48:40,Awake,3
...,...,...,...
2422,2023-04-21 08:31:40,Deep,0
2423,2023-04-21 08:31:50,Deep,0
2424,2023-04-21 08:32:00,Deep,0
2425,2023-04-21 08:32:10,Deep,0
