In [1]:
import matplotlib
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt # default library for making plots
import seaborn as sns # for making prettier plots!
import datetime
import math

%config InlineBackend.figure_format = 'retina' # for the crispier version of plots for retina screens
matplotlib.style.use('ggplot')
plt.rcParams['figure.figsize'] = [12, 6]



In [2]:
filename = '../HealthDatasets/sample_dataset.csv'

data = pd.read_csv(filename, converters= {'Date': pd.to_datetime})
data.set_index(pd.to_datetime(data.Date),inplace=True)

columns_dropped = ['Unnamed: 0', 'Is Weekday', 'Elevation (Ft)', 'Day of Week', 'Floors', 
                   'Minutes Lightly Active', 'Minutes Lightly Active', 'Minutes Fairly Active', 
                   'Minutes Very Active', 'Activity Calories', 'Active Score', 'Cardio minutes','Fat Burn calories', 
                   'Deep sleep count', 'Minutes Light sleep', 'Light sleep count', 'REM sleep count', 
                   'Fat Burn minutes', 'Sleep Start time', 'Sleep End time','Time in bed', 'Minutes to fall asleep',
                  'Minutes Awake count', 'Minutes Deep sleep', 'Peak minutes', 'Cardio calories', 'Peak calories',
                  'Normal Cardio calories','Normal Cardio minutes', 'Date']

data.drop(columns_dropped, axis = 1, inplace=True)

data.head(5)

Unnamed: 0_level_0,Is Weekend,Calories Burned,Calories BMR,Steps,Distance (Km),Resting Heart Rate,Minutes Sedentary,Sleep Efficiency,Minutes Asleep,Minutes REM sleep,Minutes Awake,% Deep sleep,% Light sleep,% REM sleep
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2018-06-24,True,1996,1690,3367,3.01,59.0,1377,92.0,379.0,79.0,52.0,13.0,57.0,19.0
2018-06-25,False,2863,1685,8394,5.55,58.0,709,95.0,333.0,44.0,54.0,20.0,56.0,12.0
2018-06-26,False,3398,1683,13569,9.68,57.0,687,89.0,351.0,75.0,51.0,14.0,56.0,19.0
2018-06-27,False,3442,1682,13337,9.33,57.0,665,97.0,365.0,102.0,42.0,17.0,49.0,26.0
2018-06-28,False,1701,1682,559,0.36,57.0,1025,,,,,,,


In [None]:
dayCodes = ['', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
minDayCodes = ['', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

days = {1: 'Mon', 2: 'Tue', 3: 'Wed', 4: 'Thu', 5: 'Fri', 6: 'Sat', 7: 'Sun'}

def is_nan(x):
    return (x is np.nan or x != x)

def defineSleepBucket(row):
    sleepEntry = row['Sleep Start time']
    if not is_nan(sleepEntry):
        sleepTimeO = datetime.datetime.strptime(sleepEntry, '%Y-%m-%dT%H:%M:%S.000').time()        
        if sleepTimeO.minute > 30:
            return (sleepTimeO.hour + 1)
            #return '{}:{}'.format(str((sleepTimeO.hour + 1)), '00')
        elif sleepTimeO.minute > 0:
            return sleepTimeO.hour + 0.5
            #return '{}:{}'.format(str(sleepTimeO.hour), ':30')
    else:
        return np.nan
    
def defineAwakeBucket(row):
    awakeEntry = row['Sleep End time']
    if not is_nan(awakeEntry):
        awakeEntryO = datetime.datetime.strptime(awakeEntry, '%Y-%m-%dT%H:%M:%S.000').time()        
        if awakeEntryO.minute > 30:
            return (awakeEntryO.hour + 1)
            #return '{}:{}'.format(str((awakeEntryO.hour + 1)), '00')
        elif awakeEntryO.minute > 0:
            return awakeEntryO.hour + 0.5
            #return '{}:{}'.format(str(awakeEntryO.hour), ':30')
    else:
        return np.nan    

#remove all entries where there was no steps recorded. i.e no activity
data = data[data['Steps'].notnull()]

data['Day Label'] = data['Day of Week'].apply(lambda x: days[x])
data['Active exercise'] = data['Minutes Very Active'] > 40

#Build additional Sleep columns
data['Sleep Bucket'] = data.apply(defineSleepBucket, axis=1)
data['Awake Bucket'] = data.apply(defineAwakeBucket, axis=1)

data['% Awake'] = 100 - (data['% Deep sleep'] + data['% REM sleep'] + data['% Light sleep'])
data['% Restorative sleep'] = data['% Deep sleep'] + data['% REM sleep']
data['Restorative sleep mins'] = data['Minutes Deep sleep'] + data['Minutes REM sleep']

#remove all entries where there was no Deep sleep recorded
sleepData = data[data['% Deep sleep'].notnull()]

