In [1]:
# Import the necessary dependencies.
import numpy as np
import pandas as pd
import plotly.express as px
import scipy.stats as sts
import plotly.express as plt
import statsmodels.api as sm
from statsmodels.stats.multicomp import pairwise_tukeyhsd

## The Data

These data sets are from Fitbit. They contain 10K+ rows of data on the daily activity and sleep of 20+ people. To clean the data, we took each person's daily averages for sleep, steps, calories, and distance walked/run. Using that condensed data, we created scatterplots analyzing the amount of sleep, the number of sedentary minutes, and daily steps. We also analyzed time in bed, time asleep, and steps.

Questions: 
- Does being more active (i.e. daily steps, active minutes) reduce the amount of time it takes to fall asleep?
- Does sedentary time affect the amount of sleep?

**Create a null hypothesis, an alternative hypothesis, and choose a significance level. Use this cell to document your decisions.**

Question: Does waking up in the middle of the night affect your stress levels the next day?

Hypothesis: If disrupting your sleep is related to your stress levels the next day, waking up more than 10 times will result in higher stress levels the next day.

Null Hypothesis: Waking up more than 10 times will result in no increase in the amount of stress the next day.

Alternative Hypothesis: Waking up more than 10 times will result in an increase in the amount of stress the next day.

Significance Level: Our p-value for daily stress level is high for daily stress levels between 10 and 30, so there is not sufficient evidence to reject the null hypothesis.



In [2]:
# Get dailyActivty_merged dataset from fitbit data
# To clean the data we groupby and get the mean
sedentary_df = pd.read_csv('fitbit/dailyActivity_merged.csv')
sedentary_df = sedentary_df.groupby('Id').mean()
sedentary_df.head()

Unnamed: 0_level_0,TotalSteps,TotalDistance,TrackerDistance,LoggedActivitiesDistance,VeryActiveDistance,ModeratelyActiveDistance,LightActiveDistance,SedentaryActiveDistance,VeryActiveMinutes,FairlyActiveMinutes,LightlyActiveMinutes,SedentaryMinutes,Calories
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1503960366,12116.741935,7.809677,7.809677,0.0,2.858387,0.794194,4.152903,0.0,38.709677,19.16129,219.935484,848.16129,1816.419355
1624580081,5743.903226,3.914839,3.914839,0.0,0.939355,0.360645,2.606774,0.006129,8.677419,5.806452,153.483871,1257.741935,1483.354839
1644430081,7282.966667,5.295333,5.295333,0.0,0.73,0.951,3.609,0.004,9.566667,21.366667,178.466667,1161.866667,2811.3
1844505072,2580.064516,1.706129,1.706129,0.0,0.008387,0.049032,1.647419,0.0,0.129032,1.290323,115.451613,1206.612903,1573.483871
1927972279,916.129032,0.634516,0.634516,0.0,0.095806,0.03129,0.507097,0.0,1.322581,0.774194,38.580645,1317.419355,2172.806452


In [3]:
# Get the sleepDay_merged dataset from the Fitbit data.
# To clean the data we group by user Id and take the mean of every numeric column.
# numeric_only=True is required on pandas >= 2.0, where averaging a non-numeric
# column raises TypeError instead of silently dropping it.
#
# TimeToFallAsleep is the average time in bed minus the average minutes asleep.
# NOTE(review): this difference counts all time in bed spent not asleep, which
# may include awakenings during the night and not only the time needed to fall
# asleep — the column name is kept because later cells reference it.
sleep_df = (
    pd.read_csv('fitbit/sleepDay_merged.csv')
    .groupby('Id')
    .mean(numeric_only=True)
    .assign(
        TimeToFallAsleep=lambda per_user: (
            per_user['TotalTimeInBed'] - per_user['TotalMinutesAsleep']
        )
    )
)
sleep_df.head()

Unnamed: 0_level_0,TotalSleepRecords,TotalMinutesAsleep,TotalTimeInBed,TimeToFallAsleep
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1503960366,1.08,360.28,383.2,22.92
1644430081,1.0,294.0,346.0,52.0
1844505072,1.0,652.0,961.0,309.0
1927972279,1.6,417.0,437.8,20.8
2026352035,1.0,506.178571,537.642857,31.464286


In [4]:
# Merge sleep_df with sedentary_df on the user Id (merge defaults to an inner
# join, so only users present in both frames are kept).
# drop_duplicates() returns a new frame rather than modifying in place, so its
# result must be assigned; previously the call's result was discarded.
combined_df = sleep_df.merge(sedentary_df, on='Id').drop_duplicates()
combined_df.head()

Unnamed: 0_level_0,TotalSleepRecords,TotalMinutesAsleep,TotalTimeInBed,TimeToFallAsleep,TotalSteps,TotalDistance,TrackerDistance,LoggedActivitiesDistance,VeryActiveDistance,ModeratelyActiveDistance,LightActiveDistance,SedentaryActiveDistance,VeryActiveMinutes,FairlyActiveMinutes,LightlyActiveMinutes,SedentaryMinutes,Calories
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
1503960366,1.08,360.28,383.2,22.92,12116.741935,7.809677,7.809677,0.0,2.858387,0.794194,4.152903,0.0,38.709677,19.16129,219.935484,848.16129,1816.419355
1644430081,1.0,294.0,346.0,52.0,7282.966667,5.295333,5.295333,0.0,0.73,0.951,3.609,0.004,9.566667,21.366667,178.466667,1161.866667,2811.3
1844505072,1.0,652.0,961.0,309.0,2580.064516,1.706129,1.706129,0.0,0.008387,0.049032,1.647419,0.0,0.129032,1.290323,115.451613,1206.612903,1573.483871
1927972279,1.6,417.0,437.8,20.8,916.129032,0.634516,0.634516,0.0,0.095806,0.03129,0.507097,0.0,1.322581,0.774194,38.580645,1317.419355,2172.806452
2026352035,1.0,506.178571,537.642857,31.464286,5566.870968,3.454839,3.454839,0.0,0.006129,0.01129,3.436129,0.0,0.096774,0.258065,256.645161,689.419355,1540.645161


In [5]:
# Pairwise Pearson correlation (the pandas default) between every column of the
# merged per-user averages.
combined_df.corr(method='pearson')

Unnamed: 0,TotalSleepRecords,TotalMinutesAsleep,TotalTimeInBed,TimeToFallAsleep,TotalSteps,TotalDistance,TrackerDistance,LoggedActivitiesDistance,VeryActiveDistance,ModeratelyActiveDistance,LightActiveDistance,SedentaryActiveDistance,VeryActiveMinutes,FairlyActiveMinutes,LightlyActiveMinutes,SedentaryMinutes,Calories
TotalSleepRecords,1.0,0.180406,0.118174,-0.063441,-0.182006,-0.158641,-0.155259,-0.107772,-0.15917,-0.087964,-0.076454,-0.151639,-0.154466,-0.141955,-0.121234,-0.053196,0.034278
TotalMinutesAsleep,0.180406,1.0,0.94023,0.411569,-0.218571,-0.207983,-0.19388,-0.371701,-0.156875,-0.169274,-0.139836,-0.042635,-0.064302,-0.144973,-0.202731,-0.376279,-0.040646
TotalTimeInBed,0.118174,0.94023,1.0,0.697332,-0.240679,-0.236997,-0.224672,-0.342326,-0.194964,-0.066477,-0.183736,-0.063349,-0.122541,-0.057641,-0.230326,-0.270743,-0.167031
TimeToFallAsleep,-0.063441,0.411569,0.697332,1.0,-0.184087,-0.196517,-0.193217,-0.133822,-0.191594,0.178366,-0.197408,-0.079803,-0.192612,0.150867,-0.189718,0.067387,-0.361469
TotalSteps,-0.182006,-0.218571,-0.240679,-0.184087,1.0,0.985777,0.984553,0.256617,0.729549,0.581238,0.690334,-0.059438,0.632106,0.537021,0.563201,-0.50639,0.321572
TotalDistance,-0.158641,-0.207983,-0.236997,-0.196517,0.985777,1.0,0.99946,0.267496,0.763837,0.551887,0.682294,-0.072,0.669048,0.511986,0.514461,-0.462965,0.443382
TrackerDistance,-0.155259,-0.19388,-0.224672,-0.193217,0.984553,0.99946,1.0,0.239702,0.765356,0.553787,0.678045,-0.076275,0.671056,0.514568,0.508139,-0.469016,0.444022
LoggedActivitiesDistance,-0.107772,-0.371701,-0.342326,-0.133822,0.256617,0.267496,0.239702,1.0,0.169885,0.04093,0.288349,0.060285,0.22757,-0.000241,0.217905,-0.014431,0.27531
VeryActiveDistance,-0.15917,-0.156875,-0.194964,-0.191594,0.729549,0.763837,0.765356,0.169885,1.0,0.183003,0.091617,-0.091063,0.87763,0.235211,-0.013398,-0.058888,0.455649
ModeratelyActiveDistance,-0.087964,-0.169274,-0.066477,0.178366,0.581238,0.551887,0.553787,0.04093,0.183003,1.0,0.40231,0.047208,0.173267,0.95538,0.241151,-0.425531,0.032014


In [9]:
# Note: Average data
# Each point is one user: average minutes asleep (x) against average sedentary
# minutes (y), colored by average total steps.
px.scatter(
    combined_df,
    x='TotalMinutesAsleep',
    y='SedentaryMinutes',
    color='TotalSteps',
    title='Amount of Sedentary Minutes and Total Minutes Asleep',
)

In this chart we see that there is no strong correlation between Sedentary Minutes and Total Minutes Asleep (the correlation table above gives r ≈ -0.38). We thought that being less sedentary throughout the day meant that you would get more sleep, but we can see some users not getting the recommended 7-9 to

In [7]:
# Note: Average data
# Each point is one user: average minutes asleep (x) against average time in
# bed (y), colored by average sedentary minutes. A title is set so the figure
# stands alone, consistent with the first chart in this notebook.
px.scatter(
    combined_df,
    x='TotalMinutesAsleep',
    y='TotalTimeInBed',
    color='SedentaryMinutes',
    title='Total Time in Bed and Total Minutes Asleep',
)

In [8]:
# Note: Average data
# Each point is one user: average lightly active minutes (x) against the
# derived TimeToFallAsleep column (y). A title is set so the figure stands
# alone, consistent with the first chart in this notebook.
px.scatter(
    combined_df,
    x='LightlyActiveMinutes',
    y='TimeToFallAsleep',
    title='Time to Fall Asleep and Lightly Active Minutes',
)