In [2]:
# Import the necessary dependencies.
import numpy as np
import pandas as pd
import plotly.express as px
import scipy.stats as sts
import plotly.express as plt
import statsmodels.api as sm
from statsmodels.stats.multicomp import pairwise_tukeyhsd

**Create a null hypothesis, an alternative hypothesis, and choose a significance level. Use this cell to document your decisions.**

Question: Does waking up in the middle of the night affect your stress levels the next day?

Hypothesis: If disrupting your sleep is related to your stress levels the next day, waking up more than 10 times will result in higher stress levels the next day.

Null Hypothesis: Waking up more than 10 times wil result in no increase in the amount of stress the next day.

Alternative Hypothesis: Waking up more than 10 times wil result in an increase in the amount of stress the next day.

Significance Level:  Our p value for daily stress level is high for Daily stress levels between 10 - 30, sufficient evidence to NOT reject the null hypothesis.



In [27]:
# Get actvity DF for all users
sleep_list = []

for num in range(1,23):
    user_sleep_df = pd.read_csv(f'DataPaper/user_{num}/sleep.csv')
    stress_df = pd.read_csv(f'DataPaper/user_{num}/questionnaire.csv')

    user_sleep_df = user_sleep_df.merge(stress_df)
    sleep_list.append(user_sleep_df)
    
sleep_df = pd.concat(sleep_list)
sleep_df.head()

Unnamed: 0.1,Unnamed: 0,In Bed Date,In Bed Time,Out Bed Date,Out Bed Time,Onset Date,Onset Time,Latency,Efficiency,Total Minutes in Bed,...,panas_pos_10,panas_pos_14,panas_pos_18,panas_pos_22,panas_pos_9+1,panas_neg_10,panas_neg_14,panas_neg_18,panas_neg_22,panas_neg_9+1
0,0,2,00:46,2,03:31,2,00:46,0,87.27,165,...,21.0,17.0,12.0,18.0,17.0,11.0,13.0,13.0,10.0,10.0
0,0,2,00:50,2,06:22,2,00:54,4,73.49,332,...,37.0,32.0,24.0,27.0,33.0,11.0,10.0,16.0,17.0,18.0
0,0,1,22:29,1,05:52,1,22:32,3,79.23,443,...,35.0,34.0,31.0,28.0,35.0,11.0,12.0,11.0,12.0,11.0
0,0,2,00:57,2,07:10,2,01:01,4,85.52,373,...,30.0,27.0,22.0,19.0,26.0,11.0,13.0,15.0,14.0,14.0
0,0,1,23:56,1,06:42,1,23:56,0,85.71,406,...,30.0,25.0,31.0,27.0,31.0,26.0,17.0,17.0,15.0,16.0


In [4]:
# Sleeping less causes stress
sleep_df.columns

Index(['Unnamed: 0', 'In Bed Date', 'In Bed Time', 'Out Bed Date',
       'Out Bed Time', 'Onset Date', 'Onset Time', 'Latency', 'Efficiency',
       'Total Minutes in Bed', 'Total Sleep Time (TST)',
       'Wake After Sleep Onset (WASO)', 'Number of Awakenings',
       'Average Awakening Length', 'Movement Index', 'Fragmentation Index',
       'Sleep Fragmentation Index', 'MEQ', 'STAI1', 'STAI2', 'Pittsburgh',
       'Daily_stress', 'BISBAS_bis', 'BISBAS_reward', 'BISBAS_drive',
       'BISBAS_fun', 'panas_pos_10', 'panas_pos_14', 'panas_pos_18',
       'panas_pos_22', 'panas_pos_9+1', 'panas_neg_10', 'panas_neg_14',
       'panas_neg_18', 'panas_neg_22', 'panas_neg_9+1'],
      dtype='object')

In [1]:
# Scatter plot showing the quantity of sleep and stress level
plt = px.scatter(sleep_df, x='Total Sleep Time (TST)', y='Number of Awakenings', 
        color='Daily_stress', title='Total Sleep time and Number of Awakenings to Daily Stress the next Day')
plt.show()


NameError: name 'px' is not defined

In [29]:
# X = sleep_df[['Total Sleep Time (TST)', 'Number of Awakenings']]
# y = sleep_df['Daily_stress']
# X = sm.add_constant(X)
# model = sm.OLS(y, X)
# results = model.fit()

# print(results.summary())

In [30]:
# Sleeping less causes stress
g1 = sleep_df[(sleep_df['Daily_stress'] >= 10) & (sleep_df['Daily_stress'] < 30)]['Daily_stress']
g2 = sleep_df[(sleep_df['Daily_stress'] > 30) & (sleep_df['Daily_stress'] < 50)]['Daily_stress']
g3 = sleep_df[(sleep_df['Daily_stress'] > 50) & (sleep_df['Daily_stress'] <= 70)]['Daily_stress']

In [31]:
ranges = ["(10 - 25)", "(25 - 40)", "(40 - 70)"]
all = [sts.f_oneway(g1,g2,g3), 
    sts.f_oneway(g2,g3),
    sts.f_oneway(g1,g3)]

for i in range(len(ranges)):
    print(f"Daily Stress {ranges[i]}\t {all[i]}")
    

Daily Stress (10 - 25)	 F_onewayResult(statistic=38.951454065408875, pvalue=7.104401907634512e-07)
Daily Stress (25 - 40)	 F_onewayResult(statistic=27.612499999999997, pvalue=0.0007693775301233502)
Daily Stress (40 - 70)	 F_onewayResult(statistic=49.490563647878446, pvalue=0.00010878373349810067)


In [32]:
tukey = pairwise_tukeyhsd(endog=sleep_df["Daily_stress"],
                          groups=sleep_df["Total Sleep Time (TST)"],
                          alpha=0.05)
print(tukey)

  Multiple Comparison of Means - Tukey HSD, FWER=0.05  
group1 group2 meandiff p-adj    lower    upper   reject
-------------------------------------------------------
   144    236     12.0 0.9888 -535.4763 559.4763  False
   144    244      3.0    1.0 -544.4763 550.4763  False
   144    247     -2.5    1.0 -476.6284 471.6284  False
   144    273      7.0 0.9998 -540.4763 554.4763  False
   144    302      9.0 0.9985 -538.4763 556.4763  False
   144    304     18.0 0.9272 -529.4763 565.4763  False
   144    306     -3.0    1.0 -550.4763 544.4763  False
   144    319    -13.0 0.9825 -560.4763 534.4763  False
   144    333     18.0 0.9272 -529.4763 565.4763  False
   144    336     51.0 0.4955 -496.4763 598.4763  False
   144    339      9.0 0.9985 -538.4763 556.4763  False
   144    340      8.0 0.9994 -539.4763 555.4763  False
   144    342     46.0 0.5398 -501.4763 593.4763  False
   144    343     25.0 0.8157 -522.4763 572.4763  False
   144    348     18.0 0.9272 -529.4763 565.4763

In [33]:
tukey = pairwise_tukeyhsd(endog=sleep_df["Daily_stress"],
                          groups=sleep_df["Number of Awakenings"],
                          alpha=0.05)
print(tukey)

  Multiple Comparison of Means - Tukey HSD, FWER=0.05  
group1 group2 meandiff p-adj    lower    upper   reject
-------------------------------------------------------
     4      9    -10.5 0.9992  -72.7442  51.7442  False
     4     12     -3.0    1.0  -65.2442  59.2442  False
     4     13    -24.0 0.8808  -95.8735  47.8735  False
     4     15     -3.0    1.0  -74.8735  68.8735  False
     4     16    -27.0 0.8002  -98.8735  44.8735  False
     4     18    -10.0 0.9995  -72.2442  52.2442  False
     4     19    -11.5 0.9981  -73.7442  50.7442  False
     4     20     -7.5    1.0  -69.7442  54.7442  False
     4     21      3.0    1.0  -59.2442  65.2442  False
     4     27     36.0 0.5182  -35.8735 107.8735  False
     4     28    -28.0 0.7702  -99.8735  43.8735  False
     4     31     31.0 0.6753  -40.8735 102.8735  False
     4     39     -6.0    1.0  -77.8735  65.8735  False
     4     44     -7.0    1.0  -78.8735  64.8735  False
     9     12      7.5 0.9998  -43.3222  58.3222

In [40]:
# Correlation between Daily_stress and Number of Awakenings
sleep_df['Daily_stress'] = sleep_df['Daily_stress'].astype(float)
sleep_df['Number of Awakenings'] = sleep_df['Number of Awakenings'].astype(float)

sleep_df['Daily_stress'].corr(sleep_df['Number of Awakenings'])

0.2201387169658961

In [42]:
# Correlation between Daily_stress and Total amount of Sleep (TST) in minutes
sleep_df['Daily_stress'] = sleep_df['Daily_stress'].astype(float)
sleep_df['Total Sleep Time (TST)'] = sleep_df['Total Sleep Time (TST)'].astype(float)

sleep_df['Daily_stress'].corr(sleep_df['Total Sleep Time (TST)'])

0.21504706810119653

In [44]:
sleep_df['Daily_stress'].unique() 

array([23., 26., 11., 10., 41., 69., 74., 38., 14., 48., 27., 35., 32.,
       20., 31., 22., 30.])