In [2]:
# Import the necessary dependencies.
import numpy as np
import pandas as pd
import plotly.express as px
import scipy.stats as sts
import plotly.express as plt
import statsmodels.api as sm
from statsmodels.stats.multicomp import pairwise_tukeyhsd

**Create a null hypothesis, an alternative hypothesis, and choose a significance level. Use this cell to document your decisions.**

Question: Does waking up in the middle of the night affect your stress levels the next day?

Hypothesis: If disrupting your sleep is related to your stress levels the next day, waking up more than 10 times will result in higher stress levels the next day.

Null Hypothesis: Waking up more than 10 times wil result in no increase in the amount of stress the next day.

Alternative Hypothesis: Waking up more than 10 times wil result in an increase in the amount of stress the next day.

Significance Level:  Our p value for daily stress level is high for Daily stress levels between 10 - 30, sufficient evidence to NOT reject the null hypothesis.


Other notes:
There is a slight correlation between these factors:
- Daily Stress vs. Number of Awakenings
- Daily stress and Total amount of Sleep (TST) in minutes

In [3]:
# Get actvity DF for all users
sleep_list = []

for num in range(1,23):
    user_sleep_df = pd.read_csv(f'DataPaper/user_{num}/sleep.csv')
    stress_df = pd.read_csv(f'DataPaper/user_{num}/questionnaire.csv')

    user_sleep_df = user_sleep_df.merge(stress_df)
    sleep_list.append(user_sleep_df)
    
sleep_df = pd.concat(sleep_list)
sleep_df.head()

Unnamed: 0.1,Unnamed: 0,In Bed Date,In Bed Time,Out Bed Date,Out Bed Time,Onset Date,Onset Time,Latency,Efficiency,Total Minutes in Bed,...,panas_pos_10,panas_pos_14,panas_pos_18,panas_pos_22,panas_pos_9+1,panas_neg_10,panas_neg_14,panas_neg_18,panas_neg_22,panas_neg_9+1
0,0,2,00:46,2,03:31,2,00:46,0,87.27,165,...,21.0,17.0,12.0,18.0,17.0,11.0,13.0,13.0,10.0,10.0
0,0,2,00:50,2,06:22,2,00:54,4,73.49,332,...,37.0,32.0,24.0,27.0,33.0,11.0,10.0,16.0,17.0,18.0
0,0,1,22:29,1,05:52,1,22:32,3,79.23,443,...,35.0,34.0,31.0,28.0,35.0,11.0,12.0,11.0,12.0,11.0
0,0,2,00:57,2,07:10,2,01:01,4,85.52,373,...,30.0,27.0,22.0,19.0,26.0,11.0,13.0,15.0,14.0,14.0
0,0,1,23:56,1,06:42,1,23:56,0,85.71,406,...,30.0,25.0,31.0,27.0,31.0,26.0,17.0,17.0,15.0,16.0


In [4]:
# Sleeping less causes stress
sleep_df.columns

Index(['Unnamed: 0', 'In Bed Date', 'In Bed Time', 'Out Bed Date',
       'Out Bed Time', 'Onset Date', 'Onset Time', 'Latency', 'Efficiency',
       'Total Minutes in Bed', 'Total Sleep Time (TST)',
       'Wake After Sleep Onset (WASO)', 'Number of Awakenings',
       'Average Awakening Length', 'Movement Index', 'Fragmentation Index',
       'Sleep Fragmentation Index', 'MEQ', 'STAI1', 'STAI2', 'Pittsburgh',
       'Daily_stress', 'BISBAS_bis', 'BISBAS_reward', 'BISBAS_drive',
       'BISBAS_fun', 'panas_pos_10', 'panas_pos_14', 'panas_pos_18',
       'panas_pos_22', 'panas_pos_9+1', 'panas_neg_10', 'panas_neg_14',
       'panas_neg_18', 'panas_neg_22', 'panas_neg_9+1'],
      dtype='object')

In [5]:
# Scatter plot showing the quantity of sleep and stress level
plt = px.scatter(sleep_df, x='Total Sleep Time (TST)', y='Number of Awakenings', 
        color='Daily_stress', title='Total Sleep time and Number of Awakenings to Daily Stress the next Day')
plt.show()


In [33]:
# Sleeping less causes stress
g1 = sleep_df[(sleep_df['Daily_stress'] >= 10) & (sleep_df['Daily_stress'] < 30)]['Total Sleep Time (TST)']
g2 = sleep_df[(sleep_df['Daily_stress'] > 30) & (sleep_df['Daily_stress'] < 50)]['Total Sleep Time (TST)']
g3 = sleep_df[(sleep_df['Daily_stress'] > 50) & (sleep_df['Daily_stress'] <= 70)]['Total Sleep Time (TST)']

ranges = ["All", "g1 (10 - 30)", "g2 (30 - 50)", "g3 (40 - 70)"]
all = [sts.f_oneway(g1,g2,g3), 
    sts.f_oneway(g2,g3),
    sts.f_oneway(g1,g3),
    sts.f_oneway(g1,g2)]

for i in range(len(ranges)):
    print(f"Daily Stress {ranges[i]}\t {all[i]}")
    

Daily Stress All	 F_onewayResult(statistic=1.3718166383701191, pvalue=0.2819224314905824)
Daily Stress g1 (10 - 30)	 F_onewayResult(statistic=0.2117647058823529, pvalue=0.6576369864791527)
Daily Stress g2 (30 - 50)	 F_onewayResult(statistic=1.4, pvalue=0.2706900128603841)
Daily Stress g3 (40 - 70)	 F_onewayResult(statistic=2.064516129032258, pvalue=0.17002963876130742)


In [34]:
tukey = pairwise_tukeyhsd(endog=sleep_df["Daily_stress"],
                          groups=sleep_df["Total Sleep Time (TST)"],
                          alpha=0.05)
print(tukey)

  Multiple Comparison of Means - Tukey HSD, FWER=0.05  
group1 group2 meandiff p-adj    lower    upper   reject
-------------------------------------------------------
 144.0  236.0     12.0 0.9888 -535.4763 559.4763  False
 144.0  244.0      3.0    1.0 -544.4763 550.4763  False
 144.0  247.0     -2.5    1.0 -476.6284 471.6284  False
 144.0  273.0      7.0 0.9998 -540.4763 554.4763  False
 144.0  302.0      9.0 0.9985 -538.4763 556.4763  False
 144.0  304.0     18.0 0.9272 -529.4763 565.4763  False
 144.0  306.0     -3.0    1.0 -550.4763 544.4763  False
 144.0  319.0    -13.0 0.9825 -560.4763 534.4763  False
 144.0  333.0     18.0 0.9272 -529.4763 565.4763  False
 144.0  336.0     51.0 0.4955 -496.4763 598.4763  False
 144.0  339.0      9.0 0.9985 -538.4763 556.4763  False
 144.0  340.0      8.0 0.9994 -539.4763 555.4763  False
 144.0  342.0     46.0 0.5398 -501.4763 593.4763  False
 144.0  343.0     25.0 0.8157 -522.4763 572.4763  False
 144.0  348.0     18.0 0.9272 -529.4763 565.4763

In [35]:
# Sleeping less causes stress
g1 = sleep_df[(sleep_df['Daily_stress'] >= 10) & (sleep_df['Daily_stress'] < 30)]['Number of Awakenings']
g2 = sleep_df[(sleep_df['Daily_stress'] > 30) & (sleep_df['Daily_stress'] < 50)]['Number of Awakenings']
g3 = sleep_df[(sleep_df['Daily_stress'] > 50) & (sleep_df['Daily_stress'] <= 70)]['Number of Awakenings']

ranges = ["All", "g1 (10 - 30)", "g2 (30 - 50)", "g3 (40 - 70)"]
all = [sts.f_oneway(g1,g2,g3), 
    sts.f_oneway(g2,g3),
    sts.f_oneway(g1,g3),
    sts.f_oneway(g1,g2)]

for i in range(len(ranges)):
    print(f"Daily Stress {ranges[i]}\t {all[i]}")
    

Daily Stress All	 F_onewayResult(statistic=0.9404379810833072, pvalue=0.4110073013402771)
Daily Stress g1 (10 - 30)	 F_onewayResult(statistic=0.5601267828843106, pvalue=0.4756339637829176)
Daily Stress g2 (30 - 50)	 F_onewayResult(statistic=5.671506352087115, pvalue=0.04444203198645557)
Daily Stress g3 (40 - 70)	 F_onewayResult(statistic=0.5186184565569347, pvalue=0.48182053346742504)


In [36]:
tukey = pairwise_tukeyhsd(endog=sleep_df["Daily_stress"],
                          groups=sleep_df["Number of Awakenings"],
                          alpha=0.05)
print(tukey)

  Multiple Comparison of Means - Tukey HSD, FWER=0.05  
group1 group2 meandiff p-adj    lower    upper   reject
-------------------------------------------------------
   4.0    9.0    -10.5 0.9992  -72.7442  51.7442  False
   4.0   12.0     -3.0    1.0  -65.2442  59.2442  False
   4.0   13.0    -24.0 0.8808  -95.8735  47.8735  False
   4.0   15.0     -3.0    1.0  -74.8735  68.8735  False
   4.0   16.0    -27.0 0.8002  -98.8735  44.8735  False
   4.0   18.0    -10.0 0.9995  -72.2442  52.2442  False
   4.0   19.0    -11.5 0.9981  -73.7442  50.7442  False
   4.0   20.0     -7.5    1.0  -69.7442  54.7442  False
   4.0   21.0      3.0    1.0  -59.2442  65.2442  False
   4.0   27.0     36.0 0.5182  -35.8735 107.8735  False
   4.0   28.0    -28.0 0.7702  -99.8735  43.8735  False
   4.0   31.0     31.0 0.6753  -40.8735 102.8735  False
   4.0   39.0     -6.0    1.0  -77.8735  65.8735  False
   4.0   44.0     -7.0    1.0  -78.8735  64.8735  False
   9.0   12.0      7.5 0.9998  -43.3222  58.3222

In [10]:
# Correlation between Daily_stress and Number of Awakenings
sleep_df['Daily_stress'] = sleep_df['Daily_stress'].astype(float)
sleep_df['Number of Awakenings'] = sleep_df['Number of Awakenings'].astype(float)

sleep_df['Daily_stress'].corr(sleep_df['Number of Awakenings'])

0.2201387169658961

In [11]:
# Correlation between Daily_stress and Total amount of Sleep (TST) in minutes
sleep_df['Daily_stress'] = sleep_df['Daily_stress'].astype(float)
sleep_df['Total Sleep Time (TST)'] = sleep_df['Total Sleep Time (TST)'].astype(float)

sleep_df['Daily_stress'].corr(sleep_df['Total Sleep Time (TST)'])

0.21504706810119653

In [12]:
# Correlation between Daily_stress and Total amount of Sleep (TST) in minutes
sleep_df.corr()

Unnamed: 0.1,Unnamed: 0,Total Sleep Time (TST),Number of Awakenings,MEQ,STAI1,STAI2,Pittsburgh,Daily_stress,BISBAS_bis,BISBAS_reward,...,panas_pos_10,panas_pos_14,panas_pos_18,panas_pos_22,panas_pos_9+1,panas_neg_10,panas_neg_14,panas_neg_18,panas_neg_22,panas_neg_9+1
Unnamed: 0,,,,,,,,,,,...,,,,,,,,,,
Total Sleep Time (TST),,1.0,-0.02663,-0.074965,-0.037475,-0.115043,0.348963,0.215047,-0.23194,-0.211379,...,0.198397,0.269777,0.457295,0.202741,0.13052,0.12222,-0.066929,-0.169659,0.10761,0.277638
Number of Awakenings,,-0.02663,1.0,0.009703,-0.325892,-0.285898,-0.2025,0.220139,0.068283,-0.062718,...,-0.145688,-0.209193,-0.191308,-0.050105,-0.172423,-0.118478,-0.317334,-0.067639,-0.231971,0.17393
MEQ,,-0.074965,0.009703,1.0,0.064761,0.016279,-0.209774,0.126979,0.338104,-0.119249,...,0.437591,0.113905,0.182841,0.037962,0.198356,-0.1117,-0.328746,-0.298023,-0.484721,-0.336541
STAI1,,-0.037475,-0.325892,0.064761,1.0,0.320353,-0.038403,0.01766,0.296743,0.22585,...,-0.21054,-0.232667,0.150528,-0.058234,-0.172416,0.556821,0.503186,0.272083,-0.021982,-0.192108
STAI2,,-0.115043,-0.285898,0.016279,0.320353,1.0,-0.208806,-0.415194,0.384374,0.121036,...,0.029984,0.226447,0.348308,0.327436,0.134947,0.366462,0.174431,0.085288,0.041399,0.0172
Pittsburgh,,0.348963,-0.2025,-0.209774,-0.038403,-0.208806,1.0,0.10953,-0.223407,0.148965,...,0.279767,0.441286,0.268345,0.479459,0.11187,0.032929,-0.109893,0.043959,0.027883,0.571732
Daily_stress,,0.215047,0.220139,0.126979,0.01766,-0.415194,0.10953,1.0,0.090844,0.105119,...,0.055891,-0.178304,0.048792,-0.184617,-0.354012,0.064892,-0.198414,-0.114323,-0.082023,0.00906
BISBAS_bis,,-0.23194,0.068283,0.338104,0.296743,0.384374,-0.223407,0.090844,1.0,0.231605,...,0.190807,-0.067705,0.102082,0.07358,0.211445,0.225481,-0.033812,0.273934,-0.223298,0.138724
BISBAS_reward,,-0.211379,-0.062718,-0.119249,0.22585,0.121036,0.148965,0.105119,0.231605,1.0,...,-0.077196,0.294002,0.350398,0.336434,0.197735,0.367624,0.214155,0.161039,-0.050582,0.110942


In [31]:
# Sleeping less causes stress
g1 = sleep_df[(sleep_df['Daily_stress'] >= 10) & (sleep_df['Daily_stress'] < 30)]['Pittsburgh']
g2 = sleep_df[(sleep_df['Daily_stress'] > 30) & (sleep_df['Daily_stress'] < 50)]['Pittsburgh']
g3 = sleep_df[(sleep_df['Daily_stress'] > 50) & (sleep_df['Daily_stress'] <= 70)]['Pittsburgh']
ranges = ["All", "g1 (10 - 30)", "g2 (30 - 50)", "g3 (40 - 70)"]

all = [sts.f_oneway(g1,g2,g3), 
    sts.f_oneway(g2,g3),
    sts.f_oneway(g1,g3),
    sts.f_oneway(g1,g2)]

for i in range(len(ranges)):
    print(f"Daily Stress {ranges[i]}\t {all[i]}")
    

Daily Stress All	 F_onewayResult(statistic=1.3718166383701191, pvalue=0.2819224314905824)
Daily Stress g1 (10 - 30)	 F_onewayResult(statistic=0.2117647058823529, pvalue=0.6576369864791527)
Daily Stress g2 (30 - 50)	 F_onewayResult(statistic=1.4, pvalue=0.2706900128603841)
Daily Stress g3 (40 - 70)	 F_onewayResult(statistic=2.064516129032258, pvalue=0.17002963876130742)


In [50]:
# tukey = pairwise_tukeyhsd(endog=copy1["Pittsburgh"],
#                           groups=copy1['Daily_stress'],
#                           alpha=0.05)
# print(tukey)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [None]:
px.scatter(sleep_df, 'Pittsburgh', )