In [1]:
# Import the necessary dependencies.
import numpy as np
import pandas as pd
import plotly.express as px
import scipy.stats as sts
import plotly.express as plt
import statsmodels.api as sm
from statsmodels.stats.multicomp import pairwise_tukeyhsd

## Negative Emotions before Sleep Hypothesis

**Create a null hypothesis, an alternative hypothesis, and choose a significance level. Use this cell to document your decisions.**

Notes: Pittsburgh score represents quality of sleep. Pittsburgh score below 6 -> better sleep. The lower the better quality of sleep.

Question: Do negative emotions influence the quality of our sleep? 

Hypothesis: If you have negative emotions the night before going to sleep, your sleep quality will decrease (Pittsburgh score > 6)

Null Hypothesis: Having negative emotions before going to bed will result in no decrease in quality of sleep.

Alternative Hypothesis: Having negative emotion score of above 14 will result in a decrease in quality of sleep.

Significance Level:  Our p value for negative emotions are above 0.05, so accept the null for negative emotions

Other notes:

Negative emotions do not have a significant impact on quality of sleep based on the linear regression. 

## Positive Emotions before Sleep Hypothesis

**Create a null hypothesis, an alternative hypothesis, and choose a significance level. Use this cell to document your decisions.**

Notes: Pittsburgh score represents quality of sleep. Pittsburgh score below 6 -> better sleep. The lower the better quality of sleep.

Question: Does positive emotions influence the quality of our sleep? 

Hypothesis: If you have positive emotions the night before going to sleep, your sleep quality will decrease (Pittsburgh score > 6)

Null Hypothesis: Having positive emotions before going to bed will result in no decrease in quality of sleep.

Alternative Hypothesis: Having positive emotion score of above 14 will result in a decrease in quality of sleep.

Significance Level:  Our p value for positive emotions are below 0.05, so we reject the null for positive emotions

FINDINGS:

Positive emotions have a significant impact on quality of sleep based on the linear regression. 

## Negative Emotions after Sleep Hypothesis

**Create a null hypothesis, an alternative hypothesis, and choose a significance level. Use this cell to document your decisions.**

Notes: Pittsburgh score represents quality of sleep. Pittsburgh score below 6 -> better sleep. The lower the better quality of sleep.

Question: Does sleep quality affect negative emotions the morning after?

Hypothesis: If your sleep quality is bad (Pittsburgh score > 6), then you will have more negative emotions (pas_neg_9+1 > 14)

Null Hypothesis: Bad sleep quality (Pittsburgh score > 6) will not affect your negative emotions.

Alternative Hypothesis: Good sleep quality (Pittsburgh score > 6) will result in more negative emotions (pas_neg_9+1 > 14)

Significance Level:  Based on the linear regression model, our p value for sleep quality is below 0.05, so we reject the null for sleep quality and negative emotions the morning after. 

## Positive Emotions after Sleep Hypothesis

**Create a null hypothesis, an alternative hypothesis, and choose a significance level. Use this cell to document your decisions.**

Notes: Pittsburgh score represents quality of sleep. Pittsburgh score below 6 -> better sleep. The lower the better quality of sleep.

Question: Does sleep quality affect positive emotions the morning after?

Hypothesis: If your sleep quality is bad (Pittsburgh score > 6), then you will have more positive emotions (pas_neg_9+1 > 14)

Null Hypothesis: Bad sleep quality (Pittsburgh score > 6) will not affect your positive emotions.

Alternative Hypothesis: Good sleep quality (Pittsburgh score > 6) will result in more positive emotions (pas_neg_9+1 > 14)

Significance Level:  Based on the linear regression model, our p value for sleep quality is above 0.05, so we accept the null for sleep quality and positive emotions the morning after. 

In [2]:
# Get actvity DF for all users
sleep_list = []

for num in range(1,23):
    user_sleep_df = pd.read_csv(f'DataPaper/user_{num}/sleep.csv')
    stress_df = pd.read_csv(f'DataPaper/user_{num}/questionnaire.csv')

    user_sleep_df = user_sleep_df.merge(stress_df)
    sleep_list.append(user_sleep_df)
    
sleep_df = pd.concat(sleep_list)
sleep_df.head()

Unnamed: 0.1,Unnamed: 0,In Bed Date,In Bed Time,Out Bed Date,Out Bed Time,Onset Date,Onset Time,Latency,Efficiency,Total Minutes in Bed,...,panas_pos_10,panas_pos_14,panas_pos_18,panas_pos_22,panas_pos_9+1,panas_neg_10,panas_neg_14,panas_neg_18,panas_neg_22,panas_neg_9+1
0,0,2,00:46,2,03:31,2,00:46,0,87.27,165,...,21.0,17.0,12.0,18.0,17.0,11.0,13.0,13.0,10.0,10.0
0,0,2,00:50,2,06:22,2,00:54,4,73.49,332,...,37.0,32.0,24.0,27.0,33.0,11.0,10.0,16.0,17.0,18.0
0,0,1,22:29,1,05:52,1,22:32,3,79.23,443,...,35.0,34.0,31.0,28.0,35.0,11.0,12.0,11.0,12.0,11.0
0,0,2,00:57,2,07:10,2,01:01,4,85.52,373,...,30.0,27.0,22.0,19.0,26.0,11.0,13.0,15.0,14.0,14.0
0,0,1,23:56,1,06:42,1,23:56,0,85.71,406,...,30.0,25.0,31.0,27.0,31.0,26.0,17.0,17.0,15.0,16.0


In [3]:
# Sleeping less causes stress
sleep_df.columns

Index(['Unnamed: 0', 'In Bed Date', 'In Bed Time', 'Out Bed Date',
       'Out Bed Time', 'Onset Date', 'Onset Time', 'Latency', 'Efficiency',
       'Total Minutes in Bed', 'Total Sleep Time (TST)',
       'Wake After Sleep Onset (WASO)', 'Number of Awakenings',
       'Average Awakening Length', 'Movement Index', 'Fragmentation Index',
       'Sleep Fragmentation Index', 'MEQ', 'STAI1', 'STAI2', 'Pittsburgh',
       'Daily_stress', 'BISBAS_bis', 'BISBAS_reward', 'BISBAS_drive',
       'BISBAS_fun', 'panas_pos_10', 'panas_pos_14', 'panas_pos_18',
       'panas_pos_22', 'panas_pos_9+1', 'panas_neg_10', 'panas_neg_14',
       'panas_neg_18', 'panas_neg_22', 'panas_neg_9+1'],
      dtype='object')

In [4]:
# Correlation between Daily_stress and Total amount of Sleep (TST) in minutes
pd.set_option('display.max_columns', None)
sleep_df.corr()

Unnamed: 0.1,Unnamed: 0,MEQ,STAI1,STAI2,Pittsburgh,Daily_stress,BISBAS_bis,BISBAS_reward,BISBAS_drive,BISBAS_fun,panas_pos_10,panas_pos_14,panas_pos_18,panas_pos_22,panas_pos_9+1,panas_neg_10,panas_neg_14,panas_neg_18,panas_neg_22,panas_neg_9+1
Unnamed: 0,,,,,,,,,,,,,,,,,,,,
MEQ,,1.0,0.064761,0.016279,-0.209774,0.126979,0.338104,-0.119249,-0.242214,-0.22522,0.437591,0.113905,0.182841,0.037962,0.198356,-0.1117,-0.328746,-0.298023,-0.484721,-0.336541
STAI1,,0.064761,1.0,0.320353,-0.038403,0.01766,0.296743,0.22585,0.262892,0.287548,-0.21054,-0.232667,0.150528,-0.058234,-0.172416,0.556821,0.503186,0.272083,-0.021982,-0.192108
STAI2,,0.016279,0.320353,1.0,-0.208806,-0.415194,0.384374,0.121036,0.147198,0.53986,0.029984,0.226447,0.348308,0.327436,0.134947,0.366462,0.174431,0.085288,0.041399,0.0172
Pittsburgh,,-0.209774,-0.038403,-0.208806,1.0,0.10953,-0.223407,0.148965,0.149766,0.061377,0.279767,0.441286,0.268345,0.479459,0.11187,0.032929,-0.109893,0.043959,0.027883,0.571732
Daily_stress,,0.126979,0.01766,-0.415194,0.10953,1.0,0.090844,0.105119,-0.203145,-0.184862,0.055891,-0.178304,0.048792,-0.184617,-0.354012,0.064892,-0.198414,-0.114323,-0.082023,0.00906
BISBAS_bis,,0.338104,0.296743,0.384374,-0.223407,0.090844,1.0,0.231605,-0.290883,0.044658,0.190807,-0.067705,0.102082,0.07358,0.211445,0.225481,-0.033812,0.273934,-0.223298,0.138724
BISBAS_reward,,-0.119249,0.22585,0.121036,0.148965,0.105119,0.231605,1.0,0.215584,0.383075,-0.077196,0.294002,0.350398,0.336434,0.197735,0.367624,0.214155,0.161039,-0.050582,0.110942
BISBAS_drive,,-0.242214,0.262892,0.147198,0.149766,-0.203145,-0.290883,0.215584,1.0,0.544991,-0.05994,0.212834,0.220702,0.11781,-0.101991,-0.118828,0.123986,-0.068761,0.078696,-0.112028
BISBAS_fun,,-0.22522,0.287548,0.53986,0.061377,-0.184862,0.044658,0.383075,0.544991,1.0,-0.226014,0.112428,0.188805,0.113224,-0.192862,0.179195,0.216289,0.315479,0.041451,0.056343


In [28]:
# Scatter plot showing the quantity of sleep and stress level
plt = px.scatter(sleep_df, 
        x='panas_neg_22', y='Pittsburgh', 
        color='panas_neg_9+1', title='Sleep quality and Negative emotions at 10pm')
plt.show()


In [29]:
# Scatter plot showing the quantity of sleep and stress level
plt = px.scatter(sleep_df, 
        x='panas_pos_22', y='Pittsburgh',
        color='panas_pos_9+1', title='Sleep quality and Positive emotions at 10pm')
plt.show()


In [7]:
# Sleeping less causes stress
g1 = sleep_df[(sleep_df['panas_neg_22'] >= 10) & (sleep_df['panas_neg_22'] < 15)]['Pittsburgh']
g2 = sleep_df[(sleep_df['panas_neg_22'] > 15) & (sleep_df['panas_neg_22'] < 20)]['Pittsburgh']
g3 = sleep_df[(sleep_df['panas_neg_22'] > 20) & (sleep_df['panas_neg_22'] <= 25)]['Pittsburgh']
g4 = sleep_df[(sleep_df['panas_neg_22'] >= 25)]['Pittsburgh']


ranges = ["All", "g1 (10 - 15)", "g2 (15 - 20)", "g3 (20 - 25)", "g4 (25+)"]
all = [sts.f_oneway(g1,g2,g3,g4), 
    sts.f_oneway(g2,g3,g4),
    sts.f_oneway(g1,g3,g4),
    sts.f_oneway(g1,g2,g4),
    sts.f_oneway(g1,g2,g3)]

for i in range(len(ranges)):
    print(f"Daily Stress {ranges[i]}\t {all[i]}")
    

Daily Stress All	 F_onewayResult(statistic=1.5650793650793644, pvalue=0.25335817571891367)
Daily Stress g1 (10 - 15)	 F_onewayResult(statistic=1.35, pvalue=0.425531914893617)
Daily Stress g2 (15 - 20)	 F_onewayResult(statistic=0.06147540983606558, pvalue=0.9407674680846388)
Daily Stress g3 (20 - 25)	 F_onewayResult(statistic=2.2142857142857144, pvalue=0.1555493975016589)
Daily Stress g4 (25+)	 F_onewayResult(statistic=2.2142857142857144, pvalue=0.1555493975016589)


In [8]:
tukey = pairwise_tukeyhsd(endog=sleep_df["panas_neg_22"],
                          groups=sleep_df["Pittsburgh"],
                          alpha=0.05)
print(tukey)

 Multiple Comparison of Means - Tukey HSD, FWER=0.05 
group1 group2 meandiff p-adj   lower    upper  reject
-----------------------------------------------------
   2.0    3.0     -2.0 0.9997 -19.8713 15.8713  False
   2.0    4.0   4.3333 0.9593 -11.4277 20.0944  False
   2.0    5.0   1.8333 0.9996 -13.9277 17.5944  False
   2.0    7.0      2.5 0.9988 -15.3713 20.3713  False
   2.0    8.0      0.5    1.0 -17.3713 18.3713  False
   2.0    9.0      2.5 0.9988 -15.3713 20.3713  False
   3.0    4.0   6.3333 0.5595  -5.5809 18.2476  False
   3.0    5.0   3.8333 0.9184  -8.0809 15.7476  False
   3.0    7.0      4.5 0.9319 -10.0919 19.0919  False
   3.0    8.0      2.5 0.9963 -12.0919 17.0919  False
   3.0    9.0      4.5 0.9319 -10.0919 19.0919  False
   4.0    5.0     -2.5 0.9425 -10.9246  5.9246  False
   4.0    7.0  -1.8333 0.9979 -13.7476 10.0809  False
   4.0    8.0  -3.8333 0.9184 -15.7476  8.0809  False
   4.0    9.0  -1.8333 0.9979 -13.7476 10.0809  False
   5.0    7.0   0.6667    1.

In [9]:
# Sleeping less causes stress
g1 = sleep_df[(sleep_df['panas_pos_22'] >= 10) & (sleep_df['panas_pos_22'] < 15)]['Pittsburgh']
g2 = sleep_df[(sleep_df['panas_pos_22'] > 15) & (sleep_df['panas_pos_22'] < 20)]['Pittsburgh']
g3 = sleep_df[(sleep_df['panas_pos_22'] > 20) & (sleep_df['panas_pos_22'] <= 25)]['Pittsburgh']
g4 = sleep_df[(sleep_df['panas_pos_22'] >= 25)]['Pittsburgh']


ranges = ["All", "g1 (10 - 15)", "g2 (15 - 20)", "g3 (20 - 25)", "g4 (25+)"]
all = [sts.f_oneway(g1,g2,g3,g4), 
    sts.f_oneway(g2,g3,g4),
    sts.f_oneway(g1,g3,g4),
    sts.f_oneway(g1,g2,g4),
    sts.f_oneway(g1,g2,g3)]

for i in range(len(ranges)):
    print(f"Daily Stress {ranges[i]}\t {all[i]}")
    

Daily Stress All	 F_onewayResult(statistic=3.8308333333333344, pvalue=0.03047179834320344)
Daily Stress g1 (10 - 15)	 F_onewayResult(statistic=5.539238433192302, pvalue=0.01689616555388915)
Daily Stress g2 (15 - 20)	 F_onewayResult(statistic=2.227736051502146, pvalue=0.15406615807310292)
Daily Stress g3 (20 - 25)	 F_onewayResult(statistic=20.480769230769212, pvalue=0.0002909292239550761)
Daily Stress g4 (25+)	 F_onewayResult(statistic=0.7766544117647058, pvalue=0.4801538836357352)


In [10]:
tukey = pairwise_tukeyhsd(endog=sleep_df["panas_pos_22"],
                          groups=sleep_df["Pittsburgh"],
                          alpha=0.05)
print(tukey)

 Multiple Comparison of Means - Tukey HSD, FWER=0.05 
group1 group2 meandiff p-adj   lower    upper  reject
-----------------------------------------------------
   2.0    3.0      7.0 0.8083 -10.4469 24.4469  False
   2.0    4.0   2.3333 0.9981 -13.0534   17.72  False
   2.0    5.0   4.6667 0.9367   -10.72 20.0534  False
   2.0    7.0      4.0  0.983 -13.4469 21.4469  False
   2.0    8.0     12.5 0.2497  -4.9469 29.9469  False
   2.0    9.0      8.5  0.648  -8.9469 25.9469  False
   3.0    4.0  -4.6667 0.8083 -16.2979  6.9646  False
   3.0    5.0  -2.3333 0.9914 -13.9646  9.2979  False
   3.0    7.0     -3.0  0.989 -17.2453 11.2453  False
   3.0    8.0      5.5 0.8328  -8.7453 19.7453  False
   3.0    9.0      1.5 0.9998 -12.7453 15.7453  False
   4.0    5.0   2.3333  0.953  -5.8912 10.5579  False
   4.0    7.0   1.6667 0.9986  -9.9646 13.2979  False
   4.0    8.0  10.1667 0.1057  -1.4646 21.7979  False
   4.0    9.0   6.1667 0.5623  -5.4646 17.7979  False
   5.0    7.0  -0.6667    1.

In [16]:
import statsmodels.api as sm
# Select your independent X terms, and your dependent y term. 
X = sleep_df[['panas_neg_22', 'panas_pos_22']]
y = sleep_df['Pittsburgh']
X = sm.add_constant(X)
model = sm.OLS(y, X)
results = model.fit()
# Print the linear regression results
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:             Pittsburgh   R-squared:                       0.230
Model:                            OLS   Adj. R-squared:                  0.145
Method:                 Least Squares   F-statistic:                     2.691
Date:                Sat, 23 Apr 2022   Prob (F-statistic):             0.0949
Time:                        14:00:22   Log-Likelihood:                -41.069
No. Observations:                  21   AIC:                             88.14
Df Residuals:                      18   BIC:                             91.27
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                   coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------
const            1.0981      2.325      0.472   

In [23]:
import statsmodels.api as sm
# Select your independent X terms, and your dependent y term. 
X = sleep_df[['panas_neg_9+1']]
y = sleep_df['Pittsburgh']
X = sm.add_constant(X)
model = sm.OLS(y, X)
results = model.fit()
# Print the linear regression results
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:             Pittsburgh   R-squared:                       0.327
Model:                            OLS   Adj. R-squared:                  0.291
Method:                 Least Squares   F-statistic:                     9.227
Date:                Sat, 23 Apr 2022   Prob (F-statistic):            0.00677
Time:                        14:08:27   Log-Likelihood:                -39.660
No. Observations:                  21   AIC:                             83.32
Df Residuals:                      19   BIC:                             85.41
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                    coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------
const            -0.9776      2.079     -0.470

In [24]:
import statsmodels.api as sm
# Select your independent X terms, and your dependent y term. 
X = sleep_df[['panas_pos_9+1']]
y = sleep_df['Pittsburgh']
X = sm.add_constant(X)
model = sm.OLS(y, X)
results = model.fit()
# Print the linear regression results
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:             Pittsburgh   R-squared:                       0.013
Model:                            OLS   Adj. R-squared:                 -0.039
Method:                 Least Squares   F-statistic:                    0.2408
Date:                Sat, 23 Apr 2022   Prob (F-statistic):              0.629
Time:                        14:08:48   Log-Likelihood:                -43.684
No. Observations:                  21   AIC:                             91.37
Df Residuals:                      19   BIC:                             93.46
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                    coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------
const             4.3009      1.961      2.193

In [30]:
# Scatter plot showing the quantity of sleep and stress level
plt = px.scatter(sleep_df, 
        x='Pittsburgh', y='panas_neg_9+1',
        title='Sleep quality and Negative emotions the morning after'
    )
plt.show()


In [31]:
# Scatter plot showing the quantity of sleep and stress level
plt = px.scatter(sleep_df, 
        x='Pittsburgh', y='panas_pos_9+1',
        title='Sleep quality and Positive emotions the morning after')
plt.show()
