In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go

# Relative path of the CSV file that backs the whole analysis.
path = '../Datasets/cleaned-dataset.csv'

# Read the file once; the cells below all work from this DataFrame.
df = pd.read_csv(filepath_or_buffer=path)

In [2]:
# Preview the first three rows to check which columns were loaded.
df.head(n=3)

Unnamed: 0,Person ID,Gender,Age,Occupation,Sleep Duration,Quality of Sleep,Physical Activity Level,Stress Level,BMI Category,Heart Rate,Daily Steps,Sleep Disorder,BP High,BP Low
0,1,Male,27,Software Engineer,6.1,6,42,6,Overweight,77,4200,No Disorder,126,83
1,2,Male,28,Doctor,6.2,6,60,8,Normal,75,10000,No Disorder,125,80
2,3,Male,28,Doctor,6.2,6,60,8,Normal,75,10000,No Disorder,125,80


### How do the other variables relate to stress level?

In [3]:
# Age against stress level, one colour per gender.
fig = px.scatter(
    df,
    x='Age',
    y='Stress Level',
    color='Gender',
).update_layout(
    title='Age vs Stress Level',
    xaxis_title='Age',
    yaxis_title='Stress Level',
    font={'size': 12},
)
fig.show()

✨ There is no relation between age and stress level

❌ No Relation

In [4]:
# Sleep duration against stress level, one colour per gender.
fig = px.scatter(
    df,
    x="Sleep Duration",
    y="Stress Level",
    color="Gender",
).update_layout(
    title="Sleep Duration vs Stress Level",
    xaxis_title="Sleep Duration",
    yaxis_title="Stress Level",
    font={"size": 12},
)
fig.show()

✨ Getting more sleep goes along with a lower stress level

❎ There is a relation

In [5]:
# Sleep quality against stress level, coloured by gender, with a box plot
# drawn in the margin of each axis.
fig = px.scatter(
    df,
    x="Quality of Sleep",
    y="Stress Level",
    color="Gender",
    marginal_x="box",
    marginal_y="box",
).update_layout(
    title="Quality of Sleep vs Stress Level",
    xaxis_title="Quality of Sleep",
    yaxis_title="Stress Level",
    font={"size": 12},
)
fig.show()

✨ Sleep quality can affect the stress level  
✨ Women have better sleep quality than men  
✨ Women's stress level is lower than men's  

❎ There is a relation

In [6]:
# Physical activity level against stress level, coloured by gender, with a
# box plot in the margin of the x-axis.
fig = px.scatter(
    df,
    x="Physical Activity Level",
    y="Stress Level",
    color="Gender",
    marginal_x="box",
).update_layout(
    title="Activity vs Stress Level",
    xaxis_title="Activity Level",
    yaxis_title="Stress Level",
    font={"size": 12},
)
fig.show()

✨ A higher activity level seems to go along with a lower stress level  
✨ Both genders have a similar activity level

❎ There is a relation

In [7]:
# Both blood-pressure columns plotted against stress level; the marker size
# is taken from the 'Heart Rate' column.
fig = px.scatter(
    df,
    x="Stress Level",
    y=["BP High", "BP Low"],
    size="Heart Rate",
).update_layout(
    title="Stress Level vs Blood Pressure",
    xaxis_title="Stress Level",
    yaxis_title="Blood Pressure",
    font={"size": 12},
)
fig.show()

✨ I expected blood pressure to rise as the stress level increases, but it did not

❌ No relation

In [8]:
# Stress level against heart rate, coloured by gender, with a box plot in
# the margin of the y-axis.
fig = px.scatter(
    df,
    x="Stress Level",
    y="Heart Rate",
    color="Gender",
    marginal_y="box",
).update_layout(
    title="Stress Level vs Heart Rate",
    xaxis_title="Stress Level",
    yaxis_title="Heart Rate",
    font={"size": 12},
)
fig.show()

✨ As I expected, a higher heart rate goes along with a higher stress level  
✨ Women also have a lower mean heart rate than men.

❎ There is a relation

In [9]:
# Show the first three rows again as a reminder of the available columns.
df.head(n=3)

Unnamed: 0,Person ID,Gender,Age,Occupation,Sleep Duration,Quality of Sleep,Physical Activity Level,Stress Level,BMI Category,Heart Rate,Daily Steps,Sleep Disorder,BP High,BP Low
0,1,Male,27,Software Engineer,6.1,6,42,6,Overweight,77,4200,No Disorder,126,83
1,2,Male,28,Doctor,6.2,6,60,8,Normal,75,10000,No Disorder,125,80
2,3,Male,28,Doctor,6.2,6,60,8,Normal,75,10000,No Disorder,125,80


<h3 align="center">Demographic Analysis</h3>

In [10]:
# Number of rows per gender, as a frame with columns 'Gender' and 'Count'.
gender = (
    df.groupby("Gender")
    .size()
    .reset_index(name="Count")
)

# Donut chart (hole=0.5) of the gender split.
fig = px.pie(
    gender,
    names="Gender",
    values="Count",
    hole=0.5,
    color_discrete_sequence=px.colors.sequential.RdBu,
)
fig.update_layout(
    title="Gender Distribution",
    font={"size": 12},
    width=500,
    title_x=0.5,
)

# Write each slice's label and percentage inside the slice.
fig.update_traces(textposition="inside", textinfo="percent+label")
fig.show()

In [11]:
# Histogram of age (20 bins requested), split by gender and drawn
# semi-transparent.
fig = (
    px.histogram(
        df,
        x="Age",
        color="Gender",
        nbins=20,
        color_discrete_sequence=px.colors.sequential.RdBu,
    )
    .update_traces(opacity=0.7)
    .update_layout(
        title="Age Distribution",
        xaxis_title="Age",
        yaxis_title=None,
        font={"size": 12},
        width=900,
    )
)
fig.show()

In [12]:
# Histogram of stress level (10 bins requested), split by gender and drawn
# semi-transparent.
fig = (
    px.histogram(
        df,
        x="Stress Level",
        color="Gender",
        nbins=10,
        color_discrete_sequence=px.colors.sequential.RdBu,
    )
    .update_traces(opacity=0.7)
    .update_layout(
        title="Stress Level Distribution",
        xaxis_title="Stress Level",
        yaxis_title=None,
        font={"size": 12},
        width=900,
    )
)
fig.show()

In [13]:
# Mean stress level per occupation, highest first.
occupation_stress_level = (
    df.groupby("Occupation")["Stress Level"]
    .mean()
    .reset_index(name="Stress Level")
    .sort_values(by="Stress Level", ascending=False)
)

# One bar per occupation. The legend is hidden; each bar is already named
# on the x-axis.
fig = px.bar(
    occupation_stress_level,
    x="Occupation",
    y="Stress Level",
    color="Occupation",
    color_discrete_sequence=px.colors.sequential.RdBu,
).update_layout(
    title="Occupation vs Stress Level",
    xaxis_title="Occupation",
    yaxis_title="Stress Level",
    font={"size": 12},
    width=900,
    showlegend=False,
)
fig.show()

In [14]:
# Row count for every (Gender, Sleep Disorder) pair present in the data.
gender_sleep_disorder = (
    df.groupby(["Gender", "Sleep Disorder"])
    .size()
    .reset_index(name="Count")
)

# Two-level treemap: gender on the outer level, sleep disorder nested inside.
fig = px.treemap(
    gender_sleep_disorder,
    path=["Gender", "Sleep Disorder"],
    values="Count",
    color_discrete_sequence=px.colors.sequential.RdBu,
)
# Rounded tiles with centred text showing label, count and percentages.
fig.update_traces(
    marker={"cornerradius": 50},
    textposition="middle center",
    textfont_size=13,
    textinfo="label+value+percent parent+percent entry",
)
fig.update_layout(
    title="Sleep Disorder Distribution",
    font={"size": 15},
    width=1000,
    title_x=0.5,
    title_y=0.92,
)
fig.show()

In [15]:
# Row count for every (Gender, BMI Category) pair present in the data.
gender_bmi = (
    df.groupby(["Gender", "BMI Category"])
    .size()
    .reset_index(name="Count")
)

# Two-level treemap: gender on the outer level, BMI category nested inside.
fig = px.treemap(
    gender_bmi,
    path=["Gender", "BMI Category"],
    values="Count",
    color_discrete_sequence=px.colors.sequential.RdBu,
)
# Rounded tiles with centred text showing label, count and percentages.
fig.update_traces(
    marker={"cornerradius": 50},
    textposition="middle center",
    textfont_size=13,
    textinfo="label+value+percent parent+percent entry",
)
fig.update_layout(
    title="BMI Category Distribution",
    font={"size": 15},
    width=1000,
    title_x=0.5,
    title_y=0.92,
)
fig.show()

<h3 align="center">Sleep and Lifestyle Analysis</h3>

In [16]:
# Show the first three rows again as a reminder of the available columns.
df.head(n=3)

Unnamed: 0,Person ID,Gender,Age,Occupation,Sleep Duration,Quality of Sleep,Physical Activity Level,Stress Level,BMI Category,Heart Rate,Daily Steps,Sleep Disorder,BP High,BP Low
0,1,Male,27,Software Engineer,6.1,6,42,6,Overweight,77,4200,No Disorder,126,83
1,2,Male,28,Doctor,6.2,6,60,8,Normal,75,10000,No Disorder,125,80
2,3,Male,28,Doctor,6.2,6,60,8,Normal,75,10000,No Disorder,125,80


In [17]:
# Histogram of sleep duration (20 bins requested) with a box plot in the
# margin; all traces are drawn semi-transparent.
fig = (
    px.histogram(df, x="Sleep Duration", nbins=20, marginal="box")
    .update_layout(
        title="Sleep Duration Distribution",
        xaxis_title="Sleep Duration",
        yaxis_title=None,
        font={"size": 12},
        width=900,
    )
    .update_traces(opacity=0.7)
)
fig.show()

In [18]:
# Histogram of sleep quality (10 bins requested) with a box plot in the
# margin; all traces are drawn semi-transparent.
fig = (
    px.histogram(df, x="Quality of Sleep", nbins=10, marginal="box")
    .update_layout(
        title="Quality of Sleep Distribution out of 10",
        xaxis_title="Quality of Sleep",
        yaxis_title=None,
        font={"size": 12},
        width=900,
    )
    .update_traces(opacity=0.7)
)
fig.show()

In [19]:
# Sleep duration against sleep quality; the marker size is taken from the
# 'Physical Activity Level' column.
fig = px.scatter(
    df,
    x="Sleep Duration",
    y="Quality of Sleep",
    size="Physical Activity Level",
    color_discrete_sequence=px.colors.sequential.RdBu,
)

# Titles, sizing and legend placement applied in a single layout update.
# NOTE(review): no `color` column is mapped above, so presumably no legend is
# drawn and the legend settings below have no visible effect — confirm.
fig.update_layout(
    title="Sleep Duration vs Quality of Sleep Sized by Physical Activity Level",
    xaxis_title="Sleep Duration",
    yaxis_title="Quality of Sleep",
    font={"size": 12},
    width=900,
    legend_title="Activity Level",
    legend={
        "yanchor": "top",
        "y": 0.99,
        "xanchor": "left",
        "x": 0.01,
    },
)
fig.show()

In [20]:
# Physical activity level against sleep quality; the marker colour is taken
# from the 'Sleep Duration' column.
fig = px.scatter(
    df,
    x="Physical Activity Level",
    y="Quality of Sleep",
    color="Sleep Duration",
).update_layout(
    title="Physical Activity Level vs Quality of Sleep",
    xaxis_title="Physical Activity Level",
    yaxis_title="Quality of Sleep",
    font={"size": 12},
    width=900,
)
fig.show()

In [21]:
# Sleep quality against stress level; the marker colour is taken from the
# 'Daily Steps' column.
fig = px.scatter(
    df,
    x="Quality of Sleep",
    y="Stress Level",
    color="Daily Steps",
).update_layout(
    title="Quality of Sleep vs Stress Level",
    xaxis_title="Quality of Sleep",
    yaxis_title="Stress Level",
    font={"size": 12},
    width=900,
)
fig.show()

In [22]:
# Pairwise correlation (pandas default method) between four lifestyle columns.
lifestyle_corr = df[
    ["Daily Steps", "Quality of Sleep", "Sleep Duration", "Stress Level"]
].corr()

# Heatmap of the matrix, rounded to three decimals, with each value printed
# in its cell.
fig = px.imshow(lifestyle_corr.round(decimals=3), text_auto=True)
fig.update_layout(
    title="Correlation Between Lifestyle Factors",
    font={"size": 12},
    width=600,
)
fig.show()

<h3 align="center">Health and Sleep Disorders</h3>

In [29]:
# Both blood-pressure columns plotted against heart rate; the marker size is
# taken from the 'Stress Level' column.
#
# A list-valued `y` is wide-form input: Plotly Express colours the traces by
# column name, which is a discrete colour. The `color_continuous_scale`
# argument that used to be passed here therefore had no effect and was removed.
fig = px.scatter(
    df,
    x='Heart Rate',
    y=['BP High', 'BP Low'],
    size='Stress Level'
)

fig.update_layout(
    title='Heart Rate vs Blood Pressure Sized by Stress Level',
    xaxis_title='Heart Rate',
    yaxis_title='Blood Pressure',
    font = dict(
        size=12
    ),
    width=900,
    legend_title='Blood Pressure'
)
fig.show()

In [42]:
# Mean 'BP High' and 'BP Low' for each sleep-disorder group.
sleep_do_bp = (
    df.groupby('Sleep Disorder')[['BP High', 'BP Low']]
    .mean()
    .reset_index()
)

# Reshape to long form: one row per (sleep disorder, blood-pressure column),
# with the column name in 'Blood Pressure Type' and its mean in 'Pressure Value'.
df_bp = pd.melt(
    sleep_do_bp,
    id_vars='Sleep Disorder',
    var_name='Blood Pressure Type',
    value_name='Pressure Value',
)

# Only the last expression of a cell is displayed, so the intermediate frame
# is no longer echoed mid-cell.
df_bp

Unnamed: 0,Sleep Disorder,Blood Pressure Type,Pressure Value
0,Insomnia,BP High,132.038961
1,No Disorder,BP High,124.045662
2,Sleep Apnea,BP High,137.769231
3,Insomnia,BP Low,86.857143
4,No Disorder,BP Low,81.0
5,Sleep Apnea,BP Low,92.717949


In [43]:
# Grouped bars: one group per sleep disorder, one bar per blood-pressure type.
fig = px.bar(
    df_bp,
    x="Sleep Disorder",
    y="Pressure Value",
    color="Blood Pressure Type",
    barmode="group",
    color_discrete_sequence=px.colors.sequential.RdBu,
).update_layout(
    title="Sleep Disorder vs Blood Pressure",
    xaxis_title="Sleep Disorder",
    yaxis_title="Blood Pressure",
    font={"size": 12},
    width=900,
)
fig.show()

In [48]:
# Box plot of the 'BP High' column. Passing the column as `x` draws the box
# horizontally, so the readings lie along the x-axis.
fig = px.box(
    df,
    x='BP High',
    color_discrete_sequence=px.colors.sequential.RdBu
)
fig.update_layout(
    title='Distribution of High Blood Pressure',
    # Label the axis that carries the values; the y-axis has no data scale here.
    xaxis_title='Blood Pressure',
    font = dict(
        size=12
    ),
    width=900
)
fig.show()

In [47]:
# Box plot of the 'BP Low' column. Passing the column as `x` draws the box
# horizontally, so the readings lie along the x-axis.
fig = px.box(
    df,
    x='BP Low',
    color_discrete_sequence=px.colors.sequential.RdBu
)
fig.update_layout(
    title='Distribution of Low Blood Pressure',
    # Label the axis that carries the values; the y-axis has no data scale here.
    xaxis_title='Blood Pressure',
    font = dict(
        size=12
    ),
    width=900
)
fig.show()

In [51]:
# Row count for every (BMI Category, Sleep Disorder) pair present in the data.
# NOTE(review): the result lists both 'Normal' and 'Normal Weight' as BMI
# categories; these look like duplicate labels for one group — confirm and
# merge them where the dataset is cleaned.
bmi_sleep_do = (
    df.groupby(['BMI Category', 'Sleep Disorder'])
    .size()
    .reset_index(name='Count')
)
bmi_sleep_do

Unnamed: 0,BMI Category,Sleep Disorder,Count
0,Normal,Insomnia,7
1,Normal,No Disorder,183
2,Normal,Sleep Apnea,5
3,Normal Weight,Insomnia,2
4,Normal Weight,No Disorder,17
5,Normal Weight,Sleep Apnea,2
6,Obese,Insomnia,4
7,Obese,Sleep Apnea,6
8,Overweight,Insomnia,64
9,Overweight,No Disorder,19


In [52]:
# Two-level treemap: BMI category on the outer level, sleep disorder nested
# inside, sized by row count.
fig = px.treemap(
    bmi_sleep_do,
    path=["BMI Category", "Sleep Disorder"],
    values="Count",
    color_discrete_sequence=px.colors.sequential.RdBu,
)

# Rounded tiles with centred text showing label, count and percentages.
fig.update_traces(
    marker={"cornerradius": 50},
    textposition="middle center",
    textfont_size=13,
    textinfo="label+value+percent parent+percent entry",
)
fig.update_layout(
    title="BMI Category vs Sleep Disorder",
    font={"size": 15},
    width=1000,
    title_x=0.5,
    title_y=0.92,
)
fig.show()

In [54]:
# Mean 'BP High' and 'BP Low' for each BMI category.
# NOTE(review): the data contains both 'Normal' and 'Normal Weight'; these
# look like duplicate labels for one group — confirm and merge them where the
# dataset is cleaned.
bmi_bpressure = (
    df.groupby('BMI Category')[['BP High', 'BP Low']]
    .mean()
    .reset_index()
)

# Reshape to long form: one row per (BMI category, blood-pressure column),
# with the column name in 'Blood Pressure Type' and its mean in 'Pressure Value'.
bmi_df = pd.melt(
    bmi_bpressure,
    id_vars='BMI Category',
    var_name='Blood Pressure Type',
    value_name='Pressure Value',
)

# Only the last expression of a cell is displayed, so the intermediate frame
# is no longer echoed mid-cell.
bmi_df

Unnamed: 0,BMI Category,Blood Pressure Type,Pressure Value
0,Normal,BP High,123.820513
1,Normal Weight,BP High,121.619048
2,Obese,BP High,139.2
3,Overweight,BP High,135.054054
4,Normal,BP Low,80.666667
5,Normal Weight,BP Low,80.0
6,Obese,BP Low,90.2
7,Overweight,BP Low,90.182432


In [56]:
# Grouped bars: one group per BMI category, one bar per blood-pressure type.
fig = px.bar(
    bmi_df,
    x="BMI Category",
    y="Pressure Value",
    color="Blood Pressure Type",
    barmode="group",
    color_discrete_sequence=px.colors.sequential.RdBu,
).update_layout(
    title="BMI Category vs Blood Pressure",
    xaxis_title="BMI Category",
    yaxis_title="Blood Pressure",
    legend_title="Blood Pressure",
    font={"size": 12},
    width=900,
)
fig.show()