In [17]:
# !pip install plotly --upgrade
# !pip install streamlit requests

Import the modules and read the data:

In [18]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

from plotly.subplots import make_subplots

# Load the dataset once; every later cell reads from `df`.
path = 'data/Sleep_health_and_lifestyle_dataset.csv' # path to the file, relative to the notebook
try:
  df = pd.read_csv(path) # read file
except OSError as err:
  # Only a missing or unreadable file is reported as "not found". Parsing errors
  # and KeyboardInterrupt propagate unchanged instead of being relabelled by a
  # bare `except`, and the original cause stays attached to the traceback.
  raise RuntimeError("File or path have not found.") from err
print(df)



     Person ID  Gender  Age            Occupation  Sleep Duration  \
0            1    Male   27     Software Engineer             6.1   
1            2    Male   28                Doctor             6.2   
2            3    Male   28                Doctor             6.2   
3            4    Male   28  Sales Representative             5.9   
4            5    Male   28  Sales Representative             5.9   
..         ...     ...  ...                   ...             ...   
369        370  Female   59                 Nurse             8.1   
370        371  Female   59                 Nurse             8.0   
371        372  Female   59                 Nurse             8.1   
372        373  Female   59                 Nurse             8.1   
373        374  Female   59                 Nurse             8.1   

     Quality of Sleep  Physical Activity Level  Stress Level BMI Category  \
0                   6                       42             6   Overweight   
1                

Set figures


In [19]:
# Empty placeholder figures, one independent go.Figure per name.
(
    fig_spray,                        # 2d histograms of sleep duration and stress level by age
    fig_graph,                        # average sleep duration vs average stress level
    fig_phyz,                         # average quality of sleep vs average stress level vs average physical activity
    fig_duration_vs_quality_vs_phyz,  # average quality of sleep vs average sleep duration vs average physical activity
    fig_chart_occupation_stress,      # average stress level per occupation
) = (go.Figure() for _ in range(5))

# Number of rows per gender value
value_counts_gender = df['Gender'].value_counts()

# Donut chart (large hole) showing each gender's label and share
gender_pie = go.Pie(
    labels=value_counts_gender.index,
    values=value_counts_gender.values,
    hole=0.6,
    marker={'colors': ['rgb(135, 206, 235)', 'rgb(238, 130, 238)']},
    textinfo='label+percent',
)
fig_gender = go.Figure(data=[gender_pie])

# Number of rows per occupation value
value_counts_occupation = df['Occupation'].value_counts()

# Donut chart (small hole) showing each occupation's label and share
occupation_pie = go.Pie(
    labels=value_counts_occupation.index,
    values=value_counts_occupation.values,
    hole=0.3,
    textinfo='label+percent',
)
fig_occupation = go.Figure(data=[occupation_pie])




---


Let's draw our gender statistics pie chart.


---



In [20]:
# Title, dark theme, no legend - then render.
# update_layout returns the figure itself, so the call can be chained into show().
fig_gender.update_layout(
    title_text='Gender Stats',
    template='plotly_dark',
    showlegend=False,
).show()




---


Then, the occupation pie chart.


---



In [21]:
# Title, dark theme, no legend - then render.
# update_layout returns the figure itself, so the call can be chained into show().
fig_occupation.update_layout(
    title_text='Occupation Stats',
    template='plotly_dark',
    showlegend=False,
).show()




---


Maybe the stress level is affected by occupation.


---



In [22]:
# Mean stress level and group size per occupation, highest mean first
average_stress_table = (
    df.groupby('Occupation')
    .agg(
        average_stress_level=('Stress Level', 'mean'),
        count=('Stress Level', 'size'),
    )
    .reset_index()
    .sort_values(by='average_stress_level', ascending=False)
)

# One bar per occupation; the group size is used as the bar text and in the hover box
axis_labels = {'average_stress_level': 'Average Stress Level', 'Occupation': 'Occupation'}
fig_chart_occupation_stress = px.bar(
    average_stress_table,
    x='Occupation',
    y='average_stress_level',
    title='Occupation occurs stress',
    labels=axis_labels,
    text='count',
    hover_data={'count': True},
)

# Axis titles, slanted tick labels and the dark theme
fig_chart_occupation_stress.update_layout(
    xaxis_title="Occupation",
    yaxis_title="Average Stress Level",
    xaxis_tickangle=-45,
    template='plotly_dark',
)

# Render the chart
fig_chart_occupation_stress.show()



---


Draw a 2D histogram to compare sleep duration and stress level across ages.


---



In [23]:
# Start from an empty figure so that re-running this cell does not stack a
# second copy of both histograms on top of the ones added by a previous run.
fig_spray = go.Figure()

# One 2D histogram (age on x) per metric:
# (column, 'r, g, b' channels of the trace colour, colour-bar title, colour-bar x position)
histogram_specs = [
    ('Sleep Duration', '0, 130, 180', 'Sleep Freq', 1.0),
    ('Stress Level', '205, 92, 92', 'Stress Freq', 1.1),
]

for column, rgb, colorbar_title, colorbar_x in histogram_specs:
    fig_spray.add_trace(go.Histogram2d(
        x=df['Age'],
        y=df[column],
        # fade from fully transparent to the solid trace colour
        colorscale=[
            [0, 'rgba(0, 0, 0, 0)'],
            [0.5, f'rgba({rgb}, 0.5)'],
            [1, f'rgb({rgb})'],
        ],
        nbinsx=60,
        nbinsy=30,
        opacity=0.75,
        # distinct x positions keep the two colour bars side by side
        colorbar=dict(title=colorbar_title, x=colorbar_x),
        showscale=True,
    ))

# decorate
fig_spray.update_layout(
    title='Sleep duration vs Stress Level',
    xaxis_title='Age',
    yaxis_title='Value',
    barmode='overlay',
    template='plotly_dark'
)

# show
fig_spray.show()


Let's look at the main tendency more carefully.

First, we need to compute the average values at each age and find the points where they change direction.

In [24]:
# Per-age means of sleep duration and stress level, with Age restored as a column
grouped_df = (
    df.groupby('Age')
    .agg(
        average_sleep=('Sleep Duration', 'mean'),
        average_stress=('Stress Level', 'mean'),
    )
    .reset_index()
)

# Rows holding the global minimum and maximum of each averaged metric
sleep_series = grouped_df["average_sleep"]
stress_series = grouped_df["average_stress"]

sleep_min, sleep_max = (
    grouped_df.loc[label] for label in (sleep_series.idxmin(), sleep_series.idxmax())
)
stress_min, stress_max = (
    grouped_df.loc[label] for label in (stress_series.idxmin(), stress_series.idxmax())
)

# find every useful point
def fnd(values) -> tuple:
  """Locate the interior turning points of a sequence.

  An interior element is a peak when it is strictly greater than both of its
  neighbours, and a dip when it is strictly smaller than both. Elements equal
  to the overall maximum are never reported as peaks, and elements equal to
  the overall minimum are never reported as dips. The first and last elements
  are never reported because they have only one neighbour.

  Args:
    values: indexable, sized sequence of comparable numbers (a list or a
      1-D numpy array both work).

  Returns:
    A ``(downs, peaks)`` tuple of two lists holding the positional indices
    of the dips and of the peaks, in ascending order.
  """
  # With fewer than three elements there is no interior point to inspect;
  # returning early also avoids max()/min() raising on an empty input.
  if len(values) < 3:
    return [], []

  peaks = []
  downs = []
  mx = max(values)
  mn = min(values)
  for i in range(1, len(values) - 1):
    prev, cur, nxt = values[i - 1], values[i], values[i + 1]
    if cur > prev and cur > nxt and cur != mx:
      peaks.append(i)
    elif cur < prev and cur < nxt and cur != mn:
      downs.append(i)
  return downs, peaks

# Positional indices of the local dips and peaks of each averaged metric
(sleep_mins, sleep_maxes), (stress_mins, stress_maxes) = (
    fnd(grouped_df[metric].values) for metric in ('average_sleep', 'average_stress')
)


Now, we are ready to draw the graph and look at it

In [25]:
# Start from an empty figure so that re-running this cell does not append a
# second copy of every line and marker to the figure left by a previous run.
fig_graph = go.Figure()

# define common line styles (keys are column names of grouped_df)
line_styles = {
    "average_sleep": dict(color='rgb(0, 130, 180)'),
    "average_stress": dict(color='rgb(205, 92, 92)'),
}

marker_styles = {
    "sleep_down": dict(color='blue', size=10, symbol="triangle-down"),
    "sleep_up": dict(color='blue', size=10, symbol="triangle-up"),
    "stress_down": dict(color='red', size=10, symbol="triangle-down"),
    "stress_up": dict(color='red', size=10, symbol="triangle-up"),
    "peak_marker": dict(size=10, symbol="arrow-bar-up"),
}

# add average lines; the legend entry is built from the part of the column
# name after the underscore ("Average sleep", "Average stress")
for metric, style in line_styles.items():
    fig_graph.add_trace(go.Scatter(
        x=grouped_df['Age'],
        y=grouped_df[metric],
        mode='lines',
        name=f'Average {metric.split("_")[1]}',
        line=style
    ))

# add markers for sleep and stress changes
# NOTE(review): local peaks (*_maxes) are drawn with the "*_down" marker and
# local dips (*_mins) with the "*_up" marker - presumably to show which way the
# curve turns next; confirm that this pairing is intended.
changes = [
    ("Age", "average_sleep", sleep_maxes, "sleep_down"),
    ("Age", "average_sleep", sleep_mins, "sleep_up"),
    ("Age", "average_stress", stress_maxes, "stress_down"),
    ("Age", "average_stress", stress_mins, "stress_up"),
]

for age_col, metric_col, indices, marker_key in changes:
    # `indices` are positional, hence .iloc
    fig_graph.add_trace(go.Scatter(
        x=grouped_df[age_col].iloc[indices],
        y=grouped_df[metric_col].iloc[indices],
        mode='markers',
        name=marker_key.replace('_', ' ').title(),
        marker=marker_styles[marker_key]
    ))

# add the global minimum and maximum of each metric as labelled markers
peak_data = [
    ("Age", "average_sleep", sleep_min, sleep_max, 'blue', 'Sleep peak'),
    ("Age", "average_stress", stress_min, stress_max, 'red', 'Stress peak'),
]

for age_col, metric_col, min_data, max_data, color, name in peak_data:
    fig_graph.add_trace(go.Scatter(
        x=[min_data[age_col], max_data[age_col]],
        y=[min_data[metric_col], max_data[metric_col]],
        mode='markers+text',
        name=name,
        text=["min", "max"],
        textposition="top center",
        marker=dict(color=color, **marker_styles["peak_marker"])
    ))

# decorate
fig_graph.update_layout(
    title='Average Sleep vs Average Stress',
    xaxis_title='Age',
    yaxis_title='Average Values',
    legend_title='Value',
    template='plotly_dark'
)

# show
fig_graph.show()


We can see that sleep duration and stress level have an inverse dependence. Let's draw some more graphs to see if we can find other patterns.


In [26]:
# Per-age means of physical activity, sleep quality and stress level.
# They are kept under their own name instead of overwriting `grouped_df`:
# the sleep-vs-stress plotting cell reads 'average_sleep' from `grouped_df`
# and would fail with a KeyError if it were re-run after this cell.
phyz_by_age = df.groupby('Age').agg(
    average_physical=('Physical Activity Level', 'mean'),
    average_quality=('Quality of Sleep', 'mean'),
    average_stress=('Stress Level', 'mean')
).reset_index()

# Physical activity is divided by this factor before plotting - presumably so
# that it fits on the same axis as the other two lines. The legend entry states
# the scaling so the plotted values are not mistaken for raw ones.
PHYSICAL_ACTIVITY_SCALE = 10

# Start from an empty figure so that re-running this cell does not add
# duplicate lines to the figure left by a previous run.
fig_phyz = go.Figure()

# (y values, legend name, line colour) for each line
phyz_lines = [
    (phyz_by_age['average_quality'], 'Average quality of sleep', 'rgb(0, 130, 180)'),
    (phyz_by_age['average_stress'], 'Average stress level', 'rgb(205, 92, 92)'),
    (
        phyz_by_age['average_physical'] / PHYSICAL_ACTIVITY_SCALE,
        f'Average physical activity level (/{PHYSICAL_ACTIVITY_SCALE})',
        'orange',
    ),
]

for y_values, trace_name, line_color in phyz_lines:
    fig_phyz.add_trace(go.Scatter(
        x=phyz_by_age['Age'],
        y=y_values,
        mode='lines',
        name=trace_name,
        line=dict(color=line_color)
    ))

# decorate
fig_phyz.update_layout(
    title='Average sleep quality vs Average stress level vs Average physical activity',
    xaxis_title='Age',
    yaxis_title='Average values',
    legend_title='Value',
    template='plotly_dark'
)

# show
fig_phyz.show()


We can see that after the age of 35 there is a roughly linear dependence between physical activity and stress level. Next, I am going to look at the patterns between sleep duration, quality of sleep and physical activity.

In [27]:
# Per-age means of physical activity, sleep quality and sleep duration.
# They are kept under their own name instead of overwriting `grouped_df`:
# the sleep-vs-stress plotting cell reads 'average_sleep' from `grouped_df`
# and would fail with a KeyError if it were re-run after this cell.
duration_by_age = df.groupby('Age').agg(
    average_physical=('Physical Activity Level', 'mean'),
    average_quality=('Quality of Sleep', 'mean'),
    average_duration=('Sleep Duration', 'mean')
).reset_index()

# Physical activity is divided by this factor before plotting - presumably so
# that it fits on the same axis as the other two lines. The legend entry states
# the scaling so the plotted values are not mistaken for raw ones.
ACTIVITY_SCALE = 10

# Start from an empty figure so that re-running this cell does not add
# duplicate lines to the figure left by a previous run.
fig_duration_vs_quality_vs_phyz = go.Figure()

# (y values, legend name, line colour) for each line
duration_lines = [
    (duration_by_age['average_quality'], 'Average quality of sleep', 'rgb(0, 130, 180)'),
    (duration_by_age['average_duration'], 'Average sleep duration', 'rgb(205, 92, 92)'),
    (
        duration_by_age['average_physical'] / ACTIVITY_SCALE,
        f'Average physical activity level (/{ACTIVITY_SCALE})',
        'orange',
    ),
]

for y_values, trace_name, line_color in duration_lines:
    fig_duration_vs_quality_vs_phyz.add_trace(go.Scatter(
        x=duration_by_age['Age'],
        y=y_values,
        mode='lines',
        name=trace_name,
        line=dict(color=line_color)
    ))

# decorate (the title previously read "sleep of quality" / "sleep suration")
fig_duration_vs_quality_vs_phyz.update_layout(
    title='Average sleep quality vs Average sleep duration vs Average physical activity',
    xaxis_title='Age',
    yaxis_title='Average values',
    legend_title='Value',
    template='plotly_dark'
)

# show
fig_duration_vs_quality_vs_phyz.show()




---


We can see that there is a strong dependence, but the ages from 45 to 54 are an exception. From the previous analysis I can note that the level of stress affects physical activity.

But let's look at the numerical statistics. I am going to use pandas to compute them.

I am using Pearson's correlation coefficient.


---





In [None]:
# Columns whose pairwise correlation is examined
interest_columns = ['Sleep Duration', 'Quality of Sleep', 'Physical Activity Level', 'Stress Level']

# Plain column subset of the raw rows (no grouping involved). It is stored under
# its own name so that the per-age `grouped_df` used by earlier cells is not
# overwritten with an unrelated frame.
metrics_df = df[interest_columns]

# Pairwise Pearson correlation matrix of the selected columns
correl_mtx = metrics_df.corr(method='pearson')

# Heatmap with the colour range pinned to [-1, 1] and the coefficient printed in each cell
fig_temp = px.imshow(
    correl_mtx,
    text_auto=True,
    color_continuous_scale='RdBu',
    range_color=[-1, 1],
    labels=dict(color='Pearsons Correlation'),  # spelling fixed (was "Corellation")
    x=correl_mtx.columns,
    y=correl_mtx.index,
    title='Pearsons Correlations'
)

fig_temp.update_layout(
    xaxis_title="Values",
    yaxis_title="Values",
    template='plotly_dark'
)

fig_temp.show()




---


We can see that sleep duration and sleep quality are strongly negatively correlated with stress level; physical activity slightly increases sleep duration and quality of sleep, but it has almost no effect on stress level.


---

