# Sleep and lifestyle dataset

By : AUTEF Lucas, BASSOUM Mohamed Emine, GUEMIMI Marouane, JEANNE Arthur, WIATT Chloé

### Links
• Dashboard: [Sleep Analysis Interactive Dashboard](https://viz-dashboard-408143638721.us-central1.run.app/)

• Github: [Sleepy Dashboard Project Repository](https://github.com/Emine-Bassoum/Sleepy-Dashboard-Project)

In [4]:
import dash
from dash import dcc, html, Input, Output
import dash_bootstrap_components as dbc
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
from scipy.stats import gaussian_kde
from collections import Counter
import seaborn as sns
from plotly.colors import n_colors
import kagglehub
import os



path = kagglehub.dataset_download("uom190346a/sleep-health-and-lifestyle-dataset")
files = os.listdir(path)
csv_path = os.path.join(path, files[0])

data = pd.read_csv(csv_path)


In [5]:
data["Sleep Disorder"] = data["Sleep Disorder"].fillna("No Disorder")
data["BMI Category"][data["BMI Category"]=="Normal Weight"] = "Normal"

width = 1000
height = 600

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  data["BMI Category"][data["BMI Category"]=="Normal Weight"] = "Normal"
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a

In [6]:
# Calculate correlation matrix
corr_matrix = data.corr(numeric_only=True)

# Create a mask for the upper triangle (optional, for better visualization)
mask = np.triu(np.ones_like(corr_matrix))

# Create heatmap using plotly express
fig = px.imshow(
    corr_matrix,
    text_auto='.1f',  # Format for annotations, similar to fmt=".1f"
    color_continuous_scale='Purp_r',  # Similar to seaborn default
    zmin=-1, zmax=1,  # Set range for correlation values
    aspect="auto",    # Adjust aspect ratio automatically
)

# Update layout
fig.update_layout(
    title='<b>Corrolation between different variables in the dataset<b>',
    xaxis_title='Variables',
    yaxis_title='Variables',
    height=height,
    width=width,
    title_x=0.53
)

# Display the plot
fig.show()

In [7]:
# Group by Age and calculate mean Sleep Duration
grouped_data = data.groupby("Age")["Sleep Duration"].mean().reset_index()

fig = px.line(
    grouped_data,
    x="Age",
    y="Sleep Duration",
    title="<b>Sleep Duration Variation by Age<b>",
    markers=True
)

# Customize the line and markers
fig.update_traces(
    mode='lines+markers',
    marker=dict(color='#a066c2'),  # Dark purple
    line=dict(color='#a066c2')
)

# Improve layout
fig.update_layout(
    xaxis_title="Age",
    yaxis_title="Average Sleep Duration",
    title_x=0.5,
    height=height,
    width=width,
    plot_bgcolor='#faf8ff'
)

# Display the plot
fig.show()

In [8]:
fig = px.box(
    data,
    x="BMI Category",
    y="Sleep Duration",
    title="<b>Sleep Duration by BMI Category<b>",
    color="BMI Category",  # Color boxes by category
    color_discrete_sequence=['#311b92', '#a066c2', '#f2a3c3']
)

# Improve layout
fig.update_layout(
    xaxis_title="BMI Category",
    yaxis_title="Sleep Duration (in hours)",
    showlegend=False,
    title_x=0.5,
    height=height,
    width=width,
    plot_bgcolor='#faf8ff'
)

# Display the plot
fig.show()

In [9]:
# Calculate the 2D KDE
x = data['Heart Rate'].values
y = data['Sleep Duration'].values
xy = np.vstack([x, y])

# Only calculate KDE if we have enough valid data points
if len(x) > 3 and len(y) > 3 and not np.isnan(x).any() and not np.isnan(y).any():
    kde = gaussian_kde(xy)

    # Create grid of points
    x_range = np.linspace(60, max(x) + 1, 100)
    y_range = np.linspace(min(y) - 0.5, max(y) + 0.5, 100)
    X, Y = np.meshgrid(x_range, y_range)
    positions = np.vstack([X.ravel(), Y.ravel()])

    # Evaluate kernel at grid points
    Z = kde(positions).reshape(X.shape)

    # Create the figure
    fig = go.Figure()

    # Add filled contours
    fig.add_trace(go.Contour(
        x=x_range,
        y=y_range,
        z=Z,
        colorscale='Purples',
        contours=dict(
            start=0,
            end=Z.max(),
            size=(Z.max() / 10),
            showlabels=False
        ),
        line=dict(width=0.5),
        colorbar=dict(
            title='Density',
            title_side='right'
        ),
        hovertemplate='Heart Rate: %{x:.1f}<br>Sleep Duration: %{y:.1f}<br>Density: %{z:.4f}<extra></extra>',
        ncontours=20
    ))

    # Add scatter points with very low opacity to show data distribution
    fig.add_trace(go.Scatter(
        x=x,
        y=y,
        mode='markers',
        marker=dict(
            color='rgba(104, 71, 141, 0.1)',
            size=3
        ),
        showlegend=False,
        hoverinfo='skip'
    ))

    # Update layout
    fig.update_layout(
        title={
            'text': '<b>Relationship Between Sleep Duration and Heart Rate (HRR)<b>',
            'x': 0.5,
            'font': {'size': 16}
        },
        xaxis_title='Heart Rate',
        yaxis_title='Sleep Duration',
        plot_bgcolor='white',
        width=1000,
        height=600
    )

    fig.show()
else:
    print("Not enough valid data points to create a KDE plot")

In [10]:

fig = px.box(
    data,
    x='Occupation',
    y='Sleep Duration',
    title='<b>Relationship Between Occupation and Sleep Duration<b>',
    color='Occupation',  # Use occupation for coloring
    color_discrete_sequence= [
    "#fab8ba", "#e4b3dd", "#d086c9", "#ba5cb6",
    "#a231a3", "#87148f", "#6a0c7a", "#4d0865",
    "#320653", "#20023f", "#10002c", "#000019"
],
)

fig.update_layout(
    xaxis_title='Occupation',
    yaxis_title='Sleep Duration (Hour)',
    title_font_size=16,
    xaxis_title_font_size=12,
    yaxis_title_font_size=12,
    xaxis_tickangle=45,
    height=height,
    width=width,
    plot_bgcolor='#faf8ff',
    title_x=0.5
)

fig.show()

In [11]:

# Sleep disorder analysis by gender

# Create a crosstab to analyze sleep disorders by gender
sleep_disorder_gender = pd.crosstab(data['Sleep Disorder'], data['Gender'])

# Convert the crosstab to long format for Plotly
sleep_disorder_df = sleep_disorder_gender.reset_index().melt(
    id_vars='Sleep Disorder',
    var_name='Gender',
    value_name='Count'
)

# Create the bar chart with Plotly Express
fig = px.bar(
    sleep_disorder_df,
    x='Sleep Disorder',
    y='Count',
    color='Gender',
    title='<b>Sleep Disorders by Gender<b>',
    color_discrete_map={'Female': '#f2a3c3', 'Male': '#a290c8'},
    text='Count',  # Display count values on bars
    width=width,
    height=height,
    barmode='group'
)

# Update layout for a cleaner look
fig.update_layout(
    xaxis_title='Sleep Disorder Type',
    yaxis_title='Count',
    legend_title='Gender',
    title_x=0.5,
    xaxis={'categoryorder': 'total descending'}, # Optional: order by total count
    plot_bgcolor='#faf8ff'

)

# Remove gridlines
fig.update_yaxes(showgrid=False)
fig.update_xaxes(showgrid=False)

fig.show()

In [12]:

color_palette = {'Male': '#a290c8', 'Female': '#f2a3c3'}

fig = px.violin(
    data,
    x='Gender',
    y='Quality of Sleep',
    color='Gender',
    color_discrete_map=color_palette,
    box=True,       # Display the box plot inside the violins
    # Optionally show all points
    title='<b>Distribution of Quality of Sleep by Gender<b>',
    template='plotly_white'
)

fig.update_layout(
    xaxis_title='Gender',
    yaxis_title='Quality of Sleep',
    title_x=0.5  # Center the title
    , width=width,
    height=height,
    plot_bgcolor='#faf8ff'
)

fig.show()


In [13]:
custom_purple_colors = ["#f2a3c3", "#a290c8", "#fab8ba"]  # Replace with your desired hex codes

fig = px.scatter_3d(
    data,
    x='BMI Category',
    y='Blood Pressure',
    z='Heart Rate',
    color='Sleep Disorder',
    symbol='Sleep Disorder',
    width=1200,
    height=height,
    color_discrete_sequence=custom_purple_colors  # Custom hex colors
)

fig.update_layout(
    title='<b>The relationship between (BMI Category, Blood Pressure and Heart Rate) and their effect on Sleep Disorder</b>',
    showlegend=True,
    plot_bgcolor='#faf8ff',  # Light purple plot background
  # Light purple overall figure background
)

fig.show()


In [14]:

# Préparation des données
subset = data[data["Sleep Disorder"] != "None"].copy()
subset["Physical Activity Group"] = (subset["Physical Activity Level"] // 10) * 10
subset["Activity Range"] = subset["Physical Activity Group"].astype(str) + "-" + (subset["Physical Activity Group"] + 9).astype(str)

# Aggrégation
plot_data = subset.groupby(["Activity Range", "Sleep Disorder"]).size().reset_index(name="Count")

# Create a numeric order for sorting
# Extract the first number from each range to use for sorting
plot_data["Sort_Order"] = plot_data["Activity Range"].str.split("-").str[0].astype(int)

# Sort the dataframe by this numeric value
plot_data = plot_data.sort_values("Sort_Order")

# Graphique interactif
fig = px.bar(
    plot_data,
    x="Activity Range",
    y="Count",
    color="Sleep Disorder",
    color_discrete_map={"No Disorder": "#D4C1EC","Insomnia": "#2E1A47", "Sleep Apnea": "#654ea3"},
    title="<b>Sleep Disorders Distribution by Physical Activity Level<b>",
    labels={"Activity Range": "Physical Activity Level (Grouped by 10s) (%)", "Count": "Number of People"},
    barmode='stack',
    width=width,
    height=height,
    category_orders={"Activity Range": plot_data["Activity Range"].unique()}  # Preserve the sorted order
)

# Améliorations esthétiques
fig.update_layout(
    hovermode="x unified",
    plot_bgcolor='white',
    legend_title_text='Sleep Disorder',
    title_x=0.5
)

# Affichage
fig.show()

In [15]:
data['Systolic BP'] = data['Blood Pressure'].str.split('/').str[0].astype(float)

categories = sorted(data['Quality of Sleep'].unique())
n_categories = len(categories)

colors = n_colors('rgb(79, 41, 146)','rgb(247, 159, 121)', n_categories, colortype='rgb')

fig = go.Figure()

for cat, color in zip(categories, colors):
    cat_data = data.loc[data['Quality of Sleep'] == cat, 'Systolic BP']
    fig.add_trace(go.Violin(
        x=cat_data,
        name=str(cat),
        line_color=color,
        orientation='h',
        box_visible=True,      # Optionally show a box plot inside the violin
        meanline_visible=True       # Show all data points
    ))

# Update layout settings
fig.update_layout(
    title="<b>Distribution of Systolic BP by Quality of Sleep<b>",
    xaxis_title="Systolic BP",
    yaxis_title="Quality of Sleep",
    xaxis_showgrid=False,
    xaxis_zeroline=False,
    violinmode='overlay',
    plot_bgcolor='white',
    title_x = 0.5,
    width = width,
    height = height
)
fig.update_layout(height=height, width=width)

fig.show()


In [16]:

# Configuration
PLOT_WIDTH = 800
PLOT_HEIGHT = 600
PLOT_BG_COLOR = "#faf8ff"
TITLE = "<b>Violin Plot: Qualité du Sommeil par Niveau de Stress<b>"
COLOR_PALETTE = "Purp"
STRESS_MIN = 3
STRESS_MAX = 8

def create_stress_violin_plot(data):
    """Create a Seaborn-style violin plot showing quality of sleep by stress level"""

    # Define full range of stress levels from 3-8
    all_stress_levels = list(range(STRESS_MIN, STRESS_MAX + 1))

    # Generate colors from purple palette for the full range
    num_levels = len(all_stress_levels)
    colors = px.colors.sample_colorscale(COLOR_PALETTE,
                                        [i/(num_levels-1) for i in range(num_levels)])

    # Create figure
    fig = go.Figure()

    # Add violin traces for each stress level
    for i, stress in enumerate(all_stress_levels):
        subset = data[data["Stress Level"] == stress]

        # Skip empty categories (but still preserve their space in x-axis)
        if len(subset) > 0:
            fig.add_trace(go.Violin(
                x=[stress] * len(subset),
                y=subset["Quality of Sleep"],
                name=str(stress),
                fillcolor=colors[i],
                line=dict(color="black", width=1.5),
                opacity=0.9,
                side='both',
                width=0.8,
                box=dict(
                    visible=True,
                    width=0.3,
                    fillcolor='white',
                    line=dict(color='black', width=1.5)
                ),
                meanline=dict(visible=False),
                points=False,
                spanmode='soft',
                bandwidth=0.7
            ))

    # Style the plot
    fig.update_layout(
        title=dict(
            text=TITLE,
            x=0.5,
            font=dict(size=16, family="Arial")
        ),
        xaxis=dict(
            title=dict(text="Niveau de Stress", font=dict(size=12)),
            categoryorder='array',
            categoryarray=all_stress_levels,
            tickmode='array',
            tickvals=all_stress_levels,
            ticktext=[str(level) for level in all_stress_levels],
            showgrid=True,
            showline=True,
            linewidth=1,
            gridcolor="rgba(0,0,0,0.1)",
            linecolor='rgba(0,0,0,0.2)',
            mirror=True
        ),
        yaxis=dict(
            title=dict(text="Qualité du Sommeil", font=dict(size=12)),
            gridcolor="rgba(0,0,0,0.1)",
            zeroline=False,
            showline=True,
            linewidth=1,
            linecolor='rgba(0,0,0,0.2)',
            mirror=True
        ),
        plot_bgcolor=PLOT_BG_COLOR,
        width=PLOT_WIDTH,
        height=PLOT_HEIGHT,
        showlegend=False,
        margin=dict(t=80, b=60, l=60, r=30)
    )

    return fig

# Create and display the plot
fig = create_stress_violin_plot(data)
fig.show()

In [17]:
fig=px.ecdf(data,x='Age',
            color='Sleep Disorder',
            color_discrete_sequence = ['#4A235A', '#8E44AD', '#C39BD3'])


fig.update_layout(
    title={
        'text': '<b>The effect of Age on disorder on Sleep Disorder duration</b>',
        'x': 0.5,
        'xanchor': 'center'
    },
    plot_bgcolor='#faf8ff'


)

fig.update_layout(width=width, height=height)



In [18]:
fig = px.density_heatmap(
    data,
    x="Daily Steps",
    y="Quality of Sleep",

    nbinsx=10,  # adjust based on your data
    nbinsy=10,
    text_auto=True,
    color_continuous_scale="Purp",
    title="<b>Daily Steps vs Quality of Sleep Density Heatmap<b>"
)
fig.update_layout(height=height, width=width, title_text="<b>Daily Steps vs Quality of Sleep Density Heatmap<b>", title_x=0.5)
fig.show()


In [19]:
# Configuration with default values
PLOT_WIDTH = width
PLOT_HEIGHT = height
PLOT_BG_COLOR = "#faf8ff"
TITLE = "<b>Impact of stress level on sleep duration<b>"
COLOR_PALETTE = "Purp_r"  # Reversed purple palette to match your original

def create_stress_violin_plot(data, width=PLOT_WIDTH, height=PLOT_HEIGHT):
    """Create a Seaborn-style violin plot showing sleep duration by stress level

    Parameters:
    - data: DataFrame containing the dataset
    - width: Plot width in pixels (default: 800)
    - height: Plot height in pixels (default: 600)
    """

    # Get stress levels in reverse order (to match your original)
    stress_levels = sorted(data["Stress Level"].unique(), reverse=True)

    # Generate colors from reversed purple palette
    num_levels = len(stress_levels)
    colors = px.colors.sample_colorscale(COLOR_PALETTE,
                                         [i/(num_levels-1) for i in range(num_levels)])

    # Create figure
    fig = go.Figure()

    # Add violin traces for each stress level
    for i, stress in enumerate(stress_levels):
        subset = data[data["Stress Level"] == stress]

        # Skip empty categories (but still preserve their space in x-axis)
        if len(subset) > 0:
            fig.add_trace(go.Violin(
                x=[stress] * len(subset),
                y=subset["Sleep Duration"],
                name=str(stress),
                fillcolor=colors[i],
                line=dict(color="black", width=1.5),
                opacity=0.9,
                side='both',
                width=0.8,
                box=dict(
                    visible=True,
                    width=0.3,
                    fillcolor='white',
                    line=dict(color='black', width=1.5)
                ),
                meanline=dict(visible=False),
                points=False,
                spanmode='soft',
                bandwidth=0.7
            ))

    # Style the plot
    fig.update_layout(
        title=dict(
            text=TITLE,
            x=0.5,
            font=dict(size=16, family="Arial, sans-serif")
        ),
        xaxis=dict(
            title=dict(text="Stress Level", font=dict(size=14, family="Arial, sans-serif")),
            categoryorder='array',
            categoryarray=stress_levels,
            tickmode='array',
            tickvals=stress_levels,
            ticktext=[str(level) for level in stress_levels],
            showgrid=True,
            showline=True,
            linewidth=1,
            gridcolor="rgba(0,0,0,0.05)",
            linecolor='rgba(0,0,0,0.2)',
            mirror=True
        ),
        yaxis=dict(
            title=dict(text="Sleep Duration (hours)", font=dict(size=14, family="Arial, sans-serif")),
            gridcolor="rgba(0,0,0,0.05)",
            zeroline=False,
            showline=True,
            linewidth=1,
            linecolor='rgba(0,0,0,0.2)',
            mirror=True
        ),
        plot_bgcolor=PLOT_BG_COLOR,
        width=width,
        height=height,
        showlegend=False,
        template="plotly_white",
        font=dict(family="Arial, sans-serif", size=14),
        margin=dict(t=80, b=60, l=60, r=30)
    )

    return fig

# Create and display the plot
fig = create_stress_violin_plot(data)
fig.show()

In [20]:
import plotly.express as px
import numpy as np
from scipy import stats

# Create stress categories if not already done
conditions = [
    (data['Stress Level'] == 3) | (data['Stress Level'] == 4),
    (data['Stress Level'] == 5) | (data['Stress Level'] == 6),
    (data['Stress Level'] == 7) | (data['Stress Level'] == 8)
]
choices = ['Low Stress', 'Medium Stress', 'High Stress']

data['Stress Category'] = np.select(conditions, choices, default='Unknown')

# Define colors with transparency included
colors = {
    "Low Stress": "rgba(0, 32, 76, 0.5)",    # #00204c with 0.5 alpha
    "Medium Stress": "rgba(123, 109, 155, 0.5)",  # #7b6d9b with 0.5 alpha
    "High Stress": "rgba(255, 110, 84, 0.5)"      # #ff6e54 with 0.5 alpha
}

# Calculate KDE for each category
x_range = np.linspace(data['Age'].min() - 2, data['Age'].max() + 2, 300)
fig = px.line()  # Start with empty figure

for category in choices:
    subset = data[data['Stress Category'] == category]['Age']
    if len(subset) > 1:  # Need at least 2 points for KDE
        kde = stats.gaussian_kde(subset)
        y_values = kde(x_range)

        # Add KDE curves with proper alpha transparency
        fig.add_scatter(
            x=x_range,
            y=y_values,
            mode='lines',
            line=dict(width=2, color=colors[category].replace("0.5)", "1.0)")),  # Solid line
            name=category,
            fill='tozeroy',
            fillcolor=colors[category],  # Fill with transparency
        )

# Update layout to match the original plot
fig.update_layout(
    title={
        'text': '<b>Age vs Stress Level<b>',
        'font': {'size': 16},
        'y': 0.95,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top'
    },
    plot_bgcolor='#faf8ff',
    width=1000,
    height=600,
    xaxis_title_text='Age of the Person',
    yaxis_title_text='Density',
    xaxis_title_font={'size': 10},
    yaxis_title_font={'size': 10},
    legend_title_text='Stress Category',
    xaxis=dict(range=[data['Age'].min() - 2, data['Age'].max() + 2]),
    yaxis=dict(range=[0, None]),
)

fig.show()

In [21]:
# Compute the average stress level for each occupation and sort by stress level
avg_data = data.groupby('Occupation', as_index=False)['Stress Level'].mean()
avg_data = avg_data.sort_values('Stress Level')

# Create a bar plot using Plotly Express
fig = px.bar(
    avg_data,
    x='Occupation',
    y='Stress Level',
    text='Stress Level',
    color='Occupation',
    color_discrete_sequence=px.colors.sequential.Purp,  # Use a Purples palette
    title='<b>Relationship Between Occupation and Stress Level<b>',
    width=800,
    height=700 )


# Format the text to show two decimals and position it above the bars
fig.update_traces(texttemplate='%{text:.2f}', textposition='outside')

# Update layout: set a light purple background, adjust axis titles, and rotate x-axis tick labels
fig.update_layout(
    plot_bgcolor='#faf8ff',
    xaxis_title='Occupation',
    yaxis_title='Average Stress Level',
    xaxis_tickangle=-45,
    title_x=0.5
)

fig.show()


In [22]:
import plotly.express as px
import pandas as pd

fig = px.scatter(
    data,
    x='Daily Steps',
    y='Heart Rate',
    color='BMI Category',
    size='Daily Steps',  # Size represents Daily Steps
    marginal_x='violin',
    marginal_y='violin',
    color_discrete_sequence=['#dcd0ff', '#a066c2', '#311b92'],  # very light, intermediate, and very dark purple
    title='<b>Daily Steps vs. Heart Rate by BMI Category<b>',
    width=1200,
    height=600
)

fig.update_layout(
    xaxis_title='Daily Steps',
    yaxis_title='Heart Rate (bpm)',
    title_x=0.4,  # Center the title
    plot_bgcolor='#faf8ff'
)

# Update main plot grid lines (for the primary xaxis and yaxis)
fig.update_xaxes(showgrid=True, gridcolor='#d1c4e9', row=1, col=1)
fig.update_yaxes(showgrid=True, gridcolor='#d1c4e9', row=1, col=1)

# Disable grid for marginal plots by iterating through layout axes
for key in fig.layout:
    if key.startswith('xaxis') and key != 'xaxis':
        fig.layout[key].showgrid = False
    if key.startswith('yaxis') and key != 'yaxis':
        fig.layout[key].showgrid = False

fig.show()


In [23]:
import plotly.express as px
import pandas as pd
import numpy as np

# Aggregate data: compute mean heart rate and standard error for each Age
grouped = data.groupby("Age")["Heart Rate"].agg(["mean", "std", "count"]).reset_index()
grouped["se"] = grouped["std"] / np.sqrt(grouped["count"])

# Create a point plot (scatter with lines and error bars)
fig = px.scatter(
    grouped,
    x="Age",
    y="mean",
    error_y="se",
    title="<b>Heart Rate Variation by Age<b>",
)

# Connect points with lines and apply a dark purple color for markers and lines
fig.update_traces(
    mode='lines+markers',
    marker=dict(color='#a066c2'),  # Dark purple
    line=dict(color='#a066c2')
)

# Update layout: set axis titles, center the title, and apply background and size settings
fig.update_layout(
    xaxis_title="Age",
    yaxis_title="Mean Heart Rate (bpm)",
    title_x=0.5,  # Center the title
    plot_bgcolor='#faf8ff',  # Light purple background
    width=1000,
    height=600
)

# Update grid lines with a slightly darker purple for contrast
fig.update_xaxes(showgrid=True, gridcolor='#d1c4e9')
fig.update_yaxes(showgrid=True, gridcolor='#d1c4e9')

fig.show()


In [24]:
import plotly.express as px

fig = px.sunburst(
    data,
    path=[px.Constant('Sleep quality'), 'Sleep Disorder', 'Quality of Sleep'],
    values='Sleep Duration',
    color='Sleep Disorder',
    color_discrete_map={
        '(?)': '#faf8ff',
        'Insomnia': '#311b92',
        'Sleep Apnea': '#a066c2',
        'No Disorder': '#dcd0ff'
    }
)

fig.update_layout(
    width=1000,
    height=600,
    title={
        'text': '<b>The effect of sleep disorder on sleep duration<b>',
        'x': 0.5,
        'xanchor': 'center'
    },
    plot_bgcolor='#fe95f0'
)

fig.show()


In [25]:
# Calculate the number of people for each sleep duration
bubble_size = data['Sleep Duration'].value_counts().reset_index()
bubble_size.columns = ['Sleep Duration', 'Count']

# Group by sleep duration to calculate the average sleep quality
data_grouped = data.groupby("Sleep Duration")["Quality of Sleep"].mean().reset_index()

# Create the line trace for average sleep quality with a purple line
line_fig = go.Scatter(
    x=data_grouped["Sleep Duration"],
    y=data_grouped["Quality of Sleep"],
    mode="lines",
    line=dict(color='#6a0dad'),  # Purple line
    name="Average Sleep Quality"
)

# Create the bubble trace with purple markers
bubble_fig = go.Scatter(
    x=bubble_size["Sleep Duration"],
    y=data_grouped["Quality of Sleep"],
    mode="markers",
    marker=dict(
        size=bubble_size["Count"],  # Bubble size based on count
        color='rgba(155, 89, 182, 0.7)',  # Purple bubble color
        line=dict(color='#8e44ad', width=1)
    ),
    name="Number of People by Sleep Duration"
)

# Combine the traces into one figure
fig = go.Figure(data=[line_fig, bubble_fig])

# Update the layout with improved titles and axis labels
fig.update_layout(
    title="<b>Sleep Quality and Number of People by Sleep Duration<b>",
    xaxis_title="Sleep Duration (hours)",
    yaxis_title="Sleep Quality",
    width=1000,
    height=600,
    title_x=0.4,  # Center the title
    plot_bgcolor='#faf8ff',
    showlegend=True
)

fig.show()


In [26]:
import plotly.graph_objects as go
import plotly.express as px

# Configuration with default values
PLOT_WIDTH = 800  # Default width if not specified elsewhere
PLOT_HEIGHT = 600  # Default height if not specified elsewhere
PLOT_BG_COLOR = "#faf8ff"
TITLE = "<b>Impact of stress level on sleep duration<b>"
COLOR_PALETTE = "Purp_r"  # Reversed purple palette to match your original

def create_stress_boxplot(data, width=PLOT_WIDTH, height=PLOT_HEIGHT):
    """Create a Seaborn-style box plot showing sleep duration by stress level

    Parameters:
    - data: DataFrame containing the dataset
    - width: Plot width in pixels (default: 800)
    - height: Plot height in pixels (default: 600)
    """

    # Get stress levels in reverse order (to match your original)
    stress_levels = sorted(data["Stress Level"].unique(), reverse=True)

    # Generate colors from reversed purple palette
    num_levels = len(stress_levels)
    colors = px.colors.sample_colorscale(COLOR_PALETTE,
                                         [i/(num_levels-1) for i in range(num_levels)])

    # Create figure
    fig = go.Figure()

    # Add box traces for each stress level
    for i, stress in enumerate(stress_levels):
        subset = data[data["Stress Level"] == stress]

        fig.add_trace(go.Box(
            x=[stress] * len(subset),
            y=subset["Sleep Duration"],
            name=str(stress),
            fillcolor=colors[i],
            line=dict(color="black", width=1.5),
            opacity=0.9,
            boxmean=True,  # Show mean as a dashed line
            boxpoints=False,  # Don't show underlying points
            width=0.8,
            marker=dict(
                color=colors[i],
                opacity=0.7
            ),
            whiskerwidth=0.8,
            line_width=1.5
        ))

    # Style the plot
    fig.update_layout(
        title=dict(
            text=TITLE,
            x=0.5,
            font=dict(size=16, family="Arial, sans-serif")
        ),
        xaxis=dict(
            title=dict(text="Stress Level", font=dict(size=14, family="Arial, sans-serif")),
            categoryorder='array',
            categoryarray=stress_levels,
            showgrid=True,
            showline=True,
            linewidth=1,
            gridcolor="rgba(0,0,0,0.05)",
            linecolor='rgba(0,0,0,0.2)',
            mirror=True
        ),
        yaxis=dict(
            title=dict(text="Sleep Duration (hours)", font=dict(size=14, family="Arial, sans-serif")),
            gridcolor="rgba(0,0,0,0.05)",
            zeroline=False,
            showline=True,
            linewidth=1,
            linecolor='rgba(0,0,0,0.2)',
            mirror=True
        ),
        plot_bgcolor=PLOT_BG_COLOR,
        width=width,
        height=height,
        showlegend=False,
        template="plotly_white",  # Keep the white template from original
        font=dict(family="Arial, sans-serif", size=14),
        margin=dict(t=80, b=60, l=60, r=30)
    )

    return fig

# Create and display the plot
# If you have defined width and height elsewhere, pass them as arguments
# fig = create_stress_boxplot(data, width=width, height=height)
# Otherwise, use default values:
fig = create_stress_boxplot(data)
fig.show()

In [27]:
# Define your colors
highlight_color = '#dcd0ff'
base_color = '#311b92'
# Custom color list (adjust length as needed)
custom_color_list = [
    highlight_color, base_color, base_color, base_color, base_color,
    base_color, base_color, base_color, highlight_color, highlight_color,
    base_color, base_color, base_color
]

# Compute the number of non-missing values for each column
completeness = data.notnull().sum()

# Build a DataFrame with column names and counts
df_complete = pd.DataFrame({
    'Column': completeness.index,
    'NonMissing': completeness.values
})

# Sort descending by non-missing count (as in Missingno's sort='descending')
df_complete = df_complete.sort_values('NonMissing', ascending=False)

# Create a custom color for each column from the list (use base_color if list is too short)
custom_colors = []
for i, col in enumerate(df_complete['Column']):
    if i < len(custom_color_list):
        custom_colors.append(custom_color_list[i])
    else:
        custom_colors.append(base_color)
df_complete['Color'] = custom_colors

# Build a mapping from column names to colors for Plotly Express
color_mapping = dict(zip(df_complete['Column'], df_complete['Color']))

# Create the bar chart using Plotly Express
fig = px.bar(
    df_complete,
    x='Column',
    y='NonMissing',
    text='NonMissing',
    color='Column',
    color_discrete_map=color_mapping,
    title="<b> Visualization of All The Variables In The Dataset (374 Rows)<b>",
    width = 800,
    height = 600
)

# Update layout: set background colors, center title, and customize fonts & margins
fig.update_layout(
    plot_bgcolor='#faf8ff',   # Plot area background (light purple)
    paper_bgcolor='#faf8ff',  # Overall figure background
    title_x=0.4,
    xaxis_title="",
    yaxis_title="Non-missing Count",
    font=dict(family='Serif', size=12),
    margin=dict(l=40, r=40, t=80, b=80)
)

# Rotate x-axis tick labels and style them
fig.update_xaxes(
    tickangle=50,
    tickfont=dict(family='Serif', color='#512b58')
)

# Optionally hide y-axis tick labels to mimic the original style
fig.update_yaxes(showticklabels=False)

fig.show()


In [28]:
# Prepare the gender count data
gender_counts = data['Gender'].value_counts().reset_index()
gender_counts.columns = ['Gender', 'Count']

# Create pie chart with a sunset-inspired color palette
fig = px.pie(
    gender_counts,
    names="Gender",
    values="Count",
    color="Gender",
    title="<b>Gender Distribution<b>",
    hole=0.3,
    color_discrete_sequence=['#311b92','#dcd0ff'],
    width=800,
    height=600
)

# Update layout for better appearance
fig.update_layout(
    title_font_size=20,
    legend_title_font_size=16,
    legend_font_size=14,
    title_x=0.5,
     legend=dict(
        orientation="h",
        yanchor="bottom",
        y=-0.15,
        xanchor="center",
        x=0.5
    )
)

# Display the chart
fig.show()

In [29]:
import plotly.express as px

fig = px.histogram(
    data_frame=data,
    x="Age",
    nbins=20,
    title="<b>Age Distribution<b>",
    color_discrete_sequence=['#311b92'],
    width=800,
    height=600
)

fig.update_layout(
    xaxis_title="Age",
    yaxis_title="Count",
    template="plotly_white",
    title_x=0.5,
    bargap=0.15,
    font=dict(family="Arial, sans-serif", size=14),
    plot_bgcolor='#faf8ff'

)

fig.show()

In [30]:
# Get BMI category counts
bmi_counts = data["BMI Category"].value_counts().reset_index()
bmi_counts.columns = ["BMI Category", "Count"]

# Create the pie chart with Sunset color palette
fig = px.pie(
    bmi_counts,
    names="BMI Category",
    values="Count",
    color="BMI Category",
    title="<b>BMI Categories Distribution<b>",
    color_discrete_sequence=['#a066c2', '#dcd0ff','#311b92']  # Using Sunset color palette
)

# Keep the same layout settings
fig.update_layout(
    title_x=0.5,
    font=dict(size=12),
    legend_title_text="",
    template="plotly_white",
    margin=dict(t=80, b=20, l=20, r=20),
    autosize=False,
    width=800,
    height=600,
    legend=dict(
        orientation="h",
        yanchor="bottom",
        y=-0.15,
        xanchor="center",
        x=0.5
    )
)

fig.show()

In [31]:
# Prepare sleep quality distribution data
sleep_quality_distribution = data['Quality of Sleep'].value_counts().sort_index().reset_index()
sleep_quality_distribution.columns = ['Quality of Sleep', 'Count']

# Create horizontal bar chart with Sunset palette
fig = px.bar(
    sleep_quality_distribution,
    x='Count',
    y='Quality of Sleep',
    orientation='h',
    title='<b>Sleep Quality Distribution<b>',  # Translated title
    labels={'Count': 'Number of People', 'Quality of Sleep': 'Sleep Quality (0-10)'},  # Translated labels
    color='Quality of Sleep',
    color_continuous_scale=px.colors.sequential.Purpor
)

# Customize the chart
fig.update_layout(
    yaxis_title="Sleep Quality (0-10)",  # Translated axis title
    xaxis_title="Number of People",  # Translated axis title
    title_font_size=20,
    title_x=0.5,  # Center the title
    showlegend=False,
    coloraxis_showscale=True,
    coloraxis_colorbar=dict(
        title="Quality",  # Translated colorbar title
        title_side="right",
        thickness=15
    ),
    plot_bgcolor='#faf8ff',
    width=800,
    height=600
)



# Display the chart
fig.show()

In [32]:
sleep_disorder_counts = data["Sleep Disorder"].fillna("None").value_counts().reset_index()
sleep_disorder_counts.columns = ["Sleep Disorder", "Count"]

# Créer le funnel chart
fig = px.funnel(
    sleep_disorder_counts,
    x="Count",
    y="Sleep Disorder",
    title="<b>Distribution of Sleep Disorders<b>",
    color_discrete_sequence=['#311b92']
)

# Update layout to center title
fig.update_layout(
    title_x=0.5,
    width=1200,
    height=600,
    plot_bgcolor='#faf8ff'
)

# Afficher
fig.show()

In [33]:
stress_counts = data["Stress Level"].value_counts().reset_index()
stress_counts.columns = ["Stress Level", "Count"]

# Sort the data by Stress Level for better visualization
stress_counts = stress_counts.sort_values("Stress Level")

fig = px.bar(
    stress_counts,
    x="Stress Level",
    y="Count",
    color="Stress Level",  # Apply color based on stress level
    color_continuous_scale=px.colors.sequential.Purpor,  # Use Sunset color palette
    title="<b>Distribution of Stress Levels<b>",
    labels={"Stress Level": "Stress Level", "Count": "Number of People"},
)

# Hide grid and add white background with centered title
fig.update_layout(
  # White background
    xaxis=dict(showgrid=False),
    yaxis=dict(showgrid=False),
    title_x=0.5,  # Center the title
    coloraxis_colorbar=dict(
        title="Stress Level",
        title_side="right"
    ),
    width=800,
    height=600,
    plot_bgcolor='#faf8ff'
)

fig.show()

In [34]:
# Define occupation categories with more detailed grouping
occupation_categories = {
    'Healthcare': ['Doctor', 'Nurse'],
    'Technology': ['Software Engineer', 'Engineer', 'Scientist'],
    'Business': ['Accountant', 'Manager'],
    'Sales': ['Salesperson', 'Sales Representative'],
    'Education/Legal': ['Teacher', 'Lawyer']  # Assuming "Layer" was a typo for "Lawyer"
}

# Create mapping from occupation to category
occupation_to_category = {}
for category, occupations in occupation_categories.items():
    for occupation in occupations:
        occupation_to_category[occupation] = category

# Process data for treemap
# Create treemap data
treemap_data = []

for occupation in data['Occupation'].unique():
    subset = data[data['Occupation'] == occupation]
    category = occupation_to_category.get(occupation, 'Other')

    # Get sleep disorder distribution
    disorder_counts = subset['Sleep Disorder'].value_counts(normalize=True) * 100
    disorder_info = ', '.join([f"{k}: {v:.1f}%" for k, v in disorder_counts.items()])

    treemap_data.append({
        'Category': category,
        'Occupation': occupation,
        'Count': len(subset),
        'Avg Sleep Duration': subset['Sleep Duration'].mean(),
        'Avg Sleep Quality': subset['Quality of Sleep'].mean(),
        'Avg Stress Level': subset['Stress Level'].mean(),
        'Avg Physical Activity': subset['Physical Activity Level'].mean(),
        'Sleep Disorders': disorder_info
    })

treemap_df = pd.DataFrame(treemap_data)

# Create treemap with Sunset color palette
fig = px.treemap(
    treemap_df,
    path=[px.Constant('All Occupations'), 'Category', 'Occupation'],
    values='Count',
    color='Category',  # Color by occupation category
    color_discrete_sequence=px.colors.sequential.Purpor,  # Use Sunset color palette
    hover_data=['Avg Sleep Duration', 'Avg Sleep Quality', 'Avg Stress Level', 'Sleep Disorders'],
    title='<b>Sleep Metrics by Occupation Group<b>'
)

fig.update_layout(
    margin=dict(t=50, l=25, r=25, b=25),
    title_x=0.5  # Center the title
)

fig.show()

In [35]:
# Create categories
data['Sleep Duration Category'] = pd.cut(
    data['Sleep Duration'],
    bins=[0, 6.0, 7.0, 10],
    labels=['Short (<6h)', 'Average (6-7h)', 'Long (>7h)']
)

data['Sleep Quality Category'] = pd.cut(
    data['Quality of Sleep'],
    bins=[0, 4, 7, 10],
    labels=['Poor', 'Average', 'Excellent']
)

# Categories to visualize
categories = ['Occupation', 'BMI Category', 'Sleep Disorder',
              'Sleep Duration Category', 'Sleep Quality Category']

# Create figure for polar plot
fig = go.Figure()

# Color palettes with specific colorscales for each category
palettes = {
    'Occupation': px.colors.sequential.Agsunset,
    'BMI Category': px.colors.sequential.Sunsetdark,
    'Sleep Disorder': px.colors.sequential.Purp,
    'Sleep Duration Category': px.colors.sequential.Sunset,
    'Sleep Quality Category': px.colors.sequential.Purp_r
}

# Custom multipliers for each category
category_multipliers = {
    'Occupation': 0.65,
    'BMI Category': 1.23,
    'Sleep Disorder': 1.14,
    'Sleep Duration Category': 1.16,
    'Sleep Quality Category': 1.25
}

# Define explicit ordered categories for ordinal variables
ordered_categories = {
    'Sleep Duration Category': ['Short (<6h)', 'Average (6-7h)', 'Long (>7h)'],
    'Sleep Quality Category': ['Poor', 'Average', 'Excellent'],
    'BMI Category': ['Underweight', 'Normal', 'Overweight', 'Obese']
}

# Process each category
for category in categories:
    # Get value counts and ensure Sleep Disorder shows all values
    if category == 'Sleep Disorder':
        all_values = data[category].unique()
        value_counts = data[category].value_counts().reindex(all_values).fillna(0)
    else:
        value_counts = data[category].value_counts()

    # Handle ordering properly
    if category in ordered_categories:
        # Use predefined order for ordinal categories
        valid_categories = [cat for cat in ordered_categories[category] if cat in value_counts.index]
        value_counts = value_counts.reindex(valid_categories).dropna()
    elif category == 'Occupation' and len(value_counts) > 12:
        # Handle Occupation specially (limit to top 12)
        value_counts = value_counts.nlargest(12)
        other_count = data[category].shape[0] - value_counts.sum()
        if other_count > 0:
            value_counts['Other'] = other_count
    else:
        # For other categories, sort by count in descending order
        value_counts = value_counts.sort_values(ascending=False)

    subcategories = value_counts.index.tolist()
    counts = value_counts.values

    # Get colors from specific palette for each category
    palette = palettes.get(category)

    # Generate colors for each subcategory by sampling the palette
    if len(subcategories) <= 1:
        colors = [palette[0]]
    else:
        # For ordered categories, use gradient from low to high values
        if category in ordered_categories:
            indices = np.linspace(0, len(palette)-1, len(subcategories)).astype(int)
            colors = [palette[i] for i in indices]
        else:
            # For nominal categories, use distributed colors
            indices = np.linspace(0, len(palette)-1, len(subcategories)).astype(int)
            colors = [palette[i] for i in indices]

    # Use a more discriminating transformation to ensure better visual distinction
    # Use a power transformation instead of logarithmic to better emphasize differences
    transformed_counts = np.power(counts, 0.5) * category_multipliers[category]

    fig.add_trace(
        go.Barpolar(
            r=transformed_counts,
            theta=[category] * len(subcategories),
            width=0.85,
            marker_color=colors,
            marker_line_color="white",
            marker_line_width=1,
            opacity=0.9,
            name=category,
            customdata=[[s, int(c)] for s, c in zip(subcategories, counts)],
            hovertemplate="<b>%{theta}</b><br>%{customdata[0]}<br>Count: %{customdata[1]}<extra></extra>"
        )
    )

# Update layout
fig.update_layout(
    title="<b>Health Metrics by Category<b>",
    polar=dict(
        radialaxis=dict(visible=True, showticklabels=False, gridcolor="lightgrey"),
        angularaxis=dict(direction="clockwise", rotation=90, tickfont=dict(size=14))
    ),
    height=600,
    width=850,
    template="plotly_white",
    legend=dict(orientation="h", yanchor="bottom", y=-0.1, xanchor="center", x=0.5),
    title_x=0.5
)

fig.show()

In [None]:
# Create the histogram with 10 bins and spaces between bars
fig = px.histogram(
    data,
    x="Daily Steps",
    nbins=10,
    title="<b>Distribution of Daily Steps<b>",
    labels={"Daily Steps": "Daily Steps", "count": "Frequency"},
    opacity=0.8  # Add transparency to create space effect
)

# Calculate the bin width - add this line first!
bin_width = (data["Daily Steps"].max() - data["Daily Steps"].min()) / 10

# Update layout to center title, set axis labels, and extend x-axis
fig.update_layout(
    xaxis_title="Daily Steps",
    yaxis_title="Frequency",
    title_x=0.5,
    width=800,
    height=600,
    bargap=0.2,  # Add gap between bars
    plot_bgcolor='#faf8ff'
)

# Compute the KDE over a range spanning the data
x_vals = np.linspace(data["Daily Steps"].min(), 11000, 200)  # Extend range to 11000
kde = gaussian_kde(data["Daily Steps"])
density = kde(x_vals)

fig.update_traces(marker_color='#dcd0ff')

# Scale the KDE to match histogram frequencies
density_scaled = density * len(data["Daily Steps"]) * bin_width

# Add the KDE line as a new trace
fig.add_trace(go.Scatter(
    x=x_vals,
    y=density_scaled,
    mode='lines',
    name='KDE',
    line=dict(color='#311b92')  # Change color as desired
))

# Update x-axis tick values and range
fig.update_xaxes(
    tickvals=np.arange(data["Daily Steps"].min(), 11001, step=500),
    tickfont=dict(size=12),
    range=[data["Daily Steps"].min(), 11000]  # Extend x-axis range
)

# Do not call fig.show() in a Dash application - the figure will be displayed
# through the dcc.Graph component



In [50]:

data["Blood Pressures"] = (
    data["Blood Pressure"].str.split("/").apply(lambda x: (int(x[0]) + int(x[1])) / 2)
)

# Create a numerical encoding for Sleep Disorder
disorder_encoding = {"No Disorder": 0, "Insomnia": 1, "Sleep Apnea": 2}
data["Sleep Disorder Code"] = data["Sleep Disorder"].map(disorder_encoding)

# Create a numerical encoding for BMI Category
bmi_encoding = {"Obese": 0, "Overweight": 1, "Normal": 2}
data["BMI Category Code"] = data["BMI Category"].map(bmi_encoding)

# Create parallel coordinates plot
fig1 = px.parallel_coordinates(
    data,
    dimensions=[
        "BMI Category Code",
        "Physical Activity Level",
        "Daily Steps",
        "Blood Pressures",
        "Stress Level",
        "Sleep Duration",
        "Quality of Sleep",
    ],
    color="Sleep Disorder Code",
    color_continuous_scale=px.colors.sequential.Agsunset,
    color_continuous_midpoint=1,
    title="Sleep Health Factors by Disorder Type")

# Add annotations
fig1.update_layout(
    
    coloraxis_colorbar=dict(
        title="Sleep Disorders",
        tickvals=[0, 1, 2],
        ticktext=["No Disorder", "Insomnia", "Sleep Apnea"],
    ),
)
fig1.update_layout(title_x=0.5)

fig1.show()