In [None]:
# Eating Habits & Sleep Quality – DSA210 Term Project

This notebook analyzes how healthy eating habits are related to sleep quality and sleep duration using self-reported survey data from 26 participants.


In [None]:
# 1. Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from scipy.stats import pearsonr, ttest_ind

# Raise the default figure resolution so inline plots render sharper.
plt.rcParams.update({"figure.dpi": 110})

In [None]:
# 2. Create Dataset from survey responses
#
# One tuple per participant; the positional order of the values matches
# `cols` below. `None` stands for a value that is absent from the record
# (pandas stores it as NaN in the numeric columns).
# NOTE(review): the plot labels elsewhere in this notebook describe diet_score and
# sleep_score as 1–10 scales, caffeine as a 0–5 scale and sleep_hours as
# hours; units of the remaining columns are not stated here — confirm
# against the survey form.

data = [
    ("Berre",   3, 2000, 2, 1, 6, 5.0, 15, 0, 5.0, 0, 1),
    ("Ayşenur", 2.5, 1250, 3, 3, 7, 8.0, 7.5, 0, 7.0, 0, 4),
    ("Damla",  2, None, 2, 1, 6, 7.5, 7.5, 0.5, 7.0, 0, 7),
    ("Deha",   3, 2500, 1, 7, 7, 7.0, 45, 0, 6.0, 1, 4),
    ("Elif",   2, 1350, 3, 4, 5, 8.0, 7.5, 0, 10.0, 0, 7),
    ("Gökay",  2, 2200, 2, 7, 7, 7.0, 20, 0.5, 7.0, 1, 2),
    ("Fatma",  2, 2000, 4, 7, 5, 9.0, 10, 0, 10.0, 0, 9),
    ("Alp",    2, 1500, 0, 3.5, 2, 6.5, None, 0, 6.5, 0, None),
    ("Efe",    2, 2200, 1, 1, 6, 7.0, 5, 2, 4.5, 0, 5),
    ("Ayşe Z", 2, 2000, 5, 9, 8, 7.0, 30, 0, 6.0, 1, 7),
    ("Zeynep", 2.5, 1700, 2, 5.5, 6, 8.5, 2, 0, 8.0, 1, 6),
    ("Bedirhan", 1, 2000, 5, 6, 6, 6.0, 20, 0, 6.0, 1, 3),
    ("Bora",   2.5, 1500, 4, 7, 6, 7.0, 40, 0.5, 6.0, 1, 8),
    ("Beril",   3, 900, 1, 2, 6, 5.0, 15, 0, 4.0, 0, 1),
    ("Nehir", 3, 1300, 3, 2, 7, 8.0, 7.5, 0, 6.0, 0, 4),
    ("Şevval",  2, None, 2, 2, 6, 7.5, 7.5, 0.5, 8.0, 0, 7),
    ("Ecem",   3, 2300, 1, 7, 4, 7.0, 45, 0, 5.0, 1, 3),
    ("Elif B.",   2, 1350, 3, 4, 6, 8.0, 7.5, 0, 9.0, 0, 7),
    ("Ali",  2, 2200, 2, 7, 7, 7.0, 19, 0.5, 7.0, 1, 2),
    ("Aynur",  2, 1800, 4, 7, 5, 9.0, 7, 0, 10.0, 0, 9),
    ("Baydan",    2, 1500, 0, 4, 2, 6.5, None, 0, 5.5, 0, None),
    ("Rana",    2, 2200, 1, 1, 6, 7.0, 5, 2, 5, 0, 5),
    ("Ahmet", 2, 2000, 5, 8, 8, 7.0, 30, 0, 6.0, 1, 8),
    ("Zeynep2", 2.5, 1700, 2, 5.5, 6, 9, 2, 0, 8.0, 1, 5),
    ("Mervan", 1, 2000, 5, 6, 5, 6.0, 18, 0, 6.0, 1, 3),
    ("Cebeci",   2.5, 1600, 4, 7, 4, 7.0, 40, 0.5, 6.0, 1, 7),
]

# Column names, in the same order as the values inside each tuple above.
cols = [
    "name",
    "meals", "calories", "caffeine", "hydration", "diet_score",
    "sleep_hours", "sleep_latency", "awakenings", "sleep_score",
    "exercise", "screen",
]

# Scores strictly above this value are flagged 1 ("healthy" / "good").
SCORE_CUTOFF = 6

df = pd.DataFrame(data, columns=cols)

# Derive the 0/1 category columns from the two survey scores.
df = df.assign(
    healthy_diet=df["diet_score"].gt(SCORE_CUTOFF).astype(int),
    good_sleep=df["sleep_score"].gt(SCORE_CUTOFF).astype(int),
)

df.head()


In [None]:
# 3. Basic Info & Cleaning

# Show the frame's dimensions and the per-column count of missing values
# before anything is filled in.
print(df.shape)
print(df.isna().sum())

# Mean-impute the three columns that are filled here: every NaN is
# replaced with the mean of the observed values in the same column.
for column in ("calories", "sleep_latency", "screen"):
    df[column] = df[column].fillna(df[column].mean())

# Transposed summary: one row per column, including the non-numeric ones.
df.describe(include="all").T


In [None]:
# 4. Descriptive Statistics & Correlation

# Numeric variables included in the summary table and the correlation matrix.
num_cols = [
    "diet_score", "sleep_score", "sleep_hours",
    "caffeine", "hydration", "screen",
]

print(df[num_cols].describe().T)

# Pairwise correlations (pandas default method) drawn as an annotated heatmap.
corr_matrix = df[num_cols].corr()

fig, ax = plt.subplots(figsize=(7, 5))
sns.heatmap(corr_matrix, annot=True, cmap="coolwarm", linewidths=.5, ax=ax)
ax.set_title("Correlation Matrix")
fig.tight_layout()
plt.show()


In [None]:
# 5. Visual Explorations

# Reusable scatter-plot helper for the exploratory plots below.
def scatter(x, y, xlabel=None, ylabel=None, title=None, data=None):
    """Draw a scatter plot of two DataFrame columns and show it.

    Parameters
    ----------
    x, y : str
        Names of the columns plotted on the horizontal / vertical axis.
    xlabel, ylabel : str, optional
        Axis labels; when omitted (or empty) the column name is used.
    title : str, optional
        Figure title; when omitted (or empty) it defaults to "<x> vs <y>".
    data : pandas.DataFrame, optional
        Frame that holds the columns. When omitted, the notebook-level
        ``df`` is used, so existing calls keep working unchanged.

    Returns
    -------
    None
        The figure is rendered with ``plt.show()``.
    """
    # Resolve the frame at call time so the helper always sees the current
    # notebook-level `df` when no explicit frame is passed.
    frame = df if data is None else data

    fig, ax = plt.subplots(figsize=(6, 4))
    sns.scatterplot(x=x, y=y, data=frame, ax=ax)
    ax.set_xlabel(xlabel or x)
    ax.set_ylabel(ylabel or y)
    ax.set_title(title or f"{x} vs {y}")
    fig.tight_layout()
    plt.show()

# Scatter plots of sleep quality against diet score, sleep duration and
# caffeine. Each entry is (x column, y column, x label, y label, title).
scatter_specs = [
    ("diet_score", "sleep_score",
     "Healthy Eating Score (1–10)",
     "Sleep Quality (1–10)",
     "Diet Score vs Sleep Quality"),
    ("sleep_hours", "sleep_score",
     "Sleep Duration (hours)",
     "Sleep Quality (1–10)",
     "Sleep Duration vs Sleep Quality"),
    ("caffeine", "sleep_score",
     "Caffeine (0–5 scale)",
     "Sleep Quality (1–10)",
     "Caffeine vs Sleep Quality"),
]

for x_col, y_col, x_label, y_label, plot_title in scatter_specs:
    scatter(x_col, y_col, x_label, y_label, plot_title)

# Histograms of the two survey scores.
# Each entry is (column, figure title, x-axis label).
histogram_specs = [
    ("sleep_score", "Sleep Quality Distribution", "Sleep Quality (1–10)"),
    ("diet_score", "Diet Score Distribution", "Healthy Eating Score (1–10)"),
]

for column, hist_title, hist_xlabel in histogram_specs:
    hist_ax = df[column].hist()
    hist_ax.set_title(hist_title)
    hist_ax.set_xlabel(hist_xlabel)
    hist_ax.set_ylabel("Count")
    plt.show()


In [None]:
# 6. Hypothesis Testing
#
# H0: No relationship between healthy_diet and sleep_score
# H1: There is a relationship


def _report(header, stat_label, stat, p_value):
    """Print a header line followed by a statistic and its p-value (3 d.p.)."""
    print(header)
    print(stat_label, round(stat, 3), "| p-value =", round(p_value, 3))


# Correlation between the 0/1 diet category and the sleep-quality score.
r, p = pearsonr(df["healthy_diet"], df["sleep_score"])
_report("Diet category vs Sleep quality:", "  Pearson r =", r, p)

# Sleep-quality scores split by diet category.
healthy = df.loc[df["healthy_diet"] == 1, "sleep_score"]
unhealthy = df.loc[df["healthy_diet"] == 0, "sleep_score"]

# equal_var=False: the two groups are not assumed to share a variance.
t_stat, p_val = ttest_ind(healthy, unhealthy, equal_var=False)
_report("\nHealthy vs Unhealthy diet groups (sleep quality):",
        "  t-statistic =", t_stat, p_val)

# Extra: sleep duration vs sleep quality.
r2, p2 = pearsonr(df["sleep_hours"], df["sleep_score"])
_report("\nSleep duration vs Sleep quality:", "  Pearson r =", r2, p2)


In [None]:
## 7. Interpretation of Results

- For the diet category (healthy = diet score above 6) vs sleep quality analysis, the Pearson correlation was small and not statistically significant (p > 0.05).
- The independent-samples t-test (Welch's, unequal variances) also showed no significant difference in sleep quality between the healthy and unhealthy diet groups.
- Therefore, we **fail to reject the null hypothesis**: based on this sample, healthy eating habits do not show a statistically significant association with sleep quality. Because the data are observational, these tests can only speak to association, not to a causal effect.

Possible reasons:
- Limited sample size (n = 26)
- Self-reported survey data
- Uncontrolled factors such as stress, exam periods, or mental health.
