NOTEBOOK 3: MODELING/TRAINING

# Sleep Score Prediction Model

Goal:
Train a machine learning model to predict Fitbit Sleep Score using engineered features.

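Models evaluated:
- Baseline (mean predictor)
- Random Forest Regressor (initial prototype)
- Gradient Boosting Regressor (hold-out metrics plus 5-fold cross-validated performance)
- Logistic Regression classifier for "good sleep" (at least 7 hours of sleep)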

In [None]:
# CELL 1: load dataset
import pandas as pd

# Read the processed feature CSV; the DATE column is parsed as datetimes.
df = pd.read_csv(
    filepath_or_buffer="../data/processed/fitbit_features.csv",
    parse_dates=["DATE"],
)

In [None]:
# CELL 2: select features + target
# Predictor columns for the prototype model.
features = ["HOURS_OF_SLEEP_HOURS","REM_SLEEP","DEEP_SLEEP","HEART_RATE_UNDER_RESTING"]

# Rows without a SLEEP_SCORE have no label to train on, and scikit-learn
# estimators reject NaN targets, so drop them before building X and y.
# This matches the dropna(subset=["SLEEP_SCORE"]) applied in CELL 6.
labeled = df.dropna(subset=["SLEEP_SCORE"])

# Missing feature values are replaced with 0 (unchanged prototype behaviour).
X = labeled[features].fillna(0)
y = labeled["SLEEP_SCORE"]

In [None]:
# CELL 3: split + train model
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
split_parts = train_test_split(X, y, random_state=42, test_size=0.2)
X_train, X_test, y_train, y_test = split_parts

# Random forest with 200 trees, seeded for repeatable results.
model = RandomForestRegressor(random_state=42, n_estimators=200)
model.fit(X_train, y_train)


In [None]:
# CELL 4: evaluate model
# Score the held-out rows and report the mean absolute error as the cell output.
preds = model.predict(X_test)
mae = mean_absolute_error(y_true=y_test, y_pred=preds)
mae

In [None]:
# CELL 5: save model
import pickle
# Use a context manager so the file handle is flushed and closed even if
# pickling fails part-way through.
with open("../models/sleep_rf_model.pkl", "wb") as model_file:
    pickle.dump(model, model_file)

In [None]:
# CELL 6: polished regression model
import pandas as pd
import numpy as np
import re

from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.ensemble import GradientBoostingRegressor
import pickle

# ----------------------
# 1. Load engineered data
# ----------------------
# Rows with no SLEEP_SCORE carry no label, so they are removed right after loading.
df = pd.read_csv("../data/clean/fitbit_clean.csv", parse_dates=["DATE"]).dropna(
    subset=["SLEEP_SCORE"]
)

# ----------------------
# 2. Feature engineering
# ----------------------
def time_to_float(s):
    """Convert a clock-time string to fractional hours since midnight.

    Args:
        s: Time-of-day text such as ``"10:30 pm"``. Surrounding whitespace
            and letter case are ignored. Non-string values are treated as
            missing.

    Returns:
        ``hour + minute / 60`` as a float (seconds are ignored), or
        ``np.nan`` when ``s`` is not a string or cannot be parsed.
    """
    if not isinstance(s, str):
        return np.nan
    try:
        t = pd.to_datetime(s.strip().lower())
    except (ValueError, TypeError, OverflowError):
        # Best-effort parsing: unparseable text is treated as missing.
        # Only parse-related errors are caught, so KeyboardInterrupt and
        # genuine programming errors are no longer silently swallowed.
        return np.nan
    return t.hour + t.minute / 60

# Derived columns: bed/wake clock times as fractional hours, plus the weekday
# name taken from DATE.
for hour_col, clock_col in (("bedtime_hour", "BEDTIME"), ("wakeup_hour", "WAKEUP")):
    df[hour_col] = df[clock_col].apply(time_to_float)
df["weekday"] = df["DATE"].dt.day_name()

# Model inputs (six numeric columns followed by the categorical weekday) and the label.
target = "SLEEP_SCORE"
features = [
    "HOURS_OF_SLEEP_HOURS",
    "REM_SLEEP",
    "DEEP_SLEEP",
    "HEART_RATE_UNDER_RESTING",
    "bedtime_hour",
    "wakeup_hour",
    "weekday",
]

X, y = df[features], df[target]

# Column groups routed to the two preprocessing branches.
cat_feats = ["weekday"]
num_feats = [
    "HOURS_OF_SLEEP_HOURS",
    "REM_SLEEP",
    "DEEP_SLEEP",
    "HEART_RATE_UNDER_RESTING",
    "bedtime_hour",
    "wakeup_hour",
]

# Numeric branch: fill gaps with the column median.
numeric_steps = Pipeline([("imp", SimpleImputer(strategy="median"))])

# Categorical branch: fill gaps with the most frequent value, then one-hot
# encode; categories unseen during fit are ignored at transform time.
categorical_steps = Pipeline([
    ("imp", SimpleImputer(strategy="most_frequent")),
    ("oh", OneHotEncoder(handle_unknown="ignore")),
])

# Order matters: the numeric block comes first in the transformed output,
# followed by the one-hot weekday columns.
preprocess = ColumnTransformer(
    transformers=[
        ("num", numeric_steps, num_feats),
        ("cat", categorical_steps, cat_feats),
    ]
)

# ----------------------
# 3. Train/test split
# ----------------------
# 20% of the rows are held out; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# ----------------------
# 4. Baseline
# ----------------------
# Predict the training-set mean score for every held-out row.
train_mean = y_train.mean()
baseline_pred = np.full(y_test.shape, train_mean, dtype=float)
baseline_mae = mean_absolute_error(y_test, baseline_pred)

# ----------------------
# 5. Model
# ----------------------
# Preprocessing and the regressor are chained in one Pipeline, so imputation
# and encoding are fitted only on the rows passed to fit().
model = Pipeline([
    ("prep", preprocess),
    ("gbr", GradientBoostingRegressor(random_state=42))
])

model.fit(X_train, y_train)

preds = model.predict(X_test)

mae  = mean_absolute_error(y_test, preds)
# RMSE is taken as sqrt(MSE): the `squared=False` keyword was deprecated in
# scikit-learn 1.4 and removed in 1.6, while this form works on every version.
rmse = float(np.sqrt(mean_squared_error(y_test, preds)))
r2   = r2_score(y_test, preds)

print("Baseline MAE:", round(baseline_mae, 3))
print("Model MAE   :", round(mae, 3))
print("Model RMSE  :", round(rmse, 3))
print("Model R2    :", round(r2, 3))

# ----------------------
# 6. Cross-validation (optional but great)
# ----------------------
# The scorer returns negated MAE (higher is better), so the sign is flipped
# after averaging the five fold scores.
fold_scores = cross_val_score(
    model, X, y, scoring="neg_mean_absolute_error", cv=5
)
cv_mae = -fold_scores.mean()
print("CV MAE:", round(cv_mae, 3))

# ----------------------
# 7. Save model
# ----------------------
# Use a context manager so the file handle is flushed and closed even if
# pickling fails part-way through.
with open("../models/sleep_gbr_model.pkl", "wb") as model_file:
    pickle.dump(model, model_file)
print("Saved: models/sleep_gbr_model.pkl")


In [None]:
# CELL 7: classification model
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score

# Binary label: 1 when HOURS_OF_SLEEP_HOURS is at least 7, otherwise 0.
df["good_sleep"] = (df["HOURS_OF_SLEEP_HOURS"] >= 7).astype(int)

# The label is a direct threshold on HOURS_OF_SLEEP_HOURS, so that column must
# not also be a predictor: keeping it leaks the answer into the inputs and
# makes the reported metrics meaningless.
label_source = "HOURS_OF_SLEEP_HOURS"
clf_num_feats = [col for col in num_feats if col != label_source]
clf_features = clf_num_feats + cat_feats

# Classification data gets its own names so the regression X / y / splits /
# preds created in CELL 6 are not overwritten for the cells that follow.
X_clf = df[clf_features]
y_clf = df["good_sleep"]

X_clf_train, X_clf_test, y_clf_train, y_clf_test = train_test_split(
    X_clf, y_clf, test_size=0.2, random_state=42, stratify=y_clf
)

# Build a fresh preprocessor instead of reusing `preprocess`: a Pipeline does
# not clone its steps, so fitting `clf` with the shared object would refit the
# transformer that already sits inside the fitted regression pipeline `model`.
clf_preprocess = ColumnTransformer([
    ("num", Pipeline([
        ("imp", SimpleImputer(strategy="median"))
    ]), clf_num_feats),

    ("cat", Pipeline([
        ("imp", SimpleImputer(strategy="most_frequent")),
        ("oh", OneHotEncoder(handle_unknown="ignore"))
    ]), cat_feats)
])

clf = Pipeline([
    ("prep", clf_preprocess),
    ("logreg", LogisticRegression(max_iter=2000))
])

clf.fit(X_clf_train, y_clf_train)
clf_preds = clf.predict(X_clf_test)
# Probability of the positive class (good_sleep == 1), needed for ROC AUC.
clf_proba = clf.predict_proba(X_clf_test)[:, 1]

print("Accuracy:", round(accuracy_score(y_clf_test, clf_preds), 3))
print("F1 score:", round(f1_score(y_clf_test, clf_preds), 3))
print("ROC AUC :", round(roc_auc_score(y_clf_test, clf_proba), 3))

# Context manager so the file handle is closed deterministically.
with open("../models/good_sleep_logreg.pkl", "wb") as model_file:
    pickle.dump(clf, model_file)


In [None]:
# CELL 8: feature importance chart
import matplotlib.pyplot as plt
import numpy as np

# Pull the fitted regressor importances and the fitted preprocessor out of
# the regression pipeline.
importances = model["gbr"].feature_importances_
prep = model["prep"]
num_cols = list(prep.transformers_[0][2])
cat_cols = list(prep.transformers_[1][2])

# Pass the original column names to the encoder so the one-hot labels read
# "weekday_<day>" instead of the positional default "x0_<day>".
onehot_names = prep.transformers_[1][1]["oh"].get_feature_names_out(cat_cols)
feature_names = num_cols + list(onehot_names)

# Sort ascending so the most important feature is drawn at the top of the
# horizontal bar chart.
order = np.argsort(importances)
plt.barh([feature_names[i] for i in order], importances[order])
plt.xlabel("Importance")
plt.title("Feature Importance")
plt.tight_layout()
plt.show()

In [None]:
# CELL 9: save model
import pickle
# NOTE(review): when the notebook is run top to bottom, `model` is the
# Gradient Boosting pipeline from CELL 6 at this point, so this overwrites the
# Random Forest artifact written by CELL 5 — confirm the intended file name.
# The context manager closes the file handle deterministically.
with open("../models/sleep_rf_model.pkl", "wb") as model_file:
    pickle.dump(model, model_file)

In [None]:
# CELL 10: hyperparameter tuning
from sklearn.model_selection import RandomizedSearchCV
from sklearn.ensemble import GradientBoostingRegressor
import numpy as np

# Search space for the "gbr" step of the pipeline (3 * 3 * 3 * 2 = 54 combinations).
param_dist = {
    "gbr__n_estimators": [100, 200, 300],
    "gbr__learning_rate": [0.01, 0.05, 0.1],
    "gbr__max_depth": [2, 3, 4],
    "gbr__subsample": [0.8, 1.0],
}

# Randomly sample 20 of those combinations, scoring each with 5-fold
# cross-validated (negated) MAE.
search = RandomizedSearchCV(
    model,  # your pipeline
    param_distributions=param_dist,
    n_iter=20,
    scoring="neg_mean_absolute_error",
    cv=5,
    random_state=42,
    n_jobs=-1,
)

# Select the regression inputs and target by name instead of relying on the
# globals X and y: other cells may rebind them (CELL 7 builds a 0/1
# classification label), which would tune the regressor on the wrong target.
search.fit(df[features], df[target])
best_model = search.best_estimator_


I built a sleep quality predictor using cleaned Fitbit sleep logs.
I started with a mean baseline, then engineered interpretable features: sleep duration, REM/deep %, heart-rate under resting, and schedule regularity (bed/wake times + weekday).
I trained a Gradient Boosting Regressor to predict Fitbit sleep score and validated it with MAE and cross-validation.
The model beats baseline by ~X points and highlights that consistency + deep sleep share are strong predictors.
- The model achieves an MAE of X.X, improving over baseline by Y.Y.
- Deep sleep %, bedtime, and heart rate under resting were strong predictors.
- This model will be integrated into the Streamlit dashboard.

baseline

feature engineering

train/test split

cross-validation

MAE/RMSE/R²

interpretability