In [1]:
import copy
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

In [2]:
df = pd.read_csv("../data/samsung_health_sleep.csv")
df["date"] = pd.to_datetime(df["date"])
df = df.drop(["total_sleep_time_weight", "factor_01", "factor_02", "factor_03", "factor_04", "factor_05", "factor_06", "factor_07", "factor_08", "factor_09", "factor_10", "latency_weight", "latency_score", "deep_weight", "rem_weight", "wake_weight"], axis=1)

Regression with NN

In [3]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
from sklearn.metrics import mean_squared_error

features = [
    "mental_recovery",
	"wake_score",
    "deep_score",
	"sleep_efficiency_with_latency",
	"total_rem_duration",
	"physical_recovery",
	"movement_awakening",
	"rem_score",
	"sleep_cycle",
	"total_light_duration",
	"efficiency",
	"sleep_duration" 
]

X = df[features].values
y = df["sleep_score"].values.reshape(-1,1)

In [4]:
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler_X = StandardScaler()
scaler_y = StandardScaler()

x_train = scaler_X.fit_transform(x_train)
x_test = scaler_X.transform(x_test)

y_train = scaler_y.fit_transform(y_train)
y_test = scaler_y.transform(y_test)

In [5]:
class SleepRegressor(nn.Module):
    def __init__(self, input_dim):
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1)
        )

    def forward(self, x):
        return self.model(x)

In [6]:
X_train_t = torch.tensor(x_train, dtype=torch.float32)
y_train_t = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)

X_test_t = torch.tensor(x_test, dtype=torch.float32)
y_test_t = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

In [7]:
model = SleepRegressor(input_dim=x_train.shape[1])

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

epochs = 500
for epoch in range(epochs):
    model.train()

    y_pred = model(X_train_t)
    loss = criterion(y_pred, y_train_t)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if epoch % 50 == 0:
        print(f"Epoch {epoch} | Loss: {loss.item():.4f}")


Epoch 0 | Loss: 0.9510
Epoch 50 | Loss: 0.0797
Epoch 100 | Loss: 0.0386
Epoch 150 | Loss: 0.0263
Epoch 200 | Loss: 0.0172
Epoch 250 | Loss: 0.0109
Epoch 300 | Loss: 0.0071
Epoch 350 | Loss: 0.0044
Epoch 400 | Loss: 0.0026
Epoch 450 | Loss: 0.0015


In [8]:
model.eval()
with torch.no_grad():
    y_pred_test = model(X_test_t)

y_pred_test = scaler_y.inverse_transform(y_pred_test.numpy())
y_test_real = scaler_y.inverse_transform(y_test)

rmse = np.sqrt(mean_squared_error(y_test_real, y_pred_test))
print(f"RMSE: {rmse:.2f}")


RMSE: 4.33


Without leak features

In [9]:
features = [
    "mental_recovery",
	"total_rem_duration",
	"physical_recovery",
	"movement_awakening",
	"sleep_cycle",
	"total_light_duration",
	"sleep_duration" 
]

X = df[features].values
y = df["sleep_score"].values.reshape(-1,1)

In [10]:
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler_X = StandardScaler()
scaler_y = StandardScaler()

x_train = scaler_X.fit_transform(x_train)
x_test = scaler_X.transform(x_test)

y_train = scaler_y.fit_transform(y_train)
y_test = scaler_y.transform(y_test)

In [11]:
X_train_t = torch.tensor(x_train, dtype=torch.float32)
y_train_t = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)

X_test_t = torch.tensor(x_test, dtype=torch.float32)
y_test_t = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

In [12]:
model = SleepRegressor(input_dim=x_train.shape[1])

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

epochs = 500
for epoch in range(epochs):
    model.train()

    y_pred = model(X_train_t)
    loss = criterion(y_pred, y_train_t)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if epoch % 50 == 0:
        print(f"Epoch {epoch} | Loss: {loss.item():.4f}")


Epoch 0 | Loss: 0.9911
Epoch 50 | Loss: 0.1869
Epoch 100 | Loss: 0.1427
Epoch 150 | Loss: 0.1238
Epoch 200 | Loss: 0.1038
Epoch 250 | Loss: 0.0857
Epoch 300 | Loss: 0.0685
Epoch 350 | Loss: 0.0524
Epoch 400 | Loss: 0.0395
Epoch 450 | Loss: 0.0289


In [13]:
model.eval()
with torch.no_grad():
    y_pred_test = model(X_test_t)

y_pred_test = scaler_y.inverse_transform(y_pred_test.numpy())
y_test_real = scaler_y.inverse_transform(y_test)

rmse = np.sqrt(mean_squared_error(y_test_real, y_pred_test))
print(f"RMSE: {rmse:.2f}")


RMSE: 7.24


In [None]:
results = pd.DataFrame({
    "true": y_test_real.flatten(),
    "pred": y_pred_test.flatten()
})

results["abs_error"] = abs(results["true"] - results["pred"])
results.groupby(pd.cut(results["true"], bins=[0,40,60,80,100])).mean()


  results.groupby(pd.cut(results["true"], bins=[0,40,60,80,100])).mean()


Unnamed: 0_level_0,true,pred,abs_error
true,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"(0, 40]",40.0,40.611336,0.611336
"(40, 60]",53.428571,59.890537,8.420487
"(60, 80]",69.363636,71.099091,5.20101
"(80, 100]",86.666667,87.81971,4.459082
