In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim


In [2]:
# Load one person's AutoSleep export and derive calendar / duration features from it.
person = 'Kai'
data = pd.read_csv(f'{person}/autosleep-{person}.csv')
# errors='coerce' turns unparseable ISO8601 strings into NaT instead of raising.
data['date'] = pd.to_datetime(data['ISO8601'], errors='coerce')
# Element by element: drop the timezone and truncate to midnight; NaT passes through unchanged.
# The resulting column is what the survey data is later merged on.
data['date'] = data['date'].apply(lambda x: x.tz_localize(None).normalize() if x is not pd.NaT else pd.NaT)
data['year'] = data['date'].dt.year
data['month'] = data['date'].dt.month
data['day'] = data['date'].dt.day
# 'inBed' is parsed as a duration and expressed in minutes.
data['inBed_minutes'] = pd.to_timedelta(data['inBed']).dt.total_seconds() / 60
# dayofweek counts Monday as 0, so values >= 5 are Saturday and Sunday.
data['day_of_week'] = data['date'].dt.dayofweek
data['is_weekend'] = data['day_of_week'].apply(lambda x: 1 if x >= 5 else 0)
# Display the last 10 parsed dates as a quick sanity check.
data['date'][len(data) - 10:]

  data['date'] = pd.to_datetime(data['ISO8601'], errors='coerce')


35   2024-03-31
36   2024-04-01
37   2024-04-02
38   2024-04-03
39   2024-04-04
40   2024-04-05
41   2024-04-06
42   2024-04-07
43   2024-04-08
44   2024-04-09
Name: date, dtype: datetime64[ns]

In [3]:
def rename_columns(df):
  """Return a copy of ``df`` whose survey-question headers are replaced by short names.

  Columns that are not listed in the mapping keep their current name, and
  ``df`` itself is left unmodified.
  """
  short_names = {
      'Name': 'name',
      'Date': 'date',
      'Grogginess': 'grogginess',
      'How many alarms did you set?': 'num_alarms',
      'What time did you set your first alarm for?': 'first_alarm',
      'Did you take sleep-aiding medicine (not weed, ie melatonin/antihystamine)?': 'sleep_medicine',
      "What was the temperature when you woke up? (Don't include °F, just the number)": 'waking_temp',
      'Were you intoxicated when you went to sleep?': 'intoxicated',
      'Were you sick when you went to sleep?': 'sick',
      'Did you eat within an hour of going to bed?': 'eat_before_bed',
      'Did you sleep alone?': 'sleep_alone',
      'Did you sleep in your own bed/room?': 'own_bed',
      'How stressed were you last night?': 'stress',
      'Did you use your phone before going to sleep?': 'phone_before_bed',
      "When was the latest you ingested caffeine before going to bed? (Don't answer if N/A)": 'caffeine_before_bed',
  }
  return df.rename(columns=short_names)

In [4]:
# Read the survey export, shorten its headers, and turn the m/d/Y date strings
# into midnight timestamps so they can be matched against the tracker dates.
form_raw = pd.read_csv('form.csv')
suppl = rename_columns(form_raw)

form_dates = pd.to_datetime(suppl['date'], format='%m/%d/%Y')
suppl['date'] = form_dates.dt.normalize()


def time_to_minutes(time_str):
  """Convert a 12-hour clock string such as '8:30:00 AM' to minutes after midnight.

  Missing values (NaN/None) and the empty string map to 0. Seconds are
  ignored. Any other string must match '%I:%M:%S %p'.
  """
  is_missing = pd.isna(time_str) or time_str == ''
  if not is_missing:
    parsed = pd.to_datetime(time_str, format='%I:%M:%S %p')
    return 60 * parsed.hour + parsed.minute
  return 0

# Express the clock-time answers as minutes after midnight (missing answers become 0).
suppl['first_alarm'] = suppl['first_alarm'].apply(time_to_minutes)
suppl['caffeine_before_bed'] = suppl['caffeine_before_bed'].apply(time_to_minutes)
lag_periods = 1
# Shift within each respondent's own rows. Shifting the whole form before the
# per-person filter below would hand a row the grogginess of whichever
# respondent happened to be on the preceding line.
suppl['grogginess_lag1'] = suppl.groupby('name')['grogginess'].shift(lag_periods)
# Keep only the selected person's answers; gaps (including the first lag value) become 0.
suppl = suppl[suppl['name'] == person].fillna(0)
# suppl

In [5]:
# Keep only the dates present in both the survey and the tracker export,
# then replace every remaining gap with 0.
merged_data = suppl.merge(data, on='date', how='inner').fillna(0)

column_headers = merged_data.columns.tolist()
print(column_headers)



['Timestamp', 'name', 'date', 'grogginess', 'num_alarms', 'first_alarm', 'sleep_medicine', 'waking_temp', 'intoxicated', 'sick', 'eat_before_bed', 'sleep_alone', 'own_bed', 'stress', 'phone_before_bed', 'caffeine_before_bed', 'grogginess_lag1', 'ISO8601', 'fromDate', 'toDate', 'bedtime', 'waketime', 'inBed', 'awake', 'fellAsleepIn', 'sessions', 'asleep', 'asleepAvg7', 'efficiency', 'efficiencyAvg7', 'quality', 'qualityAvg7', 'deep', 'deepAvg7', 'sleepBPM', 'sleepBPMAvg7', 'dayBPM', 'dayBPMAvg7', 'wakingBPM', 'wakingBPMAvg7', 'hrv', 'hrvAvg7', 'sleepHRV', 'sleepHRVAvg7', 'SpO2Avg', 'SpO2Min', 'SpO2Max', 'respAvg', 'respMin', 'respMax', 'tags', 'notes', 'year', 'month', 'day', 'inBed_minutes', 'day_of_week', 'is_weekend']


In [6]:
# Columns that get rescaled to a fraction of their own maximum.
to_normalize = [
    'sleepBPM', 'sleepBPMAvg7', 'dayBPM', 'dayBPMAvg7', 'wakingBPM', 'wakingBPMAvg7',
    'hrv', 'hrvAvg7', 'sleepHRV', 'sleepHRVAvg7',
    'SpO2Avg', 'SpO2Min', 'SpO2Max',
    'respAvg', 'respMin', 'respMax',
    'inBed_minutes', 'day_of_week', 'is_weekend',
    'first_alarm', 'num_alarms', 'sleep_medicine', 'waking_temp', 'intoxicated',
    'sick', 'eat_before_bed', 'sleep_alone', 'own_bed', 'stress', 'phone_before_bed',
    'caffeine_before_bed',
]

for column in to_normalize:
    column_max = merged_data[column].max()
    merged_data[column] = merged_data[column].div(column_max)

# A column whose maximum is 0 becomes NaN in the division above (0/0), and any
# column containing NaN is removed here.
merged_data = merged_data.dropna(axis=1)
merged_data.head(10)


Unnamed: 0,Timestamp,name,date,grogginess,num_alarms,first_alarm,sleep_medicine,waking_temp,intoxicated,eat_before_bed,...,SpO2Min,SpO2Max,tags,notes,year,month,day,inBed_minutes,day_of_week,is_weekend
0,2/19/2024 15:34:32,Kai,2024-02-18,5,0.142857,0.592105,0.0,1.0,0.0,0.0,...,0.989583,0.98,0.0,0.0,2024,2,18,0.696219,1.0,1.0
1,2/19/2024 15:35:20,Kai,2024-02-19,6,0.142857,0.447368,1.0,1.0,0.0,0.0,...,0.979167,0.99,0.0,0.0,2024,2,19,0.835463,0.0,0.0
2,2/20/2024 8:48:05,Kai,2024-02-20,3,0.142857,0.460526,0.0,0.972973,0.0,0.0,...,0.979167,0.97,0.0,0.0,2024,2,20,0.759511,0.166667,0.0
3,2/21/2024 8:40:51,Kai,2024-02-21,6,0.571429,0.342105,0.0,1.0,0.0,0.0,...,0.9375,0.99,0.0,0.0,2024,2,21,0.758105,0.333333,0.0
4,2/22/2024 9:15:05,Kai,2024-02-22,6,0.857143,0.316667,1.0,1.0,0.0,0.0,...,0.979167,0.98,0.0,0.0,2024,2,22,0.810146,0.5,0.0
5,2/23/2024 8:59:04,Kai,2024-02-23,7,1.0,0.342105,1.0,0.986486,0.0,0.0,...,0.989583,1.0,0.0,0.0,2024,2,23,0.630113,0.666667,0.0
6,2/24/2024 9:16:16,Kai,2024-02-24,6,0.285714,0.473684,0.0,1.0,0.0,0.0,...,0.989583,0.99,0.0,0.0,2024,2,24,0.540097,0.833333,1.0
7,2/25/2024 12:49:50,Kai,2024-02-25,3,0.0,0.0,1.0,1.0,0.0,0.0,...,0.979167,0.99,0.0,0.0,2024,2,25,0.863593,1.0,1.0
8,2/26/2024 8:32:46,Kai,2024-02-26,9,0.857143,0.263158,0.0,1.0,0.0,0.0,...,1.0,1.0,0.0,0.0,2024,2,26,0.461333,0.0,0.0
9,2/27/2024 8:05:47,Kai,2024-02-27,6,0.142857,0.421053,0.0,1.0,0.0,0.0,...,0.947917,0.99,0.0,0.0,2024,2,27,0.675121,0.166667,0.0


In [7]:
# Inputs currently fed to the model. Candidates that are switched off for now:
#   tracker: sleepBPMAvg7, dayBPM, dayBPMAvg7, wakingBPMAvg7, hrv, hrvAvg7,
#            sleepHRV, sleepHRVAvg7, SpO2Avg, SpO2Min, SpO2Max,
#            respAvg, respMin, respMax
#   calendar: year, month, day, day_of_week, is_weekend
#   survey: first_alarm, num_alarms, sleep_medicine, waking_temp, intoxicated,
#           sick, eat_before_bed, sleep_alone, own_bed, stress,
#           phone_before_bed, caffeine_before_bed, grogginess_lag1
features = ['sleepBPM', 'wakingBPM', 'inBed_minutes']
target = ['grogginess']

# Ignore any requested feature that is no longer a column of merged_data.
features = [name for name in features if name in merged_data.columns]
X = merged_data[features]
y = merged_data[target]

# Hold out only the final row for testing; every earlier row is training data.
# (Alternatives tried earlier: sklearn's train_test_split with test_size=0.1,
# and a positional 90/10 split.)
split_point = len(X) - 1
X_train, y_train = X.iloc[:split_point], y.iloc[:split_point]
X_test, y_test = X.iloc[split_point:], y.iloc[split_point:]

X_train

Unnamed: 0,sleepBPM,wakingBPM,inBed_minutes
0,0.924324,0.855072,0.696219
1,0.894595,0.855072,0.835463
2,0.851351,0.913043,0.759511
3,0.902703,0.869565,0.758105
4,0.852703,0.855072,0.810146
5,0.893243,0.898551,0.630113
6,0.898649,0.855072,0.540097
7,0.918919,0.956522,0.863593
8,0.882432,0.898551,0.461333
9,0.816216,0.84058,0.675121


In [8]:
from torch.utils.data import TensorDataset, DataLoader

# Seed NumPy and PyTorch (plus every CUDA device when one is available).
seed = 42
np.random.seed(seed)
torch.manual_seed(seed)
if torch.cuda.is_available():
  torch.cuda.manual_seed_all(seed)


def _to_float_tensor(frame):
  """Copy a DataFrame's values into a float32 tensor."""
  return torch.tensor(frame.values, dtype=torch.float32)


X_train_tensor, y_train_tensor = _to_float_tensor(X_train), _to_float_tensor(y_train)
X_test_tensor, y_test_tensor = _to_float_tensor(X_test), _to_float_tensor(y_test)
batch_size = 32  # Adjust based on your computational resources

training_data = TensorDataset(X_train_tensor, y_train_tensor)
test_data = TensorDataset(X_test_tensor, y_test_tensor)

# Only the training batches are reshuffled; the test loader keeps row order.
train_loader = DataLoader(training_data, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)


In [9]:
class GrogginessModel(nn.Module):
    """Two-layer perceptron: linear -> ReLU -> linear.

    Args:
        input_size: number of input features per sample.
        hidden_size: width of the hidden layer.
        output_size: number of values produced per sample.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        # Attribute names double as state_dict keys, so saved checkpoints depend on them.
        self.layer1 = nn.Linear(input_size, hidden_size)
        self.layer2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Map a batch of inputs to a batch of outputs."""
        hidden = torch.relu(self.layer1(x))
        return self.layer2(hidden)

# One input per selected feature, a 5-unit hidden layer, and a single output value.
input_size, hidden_size, output_size = len(features), 5, 1
model = GrogginessModel(input_size, hidden_size, output_size)
input_size

3

In [10]:
# Weight applied to the L1 norm of the parameters when it is added to the loss.
lambda1 = 0.1

# Mean-squared-error objective optimised with Adam.
criterion = nn.MSELoss()
optimizer = optim.Adam(params=model.parameters(), lr=0.001)
# Alternative tried earlier: optim.SGD(model.parameters(), lr=0.001, momentum=0.9)


In [11]:
def evaluate_model(model, test_loader, criterion, l1_lambda=None):
  """Return the regularised per-sample loss of ``model`` over ``test_loader``.

  The result is the sample-weighted average of ``criterion`` across all batches
  plus ``l1_lambda`` times the L1 norm of the model parameters, as a Python
  float. Each batch loss is weighted by its batch size, so the average is
  correct even when batches differ in size. This assumes ``criterion`` returns
  the mean loss of a batch, which is the default reduction of ``nn.MSELoss``.

  Args:
    model: module to evaluate; it is switched to evaluation mode and left there.
    test_loader: DataLoader yielding ``(inputs, targets)`` batches.
    criterion: loss callable applied to ``(outputs, targets)``.
    l1_lambda: weight of the L1 penalty. When omitted, the notebook-level
      ``lambda1`` is used.

  Raises:
    ValueError: if the loader's dataset is empty.
  """
  if l1_lambda is None:
    l1_lambda = lambda1  # regularisation weight defined at notebook level

  n_samples = len(test_loader.dataset)
  if n_samples == 0:
    raise ValueError('evaluate_model needs at least one sample in test_loader.dataset')

  model.eval()  # Set the model to evaluation mode
  summed_loss = 0.0
  with torch.no_grad():  # No need to track gradients
    for inputs, targets in test_loader:
      batch_loss = criterion(model(inputs), targets)
      # Undo the batch mean so batches of different size are weighted fairly.
      summed_loss += batch_loss.item() * inputs.size(0)
    # The penalty depends only on the parameters, so it is computed once.
    l1_norm = sum(p.abs().sum().item() for p in model.parameters())

  return summed_loss / n_samples + l1_lambda * l1_norm
  
class EarlyStopping:
  """Track a validation loss and flag when it has stopped improving.

  Call the instance once per epoch with the current validation loss and the
  model. Whenever the loss improves, the model's state_dict is saved to
  ``path``. After ``patience`` consecutive calls without improvement,
  ``early_stop`` becomes True; the caller is responsible for checking that
  flag and stopping the loop.

  Args:
    patience: number of consecutive non-improving calls tolerated before
      ``early_stop`` is set.
    verbose: print a message each time a checkpoint is written.
    delta: margin used when comparing a new score with the best one.
    path: file the best state_dict is written to.
  """

  def __init__(self, patience=7, verbose=False, delta=0, path='checkpoint.pt'):
    self.patience = patience
    self.verbose = verbose
    self.counter = 0
    self.best_score = None
    self.early_stop = False
    # np.inf rather than np.Inf: the capitalised alias was removed in NumPy 2.0.
    self.val_loss_min = np.inf
    self.delta = delta
    self.path = path

  def __call__(self, val_loss, model):
    """Record ``val_loss`` for this epoch and checkpoint ``model`` on improvement."""
    score = -val_loss  # higher score means lower loss

    if self.best_score is None:
      # First call: whatever we have is the best so far.
      self.best_score = score
      self.save_checkpoint(val_loss, model)
    elif score < self.best_score + self.delta:
      self.counter += 1
      # print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
      if self.counter >= self.patience:
        self.early_stop = True
    else:
      self.best_score = score
      self.save_checkpoint(val_loss, model)
      self.counter = 0

  def save_checkpoint(self, val_loss, model):
    """Write the model's state_dict to ``path`` and remember ``val_loss`` as the minimum."""
    if self.verbose:
      print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
    torch.save(model.state_dict(), self.path)
    self.val_loss_min = val_loss


In [12]:
epochs = 5000
early_stopping = EarlyStopping(patience=1000, verbose=False)

for epoch in range(epochs):
    # Score the current weights first; EarlyStopping checkpoints them on improvement.
    # NOTE(review): test_loader doubles as the validation set here, so the
    # held-out sample influences which checkpoint is kept.
    val_loss = evaluate_model(model, test_loader, criterion)
    early_stopping(val_loss, model)
    if early_stopping.early_stop:
        # Without this check the patience setting had no effect on the loop.
        print(f'Early stopping at epoch {epoch+1}/{epochs}')
        break

    # evaluate_model switches the model to eval mode, so training mode has to
    # be re-enabled after it, not before.
    model.train()
    running_loss = 0.0
    for inputs, targets in train_loader:
        optimizer.zero_grad()  # Zero the gradients
        outputs = model(inputs)  # Forward pass
        loss = criterion(outputs, targets)  # Compute loss
        # L1 regularisation: penalise the absolute size of every parameter.
        l1_norm = sum(p.abs().sum() for p in model.parameters())
        total_loss = loss + lambda1 * l1_norm
        total_loss.backward()  # Backward pass
        optimizer.step()  # Optimize

        running_loss += total_loss.item() * inputs.size(0)  # Total loss for the batch

    epoch_loss = running_loss / len(train_loader.dataset)  # Average loss for the epoch

    if epoch % 1000 == 0:
        print(f'Epoch {epoch+1}/{epochs}, Loss: {epoch_loss:.4f}')


Epoch 1/5000, Loss: 46.2413
Epoch 1001/5000, Loss: 4.5848
Epoch 2001/5000, Loss: 4.2283
Epoch 3001/5000, Loss: 4.1191
Epoch 4001/5000, Loss: 4.0240


In [13]:
# Show which columns were selected as model inputs.
print(features)

['sleepBPM', 'wakingBPM', 'inBed_minutes']


In [14]:
# Take the first held-out row, show it, and keep its values as a plain list.
held_out_row = X_test[features].iloc[0]
print(held_out_row)
realtime_data_values = held_out_row.tolist()

sleepBPM         0.828378
wakingBPM        0.797101
inBed_minutes    0.548536
Name: 41, dtype: float64


In [15]:

# Add a leading batch dimension so the single sample has shape (1, n_features).
realtime_data_tensor = torch.tensor(realtime_data_values, dtype=torch.float32).unsqueeze(0)

model.eval()  # Set the model to evaluation mode
with torch.no_grad():
  predicted_grogginess = model(realtime_data_tensor)

# Prediction for the sample, followed by the recorded target for comparison.
print(predicted_grogginess[0])
print(y_test)


tensor([5.6464])
    grogginess
41           7


In [16]:

# Score the model as it stands after the training loop (not the saved checkpoint).
# evaluate_model adds the L1 penalty, so this figure is not a pure MSE.
avg_test_loss = evaluate_model(model, test_loader, criterion)
print(f'Average test loss: {avg_test_loss:.4f}')

Average test loss: 2.7883


In [17]:
# Rebuild the network and restore the weights saved by EarlyStopping.
model = GrogginessModel(input_size, hidden_size, output_size)
best_weights = torch.load('checkpoint.pt')
model.load_state_dict(best_weights)
model.eval()

# Sample-weighted criterion loss on the test set, without any L1 penalty.
total_loss = 0.0
with torch.no_grad():
  for inputs, targets in test_loader:
    outputs = model(inputs)
    loss = criterion(outputs, targets)
    total_loss += inputs.size(0) * loss.item()

avg_loss = total_loss / len(test_loader.dataset)
print(f'Average test loss: {avg_loss:.4f}')

Average test loss: 1.5431
