In [528]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim


In [529]:
person = 'Mark'
data = pd.read_csv(f'{person}/autosleep-{person}.csv')
data['date'] = pd.to_datetime(data['ISO8601'], errors='coerce')
data['date'] = data['date'].apply(lambda x: x.tz_localize(None).normalize() if x is not pd.NaT else pd.NaT)
data['year'] = data['date'].dt.year
data['month'] = data['date'].dt.month
data['day'] = data['date'].dt.day
data['inBed_minutes'] = pd.to_timedelta(data['inBed']).dt.total_seconds() / 60
data['day_of_week'] = data['date'].dt.dayofweek
data['is_weekend'] = data['day_of_week'].apply(lambda x: 1 if x >= 5 else 0)
data['date'][len(data) - 10:]

  data['date'] = pd.to_datetime(data['ISO8601'], errors='coerce')


32   2024-03-31
33   2024-04-01
34   2024-04-02
35   2024-04-03
36   2024-04-04
37   2024-04-05
38   2024-04-06
39   2024-04-07
40   2024-04-08
41   2024-04-09
Name: date, dtype: datetime64[ns]

In [530]:
def rename_columns(df):
  column_mappings = {
      'Name': 'name',
      'Date': 'date',
      'Grogginess': 'grogginess',
      'How many alarms did you set?': 'num_alarms',
      'What time did you set your first alarm for?': 'first_alarm',
      'Did you take sleep-aiding medicine (not weed, ie melatonin/antihystamine)?': 'sleep_medicine',
      "What was the temperature when you woke up? (Don't include °F, just the number)": 'waking_temp',
      'Were you intoxicated when you went to sleep?': 'intoxicated',
      'Were you sick when you went to sleep?': 'sick',
      'Did you eat within an hour of going to bed?': 'eat_before_bed',
      'Did you sleep alone?': 'sleep_alone',
      'Did you sleep in your own bed/room?': 'own_bed',
      'How stressed were you last night?': 'stress',
      'Did you use your phone before going to sleep?': 'phone_before_bed',
      "When was the latest you ingested caffeine before going to bed? (Don't answer if N/A)": 'caffeine_before_bed'
  }
  
  df_renamed = df.rename(columns=column_mappings)
  return df_renamed

In [531]:
suppl = rename_columns(pd.read_csv('form.csv'))

suppl['date'] = pd.to_datetime(suppl['date'], format='%m/%d/%Y').dt.normalize()


def time_to_minutes(time_str):
  if pd.isna(time_str) or time_str == '':
      return 0
  time = pd.to_datetime(time_str, format='%I:%M:%S %p')
  return time.hour * 60 + time.minute

suppl['first_alarm'] = suppl['first_alarm'].apply(time_to_minutes)
suppl['caffeine_before_bed'] = suppl['caffeine_before_bed'].apply(time_to_minutes)
lag_periods = 1
suppl['grogginess_lag1'] = suppl['grogginess'].shift(lag_periods)
suppl = suppl[suppl['name'] == person].fillna(0)
# suppl

In [532]:
merged_data = pd.merge(suppl, data, on='date', how='inner').fillna(0)


column_headers = list(merged_data.columns.values)

print(column_headers)



['Timestamp', 'name', 'date', 'grogginess', 'num_alarms', 'first_alarm', 'sleep_medicine', 'waking_temp', 'intoxicated', 'sick', 'eat_before_bed', 'sleep_alone', 'own_bed', 'stress', 'phone_before_bed', 'caffeine_before_bed', 'grogginess_lag1', 'ISO8601', 'fromDate', 'toDate', 'bedtime', 'waketime', 'inBed', 'awake', 'fellAsleepIn', 'sessions', 'asleep', 'asleepAvg7', 'efficiency', 'efficiencyAvg7', 'quality', 'qualityAvg7', 'deep', 'deepAvg7', 'sleepBPM', 'sleepBPMAvg7', 'dayBPM', 'dayBPMAvg7', 'wakingBPM', 'wakingBPMAvg7', 'hrv', 'hrvAvg7', 'sleepHRV', 'sleepHRVAvg7', 'SpO2Avg', 'SpO2Min', 'SpO2Max', 'respAvg', 'respMin', 'respMax', 'tags', 'notes', 'year', 'month', 'day', 'inBed_minutes', 'day_of_week', 'is_weekend']


In [533]:
to_normalize = ['sleepBPM', 'sleepBPMAvg7', 'dayBPM', 'dayBPMAvg7', 'wakingBPM', 'wakingBPMAvg7', 
            'hrv', 'hrvAvg7', 'sleepHRV', 'sleepHRVAvg7', 'SpO2Avg', 'SpO2Min', 'SpO2Max', 
            'respAvg', 'respMin', 'respMax', 'inBed_minutes', 'day_of_week', 'is_weekend', 
            'first_alarm', 'num_alarms', 'sleep_medicine', 'waking_temp', 'intoxicated', 
            'sick', 'eat_before_bed', 'sleep_alone', 'own_bed', 'stress', 'phone_before_bed', 
            'caffeine_before_bed']


for cat in to_normalize:
    merged_data[cat] = merged_data[cat] / (merged_data[cat].max())


merged_data.dropna(axis=1, inplace=True)
merged_data.head(10)


Unnamed: 0,Timestamp,name,date,grogginess,num_alarms,first_alarm,waking_temp,intoxicated,sick,eat_before_bed,...,respMin,respMax,tags,notes,year,month,day,inBed_minutes,day_of_week,is_weekend
0,2/20/2024 2:28:47,Mark,2024-02-19,7,0.666667,0.9,0.931507,0.0,0.0,0.0,...,0.884615,0.860465,0.0,0.0,2024,2,19,0.557312,0.0,0.0
1,2/20/2024 18:38:08,Mark,2024-02-21,6,0.5,1.0,0.931507,0.0,0.0,0.0,...,0.846154,0.767442,0.0,0.0,2024,2,21,0.348814,0.333333,0.0
2,2/22/2024 13:31:58,Mark,2024-02-22,8,0.666667,1.0,1.0,0.0,0.0,0.0,...,0.884615,0.790698,0.0,0.0,2024,2,22,0.527652,0.5,0.0
3,2/23/2024 14:16:26,Mark,2024-02-23,4,0.0,0.0,0.958904,1.0,0.0,0.0,...,1.0,0.906977,0.0,0.0,2024,2,23,0.786561,0.666667,0.0
4,2/25/2024 16:45:03,Mark,2024-02-24,7,0.333333,1.0,0.958904,1.0,0.0,0.0,...,1.0,0.883721,0.0,0.0,2024,2,24,0.716403,0.833333,1.0
5,2/25/2024 16:45:55,Mark,2024-02-25,4,0.0,0.0,0.931507,0.0,0.0,0.0,...,0.846154,0.953488,0.0,0.0,2024,2,25,0.826087,1.0,1.0
6,2/27/2024 3:10:27,Mark,2024-02-26,5,1.0,0.9,0.931507,0.0,0.0,1.0,...,0.923077,0.744186,0.0,0.0,2024,2,26,0.449605,0.0,0.0
7,2/27/2024 14:30:15,Mark,2024-02-27,7,1.0,0.9,0.931507,0.0,0.0,1.0,...,0.923077,0.767442,0.0,0.0,2024,2,27,0.519763,0.166667,0.0
8,3/7/2024 23:24:11,Mark,2024-03-07,7,0.833333,0.9,0.958904,0.0,0.0,0.0,...,0.846154,1.0,0.0,0.0,2024,3,7,0.706522,0.5,0.0
9,3/11/2024 0:01:02,Mark,2024-03-10,9,0.833333,0.5,0.945205,0.0,0.0,0.0,...,0.923077,0.674419,0.0,0.0,2024,3,10,0.133399,1.0,1.0


In [534]:
features = ['sleepBPM', 
            # 'sleepBPMAvg7', 
            # 'dayBPM', 
            # 'dayBPMAvg7',
              'wakingBPM',
              #  'wakingBPMAvg7', 
            # 'hrv',
              # 'hrvAvg7', 
              # 'sleepHRV', 
              # 'sleepHRVAvg7', 
              # 'SpO2Avg', 
              # 'SpO2Min', 'SpO2Max', 
            # 'respAvg', 'respMin', 'respMax', 
            #'year',
            #  'month', 
            #'day', 
            'inBed_minutes', 
            # 'day_of_week', 
            'is_weekend',
              ]
features += [
# 'first_alarm', 
# 'num_alarms', 
'sleep_medicine', 
# 'waking_temp', 
# 'intoxicated', 
# 'sick', 
#'eat_before_bed', 
# 'sleep_alone',
#  'own_bed', 
# 'stress', 
'phone_before_bed', 
'caffeine_before_bed',
#  'grogginess_lag1'
 ]

target = ['grogginess']
# merged_data[features].head().to_csv('out.csv')
# from sklearn.model_selection import train_test_split

features = [f for f in features if f in merged_data.columns]
X = merged_data[features]  # Features
y = merged_data[target]  # Assuming you have a target variable

# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

# split_point = int(len(X) * 0.9)
split_point = len(X) - 1
X_train, X_test = X[:split_point], X[split_point:]
y_train, y_test = y[:split_point], y[split_point:]

# X_train

In [535]:
from torch.utils.data import TensorDataset, DataLoader

np.random.seed(42)
torch.manual_seed(42)
if torch.cuda.is_available():
  torch.cuda.manual_seed_all(42)
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# torch.cuda.set_device(torch.cuda.current_device())

X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32)
batch_size = 32  # Adjust based on your computational resources

training_data = TensorDataset(X_train_tensor, y_train_tensor)
test_data = TensorDataset(X_test_tensor, y_test_tensor)

train_loader = DataLoader(dataset=training_data, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_data, batch_size=batch_size, shuffle=False)


In [536]:
class GrogginessModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(GrogginessModel, self).__init__()
        self.layer1 = nn.Linear(input_size, hidden_size) 
        self.layer2 = nn.Linear(hidden_size, output_size)
    
    def forward(self, x):
        x = F.relu(self.layer1(x))
        x = self.layer2(x)
        return x

input_size = len(features) 
hidden_size = 5  
output_size = 1
model = GrogginessModel(input_size, hidden_size, output_size)
len(features)

5

In [537]:
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001) 
lambda1 = 0.1
# optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)


In [538]:
def evaluate_model(model, test_loader, criterion):
  model.eval()  # Set the model to evaluation mode
  total_loss = 0.0
  with torch.no_grad():  # No need to track gradients
    for inputs, targets in test_loader:
      outputs = model(inputs)
      loss = criterion(outputs, targets)
      l1_norm = sum(p.abs().sum() for p in model.parameters())
      total_loss += loss + lambda1 * l1_norm
      
  avg_loss = total_loss / len(test_loader.dataset)
  return avg_loss
  
class EarlyStopping:
  def __init__(self, patience=7, verbose=False, delta=0, path='checkpoint.pt'):
    self.patience = patience
    self.verbose = verbose
    self.counter = 0
    self.best_score = None
    self.early_stop = False
    self.val_loss_min = np.Inf
    self.delta = delta
    self.path = path

  def __call__(self, val_loss, model):
    score = -val_loss

    if self.best_score is None:
      self.best_score = score
      self.save_checkpoint(val_loss, model)
    elif score < self.best_score + self.delta:
      self.counter += 1
      # print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
      if self.counter >= self.patience:
          self.early_stop = True
    else:
      self.best_score = score
      self.save_checkpoint(val_loss, model)
      self.counter = 0

  def save_checkpoint(self, val_loss, model):
    if self.verbose:
      print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
    torch.save(model.state_dict(), self.path)
    self.val_loss_min = val_loss


In [539]:
epochs = 100000
early_stopping = EarlyStopping(patience=1000, verbose=False)

for epoch in range(epochs):
    model.train()  # Set the model to training mode
    val_loss = evaluate_model(model, test_loader, criterion)
    early_stopping(val_loss, model)
    running_loss = 0.0
    for inputs, targets in train_loader:
        optimizer.zero_grad()  # Zero the gradients
        outputs = model(inputs)  # Forward pass
        loss = criterion(outputs, targets)  # Compute loss
        l1_norm = sum(p.abs().sum() for p in model.parameters())
        total_loss = loss + lambda1 * l1_norm
        total_loss.backward()  # Backward pass
        optimizer.step()  # Optimize
        
        running_loss += total_loss.item() * inputs.size(0)  # Total loss for the batch

    epoch_loss = running_loss / len(train_loader.dataset)  # Average loss for the epoch

    if epoch % 1000 == 0:
        print(f'Epoch {epoch+1}/{epochs}, Loss: {epoch_loss:.4f}')


Epoch 1/100000, Loss: 36.9470


Epoch 1001/100000, Loss: 3.8533
Epoch 2001/100000, Loss: 3.4122
Epoch 3001/100000, Loss: 3.2252
Epoch 4001/100000, Loss: 3.1264
Epoch 5001/100000, Loss: 3.0541
Epoch 6001/100000, Loss: 2.9992
Epoch 7001/100000, Loss: 2.8823
Epoch 8001/100000, Loss: 2.6837
Epoch 9001/100000, Loss: 2.5389
Epoch 10001/100000, Loss: 2.5109
Epoch 11001/100000, Loss: 2.5101
Epoch 12001/100000, Loss: 2.5100
Epoch 13001/100000, Loss: 2.5100
Epoch 14001/100000, Loss: 2.5100
Epoch 15001/100000, Loss: 2.5100
Epoch 16001/100000, Loss: 2.5100
Epoch 17001/100000, Loss: 2.5099
Epoch 18001/100000, Loss: 2.5100
Epoch 19001/100000, Loss: 2.5099
Epoch 20001/100000, Loss: 2.5099
Epoch 21001/100000, Loss: 2.5100
Epoch 22001/100000, Loss: 2.5100
Epoch 23001/100000, Loss: 2.5099
Epoch 24001/100000, Loss: 2.5099
Epoch 25001/100000, Loss: 2.5100
Epoch 26001/100000, Loss: 2.5099
Epoch 27001/100000, Loss: 2.5100
Epoch 28001/100000, Loss: 2.5100
Epoch 29001/100000, Loss: 2.5100
Epoch 30001/100000, Loss: 2.5099
Epoch 31001/100000,

In [540]:
print(features)

['sleepBPM', 'wakingBPM', 'inBed_minutes', 'is_weekend', 'phone_before_bed']


In [541]:
realtime_data_values = X_test[features].iloc[0]
print(realtime_data_values)
realtime_data_values = realtime_data_values.tolist()
realtime_data_tensor = torch.tensor([realtime_data_values], dtype=torch.float32)  # Wrap in a list to keep dimensions

sleepBPM            0.738295
wakingBPM           0.777778
inBed_minutes       0.711462
is_weekend          1.000000
phone_before_bed    1.000000
Name: 25, dtype: float64


In [542]:
avg_test_loss = evaluate_model(model, test_loader, criterion)
print(f'Average test loss: {avg_test_loss:.4f}')

Average test loss: 0.6728


In [543]:
model = GrogginessModel(input_size, hidden_size, output_size)
model.load_state_dict(torch.load('checkpoint.pt'))
model.eval()

total_loss = 0.0
for inputs, targets in test_loader:
  with torch.no_grad():
    outputs = model(inputs)
    loss = criterion(outputs, targets)
    total_loss += loss.item() * inputs.size(0)
    predicted_grogginess = model(realtime_data_tensor)


avg_loss = total_loss / len(test_loader.dataset)
print(f'Average test loss: {avg_loss:.4f}')
print(predicted_grogginess[0])
print(y_test)

Average test loss: 0.0009
tensor([5.9701])
    grogginess
25           6
