In [1]:
# ! pip install coremltools

In [2]:
import math
import json
import numpy as np
import pandas as pd
import coremltools as ct
import matplotlib.pyplot as plt
from datetime import datetime
from collections import defaultdict

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

scikit-learn version 1.2.2 is not supported. Minimum required version: 0.17. Maximum required version: 1.1.2. Disabling scikit-learn conversion API.
TensorFlow version 2.14.0 has not been tested with coremltools. You may run into unexpected errors. TensorFlow 2.12.0 is the most recent version that has been tested.


In [3]:
STAGE = {
    'In Bed': 0,
    'Awake': 1,
    'Asleep': 2,
    'REM': 3,
    'Core': 4,
    'Deep': 5,
    'Unknown': 6
}

In [6]:
def load_json(path: str) -> dict:
    with open(path, 'r') as file:
        return json.load(file)

def parse_time(time: str) -> datetime:
    return datetime.strptime(time, '%Y-%m-%d %H:%M:%S')

def to_min(cur_time: datetime, start_time: datetime) -> int:
    return int((cur_time - start_time).total_seconds()//60)

def process_sleep_data(sleep_data: dict) -> dict:
    stage_data = defaultdict(dict)

    for data in sleep_data:
        if data['stage'] in ['Awake', 'REM', 'Core', 'Deep']:
            start_time = parse_time(data['start_time'])
            end_time = parse_time(data['end_time'])
            date = end_time.strftime('%Y-%m-%d')
    
            if not date in stage_data: stage_data[date]['stages'] = list()
            stage_data[date]['stages'].append({
                'start_time': start_time,
                'end_time': end_time,
                'stage': STAGE[data['stage']]
            })

    for date, data in stage_data.items():
        stage_data[date]['stages'] = sorted(data['stages'], key=lambda x: x['start_time'])
        stage_data[date]['start_time'] = stage_data[date]['stages'][0]['start_time']
        stage_data[date]['end_time'] = stage_data[date]['stages'][-1]['end_time']
        stage_data[date]['day_of_week'] = stage_data[date]['start_time'].weekday()

    return stage_data

def post_process(stage_data: dict) -> pd.DataFrame:
    ml_data = list()
    for date, data in stage_data.items():
        for item in data['stages']:
            start_time = to_min(item['start_time'], data['start_time'])
            end_time = to_min(item['end_time'], data['start_time'])
            for time in range(start_time, end_time):
                ml_data.append((
                    date,
                    data['day_of_week'],
                    time,
                    item['stage']
                ))
    return pd.DataFrame(ml_data, columns=['date', 'day_of_week', 'time', 'stage'])

In [25]:
data = load_json('./sleepData.json')
data = data['sleep_data']

stage_data = process_sleep_data(data)
ml_data = post_process(stage_data)

print(ml_data['stage'].value_counts())
print(ml_data['day_of_week'].value_counts())
print(max(ml_data['time']))
ml_data

stage
4    109175
3     53845
5     27051
1      4937
Name: count, dtype: int64
day_of_week
1    30793
0    29565
4    29299
2    28885
3    27739
6    26211
5    22516
Name: count, dtype: int64
813


Unnamed: 0,date,day_of_week,time,stage
200,2024-02-20,1,200,3
201,2024-02-20,1,201,3
202,2024-02-20,1,202,3
203,2024-02-20,1,203,3
204,2024-02-20,1,204,3
...,...,...,...,...
395,2024-02-20,1,395,4
396,2024-02-20,1,396,4
397,2024-02-20,1,397,4
398,2024-02-20,1,398,4


In [9]:
if torch.cuda.is_available():
    device = torch.device('cuda')
elif torch.backends.mps.is_available():
    device = torch.device('mps')
else:
    device = torch.device('cpu')
print('Current backend accelerator:', device)

Current backend accelerator: mps


In [10]:
# Split data into features and target
X = ml_data[['day_of_week', 'time']].values
y = ml_data['stage'].values

# Split into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Define a PyTorch Dataset
class SleepStageDataset(Dataset):
    def __init__(self, features, labels):
        self.features = features
        self.labels = labels
        
    def __len__(self):
        return len(self.labels)
    
    def __getitem__(self, idx):
        return torch.tensor(self.features[idx], dtype=torch.float32), torch.tensor(self.labels[idx], dtype=torch.long)

train_dataset = SleepStageDataset(X_train, y_train)
val_dataset = SleepStageDataset(X_val, y_val)

# DataLoader
batch_size = 128
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

In [11]:
class TransformerModel(nn.Module):
    def __init__(self, num_features, num_classes, dim_model=128, num_heads=4, num_encoder_layers=3, dropout_rate=0.1):
        super(TransformerModel, self).__init__()
        self.embedding_layer = nn.Linear(num_features, dim_model)
        encoder_layer = nn.TransformerEncoderLayer(d_model=dim_model, nhead=num_heads, dropout=dropout_rate)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_encoder_layers)
        self.dropout = nn.Dropout(dropout_rate)
        self.classifier = nn.Linear(dim_model, num_classes)

    def forward(self, src):
        src = self.embedding_layer(src)
        src = src.unsqueeze(1)
        output = self.transformer_encoder(src)
        output = output.squeeze(1)
        output = self.dropout(output)
        output = self.classifier(output)
        return output

In [12]:
def train(model, train_loader, criterion, optimizer, num_epochs=10):
    model.train()
    for epoch in range(num_epochs):
        total_loss = 0
        for features, labels in train_loader:
            features, labels = features.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(features)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f'Epoch {epoch+1}, Loss: {total_loss / len(train_loader)}')

def evaluate(model, val_loader):
    model.eval()
    total = 0
    correct = 0
    with torch.no_grad():
        for features, labels in val_loader:
            features, labels = features.to(device), labels.to(device)
            outputs = model(features)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print(f'Accuracy: {100 * correct / total}%')

In [21]:
# Model parameters
num_features = 2  # day_of_week and time
num_classes = len(STAGE)  # Assuming y_train is accessible here

# Initialize the model, criterion, and optimizer
model = TransformerModel(num_features, num_classes)
model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Train and evaluate the model
train(model, train_loader, criterion, optimizer, num_epochs=1)
evaluate(model, val_loader)

Epoch 1, Loss: 1.0587799029334635
Accuracy: 55.97405261268653%


In [23]:
model.eval()
test_input = torch.tensor([(i, 2) for i in range(0, 8 * 60)]).to(device).type(torch.float32)
test_output = model(test_input)
print(test_output.data)
_, predicted = torch.max(test_output.data, 1)
print(predicted)

tensor([[-7.9295, -1.3123, -8.1016,  ...,  1.5704,  0.0349, -7.4154],
        [-7.9295, -1.3123, -8.1016,  ...,  1.5704,  0.0349, -7.4154],
        [-7.9295, -1.3123, -8.1016,  ...,  1.5704,  0.0349, -7.4154],
        ...,
        [-7.9295, -1.3123, -8.1017,  ...,  1.5704,  0.0349, -7.4154],
        [-7.9295, -1.3123, -8.1017,  ...,  1.5704,  0.0349, -7.4154],
        [-7.9295, -1.3123, -8.1017,  ...,  1.5704,  0.0349, -7.4154]],
       device='mps:0')
tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4

In [None]:
# Convert to TorchScript
model.eval()
model.to('cpu')
torch.save(model, 'pytorchmodel.pth')

X_data, _ = list(train_loader)[0]

traced_model = torch.jit.trace(model, X_data)
traced_model.save('traced_model.pt')

model = ct.convert(
    traced_model,
    convert_to='mlprogram',
    inputs=[ct.TensorType(shape=X_data.shape)]
)
 
# Save the converted model.
model.save('sleepCoreML.mlpackage')