In [6]:
from Utils.scanner import read_daphnet_dataset, flatten_dfs
from Utils.preprocess import Pipeline, PreProcessor

import os
from datetime import datetime
import numpy as np
import random

import torch
import torch.nn as nn
import torch.optim as optim

from sklearn.metrics import classification_report, f1_score
from sklearn.preprocessing import OneHotEncoder

# Seed every RNG this notebook draws from (torch, NumPy, stdlib random) with one value
# so that the patient split, the window shuffle and the weight init repeat across runs.
# random.seed stays last: it returns None, so the cell produces no output.
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)

In [7]:
# Load the per-patient data along with the feature/label column identifiers and sample rate
patients_trails_dfs, feature_columns, label_column, sample_rate = read_daphnet_dataset()


# Hold out a random 25% of the patients for testing. The split is done per patient
# (indices into patients_trails_dfs), so no patient contributes to both sets.
num_patients = len(patients_trails_dfs)
test_indices = random.sample(range(num_patients), int(num_patients * 0.25))
held_out = set(test_indices)  # set membership instead of scanning the list per patient
train_indices = [idx for idx in range(num_patients) if idx not in held_out]

# Collect the selected patients, then flatten each group with flatten_dfs
train_patients = [patients_trails_dfs[idx] for idx in train_indices]
test_patients = [patients_trails_dfs[idx] for idx in test_indices]
train_comb_trails_dfs = flatten_dfs(train_patients)
test_comb_trails_dfs = flatten_dfs(test_patients)

print(
    f"There are {len(train_comb_trails_dfs)} trails for training patient data, "
    f"and {len(test_comb_trails_dfs)} trails for testing patient data."
)

There are 26 trails for training patient data, and 9 trails for testing patient data.


In [8]:
class SimpleCNN(nn.Module):
    """Small 1-D convolutional classifier for fixed-length multichannel windows.

    Two Conv1d + ReLU stages (32, then 16 output channels; kernel size 3 with
    padding 1, so the sequence length is preserved) are followed by global
    average pooling over the time axis and a linear layer that emits one logit
    per class.

    Args:
        input_channels: Number of features per time step (size of the last
            axis of the input tensor).
        num_classes: Number of logits produced per window.
    """

    def __init__(self, input_channels, num_classes):
        super().__init__()
        # Attribute names are part of the checkpoint format (state_dict keys); keep them stable.
        self.conv1 = nn.Conv1d(input_channels, 32, kernel_size=3, padding=1)
        self.relu1 = nn.ReLU()
        self.conv2 = nn.Conv1d(32, 16, kernel_size=3, padding=1)
        self.relu2 = nn.ReLU()
        self.pool = nn.AdaptiveAvgPool1d(1)
        self.fc = nn.Linear(16, num_classes)

    def forward(self, x):
        """Compute class logits for a batch of windows.

        Args:
            x: Tensor of shape (batch, seq_len, channels).

        Returns:
            Tensor of shape (batch, num_classes) holding unnormalised logits.
        """
        x = x.permute(0, 2, 1)  # Conv1d expects (batch, channels, seq_len)
        x = self.conv1(x)       # (batch, 32, seq_len)
        x = self.relu1(x)
        x = self.conv2(x)       # (batch, 16, seq_len)
        x = self.relu2(x)
        x = self.pool(x)        # (batch, 16, 1)
        x = x.squeeze(-1)       # (batch, 16)
        x = self.fc(x)          # (batch, num_classes)
        return x

    def save(self, current_date, directory_name, epoch):
        """Write the model's state_dict to ``directory_name``.

        The file is named ``model_{current_date}_epoch{epoch + 1}.pth``, i.e.
        ``epoch`` is the zero-based epoch index and the file name is one-based.

        Args:
            current_date: String embedded in the file name.
            directory_name: Target directory; created if it does not exist.
            epoch: Zero-based epoch index.
        """
        # Create the target directory on demand so saving does not depend on the
        # caller having prepared it. An empty name means "current directory".
        if directory_name:
            os.makedirs(directory_name, exist_ok=True)
        model_name = f"model_{current_date}_epoch{epoch + 1}.pth"
        save_path = os.path.join(directory_name, model_name)
        torch.save(self.state_dict(), save_path)
        print(f"Model saved to {save_path}")

In [9]:
# Windowing hyperparameters to tune.
# NOTE(review): units are defined by PreProcessor (presumably seconds) — confirm there.
slice_window_size = 4
slice_window_stride = 0.5
pre_window_sec = 0

# Build the preprocessing pipeline and run it on the train and test trials.
# NOTE(review): the pipeline is executed separately on each set; whether `normalize`
# derives its statistics per call is not visible here — confirm there is no mismatch.
pp = Pipeline()
preprocessor = PreProcessor(pre_window_sec=pre_window_sec, sample_rate=sample_rate)
pp.add_step(preprocessor.downsample_dfs_to_50hz)
pp.add_step(preprocessor.normalize)
p_train_comb_trails_dfs = pp.execute(train_comb_trails_dfs, feature_columns)
p_test_comb_trails_dfs = pp.execute(test_comb_trails_dfs, feature_columns)

# Cut the preprocessed training trials into windows and their labels
input_windows, raw_target = preprocessor.slice_windows(
    dfs=p_train_comb_trails_dfs,
    window_size=slice_window_size,
    stride=slice_window_stride,
    feature_columns=feature_columns,
    target_cols=label_column,
)

# Shuffle windows and labels with one shared random order so pairs stay aligned
shuffle_order = np.arange(len(input_windows))
np.random.shuffle(shuffle_order)
input_windows, raw_target = input_windows[shuffle_order], raw_target[shuffle_order]

# One-hot encode the training labels; this fitted encoder defines the class order
encoder = OneHotEncoder(sparse_output=False)
onehot_target = encoder.fit_transform(raw_target)

# Convert to float tensors for training
X = torch.tensor(input_windows, dtype=torch.float32)  # (num_samples, seq_len, num_features)
y = torch.tensor(onehot_target, dtype=torch.float32)  # (num_samples, num_classes)

# Fresh model, loss and optimizer on every run of this cell
num_classes = onehot_target.shape[1]
model = SimpleCNN(input_channels=len(feature_columns), num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Create a per-run output directory named "Models/Date_<YYYYMMDD_HHMMSS>_SimpleCNN".
# exist_ok=True replaces the separate existence check and is safe on re-runs.
current_date = datetime.now().strftime('%Y%m%d_%H%M%S')
directory_name = f"Models/Date_{current_date}_SimpleCNN"
os.makedirs(directory_name, exist_ok=True)

# Train the SimpleCNN model on X (windows) and y (one-hot targets)
num_epochs = 10
batch_size = 64
num_samples = X.size(0)
if num_samples == 0:
    # Fail with a clear message instead of a ZeroDivisionError when averaging the loss.
    raise ValueError("No training windows available; check the slicing parameters.")

model.train()  # explicit: make sure the model is in training mode before optimising
for epoch in range(num_epochs):
    # Draw a new sample order each epoch
    permutation = torch.randperm(num_samples)
    epoch_loss = 0.0
    for start in range(0, num_samples, batch_size):
        batch_idx = permutation[start:start + batch_size]
        batch_x, batch_y = X[batch_idx], y[batch_idx]

        optimizer.zero_grad()
        outputs = model(batch_x)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()
        # Weight by batch size so the short final batch does not skew the epoch mean
        epoch_loss += loss.item() * batch_x.size(0)
    avg_loss = epoch_loss / num_samples
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {avg_loss:.4f}")
    # Checkpoint after every epoch
    model.save(current_date, directory_name, epoch)


Epoch 1/10, Loss: 0.3720
Model saved to Models/Date_20250825_161512_SimpleCNN/model_20250825_161512_epoch1.pth
Epoch 2/10, Loss: 0.3370
Model saved to Models/Date_20250825_161512_SimpleCNN/model_20250825_161512_epoch2.pth
Epoch 3/10, Loss: 0.3268
Model saved to Models/Date_20250825_161512_SimpleCNN/model_20250825_161512_epoch3.pth
Epoch 4/10, Loss: 0.3196
Model saved to Models/Date_20250825_161512_SimpleCNN/model_20250825_161512_epoch4.pth
Epoch 5/10, Loss: 0.3162
Model saved to Models/Date_20250825_161512_SimpleCNN/model_20250825_161512_epoch5.pth
Epoch 6/10, Loss: 0.3126
Model saved to Models/Date_20250825_161512_SimpleCNN/model_20250825_161512_epoch6.pth
Epoch 7/10, Loss: 0.3097
Model saved to Models/Date_20250825_161512_SimpleCNN/model_20250825_161512_epoch7.pth
Epoch 8/10, Loss: 0.3067
Model saved to Models/Date_20250825_161512_SimpleCNN/model_20250825_161512_epoch8.pth
Epoch 9/10, Loss: 0.3021
Model saved to Models/Date_20250825_161512_SimpleCNN/model_20250825_161512_epoch9.pth
E

In [10]:
# Slice the held-out (test-patient) data into windows with the training window settings
val_input_windows, val_raw_target = preprocessor.slice_windows(
    dfs=p_test_comb_trails_dfs,
    window_size=slice_window_size,
    stride=slice_window_stride,
    feature_columns=feature_columns,
    target_cols=label_column,
)

# Encode the held-out labels with the encoder that was fitted on the TRAINING labels.
# Re-fitting a new encoder here could assign different column indices to the classes
# (e.g. when a class is absent from this split) and silently misalign targets with the
# model's logits. transform() raises if a label was never seen during training.
onehot_val_target = encoder.transform(val_raw_target)

# Checkpoints written by the training cell (listed here, not used in this cell)
model_dir = directory_name
model_files = [f for f in os.listdir(model_dir) if f.endswith(".pth")]

# Prepare validation data
X_val = torch.tensor(val_input_windows, dtype=torch.float32)
y_val = np.argmax(onehot_val_target, axis=1)          # class indices in training-encoder order
y_val_tensor = torch.tensor(y_val, dtype=torch.long)  # CrossEntropyLoss accepts class indices

# Evaluate the in-memory model (its state after the last training epoch)
model.eval()

with torch.no_grad():
    logits = model(X_val)
    preds = torch.argmax(logits, dim=1).cpu().numpy()
    val_loss = criterion(logits, y_val_tensor).item()
    macro_f1 = f1_score(y_val, preds, average="macro")

print("Classification Report:")
print(classification_report(y_val, preds, digits=4))
print("Validation CrossEntropyLoss:", val_loss)
print("Macro F1 Score:", macro_f1)

Classification Report:
              precision    recall  f1-score   support

           0     0.9393    0.9857    0.9620     10453
           1     0.3968    0.1283    0.1939       764

    accuracy                         0.9273     11217
   macro avg     0.6680    0.5570    0.5779     11217
weighted avg     0.9023    0.9273    0.9096     11217

Validation CrossEntropyLoss: 0.26292428374290466
Macro F1 Score: 0.5779121166953458
