In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import accuracy_score, balanced_accuracy_score, confusion_matrix, ConfusionMatrixDisplay, f1_score, roc_auc_score, matthews_corrcoef

import torch
import torch.nn as nn
from tqdm import tqdm
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchsummary import summary

In [None]:
# Read the raw dataset; the path is relative to the notebook's working directory.
df = pd.read_csv('sample_data/final_df_sleep_v0.csv')
# Preview the first rows only instead of rendering the whole frame.
df.head()

In [None]:
# Remove the 'time' column before modelling.
# errors='ignore' keeps this cell safe to re-run: `df` is rebound in place, so
# on a second execution the column is already gone and a strict drop would
# raise KeyError.
df = df.drop(columns = ['time'], errors = 'ignore')
df.head()

In [None]:
# Label vector: the 'target' column on its own.
y = df['target']
# Feature matrix: every remaining column.
X = df.drop(columns = 'target')

In [None]:
# Hold out 30% of the rows for evaluation. The rows are shuffled before the
# split, and the fixed random_state makes the partition reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size = 0.3,
    random_state = 42,
    shuffle = True,
)

In [None]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
  device = torch.device('cuda')
else:
  device = torch.device('cpu')
device

In [None]:
class Data(Dataset):
  """Dataset over a NumPy feature array and a NumPy label array.

  Features are cast to float32 and labels to int64 tensors once, at
  construction time; indexing returns a ``(features, label)`` pair.
  """

  def __init__(self, X_train, y_train):
    """Convert the arrays to tensors and cache the number of samples.

    Args:
      X_train: NumPy array of features, indexed by sample along axis 0.
      y_train: NumPy array of labels aligned with ``X_train``.
    """
    features = X_train.astype(np.float32)
    self.X = torch.from_numpy(features)
    self.y = torch.from_numpy(y_train).long()
    self.len = self.X.size(0)

  def __len__(self):
    """Return the number of samples."""
    return self.len

  def __getitem__(self, index):
    """Return the ``(features, label)`` tensors stored at ``index``."""
    return self.X[index], self.y[index]

In [None]:
# Network dimensions.
INPUT_DIM = 6        # size of each input feature vector fed to the first layer
HIDDEN_LAYERS = 25   # base hidden width (units per layer, despite the name)
OUTPUT_DIM = 2       # number of output logits

# Number of samples per mini-batch for both data loaders.
BATCH_SIZE = 64

In [None]:
# Wrap the NumPy versions of each split in the tensor-backed Dataset.
train_data = Data(X_train.to_numpy(), y_train.to_numpy())
test_data = Data(X_test.to_numpy(), y_test.to_numpy())

# Shuffle only the training batches. Evaluation metrics do not depend on the
# batch order, so the test loader keeps the dataset order and the collected
# predictions stay aligned with `X_test` / `y_test` row order.
train_loader = DataLoader(train_data, batch_size = BATCH_SIZE, shuffle = True, num_workers = 2)
test_loader = DataLoader(test_data, batch_size = BATCH_SIZE, shuffle = False, num_workers = 2)

In [None]:
class NN(nn.Module):
  """Fully connected classifier: four linear layers with ReLU and one dropout.

  Layer widths are input_dim -> hidden_dim -> 2 * hidden_dim -> hidden_dim
  -> output_dim. The forward pass returns raw logits (no softmax).
  """

  def __init__(self, input_dim = None, hidden_dim = None, output_dim = None):
    """Build the layers.

    Args:
      input_dim: number of input features; defaults to ``INPUT_DIM``.
      hidden_dim: base hidden width; defaults to ``HIDDEN_LAYERS``.
      output_dim: number of output logits; defaults to ``OUTPUT_DIM``.
    """
    # Zero-argument super(): the explicit super(NN, self) form looks up the
    # global name `NN`, which this notebook later rebinds to a model instance.
    super().__init__()

    # Fall back to the notebook-level constants at call time so that NN()
    # keeps building exactly the same network as before.
    input_dim = INPUT_DIM if input_dim is None else input_dim
    hidden_dim = HIDDEN_LAYERS if hidden_dim is None else hidden_dim
    output_dim = OUTPUT_DIM if output_dim is None else output_dim

    self.linear1 = nn.Linear(input_dim, hidden_dim)
    self.linear2 = nn.Linear(hidden_dim, hidden_dim * 2)
    # Single dropout layer, applied once after the widest hidden layer.
    self.dropout = nn.Dropout(0.2)
    self.linear3 = nn.Linear(hidden_dim * 2, hidden_dim)
    self.linear4 = nn.Linear(hidden_dim, output_dim)

  def forward(self, x):
    """Return the logits for ``x``, whose last dimension is the input width."""
    x = torch.relu(self.linear1(x))
    x = torch.relu(self.linear2(x))
    x = self.dropout(x)
    x = torch.relu(self.linear3(x))
    x = self.linear4(x)
    return x

In [None]:
# NOTE: this rebinds the name `NN` from the class to the model instance, so
# re-running this cell requires re-running the class-definition cell first.
NN = NN().to(device)
# Size the summary input from the shared constant instead of repeating the
# literal feature count.
summary(NN, (1, INPUT_DIM))

In [None]:
# Adam over all model parameters with a learning rate of 1e-3.
optimizer = optim.Adam(NN.parameters(), lr = 1e-3)
# Cross-entropy loss, applied to the logits the model returns.
criterion = nn.CrossEntropyLoss()

In [None]:
NUM_EPOCHS = 20

# Make sure dropout is active while fitting, even if the model was switched to
# eval mode by an earlier run of the evaluation cell.
NN.train()

for epoch in range(NUM_EPOCHS):
  # 1-based label so the last epoch reads [NUM_EPOCHS/NUM_EPOCHS]; passing it
  # as `desc` shows it from the first batch onwards.
  loop = tqdm(train_loader, desc = f"Epoch [{epoch + 1}/{NUM_EPOCHS}]")
  # Batch-specific names: reusing `X` / `y` here would overwrite the
  # notebook-level feature frame and label series.
  for X_batch, y_batch in loop:
    X_batch, y_batch = X_batch.to(device), y_batch.to(device)

    logits = NN(X_batch)
    loss = criterion(logits, y_batch)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Show the running mini-batch loss next to the progress bar.
    loop.set_postfix(loss = loss.item())

In [None]:
correct, total = 0, 0
list_pred, list_true = [], []

# Switch to inference mode so dropout is disabled; otherwise the test
# predictions are computed with randomly zeroed activations.
NN.eval()
with torch.no_grad():
  # Batch-specific names: reusing `X` / `y` here would overwrite the
  # notebook-level feature frame and label series.
  for X_batch, y_batch in test_loader:
    X_batch, y_batch = X_batch.to(device), y_batch.to(device)

    output = NN(X_batch)
    # Predicted class = index of the largest logit; keepdim gives shape (B, 1).
    pred = output.argmax(dim = 1, keepdim = True)
    list_pred.append(pred.cpu().numpy())
    list_true.append(y_batch.cpu().numpy())
    correct += pred.eq(y_batch.view_as(pred)).sum().item()
    total += y_batch.size(0)

# `accuracy` stays a fraction in [0, 1]; NaN when the loader yielded nothing.
accuracy = correct / total if total else float('nan')
# Scale to a percentage so the number matches the printed '%' sign.
print(f'Accuracy: {accuracy * 100:>0.2f} %')

In [None]:
# Flatten the per-batch arrays into plain Python lists. Ravelling each item
# first makes the cell re-runnable: on a second execution the names already
# hold flat lists of scalars, which a bare np.concatenate would reject.
list_pred = np.concatenate([np.ravel(p) for p in list_pred]).tolist()
list_true = np.concatenate([np.ravel(t) for t in list_true]).tolist()
# scikit-learn expects (y_true, y_pred): rows are true labels and columns are
# predicted labels. The reversed order yields the transposed matrix.
cm = confusion_matrix(list_true, list_pred)
cm

In [None]:
# scikit-learn metrics take (y_true, y_pred) in that order. Balanced accuracy,
# weighted F1 and ROC AUC are not symmetric in their arguments, so the ground
# truth must come first.
nn_AS_test = accuracy_score(list_true, list_pred)
nn_BAS_test = balanced_accuracy_score(list_true, list_pred)
nn_f1_weighted_test = f1_score(list_true, list_pred, average = 'weighted')
# NOTE(review): this AUC is computed from hard class predictions, not scores;
# collect class probabilities in the evaluation loop for a threshold-free AUC.
nn_roc_auc_score_test = roc_auc_score(list_true, list_pred)
# Previous (copy-pasted) name kept as an alias for any cell that still reads it.
catboost_roc_auc_score_test = nn_roc_auc_score_test
nn_mcc_test = matthews_corrcoef(list_true, list_pred)

nn_test_scores_dict = {'Accuracy' : nn_AS_test,
                       'Balanced accuracy' : nn_BAS_test,
                       'F1 score' : nn_f1_weighted_test,
                       'ROC auc score' : nn_roc_auc_score_test,
                       'Matthews correlation coefficient' : nn_mcc_test}

# One row per metric, a single 'Scores' column.
nn_test_scores_df = pd.DataFrame.from_dict(nn_test_scores_dict, orient = 'index', columns = ['Scores'])

nn_test_scores_df

In [None]:
# Draw the confusion matrix with a readable label for each class index.
disp = ConfusionMatrixDisplay(
    confusion_matrix = cm,
    display_labels = ['Non-REM sleep', 'REM sleep'],
)
disp.plot()
plt.show()

In [None]:
# Persist only the learned parameters (the state_dict), not the module object.
torch.save(
    NN.state_dict(),
    './nn_sleep.pth',
)