In [1]:
import pandas as pd
import numpy as np
import seaborn as sns

import torch
import torch.nn as nn
import torch.nn.functional as F

import sklearn 
from sklearn.model_selection import train_test_split

from tqdm import tqdm

import os
import sys

current_folder_path = os.path.abspath('')

## Loading the training dataset
The numpy array has the following shapes :
- 6556 rows, reprensenting 6556 event occurences
- 1445 columns representing :  
-- 1 column of step value (initialized for every serie id (ie test subject))  
-- 721 columns of **anglez** samples, centered 30 minutes before and after an event  
-- 721 columns of **enmo** samples, centered 30 minutes before and after an event  
-- 1 column of timestamp, ie the moment of the sample   
-- 1 column of **event** which is the **target** of the dataset. (0: *onset*, 1: *wakeup*)  

In [None]:
offset = 75
data = np.load(f"{current_folder_path}/training_data/data_{offset}.npy")

#[step, :, :, timestamp, event]
step = data[:,0]
X = data[:, 1:-2]
Y = data[:, -1]

print(X[0:2, :])
print(Y[0:2])


[[0.00000e+00 0.00000e+00 0.00000e+00 ... 0.00000e+00 0.00000e+00
  0.00000e+00]
 [2.18420e+01 2.85359e+01 3.44465e+01 ... 4.70000e-03 4.70000e-03
  4.60000e-03]]
[0. 0.]


## Data splitting

In [23]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.25)

train_features = torch.from_numpy(X_train).float()
train_labels = torch.from_numpy(Y_train).float().unsqueeze(1)
test_features = torch.from_numpy(X_test).float()
test_labels = torch.from_numpy(Y_test).float().unsqueeze(1)

train_features = F.normalize(train_features)
test_features = F.normalize(test_features)

print(train_features.size(), train_labels.size(), test_features.size(), test_labels.size())

torch.Size([4917, 1442]) torch.Size([4917, 1]) torch.Size([1639, 1442]) torch.Size([1639, 1])


## NN architecture

In [24]:
class SimpleNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(SimpleNN, self).__init__()
        self.input_size = input_size
        self.hidden_size= hidden_size
        self.output_size = output_size
        
        self.flatten = nn.Flatten()
        self.relu = nn.ReLU()

        self.input_layer = nn.Linear(input_size, hidden_size)
        
        self.layer1 = nn.Linear(hidden_size, hidden_size//2)
        self.layer2 = nn.Linear(hidden_size//2, hidden_size//4)
        
        self.batchnorm1 = nn.BatchNorm1d(hidden_size//2)
        self.batchnorm2 = nn.BatchNorm1d(hidden_size//4)

        self.output_layer = nn.Linear(hidden_size//4, output_size)
        self.sigmoid = nn.Sigmoid()  
        self.dropout = nn.Dropout1d(p=0.1)
            
    def forward(self, x):
        x = self.input_layer(x)
        x = self.relu(x)
        
        x = self.layer1(x)
        x = self.relu(x)
        x = self.batchnorm1(x)
        
        x = self.layer2(x)
        x = self.relu(x)
        x = self.batchnorm2(x)
        
        x = self.dropout(x)
        
        x = self.output_layer(x)  
        #x = self.relu(x)    
        x = self.sigmoid(x)
        return x        



## Model training

In [25]:
m, n = train_features.shape
input_size = n
print(input_size)
hidden_size = 64
output_size = 1

model = SimpleNN(input_size, hidden_size, output_size)
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
#optimize = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.5)

model.train() 
running_loss = 0.0

losses_list = []
num_epochs = int(input("Number of epochs : "))    
for epoch in tqdm(range(num_epochs)):
    optimizer.zero_grad()
    outputs = model(train_features)
    loss = criterion(outputs, train_labels)
    loss.backward()
    optimizer.step()
    running_loss += loss.item()
    losses_list.append(loss.item())

print(f"Epoch {epoch+1}, Loss: {running_loss}")
print(np.round(losses_list[::2], 6))


1442


100%|██████████| 100/100 [00:02<00:00, 48.54it/s]

Epoch 100, Loss: 34.71864132583141
[0.756949 0.580346 0.560412 0.548129 0.536322 0.524146 0.517991 0.507865
 0.499804 0.495006 0.484265 0.476457 0.468436 0.46367  0.454037 0.452745
 0.438198 0.428868 0.422587 0.410308 0.40088  0.384408 0.368587 0.358067
 0.349259 0.334542 0.324217 0.313738 0.289402 0.285854 0.273766 0.264003
 0.255959 0.246959 0.242625 0.229477 0.226257 0.213829 0.20641  0.20045
 0.206041 0.189233 0.187891 0.186613 0.17468  0.172032 0.171065 0.164134
 0.157074 0.155987]





## Model testing

In [27]:
model.eval()
correct = 0
total = 0
with torch.no_grad():
    outputs = model(test_features) 
    #print(outputs[0:10].T)
    #predicted = torch.round(outputs.data)
    #_, predicted = torch.max(outputs.data, 1)
    predicted = torch.round(outputs.data)
    # predicted = np.round(outputs.numpy())
    total += test_labels.size(0)
    # correct += (predicted.numpy() == test_labels.numpy().T).sum().item()
    correct += (predicted == test_labels).sum().item()        
    
    #print(predicted.numpy()[0:10].T, test_labels.numpy().T[0, 0:10])
    #correct += (predicted == test_labels.numpy().T[0]).sum().item()
    print(total, correct)

    accuracy = 100 * correct / total
    print(f"Accuracy on test set: {accuracy:.2f}%")

tensor([[0.2831, 0.9855, 0.9718, 0.9463, 0.9850, 0.0416, 0.9175, 0.6917, 0.0509,
         0.1022]])
[[0. 1. 1. 1. 1. 0. 1. 1. 0. 0.]] [0. 1. 1. 1. 1. 0. 1. 1. 0. 0.]
1639 1460
Accuracy on test set: 89.08%
