# Classical Artificial Neural Network - Prediction of Sleep and Awake States Including Engineered Features

In [None]:
# Import needed libraries and packages
import os
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.datasets import make_classification
from imblearn.over_sampling import SMOTE
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import load_model
import random

import warnings
# To ignore all warnings
warnings.filterwarnings("ignore")

RSEED=42

### Getting the Data into Dataframe

In [None]:
# Directory (relative to the working directory) that holds the input files.
# The train/test split and the oversampled training data are written to this
# same directory by later cells.
data_directory = '../data/file_per_night'

If you have already run the following cells and have the files 'train_data.parquet' and 'test_data.parquet' in the 'data/file_per_night' directory, you can skip the following cells and continue with importing those two files into new dataframes.

In [None]:
# Artifacts that later cells of this notebook write into data_directory.
# They must not be picked up as raw input when this cell is re-run, otherwise
# rows would be duplicated (or read_parquet would fail on the CSV file).
generated_files = {
    'train_data.parquet',
    'test_data.parquet',
    'X_train_smote.parquet',
    'y_train_smote.csv',
}

# Get a list of all input files in the directory. Hidden files (e.g. .DS_Store)
# are skipped, and the list is sorted because os.listdir returns entries in
# arbitrary order, which would make the seeded train/test split irreproducible.
file_paths = sorted(
    os.path.join(data_directory, file)
    for file in os.listdir(data_directory)
    if file not in generated_files
    and not file.startswith('.')
    and os.path.isfile(os.path.join(data_directory, file))
)

# Concatenate all dataframes into a single dataframe
dataframes = [pd.read_parquet(file) for file in file_paths]
full_dataframe = pd.concat(dataframes, ignore_index=True)

To have only two classes, we relabel the event 'onset' as 'awake' and the event 'wakeup' as 'sleep'.

In [None]:
# Inspect which distinct values occur in the 'event' column
full_dataframe['event'].unique()

In [None]:
# Relabel 'onset' as 'awake' and 'wakeup' as 'sleep'; every other value in the
# 'event' column is left unchanged.
# This is done as one vectorised replace on the column instead of a row-by-row
# chained assignment (full_dataframe['event'][i] = ...), which is very slow and
# does not modify the dataframe when pandas copy-on-write is enabled.
full_dataframe['event'] = full_dataframe['event'].replace({'onset': 'awake', 'wakeup': 'sleep'})

### Remove NAs

In [None]:
# Count the missing values per column
full_dataframe.isna().sum()

In [None]:
# Remove NAs: drop the 'night' and 'anglez_enmo_ratio' columns, then discard
# every row that still contains a missing value
full_dataframe = full_dataframe.drop(columns=['night', 'anglez_enmo_ratio']).dropna()

In [None]:
# Count the missing values per column again to verify the cleanup
full_dataframe.isna().sum()

### Train / Test Split

Next, we perform the regular train-test-split for training and evaluation of the model as usual:

In [None]:
# Stratified 75/25 split on the 'event' label; both resulting frames get a
# fresh 0..n-1 index
train_data, test_data = (
    subset.reset_index(drop=True)
    for subset in train_test_split(
        full_dataframe,
        test_size=0.25,
        stratify=full_dataframe['event'],
        random_state=RSEED,
    )
)

Save the train and test sets for later:

In [None]:
# Write both splits as parquet files into the data directory
for split_file, split_frame in (
    ("train_data.parquet", train_data),
    ("test_data.parquet", test_data),
):
    split_frame.to_parquet(os.path.join(data_directory, split_file))

--> continue here if you have already created 'train_data.parquet' and 'test_data.parquet'

In [None]:
# Use this to read the train and test sets if they were already created
train_data, test_data = (
    pd.read_parquet(os.path.join(data_directory, split_file))
    for split_file in ("train_data.parquet", "test_data.parquet")
)

In [None]:
# Preview the first rows of the training set
train_data.head()

### Preprocessing

For this model, we select all the newly engineered features that we created, including several statistical values for the corresponding bins such as mean, standard deviation and maximum, but also values associated with previous timepoints. For further information regarding feature engineering refer to [this file]().

Feature Extraction

In [None]:
# Features: every column except the target ('event') and the identifier
# columns 'series_id' and 'step'
X_train, X_test = (
    frame.drop(columns=['event', 'series_id', 'step']).reset_index(drop=True)
    for frame in (train_data, test_data)
)

# Target variable: the 'event' column
y_train, y_test = (
    frame['event'].reset_index(drop=True)
    for frame in (train_data, test_data)
)

Oversampling

In [None]:
# The classes are imbalanced (more sleep than awake states), so the training
# set is oversampled with SMOTE; the test set is left untouched
smote = SMOTE(random_state=RSEED)
X_train_smote, y_train_smote = smote.fit_resample(X_train, y_train)

# From here on the training variables refer to the oversampled data
X_train, y_train = X_train_smote, y_train_smote

If you want to reuse the 'smoted' data sets again (also for other models), you can save them now and afterwards only need to reload them.

In [None]:
# Save the oversampled train data: features as a PARQUET file, target as a CSV file
X_train.to_parquet(os.path.join(data_directory, "X_train_smote.parquet"))
y_train.to_csv(os.path.join(data_directory, "y_train_smote.csv"))

In [None]:
# Use this to reload the oversampled train data
X_train = pd.read_parquet(os.path.join(data_directory, "X_train_smote.parquet"))
# The target was saved as "y_train_smote.csv" (not "y_train.csv"); only the
# 'event' column is needed, the saved index column is discarded
y_train = pd.read_csv(os.path.join(data_directory, "y_train_smote.csv"))['event']

# The test set is not oversampled; rebuild it from the plain test split
X_test = test_data.drop(['event', "series_id", "step"], axis=1).reset_index(drop=True)
y_test = test_data['event'].reset_index(drop=True)

## Setting up and training of the model

Model type : Classic Artificial Neural Network

In [None]:
# Defining the ANN model architecture with 2 hidden layers and 1 output layer (binary classification)
def create_model(N_features, *, hidden_units=64, dropout_rate=0.4, learning_rate=0.001):
    """Build and compile a feed-forward network for binary classification.

    The network consists of two ReLU hidden layers, each followed by dropout,
    and a single sigmoid output unit. It is compiled with the Adam optimizer,
    binary cross-entropy loss and accuracy as the tracked metric.

    Args:
        N_features: Number of input features (columns) per sample.
        hidden_units: Width of each of the two hidden layers.
        dropout_rate: Dropout rate applied after each hidden layer.
        learning_rate: Learning rate passed to the Adam optimizer.

    Returns:
        The compiled, untrained Keras ``Sequential`` model.
    """
    model = Sequential([
        Dense(hidden_units, activation='relu', input_shape=(N_features,)),
        Dropout(dropout_rate),
        Dense(hidden_units, activation='relu'),
        Dropout(dropout_rate),
        # One sigmoid unit: the output is a single value in [0, 1] per sample
        Dense(1, activation='sigmoid'),
    ])
    model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return model

In [None]:
# Define the number of features for the model
# NOTE(review): this value is hard-coded and becomes the model's input shape,
# so it has to equal the number of feature columns used for training —
# TODO confirm it still matches the data.
N_features = 31  
model = create_model(N_features)

In [None]:
# Choose the directory that holds the train and test data.
# The files read from here are the ones the earlier cells wrote to
# '../data/file_per_night'; the former absolute path '/data/file_per_night/'
# pointed at the filesystem root and did not match that location.
file_path = '../data/file_per_night'

In [None]:
# Read the oversampled train data. The file names match what the save cell
# writes: 'X_train_smote.parquet' for the features and 'y_train_smote.csv'
# for the target (of which only the 'event' column is needed).
y_train = pd.read_csv(os.path.join(file_path, 'y_train_smote.csv'))['event']
X_train = pd.read_parquet(os.path.join(file_path, 'X_train_smote.parquet'))

# Read the test split once and derive both features and target from it
test_data = pd.read_parquet(os.path.join(file_path, 'test_data.parquet'))
X_test = test_data.drop(['event', "series_id", "step"], axis=1).reset_index(drop=True)
y_test = test_data['event'].reset_index(drop=True)


In [None]:
# Encode the target variable for train and test.
# A single encoder is fitted on the training labels and reused for the test
# labels, so both sets are guaranteed to share the same label -> integer
# mapping (two independently fitted encoders would silently disagree if one
# set were missing a class).
label_encoder = LabelEncoder()
y_train = label_encoder.fit_transform(y_train)  # classes are sorted: awake = 0, sleep = 1
y_test = label_encoder.transform(y_test)        # same mapping as for y_train

In [None]:
# Scale every feature to the range [0, 1]. The scaler learns the per-feature
# minimum and maximum from the training data only and is then applied to both
# the training and the test features.
scaler = MinMaxScaler(feature_range=(0, 1)).fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Train the model and evaluate it on the test set.
# Keras takes the validation split from the LAST 20% of the samples, before
# any shuffling. The SMOTE-resampled training set has the synthetic rows
# appended after the original ones, so without shuffling the validation set
# would consist mostly of synthetic minority-class samples. The rows are
# therefore permuted once, with a fixed seed for reproducibility.
shuffled_idx = np.random.default_rng(RSEED).permutation(len(X_train_scaled))
history = model.fit(
    X_train_scaled[shuffled_idx],
    np.asarray(y_train)[shuffled_idx],
    epochs=10,
    batch_size=64,
    validation_split=0.2,
)
loss, accuracy = model.evaluate(X_test_scaled, y_test)

In [None]:
# Save the trained ANN model in an h5 file (path is relative to the working directory)
model.save('model/ANN_trained_on_full_data.h5')

In [None]:
# Load the trained ANN model from the h5 file written by the save cell;
# this replaces the in-memory `model`
model = load_model('model/ANN_trained_on_full_data.h5')

In [None]:
# Predict the target variable for the test set. The model's output layer is a
# single sigmoid unit, so each prediction is a value between 0 and 1.
y_pred = model.predict(X_test_scaled)

In [None]:
# Convert the predicted probabilities to binary class labels.
# y_test is integer-encoded (awake = 0, sleep = 1), so the predictions are kept
# as 0/1 integers as well; converting them to the strings 'awake'/'sleep' would
# make the metrics compare strings against integers.
# np.rint rounds exactly like ndarray.round(0) did before (0.5 -> 0), and
# ravel() flattens the per-sample predictions into a 1-D label vector.
y_pred = np.rint(y_pred).astype(int).ravel()

Evaluation of the model performance:

In [None]:
# Overall accuracy plus precision, recall and F1, each averaged over the
# classes with average='weighted'
accuracy = accuracy_score(y_test, y_pred)
precision, recall, f1 = (
    metric(y_test, y_pred, average='weighted')
    for metric in (precision_score, recall_score, f1_score)
)

# Report every metric as a percentage with two decimals
for metric_name, metric_value in (
    ("Accuracy", accuracy),
    ("Precision", precision),
    ("Recall", recall),
    ("F1 Score", f1),
):
    print(f"{metric_name} on test data: {metric_value:.2%}")

In [None]:
# Compute confusion matrix (rows: actual class, columns: predicted class)
cm = confusion_matrix(y_test, y_pred)

# Draw the matrix as an annotated seaborn heatmap on an explicit axes object
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=False, ax=ax)

# Heatmap cells are centred at 0.5 and 1.5, which is where the class names go
tick_positions = [0.5, 1.5]
class_names = ['awake', 'sleep']

ax.set_xlabel('Predicted')
ax.set_xticks(tick_positions)
ax.set_xticklabels(class_names)
ax.set_ylabel('Actual')
ax.set_yticks(tick_positions)
ax.set_yticklabels(class_names)
ax.set_title('Confusion Matrix')
plt.show()