# Drowsiness Detection Model Training

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import torch
import torch.nn as nn
from torch.autograd import Variable
from sklearn.preprocessing import MinMaxScaler
from sklearn.utils import shuffle
import random
import os

from tqdm import tqdm

# Use the first CUDA device when one is visible, otherwise stay on the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
if device.type == "cuda":
    # Only the GPU case is announced; the CPU fallback is silent.
    print("gpu")

gpu


## Load Extracted Features and Perform Preprocessing

In [None]:
from google.colab import drive
drive.mount('/content/drive')

# Where the per-video feature CSVs live on the mounted Drive.
root = "/content/drive/My Drive/Drowsiness Detection/Features/"
# Every video is truncated or zero-padded to this many rows before it is
# cut into three equal segments.
maxlen = 1500

sc = MinMaxScaler()

X = []  # ends up as (5, 90, 500, 6): person, segment, frame, feature
y = []  # ends up as (5, 90): person, segment label (the mode index)
for person in range(1, 6):
    person_segments = []
    person_labels = []
    for mode in range(3):
        stacked_rows = []
        for video in range(10):
            csv_path = f"{root}/0{person}/{mode}/{video}.csv"
            frame = pd.read_csv(csv_path, index_col=False, header=None)

            # Fill missing values with a cubic spline, extrapolating in both
            # directions so leading/trailing gaps are covered as well.
            frame.interpolate(method='spline', order=3, axis=0, inplace=True, limit_direction='both')

            # Columns 3..6 are turned into per-frame displacements: take the
            # frame-to-frame difference (first frame has none, so it is set
            # to zero), square it, and combine the column pairs (3, 4) and
            # (5, 6) into one Euclidean distance each.
            pupil_cols = [3, 4, 5, 6]
            sq_step = frame[pupil_cols].diff()
            sq_step.iloc[0] = 0
            sq_step = sq_step ** 2
            frame[8] = np.sqrt(sq_step[3] + sq_step[4])
            frame[9] = np.sqrt(sq_step[5] + sq_step[6])
            frame.drop(columns=pupil_cols, inplace=True)

            # Feature vector per frame: [left eye angle, right eye angle,
            # mouth angle, head pitch angle, left pupil distance,
            # right pupil distance].
            feats = frame.to_numpy()
            n_rows = feats.shape[0]
            if n_rows > maxlen:
                feats = feats[:maxlen]
            else:
                feats = np.pad(feats, ((0, maxlen - n_rows), (0, 0)), mode='constant')

            stacked_rows.extend(feats)
            # One label per segment; each video yields three segments.
            person_labels.extend([mode] * 3)

        # Min-max scale all ten videos of this (person, mode) folder together.
        # NOTE(review): the scaler is refit per folder, so its statistics
        # include the zero padding and every video of the folder, including
        # those later held out for testing -- confirm this is intended.
        scaled = sc.fit_transform(np.array(stacked_rows))

        # Cut the folder back into 30 consecutive segments (3 per video).
        for segment in range(30):
            start = segment * maxlen // 3
            stop = (segment + 1) * maxlen // 3
            person_segments.append(scaled[start:stop])

    X.append(np.array(person_segments))
    y.append(np.array(person_labels))
X = np.array(X)
y = np.array(y)

print(X.shape)
print(y.shape)

Mounted at /content/drive
(5, 90, 500, 6)
(5, 90)


## Custom LSTM/BiLSTM Class

In [None]:
class LSTM(nn.Module):
    """Sequence classifier: an (optionally bidirectional) LSTM plus a linear head.

    Args:
        num_classes: number of logits produced per sequence.
        input_size: number of features per time step.
        hidden_size: hidden units per LSTM direction.
        num_layers: number of stacked LSTM layers.
        bidirectional: run the LSTM in both directions when True.
    """

    def __init__(self, num_classes, input_size, hidden_size, num_layers, bidirectional):
        super().__init__()

        self.num_classes = num_classes
        self.num_layers = num_layers
        self.input_size = input_size
        self.hidden_size = hidden_size
        # Read from the module-level ``maxlen``; stored for reference only,
        # forward() does not use it.
        self.seq_length = maxlen // 3
        self.bidirectional = bidirectional
        num_directions = 2 if self.bidirectional else 1

        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                            num_layers=num_layers, batch_first=True, bidirectional=bidirectional)
        # NOTE: this layer is constructed but never applied in forward();
        # it is kept so the module's attributes stay unchanged.
        self.dropout = nn.Dropout(0.25)

        self.fc = nn.Linear(num_directions * hidden_size, num_classes)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes).

        Args:
            x: batch-first input, (batch, time, input_size).
        """
        # Omitting (h_0, c_0) makes nn.LSTM start from zero states on the
        # same device as ``x``, so no global ``device`` is needed here.
        output, (h_n, _) = self.lstm(x)

        if self.bidirectional:
            # Read out the top layer's output at the last time step (both
            # directions concatenated).
            # NOTE(review): at that position the backward direction has only
            # processed a single frame; concatenating h_n[-2] and h_n[-1]
            # would use both final states -- confirm which is intended.
            summary = output[:, -1, :]
        else:
            # Final hidden state of the top layer only. Flattening h_n with
            # view(-1, hidden_size) would fold the layer axis into the batch
            # axis whenever num_layers > 1.
            summary = h_n[-1]

        return self.fc(summary)

## Model Training

In [None]:
# Optimisation settings used for every cross-validation fold.
num_epochs = 2000
learning_rate = 0.005

# Model dimensions. train_model sizes the network input from len(features),
# so input_size only records the full feature count.
input_size = 6
hidden_size = 128
num_layers = 1
num_classes = 3
# Respect CUDA availability instead of unconditionally requesting 'cuda',
# which makes every later .to(device) call fail on a CPU-only machine.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def accuracy(outputs, y):
    """Return the fraction of rows in ``outputs`` whose arg-max equals ``y``.

    ``outputs`` holds one row of class scores per sample and ``y`` the
    matching integer labels; the result is a 0-dim tensor.
    """
    _, predicted = torch.max(outputs, 1)
    n_correct = torch.sum(predicted == y)
    return n_correct / y.size(0)

def per_class_accuracy(outputs, y, num_classes):
    """Return a list with the recall of each class index in ``range(num_classes)``.

    Entry ``c`` is the share of samples labelled ``c`` that were predicted as
    ``c``. A class with no samples gets 0 because the denominator is floored
    at 1.
    """
    _, predicted = torch.max(outputs, 1)
    hits = predicted == y
    per_class = []
    for label in range(num_classes):
        members = y == label
        n_correct = (hits * members).float().sum()
        per_class.append(n_correct / max(members.sum(), 1))
    return per_class

def train_model(bidirectional, verbose, features=None):
    """Train and evaluate the LSTM classifier with 5-fold cross-validation.

    For fold k, videos 2k and 2k+1 of every person and mode form the test
    set and the other eight videos form the training set, so the three
    segments of one video always stay on the same side of the split. A fresh
    model is trained per fold with full-batch Adam on cross-entropy loss for
    ``num_epochs`` steps. Per-fold and fold-averaged accuracies are printed.

    Reads the module-level ``X``, ``y``, ``maxlen``, ``device`` and the
    hyperparameters (``num_epochs``, ``learning_rate``, ``hidden_size``,
    ``num_layers``, ``num_classes``).

    Args:
        bidirectional: build a BiLSTM when True, a plain LSTM otherwise.
        verbose: print training loss and accuracy every 100 epochs.
        features: indices of the feature columns to use; all six when omitted.

    Returns:
        None. Results are reported through ``print``.
    """
    # Avoid a shared mutable default and make sure the index is a plain list.
    features = [0, 1, 2, 3, 4, 5] if features is None else list(features)

    # set seeds
    seed = 42
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True

    class_names = ['Alert', 'Low Vigilant', 'Drowsy']
    n_folds = 5

    total_acc = 0
    total_acc_per_class = [0 for _ in class_names]
    for fold in range(n_folds):
        # Gather segment groups first and concatenate once per fold instead
        # of re-copying the growing arrays on every append.
        train_X, train_y, test_X, test_y = [], [], [], []
        for person in range(5):
            for mode in range(3):
                for video in range(10):
                    # The three consecutive segments that belong to this video.
                    segs = slice(mode * 30 + 3 * video, mode * 30 + 3 * (video + 1))
                    if 2 * fold <= video < 2 * (fold + 1):
                        test_X.append(X[person][segs])
                        test_y.append(y[person][segs])
                    else:
                        train_X.append(X[person][segs])
                        train_y.append(y[person][segs])
        X_train, y_train = shuffle(np.concatenate(train_X), np.concatenate(train_y))
        X_test, y_test = shuffle(np.concatenate(test_X), np.concatenate(test_y))

        X_train = X_train[:, :, features]
        X_test = X_test[:, :, features]

        X_train = torch.as_tensor(X_train, dtype=torch.float32).to(device)
        y_train = torch.as_tensor(y_train, dtype=torch.long).to(device)
        X_test = torch.as_tensor(X_test, dtype=torch.float32).to(device)
        y_test = torch.as_tensor(y_test, dtype=torch.long).to(device)

        lstm = LSTM(num_classes, len(features), hidden_size, num_layers, bidirectional).to(device)

        criterion = torch.nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(lstm.parameters(), lr=learning_rate)

        # Train the model (one full-batch update per epoch)
        print('----------------------TRAINING FOLD %d----------------------' % (fold + 1))
        lstm.train()
        for epoch in tqdm(range(num_epochs)):
            optimizer.zero_grad()
            outputs = lstm(X_train)

            loss = criterion(outputs, y_train)
            loss.backward()
            optimizer.step()

            if verbose and epoch > 0 and epoch % 100 == 0:
                print("Epoch: %d, loss: %1.5f, accuracy: %2.5f%%" % (epoch, loss.item(), accuracy(outputs, y_train) * 100))

        # Evaluate in inference mode without building an autograd graph.
        lstm.eval()
        with torch.no_grad():
            test_outputs = lstm(X_test)
            test_loss = criterion(test_outputs, y_test)
            test_acc = accuracy(test_outputs, y_test) * 100
            test_acc_per_class = per_class_accuracy(test_outputs, y_test, 3)

        print('RESULTS FOR FOLD %d' % (fold + 1))
        print('Test Loss: %1.5f, Test Accuracy: %2.5f%%' % (test_loss.item(), test_acc))
        print('Accuracy per Class')
        # Use a dedicated index so the fold counter is not shadowed.
        for cls_idx, cls_name in enumerate(class_names):
            print('%s: %2.5f%%' % (cls_name, test_acc_per_class[cls_idx] * 100))
            # Accumulate the fold average directly as a percentage.
            total_acc_per_class[cls_idx] += test_acc_per_class[cls_idx] * (100 / n_folds)
        total_acc += test_acc / n_folds

    print('Total Accuracy: %2.5f%%' % (total_acc))
    print('Accuracy per Class')
    for cls_idx, cls_name in enumerate(class_names):
        print('%s: %2.5f%%' % (cls_name, total_acc_per_class[cls_idx]))

### Using LSTM

In [None]:
# Unidirectional LSTM on all six features; only per-fold results are printed
# because verbose is off.
train_model(bidirectional=False, features=[0, 1, 2, 3, 4, 5], verbose=False)

----------------------TRAINING FOLD 1----------------------


100%|██████████| 2000/2000 [02:03<00:00, 16.21it/s]


RESULTS FOR FOLD 1
Test Loss: 0.80398, Test Accuracy: 64.44444%
Accuracy per Class
Alert: 43.33333%
Low Vigilant: 53.33334%
Drowsy: 96.66666%
----------------------TRAINING FOLD 2----------------------


100%|██████████| 2000/2000 [02:01<00:00, 16.40it/s]


RESULTS FOR FOLD 2
Test Loss: 0.35743, Test Accuracy: 83.33334%
Accuracy per Class
Alert: 100.00000%
Low Vigilant: 53.33334%
Drowsy: 96.66666%
----------------------TRAINING FOLD 3----------------------


100%|██████████| 2000/2000 [02:01<00:00, 16.42it/s]


RESULTS FOR FOLD 3
Test Loss: 0.35865, Test Accuracy: 85.55556%
Accuracy per Class
Alert: 90.00000%
Low Vigilant: 70.00000%
Drowsy: 96.66666%
----------------------TRAINING FOLD 4----------------------


100%|██████████| 2000/2000 [02:01<00:00, 16.45it/s]


RESULTS FOR FOLD 4
Test Loss: 0.95504, Test Accuracy: 46.66667%
Accuracy per Class
Alert: 36.66667%
Low Vigilant: 40.00000%
Drowsy: 63.33333%
----------------------TRAINING FOLD 5----------------------


100%|██████████| 2000/2000 [02:01<00:00, 16.45it/s]


RESULTS FOR FOLD 5
Test Loss: 0.69886, Test Accuracy: 68.88889%
Accuracy per Class
Alert: 53.33334%
Low Vigilant: 76.66666%
Drowsy: 76.66666%
Total Accuracy: 69.77779%
Accuracy per Class
Alert: 64.66666%
Low Vigilant: 58.66667%
Drowsy: 86.00000%


### Using BiLSTM

In [None]:
# Bidirectional LSTM on all six features; only per-fold results are printed
# because verbose is off.
train_model(bidirectional=True, features=[0, 1, 2, 3, 4, 5], verbose=False)

----------------------TRAINING FOLD 1----------------------


100%|██████████| 2000/2000 [04:20<00:00,  7.68it/s]


RESULTS FOR FOLD 1
Test Loss: 0.63273, Test Accuracy: 84.44444%
Accuracy per Class
Alert: 80.00000%
Low Vigilant: 80.00000%
Drowsy: 93.33334%
----------------------TRAINING FOLD 2----------------------


100%|██████████| 2000/2000 [04:19<00:00,  7.69it/s]


RESULTS FOR FOLD 2
Test Loss: 0.40216, Test Accuracy: 81.11111%
Accuracy per Class
Alert: 80.00000%
Low Vigilant: 80.00000%
Drowsy: 83.33333%
----------------------TRAINING FOLD 3----------------------


100%|██████████| 2000/2000 [04:19<00:00,  7.70it/s]


RESULTS FOR FOLD 3
Test Loss: 0.32726, Test Accuracy: 91.11111%
Accuracy per Class
Alert: 100.00000%
Low Vigilant: 86.66666%
Drowsy: 86.66666%
----------------------TRAINING FOLD 4----------------------


100%|██████████| 2000/2000 [04:20<00:00,  7.68it/s]


RESULTS FOR FOLD 4
Test Loss: 0.23892, Test Accuracy: 92.22223%
Accuracy per Class
Alert: 93.33334%
Low Vigilant: 93.33334%
Drowsy: 90.00000%
----------------------TRAINING FOLD 5----------------------


100%|██████████| 2000/2000 [04:20<00:00,  7.69it/s]


RESULTS FOR FOLD 5
Test Loss: 0.48148, Test Accuracy: 84.44444%
Accuracy per Class
Alert: 93.33334%
Low Vigilant: 83.33333%
Drowsy: 76.66666%
Total Accuracy: 86.66667%
Accuracy per Class
Alert: 89.33333%
Low Vigilant: 84.66666%
Drowsy: 86.00000%
