# Eye Drowsiness Detection

### Opis podatkov s strani kaggle:
This dataset is a forked version of the original MRL Eye Dataset, containing infrared eye images categorized into Awake and Sleepy states. It is split into training, validation, and test sets, comprising over 85,000 images captured under various lighting conditions using multiple sensors. This dataset is tailored for tasks such as eye detection, gaze estimation, blink detection, and drowsiness analysis in computer vision.

## Importi in uporaba GPU

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torchvision

import numpy as np
from PIL import Image

from torchvision import datasets, transforms
from torch.utils.data import DataLoader

In [2]:
torch.__version__

'2.5.1+cu124'

In [6]:
print("Number of GPU: ", torch.cuda.device_count())
print("GPU name: ", torch.cuda.get_device_name())

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Using device: ", device)

Number of GPU:  1
GPU name:  NVIDIA GeForce MX350
Using device:  cuda


## Nalaganje podatkov in transformacije - augmentacije

In [3]:
# Define transformations for the images
transform = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Load training data
train_dataset = datasets.ImageFolder(root='data/train', transform=transform)

# Load validation data
val_dataset = datasets.ImageFolder(root='data/val', transform=transform)

# Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# Print class labels to verify
print(train_dataset.classes)  # ['awake', 'sleepy']
print(train_dataset.class_to_idx)  # {'awake': 0, 'sleepy': 1}

['awake', 'sleepy']
{'awake': 0, 'sleepy': 1}


In [7]:
image, label = train_dataset[30000]
label

1

## Definiranje in učenje modela

In [3]:
class EnhancedModel(nn.Module):
    def __init__(self, num_classes):
        super(EnhancedModel, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1) #self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)  lahko probas ker so grayscale slike
        self.bn1 = nn.BatchNorm2d(32)
        self.pool1 = nn.MaxPool2d(2, 2)
        
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.pool2 = nn.MaxPool2d(2, 2)
        
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.pool3 = nn.MaxPool2d(2, 2)
        
        self.fc1 = nn.Linear(128 * 4 * 4, 256)
        self.fc2 = nn.Linear(256, num_classes)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x = self.pool1(F.relu(self.bn1(self.conv1(x))))
        x = self.pool2(F.relu(self.bn2(self.conv2(x))))
        x = self.pool3(F.relu(self.bn3(self.conv3(x))))
        
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x


In [7]:
#Uporabim Enhanced model
num_classes = 2 
model = EnhancedModel(num_classes)
model = model.to(device)
loss_function = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

print(model)

EnhancedModel(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn3): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=2048, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=2, bias=True)
  (dropout): Dropout(p=0.5, inplace=False)
)


**Učenje nad 4-imi epochi, zaradi velike časovne zahtevnosti (50000 slik):**

In [14]:
for epoch in range(4):
    print(f'Training epoch {epoch}...')
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print("Using device: ", device)

    running_loss = 0.0

    for i, data in enumerate(train_loader):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()

        outputs = model(inputs)

        loss = loss_function(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f'Loss: {running_loss / len(train_loader):.4f}')

Training epoch 0...
Using device:  cuda
Loss: 0.1500
Training epoch 1...
Using device:  cuda
Loss: 0.0904
Training epoch 2...
Using device:  cuda
Loss: 0.0754
Training epoch 3...
Using device:  cuda
Loss: 0.0660


## Shranjevanje modela

In [15]:
# TA MODEL JE KASNEJE (OB ODDAJI) BIL PREIMENOVAN V EYE_DETECTION_MODEL
torch.save(model.state_dict(), 'trained_Model_1.pth')

## Ponovno nalaganje modela

In [8]:
net = EnhancedModel(num_classes)
net.load_state_dict(torch.load('trained_Model_1.pth'))

  net.load_state_dict(torch.load('trained_Model_1.pth'))


<All keys matched successfully>

## Preverjanje natančnosti

In [17]:
correct = 0
total = 0

net.eval()

with torch.no_grad():
    for data in val_loader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = (100 * correct) / total

print(f'Accuracy: {accuracy}%')

Accuracy: 97.97997644287398%


**Končna natančnost: 97,97%**

## Dodatno testiranje
Ob pregledu kode sem opazil, da sem pri nalaganju podatkov nad val_dataset izvedel iste transformacije (in augmentacije) kot nad train_dataset. Zato sem nato nad testno množico izvedel le nujne transformacije, da lahko model primerno deluje: Resize, ToTensor in normalizacija, katera se je izkazala za nujno, saj je brez nje model dosegel le 54% natančnost, z njo pa 98%.

In [18]:
test_transform = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])
test_dataset = datasets.ImageFolder(root='data/test', transform=test_transform)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [19]:
correct = 0
total = 0

net.eval()

with torch.no_grad():
    for data in test_loader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = (100 * correct) / total

print(f'Accuracy: {accuracy}%')

Accuracy: 98.44532124138743%


### Končna natančnost
98,4%