## Creating The DataSet for training Awake or drowsy labels

In [None]:
import cv2
import os

# Create a folder to store the captured images
output_folder = 'aw_2'
os.makedirs(output_folder, exist_ok=True)

# Initialize the RealSense camera capture
# Replace '0' with the appropriate index for the RealSense camera
cap = cv2.VideoCapture(0)

# Check if the camera is opened successfully
if not cap.isOpened():
    print("Error: Unable to open RealSense camera.")
else:
    count = 0  # Counter for captured images
    while True:
        if(count==15):
            break
        # Capture frame-by-frame
        ret, frame = cap.read()

        # Display the frame
        cv2.imshow('RealSense Camera Feed', frame)

        # Check for the Enter key press to capture an image
        if cv2.waitKey(1) == 13:  # 13 is the ASCII code for Enter key
            # Save the captured image
            img_name = f'image_{count}.png'
            img_path = os.path.join(output_folder, img_name)
            cv2.imwrite(img_path, frame)
            print(f"Saved {img_name}")
            count += 1

        # Check for the 'q' key press to quit
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

# Release the camera capture
cap.release()

# Close all OpenCV windows
cv2.destroyAllWindows()

## MODEL TRAINING

### Pytorch 

In [4]:
import os
import cv2
import time
import torch
import torchvision.transforms as transforms
from torchvision.models import resnet18
from torch.nn import Sequential, Linear, ReLU
from torch.optim import Adam
from torch.utils.data import DataLoader, Dataset
import torch.nn as nn

from torchvision.transforms import ToTensor, Normalize, Compose, Resize
from PIL import Image


# Define the preprocessing transformation for input frames
preprocess = transforms.Compose([
    transforms.ToPILImage(),
    Resize((224, 224)),
    ToTensor(),
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Define a custom dataset class for your data
class CustomDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.classes = sorted(os.listdir(root_dir))

    def __len__(self):
        return sum(len(files) for _, _, files in os.walk(self.root_dir))

    def __getitem__(self, idx):
        class_name = self.classes[idx // 30]  # Assuming there are 12 images per class
        class_dir = os.path.join(self.root_dir, class_name)
        img_idx = idx % 30    ## chnages here
        img_name = os.listdir(class_dir)[img_idx]
        img_path = os.path.join(class_dir, img_name)
        image = cv2.imread(img_path)
        if self.transform:
            image = self.transform(image)
        label = 0 if class_name == 'awake' else 1
        return image, label

# Define paths to your dataset directories
train_data_dir = 'Aw_Dr_Model_Data'

# Create custom dataset for training
train_dataset = CustomDataset(train_data_dir, transform=preprocess)

# Define data loader
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)

# Load the pre-trained ResNet18 model
model = Sequential(
    resnet18(pretrained=True),
    Linear(1000, 2)  # 2 output classes for 'Awake' and 'Drowsy'
)

# Freeze all layers except the last one
for param in model.parameters():
    param.requires_grad = False
for param in model[-1].parameters():
    param.requires_grad = True

# Set device to GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# Define loss function and optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=0.0001)

# Train the model
num_epochs = 25
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * images.size(0)
    epoch_loss = running_loss / len(train_loader.dataset)
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}")


Epoch 1/25, Loss: 1.1863
Epoch 2/25, Loss: 0.8162
Epoch 3/25, Loss: 0.5794
Epoch 4/25, Loss: 0.4986
Epoch 5/25, Loss: 0.4359
Epoch 6/25, Loss: 0.3219
Epoch 7/25, Loss: 0.3539
Epoch 8/25, Loss: 0.2378
Epoch 9/25, Loss: 0.2248
Epoch 10/25, Loss: 0.1667
Epoch 11/25, Loss: 0.1676
Epoch 12/25, Loss: 0.1674
Epoch 13/25, Loss: 0.1418
Epoch 14/25, Loss: 0.1090
Epoch 15/25, Loss: 0.1312
Epoch 16/25, Loss: 0.1550
Epoch 17/25, Loss: 0.0955
Epoch 18/25, Loss: 0.1339
Epoch 19/25, Loss: 0.1120
Epoch 20/25, Loss: 0.1228
Epoch 21/25, Loss: 0.1123
Epoch 22/25, Loss: 0.1923
Epoch 23/25, Loss: 0.1880
Epoch 24/25, Loss: 0.0814
Epoch 25/25, Loss: 0.1513


In [7]:
# Print model parameters
for name, param in model.named_parameters():
    if param.requires_grad:
        print(name, param.data)


1.weight tensor([[-0.0092, -0.0099, -0.0098,  ...,  0.0252,  0.0246, -0.0271],
        [-0.0119,  0.0097, -0.0256,  ...,  0.0084,  0.0241,  0.0098]])
1.bias tensor([-0.0151,  0.0308])


In [None]:
# Define the file path where you want to save the model
model_path = "unquantized_resnet_model_for_awake_dr_pred.pth"

# Save the model
torch.save(model.state_dict(), model_path)

print(f"Model saved to {model_path}")

## MODEL INFERENCE

In [1]:
import os
import cv2
import time
import torch
import torchvision.transforms as transforms
from torchvision.models import resnet18
from torch.nn import Sequential, Linear, ReLU
from torch.optim import Adam
from torch.utils.data import DataLoader, Dataset

In [None]:
# Set model to evaluation mode
model.eval()

# Open a video capture object (0 for webcam)
cap = cv2.VideoCapture(0)

# Check if the camera is opened correctly
if not cap.isOpened():
    print("Error: Could not open camera.")
    exit()

# Define class labels
class_labels = ['Awake', 'Drowsy']

# Read frames from the video capture object
while True:
    # Capture frame-by-frame
    ret, frame = cap.read()

    # Preprocess the frame
    input_tensor = preprocess(frame).unsqueeze(0).to(device)

    # Perform inference
    with torch.no_grad():
        output = model(input_tensor)
        _, predicted = torch.max(output, 1)
        predicted_label = class_labels[predicted.item()]

    # Display the predicted label on the frame
    cv2.putText(frame, predicted_label, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

    # Display the resulting frame
    cv2.imshow('Video', frame)

    # Delay for 1 second
    time.sleep(0.01)

    # Break the loop if 'q' is pressed
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release the video capture object and close all windows
cap.release()
cv2.destroyAllWindows()


### Loading the model 

In [3]:
# Define the file path where the model is saved
model_path = "resnet_model_for_awake_dr_pred.pth"


# Define the model architecture
model_loaded = Sequential(
    resnet18(pretrained=True),
    Linear(1000, 2)  # Assuming 2 output classes
)

# Load the model state dictionary
model_loaded.load_state_dict(torch.load(model_path))

print(f"Model loaded from {model_path}")




Model loaded from resnet_model_for_awake_dr_pred.pth


In [16]:
model_loaded

Sequential(
  (0): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_runnin

In [4]:


# Calculate the number of parameters
total_params = sum(p.numel() for p in model_loaded.parameters())

# Calculate the memory footprint (assuming single-precision floating-point numbers)
memory_footprint = total_params * 4  # 4 bytes per parameter (32 bits)

print("Total Number of Parameters:", total_params)
print("Memory Footprint (in bytes):", memory_footprint)
print("Memory Footprint (in megabytes):", memory_footprint / (1024 * 1024))


Total Number of Parameters: 11691514
Memory Footprint (in bytes): 46766056
Memory Footprint (in megabytes): 44.599586486816406


In [6]:
# Iterate over the parameters and print their data types
for name, param in model_loaded.named_parameters():
    print(f"Parameter '{name}' has data type: {param.dtype}")

Parameter '0.conv1.weight' has data type: torch.float32
Parameter '0.bn1.weight' has data type: torch.float32
Parameter '0.bn1.bias' has data type: torch.float32
Parameter '0.layer1.0.conv1.weight' has data type: torch.float32
Parameter '0.layer1.0.bn1.weight' has data type: torch.float32
Parameter '0.layer1.0.bn1.bias' has data type: torch.float32
Parameter '0.layer1.0.conv2.weight' has data type: torch.float32
Parameter '0.layer1.0.bn2.weight' has data type: torch.float32
Parameter '0.layer1.0.bn2.bias' has data type: torch.float32
Parameter '0.layer1.1.conv1.weight' has data type: torch.float32
Parameter '0.layer1.1.bn1.weight' has data type: torch.float32
Parameter '0.layer1.1.bn1.bias' has data type: torch.float32
Parameter '0.layer1.1.conv2.weight' has data type: torch.float32
Parameter '0.layer1.1.bn2.weight' has data type: torch.float32
Parameter '0.layer1.1.bn2.bias' has data type: torch.float32
Parameter '0.layer2.0.conv1.weight' has data type: torch.float32
Parameter '0.laye

In [9]:
# Set device to GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [2]:
import os
import cv2
import time
import torch
import torchvision.transforms as transforms
from torchvision.models import resnet18
from torch.nn import Sequential, Linear, ReLU
from torch.optim import Adam
from torch.utils.data import DataLoader, Dataset
from torchvision.transforms import ToTensor, Normalize, Compose, Resize
from PIL import Image



# Define the preprocessing transformation for input frames
preprocess = transforms.Compose([
    transforms.ToPILImage(),
    Resize((224, 224)),
    ToTensor(),
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [11]:
# Set model to evaluation mode
model_loaded.eval()

# Open a video capture object (0 for webcam)
cap = cv2.VideoCapture(0)

# Check if the camera is opened correctly
if not cap.isOpened():
    print("Error: Could not open camera.")
    exit()

# Define class labels
class_labels = ['Awake', 'Drowsy']

# Read frames from the video capture object
while True:
    # Capture frame-by-frame
    ret, frame = cap.read()

    # Preprocess the frame
    input_tensor = preprocess(frame).unsqueeze(0)

    # Perform inference
    with torch.no_grad():
        output = model_loaded(input_tensor)
        _, predicted = torch.max(output, 1)
        predicted_label = class_labels[predicted.item()]

    # Display the predicted label on the frame
    cv2.putText(frame, predicted_label, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

    # Display the resulting frame
    cv2.imshow('Video', frame)

    # Delay for 1 second
    time.sleep(0.01)

    # Break the loop if 'q' is pressed
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release the video capture object and close all windows
cap.release()
cv2.destroyAllWindows()

### Inference with the emergency sound on drowsy

In [4]:
import cv2
import numpy as np
import pygame
import os
import torch
import time

# Initialize Pygame for audio playback
pygame.init()

# Load the beep sound
beep_sound = pygame.mixer.Sound("Beep_cut_2.wav")

# Set model to evaluation mode
model_loaded.eval()

# Open a video capture object (0 for webcam)
cap = cv2.VideoCapture(0)

# Check if the camera is opened correctly
if not cap.isOpened():
    print("Error: Could not open camera.")
    exit()

# Define class labels
class_labels = ['Awake', 'Drowsy']

# Initialize variables for time tracking
start_time = None
drowsy_detected = False

# Read frames from the video capture object
while True:
    # Capture frame-by-frame
    ret, frame = cap.read()

    # Preprocess the frame
    input_tensor = preprocess(frame).unsqueeze(0)

    # Perform inference
    with torch.no_grad():
        output = model_loaded(input_tensor)
        _, predicted = torch.max(output, 1)
        predicted_label = class_labels[predicted.item()]

    # Display the predicted label on the frame
    cv2.putText(frame, predicted_label, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

    # Check if the predicted label is "Drowsy"
    if predicted_label == 'Drowsy':
        # If "Drowsy" label detected for the first time, start the timer
        if not drowsy_detected:
            start_time = time.time()
            drowsy_detected = True
        # If "Drowsy" label is detected for 2 seconds continuously, play the beep sound
        elif time.time() - start_time >= 2:
            beep_sound.play()
    else:
        # Reset timer and flag if label is not "Drowsy"
        start_time = None
        drowsy_detected = False

    # Display the resulting frame
    cv2.imshow('Video', frame)

    # Delay for 1 second
    time.sleep(0.01)

    # Break the loop if 'q' is pressed
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release the video capture object and close all windows
cap.release()
cv2.destroyAllWindows()
