In [1]:
from model import Model
from train import trainModel
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.datasets as dset
from torchvision import transforms
import torchvision.models as models
import cv2
from chatbot import chat_loop
from sleepy import trigger_sleepy as buzzer_on
from chatbot import trigger_sleepy as prompt_bot
from thermostat import trigger_thermostat as thermostat_on

In [None]:
chat_thread = threading.Thread(target=chat_loop)
chat_thread.daemon = True
chat_thread.start()

In [2]:
if torch.cuda.is_available():
    device = 'cuda'

In [3]:
root = 'train'
dataset = dset.ImageFolder(root=root,
                           transform=transforms.Compose([
                               transforms.Resize(64),
                               transforms.CenterCrop(64),
                               transforms.ToTensor(),
                           ]))

train_loader = torch.utils.data.DataLoader(dataset, batch_size=64, shuffle=True, num_workers=2, drop_last = True)

In [4]:
model = models.resnet18()
for param in model.parameters():
    param.requires_grad = False
model.fc = nn.Sequential(nn.Linear(model.fc.in_features, 512),
                         nn.ReLU(),
                         nn.Linear(512, 1),
                         nn.Sigmoid())
criterion = nn.BCELoss()
optim = optim.Adam(model.parameters(), lr = 1e-03)

In [5]:
for epoch in range(5):
    for idx, data in enumerate(train_loader):
        images, labels = data
        optim.zero_grad()
        model = model.to(device)
        labels = labels.to(device)
        images = images.to(device)
        labels = labels.to(torch.float32)
        outputs = model(images).squeeze()
        loss = criterion(outputs.to(torch.float32), labels.to(torch.float32))
        loss.backward()
        optim.step()
    print(f'Loss: {loss}')


Loss: 0.17297877371311188
Loss: 0.12622374296188354
Loss: 0.07490165531635284
Loss: 0.08751120418310165
Loss: 0.14981615543365479


In [6]:
(outputs > 0.5).to(torch.float64), labels


(tensor([0., 1., 0., 0., 0., 1., 1., 1., 0., 0., 0., 1., 1., 1., 0., 0., 1., 1.,
         0., 0., 0., 0., 1., 0., 1., 1., 0., 1., 1., 0., 1., 1., 0., 0., 1., 0.,
         1., 1., 1., 0., 0., 0., 1., 1., 1., 1., 1., 0., 0., 1., 0., 0., 0., 0.,
         1., 0., 0., 1., 0., 1., 1., 0., 1., 1.], device='cuda:0',
        dtype=torch.float64),
 tensor([0., 1., 0., 0., 0., 1., 1., 1., 0., 0., 0., 1., 1., 1., 1., 0., 1., 1.,
         0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 0., 1., 1., 0., 0., 1., 0.,
         1., 1., 1., 0., 0., 0., 1., 1., 1., 1., 1., 1., 0., 1., 1., 0., 0., 0.,
         1., 0., 0., 1., 0., 1., 1., 0., 1., 1.], device='cuda:0'))

In [7]:
preprocess = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [8]:
cap = cv2.VideoCapture(700)
model = model.cpu()
while True:
    ret, frame = cap.read()
    if not ret:
        break
    input = preprocess(frame)
    input_batch = input.unsqueeze(0)
    with torch.no_grad():
        output = model(input_batch)
    output = (output > 0.6).to(torch.int32)
    msg = "Drowsy"
    if output == 1:
        msg = "Not Drowsy"
    cv2.putText(frame, msg, (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
    cv2.imshow('Live Classification', frame)
    key = cv2.waitKey(1) & 0xFF
    if key == ord('q'):
        break  
cap.release()
cv2.destroyAllWindows()

tensor([[0.1789]])
tensor([[0.7551]])
tensor([[0.3917]])
tensor([[0.7421]])
tensor([[0.8336]])
tensor([[0.2058]])
tensor([[0.8125]])
tensor([[0.3130]])
tensor([[0.1086]])
tensor([[0.3100]])
tensor([[0.6733]])
tensor([[0.2562]])
tensor([[0.6418]])
tensor([[0.1464]])
tensor([[0.4048]])
tensor([[0.2274]])
tensor([[0.6479]])
tensor([[0.8563]])
tensor([[0.5445]])
tensor([[0.5041]])
tensor([[0.7299]])
tensor([[0.7125]])
tensor([[0.1805]])
tensor([[0.6820]])
tensor([[0.4567]])
tensor([[0.5785]])
tensor([[0.7094]])
tensor([[0.4209]])
tensor([[0.7310]])
tensor([[0.5069]])
tensor([[0.1244]])
tensor([[0.7940]])
tensor([[0.8687]])
tensor([[0.7651]])
tensor([[0.5689]])
tensor([[0.2647]])
tensor([[0.2895]])
tensor([[0.2035]])
tensor([[0.3910]])
tensor([[0.9357]])
tensor([[0.4762]])
tensor([[0.5747]])
tensor([[0.2956]])
tensor([[0.2310]])
tensor([[0.3052]])
tensor([[0.4770]])
tensor([[0.2412]])
tensor([[0.3832]])
tensor([[0.2475]])
tensor([[0.7719]])
tensor([[0.7960]])
tensor([[0.4095]])
tensor([[0.3

In [21]:
import dlib
import numpy as np

preprocess = transforms.Compose([
    transforms.Resize((224, 224)), 
    transforms.ToTensor(),  
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), 
])

def predict_eye_state(eye_image):
    input_tensor = transforms.ToTensor()(eye_image)
    input_batch = input_tensor.unsqueeze(0)  
    with torch.no_grad():
        output = model(input_batch)  
    return output

hog_face_detector = dlib.get_frontal_face_detector()
dlib_facelandmark = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")

count = 0
cond = True

def calculate_EAR(eye):
    x1, y1 = eye[1]
    x5, y5 = eye[5]
    x2, y2 = eye[2]
    x4, y4 = eye[4]
    x0, y0 = eye[0]
    x3, y3 = eye[3]

    A = np.linalg.norm((x1 - x5, y1 - y5))
    B = np.linalg.norm((x2 - x4, y2 - y4))
    C = np.linalg.norm((x0 - x3, y0 - y3))

    # Calculate EAR
    ear_aspect_ratio = (A + B) / (2 * C)
    return ear_aspect_ratio


cap = cv2.VideoCapture(700)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1000)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 1000)

while cond:
    _, frame = cap.read()
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = hog_face_detector(gray)
    for face in faces:
        face_landmarks = dlib_facelandmark(gray, face)
        leftEye = []
        rightEye = []

        for n in range(36, 42):
            x = face_landmarks.part(n).x
            y = face_landmarks.part(n).y
            leftEye.append((x, y))


        for n in range(42, 48):
            x = face_landmarks.part(n).x
            y = face_landmarks.part(n).y
            rightEye.append((x, y))

        right_eye_pts = np.array(rightEye)
        right_eye_box = cv2.boundingRect(right_eye_pts)
        right_eye_region = frame[right_eye_box[1]:right_eye_box[1] + right_eye_box[3],
                                  right_eye_box[0]:right_eye_box[0] + right_eye_box[2]]

        output = predict_eye_state(right_eye_region)

        left_ear = calculate_EAR(leftEye)
        right_ear = calculate_EAR(rightEye)
        EAR = (left_ear + right_ear) / 2
        EAR = round(EAR, 2)
        cv2.putText(frame, f'Drowsiness count: {count}', (350, 50), cv2.FONT_HERSHEY_PLAIN, 3, (0, 0, 0), 4)

        if EAR < .20 or output < 0.35:
            count += 1
            cv2.putText(frame, "DROWSY", (20, 200), cv2.FONT_HERSHEY_SIMPLEX, 3, (0, 0, 255), 4)

        if count >= 100:
            cv2.putText(frame, "TAKE A BREAK!", (300, 700), cv2.FONT_HERSHEY_SIMPLEX, 3, (0,0,255), 5)
            cap.release()
            cv2.destroyAllWindows()
            cond = False

    # Display the processed frame
    cv2.imshow("Drowsiness Detector", frame)

    # Check for key press to exit
    key = cv2.waitKey(1)
    if key == 27:
        cap.release()
        cv2.destroyAllWindows()
        cond = False


Eye State Output: tensor([[0.7368]])
Eye State Output: tensor([[0.1688]])
Eye State Output: tensor([[0.2226]])
Eye State Output: tensor([[0.3123]])
Eye State Output: tensor([[0.3959]])
Eye State Output: tensor([[0.2818]])
Eye State Output: tensor([[0.4787]])
Eye State Output: tensor([[0.2685]])
Eye State Output: tensor([[0.2443]])
Eye State Output: tensor([[0.3887]])
Eye State Output: tensor([[0.1790]])
Eye State Output: tensor([[0.4457]])
Eye State Output: tensor([[0.3102]])
Eye State Output: tensor([[0.4171]])
Eye State Output: tensor([[0.3837]])
Eye State Output: tensor([[0.4668]])
Eye State Output: tensor([[0.4132]])
Eye State Output: tensor([[0.2563]])
Eye State Output: tensor([[0.8945]])
Eye State Output: tensor([[0.2656]])
Eye State Output: tensor([[0.5458]])
Eye State Output: tensor([[0.0767]])
Eye State Output: tensor([[0.6620]])
Eye State Output: tensor([[0.7469]])
Eye State Output: tensor([[0.2542]])
Eye State Output: tensor([[0.8300]])
Eye State Output: tensor([[0.5527]])
E

ValueError: Expected more than 1 value per channel when training, got input size torch.Size([1, 256, 1, 1])