## Load model

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.backends.cudnn as cudnn
import numpy as np
import torchvision
import os
import cv2
import mediapipe as mp
from torchvision import datasets, models, transforms

model_ft = models.mobilenet_v2(pretrained=True)

num_ftrs = model_ft.classifier[1].in_features

model_ft.classifier = nn.Sequential(
    # Flattening the output
    nn.Flatten(),
    # Dropout 30%
    nn.Dropout(0.4),
    nn.Linear(num_ftrs, 65),
    nn.ReLU(),
    # Dropout 30%
    nn.Dropout(0.4),
    nn.Linear(65, 2),
    nn.Softmax(dim=1)
)

model_ft.load_state_dict(torch.load('MobileNetV2_last.pt', map_location=torch.device('cpu')))
model_ft.eval()



MobileNetV2(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(96, eps=

In [3]:
from PIL import Image

# Load the image
input_image = Image.open("gray_eye/test/Drowsiness/frame_0000.jpg")

# Define transformations
preprocess = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# Preprocess the image
input_tensor = preprocess(input_image)

# Create a mini-batch as expected by the model
input_batch = input_tensor.unsqueeze(0)

# Move the input to CPU
input_batch = input_batch.to('cpu')
labels = ['Drowsiness', 'NotDrowsiness']
# Make the prediction
with torch.no_grad():
    output = model_ft(input_batch)
    _, preds = torch.max(abs(output), 1)
print(labels[preds])

RuntimeError: output with shape [1, 224, 224] doesn't match the broadcast shape [3, 224, 224]

## Code version1

In [3]:
# import cv2
# import time
# import mediapipe as mp
# # 
# # 
# def display_text(frame, text):
#     # Add text to the frame
#     cv2.putText(frame, text, (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

# Points =[ 63, 117, 293, 346]
# mp_eye_landmarks = mp.solutions.face_mesh.FaceMesh(static_image_mode=True, max_num_faces=1)
# cv2.namedWindow('eye', cv2.WINDOW_NORMAL)
# cv2.namedWindow('All', cv2.WINDOW_NORMAL)
# points_to_cut1 = [464, 443, 265, 450]
# points_to_cut2 = [124, 223, 244, 230]
# cut_size = (224, 224)
# cap = cv2.VideoCapture(0)  
# # video_path = '/run/user/1000/gvfs/mtp:host=OPPO_CPH1911_MVTCZSGEEA4H85F6/Bộ nhớ trong dùng chung/DCIM/Camera/VID20240405004440.mp4'
# # cap = cv2.VideoCapture(video_path)
# while True:
#     ret, frame = cap.read()  
#     if not ret:
#         break
    
#     start = time.time()
#     points_landmarks = []
#     # Đọc ảnh
#     # image = cv2.imread(image_path)

#     # Chuyển đổi ảnh sang không gian màu RGB
#     image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

#     # Nhận diện landmarks của khuôn mặt trên ảnh
#     results = mp_eye_landmarks.process(image_rgb)
#     # cropped_image = 0

#     # Kiểm tra xem có landmarks nào được nhận diện không
#     if results.multi_face_landmarks:
#         # Lấy danh sách các landmarks của khuôn mặt đầu tiên trong ảnh
#         face_landmarks = results.multi_face_landmarks[0]

#         # Tạo danh sách tọa độ của các điểm LEFT_EYE
        

#         for index in Points:
#             landmark = face_landmarks.landmark[index]
#             landmark_x = int(landmark.x * frame.shape[1])
#             landmark_y = int(landmark.y * frame.shape[0])
#             points_landmarks.append((landmark_x, landmark_y))

#         # Vẽ một hình tròn đỏ tại mỗi điểm LEFT_EYE trên ảnh
#         # for landmark in points_landmarks:
#         #     cv2.circle(image, landmark, 1, (0, 0, 255), -1)  

#         start_px = min(points_landmarks[0][0],points_landmarks[1][0])
#         start_py = min(points_landmarks[0][1],points_landmarks[2][1])
#         end_px = max(points_landmarks[2][0],points_landmarks[3][0])
#         end_py = max(points_landmarks[1][1],points_landmarks[3][1])
#         # cropped_image = frame[start_py:end_py, start_px:end_px]

#         # Preprocess the image
#         input_image = Image.fromarray(frame[start_py:end_py, start_px:end_px])
#         input_tensor = preprocess(input_image)

#         # Create a mini-batch as expected by the model
#         input_batch = input_tensor.unsqueeze(0)

#         # Move the input to CPU
#         input_batch = input_batch.to('cpu')
        
#         # Make the prediction
#         with torch.no_grad():
#             output = model_ft(input_batch)
#             _, preds = torch.max(abs(output), 1)
#         display_text(frame,labels[preds])
#         end = time.time()
#         totalTime = end-start
#         fps = 1/totalTime
#         # Display FPS on the frame
#         cv2.putText(frame, f"FPS: {int(fps)}", (50, 100), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
#         cv2.imshow('All', frame)
#         cv2.imshow('eye', frame[start_py:end_py, start_px:end_px])
#     else:
#         display_text(frame,"No Face")
#         cv2.imshow('eye', frame)
#         cv2.imshow('All', frame)
#         continue
#     # Calculate FPS
    
    
#     print("FPS: ",fps)
#     # Display frame
    
    
#     if cv2.waitKey(1) & 0xFF == ord('q'):
#         break

# cap.release()
# cv2.destroyAllWindows()




## Code Version 2

In [9]:
import cv2
import time
import mediapipe as mp
# 
# 
def display_text(frame, text):
    # Add text to the frame
    cv2.putText(frame, text, (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

points_to_cut1 = [464, 443, 265, 450]
points_to_cut2 = [124, 223, 244, 230]
cut_size = (224, 224)

# Khởi tạo Face Mesh detection
mp_face_mesh = mp.solutions.face_mesh
mp_drawing = mp.solutions.drawing_utils


cv2.namedWindow('eye', cv2.WINDOW_NORMAL)
cv2.namedWindow('All', cv2.WINDOW_NORMAL)
# video_path = '//media/rambo/HDD/University/DATN/Data/Microsleep/data.mp4'
# cap = cv2.VideoCapture(video_path)
cap = cv2.VideoCapture(0)  
while True:
    ret, frame = cap.read()  
    if not ret:
        break
    
    start = time.time()
    # Chuyển đổi ảnh sang không gian màu RGB
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    with mp_face_mesh.FaceMesh(static_image_mode=True, max_num_faces=1) as face_mesh:
            results = face_mesh.process(frame_rgb)
            if results.multi_face_landmarks:
                 # Tính toán tọa độ của các điểm cần cắt cho vùng 1
               
                landmarks = results.multi_face_landmarks[0].landmark
                min_x1 = min(landmarks[i].x * frame.shape[1] for i in points_to_cut1)
                min_y1 = min(landmarks[i].y * frame.shape[0] for i in points_to_cut1)
                max_x1 = max(landmarks[i].x * frame.shape[1] for i in points_to_cut1)
                max_y1 = max(landmarks[i].y * frame.shape[0] for i in points_to_cut1)

                # Tính toán tọa độ của các điểm cần cắt cho vùng 2
               
                min_x2 = min(landmarks[i].x * frame.shape[1] for i in points_to_cut2)
                min_y2 = min(landmarks[i].y * frame.shape[0] for i in points_to_cut2)
                max_x2 = max(landmarks[i].x * frame.shape[1] for i in points_to_cut2)
                max_y2 = max(landmarks[i].y * frame.shape[0] for i in points_to_cut2)

                # Cắt vùng mắt từ ảnh và resize về kích thước 224x224
                cut_region1 = frame[int(min_y1):int(max_y1), int(min_x1):int(max_x1)]
                resized_cut_region1 = cv2.resize(cut_region1, cut_size)

                cut_region2 = frame[int(min_y2):int(max_y2), int(min_x2):int(max_x2)]
                resized_cut_region2 = cv2.resize(cut_region2, cut_size)

                # Hiển thị vùng cắt đã resize
                # cv2.imshow("Resized Cut Region 1", resized_cut_region1)
                # cv2.imshow("Resized Cut Region 2", resized_cut_region2)
                # Ghép lại các vùng cắt đã resize
                merged_image = cv2.hconcat([resized_cut_region2, resized_cut_region1])
                cv2.imshow("eye",merged_image)



                # Preprocess the image
                input_image = Image.fromarray(merged_image)
                input_tensor = preprocess(input_image)

                # Create a mini-batch as expected by the model
                input_batch = input_tensor.unsqueeze(0)

                # Move the input to CPU
                input_batch = input_batch.to('cpu')
                
                # Make the prediction
                with torch.no_grad():
                    output = model_ft(input_batch)
                    _, preds = torch.max(abs(output), 1)
                display_text(frame,labels[preds])
                end = time.time()
                totalTime = end-start
                fps = 1/totalTime
                # Display FPS on the frame
                cv2.putText(frame, f"FPS: {int(fps)}", (50, 100), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
                cv2.imshow('All', frame)
                # cv2.imshow('eye', merged_image)
            else:
                display_text(frame,"No Face")
                cv2.imshow('eye', frame)
                cv2.imshow('All', frame)
                continue
            # Calculate FPS
    
    
    # print("FPS: ",fps)
    
    
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()




QObject::moveToThread: Current thread (0x8ef3b60) is not the object's thread (0x96e4890).
Cannot move to target thread (0x8ef3b60)

QObject::moveToThread: Current thread (0x8ef3b60) is not the object's thread (0x96e4890).
Cannot move to target thread (0x8ef3b60)

QObject::moveToThread: Current thread (0x8ef3b60) is not the object's thread (0x96e4890).
Cannot move to target thread (0x8ef3b60)

QObject::moveToThread: Current thread (0x8ef3b60) is not the object's thread (0x96e4890).
Cannot move to target thread (0x8ef3b60)

QObject::moveToThread: Current thread (0x8ef3b60) is not the object's thread (0x96e4890).
Cannot move to target thread (0x8ef3b60)

QObject::moveToThread: Current thread (0x8ef3b60) is not the object's thread (0x96e4890).
Cannot move to target thread (0x8ef3b60)

QObject::moveToThread: Current thread (0x8ef3b60) is not the object's thread (0x96e4890).
Cannot move to target thread (0x8ef3b60)

QObject::moveToThread: Current thread (0x8ef3b60) is not the object's thread

error: OpenCV(4.9.0) /io/opencv/modules/imgproc/src/resize.cpp:4152: error: (-215:Assertion failed) !ssize.empty() in function 'resize'


: 