In [2]:
pip install --upgrade pip


Collecting pip
  Downloading pip-25.3-py3-none-any.whl.metadata (4.7 kB)
Downloading pip-25.3-py3-none-any.whl (1.8 MB)
   ---------------------------------------- 0.0/1.8 MB ? eta -:--:--
    --------------------------------------- 0.0/1.8 MB 1.9 MB/s eta 0:00:01
   --------- ------------------------------ 0.4/1.8 MB 6.5 MB/s eta 0:00:01
   -------------- ------------------------- 0.7/1.8 MB 5.2 MB/s eta 0:00:01
   ---------------- ----------------------- 0.7/1.8 MB 5.2 MB/s eta 0:00:01
   --------------------- ------------------ 1.0/1.8 MB 4.3 MB/s eta 0:00:01
   ------------------------ --------------- 1.1/1.8 MB 4.1 MB/s eta 0:00:01
   ---------------------------- ----------- 1.3/1.8 MB 4.0 MB/s eta 0:00:01
   ------------------------------- -------- 1.4/1.8 MB 3.9 MB/s eta 0:00:01
   ---------------------------------- ----- 1.5/1.8 MB 3.8 MB/s eta 0:00:01
   ------------------------------------- -- 1.7/1.8 MB 3.7 MB/s eta 0:00:01
   ---------------------------------------- 1.8/1.8



In [3]:
pip install opencv-python mediapipe joblib scikit-learn pillow numpy


Note: you may need to restart the kernel to use updated packages.


In [4]:
pip install torch torchvision torchaudio


Note: you may need to restart the kernel to use updated packages.


In [11]:
import cv2
import dlib
import numpy as np
import joblib
import winsound
from imutils import face_utils
from scipy.spatial import distance as dist
import torch
import torch.nn as nn
from torchvision import transforms, models
from PIL import Image

# ==================== DEVICE ====================
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using:", device)

# ==================== LOAD ML MODEL ====================
ml_model = joblib.load(r"C:\Users\Prachi\OneDrive\Desktop\college\ML project\best_drowsiness_model.pkl")
scaler = joblib.load(r"C:\Users\Prachi\OneDrive\Desktop\college\ML project\feature_scaler.pkl")

# ==================== DLIB FACE DETECTOR ====================
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor(r"C:\Users\Prachi\OneDrive\Desktop\college\ML project\shape_predictor_68_face_landmarks.dat")

# ==================== FEATURE FUNCTIONS ====================
def eye_aspect_ratio(eye):
    A = dist.euclidean(eye[1], eye[5])
    B = dist.euclidean(eye[2], eye[4])
    C = dist.euclidean(eye[0], eye[3]) + 1e-8
    return (A + B) / (2.0 * C)

def mouth_aspect_ratio(mouth):
    A = dist.euclidean(mouth[14], mouth[18])
    C = dist.euclidean(mouth[12], mouth[16]) + 1e-8
    return A / C

def circularity(eye):
    A = dist.euclidean(eye[1], eye[4])
    radius = A / 2.0
    area = np.pi * (radius ** 2)
    perimeter = sum(dist.euclidean(eye[i], eye[(i+1)%6]) for i in range(6)) + 1e-8
    return 4 * np.pi * area / (perimeter**2)

def mouth_over_eye(left_eye, right_eye, mouth):
    ear = (eye_aspect_ratio(left_eye) + eye_aspect_ratio(right_eye)) / 2.0 + 1e-8
    mar = mouth_aspect_ratio(mouth)
    return mar / ear

# ==================== CNN MODEL ====================
class DrowsinessCNN(nn.Module):
    def __init__(self):
        super(DrowsinessCNN, self).__init__()
        resnet = models.resnet18(pretrained=True)
        self.face_branch = nn.Sequential(*list(resnet.children())[:-2])
        self.face_pool = nn.AdaptiveAvgPool2d((1,1))
        self.eye_branch = nn.Sequential(
            nn.Conv2d(3,32,3,padding=1), nn.BatchNorm2d(32), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(32,64,3,padding=1), nn.BatchNorm2d(64), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(64,128,3,padding=1), nn.BatchNorm2d(128), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(128,256,3,padding=1), nn.BatchNorm2d(256), nn.ReLU(), nn.AdaptiveAvgPool2d((1,1))
        )
        self.classifier = nn.Sequential(
            nn.Linear(512+256+256,512),
            nn.BatchNorm1d(512), nn.ReLU(), nn.Dropout(0.5),
            nn.Linear(512,2)
        )
    def forward(self, face, left_eye, right_eye):
        f = self.face_branch(face)
        f = self.face_pool(f).view(f.size(0),-1)
        l = self.eye_branch(left_eye).view(left_eye.size(0),-1)
        r = self.eye_branch(right_eye).view(right_eye.size(0),-1)
        x = torch.cat((f,l,r),dim=1)
        x = self.classifier(x)
        return x

# Load CNN model
cnn_model = DrowsinessCNN().to(device)
checkpoint_path = r"C:\Users\Prachi\OneDrive\Desktop\college\ML project\drowsiness-95_98.pt"

# Allow safe globals
torch.serialization.add_safe_globals([
    torch.nn.modules.container.Sequential,
    torch.nn.modules.batchnorm.BatchNorm2d,
    torch.nn.modules.conv.Conv2d,
    torch.nn.modules.linear.Linear,
    torch.nn.modules.pooling.AdaptiveAvgPool2d,
    torch.nn.modules.pooling.MaxPool2d,
    torch.nn.modules.dropout.Dropout,
    torch.nn.modules.activation.ReLU
])

# Now load checkpoint
checkpoint = torch.load(checkpoint_path, map_location=device, weights_only=False)

# Handle different checkpoint formats:
# - dict with 'state_dict'
# - plain state_dict (dict)
# - saved full model (nn.Module)
# - object with state_dict() method
if isinstance(checkpoint, nn.Module):
    # Loaded full model object â€” use it directly
    cnn_model = checkpoint.to(device)
else:
    if isinstance(checkpoint, dict) and 'state_dict' in checkpoint:
        state_dict = checkpoint['state_dict']
    elif isinstance(checkpoint, dict):
        state_dict = checkpoint
    elif hasattr(checkpoint, 'state_dict'):
        state_dict = checkpoint.state_dict()
    else:
        raise RuntimeError("Unsupported checkpoint format: cannot extract state_dict")

    # Clean keys if they were wrapped with DataParallel 'module.' prefix
    clean_state = {k.replace("module.", ""): v for k, v in state_dict.items()}
    cnn_model.load_state_dict(clean_state)

cnn_model.eval()

# ==================== TRANSFORMS ====================
face_tf = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225])
])
eye_tf = transforms.Compose([
    transforms.Resize((64,64)),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225])
])

# ==================== BEEP FUNCTION ====================
def beep():
    winsound.Beep(1500,700)

# ==================== WEBCAM LOOP ====================
cap = cv2.VideoCapture(0)
drowsy_count = 0

while True:
    ret, frame = cap.read()
    if not ret:
        break
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = detector(gray)
    status = "ALERT"
    color = (0,255,0)

    if len(faces) > 0:
        face = faces[0]
        shape = predictor(gray, face)
        pts = face_utils.shape_to_np(shape)

        left_eye_pts = pts[36:42]
        right_eye_pts = pts[42:48]
        mouth_pts = pts[48:68]

        # ----------------- Extract ML Features -----------------
        ear = (eye_aspect_ratio(left_eye_pts) + eye_aspect_ratio(right_eye_pts)) / 2.0
        mar = mouth_aspect_ratio(mouth_pts)
        cir = (circularity(left_eye_pts) + circularity(right_eye_pts)) / 2.0
        moe = mouth_over_eye(left_eye_pts, right_eye_pts, mouth_pts)
        ml_features = scaler.transform([[ear, mar, cir, moe]])
        ml_pred = ml_model.predict(ml_features)[0]  # ML prediction 0/1

        # ----------------- CNN Features -----------------
        (x, y, w, h) = face_utils.rect_to_bb(face)
        face_crop = frame[y:y+h, x:x+w]
        lx, ly, lw, lh = cv2.boundingRect(left_eye_pts)
        rx, ry, rw, rh = cv2.boundingRect(right_eye_pts)
        left_crop = frame[ly:ly+lh, lx:lx+lw]
        right_crop = frame[ry:ry+rh, rx:rx+rw]

        try:
            face_tensor = face_tf(Image.fromarray(face_crop)).unsqueeze(0).to(device)
            left_tensor = eye_tf(Image.fromarray(left_crop)).unsqueeze(0).to(device)
            right_tensor = eye_tf(Image.fromarray(right_crop)).unsqueeze(0).to(device)
            with torch.no_grad():
                out = cnn_model(face_tensor, left_tensor, right_tensor)
                print(out)  # raw logits
                cnn_prob = torch.softmax(out, dim=1)[0][1].item()
        except:
            cnn_prob = 0.0

        # ----------------- ENSEMBLE -----------------
        final_score = 0.5*ml_pred + 0.5*cnn_prob
        final_class = 1 if final_score >= 0.5 else 0
        if final_class == 1:
            drowsy_count += 1
            status = "DROWSY!"
            color = (0,0,255)
            if drowsy_count >= 5:
                beep()
        else:
            drowsy_count = 0

        cv2.rectangle(frame, (x,y), (x+w,y+h), color,2)
        cv2.putText(frame, f"Score: {final_score:.2f}", (x, y-10), cv2.FONT_HERSHEY_SIMPLEX,0.7,color,2)

    cv2.putText(frame, status, (40,80), cv2.FONT_HERSHEY_DUPLEX,2,color,3)
    cv2.imshow("Drowsiness Detection", frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()


Using: cpu


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


tensor([[ 3.7511, -3.6848]])




tensor([[ 4.1088, -4.0382]])




tensor([[ 5.1807, -5.0933]])




tensor([[ 4.5814, -4.5203]])




tensor([[ 5.9419, -5.8676]])




tensor([[ 3.8819, -3.8172]])




tensor([[ 5.1884, -5.1006]])




tensor([[ 4.3002, -4.2405]])




tensor([[ 3.7315, -3.6695]])




tensor([[ 5.4229, -5.3351]])




tensor([[ 6.7028, -6.6143]])




tensor([[ 6.6159, -6.5401]])




tensor([[ 7.6481, -7.5593]])




tensor([[ 5.1032, -5.0284]])




tensor([[ 2.9612, -2.8981]])




tensor([[ 3.3405, -3.2752]])




tensor([[ 4.1173, -4.0387]])




tensor([[ 5.3429, -5.2595]])




tensor([[ 1.4116, -1.3402]])




tensor([[ 4.2551, -4.1847]])
