In [1]:
!pip install opencv-python dlib numpy scipy



In [2]:
!wget http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2
!bunzip2 shape_predictor_68_face_landmarks.dat.bz2

--2025-09-29 03:59:32--  http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2
Resolving dlib.net (dlib.net)... 107.180.26.78
Connecting to dlib.net (dlib.net)|107.180.26.78|:80... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2 [following]
--2025-09-29 03:59:32--  https://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2
Connecting to dlib.net (dlib.net)|107.180.26.78|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 64040097 (61M)
Saving to: ‘shape_predictor_68_face_landmarks.dat.bz2’


2025-09-29 03:59:34 (38.6 MB/s) - ‘shape_predictor_68_face_landmarks.dat.bz2’ saved [64040097/64040097]



In [3]:
# Colab cell 2: imports & helpers
import cv2, dlib, math, time
import numpy as np
from imutils import face_utils
from IPython.display import display, clear_output, Image
from google.colab.output import eval_js
from base64 import b64decode
import imageio  # for saving GIFs later (pip installed above)

In [4]:
def get_head_pose(shape, img_size):
    """Early draft of the head-pose helper: it only builds camera intrinsics.

    The body stops after setting up a pinhole-camera approximation, so the
    call returns ``None`` and ``shape`` is never read. A later cell defines
    ``get_head_pose`` again with the full pose estimation; once that cell has
    run, it replaces this definition.

    Args:
        shape: Not used by this draft.
        img_size: ``(height, width)`` of the image.
    """
    height, width = img_size[0], img_size[1]

    # Focal length is approximated by the image width; the principal point is
    # placed at the image centre.
    fx = fy = width
    cx, cy = width / 2, height / 2
    camera_matrix = np.array(
        [[fx, 0, cx],
         [0, fy, cy],
         [0, 0, 1]],
        dtype="double",
    )

    # All-zero coefficients: lens distortion is ignored.
    dist_coeffs = np.zeros((4, 1))



In [5]:
# Colab cell 2: imports & helpers
import cv2, dlib, math, time
import numpy as np
from imutils import face_utils
from IPython.display import display, clear_output, Image
from google.colab.output import eval_js
from base64 import b64decode
import imageio  # for saving GIFs later (pip installed above)

# dlib models shared by the rest of the notebook; both are created once here
# and reused for every frame.
# `detector` is called on a grayscale frame to find face rectangles.
detector = dlib.get_frontal_face_detector()
# `predictor` loads the landmark model file from the current working directory
# (the file unpacked by the download cell).
predictor = dlib.shape_predictor('shape_predictor_68_face_landmarks.dat')

# helper: convert dlib shape to numpy array (68,2)
def shape_to_np(shape):
    """Copy the 68 landmark points of a shape object into an integer array.

    Args:
        shape: Object exposing ``part(i)`` for ``i`` in ``0..67``; each part
            must have ``x`` and ``y`` attributes.

    Returns:
        Array of shape ``(68, 2)`` with dtype ``int``; row ``i`` holds
        ``(x, y)`` of landmark ``i``.
    """
    points = [(shape.part(idx).x, shape.part(idx).y) for idx in range(68)]
    return np.array(points, dtype="int")

# Eye Aspect Ratio (EAR)
def eye_aspect_ratio(eye):
    """Compute the eye aspect ratio (EAR) from six eye landmark points.

    Args:
        eye: Sequence of six points that support subtraction (e.g. a ``(6, 2)``
            NumPy array). Points 0 and 3 span the eye horizontally; the pairs
            (1, 5) and (2, 4) span it vertically.

    Returns:
        Sum of the two vertical distances divided by twice the horizontal
        distance.
    """
    vertical_sum = np.linalg.norm(eye[1] - eye[5]) + np.linalg.norm(eye[2] - eye[4])
    horizontal = np.linalg.norm(eye[0] - eye[3])
    return vertical_sum / (2.0 * horizontal)

# Head pose helper: returns projected nose point and approximate euler angles (yaw, pitch, roll)
def get_head_pose(shape, img_size):
    """Estimate head orientation from 2-D facial landmarks via ``cv2.solvePnP``.

    Six landmarks are matched against a generic 3-D face model and the
    resulting rotation is decomposed into Euler angles.

    Args:
        shape: Indexable landmark container (e.g. the ``(68, 2)`` array built
            by ``shape_to_np``); entries 30, 8, 36, 45, 48 and 54 are read.
        img_size: ``(height, width)`` of the frame. The width doubles as the
            approximate focal length and the image centre is used as the
            principal point.

    Returns:
        A 3-tuple
        ``(nose_end_point, (yaw, pitch, roll), (rotation_vector, translation_vector))``:

        * ``nose_end_point`` - ``(x, y)`` image coordinates of the model point
          ``(0, 0, 1000)``, used to draw the nose direction.
        * ``yaw``, ``pitch``, ``roll`` - angles in degrees, expressed in the
          OpenCV camera frame (X right, Y down, Z forward): yaw is the rotation
          about Y (head turning left/right), pitch about X (nodding) and roll
          about Z (in-plane tilt).
        * the raw ``solvePnP`` rotation and translation vectors.
    """
    image_points = np.array([
        shape[30],     # nose tip
        shape[8],      # chin
        shape[36],     # left eye left corner
        shape[45],     # right eye right corner
        shape[48],     # left mouth corner
        shape[54]      # right mouth corner
    ], dtype='double')

    # Generic 3-D face model, listed in the same order as image_points.
    model_points = np.array([
        (0.0, 0.0, 0.0),
        (0.0, -330.0, -65.0),
        (-225.0, 170.0, -135.0),
        (225.0, 170.0, -135.0),
        (-150.0, -150.0, -125.0),
        (150.0, -150.0, -125.0)
    ], dtype='double')

    # Pinhole approximation: focal length ~ image width, principal point at
    # the image centre.
    focal_length = img_size[1]
    center = (img_size[1] / 2, img_size[0] / 2)
    camera_matrix = np.array([
        [focal_length, 0, center[0]],
        [0, focal_length, center[1]],
        [0, 0, 1]
    ], dtype="double")
    dist_coeffs = np.zeros((4, 1))  # assume no lens distortion

    # The success flag is not inspected, matching the previous behaviour.
    _, rotation_vector, translation_vector = cv2.solvePnP(
        model_points, image_points, camera_matrix, dist_coeffs, flags=cv2.SOLVEPNP_ITERATIVE
    )

    # project a point (0,0,1000) onto the image plane -> to draw nose direction
    (nose_end_point2D, _) = cv2.projectPoints(
        np.array([(0.0, 0.0, 1000.0)]), rotation_vector, translation_vector, camera_matrix, dist_coeffs
    )

    # rotation vector -> rotation matrix -> Euler angles for R = Rz * Ry * Rx
    R, _ = cv2.Rodrigues(rotation_vector)
    sy = math.sqrt(R[0, 0] * R[0, 0] + R[1, 0] * R[1, 0])
    # The Y angle has the same formula in the regular and the singular case.
    angle_y = math.atan2(-R[2, 0], sy)
    if sy >= 1e-6:
        angle_x = math.atan2(R[2, 1], R[2, 2])
        angle_z = math.atan2(R[1, 0], R[0, 0])
    else:
        # Gimbal lock: X and Z rotations are no longer separable, so Z is
        # pinned to zero and the whole remaining rotation is attributed to X.
        angle_x = math.atan2(-R[1, 2], R[1, 1])
        angle_z = 0

    # Map axis angles to head angles in the camera frame. The aerospace mapping
    # (x=roll, y=pitch, z=yaw) does not apply here: with Y pointing down the
    # image, turning the head left/right is a rotation about Y.
    yaw = np.degrees(angle_y)
    pitch = np.degrees(angle_x)
    roll = np.degrees(angle_z)
    # NOTE(review): the model above has Y pointing up while image Y points
    # down, so the solved rotation presumably contains a half turn about X and
    # pitch sits near +/-180 degrees for a frontal face - confirm before
    # thresholding on pitch.
    return (tuple(nose_end_point2D.reshape(2)), (yaw, pitch, roll), (rotation_vector, translation_vector))


In [6]:
# Colab cell 3: detector class
class DrowsinessDetector:
    """Frame-by-frame drowsiness and distraction monitor.

    Drowsiness is flagged when the eye aspect ratio (EAR) stays below a
    threshold for a number of consecutive processed faces; distraction is
    flagged when the absolute head yaw exceeds a threshold.

    Relies on the notebook-level ``detector``, ``predictor``, ``shape_to_np``,
    ``eye_aspect_ratio`` and ``get_head_pose``.

    NOTE: the EAR counter is a single value shared by every face found in a
    frame, so the logic is only meaningful when one face is in view.
    """

    def __init__(self, ear_thresh=0.25, ear_consec_frames=15, head_yaw_thresh=25):
        """Store thresholds and reset the running state.

        Args:
            ear_thresh: EAR below this value counts as "eyes closed".
            ear_consec_frames: Number of consecutive low-EAR observations
                needed before a drowsiness alert is raised.
            head_yaw_thresh: Absolute yaw (same unit as returned by
                ``get_head_pose``) above which the driver counts as distracted.
        """
        self.EAR_THRESH = ear_thresh
        self.EAR_CONSEC_FRAMES = ear_consec_frames
        self.HEAD_YAW_THRESH = head_yaw_thresh
        self.counter = 0               # consecutive low-EAR observations
        self.alarm_on = False          # True while a drowsiness episode is active
        self.total_drowsy_events = 0   # number of distinct drowsiness episodes

    def process_frame(self, frame):
        """Annotate one BGR frame and report the current driver state.

        The frame is drawn on in place.

        Args:
            frame: BGR image array.

        Returns:
            ``(frame, info)`` where ``frame`` is the annotated input and
            ``info`` is a dict with keys ``status``, ``ear``, ``yaw``,
            ``pitch`` and ``roll``. When no face is found, ``status`` is
            ``"No face"`` and the other values are ``None``.
        """
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        rects = detector(gray, 0)
        info = {"status": "No face", "ear": None, "yaw": None, "pitch": None, "roll": None}

        for rect in rects:
            shape = predictor(gray, rect)
            shape_np = shape_to_np(shape)

            leftEye = shape_np[42:48]
            rightEye = shape_np[36:42]
            leftEAR = eye_aspect_ratio(leftEye)
            rightEAR = eye_aspect_ratio(rightEye)
            ear = (leftEAR + rightEAR) / 2.0
            info["ear"] = ear

            # draw eye landmarks; coordinates are cast to plain ints because
            # some OpenCV builds reject NumPy integer scalars as point values
            for (x, y) in np.concatenate([leftEye, rightEye]):
                cv2.circle(frame, (int(x), int(y)), 1, (0, 255, 0), -1)

            # head pose
            nose_proj, (yaw, pitch, roll), _ = get_head_pose(shape_np, frame.shape[:2])
            info["yaw"], info["pitch"], info["roll"] = (yaw, pitch, roll)

            # draw nose direction
            nose_point = (int(shape_np[30][0]), int(shape_np[30][1]))
            cv2.line(frame, nose_point, (int(nose_proj[0]), int(nose_proj[1])), (255, 0, 0), 2)

            # drowsiness logic: EAR-based
            drowsy = False
            if ear < self.EAR_THRESH:
                self.counter += 1
                if self.counter >= self.EAR_CONSEC_FRAMES:
                    drowsy = True
                    info["status"] = "ALERT: Drowsy!"
                    if not self.alarm_on:
                        # count each episode once, not once per frame
                        self.alarm_on = True
                        self.total_drowsy_events += 1
                else:
                    info["status"] = "Eyes maybe closing..."
            else:
                # eyes open again: end any running episode
                self.counter = 0
                self.alarm_on = False
                info["status"] = "Attentive"

            # distraction logic: head yaw (approx). A drowsiness alert takes
            # priority and is not replaced by the distraction status.
            if abs(yaw) > self.HEAD_YAW_THRESH and not drowsy:
                info["status"] = "ALERT: Distracted!"

            # large banner for every alert (drowsy or distracted)
            if "ALERT" in info["status"]:
                cv2.putText(frame, "!!! WAKE UP !!!", (100, 200),
                            cv2.FONT_HERSHEY_SIMPLEX, 2.0, (0, 0, 255), 4)

            # overlay info
            status_color = (0, 0, 255) if "ALERT" in info["status"] else (0, 255, 0)
            cv2.putText(frame, f"Status: {info['status']}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.8, status_color, 2)
            cv2.putText(frame, f"EAR:{ear:.2f}", (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1)
            cv2.putText(frame, f"Yaw:{yaw:.1f}", (10, 90), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1)

        return frame, info


In [7]:
# Colab cell 4: JS capture helper
# JavaScript evaluated in the browser to grab a single webcam frame.
# The snippet opens the camera, draws the current video frame onto a canvas and
# resolves to a JPEG data URL. Cleanup runs in `finally`, so the camera tracks
# are stopped and the temporary <video> element is removed from the page even
# when playback or drawing fails; without the removal every capture would leave
# another <video> element in the document.
capture_js = """
async function captureImage(quality=0.7) {
  const video = document.createElement('video');
  const stream = await navigator.mediaDevices.getUserMedia({video: true});
  try {
    document.body.appendChild(video);
    video.style.display = 'block';
    video.srcObject = stream;
    await video.play();
    const canvas = document.createElement('canvas');
    canvas.width = video.videoWidth;
    canvas.height = video.videoHeight;
    const ctx = canvas.getContext('2d');
    ctx.drawImage(video, 0, 0, canvas.width, canvas.height);
    const dataUrl = canvas.toDataURL('image/jpeg', quality);
    return dataUrl;
  } finally {
    stream.getTracks().forEach(track => track.stop());
    video.srcObject = null;
    video.remove();
  }
}
captureImage();
"""

def js_capture_frame():
    """Grab one webcam frame through the browser and decode it.

    Evaluates the ``capture_js`` snippet with ``eval_js``, which yields a data
    URL of the form ``data:image/jpeg;base64,...``. Everything after the first
    comma is base64-decoded and handed to OpenCV.

    Returns:
        The result of ``cv2.imdecode(..., cv2.IMREAD_COLOR)`` for the captured
        JPEG bytes.
    """
    data_url = eval_js(capture_js)
    # Split off the "data:image/jpeg;base64" prefix; only the payload matters.
    _prefix, payload = data_url.split(',', 1)
    jpeg_buffer = np.frombuffer(b64decode(payload), np.uint8)
    return cv2.imdecode(jpeg_buffer, cv2.IMREAD_COLOR)


In [8]:
# Colab cell 5: run the real-time simulation (press the notebook stop button to interrupt)
det = DrowsinessDetector(ear_thresh=0.25, ear_consec_frames=10, head_yaw_thresh=25)

# Upper bound on the frames kept for the demo GIF. Each entry is a full RGB
# frame, so an uncapped list would grow for as long as the loop runs.
MAX_GIF_FRAMES = 300
frames_for_gif = []  # optional: collect annotated frames for a demo GIF
try:
    while True:
        frame = js_capture_frame()  # capture from webcam via browser
        if frame is None:
            # cv2.imdecode yields None for bytes it cannot decode; skip the
            # capture instead of crashing inside process_frame.
            continue
        annotated, info = det.process_frame(frame)
        # convert BGR->JPEG bytes and display
        _, jpg = cv2.imencode('.jpg', annotated)
        display(Image(data=jpg.tobytes()))
        frames_for_gif.append(cv2.cvtColor(annotated, cv2.COLOR_BGR2RGB))
        if len(frames_for_gif) > MAX_GIF_FRAMES:
            # keep only the most recent frames
            del frames_for_gif[0]
        # wait=True defers the clear until the next frame is ready to show
        clear_output(wait=True)
except KeyboardInterrupt:
    print("Stopped by user")
except Exception as e:
    # e.g. the browser refusing camera access; report it and end the cell
    print("Exception:", e)


Exception: NotAllowedError: Permission denied


In [9]:
# Variant of the capture loop in the previous cell that does not collect
# frames for the GIF. It rebinds `det`, so the detector's counters start from
# zero again. Unlike the previous cell, only KeyboardInterrupt is handled here;
# any other error (such as the camera permission error shown in this cell's
# output) propagates.
det = DrowsinessDetector(ear_thresh=0.25, ear_consec_frames=10, head_yaw_thresh=25)

try:
    while True:
        # one browser capture per iteration
        frame = js_capture_frame()
        annotated, info = det.process_frame(frame)
        # encode the annotated frame as JPEG for inline display
        _, jpg = cv2.imencode('.jpg', annotated)
        display(Image(data=jpg.tobytes()))
        # wait=True defers the clear until the next output arrives
        clear_output(wait=True)
except KeyboardInterrupt:
    print("Stopped by user")

MessageError: NotAllowedError: Permission denied

In [None]:
# Colab cell 6: save GIF (run after you collected frames_for_gif)
# Write the collected frames out as an animated GIF. `frames_for_gif` must
# already exist, i.e. the capture-loop cell that creates it has to run first.
if not frames_for_gif:
    print("No frames recorded. Run the capture loop first and collect frames.")
else:
    gif_path = '/content/drowsiness_demo.gif'
    imageio.mimsave(gif_path, frames_for_gif, fps=6)
    print("Saved demo GIF to", gif_path)
