## MODULE 1: FACE DETECTION

In [None]:
import cv2
import numpy as np
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision

# -----------------------------
# CONFIG
# -----------------------------
IMAGE_PATH = "image.jpg"          # <-- put your image path
MODEL_PATH = r"E:\Projects in ML\FRAUD DETECTION SYSTEM FOR THE ONLINE PROCTORED EXAMS\src\models\face_landmarker.task"

# Head-pose estimation for a single image:
#   1. detect face landmarks with the MediaPipe Face Landmarker,
#   2. fit six of them to a fixed 3D face model with cv2.solvePnP,
#   3. convert the rotation to pitch/yaw/roll and draw the pose axes.

# -----------------------------
# Initialize Face Landmarker
# -----------------------------
options = vision.FaceLandmarkerOptions(
    base_options=python.BaseOptions(model_asset_path=MODEL_PATH),
    running_mode=vision.RunningMode.IMAGE,
    num_faces=1,
)
face_mesh = vision.FaceLandmarker.create_from_options(options)

# -----------------------------
# Load image
# -----------------------------
image = cv2.imread(IMAGE_PATH)
# cv2.imread signals a missing/unreadable file by returning None, so fail
# here with a clear message instead of an AttributeError on `image.shape`.
if image is None:
    raise ValueError(f"Image not found: {IMAGE_PATH}")

h, w = image.shape[:2]

rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb)

# -----------------------------
# Detect landmarks
# -----------------------------
result = face_mesh.detect(mp_image)

if not result.face_landmarks:
    print("No face detected")
    # `exit()` is an interactive helper injected by `site`; raising SystemExit
    # stops this cell/script without relying on it.
    raise SystemExit

landmarks = result.face_landmarks[0]

# -----------------------------
# 2D image points
# -----------------------------
# Landmark coordinates are scaled by the image width/height to get pixels.
# The order must match `model_points` below.
POSE_LANDMARK_IDS = (
    1,    # Nose tip
    152,  # Chin
    33,   # Left eye
    263,  # Right eye
    61,   # Left mouth
    291,  # Right mouth
)
image_points = np.array(
    [(landmarks[i].x * w, landmarks[i].y * h) for i in POSE_LANDMARK_IDS],
    dtype=np.float64,
)

# -----------------------------
# 3D model points
# -----------------------------
model_points = np.array([
    (0.0, 0.0, 0.0),        # Nose tip
    (0.0, -63.6, -12.5),    # Chin
    (-43.3, 32.7, -26.0),   # Left eye
    (43.3, 32.7, -26.0),    # Right eye
    (-28.9, -28.9, -24.1),  # Left mouth
    (28.9, -28.9, -24.1),   # Right mouth
], dtype=np.float64)

# -----------------------------
# Camera matrix
# -----------------------------
# No calibration is available: the focal length is approximated by the image
# width and the principal point by the image centre.
focal_length = w
center = (w / 2, h / 2)

camera_matrix = np.array([
    [focal_length, 0, center[0]],
    [0, focal_length, center[1]],
    [0, 0, 1],
], dtype=np.float64)

# Lens distortion is taken to be zero.
dist_coeffs = np.zeros((4, 1))

# -----------------------------
# Solve PnP
# -----------------------------
success, rvec, tvec = cv2.solvePnP(
    model_points,
    image_points,
    camera_matrix,
    dist_coeffs,
    flags=cv2.SOLVEPNP_ITERATIVE,
)
# Do not derive angles or draw axes from an unusable pose.
if not success:
    raise RuntimeError("solvePnP failed to estimate the head pose")

# -----------------------------
# Convert to Euler angles
# -----------------------------
rmat, _ = cv2.Rodrigues(rvec)
sy = np.sqrt(rmat[0, 0]**2 + rmat[1, 0]**2)

# NOTE(review): the model points use +Y for "up" (eyes above the nose) while
# image coordinates grow downwards, which presumably is why a roughly frontal
# face reports a pitch near +/-180 (the recorded run printed -169.96).
# Confirm the intended convention before thresholding on pitch.
pitch = np.degrees(np.arctan2(rmat[2, 1], rmat[2, 2]))
yaw   = np.degrees(np.arctan2(-rmat[2, 0], sy))
roll  = np.degrees(np.arctan2(rmat[1, 0], rmat[0, 0]))

print(f"Pitch: {pitch:.2f}")
print(f"Yaw  : {yaw:.2f}")
print(f"Roll : {roll:.2f}")

# -----------------------------
# Draw head pose axis
# -----------------------------
# The axes are drawn starting at the detected nose-tip pixel.
nose = (int(image_points[0][0]), int(image_points[0][1]))

# End points of 50-unit X, Y and Z axes in model coordinates.
axis_3d = np.float32([
    [50, 0, 0],
    [0, 50, 0],
    [0, 0, 50],
])

axis_2d, _ = cv2.projectPoints(
    axis_3d, rvec, tvec, camera_matrix, dist_coeffs
)

# Plain Python ints: some OpenCV builds reject numpy integer scalars as
# point coordinates in drawing calls.
p_x, p_y, p_z = (tuple(int(v) for v in pt.ravel()) for pt in axis_2d)

cv2.line(image, nose, p_x, (0, 0, 255), 2)  # X - red
cv2.line(image, nose, p_y, (0, 255, 0), 2)  # Y - green
cv2.line(image, nose, p_z, (255, 0, 0), 2)  # Z - blue

cv2.putText(
    image,
    f"Pitch:{pitch:.1f} Yaw:{yaw:.1f} Roll:{roll:.1f}",
    (10, 30),
    cv2.FONT_HERSHEY_SIMPLEX,
    0.7,
    (255, 255, 255),
    2,
)

cv2.imshow("Head Pose Estimation", image)
cv2.waitKey(0)
cv2.destroyAllWindows()


Pitch: -169.96
Yaw  : -10.10
Roll : -3.75


# Gaze features need to be extracted from here.

In [None]:
def _extract_gaze_features(
    self,
    face_landmarks,
    width: int,
    height: int
) -> Dict:
    
    """
    Eye Gaze Tracking using Face Mesh + solvePnP + projectPoints
    """
    result = face_landmarker.detect(mp_image)
    face_landmarks = result.face_landmarks[0]
    features = {
        "gaze_point_x": 0.0,
        "gaze_point_y": 0.0,
        "gaze_direction": "None",
        "gaze_on_script": 0,
        "left_pupil_x": 0.0,
        "left_pupil_y": 0.0,
        "right_pupil_x": 0.0,
        "right_pupil_y": 0.0,
    }

    if face_landmarks is None:
        return features

    # -----------------------------
    # 3D face model points (mm)
    # -----------------------------
    face_3d = np.array([
        (0.0, 0.0, 0.0),        # Nose tip
        (0.0, -63.6, -12.5),    # Chin
        (-43.3, 32.7, -26.0),   # Left eye
        (43.3, 32.7, -26.0),    # Right eye
        (-28.9, -28.9, -24.1),  # Left mouth
        (28.9, -28.9, -24.1)    # Right mouth
    ], dtype=np.float64)

    # -----------------------------
    # Corresponding 2D points
    # -----------------------------
    image_points = np.array([
        (face_landmarks[1].x * width, face_landmarks[1].y * height),
        (face_landmarks[152].x * width, face_landmarks[152].y * height),
        (face_landmarks[33].x * width, face_landmarks[33].y * height),
        (face_landmarks[263].x * width, face_landmarks[263].y * height),
        (face_landmarks[61].x * width, face_landmarks[61].y * height),
        (face_landmarks[291].x * width, face_landmarks[291].y * height),
    ], dtype=np.float64)

    # -----------------------------
    # Camera matrix
    # -----------------------------
    focal_length = width
    cam_matrix = np.array([
        [focal_length, 0, width / 2],
        [0, focal_length, height / 2],
        [0, 0, 1]
    ], dtype=np.float64)

    dist_coeffs = np.zeros((4, 1))

    # -----------------------------
    # Solve head pose
    # -----------------------------
    success, rvec, tvec = cv2.solvePnP(
        face_3d,
        image_points,
        cam_matrix,
        dist_coeffs,
        flags=cv2.SOLVEPNP_ITERATIVE
    )

    if not success:
        return features

    # -----------------------------
    # Pupil coordinates
    # -----------------------------
    left_pupil = face_landmarks[468]
    right_pupil = face_landmarks[473]

    lp_x, lp_y = left_pupil.x * width, left_pupil.y * height
    rp_x, rp_y = right_pupil.x * width, right_pupil.y * height

    features["left_pupil_x"] = lp_x
    features["left_pupil_y"] = lp_y
    features["right_pupil_x"] = rp_x
    features["right_pupil_y"] = rp_y

    # -----------------------------
    # Gaze ray projection
    # -----------------------------
    gaze_3d = np.array([[0, 0, 1000.0]], dtype=np.float64)
    gaze_2d, _ = cv2.projectPoints(
        gaze_3d,
        rvec,
        tvec,
        cam_matrix,
        dist_coeffs
    )

    gaze_x = int(gaze_2d[0][0][0])
    gaze_y = int(gaze_2d[0][0][1])

    features["gaze_point_x"] = gaze_x
    features["gaze_point_y"] = gaze_y

    # -----------------------------
    # Gaze direction
    # -----------------------------
    dx = gaze_x - width / 2
    dy = gaze_y - height / 2

    if abs(dx) < 40 and abs(dy) < 40:
        direction = "center"
    elif abs(dx) > abs(dy):
        direction = "right" if dx > 0 else "left"
    else:
        direction = "down" if dy > 0 else "up"

    features["gaze_direction"] = direction

    # -----------------------------
    # Script detection (strong signal)
    # -----------------------------
    if direction == "down" and gaze_y > height * 0.65:
        features["gaze_on_script"] = 1

    return features


In [2]:
import cv2
import numpy as np
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision

# -----------------------------
# CONFIG
# -----------------------------
MODEL_PATH = r"E:\Projects in ML\FRAUD DETECTION SYSTEM FOR THE ONLINE PROCTORED EXAMS\src\models\face_landmarker.task"
IMAGE_PATH = "image.jpg"

# Detects the landmarks of one face in IMAGE_PATH and shows them as green
# dots on top of the image.

# -----------------------------
# Initialize Face Landmarker
# -----------------------------
base_options = python.BaseOptions(model_asset_path=MODEL_PATH)

options = vision.FaceLandmarkerOptions(
    base_options=base_options,
    output_face_blendshapes=False,
    output_facial_transformation_matrixes=False,
    num_faces=1,
)

face_landmarker = vision.FaceLandmarker.create_from_options(options)

# -----------------------------
# Read Image
# -----------------------------
bgr = cv2.imread(IMAGE_PATH)
# cv2.imread returns None rather than raising when the file cannot be read.
if bgr is None:
    raise ValueError("Image not found")

rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
h, w, _ = rgb.shape

mp_image = mp.Image(
    image_format=mp.ImageFormat.SRGB,
    data=rgb,
)

# -----------------------------
# Detect Face Landmarks
# -----------------------------
result = face_landmarker.detect(mp_image)

if not result.face_landmarks:
    print("No face detected")
    # `exit()` is an interactive helper injected by `site`; raising SystemExit
    # stops this cell/script without relying on it.
    raise SystemExit

# Landmarks of the first (only) face; the recorded run reports 478 of them.
face_landmarks = result.face_landmarks[0]

print(f"Detected {len(face_landmarks)} face landmarks")

# -----------------------------
# Convert to pixel coordinates
# -----------------------------
# x/y are scaled by the image size and truncated to ints; z is kept as-is.
landmarks_px = [
    (int(lm.x * w), int(lm.y * h), lm.z)
    for lm in face_landmarks
]

# -----------------------------
# Visualize landmarks
# -----------------------------
for (x, y, _) in landmarks_px:
    cv2.circle(bgr, (x, y), 1, (0, 255, 0), -1)

cv2.imshow("Face Landmarks", bgr)
cv2.waitKey(0)
cv2.destroyAllWindows()


Detected 478 face landmarks
