<a href="https://colab.research.google.com/github/IrfanESD/SE_Detection/blob/main/prj_v1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
 !pip install opencv-python-headless

SNIPPING IMAGES FROM VIDEO..

In [None]:
import cv2
import os

# === 🔧 Input: Set video path ===
video_path = '/content/drive/MyDrive/Clips/C0016.MP4'
output_folder = '/content/drive/MyDrive/clear_student_frames/MainSet14'
os.makedirs(output_folder, exist_ok=True)

# === 🧮 Parameters ===
sharpness_threshold = 100.0  # Minimum variance of the Laplacian; higher = only clearer frames
sampling_interval = 30       # Evaluate one frame out of every `sampling_interval` frames

# === 🎬 Read and Process the Video ===
cap = cv2.VideoCapture(video_path)

if not cap.isOpened():
    # Raise instead of calling exit(): inside a notebook exit() shuts down the
    # kernel/runtime, whereas an exception only stops this cell.
    raise RuntimeError(f"❌ Error: Could not open video: {video_path}")

frame_count = 0  # 1-based index of the frame most recently read
saved_count = 0  # number of frames actually written to disk

try:
    # Get and print video FPS (optional, for info)
    fps = cap.get(cv2.CAP_PROP_FPS)
    print(f"🎥 Video FPS: {fps}")

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break  # end of stream or read error

        frame_count += 1

        # Only look at every `sampling_interval`-th frame
        if frame_count % sampling_interval != 0:
            continue

        # Sharpness score = variance of the Laplacian of the grayscale frame
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        sharpness = cv2.Laplacian(gray, cv2.CV_64F).var()

        # Save only sharp frames
        if sharpness > sharpness_threshold:
            # Six-digit zero padding keeps file names in frame order under a plain
            # string sort even when the video has more than 9999 frames.
            filename = os.path.join(output_folder, f"frame_{frame_count:06d}.jpg")
            # cv2.imwrite returns False on failure instead of raising
            if cv2.imwrite(filename, frame):
                saved_count += 1
                print(f"✅ Saved frame {frame_count} | Sharpness: {sharpness:.2f}")
            else:
                print(f"⚠️ Could not write frame {frame_count} to '{filename}'")
finally:
    # Always free the capture handle, even if processing fails part-way
    cap.release()

print(f"\n✅ Done: {saved_count} sharp frames saved to ➜ '{output_folder}'")


RENAME IMAGES IN ASCENDING ORDER..

In [None]:
from google.colab import drive
import os
import glob

import re  # used only by the natural-sort key in this cell

# Step 1: Mount Google Drive
drive.mount('/content/drive')

# Step 2: Set path to the parent folder whose sub-folders hold the images
parent_dir = '/content/drive/MyDrive/clear_student_frames'  # ⬅️ Change this

# Step 3: Get list of all image paths from all direct sub-folders
image_extensions = ('*.jpg', '*.jpeg', '*.png')  # Extend if needed
all_images = []

for ext in image_extensions:
    all_images.extend(glob.glob(os.path.join(parent_dir, '*', ext)))


def _natural_key(path):
    """Sort key that compares embedded digit runs numerically ("2.jpg" before "10.jpg")."""
    # re.split with a capturing group alternates text, digits, text, ... so the
    # element types always line up when two keys are compared.
    return [int(part) if part.isdigit() else part for part in re.split(r'(\d+)', path)]


# Natural ordering keeps numbered files in numeric order; a plain string sort
# would place "10.jpg" before "2.jpg".
all_images.sort(key=_natural_key)

# Step 4: Rename all images serially and print progress
start_serial = 1  # ⬅️ Change this if you want to start from a different number
renamed_count = 0

# The rename is done in two passes. os.rename silently replaces an existing
# target on POSIX, so renaming straight to "<serial><ext>" can overwrite a file
# that has not been processed yet (e.g. when this cell is re-run on a folder that
# already holds 1.jpg, 2.jpg, ...). Pass 1 moves every file to a temporary name,
# pass 2 assigns the final serial names.
staged = []  # (serial, temporary path, original path)

for idx, img_path in enumerate(all_images, start_serial):
    folder = os.path.dirname(img_path)
    ext = os.path.splitext(img_path)[1]
    temp_path = os.path.join(folder, f"__renaming_{idx}{ext}")

    try:
        if temp_path != img_path and os.path.exists(temp_path):
            raise FileExistsError(f"temporary name already in use: '{temp_path}'")
        os.rename(img_path, temp_path)
        staged.append((idx, temp_path, img_path))
    except Exception as e:
        print(f"⚠️ Error renaming '{img_path}': {e}")

for idx, temp_path, img_path in staged:
    folder = os.path.dirname(temp_path)
    ext = os.path.splitext(temp_path)[1]
    new_name = f"{idx}{ext}"
    new_path = os.path.join(folder, new_name)

    try:
        # Never overwrite: a file can still occupy the target if its own pass-1 rename failed
        if os.path.exists(new_path):
            raise FileExistsError(f"target already exists: '{new_path}' (file left at '{temp_path}')")
        os.rename(temp_path, new_path)
        folder_name = os.path.basename(folder)
        print(f"{idx}: Renamed in '{folder_name}' → {new_name}")
        renamed_count += 1
    except Exception as e:
        print(f"⚠️ Error renaming '{img_path}': {e}")

# Step 5: Output total renamed images
print(f"\n✅ Total images renamed: {renamed_count}")


MERGING IN A SINGLE FOLDER..

In [None]:
import os
import shutil

# 🔁 Replace these with the actual source folder paths
source_folders = [
    '/content/drive/MyDrive/clear_student_frames/SET01',
    '/content/drive/MyDrive/clear_student_frames/SET02',
    '/content/drive/MyDrive/clear_student_frames/SET03',
    '/content/drive/MyDrive/clear_student_frames/SET04',
    '/content/drive/MyDrive/clear_student_frames/SET05',
    '/content/drive/MyDrive/clear_student_frames/SET06'
]

# 🗂️ Target folder where all images will be copied
target_folder = '/content/drive/MyDrive/clear_student_frames/merged_set'
os.makedirs(target_folder, exist_ok=True)

copied_count = 0   # files actually copied into target_folder
skipped_count = 0  # files skipped because the same file name already exists there

# 📦 Copy files from each folder
for folder in source_folders:
    # A missing folder is reported and skipped instead of aborting the whole merge
    if not os.path.isdir(folder):
        print(f"⚠️ Source folder not found, skipping: {folder}")
        continue

    for file in sorted(os.listdir(folder)):
        if file.lower().endswith(('.jpg', '.jpeg', '.png')):
            src = os.path.join(folder, file)
            dst = os.path.join(target_folder, file)
            if not os.path.exists(dst):  # Never overwrite a file that has the same name
                shutil.copy(src, dst)
                copied_count += 1
            else:
                # Files are matched by name only, so two different images that share
                # a name across folders are NOT both kept.
                skipped_count += 1
                print(f"⚠️ Skipped duplicate: {file}")

print("✅ All images merged into:", target_folder)
print(f"   Copied: {copied_count} | Skipped (name already present): {skipped_count}")
# Note kept from an earlier run: total copied images — 2038.


✅ All images merged into: /content/drive/MyDrive/clear_student_frames/merged_set


INSTALL ULTRALYTICS, MEDIAPIPE AND OPENCV..

In [None]:
pip install ultralytics mediapipe opencv-python

SPLIT IMG TO TRAIN, VAL, TEST..

In [None]:
import os
import random
import shutil

# === Paths ===
image_dir = '/content/drive/MyDrive/MMimages'
output_base = '/content/drive/MyDrive/image_split'

# Create output folders
splits = ['train', 'val', 'test']
for split in splits:
    os.makedirs(os.path.join(output_base, split), exist_ok=True)

# Get and shuffle image files.
# os.listdir returns names in arbitrary order, so the list is sorted first;
# otherwise the fixed seed below would not give the same split on every run.
all_images = sorted(
    f for f in os.listdir(image_dir) if f.lower().endswith(('.jpg', '.png', '.jpeg'))
)
random.seed(42)  # For reproducibility
random.shuffle(all_images)

# The slice boundaries below are fixed counts, not ratios: with fewer than 1900
# images the later splits shrink or end up empty.
if len(all_images) < 1900:
    print(f"⚠️ Only {len(all_images)} images found in '{image_dir}'; val/test splits will be smaller than planned")

# Define splits
train_imgs = all_images[:1500]
val_imgs = all_images[1500:1900]
test_imgs = all_images[1900:]  # all remaining images

# Helper that relocates one split's files
def move_images(file_list, split_name):
    """Move every file named in `file_list` from `image_dir` into `output_base/split_name`.

    The files are moved, not copied, so they no longer exist in `image_dir` afterwards.
    """
    for name in file_list:
        shutil.move(
            os.path.join(image_dir, name),
            os.path.join(output_base, split_name, name),
        )

# Relocate each split into its own folder
move_images(train_imgs, 'train')
move_images(val_imgs, 'val')
move_images(test_imgs, 'test')

# Summary of how many files went where
print(f"✅ Done: {len(train_imgs)} train, {len(val_imgs)} val, {len(test_imgs)} test images MOVED to '{output_base}'")


✅ Done: 1500 train, 400 val, 142 test images MOVED to '/content/drive/MyDrive/image_split'


LABELING IMAGES = v1

In [None]:
import os
import cv2
import math
from collections import Counter
from ultralytics import YOLO
import mediapipe as mp

# Folder layout: source images, YOLO-format label output, annotated previews
image_folder = '/content/drive/MyDrive/Saif/images'
label_folder = '/content/drive/MyDrive/Saif/labels'
visual_folder = '/content/drive/MyDrive/Saif/labeled_images'

# Output folders are created up front; existing ones are left untouched
os.makedirs(label_folder, exist_ok=True)
os.makedirs(visual_folder, exist_ok=True)

# Object detector and pose estimator used by the labeling loop
model = YOLO('yolov8n.pt')
pose = mp.solutions.pose.Pose(
    static_image_mode=True,
    model_complexity=2,
)

# Position in this list is the class id: 0 = Engaged, 1 = Distracted
class_names = ['Engaged', 'Distracted']

# Estimate head direction based on eyes
def estimate_head_direction(landmarks):
    """Classify head direction from the horizontal offset between the two eye landmarks.

    Args:
        landmarks: indexable sequence of landmark objects exposing an ``x``
            attribute; entries 2 and 5 are used as the left and right eye.

    Returns:
        'left' if ``left_eye.x - right_eye.x`` exceeds 0.04, 'right' if it is
        below -0.04, 'front' otherwise, and 'unknown' when the landmarks cannot
        be read (missing entries, ``None`` input, non-numeric coordinates).
    """
    try:
        left_eye = landmarks[2]
        right_eye = landmarks[5]
        dx = left_eye.x - right_eye.x
        if dx > 0.04:
            return 'left'
        elif dx < -0.04:
            return 'right'
        else:
            return 'front'
    # Only data problems map to 'unknown'. A bare `except:` would also swallow
    # KeyboardInterrupt/SystemExit and make a long run impossible to interrupt.
    except (IndexError, KeyError, TypeError, AttributeError):
        return 'unknown'

# Binary engagement label: does this person's head direction match the majority one?
def classify_posture_binary(landmarks, teacher_dir):
    """Return 0 (Engaged) when the estimated head direction equals `teacher_dir`, else 1 (Distracted)."""
    return 0 if estimate_head_direction(landmarks) == teacher_dir else 1

# Process images: detect persons, estimate each head direction, label every person
# against the majority direction, then write YOLO labels plus an annotated preview.
for file in os.listdir(image_folder):
    if not file.lower().endswith(('.jpg', '.jpeg', '.png')):
        continue

    image_path = os.path.join(image_folder, file)
    image = cv2.imread(image_path)
    if image is None:
        continue  # unreadable or corrupt image

    img_h, img_w = image.shape[:2]
    results = model(image_path)[0]
    boxes = results.boxes.data

    all_directions = []  # head direction of every person with a detected pose
    temp_landmarks = []  # landmarks per kept person box, or None when no pose was found
    temp_boxes = []      # clamped (x1, y1, x2, y2) per kept person box

    for box in boxes:
        cls = int(box[5])
        if cls != 0:
            continue  # only person class

        # Clamp the detection to the image bounds before cropping
        x1, y1, x2, y2 = map(int, box[:4])
        x1, y1 = max(0, x1), max(0, y1)
        x2, y2 = min(img_w, x2), min(img_h, y2)

        person_crop = image[y1:y2, x1:x2]
        if person_crop.size == 0:
            continue

        try:
            crop_rgb = cv2.cvtColor(person_crop, cv2.COLOR_BGR2RGB)
            pose_result = pose.process(crop_rgb)
        except Exception as e:
            # Best effort: a failing crop is treated as "no pose", but the error is
            # reported, and KeyboardInterrupt is no longer swallowed.
            print(f"⚠️ Pose estimation failed for a person in {file}: {e}")
            pose_result = None

        landmarks = None
        if pose_result and pose_result.pose_landmarks:
            landmarks = pose_result.pose_landmarks.landmark
            # Named head_dir rather than `dir`, which would shadow the builtin
            head_dir = estimate_head_direction(landmarks)
            all_directions.append(head_dir)

        temp_landmarks.append(landmarks)
        temp_boxes.append((x1, y1, x2, y2))

    # Decide teacher direction (majority vote); defaults to 'front' when no pose was found
    teacher_dir = 'front'
    if all_directions:
        teacher_dir = Counter(all_directions).most_common(1)[0][0]

    label_lines = []
    engaged_count = 0
    total_count = 0  # only persons with a detected pose are counted

    for (x1, y1, x2, y2), landmarks in zip(temp_boxes, temp_landmarks):
        if landmarks:
            posture_class = classify_posture_binary(landmarks, teacher_dir)

            if posture_class == 0:
                engaged_count += 1
            total_count += 1

            # YOLO label format: class, box centre and size, all normalised to [0, 1]
            x_center = ((x1 + x2) / 2) / img_w
            y_center = ((y1 + y2) / 2) / img_h
            w = (x2 - x1) / img_w
            h = (y2 - y1) / img_h

            label_line = f"{posture_class} {x_center:.6f} {y_center:.6f} {w:.6f} {h:.6f}"
            label_lines.append(label_line)

            # Draw bounding box with label (green = Engaged, red = Distracted; BGR order)
            color = (0, 255, 0) if posture_class == 0 else (0, 0, 255)
            cv2.rectangle(image, (x1, y1), (x2, y2), color, 2)
            cv2.putText(image, class_names[posture_class], (x1, y1 - 5),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)
        else:
            # Pose not detected: grey box in the preview, no line in the label file
            cv2.rectangle(image, (x1, y1), (x2, y2), (128, 128, 128), 2)
            cv2.putText(image, 'no pose', (x1, y1 - 5),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (128, 128, 128), 1)

    # Save YOLO-format label file (same base name as the image, .txt extension)
    label_file = os.path.join(label_folder, os.path.splitext(file)[0] + '.txt')
    with open(label_file, 'w') as f:
        for line in label_lines:
            f.write(line + '\n')

    # Save annotated image
    visual_path = os.path.join(visual_folder, file)
    cv2.imwrite(visual_path, image)

    # Print engagement stats
    engagement_percent = (engaged_count / total_count * 100) if total_count > 0 else 0
    print(f"{file}: {engaged_count}/{total_count} engaged ({engagement_percent:.1f}%) - Teacher direction: {teacher_dir}")


LABELING IMAGES = v2

In [None]:
import os
import cv2
import math
from collections import Counter
from ultralytics import YOLO
import mediapipe as mp

# Validation images to label and the folder that receives their YOLO label files
image_folder = '/content/drive/MyDrive/Dataset/images/val'
label_folder = '/content/drive/MyDrive/Dataset/labels/val'

# Make sure the label folder exists before anything is written to it
os.makedirs(label_folder, exist_ok=True)

# Object detector and pose estimator used by the labeling loop
model = YOLO('yolov8n.pt')
pose = mp.solutions.pose.Pose(
    static_image_mode=True,
    model_complexity=2,
)

# Estimate head direction based on eyes
def estimate_head_direction(landmarks):
    """Classify head direction from the horizontal offset between the two eye landmarks.

    Args:
        landmarks: indexable sequence of landmark objects exposing an ``x``
            attribute; entries 2 and 5 are used as the left and right eye.

    Returns:
        'left' if ``left_eye.x - right_eye.x`` exceeds 0.04, 'right' if it is
        below -0.04, 'front' otherwise, and 'unknown' when the landmarks cannot
        be read (missing entries, ``None`` input, non-numeric coordinates).
    """
    try:
        left_eye = landmarks[2]
        right_eye = landmarks[5]
        dx = left_eye.x - right_eye.x
        if dx > 0.04:
            return 'left'
        elif dx < -0.04:
            return 'right'
        else:
            return 'front'
    # Only data problems map to 'unknown'. A bare `except:` would also swallow
    # KeyboardInterrupt/SystemExit and make a long run impossible to interrupt.
    except (IndexError, KeyError, TypeError, AttributeError):
        return 'unknown'

# Binary engagement label relative to the majority head direction (teacher_dir)
def classify_posture_binary(landmarks, teacher_dir):
    """Return 0 (Engaged) if the estimated head direction matches `teacher_dir`, otherwise 1 (Distracted)."""
    head_direction = estimate_head_direction(landmarks)
    if head_direction == teacher_dir:
        return 0  # Engaged
    return 1  # Distracted

# Process images: detect persons, estimate each head direction, label every person
# against the majority direction and write one YOLO label file per image.
for file in os.listdir(image_folder):
    if not file.lower().endswith(('.jpg', '.jpeg', '.png')):
        continue

    image_path = os.path.join(image_folder, file)
    image = cv2.imread(image_path)
    if image is None:
        continue  # unreadable or corrupt image

    img_h, img_w = image.shape[:2]
    results = model(image_path)[0]
    boxes = results.boxes.data

    all_directions = []  # head direction of every person with a detected pose
    temp_landmarks = []  # landmarks per kept person box, or None when no pose was found
    temp_boxes = []      # clamped (x1, y1, x2, y2) per kept person box

    for box in boxes:
        cls = int(box[5])
        if cls != 0:
            continue  # only person class

        # Clamp the detection to the image bounds before cropping
        x1, y1, x2, y2 = map(int, box[:4])
        x1, y1 = max(0, x1), max(0, y1)
        x2, y2 = min(img_w, x2), min(img_h, y2)

        person_crop = image[y1:y2, x1:x2]
        if person_crop.size == 0:
            continue

        try:
            crop_rgb = cv2.cvtColor(person_crop, cv2.COLOR_BGR2RGB)
            pose_result = pose.process(crop_rgb)
        except Exception as e:
            # Best effort: a failing crop is treated as "no pose", but the error is
            # reported, and KeyboardInterrupt is no longer swallowed.
            print(f"⚠️ Pose estimation failed for a person in {file}: {e}")
            pose_result = None

        landmarks = None
        if pose_result and pose_result.pose_landmarks:
            landmarks = pose_result.pose_landmarks.landmark
            # Named head_dir rather than `dir`, which would shadow the builtin
            head_dir = estimate_head_direction(landmarks)
            all_directions.append(head_dir)

        temp_landmarks.append(landmarks)
        temp_boxes.append((x1, y1, x2, y2))

    # Determine teacher direction by majority; defaults to 'front' when no pose was found
    teacher_dir = 'front'
    if all_directions:
        teacher_dir = Counter(all_directions).most_common(1)[0][0]

    label_lines = []

    # Persons without a detected pose get no label line
    for (x1, y1, x2, y2), landmarks in zip(temp_boxes, temp_landmarks):
        if landmarks:
            posture_class = classify_posture_binary(landmarks, teacher_dir)

            # YOLO label format: class, box centre and size, all normalised to [0, 1]
            x_center = ((x1 + x2) / 2) / img_w
            y_center = ((y1 + y2) / 2) / img_h
            w = (x2 - x1) / img_w
            h = (y2 - y1) / img_h

            label_line = f"{posture_class} {x_center:.6f} {y_center:.6f} {w:.6f} {h:.6f}"
            label_lines.append(label_line)

    # Save YOLO-format label file (same base name as the image, .txt extension)
    label_file = os.path.join(label_folder, os.path.splitext(file)[0] + '.txt')
    with open(label_file, 'w') as f:
        for line in label_lines:
            f.write(line + '\n')

    print(f"{file}: {len(label_lines)} labels saved (Teacher direction: {teacher_dir})")



image 1/1 /content/drive/MyDrive/Dataset/images/val/1044.jpg: 384x640 12 persons, 15 chairs, 3 dining tables, 288.1ms
Speed: 5.6ms preprocess, 288.1ms inference, 4.0ms postprocess per image at shape (1, 3, 384, 640)
1044.jpg: 10 labels saved (Teacher direction: left)

image 1/1 /content/drive/MyDrive/Dataset/images/val/1047.jpg: 384x640 12 persons, 17 chairs, 3 dining tables, 158.6ms
Speed: 3.5ms preprocess, 158.6ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)
1047.jpg: 11 labels saved (Teacher direction: left)

image 1/1 /content/drive/MyDrive/Dataset/images/val/1049.jpg: 384x640 14 persons, 16 chairs, 3 dining tables, 145.4ms
Speed: 3.4ms preprocess, 145.4ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)
1049.jpg: 13 labels saved (Teacher direction: left)

image 1/1 /content/drive/MyDrive/Dataset/images/val/1050.jpg: 384x640 11 persons, 17 chairs, 3 dining tables, 142.7ms
Speed: 3.1ms preprocess, 142.7ms inference, 1.9ms postprocess per image at

LABELING IMAGES = v3

FaceMesh 1

In [None]:
import os
import cv2
from collections import Counter
from ultralytics import YOLO
import mediapipe as mp

# Folder layout: source images, YOLO label output, annotated previews
image_folder = '/content/drive/MyDrive/Saif/img'
label_folder = '/content/drive/MyDrive/Saif/labels'
visualized_folder = '/content/drive/MyDrive/Saif/visual'

# Output folders are created up front; existing ones are left untouched
os.makedirs(label_folder, exist_ok=True)
os.makedirs(visualized_folder, exist_ok=True)

# YOLOv8 model; the labeling loop keeps only its class-0 (person) boxes
model = YOLO('yolov8n.pt')

# MediaPipe FaceMesh, configured for still images with at most one face per input
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(
    static_image_mode=True,
    max_num_faces=1,
    refine_landmarks=True,
)

def estimate_head_direction(landmarks):
    """Classify head direction from the horizontal offset between two face-mesh eye landmarks.

    Args:
        landmarks: indexable sequence of landmark objects exposing an ``x``
            attribute; entries 33 and 263 are used as the left and right eye.

    Returns:
        'left' if ``left_eye.x - right_eye.x`` exceeds 0.04, 'right' if it is
        below -0.04, 'front' otherwise, and 'unknown' when the landmarks cannot
        be read (missing entries, ``None`` input, non-numeric coordinates).
    """
    try:
        left_eye = landmarks[33]
        right_eye = landmarks[263]
        dx = left_eye.x - right_eye.x
        if dx > 0.04:
            return 'left'
        elif dx < -0.04:
            return 'right'
        else:
            return 'front'
    # Only data problems map to 'unknown'. A bare `except:` would also swallow
    # KeyboardInterrupt/SystemExit and make a long run impossible to interrupt.
    except (IndexError, KeyError, TypeError, AttributeError):
        return 'unknown'

def classify_posture_binary(landmarks, teacher_dir):
    """Return 0 (Engaged) if the estimated head direction matches `teacher_dir`, otherwise 1 (Distracted)."""
    if estimate_head_direction(landmarks) == teacher_dir:
        return 0  # Engaged
    return 1  # Distracted

def _to_abs(lm, box):
    """Map a landmark, treated as normalised to the crop `box`, to integer full-image pixel coordinates."""
    bx1, by1, bx2, by2 = box
    return (int(bx1 + lm.x * (bx2 - bx1)), int(by1 + lm.y * (by2 - by1)))


# Process all images: detect persons, run FaceMesh on each person crop, label every
# face against the majority head direction, then write labels and an annotated preview.
for file in os.listdir(image_folder):
    if not file.lower().endswith(('.jpg', '.jpeg', '.png')):
        continue

    image_path = os.path.join(image_folder, file)
    image = cv2.imread(image_path)
    if image is None:
        continue  # unreadable or corrupt image

    img_h, img_w = image.shape[:2]

    results = model(image_path)[0]
    boxes = results.boxes.data

    all_directions = []  # head direction of every person whose face was found
    temp_landmarks = []  # face landmarks per person box, or None when no face was found
    temp_boxes = []      # clamped (x1, y1, x2, y2) per person box

    for box in boxes:
        cls = int(box[5])
        if cls != 0:
            continue  # keep class 0 only

        # Clamp the detection to the image bounds before cropping
        x1, y1, x2, y2 = map(int, box[:4])
        x1, y1 = max(0, x1), max(0, y1)
        x2, y2 = min(img_w, x2), min(img_h, y2)

        landmarks = None
        person_crop = image[y1:y2, x1:x2]
        if person_crop.size != 0:
            try:
                crop_rgb = cv2.cvtColor(person_crop, cv2.COLOR_BGR2RGB)
                face_result = face_mesh.process(crop_rgb)
            except Exception as e:
                # Best effort: a failing crop is treated as "no face", but the error is
                # reported, and KeyboardInterrupt is no longer swallowed.
                print(f"⚠️ FaceMesh failed for a person in {file}: {e}")
                face_result = None

            if face_result and face_result.multi_face_landmarks:
                landmarks = face_result.multi_face_landmarks[0].landmark
                direction = estimate_head_direction(landmarks)
                all_directions.append(direction)

        # Every person box is kept (empty crops included) so it is still drawn as "No face"
        temp_landmarks.append(landmarks)
        temp_boxes.append((x1, y1, x2, y2))

    # Majority head direction; defaults to 'front' when no face was found
    teacher_dir = 'front'
    if all_directions:
        teacher_dir = Counter(all_directions).most_common(1)[0][0]

    label_lines = []

    for person_box, landmarks in zip(temp_boxes, temp_landmarks):
        x1, y1, x2, y2 = person_box

        if landmarks:
            posture_class = classify_posture_binary(landmarks, teacher_dir)
            direction = estimate_head_direction(landmarks)
            label_text = f"{direction}, {'Engaged' if posture_class == 0 else 'Distracted'}"
            box_color = (0, 255, 0) if posture_class == 0 else (0, 0, 255)

            # YOLO label format: class, box centre and size, all normalised to [0, 1]
            x_center = ((x1 + x2) / 2) / img_w
            y_center = ((y1 + y2) / 2) / img_h
            w = (x2 - x1) / img_w
            h = (y2 - y1) / img_h
            label_line = f"{posture_class} {x_center:.6f} {y_center:.6f} {w:.6f} {h:.6f}"
            label_lines.append(label_line)

            # Key landmarks in absolute image coordinates
            left_eye = _to_abs(landmarks[33], person_box)
            right_eye = _to_abs(landmarks[263], person_box)
            nose = _to_abs(landmarks[1], person_box)
            chin = _to_abs(landmarks[152], person_box)
            forehead = _to_abs(landmarks[10], person_box)  # Forehead landmark

            # Midpoints for reference
            eye_mid = ((left_eye[0] + right_eye[0]) // 2, (left_eye[1] + right_eye[1]) // 2)
            head_mid = ((nose[0] + chin[0]) // 2, (nose[1] + chin[1]) // 2)

            # Direction from the eye midpoint towards the nose/chin midpoint
            vx = head_mid[0] - eye_mid[0]
            vy = head_mid[1] - eye_mid[1]

            poi_dist = 150  # length in pixels of the drawn direction line
            norm = (vx ** 2 + vy ** 2) ** 0.5
            if norm == 0:
                norm = 1  # avoid division by zero when both midpoints coincide
            vx_norm = vx / norm
            vy_norm = vy / norm

            # Point of interest: poi_dist pixels from the eye midpoint along that direction
            poi_x = int(eye_mid[0] + vx_norm * poi_dist)
            poi_y = int(eye_mid[1] + vy_norm * poi_dist)

            # Draw bounding box and label
            cv2.rectangle(image, (x1, y1), (x2, y2), box_color, 2)
            cv2.putText(image, label_text, (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)

            # Draw gaze lines from left_eye, right_eye, nose, chin, forehead to POI
            for pt in [left_eye, right_eye, nose, chin, forehead]:
                cv2.line(image, pt, (poi_x, poi_y), (0, 0, 255), 2)
                cv2.circle(image, pt, 4, (0, 255, 255), -1)  # mark landmarks in yellow

            # Draw POI
            cv2.circle(image, (poi_x, poi_y), 6, (0, 0, 255), -1)

        else:
            # No face found: grey box in the preview, no line in the label file
            label_text = "No face"
            cv2.rectangle(image, (x1, y1), (x2, y2), (128, 128, 128), 2)
            cv2.putText(image, label_text, (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)

    # Save labels (same base name as the image, .txt extension)
    label_file = os.path.join(label_folder, os.path.splitext(file)[0] + '.txt')
    with open(label_file, 'w') as f:
        for line in label_lines:
            f.write(line + '\n')

    # Save visualized image
    out_path = os.path.join(visualized_folder, file)
    cv2.imwrite(out_path, image)

    print(f"{file}: {len(label_lines)} labels saved (Teacher direction: {teacher_dir})")


LABELING IMAGES = v4

FaceMesh 2

In [None]:
import cv2
import numpy as np
from ultralytics import YOLO
import mediapipe as mp
import os

# --- Input images and folder for the annotated copies ---
input_folder = '/content/drive/MyDrive/Saif/img'
output_folder = '/content/drive/MyDrive/Saif/vvsual'

os.makedirs(output_folder, exist_ok=True)

# --- YOLOv8 face detector weights, loaded from Drive ---
model = YOLO('/content/drive/MyDrive/Saif/yolov8l-face-lindevs.pt')  # Make sure this path is correct!

# --- MediaPipe Face Mesh configured for still images ---
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(
    static_image_mode=True,
    max_num_faces=10,
    refine_landmarks=True,
    min_detection_confidence=0.5,
)

# --- Helper: scale a normalized landmark to pixel coordinates ---
def landmark_to_point(landmark, shape):
    """Return the integer (x, y) pixel position of `landmark` in an image of the given `shape`.

    `shape` is read as (height, width, ...); `landmark.x` is scaled by the width
    and `landmark.y` by the height, then truncated with int().
    """
    rows, cols = shape[:2]
    px = int(landmark.x * cols)
    py = int(landmark.y * rows)
    return px, py

# Face-mesh landmark indices used for the overlay
points_ids = {
    'nose_tip': 1,
    'chin': 152,
    'left_eye_outer': 33,
    'right_eye_outer': 263,
    'left_forehead': 10,
    'mouth_left': 61,
    'mouth_right': 291
}

# Lines drawn from the nose tip: (target landmark, BGR colour)
nose_line_targets = (
    ('left_forehead', (255, 0, 0)),
    ('chin', (0, 255, 255)),
    ('left_eye_outer', (0, 0, 255)),
    ('right_eye_outer', (0, 0, 255)),
)

margin = 20  # extra pixels around each face box for better landmark detection

# --- Process images ---
for filename in os.listdir(input_folder):
    if not filename.lower().endswith(('.jpg', '.png', '.jpeg')):
        continue
    img_path = os.path.join(input_folder, filename)
    image = cv2.imread(img_path)
    if image is None:
        continue  # unreadable or corrupt image
    orig_h, orig_w = image.shape[:2]

    # Draw on a copy: face crops below are taken from the untouched `image`, so
    # lines drawn for one face never end up in the crop of a neighbouring face.
    annotated = image.copy()

    # --- Detect faces ---
    results = model(image)[0]
    boxes = results.boxes.xyxy.cpu().numpy()  # (x1, y1, x2, y2)

    for box in boxes:
        x1, y1, x2, y2 = map(int, box)
        # Expanded crop, clamped to the image bounds
        x1m = max(0, x1 - margin)
        y1m = max(0, y1 - margin)
        x2m = min(orig_w, x2 + margin)
        y2m = min(orig_h, y2 + margin)

        face_crop = image[y1m:y2m, x1m:x2m]
        if face_crop.size == 0:
            continue  # degenerate box; cv2.cvtColor would fail on an empty crop

        # Convert BGR to RGB for MediaPipe
        face_rgb = cv2.cvtColor(face_crop, cv2.COLOR_BGR2RGB)

        # --- MediaPipe Face Mesh detection ---
        mp_results = face_mesh.process(face_rgb)
        if not mp_results.multi_face_landmarks:
            continue
        landmarks = mp_results.multi_face_landmarks[0]  # only the first face in the crop is used

        # --- Key landmarks, mapped back to original image coordinates ---
        pts = {}
        for name, idx in points_ids.items():
            pt = landmark_to_point(landmarks.landmark[idx], face_crop.shape)
            pts[name] = (pt[0] + x1m, pt[1] + y1m)

        # --- Draw face bounding box (the detector box, without margin) ---
        cv2.rectangle(annotated, (x1, y1), (x2, y2), (0, 255, 0), 2)

        # --- Draw lines from the nose tip to forehead, chin, and eyes ---
        p_nose = pts['nose_tip']
        for target, color in nose_line_targets:
            cv2.line(annotated, p_nose, pts[target], color, 2)

        # Draw points for visibility
        for p in pts.values():
            cv2.circle(annotated, p, 3, (0, 0, 255), -1)

    # --- Save result ---
    cv2.imwrite(os.path.join(output_folder, filename), annotated)

print("Done processing all images.")


LABELING IMAGES = v5

In [None]:
pip install ultralytics mediapipe opencv-python

In [None]:
import cv2
import numpy as np
from ultralytics import YOLO
import mediapipe as mp
import os

# --- Input images and folder for the annotated copies ---
input_folder = '/content/drive/MyDrive/Saif/img'
output_folder = '/content/drive/MyDrive/Saif/visuuualll'
os.makedirs(output_folder, exist_ok=True)

# --- YOLOv8 face detector weights, loaded from Drive ---
model = YOLO('/content/drive/MyDrive/Saif/yolov8s-face-lindevs.pt')

# --- MediaPipe Face Mesh configured for still images ---
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(
    static_image_mode=True,
    max_num_faces=10,
    refine_landmarks=True,
    min_detection_confidence=0.5,
)

# --- Helper: normalized landmark -> integer pixel position ---
def landmark_to_point(landmark, shape):
    """Scale `landmark.x` / `landmark.y` by the width / height taken from `shape` and truncate to ints.

    `shape` is read as (height, width, ...). Returns an (x, y) tuple.
    """
    img_height, img_width = shape[:2]
    x_px = int(landmark.x * img_width)
    y_px = int(landmark.y * img_height)
    return x_px, y_px

# Face-mesh landmark indices used for the overlay
points_ids = {
    'nose_tip': 1,
    'chin': 152,
    'left_eye_outer': 33,
    'right_eye_outer': 263,
    'left_forehead': 10,
    'mouth_left': 61,
    'mouth_right': 291
}

# Landmarks whose offset to the nose tip is averaged into the direction vector
direction_keys = ['chin', 'left_eye_outer', 'right_eye_outer', 'mouth_left', 'mouth_right']

# One line colour (BGR) per landmark connected to the extended nose point
line_colors = {
    'left_forehead': (0, 255, 255),     # Yellow
    'chin': (255, 0, 0),              # Blue
    'left_eye_outer': (0, 0, 255),    # Red
    'right_eye_outer': (255, 255, 0), # Cyan
    'mouth_left': (255, 0, 255),      # Magenta
    'mouth_right': (0, 165, 255)      # Orange
}

margin = 20             # extra pixels around each detected face box
gaze_line_length = 190  # pixels from the nose tip to the extended nose point

# --- Process each image ---
for filename in os.listdir(input_folder):
    if not filename.lower().endswith(('.jpg', '.jpeg', '.png')):
        continue

    img_path = os.path.join(input_folder, filename)
    image = cv2.imread(img_path)
    if image is None:
        continue  # unreadable or corrupt image

    orig_h, orig_w = image.shape[:2]

    # Draw on a copy: face crops below are taken from the untouched `image`, so
    # the long gaze lines of one face never end up in the crop of another face.
    annotated = image.copy()

    results = model(image)[0]
    boxes = results.boxes.xyxy.cpu().numpy()

    for box in boxes:
        x1, y1, x2, y2 = map(int, box)
        # Expanded crop, clamped to the image bounds
        x1m = max(0, x1 - margin)
        y1m = max(0, y1 - margin)
        x2m = min(orig_w, x2 + margin)
        y2m = min(orig_h, y2 + margin)

        face_crop = image[y1m:y2m, x1m:x2m]
        if face_crop.size == 0:
            continue  # degenerate box; cv2.cvtColor would fail on an empty crop

        face_rgb = cv2.cvtColor(face_crop, cv2.COLOR_BGR2RGB)
        mp_results = face_mesh.process(face_rgb)

        if not mp_results.multi_face_landmarks:
            continue

        for landmarks in mp_results.multi_face_landmarks:
            try:
                # Landmark coordinates mapped back to the full image
                pts = {}
                for name, idx in points_ids.items():
                    pt = landmark_to_point(landmarks.landmark[idx], face_crop.shape)
                    pts[name] = (pt[0] + x1m, pt[1] + y1m)

                # Average of the vectors pointing from each reference landmark to the nose tip
                p_nose = np.array(pts['nose_tip'])
                direction_vectors = [p_nose - np.array(pts[key]) for key in direction_keys]
                avg_dir = np.mean(direction_vectors, axis=0)
                norm = np.linalg.norm(avg_dir)
                if norm < 1e-6:
                    continue  # no usable direction for this face
                avg_dir /= norm

                # Extended nose point, converted to plain Python ints because some
                # OpenCV builds reject NumPy scalar types in point tuples.
                nose_extended = tuple((p_nose + avg_dir * gaze_line_length).astype(int).tolist())

                # Draw colored gaze lines
                for key, color in line_colors.items():
                    cv2.line(annotated, pts[key], nose_extended, color, 2)

                # Draw landmark points (without names)
                for pt in pts.values():
                    cv2.circle(annotated, pt, 3, (0, 0, 255), -1)

                # Highlight extended nose point
                cv2.circle(annotated, nose_extended, 4, (0, 0, 255), -1)

            except Exception as e:
                print(f"⚠️ Error processing landmarks in {filename}: {e}")
                continue

    # Save processed image
    output_path = os.path.join(output_folder, filename)
    cv2.imwrite(output_path, annotated)

print("✅ Done. Keypoint labels removed and multicolor gaze lines applied.")


LABELING IMAGES = v6

In [None]:
import cv2
import numpy as np
from ultralytics import YOLO
import mediapipe as mp
import os

# Input images and folder for this cell's output
input_folder = '/content/drive/MyDrive/Saif/img'
output_folder = '/content/drive/MyDrive/Saif/yyy'
os.makedirs(output_folder, exist_ok=True)

# Two YOLO models: face detector weights from Drive and the stock pose model
face_model = YOLO('/content/drive/MyDrive/Saif/yolov8s-face-lindevs.pt', verbose=False)
body_model = YOLO('yolov8n-pose.pt', verbose=False)

# MediaPipe Face Mesh configured for still images
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(
    static_image_mode=True,
    max_num_faces=30,
    refine_landmarks=True,
    min_detection_confidence=0.5,
)

def landmark_to_point(landmark, shape):
    """Convert a landmark with fractional `x`/`y` into integer (x, y) pixels for an image of `shape`.

    `shape` is read as (height, width, ...); results are truncated with int().
    """
    n_rows, n_cols = shape[:2]
    col = int(landmark.x * n_cols)
    row = int(landmark.y * n_rows)
    return col, row

def cosine_similarity(v1, v2):
    """Return the cosine of the angle between two vectors.

    Args:
        v1: First vector, or ``None``.
        v2: Second vector, or ``None``.

    Returns:
        ``dot(v1, v2) / (|v1| * |v2|)``; ``0.0`` when either input is ``None``
        or the product of the norms is not greater than ``1e-6``.
    """
    if v1 is None or v2 is None:
        return 0.0
    inner = np.dot(v1, v2)
    magnitude = np.linalg.norm(v1) * np.linalg.norm(v2)
    if magnitude > 1e-6:
        return inner / magnitude
    return 0.0

# --- Classification thresholds ------------------------------------------------
# Faces whose detector box is smaller than these pixel sizes are only labelled
# Engaged/Distracted; the eye and mouth heuristics are skipped for them.
MIN_FACE_HEIGHT_FOR_DETAILED = 70
MIN_FACE_WIDTH_FOR_DETAILED = 50
SLEEPY_EYE_RATIO_THRESH = 0.01    # eye_ratio below this -> 'Sleepy'
TALKING_MOUTH_RATIO_THRESH = 2.8  # mouth_ratio above this -> 'Talking'
GAZE_SIMILARITY_THRESH = 0.4      # similarity to the mean gaze below this -> 'Distracted'

# --- Geometry constants (previously inline magic numbers) ---------------------
FACE_CROP_MARGIN = 20     # pixels of context added around each face box
GAZE_LINE_LENGTH = 190    # length in pixels of the drawn gaze ray
MAX_FACE_BODY_DIST = 150  # max face-center/body-center distance for a match

# FaceMesh landmark indices read by the heuristics below.
POINT_IDS = {
    'nose_tip': 1, 'chin': 152,
    'left_eye_outer': 33, 'right_eye_outer': 263,
    'left_forehead': 10, 'mouth_left': 61,
    'mouth_right': 291, 'left_eye_top': 159,
    'left_eye_bottom': 145, 'right_eye_top': 386,
    'right_eye_bottom': 374
}

# BGR colour of the line drawn from each landmark to the extended nose point.
GAZE_LINE_COLORS = {
    'left_forehead': (0, 255, 255),
    'chin': (255, 0, 0),
    'left_eye_outer': (0, 0, 255),
    'right_eye_outer': (255, 255, 0),
    'mouth_left': (255, 0, 255),
    'mouth_right': (0, 165, 255)
}


def _dist(p, q):
    """Return the Euclidean distance between two (x, y) points."""
    return np.linalg.norm(np.array(p) - np.array(q))


def _turned_face(center):
    """Return the face record used when no usable landmarks were obtained."""
    return {'center': center, 'label': 'Turned', 'gaze_vector': None, 'engagement_percent': None}


for filename in os.listdir(input_folder):
    if not filename.lower().endswith(('.jpg', '.jpeg', '.png')):
        continue
    img_path = os.path.join(input_folder, filename)
    image = cv2.imread(img_path)
    if image is None:
        continue

    # All drawing goes onto a copy. Face crops are still cut from `image`
    # later in this iteration, so drawing on `image` itself would feed gaze
    # lines from earlier faces into FaceMesh for the following faces.
    annotated = image.copy()

    orig_h, orig_w = image.shape[:2]
    body_results = body_model(image)[0]
    # Keep only detections whose class column (index 5) is 0.
    person_boxes = [b for b in body_results.boxes.data.cpu().numpy() if int(b[5]) == 0]

    face_results = face_model(image)[0]
    boxes = face_results.boxes.xyxy.cpu().numpy().astype(int)
    face_data = []

    # ---- Stage 1: per-face landmarks, gaze vector and eye/mouth ratios -------
    for (x1, y1, x2, y2) in boxes:
        # Pad the face box, clamped to the image bounds.
        x1m = max(0, x1 - FACE_CROP_MARGIN)
        y1m = max(0, y1 - FACE_CROP_MARGIN)
        x2m = min(orig_w, x2 + FACE_CROP_MARGIN)
        y2m = min(orig_h, y2 + FACE_CROP_MARGIN)

        face_crop = image[y1m:y2m, x1m:x2m]
        face_rgb = cv2.cvtColor(face_crop, cv2.COLOR_BGR2RGB)
        mp_results = face_mesh.process(face_rgb)

        face_center = ((x1 + x2) // 2, (y1 + y2) // 2)
        face_w, face_h = x2 - x1, y2 - y1

        if not mp_results.multi_face_landmarks:
            face_data.append(_turned_face(face_center))
            continue

        for landmarks in mp_results.multi_face_landmarks:
            try:
                # Landmark positions in full-image pixel coordinates.
                pts = {}
                for name, idx in POINT_IDS.items():
                    px, py = landmark_to_point(landmarks.landmark[idx], face_crop.shape)
                    pts[name] = (px + x1m, py + y1m)

                # Gaze direction: mean of the vectors pointing from five face
                # landmarks to the nose tip, normalized to unit length.
                p_nose = np.array(pts['nose_tip'])
                direction_vectors = [p_nose - np.array(pts[key]) for key in
                                     ['chin', 'left_eye_outer', 'right_eye_outer', 'mouth_left', 'mouth_right']]
                avg_dir = np.mean(direction_vectors, axis=0)
                norm = np.linalg.norm(avg_dir)
                gaze_vec = avg_dir / norm if norm > 1e-6 else None

                if gaze_vec is not None:
                    nose_extended = (p_nose + gaze_vec * GAZE_LINE_LENGTH).astype(int)
                    for key, color in GAZE_LINE_COLORS.items():
                        cv2.line(annotated, pts[key], tuple(nose_extended), color, 2)

                # NOTE(review): the eye "width" is measured from the outer eye
                # corner to the 'left_forehead' landmark (id 10), not across
                # the eye. Kept as-is because SLEEPY_EYE_RATIO_THRESH was
                # chosen against this definition - confirm intent.
                left_eye_h = _dist(pts['left_eye_top'], pts['left_eye_bottom'])
                left_eye_w = _dist(pts['left_eye_outer'], pts['left_forehead'])
                right_eye_h = _dist(pts['right_eye_top'], pts['right_eye_bottom'])
                right_eye_w = _dist(pts['right_eye_outer'], pts['left_forehead'])
                left_ratio = left_eye_h / left_eye_w if left_eye_w > 1e-6 else 0
                right_ratio = right_eye_h / right_eye_w if right_eye_w > 1e-6 else 0
                eye_ratio = (left_ratio + right_ratio) / 2

                # NOTE(review): `mouth_h` is the chin-to-nose-tip distance, so
                # this ratio is (chin-nose) / mouth width - confirm intent.
                mouth_w = _dist(pts['mouth_right'], pts['mouth_left'])
                mouth_h = _dist(pts['chin'], pts['nose_tip'])
                mouth_ratio = mouth_h / mouth_w if mouth_w > 1e-6 else 0

                face_data.append({
                    'center': face_center, 'label': 'Unknown', 'gaze_vector': gaze_vec,
                    'engagement_percent': None,
                    'eye_ratio': eye_ratio, 'mouth_ratio': mouth_ratio,
                    'face_w': face_w, 'face_h': face_h
                })

            except Exception as e:
                # Still record the face as 'Turned', but report the cause
                # instead of swallowing it silently.
                print(f"⚠️ Error processing landmarks in {filename}: {e}")
                face_data.append(_turned_face(face_center))
                continue

    # ---- Stage 2: label each face against the group's mean gaze --------------
    valid_gaze_vectors = [f['gaze_vector'] for f in face_data if f['gaze_vector'] is not None]
    mean_gaze = None
    if valid_gaze_vectors:
        mean_dir = np.mean(valid_gaze_vectors, axis=0)
        mean_norm = np.linalg.norm(mean_dir)
        # If the gaze vectors cancel out there is no consensus direction.
        # Leaving mean_gaze as None makes cosine_similarity return 0.0 rather
        # than dividing by zero here.
        if mean_norm > 1e-6:
            mean_gaze = mean_dir / mean_norm

    for face in face_data:
        gv = face['gaze_vector']
        if gv is None:
            face['engagement_percent'] = None
            face['label'] = 'Turned'
            continue

        similarity = cosine_similarity(gv, mean_gaze)
        face['engagement_percent'] = int(similarity * 100)

        if face['face_w'] < MIN_FACE_WIDTH_FOR_DETAILED or face['face_h'] < MIN_FACE_HEIGHT_FOR_DETAILED:
            # Small face: gaze agreement only.
            face['label'] = 'Engaged' if similarity >= GAZE_SIMILARITY_THRESH else 'Distracted'
        elif face['eye_ratio'] < SLEEPY_EYE_RATIO_THRESH:
            face['label'] = 'Sleepy'
        elif face['mouth_ratio'] > TALKING_MOUTH_RATIO_THRESH:
            face['label'] = 'Talking'
        elif similarity < GAZE_SIMILARITY_THRESH:
            face['label'] = 'Distracted'
        else:
            face['label'] = 'Engaged'

    # ---- Stage 3: attach the nearest face label to each person box -----------
    for body in person_boxes:
        x1, y1, x2, y2, _, _ = map(int, body[:6])
        body_center = ((x1 + x2) // 2, (y1 + y2) // 2)

        closest_face = None
        min_dist = float('inf')
        for face in face_data:
            dist = _dist(face['center'], body_center)
            if dist < min_dist and dist < MAX_FACE_BODY_DIST:
                min_dist = dist
                closest_face = face

        cv2.rectangle(annotated, (x1, y1), (x2, y2), (100, 200, 100), 2)

        if closest_face is not None:
            label = closest_face['label']
            percent = closest_face.get('engagement_percent', None)
            text = f"{label} {percent}%" if percent is not None else label

            if label.lower() == 'engaged':
                color = (0, 255, 255)  # Yellow
            elif label.lower() == 'distracted':
                color = (0, 0, 255)    # Red
            else:
                color = (100, 200, 100)

            cv2.putText(annotated, text, (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, color, 2)

    # Save the annotated copy under the same file name.
    cv2.imwrite(os.path.join(output_folder, filename), annotated)

print("✅ All done. Check output folder for labeled images.")


LABELING IMAGES = v7

In [None]:
import cv2
import numpy as np
from ultralytics import YOLO
import mediapipe as mp
import os

# Folder of images to label.
input_folder = '/content/drive/MyDrive/Saif/img'

# Folder that receives the annotated copies.
output_folder = '/content/drive/MyDrive/Saif/percenttt'

os.makedirs(output_folder, exist_ok=True)  # no error if the folder already exists

# Face detector (weights stored on Drive) and a YOLOv8 pose checkpoint; the
# labeling loop below only reads bounding boxes from `body_model`.
face_model = YOLO('/content/drive/MyDrive/Saif/yolov8s-face-lindevs.pt')
body_model = YOLO('yolov8n-pose.pt')  # body detection model


# MediaPipe FaceMesh instance; the labeling loop runs it on one cropped face
# region at a time.
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(
    static_image_mode=True,
    max_num_faces=30,
    refine_landmarks=True,
    min_detection_confidence=0.5
)

def landmark_to_point(landmark, shape):
    """Scale a normalized landmark to pixel coordinates.

    Args:
        landmark: Object with normalized ``x`` and ``y`` attributes.
        shape: Image shape; the first two entries are taken as (height, width).

    Returns:
        Tuple ``(x_px, y_px)`` of ints (truncated, not rounded).
    """
    img_h, img_w = shape[:2]
    scaled = (landmark.x * img_w, landmark.y * img_h)
    return int(scaled[0]), int(scaled[1])

def unit_vector(v):
    """Normalize ``v`` to unit length.

    Args:
        v: Array-like vector supporting division by a scalar.

    Returns:
        ``v / |v|``, or ``None`` when the norm is below ``1e-6``.
    """
    length = np.linalg.norm(v)
    return None if length < 1e-6 else v / length

def cosine_similarity(v1, v2):
    """Return the cosine of the angle between two vectors.

    Args:
        v1: First vector, or ``None``.
        v2: Second vector, or ``None``.

    Returns:
        ``dot(v1, v2) / (|v1| * |v2|)``. Returns ``0.0`` when either input is
        ``None``, when the product of the norms is not greater than ``1e-6``,
        or when the inputs contain NaN/inf, so callers can safely pass the
        result to ``int()``.
    """
    if v1 is None or v2 is None:
        return 0.0
    dot = np.dot(v1, v2)
    norm_prod = np.linalg.norm(v1) * np.linalg.norm(v2)
    # `not (x > eps)` is also true for NaN, unlike `x < eps`, so a NaN norm
    # (e.g. from a mean gaze normalized by zero) no longer yields a NaN result.
    if not norm_prod > 1e-6 or not np.isfinite(dot):
        return 0.0
    return dot / norm_prod

# --- Constants (previously inline magic numbers) ------------------------------
FACE_CROP_MARGIN = 20           # pixels of context added around each face box
GAZE_LINE_LENGTH = 190          # length in pixels of the drawn gaze ray
MAX_FACE_BODY_DIST = 150        # max face-center/body-center distance for a match
ENGAGED_SIMILARITY_THRESH = 0.5  # similarity to the mean gaze below this -> 'Distracted'

# FaceMesh landmark indices used for the gaze estimate and the overlay.
POINT_IDS = {
    'nose_tip': 1,
    'chin': 152,
    'left_eye_outer': 33,
    'right_eye_outer': 263,
    'left_forehead': 10,
    'mouth_left': 61,
    'mouth_right': 291
}

# BGR colour of the line drawn from each landmark to the extended nose point.
GAZE_LINE_COLORS = {
    'left_forehead': (0, 255, 255),
    'chin': (255, 0, 0),
    'left_eye_outer': (0, 0, 255),
    'right_eye_outer': (255, 255, 0),
    'mouth_left': (255, 0, 255),
    'mouth_right': (0, 165, 255)
}

for filename in os.listdir(input_folder):
    if not filename.lower().endswith(('.jpg', '.jpeg', '.png')):
        continue
    img_path = os.path.join(input_folder, filename)
    image = cv2.imread(img_path)
    if image is None:
        continue

    # All drawing goes onto a copy. Face crops are still cut from `image`
    # later in this iteration, so drawing on `image` itself would feed gaze
    # lines and dots from earlier faces into FaceMesh for the following faces.
    annotated = image.copy()

    orig_h, orig_w = image.shape[:2]

    # Detect bodies; keep only detections whose class column (index 5) is 0.
    body_results = body_model(image)[0]
    person_boxes = [b for b in body_results.boxes.data.cpu().numpy() if int(b[5]) == 0]

    # Detect faces
    face_results = face_model(image)[0]
    boxes = face_results.boxes.xyxy.cpu().numpy().astype(int)

    face_data = []

    for (x1, y1, x2, y2) in boxes:
        # Pad the face box, clamped to the image bounds.
        x1m = max(0, x1 - FACE_CROP_MARGIN)
        y1m = max(0, y1 - FACE_CROP_MARGIN)
        x2m = min(orig_w, x2 + FACE_CROP_MARGIN)
        y2m = min(orig_h, y2 + FACE_CROP_MARGIN)

        face_crop = image[y1m:y2m, x1m:x2m]
        face_rgb = cv2.cvtColor(face_crop, cv2.COLOR_BGR2RGB)
        mp_results = face_mesh.process(face_rgb)

        face_center = ((x1 + x2) // 2, (y1 + y2) // 2)

        if not mp_results.multi_face_landmarks:
            # If no face landmarks detected, mark as Turned
            face_data.append({'center': face_center, 'label': 'Turned', 'gaze_vector': None, 'engagement_percent': None})
            continue

        for landmarks in mp_results.multi_face_landmarks:
            try:
                # Landmark positions in full-image pixel coordinates.
                pts = {}
                for name, idx in POINT_IDS.items():
                    px, py = landmark_to_point(landmarks.landmark[idx], face_crop.shape)
                    pts[name] = (px + x1m, py + y1m)

                # Gaze direction: mean of the vectors pointing from five face
                # landmarks to the nose tip, normalized to unit length.
                p_nose = np.array(pts['nose_tip'])
                direction_vectors = [p_nose - np.array(pts[key]) for key in ['chin', 'left_eye_outer', 'right_eye_outer', 'mouth_left', 'mouth_right']]
                avg_dir = np.mean(direction_vectors, axis=0)
                norm = np.linalg.norm(avg_dir)
                if norm < 1e-6:
                    # Unable to calculate gaze vector - the face keeps the
                    # default 'Engaged' label assigned below.
                    gaze_vec = None
                else:
                    gaze_vec = avg_dir / norm

                if gaze_vec is not None:
                    nose_extended = (p_nose + gaze_vec * GAZE_LINE_LENGTH).astype(int)
                    for key, color in GAZE_LINE_COLORS.items():
                        cv2.line(annotated, pts[key], tuple(nose_extended), color, 2)
                for pt in pts.values():
                    cv2.circle(annotated, pt, 3, (0, 0, 255), -1)
                if gaze_vec is not None:
                    cv2.circle(annotated, tuple(nose_extended), 4, (0, 0, 255), -1)

                face_data.append({'center': face_center, 'label': 'Engaged', 'gaze_vector': gaze_vec, 'engagement_percent': None})

            except Exception as e:
                print(f"⚠️ Error in {filename}: {e}")
                continue

    # Consensus gaze: normalized mean of all non-None gaze vectors.
    valid_gaze_vectors = [f['gaze_vector'] for f in face_data if f['gaze_vector'] is not None]
    mean_gaze = None
    if valid_gaze_vectors:
        mean_dir = np.mean(valid_gaze_vectors, axis=0)
        mean_norm = np.linalg.norm(mean_dir)
        # If the gaze vectors cancel out there is no consensus direction.
        # Dividing by the zero norm would produce NaN and make
        # int(similarity * 100) below raise ValueError, aborting the cell.
        if mean_norm >= 1e-6:
            mean_gaze = mean_dir / mean_norm

    # Score each face by cosine similarity of its gaze to the consensus.
    for face in face_data:
        gv = face['gaze_vector']
        if gv is None or mean_gaze is None:
            # No score available; the label assigned above is kept.
            face['engagement_percent'] = None
            continue
        similarity = cosine_similarity(gv, mean_gaze)
        face['engagement_percent'] = int(similarity * 100)
        # Threshold for engaged vs distracted
        face['label'] = 'Distracted' if similarity < ENGAGED_SIMILARITY_THRESH else 'Engaged'

    # Draw body boxes and, where a nearby face exists, its label.
    for body in person_boxes:
        x1, y1, x2, y2, _, _ = map(int, body[:6])
        body_center = ((x1 + x2) // 2, (y1 + y2) // 2)

        closest_face = None
        min_dist = float('inf')
        for face in face_data:
            fx, fy = face['center']
            dist = np.linalg.norm(np.array([fx, fy]) - np.array(body_center))
            if dist < min_dist and dist < MAX_FACE_BODY_DIST:
                min_dist = dist
                closest_face = face

        cv2.rectangle(annotated, (x1, y1), (x2, y2), (100, 200, 100), 2)

        if closest_face is not None:
            label = closest_face['label']
            percent = closest_face.get('engagement_percent', None)
            if percent is not None:
                text = f"{label}: {percent}%"
            else:
                text = f"{label}: ."

            # This cell never assigns 'Unknown'; the check is kept as a guard.
            if label != "Unknown":
                cv2.putText(annotated, text, (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (100, 200, 100), 2)

    # Save the annotated copy under the same file name.
    cv2.imwrite(os.path.join(output_folder, filename), annotated)

print("✅ Done. Improved gaze detection with cosine similarity, thresholding, and engagement percentages.")



0: 480x640 12 persons, 253.0ms
Speed: 12.0ms preprocess, 253.0ms inference, 1.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 19 faces, 512.6ms
Speed: 5.4ms preprocess, 512.6ms inference, 1.3ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 13 persons, 200.2ms
Speed: 5.8ms preprocess, 200.2ms inference, 1.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 14 faces, 471.1ms
Speed: 5.4ms preprocess, 471.1ms inference, 1.3ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 6 persons, 203.7ms
Speed: 5.5ms preprocess, 203.7ms inference, 1.6ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 7 faces, 478.3ms
Speed: 5.9ms preprocess, 478.3ms inference, 1.3ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 13 persons, 198.4ms
Speed: 6.3ms preprocess, 198.4ms inference, 1.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 21 faces, 500.1ms
Speed: 4.9ms preprocess, 500.1ms inference, 1.3ms postprocess per i

This version labels the dataset with 5 classes (23 July, morning).
LABELING IMAGES = v8

In [None]:
import cv2
import numpy as np
from ultralytics import YOLO
import mediapipe as mp
import os

# Images to label and the folder that receives one .txt label file per image.
input_folder = '/content/drive/MyDrive/Dataset/images/val'
output_folder = '/content/drive/MyDrive/Dataset/labels/val'
os.makedirs(output_folder, exist_ok=True)  # no error if the folder already exists

# Face detector (weights stored on Drive) and a YOLOv8 pose checkpoint; the
# labeling loop below only reads bounding boxes from `body_model`.
face_model = YOLO('/content/drive/MyDrive/Saif/yolov8s-face-lindevs.pt', verbose=False)
body_model = YOLO('yolov8n-pose.pt', verbose=False)

# MediaPipe FaceMesh instance; the labeling loop runs it on one cropped face
# region at a time.
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(
    static_image_mode=True,
    max_num_faces=30,
    refine_landmarks=True,
    min_detection_confidence=0.5
)

def landmark_to_point(landmark, shape):
    """Map a normalized landmark onto an image of the given shape.

    Args:
        landmark: Object with normalized ``x`` and ``y`` attributes.
        shape: Image shape; the first two entries are read as (height, width).

    Returns:
        ``(x_px, y_px)`` as ints, truncated by ``int()``.
    """
    height, width = shape[:2]
    col = int(landmark.x * width)
    row = int(landmark.y * height)
    return (col, row)

def cosine_similarity(v1, v2):
    """Return the cosine of the angle between two vectors.

    Args:
        v1: First vector, or ``None``.
        v2: Second vector, or ``None``.

    Returns:
        ``dot(v1, v2) / (|v1| * |v2|)``; ``0.0`` when either input is ``None``
        or the product of the norms is not greater than ``1e-6``.
    """
    if v1 is None or v2 is None:
        return 0.0
    numerator = np.dot(v1, v2)
    denominator = np.linalg.norm(v1) * np.linalg.norm(v2)
    if denominator > 1e-6:
        return numerator / denominator
    return 0.0

# --- Classification thresholds ------------------------------------------------
# Faces whose detector box is smaller than these pixel sizes are only labelled
# Engaged/Distracted; the eye and mouth heuristics are skipped for them.
MIN_FACE_HEIGHT_FOR_DETAILED = 70
MIN_FACE_WIDTH_FOR_DETAILED = 50
SLEEPY_EYE_RATIO_THRESH = 0.01    # eye_ratio below this -> 'Sleepy'
TALKING_MOUTH_RATIO_THRESH = 2.8  # mouth_ratio above this -> 'Talking'
GAZE_SIMILARITY_THRESH = 0.4      # similarity to the mean gaze below this -> 'Distracted'

# --- Geometry constants (previously inline magic numbers) ---------------------
FACE_CROP_MARGIN = 20     # pixels of context added around each face box
MAX_FACE_BODY_DIST = 150  # max face-center/body-center distance for a match

# FaceMesh landmark indices read by the heuristics below.
POINT_IDS = {
    'nose_tip': 1, 'chin': 152,
    'left_eye_outer': 33, 'right_eye_outer': 263,
    'left_forehead': 10, 'mouth_left': 61,
    'mouth_right': 291, 'left_eye_top': 159,
    'left_eye_bottom': 145, 'right_eye_top': 386,
    'right_eye_bottom': 374
}

# Class index written to the label file for each label string.
CLASS_IDS = {'Engaged': 0, 'Distracted': 1, 'Talking': 2, 'Sleepy': 3, 'Turned': 4}


def _dist(p, q):
    """Return the Euclidean distance between two (x, y) points."""
    return np.linalg.norm(np.array(p) - np.array(q))


def _turned_face(center):
    """Return the face record used when no usable landmarks were obtained."""
    return {'center': center, 'label': 'Turned', 'gaze_vector': None, 'engagement_percent': None}


for filename in os.listdir(input_folder):
    if not filename.lower().endswith(('.jpg', '.jpeg', '.png')):
        continue
    img_path = os.path.join(input_folder, filename)
    image = cv2.imread(img_path)
    if image is None:
        continue

    orig_h, orig_w = image.shape[:2]
    body_results = body_model(image)[0]
    # Keep only detections whose class column (index 5) is 0.
    person_boxes = [b for b in body_results.boxes.data.cpu().numpy() if int(b[5]) == 0]

    face_results = face_model(image)[0]
    boxes = face_results.boxes.xyxy.cpu().numpy().astype(int)
    face_data = []

    # ---- Stage 1: per-face landmarks, gaze vector and eye/mouth ratios -------
    for (x1, y1, x2, y2) in boxes:
        # Pad the face box, clamped to the image bounds.
        x1m = max(0, x1 - FACE_CROP_MARGIN)
        y1m = max(0, y1 - FACE_CROP_MARGIN)
        x2m = min(orig_w, x2 + FACE_CROP_MARGIN)
        y2m = min(orig_h, y2 + FACE_CROP_MARGIN)

        face_crop = image[y1m:y2m, x1m:x2m]
        face_rgb = cv2.cvtColor(face_crop, cv2.COLOR_BGR2RGB)
        mp_results = face_mesh.process(face_rgb)

        face_center = ((x1 + x2) // 2, (y1 + y2) // 2)
        face_w, face_h = x2 - x1, y2 - y1

        if not mp_results.multi_face_landmarks:
            face_data.append(_turned_face(face_center))
            continue

        for landmarks in mp_results.multi_face_landmarks:
            try:
                # Landmark positions in full-image pixel coordinates.
                pts = {}
                for name, idx in POINT_IDS.items():
                    px, py = landmark_to_point(landmarks.landmark[idx], face_crop.shape)
                    pts[name] = (px + x1m, py + y1m)

                # Gaze direction: mean of the vectors pointing from five face
                # landmarks to the nose tip, normalized to unit length.
                p_nose = np.array(pts['nose_tip'])
                direction_vectors = [p_nose - np.array(pts[key]) for key in
                                     ['chin', 'left_eye_outer', 'right_eye_outer', 'mouth_left', 'mouth_right']]
                avg_dir = np.mean(direction_vectors, axis=0)
                norm = np.linalg.norm(avg_dir)
                gaze_vec = avg_dir / norm if norm > 1e-6 else None

                # NOTE(review): the eye "width" is measured from the outer eye
                # corner to the 'left_forehead' landmark (id 10), not across
                # the eye. Kept as-is because SLEEPY_EYE_RATIO_THRESH was
                # chosen against this definition - confirm intent.
                left_eye_h = _dist(pts['left_eye_top'], pts['left_eye_bottom'])
                left_eye_w = _dist(pts['left_eye_outer'], pts['left_forehead'])
                right_eye_h = _dist(pts['right_eye_top'], pts['right_eye_bottom'])
                right_eye_w = _dist(pts['right_eye_outer'], pts['left_forehead'])
                left_ratio = left_eye_h / left_eye_w if left_eye_w > 1e-6 else 0
                right_ratio = right_eye_h / right_eye_w if right_eye_w > 1e-6 else 0
                eye_ratio = (left_ratio + right_ratio) / 2

                # NOTE(review): `mouth_h` is the chin-to-nose-tip distance, so
                # this ratio is (chin-nose) / mouth width - confirm intent.
                mouth_w = _dist(pts['mouth_right'], pts['mouth_left'])
                mouth_h = _dist(pts['chin'], pts['nose_tip'])
                mouth_ratio = mouth_h / mouth_w if mouth_w > 1e-6 else 0

                face_data.append({
                    'center': face_center, 'label': 'Unknown', 'gaze_vector': gaze_vec,
                    'engagement_percent': None,
                    'eye_ratio': eye_ratio, 'mouth_ratio': mouth_ratio,
                    'face_w': face_w, 'face_h': face_h
                })

            except Exception as e:
                # Still record the face as 'Turned', but report the cause
                # instead of swallowing it silently.
                print(f"⚠️ Error processing landmarks in {filename}: {e}")
                face_data.append(_turned_face(face_center))
                continue

    # ---- Stage 2: label each face against the group's mean gaze --------------
    valid_gaze_vectors = [f['gaze_vector'] for f in face_data if f['gaze_vector'] is not None]
    mean_gaze = None
    if valid_gaze_vectors:
        mean_dir = np.mean(valid_gaze_vectors, axis=0)
        mean_norm = np.linalg.norm(mean_dir)
        # If the gaze vectors cancel out there is no consensus direction.
        # Leaving mean_gaze as None makes cosine_similarity return 0.0 rather
        # than dividing by zero here.
        if mean_norm > 1e-6:
            mean_gaze = mean_dir / mean_norm

    for face in face_data:
        gv = face['gaze_vector']
        if gv is None:
            face['engagement_percent'] = None
            face['label'] = 'Turned'
            continue

        similarity = cosine_similarity(gv, mean_gaze)
        face['engagement_percent'] = int(similarity * 100)

        if face['face_w'] < MIN_FACE_WIDTH_FOR_DETAILED or face['face_h'] < MIN_FACE_HEIGHT_FOR_DETAILED:
            # Small face: gaze agreement only.
            face['label'] = 'Engaged' if similarity >= GAZE_SIMILARITY_THRESH else 'Distracted'
        elif face['eye_ratio'] < SLEEPY_EYE_RATIO_THRESH:
            face['label'] = 'Sleepy'
        elif face['mouth_ratio'] > TALKING_MOUTH_RATIO_THRESH:
            face['label'] = 'Talking'
        elif similarity < GAZE_SIMILARITY_THRESH:
            face['label'] = 'Distracted'
        else:
            face['label'] = 'Engaged'

    # ---- Stage 3: write one label line per person box with a matched face ----
    txt_filename = os.path.splitext(filename)[0] + ".txt"
    txt_path = os.path.join(output_folder, txt_filename)
    # The handle is named `label_file` so it does not shadow the `f` used for
    # face records in the comprehension above.
    with open(txt_path, 'w') as label_file:
        for body in person_boxes:
            x1, y1, x2, y2, _, _ = map(int, body[:6])
            body_center = ((x1 + x2) // 2, (y1 + y2) // 2)

            closest_face = None
            min_dist = float('inf')
            for face in face_data:
                dist = _dist(face['center'], body_center)
                if dist < min_dist and dist < MAX_FACE_BODY_DIST:
                    min_dist = dist
                    closest_face = face

            # Person boxes without a nearby face get no label line.
            if closest_face is not None:
                class_id = CLASS_IDS.get(closest_face['label'], 4)

                # Box center and size normalized by the image dimensions.
                xc = (x1 + x2) / 2 / orig_w
                yc = (y1 + y2) / 2 / orig_h
                w = (x2 - x1) / orig_w
                h = (y2 - y1) / orig_h

                label_file.write(f"{class_id} {xc:.6f} {yc:.6f} {w:.6f} {h:.6f}\n")

print("✅ Label .txt files saved for all images in output folder.")


MODEL TRAINING..

In [None]:
# Install Ultralytics from the tip of its GitHub `main` branch. The install is
# unpinned, so the version obtained can differ from one run to the next.
!pip install git+https://github.com/ultralytics/ultralytics.git@main



In [None]:
from ultralytics import YOLO
# Start from the base checkpoint.
model = YOLO('yolo11m.pt')

# Training settings. `project` and `name` place the run outputs in Drive
# under yolo_runs/Exp1.
train_settings = {
    'data': '/content/drive/MyDrive/Dataset/data.yml',
    'epochs': 30,
    'imgsz': 640,
    'batch': 16,
    'project': '/content/drive/MyDrive/yolo_runs',
    'name': 'Exp1',
}
model.train(**train_settings)


In [None]:
from ultralytics import YOLO

# Weights written by the training cell (update the path if the run name changed).
trained_weights = '/content/drive/MyDrive/yolo_runs/Exp1/weights/best.pt'
model = YOLO(trained_weights)

# Source to run inference on: a whole folder here. For a single image, point
# this at one file instead, e.g. '/content/drive/MyDrive/Saif/test/1590.jpg'.
test_source = '/content/drive/MyDrive/Saif/test'
results = model(test_source, save=True)


In [12]:
import shutil
from google.colab import files

# Folder to archive. Ultralytics numbers its run folders (predict, predict2,
# ...), so a hardcoded 'predict2' only exists after a second predict run and
# breaks on a fresh kernel. Prefer the directory recorded on the results from
# the inference cell; the original hardcoded path remains as the fallback.
predict_dir = '/content/runs/detect/predict2'
prior_results = globals().get('results')  # set by the inference cell, if it ran
if prior_results:
    predict_dir = getattr(prior_results[0], 'save_dir', None) or predict_dir

# Zip the prediction folder into detect_resu.zip in the working directory.
shutil.make_archive('detect_resu', 'zip', predict_dir)

# Download the zipped folder
files.download('detect_resu.zip')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>