<a href="https://colab.research.google.com/github/IrfanESD/SE_Detection/blob/main/prj_v1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
 !pip install opencv-python-headless

In [None]:
import cv2
import os

# === 🔧 Input: Set video path ===
video_path = '/content/drive/MyDrive/Clips/C0016.MP4'
output_folder = '/content/drive/MyDrive/clear_student_frames/MainSet14'
os.makedirs(output_folder, exist_ok=True)

# === 🧮 Parameters ===
sharpness_threshold = 100.0  # Variance-of-Laplacian cutoff; higher = only clearer frames
sampling_interval = 30      # Score one frame out of every `sampling_interval` frames

# === 🎬 Read and Process the Video ===
cap = cv2.VideoCapture(video_path)

# Raise instead of calling exit(): in a notebook exit() tears down the whole
# kernel/runtime, whereas an exception only stops this cell.
if not cap.isOpened():
    cap.release()
    raise RuntimeError("❌ Error: Could not open video.")

frame_count = 0
saved_count = 0

try:
    # Get and print video FPS (optional, for info)
    fps = cap.get(cv2.CAP_PROP_FPS)
    print(f"🎥 Video FPS: {fps}")

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break  # end of stream or read failure

        frame_count += 1

        # Only score every `sampling_interval`-th frame
        if frame_count % sampling_interval != 0:
            continue

        # Sharpness score = variance of the Laplacian of the grayscale frame
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        sharpness = cv2.Laplacian(gray, cv2.CV_64F).var()

        # Save only sharp frames
        if sharpness > sharpness_threshold:
            filename = os.path.join(output_folder, f"frame_{frame_count:04d}.jpg")
            # imwrite reports failure through its return value, not an exception,
            # so only count the frame when the write actually succeeded.
            if cv2.imwrite(filename, frame):
                saved_count += 1
                print(f"✅ Saved frame {frame_count} | Sharpness: {sharpness:.2f}")
            else:
                print(f"⚠️ Could not write '{filename}'")
finally:
    # Always free the capture handle, even if processing is interrupted.
    cap.release()

print(f"\n✅ Done: {saved_count} sharp frames saved to ➜ '{output_folder}'")


In [None]:
import os
import uuid

# Replace this with your actual folder path in Drive
folder_path = '/content/drive/MyDrive/clear_student_frames/MainSet'

# Get list of image files
image_files = sorted([
    f for f in os.listdir(folder_path)
    if f.lower().endswith(('.jpg', '.jpeg', '.png'))
])

# Rename images as img1.<ext>, img2.<ext>, ...
#
# The rename is done in two passes. Renaming straight to the final name can
# silently overwrite a file that has not been processed yet (for example on a
# re-run, 'img10.jpg' -> 'img2.jpg' replaces the real 'img2.jpg'). Parking
# every file under a run-unique temporary name first makes that impossible.
run_tag = uuid.uuid4().hex
staged = []  # (temporary path, final path) pairs, in serial order

for idx, filename in enumerate(image_files, start=1):
    ext = os.path.splitext(filename)[1]  # Keeps original file extension
    new_name = f'img{idx}{ext}'

    old_path = os.path.join(folder_path, filename)
    # The temporary name keeps the extension, so a leftover from an interrupted
    # run is still picked up as an image by the next run.
    temp_path = os.path.join(folder_path, f'__renaming_{run_tag}_{idx}{ext}')
    new_path = os.path.join(folder_path, new_name)

    os.rename(old_path, temp_path)
    staged.append((temp_path, new_path))

for temp_path, new_path in staged:
    os.rename(temp_path, new_path)

# `idx` only exists when the loop ran at least once, so guard the summary.
if image_files:
    print(f"✅ Renamed {len(image_files)} images to ..., img{idx}")
else:
    print(f"⚠️ No images found in '{folder_path}'")


✅ Renamed 348 images to ..., img348


In [None]:
from google.colab import drive
import os
import glob
import uuid

# Step 1: Mount Google Drive
drive.mount('/content/drive')

# Step 2: Set path to the parent folder containing 10 folders
parent_dir = '/content/drive/MyDrive/clear_student_frames'  # ⬅️ Change this

# Step 3: Get list of all image paths from all subfolders
# NOTE: glob patterns are case-sensitive on Linux, so files such as 'X.JPG'
# are not matched by these patterns.
image_extensions = ('*.jpg', '*.jpeg', '*.png')  # Extend if needed
all_images = []

for ext in image_extensions:
    all_images.extend(glob.glob(os.path.join(parent_dir, '*', ext)))

# Optional: Sort for consistent ordering
all_images.sort()

# Step 4: Rename all images serially and print progress
start_serial = 1  # ⬅️ Change this if you want to start from a different number
renamed_count = 0

# Pass 1: park every file under a run-unique temporary name in its own folder.
# Renaming directly to '<serial><ext>' can overwrite a file that is still
# waiting to be renamed (on a re-run, '10.jpg' -> '2.jpg' replaces the real
# '2.jpg'), which silently loses images.
run_tag = uuid.uuid4().hex
staged = []  # (serial, temporary path, final path)

for idx, img_path in enumerate(all_images, start_serial):
    folder = os.path.dirname(img_path)
    ext = os.path.splitext(img_path)[1]
    temp_path = os.path.join(folder, f"__renaming_{run_tag}_{idx}{ext}")
    new_path = os.path.join(folder, f"{idx}{ext}")

    try:
        os.rename(img_path, temp_path)
        staged.append((idx, temp_path, new_path))
    except Exception as e:
        print(f"⚠️ Error renaming '{img_path}': {e}")

# Pass 2: move each parked file to its final serial name.
for idx, temp_path, new_path in staged:
    new_name = os.path.basename(new_path)

    try:
        # A file that failed in pass 1 may still occupy this name; never clobber it.
        if os.path.exists(new_path):
            raise FileExistsError(f"'{new_path}' already exists")
        os.rename(temp_path, new_path)
        folder_name = os.path.basename(os.path.dirname(new_path))
        print(f"{idx}: Renamed in '{folder_name}' → {new_name}")
        renamed_count += 1
    except Exception as e:
        print(f"⚠️ Error renaming '{temp_path}': {e}")

# Step 5: Output total renamed images
print(f"\n✅ Total images renamed: {renamed_count}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
1: Renamed in 'MainSet1' → 1.jpg
2: Renamed in 'MainSet1' → 2.jpg
3: Renamed in 'MainSet1' → 3.jpg
4: Renamed in 'MainSet1' → 4.jpg
5: Renamed in 'MainSet1' → 5.jpg
6: Renamed in 'MainSet1' → 6.jpg
7: Renamed in 'MainSet1' → 7.jpg
8: Renamed in 'MainSet1' → 8.jpg
9: Renamed in 'MainSet1' → 9.jpg
10: Renamed in 'MainSet1' → 10.jpg
11: Renamed in 'MainSet1' → 11.jpg
12: Renamed in 'MainSet1' → 12.jpg
13: Renamed in 'MainSet1' → 13.jpg
14: Renamed in 'MainSet1' → 14.jpg
15: Renamed in 'MainSet1' → 15.jpg
16: Renamed in 'MainSet1' → 16.jpg
17: Renamed in 'MainSet1' → 17.jpg
18: Renamed in 'MainSet1' → 18.jpg
19: Renamed in 'MainSet1' → 19.jpg
20: Renamed in 'MainSet1' → 20.jpg
21: Renamed in 'MainSet1' → 21.jpg
22: Renamed in 'MainSet1' → 22.jpg
23: Renamed in 'MainSet1' → 23.jpg
24: Renamed in 'MainSet1' → 24.jpg
25: Renamed in 'MainSet1' → 25.jpg
26: Renamed in

In [None]:
import os
import shutil

# 🔁 Source folders whose images are gathered into one place
source_folders = [
    '/content/drive/MyDrive/clear_student_frames/SET01',
    '/content/drive/MyDrive/clear_student_frames/SET02',
    '/content/drive/MyDrive/clear_student_frames/SET03',
    '/content/drive/MyDrive/clear_student_frames/SET04',
    '/content/drive/MyDrive/clear_student_frames/SET05',
    '/content/drive/MyDrive/clear_student_frames/SET06'
]

# 🗂️ Destination folder (created on demand)
target_folder = '/content/drive/MyDrive/clear_student_frames/merged_set'
os.makedirs(target_folder, exist_ok=True)

# 📦 Walk every source folder and copy each image across.
# A file whose name is already present in the target is reported and left
# alone, so the first folder that provides a given name wins.
for folder in source_folders:
    for file in os.listdir(folder):
        if not file.lower().endswith(('.jpg', '.jpeg', '.png')):
            continue

        src = os.path.join(folder, file)
        dst = os.path.join(target_folder, file)

        if os.path.exists(dst):
            print(f"⚠️ Skipped duplicate: {file}")
            continue

        shutil.copy(src, dst)

print("✅ All images merged into:", target_folder)


✅ All images merged into: /content/drive/MyDrive/clear_student_frames/merged_set


In [None]:
from google.colab import drive
import os
import glob
import shutil

# Step 1: Make Google Drive available under /content/drive
drive.mount('/content/drive')

# Step 2: Where images come from and where the numbered copies go
source_parent = '/content/drive/MyDrive/clear_student_frames'  # ⬅️ contains the 10 folders
target_folder = '/content/drive/MyDrive/MMimages'               # ⬅️ all images will go here

# Step 3: Make sure the destination exists
os.makedirs(target_folder, exist_ok=True)

# Step 4: Gather image paths from the immediate subfolders of source_parent
image_extensions = ('*.jpg', '*.jpeg', '*.png')  # Add others if needed
all_images = []

for ext in image_extensions:
    pattern = os.path.join(source_parent, '*', ext)
    all_images += glob.glob(pattern)

# Sorted so the serial numbers are assigned in a stable order
all_images.sort()

# Step 5: Copy each image to the target folder under its serial number
start_serial = 1  # Change if needed
copied_count = 0

for offset, img_path in enumerate(all_images):
    idx = start_serial + offset
    ext = os.path.splitext(img_path)[-1]
    new_name = f"{idx}{ext}"
    new_path = os.path.join(target_folder, new_name)

    try:
        shutil.copy(img_path, new_path)
        folder_name = os.path.basename(os.path.dirname(img_path))
        print(f"{idx}: Copied from '{folder_name}' → {new_name}")
        copied_count += 1
    except Exception as e:
        # One failed copy is reported and the loop carries on with the rest
        print(f"⚠️ Error copying '{img_path}': {e}")

# Step 6: Summary
print(f"\n✅ Total images copied and renamed: {copied_count}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
1: Copied from 'MainSet1' → 1.jpg
2: Copied from 'MainSet1' → 2.jpg
3: Copied from 'MainSet1' → 3.jpg
4: Copied from 'MainSet1' → 4.jpg
5: Copied from 'MainSet1' → 5.jpg
6: Copied from 'MainSet1' → 6.jpg
7: Copied from 'MainSet1' → 7.jpg
8: Copied from 'MainSet1' → 8.jpg
9: Copied from 'MainSet1' → 9.jpg
10: Copied from 'MainSet1' → 10.jpg
11: Copied from 'MainSet1' → 11.jpg
12: Copied from 'MainSet1' → 12.jpg
13: Copied from 'MainSet1' → 13.jpg
14: Copied from 'MainSet1' → 14.jpg
15: Copied from 'MainSet1' → 15.jpg
16: Copied from 'MainSet1' → 16.jpg
17: Copied from 'MainSet1' → 17.jpg
18: Copied from 'MainSet1' → 18.jpg
19: Copied from 'MainSet1' → 19.jpg
20: Copied from 'MainSet1' → 20.jpg
21: Copied from 'MainSet1' → 21.jpg
22: Copied from 'MainSet1' → 22.jpg
23: Copied from 'MainSet1' → 23.jpg
24: Copied from 'MainSet1' → 24.jpg
25: Copied from 'MainSet1

In [None]:
pip install ultralytics mediapipe opencv-python

In [None]:
import os
import cv2
import math
from collections import Counter
from ultralytics import YOLO
import mediapipe as mp

# Paths: input images, YOLO-format label output, and annotated-image output.
# The processing loop in this cell reads every image in image_folder, writes
# one '<image stem>.txt' into label_folder and one annotated copy into visual_folder.
image_folder = '/content/drive/MyDrive/Saif/images'
label_folder = '/content/drive/MyDrive/Saif/labels'
visual_folder = '/content/drive/MyDrive/Saif/labeled_images'

# Create both output folders up front; exist_ok keeps the cell re-runnable.
os.makedirs(label_folder, exist_ok=True)
os.makedirs(visual_folder, exist_ok=True)

# Load YOLOv8 and MediaPipe
# `model` supplies the detection boxes; `pose` is run on each cropped box by the loop below.
model = YOLO('yolov8n.pt')
pose = mp.solutions.pose.Pose(static_image_mode=True, model_complexity=2)

# Class names: 0 = Engaged, 1 = Distracted
# (indexed by the class id to get the text drawn next to each box)
class_names = ['Engaged', 'Distracted']

# Estimate head direction based on eyes
def estimate_head_direction(landmarks):
    """Classify head direction from the horizontal offset between two landmarks.

    Args:
        landmarks: Indexable sequence whose items expose an ``x`` attribute.
            Items 2 and 5 are read and treated as the left and right eye
            (presumably MediaPipe Pose numbering -- confirm against the caller).

    Returns:
        'left' when ``landmarks[2].x - landmarks[5].x`` is above 0.04,
        'right' when it is below -0.04, 'front' otherwise, and 'unknown'
        when the two landmarks cannot be read.
    """
    try:
        left_eye = landmarks[2]
        right_eye = landmarks[5]
        dx = left_eye.x - right_eye.x
    # Only "landmarks missing or malformed" is treated as unknown. A bare
    # `except:` would also swallow KeyboardInterrupt and make the cell unstoppable.
    except (IndexError, KeyError, TypeError, AttributeError):
        return 'unknown'

    if dx > 0.04:
        return 'left'
    if dx < -0.04:
        return 'right'
    return 'front'

# Binary engagement label: does this head point the same way as the majority?
def classify_posture_binary(landmarks, teacher_dir):
    """Return 0 (Engaged) when the estimated head direction equals
    ``teacher_dir``, otherwise 1 (Distracted)."""
    return int(estimate_head_direction(landmarks) != teacher_dir)

# Process images
# For each image: detect people, estimate each person's head direction, take
# the most common direction as the "teacher direction", then label each person
# as Engaged (matches it) or Distracted (does not).
for file in os.listdir(image_folder):
    if not file.lower().endswith(('.jpg', '.jpeg', '.png')):
        continue

    image_path = os.path.join(image_folder, file)
    image = cv2.imread(image_path)
    if image is None:
        continue  # imread gave nothing usable; skip this file

    img_h, img_w = image.shape[:2]
    results = model(image_path)[0]
    boxes = results.boxes.data

    all_directions = []   # directions of people whose pose was found (input to the vote)
    temp_landmarks = []   # landmarks or None, index-aligned with temp_boxes
    temp_boxes = []       # clamped (x1, y1, x2, y2) per kept detection

    for box in boxes:
        cls = int(box[5])
        if cls != 0:
            continue  # only person class

        # Clamp the box to the image so the crop below is always in bounds
        x1, y1, x2, y2 = map(int, box[:4])
        x1, y1 = max(0, x1), max(0, y1)
        x2, y2 = min(img_w, x2), min(img_h, y2)

        person_crop = image[y1:y2, x1:x2]
        if person_crop.size == 0:
            continue

        try:
            crop_rgb = cv2.cvtColor(person_crop, cv2.COLOR_BGR2RGB)
            pose_result = pose.process(crop_rgb)
        # `except Exception` keeps the best-effort behaviour but, unlike a bare
        # `except:`, still lets "Interrupt execution" stop the cell.
        except Exception as exc:
            print(f"⚠️ Pose estimation failed for a box in {file}: {exc}")
            pose_result = None

        if pose_result and pose_result.pose_landmarks:
            landmarks = pose_result.pose_landmarks.landmark
            # Named `direction`, not `dir`: this loop runs at notebook top level,
            # so assigning to `dir` would replace the builtin for the whole kernel.
            direction = estimate_head_direction(landmarks)
            all_directions.append(direction)
            temp_landmarks.append(landmarks)
        else:
            temp_landmarks.append(None)
        temp_boxes.append((x1, y1, x2, y2))

    # Decide teacher direction (majority vote); 'front' when nobody had a pose
    teacher_dir = 'front'
    if all_directions:
        teacher_dir = Counter(all_directions).most_common(1)[0][0]

    label_lines = []
    engaged_count = 0
    total_count = 0

    for (x1, y1, x2, y2), landmarks in zip(temp_boxes, temp_landmarks):
        if not landmarks:
            # Pose not detected: draw a grey box, write no label line
            cv2.rectangle(image, (x1, y1), (x2, y2), (128, 128, 128), 2)
            cv2.putText(image, 'no pose', (x1, y1 - 5),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (128, 128, 128), 1)
            continue

        posture_class = classify_posture_binary(landmarks, teacher_dir)

        if posture_class == 0:
            engaged_count += 1
        total_count += 1

        # YOLO label format: class, then box centre and size normalised by image size
        x_center = ((x1 + x2) / 2) / img_w
        y_center = ((y1 + y2) / 2) / img_h
        w = (x2 - x1) / img_w
        h = (y2 - y1) / img_h

        label_line = f"{posture_class} {x_center:.6f} {y_center:.6f} {w:.6f} {h:.6f}"
        label_lines.append(label_line)

        # Draw bounding box with label (green = Engaged, red = Distracted, BGR order)
        color = (0, 255, 0) if posture_class == 0 else (0, 0, 255)
        cv2.rectangle(image, (x1, y1), (x2, y2), color, 2)
        cv2.putText(image, class_names[posture_class], (x1, y1 - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)

    # Save YOLO-format label file (written even when it ends up empty)
    label_file = os.path.join(label_folder, file.rsplit('.', 1)[0] + '.txt')
    with open(label_file, 'w') as f:
        for line in label_lines:
            f.write(line + '\n')

    # Save annotated image
    visual_path = os.path.join(visual_folder, file)
    cv2.imwrite(visual_path, image)

    # Print engagement stats
    engagement_percent = (engaged_count / total_count * 100) if total_count > 0 else 0
    print(f"{file}: {engaged_count}/{total_count} engaged ({engagement_percent:.1f}%) - Teacher direction: {teacher_dir}")



image 1/1 /content/drive/MyDrive/Saif/images/img1.jpg: 384x640 23 persons, 2 bottles, 4 chairs, 4 laptops, 436.8ms
Speed: 17.9ms preprocess, 436.8ms inference, 3.0ms postprocess per image at shape (1, 3, 384, 640)
img1.jpg: 19/21 engaged (90.5%) - Teacher direction: left

image 1/1 /content/drive/MyDrive/Saif/images/img2.jpg: 384x640 24 persons, 2 bottles, 1 cup, 3 chairs, 4 laptops, 151.5ms
Speed: 4.5ms preprocess, 151.5ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)
img2.jpg: 20/21 engaged (95.2%) - Teacher direction: left

image 1/1 /content/drive/MyDrive/Saif/images/img3.jpg: 384x640 24 persons, 2 bottles, 1 cup, 2 chairs, 3 laptops, 154.1ms
Speed: 3.2ms preprocess, 154.1ms inference, 2.1ms postprocess per image at shape (1, 3, 384, 640)
img3.jpg: 19/21 engaged (90.5%) - Teacher direction: left

image 1/1 /content/drive/MyDrive/Saif/images/img4.jpg: 384x640 24 persons, 2 bottles, 1 cup, 2 chairs, 2 laptops, 216.7ms
Speed: 4.5ms preprocess, 216.7ms inference, 2.

In [None]:
import os
import random
import shutil

# === Paths ===
image_dir = '/content/drive/MyDrive/MMimages'
output_base = '/content/drive/MyDrive/image_split'

# Create output folders
splits = ['train', 'val', 'test']
for split in splits:
    os.makedirs(os.path.join(output_base, split), exist_ok=True)

# Get and shuffle image files.
# os.listdir returns names in arbitrary order, so the list is sorted before the
# seeded shuffle; otherwise the same seed can still produce a different split.
all_images = sorted(
    f for f in os.listdir(image_dir) if f.lower().endswith(('.jpg', '.png', '.jpeg'))
)
random.seed(42)  # For reproducibility
random.shuffle(all_images)

# Define splits: first 1500 -> train, next 400 -> val, everything left -> test
train_imgs = all_images[:1500]
val_imgs = all_images[1500:1900]
test_imgs = all_images[1900:]  # all remaining images

# The split sizes are fixed counts, so a small folder leaves val/test empty.
if not val_imgs or not test_imgs:
    print(f"⚠️ Only {len(all_images)} images found in '{image_dir}'; val and/or test split is empty")

# Function to move images
def move_images(file_list, split_name):
    """Move every file named in ``file_list`` from ``image_dir`` into
    ``output_base/<split_name>``. The files leave ``image_dir``."""
    for file in file_list:
        src = os.path.join(image_dir, file)
        dst = os.path.join(output_base, split_name, file)
        shutil.move(src, dst)

# Move images to folders
move_images(train_imgs, 'train')
move_images(val_imgs, 'val')
move_images(test_imgs, 'test')

# Report
print(f"✅ Done: {len(train_imgs)} train, {len(val_imgs)} val, {len(test_imgs)} test images MOVED to '{output_base}'")


✅ Done: 1500 train, 400 val, 142 test images MOVED to '/content/drive/MyDrive/image_split'


In [None]:
import os
import cv2
import math
from collections import Counter
from ultralytics import YOLO
import mediapipe as mp

# Paths: images to label and the folder that receives one YOLO-format .txt per image
image_folder = '/content/drive/MyDrive/Dataset/images/val'
label_folder = '/content/drive/MyDrive/Dataset/labels/val'

# exist_ok keeps the cell re-runnable
os.makedirs(label_folder, exist_ok=True)

# Load YOLOv8 and MediaPipe
# `model` supplies the detection boxes; `pose` is run on each cropped box by the loop below.
model = YOLO('yolov8n.pt')
pose = mp.solutions.pose.Pose(static_image_mode=True, model_complexity=2)

# Estimate head direction based on eyes
def estimate_head_direction(landmarks):
    """Classify head direction from the horizontal offset between two landmarks.

    Args:
        landmarks: Indexable sequence whose items expose an ``x`` attribute.
            Items 2 and 5 are read and treated as the left and right eye
            (presumably MediaPipe Pose numbering -- confirm against the caller).

    Returns:
        'left' when ``landmarks[2].x - landmarks[5].x`` is above 0.04,
        'right' when it is below -0.04, 'front' otherwise, and 'unknown'
        when the two landmarks cannot be read.
    """
    try:
        left_eye = landmarks[2]
        right_eye = landmarks[5]
        dx = left_eye.x - right_eye.x
    # Only "landmarks missing or malformed" is treated as unknown. A bare
    # `except:` would also swallow KeyboardInterrupt and make the cell unstoppable.
    except (IndexError, KeyError, TypeError, AttributeError):
        return 'unknown'

    if dx > 0.04:
        return 'left'
    if dx < -0.04:
        return 'right'
    return 'front'

# Binary engagement label relative to the majority ("teacher") direction
def classify_posture_binary(landmarks, teacher_dir):
    """Return 0 (Engaged) if the estimated head direction matches
    ``teacher_dir``; return 1 (Distracted) otherwise."""
    if estimate_head_direction(landmarks) == teacher_dir:
        return 0
    return 1

# Process images
# For each image: detect people, estimate each person's head direction, take
# the most common direction as the "teacher direction", then write one YOLO
# label line per person whose pose was found.
for file in os.listdir(image_folder):
    if not file.lower().endswith(('.jpg', '.jpeg', '.png')):
        continue

    image_path = os.path.join(image_folder, file)
    image = cv2.imread(image_path)
    if image is None:
        continue  # imread gave nothing usable; skip this file

    img_h, img_w = image.shape[:2]
    results = model(image_path)[0]
    boxes = results.boxes.data

    all_directions = []   # directions of people whose pose was found (input to the vote)
    temp_landmarks = []   # landmarks or None, index-aligned with temp_boxes
    temp_boxes = []       # clamped (x1, y1, x2, y2) per kept detection

    for box in boxes:
        cls = int(box[5])
        if cls != 0:
            continue  # only person class

        # Clamp the box to the image so the crop below is always in bounds
        x1, y1, x2, y2 = map(int, box[:4])
        x1, y1 = max(0, x1), max(0, y1)
        x2, y2 = min(img_w, x2), min(img_h, y2)

        person_crop = image[y1:y2, x1:x2]
        if person_crop.size == 0:
            continue

        try:
            crop_rgb = cv2.cvtColor(person_crop, cv2.COLOR_BGR2RGB)
            pose_result = pose.process(crop_rgb)
        # `except Exception` keeps the best-effort behaviour but, unlike a bare
        # `except:`, still lets "Interrupt execution" stop the cell.
        except Exception as exc:
            print(f"⚠️ Pose estimation failed for a box in {file}: {exc}")
            pose_result = None

        if pose_result and pose_result.pose_landmarks:
            landmarks = pose_result.pose_landmarks.landmark
            # Named `direction`, not `dir`: this loop runs at notebook top level,
            # so assigning to `dir` would replace the builtin for the whole kernel.
            direction = estimate_head_direction(landmarks)
            all_directions.append(direction)
            temp_landmarks.append(landmarks)
        else:
            temp_landmarks.append(None)
        temp_boxes.append((x1, y1, x2, y2))

    # Determine teacher direction by majority; 'front' when nobody had a pose
    teacher_dir = 'front'
    if all_directions:
        teacher_dir = Counter(all_directions).most_common(1)[0][0]

    label_lines = []

    for (x1, y1, x2, y2), landmarks in zip(temp_boxes, temp_landmarks):
        if not landmarks:
            continue  # no pose -> no label line for this box

        posture_class = classify_posture_binary(landmarks, teacher_dir)

        # YOLO label format: class, then box centre and size normalised by image size
        x_center = ((x1 + x2) / 2) / img_w
        y_center = ((y1 + y2) / 2) / img_h
        w = (x2 - x1) / img_w
        h = (y2 - y1) / img_h

        label_line = f"{posture_class} {x_center:.6f} {y_center:.6f} {w:.6f} {h:.6f}"
        label_lines.append(label_line)

    # Save YOLO-format label file (written even when it ends up empty)
    label_file = os.path.join(label_folder, file.rsplit('.', 1)[0] + '.txt')
    with open(label_file, 'w') as f:
        for line in label_lines:
            f.write(line + '\n')

    print(f"{file}: {len(label_lines)} labels saved (Teacher direction: {teacher_dir})")



image 1/1 /content/drive/MyDrive/Dataset/images/val/1044.jpg: 384x640 12 persons, 15 chairs, 3 dining tables, 288.1ms
Speed: 5.6ms preprocess, 288.1ms inference, 4.0ms postprocess per image at shape (1, 3, 384, 640)
1044.jpg: 10 labels saved (Teacher direction: left)

image 1/1 /content/drive/MyDrive/Dataset/images/val/1047.jpg: 384x640 12 persons, 17 chairs, 3 dining tables, 158.6ms
Speed: 3.5ms preprocess, 158.6ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)
1047.jpg: 11 labels saved (Teacher direction: left)

image 1/1 /content/drive/MyDrive/Dataset/images/val/1049.jpg: 384x640 14 persons, 16 chairs, 3 dining tables, 145.4ms
Speed: 3.4ms preprocess, 145.4ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)
1049.jpg: 13 labels saved (Teacher direction: left)

image 1/1 /content/drive/MyDrive/Dataset/images/val/1050.jpg: 384x640 11 persons, 17 chairs, 3 dining tables, 142.7ms
Speed: 3.1ms preprocess, 142.7ms inference, 1.9ms postprocess per image at

.....................................................

FaceMesh: first version

In [None]:
import os
import cv2
from collections import Counter
from ultralytics import YOLO
import mediapipe as mp

# Paths: input images, YOLO-format label output, and annotated-image output.
# NOTE(review): label_folder is the same path the earlier pose-based labeling
# cell writes to, so labels with matching file names get overwritten -- confirm this is intended.
image_folder = '/content/drive/MyDrive/Saif/img'
label_folder = '/content/drive/MyDrive/Saif/labels'
visualized_folder = '/content/drive/MyDrive/Saif/visual'

# exist_ok keeps the cell re-runnable
os.makedirs(label_folder, exist_ok=True)
os.makedirs(visualized_folder, exist_ok=True)

# Load YOLOv8 model for person detection
model = YOLO('yolov8n.pt')

# Initialize MediaPipe FaceMesh
# max_num_faces=1: the mesh is run on one person crop at a time by the loop below.
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(static_image_mode=True, max_num_faces=1, refine_landmarks=True)

def estimate_head_direction(landmarks):
    """Classify head direction from the horizontal offset between two landmarks.

    Args:
        landmarks: Indexable sequence whose items expose an ``x`` attribute.
            Items 33 and 263 are read and treated as the left and right eye
            (presumably MediaPipe FaceMesh numbering -- confirm against the caller).

    Returns:
        'left' when ``landmarks[33].x - landmarks[263].x`` is above 0.04,
        'right' when it is below -0.04, 'front' otherwise, and 'unknown'
        when the two landmarks cannot be read.
    """
    try:
        left_eye = landmarks[33]
        right_eye = landmarks[263]
        dx = left_eye.x - right_eye.x
    # Only "landmarks missing or malformed" is treated as unknown. A bare
    # `except:` would also swallow KeyboardInterrupt and make the cell unstoppable.
    except (IndexError, KeyError, TypeError, AttributeError):
        return 'unknown'

    if dx > 0.04:
        return 'left'
    if dx < -0.04:
        return 'right'
    return 'front'

def classify_posture_binary(landmarks, teacher_dir):
    """Return 0 (Engaged) when the estimated head direction equals
    ``teacher_dir``, otherwise 1 (Distracted)."""
    same_way = estimate_head_direction(landmarks) == teacher_dir
    return int(not same_way)

# Process all images
# For each image: detect people, run FaceMesh on each person crop, take the
# most common head direction as the "teacher direction", write YOLO labels and
# save a copy annotated with boxes, key landmarks and a point-of-interest marker.
for file in os.listdir(image_folder):
    if not file.lower().endswith(('.jpg', '.jpeg', '.png')):
        continue

    image_path = os.path.join(image_folder, file)
    image = cv2.imread(image_path)
    if image is None:
        continue  # imread gave nothing usable; skip this file

    img_h, img_w = image.shape[:2]

    results = model(image_path)[0]
    boxes = results.boxes.data

    all_directions = []   # directions of people whose face was found (input to the vote)
    temp_landmarks = []   # landmarks or None, index-aligned with temp_boxes
    temp_boxes = []       # clamped (x1, y1, x2, y2) for every person box

    for box in boxes:
        cls = int(box[5])
        if cls != 0:
            continue

        # Clamp the box to the image so the crop below is always in bounds
        x1, y1, x2, y2 = map(int, box[:4])
        x1, y1 = max(0, x1), max(0, y1)
        x2, y2 = min(img_w, x2), min(img_h, y2)
        temp_boxes.append((x1, y1, x2, y2))

        person_crop = image[y1:y2, x1:x2]
        if person_crop.size == 0:
            temp_landmarks.append(None)
            continue

        try:
            crop_rgb = cv2.cvtColor(person_crop, cv2.COLOR_BGR2RGB)
            face_result = face_mesh.process(crop_rgb)
        # `except Exception` keeps the best-effort behaviour but, unlike a bare
        # `except:`, still lets "Interrupt execution" stop the cell.
        except Exception as exc:
            print(f"⚠️ FaceMesh failed for a box in {file}: {exc}")
            face_result = None

        if face_result and face_result.multi_face_landmarks:
            landmarks = face_result.multi_face_landmarks[0].landmark
            direction = estimate_head_direction(landmarks)
            all_directions.append(direction)
            temp_landmarks.append(landmarks)
        else:
            temp_landmarks.append(None)

    # Majority vote; 'front' when no face was found in the image
    teacher_dir = 'front'
    if all_directions:
        teacher_dir = Counter(all_directions).most_common(1)[0][0]

    label_lines = []

    for (x1, y1, x2, y2), landmarks in zip(temp_boxes, temp_landmarks):
        if not landmarks:
            # No face: grey box, no label line
            label_text = "No face"
            cv2.rectangle(image, (x1, y1), (x2, y2), (128, 128, 128), 2)
            cv2.putText(image, label_text, (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
            continue

        posture_class = classify_posture_binary(landmarks, teacher_dir)
        direction = estimate_head_direction(landmarks)
        label_text = f"{direction}, {'Engaged' if posture_class == 0 else 'Distracted'}"
        box_color = (0, 255, 0) if posture_class == 0 else (0, 0, 255)

        # YOLO label format: class, then box centre and size normalised by image size
        x_center = ((x1 + x2) / 2) / img_w
        y_center = ((y1 + y2) / 2) / img_h
        w = (x2 - x1) / img_w
        h = (y2 - y1) / img_h
        label_line = f"{posture_class} {x_center:.6f} {y_center:.6f} {w:.6f} {h:.6f}"
        label_lines.append(label_line)

        # Landmarks are normalised to the person crop; scale by the box size and
        # offset by its corner to get full-image pixel coordinates.
        box_w, box_h = x2 - x1, y2 - y1
        left_eye, right_eye, nose, chin, forehead = [
            (int(x1 + landmarks[k].x * box_w), int(y1 + landmarks[k].y * box_h))
            for k in (33, 263, 1, 152, 10)
        ]

        # Calculate midpoints for reference
        eye_mid = ((left_eye[0] + right_eye[0]) // 2, (left_eye[1] + right_eye[1]) // 2)
        head_mid = ((nose[0] + chin[0]) // 2, (nose[1] + chin[1]) // 2)

        # Unit vector from the eye midpoint towards the nose/chin midpoint
        vx = head_mid[0] - eye_mid[0]
        vy = head_mid[1] - eye_mid[1]

        poi_dist = 150  # pixels from the eye midpoint to the drawn point of interest
        norm = (vx ** 2 + vy ** 2) ** 0.5
        if norm == 0:
            norm = 1  # both midpoints coincide; avoid dividing by zero
        vx_norm = vx / norm
        vy_norm = vy / norm

        poi_x = int(eye_mid[0] + vx_norm * poi_dist)
        poi_y = int(eye_mid[1] + vy_norm * poi_dist)

        # Draw bounding box and label
        cv2.rectangle(image, (x1, y1), (x2, y2), box_color, 2)
        cv2.putText(image, label_text, (x1, y1 - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)

        # Draw gaze lines from left_eye, right_eye, nose, chin, forehead to POI
        for pt in [left_eye, right_eye, nose, chin, forehead]:
            cv2.line(image, pt, (poi_x, poi_y), (0, 0, 255), 2)
            cv2.circle(image, pt, 4, (0, 255, 255), -1)  # mark landmarks in yellow

        # Draw POI
        cv2.circle(image, (poi_x, poi_y), 6, (0, 0, 255), -1)

    # Save labels (file is written even when it ends up empty)
    label_file = os.path.join(label_folder, file.rsplit('.', 1)[0] + '.txt')
    with open(label_file, 'w') as f:
        for line in label_lines:
            f.write(line + '\n')

    # Save visualized image
    out_path = os.path.join(visualized_folder, file)
    cv2.imwrite(out_path, image)

    print(f"{file}: {len(label_lines)} labels saved (Teacher direction: {teacher_dir})")



image 1/1 /content/drive/MyDrive/Saif/img/img4.JPG: 480x640 17 persons, 7 chairs, 164.2ms
Speed: 3.9ms preprocess, 164.2ms inference, 2.1ms postprocess per image at shape (1, 3, 480, 640)
img4.JPG: 13 labels saved (Teacher direction: right)

image 1/1 /content/drive/MyDrive/Saif/img/img7.JPG: 480x640 12 persons, 1 surfboard, 5 chairs, 231.8ms
Speed: 5.5ms preprocess, 231.8ms inference, 1.9ms postprocess per image at shape (1, 3, 480, 640)
img7.JPG: 5 labels saved (Teacher direction: right)

image 1/1 /content/drive/MyDrive/Saif/img/img19.JPG: 480x640 9 persons, 5 chairs, 1 couch, 1 dining table, 2 tvs, 2 laptops, 225.6ms
Speed: 5.3ms preprocess, 225.6ms inference, 1.6ms postprocess per image at shape (1, 3, 480, 640)
img19.JPG: 7 labels saved (Teacher direction: right)

image 1/1 /content/drive/MyDrive/Saif/img/img26.JPG: 480x640 18 persons, 1 tie, 1 chair, 2 laptops, 3 books, 232.4ms
Speed: 5.3ms preprocess, 232.4ms inference, 2.3ms postprocess per image at shape (1, 3, 480, 640)
img

2nd version

In [None]:
import cv2
import numpy as np
from ultralytics import YOLO
import mediapipe as mp
import os

# --- Paths ---
# Images are read from input_folder; annotated copies are written to output_folder.
input_folder = '/content/drive/MyDrive/Saif/img'
output_folder = '/content/drive/MyDrive/Saif/vvsual'

# exist_ok keeps the cell re-runnable
os.makedirs(output_folder, exist_ok=True)

# --- Load YOLOv8 face detector ---
# Weights are loaded from Drive, so Drive must already be mounted.
model = YOLO('/content/drive/MyDrive/Saif/yolov8l-face-lindevs.pt')  # Make sure this path is correct!

# --- Setup MediaPipe Face Mesh ---
# NOTE(review): max_num_faces is 10, but the loop below only uses the first
# mesh returned for each face crop -- confirm whether 1 would be enough.
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(static_image_mode=True,
                                  max_num_faces=10,
                                  refine_landmarks=True,
                                  min_detection_confidence=0.5)

# --- Helper: scale a normalized landmark to integer pixel coordinates ---
def landmark_to_point(landmark, shape):
    """Return ``(x, y)`` pixel coordinates for ``landmark``.

    ``shape`` is read as ``(height, width, ...)``; ``landmark.x`` is scaled by
    the width, ``landmark.y`` by the height, and both are truncated with int().
    """
    rows, cols = shape[:2]
    px = int(landmark.x * cols)
    py = int(landmark.y * rows)
    return px, py

# --- Process images ---
# For each image: detect faces, run FaceMesh on a padded crop of each face and
# draw the box, a few key landmarks and lines from the nose tip onto the image.

# FaceMesh landmark indices used for drawing
# Nose tip: 1
# Chin: 152
# Left eye left corner: 33
# Right eye right corner: 263
# Left forehead (approx): 10
# Mouth left corner: 61
# Mouth right corner: 291
points_ids = {
    'nose_tip': 1,
    'chin': 152,
    'left_eye_outer': 33,
    'right_eye_outer': 263,
    'left_forehead': 10,
    'mouth_left': 61,
    'mouth_right': 291
}

# Extra pixels kept around each detected face for better landmark detection
margin = 20

for filename in os.listdir(input_folder):
    if not filename.lower().endswith(('.jpg', '.png', '.jpeg')):
        continue
    img_path = os.path.join(input_folder, filename)
    image = cv2.imread(img_path)
    if image is None:
        continue  # imread gave nothing usable; skip this file
    orig_h, orig_w = image.shape[:2]

    # --- Detect faces ---
    results = model(image)[0]
    boxes = results.boxes.xyxy.cpu().numpy()  # (x1, y1, x2, y2)

    for box in boxes:
        x1, y1, x2, y2 = map(int, box)
        # Padded crop, clamped to the image bounds
        x1m = max(0, x1 - margin)
        y1m = max(0, y1 - margin)
        x2m = min(orig_w, x2 + margin)
        y2m = min(orig_h, y2 + margin)

        face_crop = image[y1m:y2m, x1m:x2m]
        # cvtColor raises on an empty array, so skip degenerate boxes the same
        # way the person-crop cells in this notebook do.
        if face_crop.size == 0:
            continue

        # Convert BGR to RGB for MediaPipe
        face_rgb = cv2.cvtColor(face_crop, cv2.COLOR_BGR2RGB)

        # --- MediaPipe Face Mesh detection ---
        mp_results = face_mesh.process(face_rgb)
        if not mp_results.multi_face_landmarks:
            continue
        landmarks = mp_results.multi_face_landmarks[0]  # only the first mesh is used

        # --- Map key landmarks ---
        pts = {}
        for name, idx in points_ids.items():
            pt = landmark_to_point(landmarks.landmark[idx], face_crop.shape)
            pts[name] = (pt[0] + x1m, pt[1] + y1m)  # Map back to original image coords

        # --- Draw face bounding box ---
        cv2.rectangle(image, (x1, y1), (x2, y2), (0,255,0), 2)

        # --- Draw gaze lines ---
        # For simplicity, draw lines from nose tip to forehead, chin, and eyes

        # Nose tip point
        p_nose = pts['nose_tip']

        # Forehead
        p_forehead = pts['left_forehead']
        cv2.line(image, p_nose, p_forehead, (255, 0, 0), 2)

        # Chin
        p_chin = pts['chin']
        cv2.line(image, p_nose, p_chin, (0, 255, 255), 2)

        # Left eye
        p_left_eye = pts['left_eye_outer']
        cv2.line(image, p_nose, p_left_eye, (0, 0, 255), 2)

        # Right eye
        p_right_eye = pts['right_eye_outer']
        cv2.line(image, p_nose, p_right_eye, (0, 0, 255), 2)

        # Draw points for visibility
        for p in pts.values():
            cv2.circle(image, p, 3, (0,0,255), -1)

    # --- Save result ---
    # imwrite signals failure through its return value, so report it.
    out_path = os.path.join(output_folder, filename)
    if not cv2.imwrite(out_path, image):
        print(f"⚠️ Could not write '{out_path}'")

print("Done processing all images.")



0: 480x640 20 faces, 2077.6ms
Speed: 5.8ms preprocess, 2077.6ms inference, 1.1ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 16 faces, 1903.6ms
Speed: 5.0ms preprocess, 1903.6ms inference, 1.1ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 6 faces, 2002.6ms
Speed: 4.8ms preprocess, 2002.6ms inference, 1.4ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 20 faces, 3043.6ms
Speed: 6.9ms preprocess, 3043.6ms inference, 1.7ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 17 faces, 2339.9ms
Speed: 5.0ms preprocess, 2339.9ms inference, 1.2ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 16 faces, 1859.6ms
Speed: 5.1ms preprocess, 1859.6ms inference, 1.1ms postprocess per image at shape (1, 3, 480, 640)

0: 384x640 18 faces, 1517.8ms
Speed: 4.2ms preprocess, 1517.8ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 18 faces, 1519.4ms
Speed: 3.8ms preprocess, 1519.4ms inference, 1.0ms postproce

Full and final version

In [None]:
pip install ultralytics mediapipe opencv-python

Improved...............

In [None]:
import cv2
import numpy as np
from ultralytics import YOLO
import mediapipe as mp
import os

# --- Paths ---
# Images are read from input_folder; output_folder is created here for the results.
input_folder = '/content/drive/MyDrive/Saif/img'
output_folder = '/content/drive/MyDrive/Saif/visuuualll'
os.makedirs(output_folder, exist_ok=True)  # exist_ok keeps the cell re-runnable

# --- Load YOLOv8 face detector ---
# Weights are loaded from Drive, so Drive must already be mounted.
model = YOLO('/content/drive/MyDrive/Saif/yolov8s-face-lindevs.pt')

# --- Setup MediaPipe Face Mesh ---
# The loop below iterates over every mesh returned for a crop (up to max_num_faces).
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(
    static_image_mode=True,
    max_num_faces=10,
    refine_landmarks=True,
    min_detection_confidence=0.5
)

# --- Helper: Convert landmark to image point ---
def landmark_to_point(landmark, shape):
    """Scale a landmark's normalized ``x``/``y`` attributes to integer pixel coordinates.

    Args:
        landmark: Object exposing ``x`` and ``y`` attributes.
        shape: Sequence whose first two entries are (height, width).

    Returns:
        ``(x_px, y_px)`` as Python ints, truncated toward zero.
    """
    height, width = shape[:2]
    x_px = int(landmark.x * width)
    y_px = int(landmark.y * height)
    return x_px, y_px

# --- Process each image ---
# Face Mesh landmark indices sampled for every detected face (built once, not per face).
POINT_IDS = {
    'nose_tip': 1,
    'chin': 152,
    'left_eye_outer': 33,
    'right_eye_outer': 263,
    'left_forehead': 10,
    'mouth_left': 61,
    'mouth_right': 291
}

# Landmarks whose offsets to the nose tip are averaged into the 2D gaze direction.
GAZE_ANCHOR_KEYS = ['chin', 'left_eye_outer', 'right_eye_outer', 'mouth_left', 'mouth_right']

# BGR colour of the line drawn from each landmark to the extended nose point.
LINE_COLORS = {
    'left_forehead': (0, 255, 255),     # Yellow
    'chin': (255, 0, 0),              # Blue
    'left_eye_outer': (0, 0, 255),    # Red
    'right_eye_outer': (255, 255, 0), # Cyan
    'mouth_left': (255, 0, 255),      # Magenta
    'mouth_right': (0, 165, 255)      # Orange
}

FACE_MARGIN = 20        # pixels of context added around each detector box
GAZE_LINE_LENGTH = 190  # pixels between the nose tip and the extended nose point

for filename in os.listdir(input_folder):
    if not filename.lower().endswith(('.jpg', '.jpeg', '.png')):
        continue

    img_path = os.path.join(input_folder, filename)
    image = cv2.imread(img_path)
    if image is None:
        # Unreadable or non-image file: nothing to annotate.
        continue

    orig_h, orig_w = image.shape[:2]
    # Face crops are cut from this untouched copy. Cropping from `image` itself
    # would feed FaceMesh pixels already covered by the lines and dots drawn for
    # faces processed earlier in the same picture.
    clean_image = image.copy()

    results = model(image)[0]
    boxes = results.boxes.xyxy.cpu().numpy()

    for box in boxes:
        x1, y1, x2, y2 = (int(v) for v in box)
        x1m = max(0, x1 - FACE_MARGIN)
        y1m = max(0, y1 - FACE_MARGIN)
        x2m = min(orig_w, x2 + FACE_MARGIN)
        y2m = min(orig_h, y2 + FACE_MARGIN)

        face_crop = clean_image[y1m:y2m, x1m:x2m]
        face_rgb = cv2.cvtColor(face_crop, cv2.COLOR_BGR2RGB)
        mp_results = face_mesh.process(face_rgb)

        if not mp_results.multi_face_landmarks:
            continue

        for landmarks in mp_results.multi_face_landmarks:
            try:
                # Landmark coordinates mapped from crop space back to full-image pixels.
                pts = {}
                for name, idx in POINT_IDS.items():
                    px, py = landmark_to_point(landmarks.landmark[idx], face_crop.shape)
                    pts[name] = (px + x1m, py + y1m)

                # Average of the vectors pointing from each anchor landmark to the nose tip.
                p_nose = np.array(pts['nose_tip'])
                avg_dir = np.mean([p_nose - np.array(pts[key]) for key in GAZE_ANCHOR_KEYS], axis=0)
                norm = np.linalg.norm(avg_dir)
                if norm < 1e-6:
                    # Degenerate direction: skip drawing for this face.
                    continue
                avg_dir /= norm

                # Extended nose point, converted to plain ints for the OpenCV drawing calls.
                nose_extended = tuple(int(c) for c in (p_nose + avg_dir * GAZE_LINE_LENGTH).astype(int))

                # Draw colored gaze lines
                for key, color in LINE_COLORS.items():
                    cv2.line(image, pts[key], nose_extended, color, 2)

                # Draw landmark points (without names)
                for pt in pts.values():
                    cv2.circle(image, pt, 3, (0, 0, 255), -1)

                # Highlight extended nose point
                cv2.circle(image, nose_extended, 4, (0, 0, 255), -1)

            except Exception as e:
                print(f"⚠️ Error processing landmarks in {filename}: {e}")
                continue

    # Save processed image
    output_path = os.path.join(output_folder, filename)
    cv2.imwrite(output_path, image)

print("✅ Done. Keypoint labels removed and multicolor gaze lines applied.")


2D version: gaze-based engagement labels

In [None]:
import cv2
import numpy as np
from ultralytics import YOLO
import mediapipe as mp
import os

# Source images and the folder that receives the labeled copies (same file names).
input_folder = '/content/drive/MyDrive/Saif/img'
output_folder = '/content/drive/MyDrive/Saif/yyy'
os.makedirs(output_folder, exist_ok=True)

# Face detector (custom checkpoint on Drive) and a pose checkpoint whose class-0
# boxes are used as person boxes by the loop below.
# NOTE(review): per-image inference log lines still appear in this cell's saved
# output, so verbose=False on the constructor alone does not silence them.
face_model = YOLO('/content/drive/MyDrive/Saif/yolov8s-face-lindevs.pt', verbose=False)
body_model = YOLO('yolov8n-pose.pt', verbose=False)

# FaceMesh is run on one face crop at a time further down.
# NOTE(review): max_num_faces therefore caps faces per crop - confirm 30 is intended.
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(
    static_image_mode=True,
    max_num_faces=30,
    refine_landmarks=True,
    min_detection_confidence=0.5
)

def landmark_to_point(landmark, shape):
    """Convert a landmark with normalized ``x``/``y`` into ``(x, y)`` integer pixels.

    ``shape`` supplies (height, width) in its first two entries; the products
    are truncated with ``int``.
    """
    rows, cols = shape[:2]
    col_px = int(landmark.x * cols)
    row_px = int(landmark.y * rows)
    return col_px, row_px

def cosine_similarity(v1, v2):
    """Cosine of the angle between ``v1`` and ``v2``.

    Returns 0.0 when either argument is ``None`` or when the product of the
    two norms is not greater than 1e-6.
    """
    if v1 is None or v2 is None:
        return 0.0
    inner = np.dot(v1, v2)
    scale = np.linalg.norm(v1) * np.linalg.norm(v2)
    if scale > 1e-6:
        return inner / scale
    return 0.0

# Detector boxes shorter or narrower than these pixel sizes only receive the
# gaze-based Engaged/Distracted label; larger faces also get the eye/mouth checks.
MIN_FACE_HEIGHT_FOR_DETAILED = 70
MIN_FACE_WIDTH_FOR_DETAILED = 50
# A face whose eye_ratio falls below this value is labeled 'Sleepy'.
SLEEPY_EYE_RATIO_THRESH = 0.01
# A face whose mouth_ratio exceeds this value is labeled 'Talking'.
TALKING_MOUTH_RATIO_THRESH = 2.8
# Cosine similarity to the image's mean gaze below this value means 'Distracted'.
GAZE_SIMILARITY_THRESH = 0.4

# Face Mesh landmark indices sampled for every detected face (built once, not per face).
POINT_IDS = {
    'nose_tip': 1, 'chin': 152,
    'left_eye_outer': 33, 'right_eye_outer': 263,
    'left_forehead': 10, 'mouth_left': 61,
    'mouth_right': 291, 'left_eye_top': 159,
    'left_eye_bottom': 145, 'right_eye_top': 386,
    'right_eye_bottom': 374
}

# Landmarks whose offsets to the nose tip are averaged into the 2D gaze direction.
GAZE_ANCHOR_KEYS = ['chin', 'left_eye_outer', 'right_eye_outer', 'mouth_left', 'mouth_right']

# BGR colour of the line drawn from each landmark to the extended nose point.
LINE_COLORS = {
    'left_forehead': (0, 255, 255),
    'chin': (255, 0, 0),
    'left_eye_outer': (0, 0, 255),
    'right_eye_outer': (255, 255, 0),
    'mouth_left': (255, 0, 255),
    'mouth_right': (0, 165, 255)
}

FACE_MARGIN = 20          # pixels of context added around each face box
GAZE_LINE_LENGTH = 190    # pixels between the nose tip and the extended nose point
MAX_FACE_BODY_DIST = 150  # max pixel distance between a face centre and a body centre


def _landmark_gap(pts, a, b):
    """Euclidean pixel distance between the named landmark points ``a`` and ``b``."""
    return np.linalg.norm(np.array(pts[a]) - np.array(pts[b]))


def _safe_ratio(numerator, denominator):
    """Return ``numerator / denominator``, or 0 when the denominator is ~zero."""
    return numerator / denominator if denominator > 1e-6 else 0


for filename in os.listdir(input_folder):
    if not filename.lower().endswith(('.jpg', '.jpeg', '.png')):
        continue
    img_path = os.path.join(input_folder, filename)
    image = cv2.imread(img_path)
    if image is None:
        continue

    orig_h, orig_w = image.shape[:2]
    # Face crops are cut from this untouched copy so the gaze lines drawn for
    # earlier faces never end up inside the pixels handed to FaceMesh.
    clean_image = image.copy()

    # verbose=False on the call (not only the constructor) suppresses the
    # per-image log line printed by each inference.
    body_results = body_model(image, verbose=False)[0]
    person_boxes = [b for b in body_results.boxes.data.cpu().numpy() if int(b[5]) == 0]

    face_results = face_model(image, verbose=False)[0]
    boxes = face_results.boxes.xyxy.cpu().numpy().astype(int)
    face_data = []

    for box in boxes:
        # Plain Python ints keep every derived coordinate acceptable to OpenCV.
        x1, y1, x2, y2 = (int(v) for v in box)
        x1m = max(0, x1 - FACE_MARGIN)
        y1m = max(0, y1 - FACE_MARGIN)
        x2m = min(orig_w, x2 + FACE_MARGIN)
        y2m = min(orig_h, y2 + FACE_MARGIN)

        face_crop = clean_image[y1m:y2m, x1m:x2m]
        face_rgb = cv2.cvtColor(face_crop, cv2.COLOR_BGR2RGB)
        mp_results = face_mesh.process(face_rgb)

        face_center = ((x1 + x2) // 2, (y1 + y2) // 2)
        face_w, face_h = x2 - x1, y2 - y1

        if not mp_results.multi_face_landmarks:
            # Detector saw a face but no mesh could be fitted.
            face_data.append({'center': face_center, 'label': 'Turned', 'gaze_vector': None, 'engagement_percent': None})
            continue

        for landmarks in mp_results.multi_face_landmarks:
            try:
                # Landmark coordinates mapped from crop space back to full-image pixels.
                pts = {}
                for name, idx in POINT_IDS.items():
                    px, py = landmark_to_point(landmarks.landmark[idx], face_crop.shape)
                    pts[name] = (px + x1m, py + y1m)

                p_nose = np.array(pts['nose_tip'])
                avg_dir = np.mean([p_nose - np.array(pts[key]) for key in GAZE_ANCHOR_KEYS], axis=0)
                norm = np.linalg.norm(avg_dir)
                gaze_vec = avg_dir / norm if norm > 1e-6 else None

                if gaze_vec is not None:
                    nose_extended = tuple(int(c) for c in (p_nose + gaze_vec * GAZE_LINE_LENGTH).astype(int))
                    for key, color in LINE_COLORS.items():
                        cv2.line(image, pts[key], nose_extended, color, 2)

                # NOTE(review): the "width" in both eye ratios is the distance from the
                # outer eye corner to the 'left_forehead' landmark, and mouth_h is the
                # chin-to-nose distance. The thresholds are presumably tuned against
                # these quantities - confirm before changing either.
                left_ratio = _safe_ratio(_landmark_gap(pts, 'left_eye_top', 'left_eye_bottom'),
                                         _landmark_gap(pts, 'left_eye_outer', 'left_forehead'))
                right_ratio = _safe_ratio(_landmark_gap(pts, 'right_eye_top', 'right_eye_bottom'),
                                          _landmark_gap(pts, 'right_eye_outer', 'left_forehead'))
                eye_ratio = (left_ratio + right_ratio) / 2

                mouth_w = _landmark_gap(pts, 'mouth_right', 'mouth_left')
                mouth_h = _landmark_gap(pts, 'chin', 'nose_tip')
                mouth_ratio = _safe_ratio(mouth_h, mouth_w)

                face_data.append({
                    'center': face_center, 'label': 'Unknown', 'gaze_vector': gaze_vec,
                    'engagement_percent': None,
                    'eye_ratio': eye_ratio, 'mouth_ratio': mouth_ratio,
                    'face_w': face_w, 'face_h': face_h
                })

            except Exception as e:
                # Still fall back to 'Turned', but report the failure instead of hiding it
                # (a bare except would also swallow KeyboardInterrupt).
                print(f"⚠️ Error processing landmarks in {filename}: {e}")
                face_data.append({'center': face_center, 'label': 'Turned', 'gaze_vector': None, 'engagement_percent': None})
                continue

    valid_gaze_vectors = [f['gaze_vector'] for f in face_data if f['gaze_vector'] is not None]
    if valid_gaze_vectors:
        mean_gaze = np.mean(valid_gaze_vectors, axis=0)
        mean_norm = np.linalg.norm(mean_gaze)
        # Opposing gazes can cancel out; with no usable consensus direction
        # cosine_similarity(gv, None) yields 0.0 instead of dividing by zero.
        mean_gaze = mean_gaze / mean_norm if mean_norm > 1e-6 else None

        for f in face_data:
            gv = f['gaze_vector']
            if gv is not None:
                similarity = cosine_similarity(gv, mean_gaze)
                percent = int(similarity * 100)
                f['engagement_percent'] = percent

                if f['face_w'] < MIN_FACE_WIDTH_FOR_DETAILED or f['face_h'] < MIN_FACE_HEIGHT_FOR_DETAILED:
                    # Small faces: gaze agreement only.
                    f['label'] = 'Engaged' if similarity >= GAZE_SIMILARITY_THRESH else 'Distracted'
                else:
                    if f['eye_ratio'] < SLEEPY_EYE_RATIO_THRESH:
                        f['label'] = 'Sleepy'
                    elif f['mouth_ratio'] > TALKING_MOUTH_RATIO_THRESH:
                        f['label'] = 'Talking'
                    elif similarity < GAZE_SIMILARITY_THRESH:
                        f['label'] = 'Distracted'
                    else:
                        f['label'] = 'Engaged'
            else:
                f['engagement_percent'] = None
                f['label'] = 'Turned'
    else:
        for f in face_data:
            f['engagement_percent'] = None
            f['label'] = 'Turned'

    for body in person_boxes:
        x1, y1, x2, y2, _, _ = map(int, body[:6])
        body_center = ((x1 + x2) // 2, (y1 + y2) // 2)

        # Nearest face centre to this body centre, within MAX_FACE_BODY_DIST pixels.
        closest_face = None
        min_dist = float('inf')
        for face in face_data:
            fx, fy = face['center']
            dist = np.linalg.norm(np.array([fx, fy]) - np.array(body_center))
            if dist < min_dist and dist < MAX_FACE_BODY_DIST:
                min_dist = dist
                closest_face = face

        cv2.rectangle(image, (x1, y1), (x2, y2), (100, 200, 100), 2)

        if closest_face is not None:
            label = closest_face['label']
            percent = closest_face.get('engagement_percent', None)
            text = f"{label} {percent}%" if percent is not None else label

            if label.lower() == 'engaged':
                color = (0, 255, 255)  # Yellow
            elif label.lower() == 'distracted':
                color = (0, 0, 255)    # Red
            else:
                color = (100, 200, 100)

            cv2.putText(image, text, (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, color, 2)

    # ✅ Save labeled image for this file
    cv2.imwrite(os.path.join(output_folder, filename), image)

print("✅ All done. Check output folder for labeled images.")



0: 480x640 12 persons, 987.8ms
Speed: 29.6ms preprocess, 987.8ms inference, 10.1ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 19 faces, 971.9ms
Speed: 16.6ms preprocess, 971.9ms inference, 1.2ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 6 persons, 263.2ms
Speed: 7.9ms preprocess, 263.2ms inference, 1.9ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 7 faces, 592.5ms
Speed: 10.8ms preprocess, 592.5ms inference, 1.7ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 13 persons, 266.5ms
Speed: 8.1ms preprocess, 266.5ms inference, 2.1ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 21 faces, 543.6ms
Speed: 9.4ms preprocess, 543.6ms inference, 1.6ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 6 persons, 259.7ms
Speed: 7.9ms preprocess, 259.7ms inference, 1.9ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 15 faces, 592.2ms
Speed: 9.3ms preprocess, 592.2ms inference, 2.5ms postprocess per

Final version (dead end)

In [None]:
import cv2
import numpy as np
from ultralytics import YOLO
import mediapipe as mp
import os

# Folder of source images.
input_folder = '/content/drive/MyDrive/Saif/img'

# Folder that receives the annotated copies (same file names).
output_folder = '/content/drive/MyDrive/Saif/percenttt'

os.makedirs(output_folder, exist_ok=True)

# Face detector checkpoint on Drive, plus a pose checkpoint whose class-0 boxes
# are used as person boxes by the loop below.
face_model = YOLO('/content/drive/MyDrive/Saif/yolov8s-face-lindevs.pt')
body_model = YOLO('yolov8n-pose.pt')  # body detection model


# FaceMesh is run on one face crop at a time further down.
# NOTE(review): max_num_faces therefore caps faces per crop - confirm 30 is intended.
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(
    static_image_mode=True,
    max_num_faces=30,
    refine_landmarks=True,
    min_detection_confidence=0.5
)

def landmark_to_point(landmark, shape):
    """Return the ``(x, y)`` pixel position of a normalized landmark.

    The landmark's ``x`` is scaled by the width and its ``y`` by the height
    taken from ``shape[:2]``; both results are truncated to ``int``.
    """
    img_h, img_w = shape[:2]
    return (int(landmark.x * img_w), int(landmark.y * img_h))

def unit_vector(v):
    """Return ``v`` scaled to unit length, or ``None`` if its norm is below 1e-6."""
    length = np.linalg.norm(v)
    return None if length < 1e-6 else v / length

def cosine_similarity(v1, v2):
    """Cosine of the angle between ``v1`` and ``v2``.

    Returns 0.0 when the product of the two norms is below 1e-6.
    """
    inner = np.dot(v1, v2)
    scale = np.linalg.norm(v1) * np.linalg.norm(v2)
    return 0.0 if scale < 1e-6 else inner / scale

# Face Mesh landmark indices sampled for every detected face (built once, not per face).
POINT_IDS = {
    'nose_tip': 1,
    'chin': 152,
    'left_eye_outer': 33,
    'right_eye_outer': 263,
    'left_forehead': 10,
    'mouth_left': 61,
    'mouth_right': 291
}

# Landmarks whose offsets to the nose tip are averaged into the 2D gaze direction.
GAZE_ANCHOR_KEYS = ['chin', 'left_eye_outer', 'right_eye_outer', 'mouth_left', 'mouth_right']

# BGR colour of the line drawn from each landmark to the extended nose point.
LINE_COLORS = {
    'left_forehead': (0, 255, 255),
    'chin': (255, 0, 0),
    'left_eye_outer': (0, 0, 255),
    'right_eye_outer': (255, 255, 0),
    'mouth_left': (255, 0, 255),
    'mouth_right': (0, 165, 255)
}

FACE_MARGIN = 20              # pixels of context added around each face box
GAZE_LINE_LENGTH = 190        # pixels between the nose tip and the extended nose point
MAX_FACE_BODY_DIST = 150      # max pixel distance between a face centre and a body centre
ENGAGED_MIN_SIMILARITY = 0.5  # cosine similarity below this is 'Distracted'

for filename in os.listdir(input_folder):
    if not filename.lower().endswith(('.jpg', '.jpeg', '.png')):
        continue
    img_path = os.path.join(input_folder, filename)
    image = cv2.imread(img_path)
    if image is None:
        continue

    orig_h, orig_w = image.shape[:2]
    # Face crops are cut from this untouched copy so the lines and dots drawn for
    # earlier faces never end up inside the pixels handed to FaceMesh.
    clean_image = image.copy()

    # Detect bodies
    body_results = body_model(image)[0]
    person_boxes = [b for b in body_results.boxes.data.cpu().numpy() if int(b[5]) == 0]

    # Detect faces
    face_results = face_model(image)[0]
    boxes = face_results.boxes.xyxy.cpu().numpy().astype(int)

    face_data = []

    for box in boxes:
        # Plain Python ints keep every derived coordinate acceptable to OpenCV.
        x1, y1, x2, y2 = (int(v) for v in box)
        x1m = max(0, x1 - FACE_MARGIN)
        y1m = max(0, y1 - FACE_MARGIN)
        x2m = min(orig_w, x2 + FACE_MARGIN)
        y2m = min(orig_h, y2 + FACE_MARGIN)

        face_crop = clean_image[y1m:y2m, x1m:x2m]
        face_rgb = cv2.cvtColor(face_crop, cv2.COLOR_BGR2RGB)
        mp_results = face_mesh.process(face_rgb)

        face_center = ((x1 + x2) // 2, (y1 + y2) // 2)

        if not mp_results.multi_face_landmarks:
            # If no face landmarks detected, mark as Turned
            face_data.append({'center': face_center, 'label': 'Turned', 'gaze_vector': None, 'engagement_percent': None})
            continue

        for landmarks in mp_results.multi_face_landmarks:
            try:
                # Landmark coordinates mapped from crop space back to full-image pixels.
                pts = {}
                for name, idx in POINT_IDS.items():
                    px, py = landmark_to_point(landmarks.landmark[idx], face_crop.shape)
                    pts[name] = (px + x1m, py + y1m)

                p_nose = np.array(pts['nose_tip'])
                avg_dir = np.mean([p_nose - np.array(pts[key]) for key in GAZE_ANCHOR_KEYS], axis=0)
                # None when the averaged direction is degenerate; such a face keeps the
                # default 'Engaged' label and gets no percentage.
                gaze_vec = unit_vector(avg_dir)

                nose_extended = None
                if gaze_vec is not None:
                    nose_extended = tuple(int(c) for c in (p_nose + gaze_vec * GAZE_LINE_LENGTH).astype(int))
                    for key, color in LINE_COLORS.items():
                        cv2.line(image, pts[key], nose_extended, color, 2)
                for pt in pts.values():
                    cv2.circle(image, pt, 3, (0, 0, 255), -1)
                if nose_extended is not None:
                    cv2.circle(image, nose_extended, 4, (0, 0, 255), -1)

                face_data.append({'center': face_center, 'label': 'Engaged', 'gaze_vector': gaze_vec, 'engagement_percent': None})

            except Exception as e:
                print(f"⚠️ Error in {filename}: {e}")
                continue

    # Consensus gaze: unit mean of all valid gaze vectors. It is None when there are
    # no valid vectors or when they cancel out; dividing by that zero norm would
    # produce NaN and make int(similarity * 100) raise.
    valid_gaze_vectors = [f['gaze_vector'] for f in face_data if f['gaze_vector'] is not None]
    mean_gaze = unit_vector(np.mean(valid_gaze_vectors, axis=0)) if valid_gaze_vectors else None

    for f in face_data:
        gv = f['gaze_vector']
        if gv is None or mean_gaze is None:
            f['engagement_percent'] = None
            continue
        similarity = cosine_similarity(gv, mean_gaze)
        f['engagement_percent'] = int(similarity * 100)
        # Threshold for engaged vs distracted
        f['label'] = 'Distracted' if similarity < ENGAGED_MIN_SIMILARITY else 'Engaged'

    # Draw body boxes and the label of the nearest face. Labels in this cell are
    # only 'Turned', 'Engaged' or 'Distracted', so no 'Unknown' filter is needed.
    for body in person_boxes:
        x1, y1, x2, y2, _, _ = map(int, body[:6])
        body_center = ((x1 + x2) // 2, (y1 + y2) // 2)

        closest_face = None
        min_dist = float('inf')
        for face in face_data:
            fx, fy = face['center']
            dist = np.linalg.norm(np.array([fx, fy]) - np.array(body_center))
            if dist < min_dist and dist < MAX_FACE_BODY_DIST:
                min_dist = dist
                closest_face = face

        cv2.rectangle(image, (x1, y1), (x2, y2), (100, 200, 100), 2)

        if closest_face is not None:
            label = closest_face['label']
            percent = closest_face.get('engagement_percent', None)
            text = f"{label}: {percent}%" if percent is not None else f"{label}: ."
            cv2.putText(image, text, (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (100, 200, 100), 2)

    cv2.imwrite(os.path.join(output_folder, filename), image)

print("✅ Done. Improved gaze detection with cosine similarity, thresholding, and engagement percentages.")



0: 480x640 12 persons, 253.0ms
Speed: 12.0ms preprocess, 253.0ms inference, 1.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 19 faces, 512.6ms
Speed: 5.4ms preprocess, 512.6ms inference, 1.3ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 13 persons, 200.2ms
Speed: 5.8ms preprocess, 200.2ms inference, 1.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 14 faces, 471.1ms
Speed: 5.4ms preprocess, 471.1ms inference, 1.3ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 6 persons, 203.7ms
Speed: 5.5ms preprocess, 203.7ms inference, 1.6ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 7 faces, 478.3ms
Speed: 5.9ms preprocess, 478.3ms inference, 1.3ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 13 persons, 198.4ms
Speed: 6.3ms preprocess, 198.4ms inference, 1.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 21 faces, 500.1ms
Speed: 4.9ms preprocess, 500.1ms inference, 1.3ms postprocess per i

This version uses 5 classes to label the dataset (23 July, morning)

In [None]:
import cv2
import numpy as np
from ultralytics import YOLO
import mediapipe as mp
import os

# Validation images to label, and the folder that receives one YOLO-format
# .txt label file per image.
input_folder = '/content/drive/MyDrive/Dataset/images/val'
output_folder = '/content/drive/MyDrive/Dataset/labels/val'
os.makedirs(output_folder, exist_ok=True)

# Face detector (custom checkpoint on Drive) and a pose checkpoint whose class-0
# boxes become the labeled person boxes.
# NOTE(review): per-image inference log lines still appear in this cell's saved
# output, so verbose=False on the constructor alone does not silence them.
face_model = YOLO('/content/drive/MyDrive/Saif/yolov8s-face-lindevs.pt', verbose=False)
body_model = YOLO('yolov8n-pose.pt', verbose=False)

# FaceMesh is run on one face crop at a time further down.
# NOTE(review): max_num_faces therefore caps faces per crop - confirm 30 is intended.
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(
    static_image_mode=True,
    max_num_faces=30,
    refine_landmarks=True,
    min_detection_confidence=0.5
)

def landmark_to_point(landmark, shape):
    """Translate a normalized landmark into integer ``(x, y)`` pixel coordinates.

    ``shape[:2]`` is read as (height, width); ``landmark.x`` is multiplied by
    the width and ``landmark.y`` by the height before truncation to ``int``.
    """
    frame_h, frame_w = shape[:2]
    pixel_x = int(landmark.x * frame_w)
    pixel_y = int(landmark.y * frame_h)
    return pixel_x, pixel_y

def cosine_similarity(v1, v2):
    """Cosine similarity of two vectors, with 0.0 as the fallback value.

    The fallback is returned when either vector is ``None`` or when the
    product of their norms does not exceed 1e-6.
    """
    if v1 is None or v2 is None:
        return 0.0
    numerator = np.dot(v1, v2)
    denominator = np.linalg.norm(v1) * np.linalg.norm(v2)
    if denominator > 1e-6:
        return numerator / denominator
    return 0.0

# Detector boxes shorter or narrower than these pixel sizes only receive the
# gaze-based Engaged/Distracted label; larger faces also get the eye/mouth checks.
MIN_FACE_HEIGHT_FOR_DETAILED = 70
MIN_FACE_WIDTH_FOR_DETAILED = 50
# A face whose eye_ratio falls below this value is labeled 'Sleepy'.
SLEEPY_EYE_RATIO_THRESH = 0.01
# A face whose mouth_ratio exceeds this value is labeled 'Talking'.
TALKING_MOUTH_RATIO_THRESH = 2.8
# Cosine similarity to the image's mean gaze below this value means 'Distracted'.
GAZE_SIMILARITY_THRESH = 0.4

# Face Mesh landmark indices sampled for every detected face (built once, not per face).
POINT_IDS = {
    'nose_tip': 1, 'chin': 152,
    'left_eye_outer': 33, 'right_eye_outer': 263,
    'left_forehead': 10, 'mouth_left': 61,
    'mouth_right': 291, 'left_eye_top': 159,
    'left_eye_bottom': 145, 'right_eye_top': 386,
    'right_eye_bottom': 374
}

# Landmarks whose offsets to the nose tip are averaged into the 2D gaze direction.
GAZE_ANCHOR_KEYS = ['chin', 'left_eye_outer', 'right_eye_outer', 'mouth_left', 'mouth_right']

# Class index written to the label file for each engagement label.
CLASS_IDS = {'Engaged': 0, 'Distracted': 1, 'Talking': 2, 'Sleepy': 3, 'Turned': 4}

FACE_MARGIN = 20          # pixels of context added around each face box
MAX_FACE_BODY_DIST = 150  # max pixel distance between a face centre and a body centre


def _landmark_gap(pts, a, b):
    """Euclidean pixel distance between the named landmark points ``a`` and ``b``."""
    return np.linalg.norm(np.array(pts[a]) - np.array(pts[b]))


def _safe_ratio(numerator, denominator):
    """Return ``numerator / denominator``, or 0 when the denominator is ~zero."""
    return numerator / denominator if denominator > 1e-6 else 0


for filename in os.listdir(input_folder):
    if not filename.lower().endswith(('.jpg', '.jpeg', '.png')):
        continue
    img_path = os.path.join(input_folder, filename)
    image = cv2.imread(img_path)
    if image is None:
        continue

    orig_h, orig_w = image.shape[:2]
    # verbose=False on the call (not only the constructor) suppresses the
    # per-image log line printed by each inference.
    body_results = body_model(image, verbose=False)[0]
    person_boxes = [b for b in body_results.boxes.data.cpu().numpy() if int(b[5]) == 0]

    face_results = face_model(image, verbose=False)[0]
    boxes = face_results.boxes.xyxy.cpu().numpy().astype(int)
    face_data = []

    for box in boxes:
        x1, y1, x2, y2 = (int(v) for v in box)
        x1m = max(0, x1 - FACE_MARGIN)
        y1m = max(0, y1 - FACE_MARGIN)
        x2m = min(orig_w, x2 + FACE_MARGIN)
        y2m = min(orig_h, y2 + FACE_MARGIN)

        face_crop = image[y1m:y2m, x1m:x2m]
        face_rgb = cv2.cvtColor(face_crop, cv2.COLOR_BGR2RGB)
        mp_results = face_mesh.process(face_rgb)

        face_center = ((x1 + x2) // 2, (y1 + y2) // 2)
        face_w, face_h = x2 - x1, y2 - y1

        if not mp_results.multi_face_landmarks:
            # Detector saw a face but no mesh could be fitted.
            face_data.append({'center': face_center, 'label': 'Turned', 'gaze_vector': None, 'engagement_percent': None})
            continue

        for landmarks in mp_results.multi_face_landmarks:
            try:
                # Landmark coordinates mapped from crop space back to full-image pixels.
                pts = {}
                for name, idx in POINT_IDS.items():
                    px, py = landmark_to_point(landmarks.landmark[idx], face_crop.shape)
                    pts[name] = (px + x1m, py + y1m)

                p_nose = np.array(pts['nose_tip'])
                avg_dir = np.mean([p_nose - np.array(pts[key]) for key in GAZE_ANCHOR_KEYS], axis=0)
                norm = np.linalg.norm(avg_dir)
                gaze_vec = avg_dir / norm if norm > 1e-6 else None

                # NOTE(review): the "width" in both eye ratios is the distance from the
                # outer eye corner to the 'left_forehead' landmark, and mouth_h is the
                # chin-to-nose distance. The thresholds are presumably tuned against
                # these quantities - confirm before changing either.
                left_ratio = _safe_ratio(_landmark_gap(pts, 'left_eye_top', 'left_eye_bottom'),
                                         _landmark_gap(pts, 'left_eye_outer', 'left_forehead'))
                right_ratio = _safe_ratio(_landmark_gap(pts, 'right_eye_top', 'right_eye_bottom'),
                                          _landmark_gap(pts, 'right_eye_outer', 'left_forehead'))
                eye_ratio = (left_ratio + right_ratio) / 2

                mouth_w = _landmark_gap(pts, 'mouth_right', 'mouth_left')
                mouth_h = _landmark_gap(pts, 'chin', 'nose_tip')
                mouth_ratio = _safe_ratio(mouth_h, mouth_w)

                face_data.append({
                    'center': face_center, 'label': 'Unknown', 'gaze_vector': gaze_vec,
                    'engagement_percent': None,
                    'eye_ratio': eye_ratio, 'mouth_ratio': mouth_ratio,
                    'face_w': face_w, 'face_h': face_h
                })

            except Exception as e:
                # Still fall back to 'Turned', but report it: a silent failure here
                # would quietly become a class-4 label in the dataset.
                print(f"⚠️ Error processing landmarks in {filename}: {e}")
                face_data.append({'center': face_center, 'label': 'Turned', 'gaze_vector': None, 'engagement_percent': None})
                continue

    valid_gaze_vectors = [f['gaze_vector'] for f in face_data if f['gaze_vector'] is not None]
    if valid_gaze_vectors:
        mean_gaze = np.mean(valid_gaze_vectors, axis=0)
        mean_norm = np.linalg.norm(mean_gaze)
        # Opposing gazes can cancel out; with no usable consensus direction
        # cosine_similarity(gv, None) yields 0.0 instead of dividing by zero.
        mean_gaze = mean_gaze / mean_norm if mean_norm > 1e-6 else None

        for f in face_data:
            gv = f['gaze_vector']
            if gv is not None:
                similarity = cosine_similarity(gv, mean_gaze)
                percent = int(similarity * 100)
                f['engagement_percent'] = percent

                if f['face_w'] < MIN_FACE_WIDTH_FOR_DETAILED or f['face_h'] < MIN_FACE_HEIGHT_FOR_DETAILED:
                    # Small faces: gaze agreement only.
                    f['label'] = 'Engaged' if similarity >= GAZE_SIMILARITY_THRESH else 'Distracted'
                else:
                    if f['eye_ratio'] < SLEEPY_EYE_RATIO_THRESH:
                        f['label'] = 'Sleepy'
                    elif f['mouth_ratio'] > TALKING_MOUTH_RATIO_THRESH:
                        f['label'] = 'Talking'
                    elif similarity < GAZE_SIMILARITY_THRESH:
                        f['label'] = 'Distracted'
                    else:
                        f['label'] = 'Engaged'
            else:
                f['engagement_percent'] = None
                f['label'] = 'Turned'
    else:
        for f in face_data:
            f['engagement_percent'] = None
            f['label'] = 'Turned'

    # Save YOLO label file: one "class xc yc w h" line (normalized to the image
    # size) per person box that has a face within MAX_FACE_BODY_DIST pixels.
    txt_filename = os.path.splitext(filename)[0] + ".txt"
    txt_path = os.path.join(output_folder, txt_filename)
    with open(txt_path, 'w') as label_file:
        for body in person_boxes:
            x1, y1, x2, y2, _, _ = map(int, body[:6])
            body_center = ((x1 + x2) // 2, (y1 + y2) // 2)

            closest_face = None
            min_dist = float('inf')
            for face in face_data:
                fx, fy = face['center']
                dist = np.linalg.norm(np.array([fx, fy]) - np.array(body_center))
                if dist < min_dist and dist < MAX_FACE_BODY_DIST:
                    min_dist = dist
                    closest_face = face

            if closest_face is None:
                # Person boxes without a nearby face are left unlabeled.
                continue

            # Any label outside CLASS_IDS (e.g. 'Unknown') falls back to class 4.
            class_id = CLASS_IDS.get(closest_face['label'], 4)

            xc = (x1 + x2) / 2 / orig_w
            yc = (y1 + y2) / 2 / orig_h
            w = (x2 - x1) / orig_w
            h = (y2 - y1) / orig_h

            label_file.write(f"{class_id} {xc:.6f} {yc:.6f} {w:.6f} {h:.6f}\n")

print("✅ Label .txt files saved for all images in output folder.")



0: 384x640 11 persons, 351.5ms
Speed: 10.5ms preprocess, 351.5ms inference, 2.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 10 faces, 767.8ms
Speed: 8.3ms preprocess, 767.8ms inference, 2.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 11 persons, 311.8ms
Speed: 7.3ms preprocess, 311.8ms inference, 2.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 11 faces, 721.3ms
Speed: 9.4ms preprocess, 721.3ms inference, 2.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 12 persons, 232.0ms
Speed: 6.8ms preprocess, 232.0ms inference, 1.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 11 faces, 500.3ms
Speed: 5.5ms preprocess, 500.3ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 11 persons, 215.0ms
Speed: 6.0ms preprocess, 215.0ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 11 faces, 507.8ms
Speed: 6.3ms preprocess, 507.8ms inference, 1.6ms postprocess per

Training model

In [None]:
!pip install ultralytics


In [None]:
from ultralytics import YOLO

# --- Training configuration (tune here rather than inside the call) ---
BASE_WEIGHTS = 'yolov8n.pt'  # pretrained checkpoint used as the starting point
DATA_CONFIG = '/content/drive/MyDrive/Dataset/data.yml'  # dataset definition file
EPOCHS = 30
IMAGE_SIZE = 640
BATCH_SIZE = 16

# Load the base model that will be fine-tuned on the dataset above.
model = YOLO(BASE_WEIGHTS)

# Train. The return value is bound to a name on purpose: leaving the call as the
# cell's last bare expression makes the notebook echo the entire metrics object
# (including its long curve arrays) into the cell output.
train_metrics = model.train(
    data=DATA_CONFIG,
    epochs=EPOCHS,
    imgsz=IMAGE_SIZE,
    batch=BATCH_SIZE,
)


Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8n.pt to 'yolov8n.pt'...


100%|██████████| 6.25M/6.25M [00:00<00:00, 103MB/s]


Ultralytics 8.3.169 🚀 Python-3.11.13 torch-2.6.0+cu124 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/content/drive/MyDrive/Dataset/data.yml, degrees=0.0, deterministic=True, device=None, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=30, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8n.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=train, nbs=64, nms=False, opset=None, optimize=False, optimizer=auto, overlap_mask=True, patience=100, perspective=0.0, plots=Tru

100%|██████████| 755k/755k [00:00<00:00, 22.1MB/s]

Overriding model.yaml nc=80 with nc=5

                   from  n    params  module                                       arguments                     
  0                  -1  1       464  ultralytics.nn.modules.conv.Conv             [3, 16, 3, 2]                 
  1                  -1  1      4672  ultralytics.nn.modules.conv.Conv             [16, 32, 3, 2]                
  2                  -1  1      7360  ultralytics.nn.modules.block.C2f             [32, 32, 1, True]             
  3                  -1  1     18560  ultralytics.nn.modules.conv.Conv             [32, 64, 3, 2]                
  4                  -1  2     49664  ultralytics.nn.modules.block.C2f             [64, 64, 2, True]             
  5                  -1  1     73984  ultralytics.nn.modules.conv.Conv             [64, 128, 3, 2]               
  6                  -1  2    197632  ultralytics.nn.modules.block.C2f             [128, 128, 2, True]           
  7                  -1  1    295424  ultralytics




 21                  -1  1    493056  ultralytics.nn.modules.block.C2f             [384, 256, 1]                 
 22        [15, 18, 21]  1    752287  ultralytics.nn.modules.head.Detect           [5, [64, 128, 256]]           
Model summary: 129 layers, 3,011,823 parameters, 3,011,807 gradients, 8.2 GFLOPs

Transferred 319/355 items from pretrained weights
Freezing layer 'model.22.dfl.conv.weight'
[34m[1mAMP: [0mrunning Automatic Mixed Precision (AMP) checks...
Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n.pt to 'yolo11n.pt'...


100%|██████████| 5.35M/5.35M [00:00<00:00, 99.5MB/s]


[34m[1mAMP: [0mchecks passed ✅
[34m[1mtrain: [0mFast image access ✅ (ping: 0.4±0.1 ms, read: 1.2±0.5 MB/s, size: 513.7 KB)


[34m[1mtrain: [0mScanning /content/drive/MyDrive/Dataset/labels/train.cache... 1500 images, 0 backgrounds, 0 corrupt: 100%|██████████| 1500/1500 [00:00<?, ?it/s]


[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, method='weighted_average', num_output_channels=3), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))
[34m[1mval: [0mFast image access ✅ (ping: 0.4±0.1 ms, read: 1.1±0.3 MB/s, size: 514.9 KB)


[34m[1mval: [0mScanning /content/drive/MyDrive/Dataset/labels/val.cache... 400 images, 0 backgrounds, 0 corrupt: 100%|██████████| 400/400 [00:00<?, ?it/s]


Plotting labels to runs/detect/train/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.001111, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 2 dataloader workers
Logging results to [1mruns/detect/train[0m
Starting training for 30 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/30      2.17G      1.048        2.1      1.053        171        640: 100%|██████████| 94/94 [09:14<00:00,  5.90s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [00:08<00:00,  1.55it/s]

                   all        400       2889      0.791      0.155       0.27      0.203






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/30      2.28G     0.9042       1.26     0.9977        147        640: 100%|██████████| 94/94 [00:42<00:00,  2.20it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [00:07<00:00,  1.74it/s]

                   all        400       2889      0.616      0.403      0.391      0.307






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/30       2.3G     0.8733      1.168     0.9976        125        640: 100%|██████████| 94/94 [00:43<00:00,  2.16it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [00:05<00:00,  2.27it/s]

                   all        400       2889      0.634      0.462      0.404      0.316






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/30       2.3G     0.8494      1.095      0.988        148        640: 100%|██████████| 94/94 [00:45<00:00,  2.08it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [00:07<00:00,  1.71it/s]


                   all        400       2889      0.634      0.463      0.432       0.34

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       5/30       2.3G     0.8292      1.037     0.9802        197        640: 100%|██████████| 94/94 [00:43<00:00,  2.17it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [00:05<00:00,  2.23it/s]


                   all        400       2889      0.637      0.501      0.434      0.348

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       6/30       2.3G     0.8028     0.9692     0.9716        132        640: 100%|██████████| 94/94 [00:43<00:00,  2.18it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [00:07<00:00,  1.73it/s]

                   all        400       2889      0.605      0.516      0.433      0.344






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       7/30       2.3G     0.7869     0.9549     0.9639        130        640: 100%|██████████| 94/94 [00:43<00:00,  2.17it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [00:08<00:00,  1.60it/s]

                   all        400       2889      0.621      0.499      0.443      0.355






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       8/30       2.3G     0.7893     0.9293     0.9665        197        640: 100%|██████████| 94/94 [00:43<00:00,  2.18it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [00:06<00:00,  2.11it/s]

                   all        400       2889      0.632      0.484      0.465      0.376






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       9/30      2.31G      0.768     0.9078     0.9582        130        640: 100%|██████████| 94/94 [00:44<00:00,  2.10it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [00:06<00:00,  2.10it/s]

                   all        400       2889      0.646      0.515      0.478       0.39






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      10/30      2.31G     0.7606     0.9018     0.9563        156        640: 100%|██████████| 94/94 [00:43<00:00,  2.14it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [00:05<00:00,  2.22it/s]


                   all        400       2889      0.626      0.507      0.455      0.375

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      11/30      2.31G     0.7466     0.8742     0.9508        137        640: 100%|██████████| 94/94 [00:43<00:00,  2.14it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [00:07<00:00,  1.75it/s]


                   all        400       2889      0.648      0.487      0.468      0.384

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      12/30      2.31G     0.7466     0.8643     0.9546        184        640: 100%|██████████| 94/94 [00:44<00:00,  2.10it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [00:05<00:00,  2.24it/s]

                   all        400       2889      0.655      0.487      0.489      0.403






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      13/30      2.31G     0.7343     0.8337     0.9508        164        640: 100%|██████████| 94/94 [00:44<00:00,  2.10it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [00:06<00:00,  2.06it/s]

                   all        400       2889      0.642       0.52      0.526      0.436






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      14/30      2.31G     0.7241     0.8262     0.9447        167        640: 100%|██████████| 94/94 [00:43<00:00,  2.14it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [00:07<00:00,  1.81it/s]

                   all        400       2889      0.659      0.513      0.524      0.429






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      15/30      2.31G     0.7191     0.8197     0.9441        151        640: 100%|██████████| 94/94 [00:42<00:00,  2.21it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [00:05<00:00,  2.22it/s]


                   all        400       2889      0.687      0.516      0.511      0.423

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      16/30      2.31G     0.7023      0.794      0.936        155        640: 100%|██████████| 94/94 [00:43<00:00,  2.18it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [00:07<00:00,  1.78it/s]

                   all        400       2889      0.678      0.493      0.496      0.414






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      17/30      2.31G     0.6957     0.7952     0.9335        196        640: 100%|██████████| 94/94 [00:43<00:00,  2.18it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [00:05<00:00,  2.26it/s]

                   all        400       2889      0.699       0.49      0.517      0.432






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      18/30      2.31G     0.6954       0.79      0.934        165        640: 100%|██████████| 94/94 [00:43<00:00,  2.16it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [00:07<00:00,  1.82it/s]

                   all        400       2889      0.365      0.602      0.508      0.422






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      19/30      2.31G     0.6844     0.7734      0.931         93        640: 100%|██████████| 94/94 [00:44<00:00,  2.13it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [00:07<00:00,  1.78it/s]

                   all        400       2889       0.48      0.522       0.52      0.437






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      20/30      2.32G     0.6848     0.7822     0.9293        188        640: 100%|██████████| 94/94 [00:42<00:00,  2.23it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [00:05<00:00,  2.32it/s]

                   all        400       2889      0.556      0.578      0.541      0.454





Closing dataloader mosaic
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, method='weighted_average', num_output_channels=3), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      21/30      2.33G     0.6519     0.7434     0.9163        102        640: 100%|██████████| 94/94 [00:43<00:00,  2.15it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [00:07<00:00,  1.75it/s]

                   all        400       2889      0.689      0.535       0.53      0.443






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      22/30      2.33G     0.6385     0.7368     0.9119         94        640: 100%|██████████| 94/94 [00:42<00:00,  2.21it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [00:05<00:00,  2.25it/s]


                   all        400       2889      0.717      0.518      0.547      0.463

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      23/30      2.35G     0.6356     0.7126     0.9132        100        640: 100%|██████████| 94/94 [00:41<00:00,  2.28it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [00:07<00:00,  1.85it/s]


                   all        400       2889      0.527      0.619       0.58       0.49

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      24/30      2.35G     0.6274     0.7013     0.9042         99        640: 100%|██████████| 94/94 [00:41<00:00,  2.27it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [00:05<00:00,  2.40it/s]

                   all        400       2889      0.502      0.699      0.596      0.508






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      25/30      2.35G     0.6178     0.6915      0.906         81        640: 100%|██████████| 94/94 [00:41<00:00,  2.27it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [00:07<00:00,  1.81it/s]

                   all        400       2889      0.681      0.642      0.624      0.528






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      26/30      2.35G     0.6112     0.6836     0.8998         82        640: 100%|██████████| 94/94 [00:42<00:00,  2.22it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [00:05<00:00,  2.41it/s]

                   all        400       2889      0.643      0.558      0.595      0.507






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      27/30      2.35G     0.6009     0.6724     0.9003         60        640: 100%|██████████| 94/94 [00:41<00:00,  2.27it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [00:07<00:00,  1.79it/s]

                   all        400       2889      0.604      0.576      0.588      0.504






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      28/30      2.37G     0.5972     0.6659     0.8974         92        640: 100%|██████████| 94/94 [00:41<00:00,  2.28it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [00:05<00:00,  2.30it/s]

                   all        400       2889       0.69      0.548      0.584      0.504






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      29/30      2.37G     0.5933     0.6591     0.8935         99        640: 100%|██████████| 94/94 [00:41<00:00,  2.26it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [00:07<00:00,  1.68it/s]


                   all        400       2889      0.578      0.559      0.589      0.506

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      30/30      2.38G     0.5894     0.6464     0.8904         87        640: 100%|██████████| 94/94 [00:42<00:00,  2.22it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [00:05<00:00,  2.43it/s]

                   all        400       2889      0.606      0.564      0.596      0.514






30 epochs completed in 0.562 hours.
Optimizer stripped from runs/detect/train/weights/last.pt, 6.2MB
Optimizer stripped from runs/detect/train/weights/best.pt, 6.2MB

Validating runs/detect/train/weights/best.pt...
Ultralytics 8.3.169 🚀 Python-3.11.13 torch-2.6.0+cu124 CUDA:0 (Tesla T4, 15095MiB)
Model summary (fused): 72 layers, 3,006,623 parameters, 0 gradients, 8.1 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [00:08<00:00,  1.50it/s]


                   all        400       2889      0.681      0.641      0.624      0.528
               Engaged        389       2182      0.763      0.904      0.912      0.789
            Distracted        115        216      0.412      0.585      0.492      0.422
               Talking         11         14          1      0.344      0.424      0.357
                Turned        259        477      0.548       0.73      0.669      0.543
Speed: 0.2ms preprocess, 1.8ms inference, 0.0ms loss, 4.1ms postprocess per image
Results saved to [1mruns/detect/train[0m


ultralytics.utils.metrics.DetMetrics object with attributes:

ap_class_index: array([0, 1, 2, 4])
box: ultralytics.utils.metrics.Metric object
confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x7ef0878bb710>
curves: ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)']
curves_results: [[array([          0,    0.001001,    0.002002,    0.003003,    0.004004,    0.005005,    0.006006,    0.007007,    0.008008,    0.009009,     0.01001,    0.011011,    0.012012,    0.013013,    0.014014,    0.015015,    0.016016,    0.017017,    0.018018,    0.019019,     0.02002,    0.021021,    0.022022,    0.023023,
          0.024024,    0.025025,    0.026026,    0.027027,    0.028028,    0.029029,     0.03003,    0.031031,    0.032032,    0.033033,    0.034034,    0.035035,    0.036036,    0.037037,    0.038038,    0.039039,     0.04004,    0.041041,    0.042042,    0.043043,    0.044044,    0.045045,    0.046046,    0.047047,
          0

In [None]:
from ultralytics import YOLO

# Checkpoint produced by the training run; adjust if training wrote elsewhere.
weights_path = '/content/runs/detect/train/weights/best.pt'

# Input to predict on. A folder processes every image it contains; a single
# image path (e.g. '/content/drive/MyDrive/Saif/test/1590.jpg') works as well.
source_path = '/content/drive/MyDrive/Saif/test'

# Restore the trained detector from disk.
model = YOLO(weights_path)

# save=True asks Ultralytics to also write the annotated predictions to disk.
results = model.predict(source_path, save=True)



image 1/142 /content/drive/MyDrive/Saif/test/1003.jpg: 384x640 11 Engageds, 1 Turned, 8.2ms
image 2/142 /content/drive/MyDrive/Saif/test/1031.jpg: 384x640 11 Engageds, 2 Turneds, 7.5ms
image 3/142 /content/drive/MyDrive/Saif/test/1039.jpg: 384x640 11 Engageds, 2 Turneds, 7.5ms
image 4/142 /content/drive/MyDrive/Saif/test/1056.jpg: 384x640 11 Engageds, 1 Turned, 6.7ms
image 5/142 /content/drive/MyDrive/Saif/test/1094.jpg: 384x640 4 Engageds, 1 Turned, 27.0ms
image 6/142 /content/drive/MyDrive/Saif/test/1097.jpg: 384x640 6 Engageds, 1 Turned, 7.0ms
image 7/142 /content/drive/MyDrive/Saif/test/1104.jpg: 384x640 5 Engageds, 3 Turneds, 6.7ms
image 8/142 /content/drive/MyDrive/Saif/test/1108.jpg: 384x640 4 Engageds, 2 Turneds, 7.8ms
image 9/142 /content/drive/MyDrive/Saif/test/1131.jpg: 384x640 5 Engageds, 1 Distracted, 2 Turneds, 6.7ms
image 10/142 /content/drive/MyDrive/Saif/test/1136.jpg: 384x640 5 Engageds, 2 Turneds, 7.0ms
image 11/142 /content/drive/MyDrive/Saif/test/1157.jpg: 384x640

In [None]:
import shutil
from pathlib import Path

from google.colab import files

# Root under which Ultralytics writes detection runs in this Colab session.
DETECT_ROOT = Path('/content/runs/detect')

# Each inference run may land in a new numbered folder (predict, predict2, ...),
# so a hard-coded '.../predict' can silently point at stale results after a
# re-run. Pick the most recently modified prediction folder instead.
predict_dirs = sorted(
    (p for p in DETECT_ROOT.glob('predict*') if p.is_dir()),
    key=lambda p: p.stat().st_mtime,
)
if not predict_dirs:
    raise FileNotFoundError(
        f"No 'predict*' folder found under {DETECT_ROOT}; run the inference cell first."
    )
latest_predict_dir = predict_dirs[-1]

# Zip the prediction folder; make_archive returns the path of the archive it wrote.
archive_path = shutil.make_archive('detect_res', 'zip', str(latest_predict_dir))

# Trigger a browser download of the zipped results.
files.download(archive_path)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>