In [None]:
from tqdm import tqdm
import face_recognition
import cv2, os
from tqdm import tqdm
import numpy as np

In [None]:
dataset_path=r"C:\Users\noura\OneDrive\Documents\vid_atten_proj\P2E_S5\P2E_S5_C1\P2E_S5_C1.1"
output_dir =r"C:\Users\noura\OneDrive\Documents\vid_atten_proj\extracted_faces"

os.makedirs(output_dir, exist_ok=True)

# Extracting Faces

In [None]:
empty_log = os.path.join(output_dir, "empty_images_c1.txt")
empty_images = []
haar_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

for img_name in tqdm(os.listdir(dataset_path), desc="Extracting Faces"):
    if not img_name.lower().endswith(('.jpg', '.png', '.jpeg')):
        continue

    img_path = os.path.join(dataset_path, img_name)
    image = cv2.imread(img_path)
    if image is None:
        empty_images.append(img_name)
        continue

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = cv2.equalizeHist(gray)
    rgb = cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)

    face_locations = face_recognition.face_locations(rgb, model="hog")

    if not face_locations:
        faces_haar = haar_cascade.detectMultiScale(gray, 1.1, 3, minSize=(20, 20))
        face_locations = [(y, x+w, y+h, x) for (x, y, w, h) in faces_haar]

    if not face_locations:
        empty_images.append(img_name)
        continue

    for i, (top, right, bottom, left) in enumerate(face_locations):
        face = image[top:bottom, left:right]
        cv2.imwrite(os.path.join(output_dir, f"{os.path.splitext(img_name)[0]}_f{i+1}.jpg"), face)

with open(empty_log, 'w') as f:
    f.write('\n'.join(empty_images))

print(f"✅ Done! Extracted faces saved in: {output_dir}")
print(f"❌ No-face images: {len(empty_images)} logged in {empty_log}")


## Group By Person

In [None]:
import os
import numpy as np
import face_recognition
from sklearn.cluster import DBSCAN
from tqdm import tqdm
import shutil

faces_path = r"C:\Users\noura\OneDrive\Documents\vid_atten_proj\extracted_faces"
output_path = r"C:\Users\noura\OneDrive\Documents\vid_atten_proj\clustered_faces"
employee_dir = r"C:\Users\noura\OneDrive\Documents\vid_atten_proj\employee_images"  
os.makedirs(output_path, exist_ok=True)
os.makedirs(employee_dir, exist_ok=True)



In [None]:
from sklearn.cluster import DBSCAN
import numpy as np
import os, face_recognition, shutil
from tqdm import tqdm

faces_path = r"C:\Users\noura\OneDrive\Documents\vid_atten_proj\extracted_faces"
output_path = r"C:\Users\noura\OneDrive\Documents\vid_atten_proj\clustered_faces"
employee_dir = r"C:\Users\noura\OneDrive\Documents\vid_atten_proj\employee_images"

os.makedirs(output_path, exist_ok=True)
os.makedirs(employee_dir, exist_ok=True)

encodings, filenames = [], []

# ---- Step 1: Encode ----
for img_name in tqdm(os.listdir(faces_path), desc="Encoding faces"):
    if not img_name.lower().endswith(('.jpg', '.png')): continue
    path = os.path.join(faces_path, img_name)
    img = face_recognition.load_image_file(path)
    enc = face_recognition.face_encodings(img)
    if enc:
        encodings.append(enc[0])
        filenames.append(path)

encodings = np.array(encodings)
print(f"✅ Encoded {len(encodings)} faces")

# ---- Step 2: Try smaller eps ----
# You can experiment with 0.4, 0.35, or 0.3 depending on data diversity
clt = DBSCAN(eps=0.38, min_samples=3, metric="euclidean").fit(encodings)
labels = clt.labels_

unique_labels = sorted(set(labels) - {-1})
print(f"🧠 Found {len(unique_labels)} distinct people")
print(f"🗑️ Noise faces: {(labels == -1).sum()}")

# ---- Step 3: Save ----
for label in unique_labels:
    cluster_dir = os.path.join(output_path, f"person_{label}")
    os.makedirs(cluster_dir, exist_ok=True)
    cluster_files = [f for f, l in zip(filenames, labels) if l == label]
    
    # copy only 2-3 samples to keep small
    for f in cluster_files[:3]:
        shutil.copy(f, cluster_dir)

    # representative image
    shutil.copy(cluster_files[0], os.path.join(employee_dir, f"Employee_{label+1}.jpg"))

print("✅ Done clustering!")


In [None]:
employee_dir = r"C:\Users\noura\OneDrive\Documents\vid_atten_proj\employee_images"
employee_files = [f for f in os.listdir(employee_dir) if f.lower().endswith(('.jpg', '.png'))]
print(f"Found {len(employee_files)} employee images:")
print(employee_files)

# Checking Camera 3

In [None]:
from datetime import datetime

# Load employee faces
employee_dir = r"C:\Users\noura\OneDrive\Documents\vid_atten_proj\employee_images"
if not os.path.exists(employee_dir):
    os.makedirs(employee_dir)
    print(f"Created {employee_dir} - Add face images here!")
    exit()
    
images = []
classNames = []
for img_name in os.listdir(employee_dir):
    if img_name.lower().endswith(('.jpg', '.png')):
        img = cv2.imread(os.path.join(employee_dir, img_name))
        if img is None:
            print(f"Failed to load {img_name}")
            continue
        images.append(img)
        classNames.append(os.path.splitext(img_name)[0])
        
def findEncodings(images):
    encodeList = []
    for img_name, img in zip(os.listdir(employee_dir), images):
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        encodes = face_recognition.face_encodings(img_rgb)
        if encodes:
            encodeList.append(encodes[0])
        else:
            print(f"No encoding for {img_name}")
    return encodeList

encoded_face_train = findEncodings(images)
print(f"Encoded {len(encoded_face_train)} known faces.")

# YOLO

### for the full body

In [None]:
# from datetime import datetime
# import cv2, os, numpy as np, face_recognition, torch
# from ultralytics import YOLO
# from tqdm import tqdm
# import time

# # ---------------- YOLO Model ----------------
# model_path = "yolov8n-face.pt" if os.path.exists("yolov8n-face.pt") else "yolov8n.pt"
# model = YOLO(model_path)
# device = "cuda" if torch.cuda.is_available() else "cpu"
# model.to(device)
# print(f"✅ Using {device.upper()} for inference.")

# # ---------------- Load Known Faces ----------------
# employee_dir = r"C:\Users\noura\OneDrive\Documents\vid_atten_proj\employee_images"
# images, classNames = [], []
# for img_name in os.listdir(employee_dir):
#     if img_name.lower().endswith(('.jpg', '.png')):
#         path = os.path.join(employee_dir, img_name)
#         img = cv2.imread(path)
#         if img is not None:
#             images.append(img)
#             classNames.append(os.path.splitext(img_name)[0])

# def findEncodings(imgs):
#     encs = []
#     for img in imgs:
#         rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
#         e = face_recognition.face_encodings(rgb)
#         if e:
#             encs.append(e[0])
#     return encs

# encoded_face_train = findEncodings(images)
# print(f"✅ Encoded {len(encoded_face_train)} known faces.")

# # ---------------- Attendance ----------------
# def markAttendance(name):
#     with open('Attendance_C3.csv', 'a') as f:
#         now = datetime.now()
#         f.write(f'\n{name},{now.strftime("%I:%M:%S %p")},{now.strftime("%d-%B-%Y")}')

# # ---------------- Paths ----------------
# image_dir = r"C:\Users\noura\OneDrive\Documents\vid_atten_proj\P2E_S5\P2E_S5_C3\P2E_S5_C3.1"
# boxed_dir = os.path.join(image_dir, "boxed_output_fast")
# os.makedirs(boxed_dir, exist_ok=True)

# processed_faces = set()
# image_files = sorted([f for f in os.listdir(image_dir) if f.lower().endswith(('.jpg', '.png'))])

# # ---------------- Process Frames ----------------
# frame_skip = 10
# t0 = time.time()

# for i, img_file in enumerate(tqdm(image_files, desc="Processing Fast C3 Frames")):
#     if i % frame_skip != 0:
#         continue  # Skip frames for speed

#     path = os.path.join(image_dir, img_file)
#     img = cv2.imread(path)
#     if img is None:
#         continue

#     # ✅ YOLO detection (low resolution for speed)
#     results = model.predict(img, imgsz=256, conf=0.6, device=device, verbose=False)
#     r = results[0]
#     boxes = r.boxes.xyxy.cpu().numpy().astype(int)

#     for (x1, y1, x2, y2) in boxes:
#         y1, y2 = max(0, y1+8), min(img.shape[0], y2-8)
#         face_crop = img[y1:y2, x1:x2]
#         if face_crop.size == 0:
#             continue

#         rgb = cv2.cvtColor(face_crop, cv2.COLOR_BGR2RGB)
#         encs = face_recognition.face_encodings(rgb)
#         if not encs:
#             continue

#         encode = encs[0]
#         dists = face_recognition.face_distance(encoded_face_train, encode)
#         idx = np.argmin(dists) if len(dists) > 0 else -1

#         if idx != -1 and dists[idx] < 0.55:
#             name = classNames[idx].upper()
#             color = (0, 255, 0)
#             if name not in processed_faces:
#                 markAttendance(name)
#                 processed_faces.add(name)
#                 print(f"✅ {name} recognized in {img_file}")
#         else:
#             name = "UNKNOWN"
#             color = (0, 0, 255)

#         cv2.rectangle(img, (x1, y1), (x2, y2), color, 2)
#         cv2.putText(img, name, (x1+6, y2-10),
#                     cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 255), 2)

#     # ✅ Smaller display to speed up
#     disp = cv2.resize(img, (480, 360))
#     cv2.imshow("YOLOv8n-Face (Ultra-Fast)", disp)
#     cv2.imwrite(os.path.join(boxed_dir, f"boxed_{img_file}"), img)

#     # ✅ Non-blocking & quick refresh
#     if cv2.waitKey(1) == ord('q'):
#         break

# cv2.destroyAllWindows()
# fps = len(image_files) / (time.time() - t0)
# print(f"⚡ Avg FPS: {fps:.2f}")
# print("✅ Done! Fast output saved to:", boxed_dir)


✅ Using CPU for inference.
✅ Encoded 28 known faces.


Processing Fast C3 Frames:  16%|█▋        | 131/806 [00:01<00:03, 171.63it/s]

✅ EMPLOYEE_2 recognized in 00000176.jpg


Processing Fast C3 Frames:  22%|██▏       | 178/806 [00:03<00:22, 28.51it/s] 

✅ EMPLOYEE_1 recognized in 00000206.jpg


Processing Fast C3 Frames:  24%|██▍       | 192/806 [00:04<00:30, 20.08it/s]

✅ EMPLOYEE_18 recognized in 00000236.jpg


Processing Fast C3 Frames:  25%|██▍       | 200/806 [00:06<00:19, 31.71it/s]

✅ EMPLOYEE_20 recognized in 00000236.jpg
⚡ Avg FPS: 127.38
✅ Done! Fast output saved to: C:\Users\noura\OneDrive\Documents\vid_atten_proj\P2E_S5\P2E_S5_C3\P2E_S5_C3.1\boxed_output_fast





### for the faces only

In [11]:
from datetime import datetime
import cv2, os, numpy as np, face_recognition, torch, time
from ultralytics import YOLO
from tqdm import tqdm

# ---------------- YOLO Models ----------------
face_model_path = "yolov8n-face-lindevs.pt"
fallback_model_path = "yolov8n.pt"

face_model = YOLO(face_model_path)
fallback_model = YOLO(fallback_model_path)


# ---------------- Load Known Faces ----------------
employee_dir = r"C:\Users\noura\OneDrive\Documents\vid_atten_proj\employee_images"
images, classNames = [], []

for img_name in os.listdir(employee_dir):
    if img_name.lower().endswith(('.jpg', '.png')):
        path = os.path.join(employee_dir, img_name)
        img = cv2.imread(path)
        if img is not None:
            images.append(img)
            classNames.append(os.path.splitext(img_name)[0])

def findEncodings(imgs):
    encs = []
    for img in imgs:
        rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        e = face_recognition.face_encodings(rgb)
        if e:
            encs.append(e[0])
    return encs

encoded_face_train = findEncodings(images)
print(f"✅ Encoded {len(encoded_face_train)} known faces.")

# ---------------- Attendance ----------------
def markAttendance(name):
    with open('Attendance_C3.csv', 'a') as f:
        now = datetime.now()
        f.write(f'\n{name},{now.strftime("%I:%M:%S %p")},{now.strftime("%d-%B-%Y")}')

# ---------------- Paths ----------------
image_dir = r"C:\Users\noura\OneDrive\Documents\vid_atten_proj\P2E_S5\P2E_S5_C3\P2E_S5_C3.1"
boxed_dir = os.path.join(image_dir, "boxed_output_fast")
os.makedirs(boxed_dir, exist_ok=True)

processed_faces = set()
image_files = sorted([f for f in os.listdir(image_dir) if f.lower().endswith(('.jpg', '.png'))])

# ---------------- Process Frames ----------------
frame_skip = 8  
t0 = time.time()

for i, img_file in enumerate(tqdm(image_files, desc="Processing C3 Frames (Hybrid YOLO)")):
    if i % frame_skip != 0:
        continue  # Skip frames for speed

    path = os.path.join(image_dir, img_file)
    img = cv2.imread(path)
    if img is None:
        continue

    # ✅ YOLO-FACE detection (higher resolution + lower conf for more recall)
    results = face_model.predict(img, imgsz=640, conf=0.8, verbose=False)
    r = results[0]
    boxes = r.boxes.xyxy.cpu().numpy().astype(int)

    # ✅ Fallback to normal YOLO if YOLO-Face detects nothing
    if len(boxes) == 0:
        results = fallback_model.predict(img, imgsz=640, conf=0.8, verbose=False)
        r = results[0]
        boxes = r.boxes.xyxy.cpu().numpy().astype(int)

    for (x1, y1, x2, y2) in boxes:
        # ✅ Make box slightly larger for more accurate crops
        h, w = img.shape[:2]
        pad = 20  # enlarge each side by 20px
        x1 = max(0, x1 - pad)
        y1 = max(0, y1 - pad)
        x2 = min(w, x2 + pad)
        y2 = min(h, y2 + pad)

        face_crop = img[y1:y2, x1:x2]
        if face_crop.size == 0:
            continue

        rgb = cv2.cvtColor(face_crop, cv2.COLOR_BGR2RGB)
        encs = face_recognition.face_encodings(rgb)
        if not encs:
            continue

        encode = encs[0]
        dists = face_recognition.face_distance(encoded_face_train, encode)
        idx = np.argmin(dists) if len(dists) > 0 else -1

        if idx != -1 and dists[idx] < 0.55:
            name = classNames[idx].upper()
            color = (0, 255, 0)
            if name not in processed_faces:
                markAttendance(name)
                processed_faces.add(name)
                print(f"✅ {name} recognized in {img_file}")
        else:
            name = "UNKNOWN"
            color = (0, 0, 255)

        # ✅ Bigger and thicker bounding boxes
        cv2.rectangle(img, (x1, y1), (x2, y2), color, 4)  # thickness=4
        cv2.putText(img, name, (x1 + 10, y1 - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 1.0, (255, 255, 255), 2)  # larger font

    # ✅ Display smaller window for speed
    disp = cv2.resize(img, (480, 360))
    cv2.imshow("Hybrid YOLOv8 Face Attendance", disp)
    cv2.imwrite(os.path.join(boxed_dir, f"boxed_{img_file}"), img)

    if cv2.waitKey(1) == ord('q'):
        break

cv2.destroyAllWindows()
fps = len(image_files) / (time.time() - t0)
print(f"⚡ Avg FPS: {fps:.2f}")
print("✅ Done! Output saved to:", boxed_dir)


✅ Encoded 28 known faces.


Processing C3 Frames (Hybrid YOLO):  17%|█▋        | 137/806 [00:04<00:26, 25.56it/s]

✅ EMPLOYEE_2 recognized in 00000172.jpg


Processing C3 Frames (Hybrid YOLO):  21%|██        | 169/806 [00:07<00:55, 11.58it/s]

✅ EMPLOYEE_19 recognized in 00000204.jpg


Processing C3 Frames (Hybrid YOLO):  22%|██▏       | 177/806 [00:08<00:59, 10.57it/s]

✅ EMPLOYEE_1 recognized in 00000228.jpg
✅ EMPLOYEE_6 recognized in 00000228.jpg


Processing C3 Frames (Hybrid YOLO):  25%|██▍       | 201/806 [00:10<00:52, 11.50it/s]

✅ EMPLOYEE_18 recognized in 00000236.jpg


Processing C3 Frames (Hybrid YOLO):  27%|██▋       | 217/806 [00:12<01:05,  9.00it/s]

✅ EMPLOYEE_10 recognized in 00000260.jpg


Processing C3 Frames (Hybrid YOLO):  29%|██▉       | 233/806 [00:14<01:11,  8.07it/s]

✅ EMPLOYEE_4 recognized in 00000276.jpg


Processing C3 Frames (Hybrid YOLO):  30%|██▉       | 241/806 [00:16<01:16,  7.36it/s]

✅ EMPLOYEE_3 recognized in 00000284.jpg


Processing C3 Frames (Hybrid YOLO):  31%|███       | 249/806 [00:17<01:20,  6.89it/s]

✅ EMPLOYEE_5 recognized in 00000292.jpg


Processing C3 Frames (Hybrid YOLO):  33%|███▎      | 265/806 [00:20<01:21,  6.60it/s]

✅ EMPLOYEE_12 recognized in 00000308.jpg


Processing C3 Frames (Hybrid YOLO):  35%|███▍      | 281/806 [00:21<01:06,  7.84it/s]

✅ EMPLOYEE_7 recognized in 00000316.jpg


Processing C3 Frames (Hybrid YOLO):  36%|███▌      | 289/806 [00:22<01:03,  8.12it/s]

✅ EMPLOYEE_9 recognized in 00000332.jpg
✅ EMPLOYEE_14 recognized in 00000332.jpg


Processing C3 Frames (Hybrid YOLO):  38%|███▊      | 305/806 [00:26<01:25,  5.87it/s]

✅ EMPLOYEE_27 recognized in 00000340.jpg


Processing C3 Frames (Hybrid YOLO):  39%|███▊      | 312/806 [00:28<00:44, 11.14it/s]

⚡ Avg FPS: 28.74
✅ Done! Output saved to: C:\Users\noura\OneDrive\Documents\vid_atten_proj\P2E_S5\P2E_S5_C3\P2E_S5_C3.1\boxed_output_fast



