In [2]:
!pip install ultralytics opencv-python tensorflow numpy

Collecting ultralytics
  Downloading ultralytics-8.3.118-py3-none-any.whl.metadata (37 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.14-py3-none-any.whl.metadata (9.4 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.8.0->ultralytics)
  Downloading n

In [62]:
from tensorflow.keras.models import load_model
import matplotlib.pyplot as plt
from ultralytics import YOLO
import cv2
import numpy as np
import joblib


In [63]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [64]:
cnn_model = load_model('/content/drive/MyDrive/FaceTrackAI/weights/cnn_face_classifier_EPOCHS_5.keras')
yolo_model = YOLO('/content/drive/MyDrive/FaceTrackAI/weights/yolov8n_face_best.pt')

rf_model = joblib.load('/content/drive/MyDrive/FaceTrackAI/weights/rf_model.joblib')
scaler = joblib.load('/content/drive/MyDrive/FaceTrackAI/weights/scaler.joblib')  # якщо треба масштабування!


In [65]:
cap = cv2.VideoCapture('/content/drive/MyDrive/FaceTrackAI/data/video/song_10s.mp4')

In [66]:
fps = cap.get(cv2.CAP_PROP_FPS)
print(f"Video FPS: {fps}")

Video FPS: 24.048459093776316


In [67]:
IMG_SIZE = (128, 128)
CONFIDENCE_THRESHOLD = 0.5

In [68]:
def detect_faces_cnn(frame):
    height, width = frame.shape[:2]
    step_size = 32  # або 32 для кращої точності (можна зменшити)
    detected_faces = []
    IMG_SIZE = (64, 64)

    for y in range(0, height - IMG_SIZE[1], step_size):
        for x in range(0, width - IMG_SIZE[0], step_size):
            patch = frame[y:y+IMG_SIZE[1], x:x+IMG_SIZE[0]]
            if patch.shape[0] != IMG_SIZE[1] or patch.shape[1] != IMG_SIZE[0]:
                continue
            patch_norm = patch / 255.0  # нормалізація в 0-1
            patch_input = np.expand_dims(patch_norm, axis=0)  # форма (1, 64, 64, 3)
            prediction = cnn_model.predict(patch_input, verbose=0)
            if prediction[0][1] > 0.8:  # якщо це обличчя (друга нейронка класифікації)
                detected_faces.append((x, y, IMG_SIZE[0], IMG_SIZE[1]))
    return detected_faces

In [69]:
def predict_face_rf(face_img):
    # face_img — це кадр обличчя (обрізаний за bounding box)

    # 1. Зміна розміру на те, що очікує Random Forest (наприклад 64x64 або 48x48)
    resized_face = cv2.resize(face_img, (64, 64))

    # 2. Перетворення в вектор
    flat_face = resized_face.flatten().reshape(1, -1)

    # 3. Масштабування
    flat_face_scaled = scaler.transform(flat_face)

    # 4. Прогнозування
    prediction = rf_model.predict(flat_face_scaled)

    return prediction[0]  # 0 або 1, або інший клас

In [70]:
def detect_faces_yolo(frame):
    results = yolo_model(frame)[0]
    faces = []
    for box in results.boxes.xyxy:
        x1, y1, x2, y2 = box[:4]
        x, y, w, h = int(x1), int(y1), int(x2 - x1), int(y2 - y1)
        faces.append((x, y, w, h))
    return faces

In [71]:
fourcc = cv2.VideoWriter_fourcc(*'XVID')
out = None

In [72]:
frame_idx = 0
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    if out is None:
        out = cv2.VideoWriter('/content/drive/MyDrive/FaceTrackAI/data/video/output.avi', fourcc, 20.0, (frame.shape[1], frame.shape[0]))

    # ==== CNN: класифікація всього кадру ====
    frame_resized_cnn = cv2.resize(frame, (64, 64))
    frame_normalized_cnn = frame_resized_cnn / 255.0
    frame_input_cnn = np.expand_dims(frame_normalized_cnn, axis=0)  # (1, 64, 64, 3)

    cnn_prediction = cnn_model.predict(frame_input_cnn, verbose=0)
    cnn_label = "Face" if np.argmax(cnn_prediction) == 1 else "No Face"

    # ==== Random Forest: класифікація всього кадру ====
    frame_resized_rf = cv2.resize(frame, (64, 64))
    frame_flat_rf = frame_resized_rf.flatten().reshape(1, -1)
    frame_flat_rf_scaled = scaler.transform(frame_flat_rf)

    rf_prediction = rf_model.predict(frame_flat_rf_scaled)
    rf_label = "Face" if rf_prediction[0] == 1 else "No Face"

    # ==== YOLO: детекція облич ====
    faces_yolo = detect_faces_yolo(frame)

    # Малюємо знайдені обличчя через YOLO
    for (x, y, w, h) in faces_yolo:
        cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 2)  # Зелений прямокутник

    # ==== Підпис знизу кадру ====
    text = f'CNN: {cnn_label} | RF: {rf_label} | YOLO Faces: {len(faces_yolo)}'
    cv2.rectangle(frame, (0, frame.shape[0] - 30), (frame.shape[1], frame.shape[0]), (50, 50, 50), -1)
    cv2.putText(frame, text, (10, frame.shape[0] - 10),
                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)

    # ==== Запис кадру у відео ====
    out.write(frame)

    # ==== Показувати кожний 10-й кадр у Colab ====
    # if frame_idx % 10 == 0:
    #     plt.figure(figsize=(10, 6))
    #     plt.axis('off')
    #     plt.imshow(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    #     plt.show()

    frame_idx += 1



0: 256x416 (no detections), 91.8ms
Speed: 2.0ms preprocess, 91.8ms inference, 0.9ms postprocess per image at shape (1, 3, 256, 416)

0: 256x416 (no detections), 82.3ms
Speed: 2.9ms preprocess, 82.3ms inference, 0.6ms postprocess per image at shape (1, 3, 256, 416)

0: 256x416 (no detections), 73.7ms
Speed: 2.6ms preprocess, 73.7ms inference, 0.6ms postprocess per image at shape (1, 3, 256, 416)

0: 256x416 (no detections), 72.5ms
Speed: 2.9ms preprocess, 72.5ms inference, 0.7ms postprocess per image at shape (1, 3, 256, 416)

0: 256x416 (no detections), 72.6ms
Speed: 3.3ms preprocess, 72.6ms inference, 0.6ms postprocess per image at shape (1, 3, 256, 416)

0: 256x416 1 face, 90.5ms
Speed: 3.0ms preprocess, 90.5ms inference, 1.3ms postprocess per image at shape (1, 3, 256, 416)

0: 256x416 1 face, 68.5ms
Speed: 2.5ms preprocess, 68.5ms inference, 1.1ms postprocess per image at shape (1, 3, 256, 416)

0: 256x416 1 face, 68.5ms
Speed: 2.1ms preprocess, 68.5ms inference, 1.0ms postprocess

In [73]:
cap.release()
if out is not None:
    out.release()
cv2.destroyAllWindows()

In [74]:
from google.colab import files

In [75]:
files.download('/content/drive/MyDrive/FaceTrackAI/data/video/output.avi')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>