In [1]:
# !pip install ultralytics
# !pip install deep_sort_realtime
# !pip install openvino openvino-dev
!pip install onnx onnxruntime-openvino


Collecting onnx
  Downloading onnx-1.18.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.9 kB)
Collecting onnxruntime-openvino
  Downloading onnxruntime_openvino-1.22.0-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (5.0 kB)
Collecting coloredlogs (from onnxruntime-openvino)
  Downloading coloredlogs-15.0.1-py2.py3-none-any.whl.metadata (12 kB)
Collecting sympy (from onnxruntime-openvino)
  Downloading sympy-1.14.0-py3-none-any.whl.metadata (12 kB)
Collecting humanfriendly>=9.1 (from coloredlogs->onnxruntime-openvino)
  Downloading humanfriendly-10.0-py2.py3-none-any.whl.metadata (9.2 kB)
Collecting mpmath<1.4,>=1.1.0 (from sympy->onnxruntime-openvino)
  Downloading mpmath-1.3.0-py3-none-any.whl.metadata (8.6 kB)
Downloading onnx-1.18.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.6/17.6 MB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hDownloading onnxru

In [47]:
from ultralytics import YOLO
import os
import cv2

# STEP 0: Ensure dependencies are installed:
#    pip install ultralytics opencv-python

# STEP 1: Load the YOLOv8x PyTorch model
model = YOLO('yolov8x.pt')

# STEP 2: Run inference on your frames folder
#    returns a Python list of Results, one per image
results = model.predict(
    source='Data/sample_frames/',
    conf=0.01,         # catch distant people
    classes=[0],       # only 'person'
    imgsz=3616,        # resize input to 1280×1280
    max_det=1500,                   # Increase max detections (default=300)
    agnostic_nms=True,  
    device='cpu',      # or 'cuda' if available
)

# STEP 3: Prepare output folder
output_dir = "runs/detect/head_centers"
os.makedirs(output_dir, exist_ok=True)

# STEP 4: Loop through results, draw head‑center, and save
for idx, res in enumerate(results):
    # res.orig_img is the H×W×3 RGB array
    img = res.orig_img.copy()
    # convert to BGR for OpenCV
    img_bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

    # Iterate all detected person boxes
    for box in res.boxes:
        # get integer coords
        x1, y1, x2, y2 = map(int, box.xyxy[0].tolist())
        # head center = mid x of box, top y of box
        head_x = int((x1 + x2) / 2)
        head_y = y1

        # draw a solid red dot (radius=8 px) at the head center
        cv2.circle(img_bgr, (head_x, head_y), radius=8, color=(0, 0, 255), thickness=-1)

    # save annotated frame
    frame_name = f"frame_{idx:04d}.jpg"
    cv2.imwrite(os.path.join(output_dir, frame_name), img_bgr)

print(f"Done! Annotated frames with head‑centers saved to: {output_dir}")



image 1/5 /home/mra/Desktop/fyp_prototype/Data/sample_frames/frame_0001.jpg: 2048x3616 702 persons, 49551.0ms
image 2/5 /home/mra/Desktop/fyp_prototype/Data/sample_frames/frame_0002.jpg: 2048x3616 555 persons, 49695.0ms
image 3/5 /home/mra/Desktop/fyp_prototype/Data/sample_frames/frame_0003.jpg: 2048x3616 379 persons, 50167.9ms
image 4/5 /home/mra/Desktop/fyp_prototype/Data/sample_frames/frame_0004.jpg: 2048x3616 488 persons, 50683.6ms
image 5/5 /home/mra/Desktop/fyp_prototype/Data/sample_frames/frame_0005.jpg: 2048x3616 493 persons, 50172.8ms
Speed: 74.2ms preprocess, 50054.1ms inference, 20.4ms postprocess per image at shape (1, 3, 2048, 3616)
Done! Annotated frames with head‑centers saved to: runs/detect/head_centers


In [None]:
from ultralytics import YOLO
model = YOLO('yolov8x.pt')
model.export(format='openvino', imgsz=3616)

Ultralytics 8.3.161 🚀 Python-3.12.3 torch-2.7.1+cu126 CPU (12th Gen Intel Core(TM) i5-1235U)
YOLOv8x summary (fused): 112 layers, 68,200,608 parameters, 0 gradients, 257.8 GFLOPs

[34m[1mPyTorch:[0m starting from 'yolov8x.pt' with input shape (1, 3, 3616, 3616) BCHW and output shape(s) (1, 84, 268149) (130.5 MB)

[34m[1mOpenVINO:[0m starting export with openvino 2024.6.0-17404-4c0f47d2335-releases/2024/6...
[34m[1mOpenVINO:[0m export success ✅ 469.8s, saved as 'yolov8x_openvino_model/' (263.6 MB)

Export complete (653.9s)
Results saved to [1m/home/mra/Desktop/fyp_prototype[0m
Predict:         yolo predict task=detect model=yolov8x_openvino_model imgsz=3616  
Validate:        yolo val task=detect model=yolov8x_openvino_model imgsz=3616 data=coco.yaml  
Visualize:       https://netron.app


'yolov8x_openvino_model'

In [6]:
from ultralytics import YOLO
import os
import cv2

model = YOLO('yolov8x_openvino_model/')
output_dir = "runs/detect/head_centers_tiles"
os.makedirs(output_dir, exist_ok=True)

# Parameters
rows, cols = 4, 4
keep_rows = range(1, 4)   # only row‑indices 1, 2, 3 (zero‑based) → 12 tiles

for idx, frame_path in enumerate(sorted(os.listdir('Data/sample_frames/'))):
    img = cv2.imread(os.path.join('Data/sample_frames/', frame_path))
    H, W = img.shape[:2]
    tile_h, tile_w = H // rows, W // cols

    all_boxes = []  # to collect detections across tiles

    # 1) Loop over grid
    for i in range(rows):
        if i not in keep_rows:
            continue
        for j in range(cols):
            # define tile origin
            y0, x0 = i * tile_h, j * tile_w
            # crop tile
            tile = img[y0:y0 + tile_h, x0:x0 + tile_w]
            # resize to match OpenVINO export
            tile_resized = cv2.resize(tile, (3616, 3616))

            # inference
            res = model.predict(
                source=tile_resized,
                conf=0.01,
                classes=[0],
                device='cpu'
            )

            # rebase boxes
            for box in res[0].boxes:
                x1, y1, x2, y2 = box.xyxy[0].tolist()
                # scale back to original tile coords
                x1 = x1 * (tile_w / 3616) + x0
                x2 = x2 * (tile_w / 3616) + x0
                y1 = y1 * (tile_h / 3616) + y0
                y2 = y2 * (tile_h / 3616) + y0
                all_boxes.append((x1, y1, x2, y2))

    # 2) Draw head centers on the full image
    for (x1, y1, x2, y2) in all_boxes:
        cx = int((x1 + x2) / 2)
        head_y = int(y1)
        cv2.circle(img, (cx, head_y), radius=8, color=(0, 0, 255), thickness=-1)

    # 3) Save result
    cv2.imwrite(os.path.join(output_dir, f"frame_{idx:04d}.jpg"), img)

print("Done – check", output_dir)


Loading yolov8x_openvino_model for OpenVINO inference...
Using OpenVINO LATENCY mode for batch=1 inference...

0: 3616x3616 3 persons, 77943.5ms
Speed: 242.6ms preprocess, 77943.5ms inference, 28.5ms postprocess per image at shape (1, 3, 3616, 3616)

0: 3616x3616 1 person, 8694.1ms
Speed: 207.7ms preprocess, 8694.1ms inference, 9.1ms postprocess per image at shape (1, 3, 3616, 3616)

0: 3616x3616 (no detections), 10724.6ms
Speed: 134.2ms preprocess, 10724.6ms inference, 12.2ms postprocess per image at shape (1, 3, 3616, 3616)

0: 3616x3616 (no detections), 10904.5ms
Speed: 145.2ms preprocess, 10904.5ms inference, 10.4ms postprocess per image at shape (1, 3, 3616, 3616)

0: 3616x3616 5 persons, 11586.6ms
Speed: 209.5ms preprocess, 11586.6ms inference, 12.0ms postprocess per image at shape (1, 3, 3616, 3616)

0: 3616x3616 2 persons, 11175.4ms
Speed: 138.9ms preprocess, 11175.4ms inference, 10.8ms postprocess per image at shape (1, 3, 3616, 3616)

0: 3616x3616 5 persons, 11239.2ms
Speed: 

# YOLO 12

In [None]:
from ultralytics import YOLO
model = YOLO('yolov12m.pt')
model.export(format='openvino', imgsz=1920)