# Dataset Selection & Preparation

In [1]:
"""import os
import shutil
from PIL import Image
import random
import glob

# === Paths ===
base_path = "/mnt/sda1/FYP_2024/Helitha/CCTV/UA-DETRAC"
train_image_path = os.path.join(base_path, "images/train")
val_image_path   = os.path.join(base_path, "images/val")
test_image_path  = os.path.join(base_path, "images/test")
train_label_path = os.path.join(base_path, "labels/train")
val_label_path   = os.path.join(base_path, "labels/val")
test_label_path  = os.path.join(base_path, "labels/test")
calib_image_path = os.path.join(base_path, "images/calib")
calib_label_path = os.path.join(base_path, "labels/calib")

# === Ensure target directories exist ===
for path in [test_image_path, test_label_path, calib_image_path, calib_label_path]:
    os.makedirs(path, exist_ok=True)

# === Train / Val Image Stats ===
train_images = [f for f in os.listdir(train_image_path) if f.lower().endswith(('.jpg', '.jpeg', '.png'))]
val_images = sorted([f for f in os.listdir(val_image_path) if f.lower().endswith(('.jpg', '.jpeg', '.png'))])

print(f"Number of training images: {len(train_images)}")
print(f"Number of validation images (before split): {len(val_images)}")

# === Sample Resolution ===
sample_image = train_images[0]
sample_path = os.path.join(train_image_path, sample_image)
with Image.open(sample_path) as img:
    width, height = img.size
    print(f"Resolution of sample image ({sample_image}): {width}x{height}")

# === Move 50% of val to test ===
test_split = int(len(val_images) * 0.5)
test_files = val_images[:test_split]

for fname in test_files:
    src_img = os.path.join(val_image_path, fname)
    dst_img = os.path.join(test_image_path, fname)
    shutil.move(src_img, dst_img)

    label_name = os.path.splitext(fname)[0] + ".txt"
    src_lbl = os.path.join(val_label_path, label_name)
    dst_lbl = os.path.join(test_label_path, label_name)
    if os.path.exists(src_lbl):
        shutil.move(src_lbl, dst_lbl)

print(f"Moved {len(test_files)} val images and labels to test set.")

# === Create 500-image Calibration Set ===
remaining_val_images = sorted(
    glob.glob(os.path.join(val_image_path, "*.jpg")) +
    glob.glob(os.path.join(val_image_path, "*.png"))
)

num_calib = min(500, len(remaining_val_images))
calib_images = random.sample(remaining_val_images, num_calib)

for img_path in calib_images:
    img_name = os.path.basename(img_path)
    lbl_name = os.path.splitext(img_name)[0] + ".txt"

    shutil.copy(img_path, os.path.join(calib_image_path, img_name))

    src_lbl = os.path.join(val_label_path, lbl_name)
    if os.path.isfile(src_lbl):
        shutil.copy(src_lbl, os.path.join(calib_label_path, lbl_name))

print(f"Copied {len(calib_images)} images (+ labels if present) to calibration set.")

# === Final image counts ===
def count_images(folder):
    return len([f for f in os.listdir(folder) if f.lower().endswith(('.jpg', '.jpeg', '.png'))])

print("\nFinal counts:")
print(f"Train images: {count_images(train_image_path)}")
print(f"Val images:   {count_images(val_image_path)}")
print(f"Test images:  {count_images(test_image_path)}")
print(f"Calib images: {count_images(calib_image_path)}")
"""

'import os\nimport shutil\nfrom PIL import Image\nimport random\nimport glob\n\n# === Paths ===\nbase_path = "/mnt/sda1/FYP_2024/Helitha/CCTV/UA-DETRAC"\ntrain_image_path = os.path.join(base_path, "images/train")\nval_image_path   = os.path.join(base_path, "images/val")\ntest_image_path  = os.path.join(base_path, "images/test")\ntrain_label_path = os.path.join(base_path, "labels/train")\nval_label_path   = os.path.join(base_path, "labels/val")\ntest_label_path  = os.path.join(base_path, "labels/test")\ncalib_image_path = os.path.join(base_path, "images/calib")\ncalib_label_path = os.path.join(base_path, "labels/calib")\n\n# === Ensure target directories exist ===\nfor path in [test_image_path, test_label_path, calib_image_path, calib_label_path]:\n    os.makedirs(path, exist_ok=True)\n\n# === Train / Val Image Stats ===\ntrain_images = [f for f in os.listdir(train_image_path) if f.lower().endswith((\'.jpg\', \'.jpeg\', \'.png\'))]\nval_images = sorted([f for f in os.listdir(val_image_p

# Model Selection

In [2]:
from ultralytics import YOLO

# Load pretrained model (already done)
model = YOLO("yolo11m.pt")

print(model.model)


DetectionModel(
  (model): Sequential(
    (0): Conv(
      (conv): Conv2d(3, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(64, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
      (act): SiLU(inplace=True)
    )
    (1): Conv(
      (conv): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(128, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
      (act): SiLU(inplace=True)
    )
    (2): C3k2(
      (cv1): Conv(
        (conv): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn): BatchNorm2d(128, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (act): SiLU(inplace=True)
      )
      (cv2): Conv(
        (conv): Conv2d(192, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn): BatchNorm2d(256, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (act): SiLU(inplace=True)
   

# Fine Tune

In [3]:
"""# Fine-tune the model
results = model.train(
    data="/mnt/sda1/FYP_2024/Helitha/CCTV/ua_detrac.yaml",
    epochs=25,
    imgsz=640,
    batch=16,
    workers=4,
    patience=10,
    freeze=10,
    # Augmentation options
    degrees=10,      # rotation
    translate=0.1,   # translation
    scale=0.5,       # scale range
    shear=2,         # shear
    perspective=0.001,
    flipud=0.5,      # vertical flip prob
    fliplr=0.5,      # horizontal flip prob
    mosaic=1.0,      # mosaic aug
    mixup=0.1,       # mixup aug
    hsv_h=0.015,     # HSV hue aug
    hsv_s=0.7,       # HSV saturation aug
    hsv_v=0.4,        # HSV value aug (brightness)
    device='cuda:0',  # Use GPU if available
)
"""

'# Fine-tune the model\nresults = model.train(\n    data="/mnt/sda1/FYP_2024/Helitha/CCTV/ua_detrac.yaml",\n    epochs=25,\n    imgsz=640,\n    batch=16,\n    workers=4,\n    patience=10,\n    freeze=10,\n    # Augmentation options\n    degrees=10,      # rotation\n    translate=0.1,   # translation\n    scale=0.5,       # scale range\n    shear=2,         # shear\n    perspective=0.001,\n    flipud=0.5,      # vertical flip prob\n    fliplr=0.5,      # horizontal flip prob\n    mosaic=1.0,      # mosaic aug\n    mixup=0.1,       # mixup aug\n    hsv_h=0.015,     # HSV hue aug\n    hsv_s=0.7,       # HSV saturation aug\n    hsv_v=0.4,        # HSV value aug (brightness)\n    device=\'cuda:0\',  # Use GPU if available\n)\n'

# Evaluation

In [4]:
from ultralytics import YOLO

# Load pretrained model (already done)
model = YOLO("/mnt/sda1/FYP_2024/Helitha/CCTV/runs/detect/train/weights/best.pt")


metrics = model.val(
    data="/mnt/sda1/FYP_2024/Helitha/CCTV/ua_detrac.yaml",
    imgsz=640,
    batch=16,
    device = 'cuda:0',
    split = 'test'
)
print(metrics)

Ultralytics 8.3.146 🚀 Python-3.9.18 torch-2.5.1+cu124 CUDA:0 (NVIDIA GeForce RTX 2080 Ti, 10822MiB)
YOLO11m summary (fused): 125 layers, 20,033,116 parameters, 0 gradients, 67.7 GFLOPs
[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 689.7±616.0 MB/s, size: 73.9 KB)


[34m[1mval: [0mScanning /mnt/sda1/FYP_2024/Helitha/CCTV/UA-DETRAC/labels/test.cache... 27998 images, 172 backgrounds, 0 corrupt: 100%|██████████| 28170/28170 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1761/1761 [04:04<00:00,  7.21it/s]


                   all      28170     260676      0.718      0.712      0.712      0.507
                 truck       4299       4299      0.664      0.429      0.447      0.298
                   car      27595     209448      0.768      0.769      0.815      0.593
                   van       7743       9744      0.665      0.769      0.738      0.553
                   bus      15795      37185      0.777      0.882      0.849      0.586
Speed: 0.1ms preprocess, 4.8ms inference, 0.0ms loss, 0.5ms postprocess per image
Results saved to [1mruns/detect/val[0m
ultralytics.utils.metrics.DetMetrics object with attributes:

ap_class_index: array([0, 1, 2, 3])
box: ultralytics.utils.metrics.Metric object
confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x7f968f12b700>
curves: ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)']
curves_results: [[array([          0,    0.001001,    0.002002,    0.003003,    0.004004,    0.0050

# Tracking Functions

### Batch Mode

In [5]:
import os
import time
import cv2
from collections import defaultdict


def Real_Time_Inference(model, video_path ,label_dir, device = "cuda:0"):

    # Get frame count
    cap = cv2.VideoCapture(video_path)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    cap.release()

    # Measure time
    start_time = time.time()

    # Run tracking
    results = model.track(
        source=video_path,
        save=True,           
        device = device,
        verbose=True,
        exist_ok=True,
        tracker="bytetrack.yaml",
        save_txt=True             
    )

    end_time = time.time()
    elapsed_time = end_time - start_time
    print(f"Tracking completed in {elapsed_time:.2f} seconds.")

    fps = total_frames / elapsed_time
    print(f"✅ FPS: {fps:.2f} — This is the end-to-end FPS, including print statements, result saving, etc.")

    unique_ids = set()
    class_ids = defaultdict(set)

    for file in os.listdir(label_dir):
        if file.endswith(".txt"):
            with open(os.path.join(label_dir, file), 'r') as f:
                for line in f:
                    parts = line.strip().split()
                    if len(parts) >= 6:
                        class_id = int(parts[0])         # Class ID at index 0
                        track_id = int(parts[5])         # Track ID at index 5
                        unique_ids.add(track_id)
                        class_ids[class_id].add(track_id)

    # Output
    print(f"✅ Total unique vehicles: {len(unique_ids)}")
    names = {0: 'truck', 1: 'car', 2: 'van', 3: 'bus'}
    for cls_id, ids in class_ids.items():
        print(f"{names.get(cls_id, f'class_{cls_id}')}: {len(ids)} unique vehicles")


### Frame-by-Frame Mode

In [6]:
import time
import cv2
from collections import defaultdict

def Real_Time_Inference_FrameByFrame(model, video_path, device="cuda:0", show_window=False):
    """
    Real‐time inference + tracking, frame by frame, to minimize latency.

    Args:
        model:           a loaded YOLO object (e.g. YOLO("yolo11n.onnx") or YOLO("yolo11n.pt")).
        video_path:      path to the input video file (e.g. "/mnt/.../Traffic_Cut.mp4").
        device:          "cuda:0" or "cpu". Must match how the model was loaded.
        show_window:     if True, will call cv2.imshow(...) for each annotated frame.

    Prints:
        - total elapsed time
        - end‐to‐end FPS
        - total unique vehicles per class
    """

    # Open video and read total frame count
    cap = cv2.VideoCapture(video_path)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    # Prepare containers for counting unique track IDs
    unique_ids = set()
    class_ids = defaultdict(set)
    names = {0: "truck", 1: "car", 2: "van", 3: "bus"}

    start_time = time.time()
    frame_idx = 0

    # Process one frame at a time
    while True:
        success, frame = cap.read()
        if not success:
            break

        # Run YOLO tracking on this single frame; persist=True preserves ByteTrack state
        #   (make sure 'model' was created on the same device)
        results = model.track(frame, persist=True)[0]

        # results.boxes.xyxy / .xywh  holds bounding boxes, results.boxes.id holds track IDs
        if results.boxes is not None and results.boxes.id is not None:
            # Extract all class IDs and track IDs for this frame
            #   NOTE: .cls or .class_prob may differ by version—.boxes.cls is often the class index
            classes = results.boxes.cls.int().cpu().tolist()
            track_ids = results.boxes.id.int().cpu().tolist()

            # Count each track_id only once, and also group by class
            for cls_i, tid in zip(classes, track_ids):
                unique_ids.add(tid)
                class_ids[cls_i].add(tid)

        # (Optional) draw boxes + IDs on the frame
        annotated = results.plot()

        if show_window:
            cv2.imshow("RealTime Tracking", annotated)
            if cv2.waitKey(1) & 0xFF == ord("q"):
                break

        frame_idx += 1

    end_time = time.time()
    elapsed = end_time - start_time
    cap.release()
    if show_window:
        cv2.destroyAllWindows()

    # Compute end‐to‐end FPS (no file I/O, pure loop)
    fps = total_frames / elapsed if elapsed > 0 else 0.0

    # Print timing + counts
    print(f"Processed {total_frames} frames in {elapsed:.2f} s → FPS: {fps:.2f}")
    print(f"✅ Total unique vehicles: {len(unique_ids)}")
    for cls_i, ids in class_ids.items():
        print(f"  {names.get(cls_i, f'class_{cls_i}')}: {len(ids)}")

    stats = {
        "elapsed_s": elapsed,
        "fps": fps,
        "total_unique": len(unique_ids),
        "per_class": {names.get(k, f"class_{k}"): len(v) for k, v in class_ids.items()}
    }
    print("\nDetailed stats dictionary:")
    print(stats)

### Multithreaded Tracking

In [7]:
#import threading
#
#import cv2
#
#from ultralytics import YOLO
#
## Define model names and video sources
#MODEL_NAMES = ["model_1", "model_2"]
#SOURCES = ["path/to/video.mp4", "0"]  # local video, 0 for webcam
#
#
#def run_tracker_in_thread(model_name, filename):
#    """
#    Run YOLO tracker in its own thread for concurrent processing.
#
#    Args:
#        model_name (str): The YOLO11 model object.
#        filename (str): The path to the video file or the identifier for the webcam/external camera source.
#    """
#    model = YOLO(model_name)
#    results = model.track(filename, save=True, stream=True)
#    for r in results:
#        pass
#
#
## Create and start tracker threads using a for loop
#tracker_threads = []
#for video_file, model_name in zip(SOURCES, MODEL_NAMES):
#    thread = threading.Thread(target=run_tracker_in_thread, args=(model_name, video_file), daemon=True)
#    tracker_threads.append(thread)
#    thread.start()
#
## Wait for all tracker threads to finish
#for thread in tracker_threads:
#    thread.join()
#
## Clean up and close windows
#cv2.destroyAllWindows()

## Real-Time Inference Benchmark (FPS)

In [8]:

# Load model and input video
model = YOLO("/mnt/sda1/FYP_2024/Helitha/CCTV/runs/detect/train/weights/best.pt")
video_path = "/mnt/sda1/FYP_2024/Helitha/CCTV/Traffic_Cut.mp4"
label_dir = "/mnt/sda1/FYP_2024/Helitha/CCTV/runs/detect/track/labels"

Real_Time_Inference(model= model , video_path = video_path , label_dir= label_dir)
#Real_Time_Inference_FrameByFrame(model, video_path)



inference results will accumulate in RAM unless `stream=True` is passed, causing potential out-of-memory
errors for large sources or long-running streams and videos. See https://docs.ultralytics.com/modes/predict/ for help.

Example:
    results = model(source=..., stream=True)  # generator of Results objects
    for r in results:
        boxes = r.boxes  # Boxes object for bbox outputs
        masks = r.masks  # Masks object for segment masks outputs
        probs = r.probs  # Class probabilities for classification outputs

video 1/1 (frame 1/1760) /mnt/sda1/FYP_2024/Helitha/CCTV/Traffic_Cut.mp4: 384x640 8 cars, 2 vans, 37.6ms
video 1/1 (frame 2/1760) /mnt/sda1/FYP_2024/Helitha/CCTV/Traffic_Cut.mp4: 384x640 6 cars, 3 vans, 9.1ms
video 1/1 (frame 3/1760) /mnt/sda1/FYP_2024/Helitha/CCTV/Traffic_Cut.mp4: 384x640 5 cars, 2 vans, 9.0ms
video 1/1 (frame 4/1760) /mnt/sda1/FYP_2024/Helitha/CCTV/Traffic_Cut.mp4: 384x640 1 truck, 7 cars, 2 vans, 9.0ms
video 1/1 (frame 5/1760) /mnt/sda1/FYP_202

# Exporting YOLO11 Model to ONNX

In [9]:
from ultralytics import YOLO

model = YOLO("runs/detect/train/weights/best.pt") 

#convert to ONNX format
model.export(format="onnx", imgsz=640 , device= 'cuda')

Ultralytics 8.3.146 🚀 Python-3.9.18 torch-2.5.1+cu124 CUDA:0 (NVIDIA GeForce RTX 2080 Ti, 10822MiB)
YOLO11m summary (fused): 125 layers, 20,033,116 parameters, 0 gradients, 67.7 GFLOPs

[34m[1mPyTorch:[0m starting from 'runs/detect/train/weights/best.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) (1, 8, 8400) (38.6 MB)

[34m[1mONNX:[0m starting export with onnx 1.17.0 opset 19...
[34m[1mONNX:[0m slimming with onnxslim 0.1.55...
[34m[1mONNX:[0m export success ✅ 3.1s, saved as 'runs/detect/train/weights/best.onnx' (76.7 MB)

Export complete (3.4s)
Results saved to [1m/mnt/sda1/FYP_2024/Helitha/CCTV/runs/detect/train/weights[0m
Predict:         yolo predict task=detect model=runs/detect/train/weights/best.onnx imgsz=640  
Validate:        yolo val task=detect model=runs/detect/train/weights/best.onnx imgsz=640 data=/mnt/sda1/FYP_2024/Helitha/CCTV/ua_detrac.yaml  
Visualize:       https://netron.app


'runs/detect/train/weights/best.onnx'

In [10]:
import onnxruntime as ort
print(ort.get_available_providers())


['TensorrtExecutionProvider', 'CUDAExecutionProvider', 'AzureExecutionProvider', 'CPUExecutionProvider']


## Infrence with ONNX model

In [11]:
import matplotlib.pyplot as plt
import cv2

# Load the exported ONNX model
onnx_model = YOLO("/mnt/sda1/FYP_2024/Helitha/CCTV/runs/detect/train/weights/best.onnx")

# Run inference
results = onnx_model("/mnt/sda1/FYP_2024/Helitha/CCTV/UA-DETRAC/images/test/MVI_39031_img00322.jpg" ,  device='cuda:0')

# show the image with predictions
# draw the boxes (BGR array)
boxed = results[0].plot()             

# show in notebook
plt.imshow(cv2.cvtColor(boxed, cv2.COLOR_BGR2RGB))
plt.axis("off")
plt.show()

Loading /mnt/sda1/FYP_2024/Helitha/CCTV/runs/detect/train/weights/best.onnx for ONNX Runtime inference...
Using ONNX Runtime CUDAExecutionProvider

image 1/1 /mnt/sda1/FYP_2024/Helitha/CCTV/UA-DETRAC/images/test/MVI_39031_img00322.jpg: 640x640 6 cars, 1 van, 21.3ms
Speed: 8.5ms preprocess, 21.3ms inference, 2.3ms postprocess per image at shape (1, 3, 640, 640)


<Figure size 640x480 with 1 Axes>

## Evaluation on Test Dataset

In [12]:

model = onnx_model
metrics = model.val(
    data="/mnt/sda1/FYP_2024/Helitha/CCTV/ua_detrac.yaml",
    imgsz=640,
    batch=16,
    device = 'cuda:0',
    split = 'test',
    project = "/mnt/sda1/FYP_2024/Helitha/CCTV/runs/detect/onnx"
)
print(metrics)

Ultralytics 8.3.146 🚀 Python-3.9.18 torch-2.5.1+cu124 CUDA:0 (NVIDIA GeForce RTX 2080 Ti, 10822MiB)
Loading /mnt/sda1/FYP_2024/Helitha/CCTV/runs/detect/train/weights/best.onnx for ONNX Runtime inference...
Using ONNX Runtime CUDAExecutionProvider
Setting batch=1 input of shape (1, 3, 640, 640)
[34m[1mval: [0mFast image access ✅ (ping: 0.1±0.1 ms, read: 346.5±577.4 MB/s, size: 86.8 KB)


[34m[1mval: [0mScanning /mnt/sda1/FYP_2024/Helitha/CCTV/UA-DETRAC/labels/test.cache... 27998 images, 172 backgrounds, 0 corrupt: 100%|██████████| 28170/28170 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 28170/28170 [08:27<00:00, 55.51it/s]


                   all      28170     260676      0.721      0.706      0.711      0.486
                 truck       4299       4299      0.639      0.429      0.438      0.278
                   car      27595     209448       0.76      0.767      0.806      0.564
                   van       7743       9744      0.692      0.761      0.742      0.533
                   bus      15795      37185      0.792      0.867      0.858      0.568
Speed: 0.2ms preprocess, 14.3ms inference, 0.0ms loss, 0.8ms postprocess per image
Results saved to [1m/mnt/sda1/FYP_2024/Helitha/CCTV/runs/detect/onnx/val[0m
ultralytics.utils.metrics.DetMetrics object with attributes:

ap_class_index: array([0, 1, 2, 3])
box: ultralytics.utils.metrics.Metric object
confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x7f969dd47fd0>
curves: ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)']
curves_results: [[array([          0,    0.001001,    0.00200

## Real Time Inference Benchmark (FPS)

In [13]:

video_path = "/mnt/sda1/FYP_2024/Helitha/CCTV/Traffic_Cut.mp4"


# Load model and input video
video_path = "/mnt/sda1/FYP_2024/Helitha/CCTV/Traffic_Cut.mp4"
label_dir = "runs/detect/predict/labels"

Real_Time_Inference(model= onnx_model , video_path = video_path , label_dir= label_dir)
#Real_Time_Inference_FrameByFrame(onnx_model, video_path)




inference results will accumulate in RAM unless `stream=True` is passed, causing potential out-of-memory
errors for large sources or long-running streams and videos. See https://docs.ultralytics.com/modes/predict/ for help.

Example:
    results = model(source=..., stream=True)  # generator of Results objects
    for r in results:
        boxes = r.boxes  # Boxes object for bbox outputs
        masks = r.masks  # Masks object for segment masks outputs
        probs = r.probs  # Class probabilities for classification outputs

video 1/1 (frame 1/1760) /mnt/sda1/FYP_2024/Helitha/CCTV/Traffic_Cut.mp4: 640x640 7 cars, 1 van, 19.0ms
video 1/1 (frame 2/1760) /mnt/sda1/FYP_2024/Helitha/CCTV/Traffic_Cut.mp4: 640x640 6 cars, 2 vans, 17.5ms
video 1/1 (frame 3/1760) /mnt/sda1/FYP_2024/Helitha/CCTV/Traffic_Cut.mp4: 640x640 6 cars, 2 vans, 18.2ms
video 1/1 (frame 4/1760) /mnt/sda1/FYP_2024/Helitha/CCTV/Traffic_Cut.mp4: 640x640 1 truck, 6 cars, 2 vans, 18.3ms
video 1/1 (frame 5/1760) /mnt/sda1/FYP_2

# Exporting YOLO11 Model to TensorRT

In [14]:
from ultralytics import YOLO


model = YOLO("runs/detect/train/weights/best.pt")

model.export(
    format="engine",
    int8=True,
    device='cuda:0', 
    data="/mnt/sda1/FYP_2024/Helitha/CCTV/calib.yaml",
    imgsz=640
)


Ultralytics 8.3.146 🚀 Python-3.9.18 torch-2.5.1+cu124 CUDA:0 (NVIDIA GeForce RTX 2080 Ti, 10822MiB)
YOLO11m summary (fused): 125 layers, 20,033,116 parameters, 0 gradients, 67.7 GFLOPs

[34m[1mPyTorch:[0m starting from 'runs/detect/train/weights/best.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) (1, 8, 8400) (38.6 MB)

[34m[1mONNX:[0m starting export with onnx 1.17.0 opset 19...
[34m[1mONNX:[0m slimming with onnxslim 0.1.55...
[34m[1mONNX:[0m export success ✅ 4.3s, saved as 'runs/detect/train/weights/best.onnx' (76.6 MB)

[34m[1mTensorRT:[0m starting export with TensorRT 10.11.0.33...
[34m[1mTensorRT:[0m collecting INT8 calibration images from 'data=/mnt/sda1/FYP_2024/Helitha/CCTV/calib.yaml'
Fast image access ✅ (ping: 0.1±0.1 ms, read: 3.9±1.8 MB/s, size: 74.1 KB)


Scanning /mnt/sda1/FYP_2024/Helitha/CCTV/UA-DETRAC/labels/calib.cache... 500 images, 0 backgrounds, 0 corrupt: 100%|██████████| 500/500 [00:00<?, ?it/s]


[06/04/2025-00:34:39] [TRT] [I] [MemUsageChange] Init CUDA: CPU -2, GPU +0, now: CPU 24161, GPU 2590 (MiB)
[06/04/2025-00:34:41] [TRT] [I] [MemUsageChange] Init builder kernel library: CPU -647, GPU +2, now: CPU 23312, GPU 2592 (MiB)
[06/04/2025-00:34:41] [TRT] [I] ----------------------------------------------------------------
[06/04/2025-00:34:41] [TRT] [I] Input filename:   runs/detect/train/weights/best.onnx
[06/04/2025-00:34:41] [TRT] [I] ONNX IR version:  0.0.9
[06/04/2025-00:34:41] [TRT] [I] Opset version:    19
[06/04/2025-00:34:41] [TRT] [I] Producer name:    pytorch
[06/04/2025-00:34:41] [TRT] [I] Producer version: 2.5.1
[06/04/2025-00:34:41] [TRT] [I] Domain:           
[06/04/2025-00:34:41] [TRT] [I] Model version:    0
[06/04/2025-00:34:41] [TRT] [I] Doc string:       
[06/04/2025-00:34:41] [TRT] [I] ----------------------------------------------------------------
[34m[1mTensorRT:[0m input "images" with shape(-1, 3, -1, -1) DataType.FLOAT
[34m[1mTensorRT:[0m output 

'runs/detect/train/weights/best.engine'

## Inference with TensorRT model

In [15]:
import matplotlib.pyplot as plt
import cv2

# Load the exported TensorRT model
tensorrt_model = YOLO("/mnt/sda1/FYP_2024/Helitha/CCTV/runs/detect/train/weights/best.engine")

# Run inference
results = tensorrt_model("/mnt/sda1/FYP_2024/Helitha/CCTV/UA-DETRAC/images/test/MVI_39031_img00322.jpg" , device='cuda')

# draw the boxes (BGR array)
boxed = results[0].plot()             

# show in notebook
plt.imshow(cv2.cvtColor(boxed, cv2.COLOR_BGR2RGB))
plt.axis("off")
plt.show()

Loading /mnt/sda1/FYP_2024/Helitha/CCTV/runs/detect/train/weights/best.engine for TensorRT inference...
[06/04/2025-00:38:13] [TRT] [I] Loaded engine size: 24 MiB
[06/04/2025-00:38:13] [TRT] [I] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +124, now: CPU 0, GPU 150 (MiB)

image 1/1 /mnt/sda1/FYP_2024/Helitha/CCTV/UA-DETRAC/images/test/MVI_39031_img00322.jpg: 384x640 7 cars, 1 van, 5.4ms
Speed: 1.2ms preprocess, 5.4ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)


<Figure size 640x480 with 1 Axes>

## Evaluation on Test Dataset


In [16]:

model = tensorrt_model
metrics = model.val(
    data="/mnt/sda1/FYP_2024/Helitha/CCTV/ua_detrac.yaml",
    imgsz=640,
    batch=16,
    device = 'cuda:0',
    split = 'test',
    project = "/mnt/sda1/FYP_2024/Helitha/CCTV/runs/detect/tensorrt"
)
print(metrics)

Ultralytics 8.3.146 🚀 Python-3.9.18 torch-2.5.1+cu124 CUDA:0 (NVIDIA GeForce RTX 2080 Ti, 10822MiB)
Loading /mnt/sda1/FYP_2024/Helitha/CCTV/runs/detect/train/weights/best.engine for TensorRT inference...
[06/04/2025-00:38:13] [TRT] [I] The logger passed into createInferRuntime differs from one already provided for an existing builder, runtime, or refitter. Uses of the global logger, returned by nvinfer1::getLogger(), will return the existing value.
[06/04/2025-00:38:13] [TRT] [I] Loaded engine size: 24 MiB
[06/04/2025-00:38:14] [TRT] [I] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +124, now: CPU 1, GPU 301 (MiB)
[34m[1mval: [0mFast image access ✅ (ping: 0.1±0.1 ms, read: 354.5±445.3 MB/s, size: 75.6 KB)


[34m[1mval: [0mScanning /mnt/sda1/FYP_2024/Helitha/CCTV/UA-DETRAC/labels/test.cache... 27998 images, 172 backgrounds, 0 corrupt: 100%|██████████| 28170/28170 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 28170/28170 [03:09<00:00, 148.55it/s]


                   all      28170     260676      0.684      0.727      0.653      0.448
                 truck       4299       4299      0.616      0.436      0.429      0.279
                   car      27595     209448      0.744      0.809      0.763      0.534
                   van       7743       9744      0.596      0.789      0.625       0.45
                   bus      15795      37185      0.782      0.873      0.795      0.528
Speed: 0.2ms preprocess, 2.0ms inference, 0.0ms loss, 0.8ms postprocess per image
Results saved to [1m/mnt/sda1/FYP_2024/Helitha/CCTV/runs/detect/tensorrt/val[0m
ultralytics.utils.metrics.DetMetrics object with attributes:

ap_class_index: array([0, 1, 2, 3])
box: ultralytics.utils.metrics.Metric object
confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x7f969cf88160>
curves: ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)']
curves_results: [[array([          0,    0.001001,    0.00

## Real Time Inference Benchmark (FPS)

In [17]:
# Load model and input video
video_path = "/mnt/sda1/FYP_2024/Helitha/CCTV/Traffic_Cut.mp4"
label_dir = "runs/detect/track/labels"

Real_Time_Inference(model= tensorrt_model , video_path = video_path , label_dir= label_dir)
#Real_Time_Inference_FrameByFrame(tensorrt_model, video_path)




inference results will accumulate in RAM unless `stream=True` is passed, causing potential out-of-memory
errors for large sources or long-running streams and videos. See https://docs.ultralytics.com/modes/predict/ for help.

Example:
    results = model(source=..., stream=True)  # generator of Results objects
    for r in results:
        boxes = r.boxes  # Boxes object for bbox outputs
        masks = r.masks  # Masks object for segment masks outputs
        probs = r.probs  # Class probabilities for classification outputs

video 1/1 (frame 1/1760) /mnt/sda1/FYP_2024/Helitha/CCTV/Traffic_Cut.mp4: 384x640 10 cars, 2 vans, 2.4ms
video 1/1 (frame 2/1760) /mnt/sda1/FYP_2024/Helitha/CCTV/Traffic_Cut.mp4: 384x640 8 cars, 2 vans, 2.4ms
video 1/1 (frame 3/1760) /mnt/sda1/FYP_2024/Helitha/CCTV/Traffic_Cut.mp4: 384x640 7 cars, 2 vans, 2.4ms
video 1/1 (frame 4/1760) /mnt/sda1/FYP_2024/Helitha/CCTV/Traffic_Cut.mp4: 384x640 1 truck, 8 cars, 2 vans, 2.4ms
video 1/1 (frame 5/1760) /mnt/sda1/FYP_202

# ONNX to TensorRT (optional)

In [18]:
"""!LD_LIBRARY_PATH=/home/fyp2selfdriving/FYP_2024_work/TensorRT-10.11.0.33/targets/x86_64-linux-gnu/lib:$LD_LIBRARY_PATH \
/home/fyp2selfdriving/FYP_2024_work/TensorRT-10.11.0.33/targets/x86_64-linux-gnu/bin/trtexec \
--onnx=/mnt/sda1/FYP_2024/Helitha/CCTV/runs/detect/train/weights/best.onnx \
--int8 \
--calib=/mnt/sda1/FYP_2024/Helitha/CCTV/runs/detect/train/weights/calib.cache \
--saveEngine=/mnt/sda1/FYP_2024/Helitha/CCTV/runs/detect/train/weights/best_int8.engine
"""

'!LD_LIBRARY_PATH=/home/fyp2selfdriving/FYP_2024_work/TensorRT-10.11.0.33/targets/x86_64-linux-gnu/lib:$LD_LIBRARY_PATH /home/fyp2selfdriving/FYP_2024_work/TensorRT-10.11.0.33/targets/x86_64-linux-gnu/bin/trtexec --onnx=/mnt/sda1/FYP_2024/Helitha/CCTV/runs/detect/train/weights/best.onnx --int8 --calib=/mnt/sda1/FYP_2024/Helitha/CCTV/runs/detect/train/weights/calib.cache --saveEngine=/mnt/sda1/FYP_2024/Helitha/CCTV/runs/detect/train/weights/best_int8.engine\n'

# Exporting YOLO11 Model OpenVINO

In [19]:
# Load a YOLO11n PyTorch model
model = YOLO("runs/detect/train/weights/best.pt")

# Export the model
model.export(format="openvino", data = "/mnt/sda1/FYP_2024/Helitha/CCTV/calib.yaml" , device = "cuda:0" , int8 = True )  # creates 'yolo11n_openvino_model/'

Ultralytics 8.3.146 🚀 Python-3.9.18 torch-2.5.1+cu124 CUDA:0 (NVIDIA GeForce RTX 2080 Ti, 10822MiB)
YOLO11m summary (fused): 125 layers, 20,033,116 parameters, 0 gradients, 67.7 GFLOPs

[34m[1mPyTorch:[0m starting from 'runs/detect/train/weights/best.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) (1, 8, 8400) (38.6 MB)

[34m[1mOpenVINO:[0m starting export with openvino 2025.1.0-18503-6fec06580ab-releases/2025/1...
[34m[1mOpenVINO:[0m collecting INT8 calibration images from 'data=/mnt/sda1/FYP_2024/Helitha/CCTV/calib.yaml'
Fast image access ✅ (ping: 0.1±0.1 ms, read: 3.2±1.7 MB/s, size: 71.8 KB)


Scanning /mnt/sda1/FYP_2024/Helitha/CCTV/UA-DETRAC/labels/calib.cache... 500 images, 0 backgrounds, 0 corrupt: 100%|██████████| 500/500 [00:00<?, ?it/s]




E0000 00:00:1748977981.052904 1641194 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1748977981.072238 1641194 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1748977981.208496 1641194 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1748977981.208518 1641194 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1748977981.208521 1641194 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1748977981.208523 1641194 computation_placer.cc:177] computation placer already registered. Please check link

INFO:nncf:16 ignored nodes were found by patterns in the NNCFGraph
INFO:nncf:1 ignored nodes were found by types in the NNCFGraph
INFO:nncf:Not adding activation input quantizer for operation: 234 __module.model.23.dfl/aten::view/Reshape
INFO:nncf:Not adding activation input quantizer for operation: 235 __module.model.23/aten::sigmoid/Sigmoid
INFO:nncf:Not adding activation input quantizer for operation: 251 __module.model.23.dfl/aten::transpose/Transpose
INFO:nncf:Not adding activation input quantizer for operation: 269 __module.model.23.dfl/aten::softmax/Softmax
INFO:nncf:Not adding activation input quantizer for operation: 283 __module.model.23.dfl.conv/aten::_convolution/Convolution
INFO:nncf:Not adding activation input quantizer for operation: 296 __module.model.23.dfl/aten::view/Reshape_1
INFO:nncf:Not adding activation input quantizer for operation: 318 __module.model.23/aten::sub/Subtract
INFO:nncf:Not adding activation input quantizer for operation: 319 __module.model.23/aten:

Output()

Output()

[34m[1mOpenVINO:[0m export success ✅ 131.2s, saved as 'runs/detect/train/weights/best_int8_openvino_model/' (20.1 MB)

Export complete (131.6s)
Results saved to [1m/mnt/sda1/FYP_2024/Helitha/CCTV/runs/detect/train/weights[0m
Predict:         yolo predict task=detect model=runs/detect/train/weights/best_int8_openvino_model imgsz=640 int8 
Validate:        yolo val task=detect model=runs/detect/train/weights/best_int8_openvino_model imgsz=640 data=/mnt/sda1/FYP_2024/Helitha/CCTV/ua_detrac.yaml int8 
Visualize:       https://netron.app


'runs/detect/train/weights/best_int8_openvino_model'

## Inferene With Openvino Model

In [20]:
import matplotlib.pyplot as plt
import cv2


# Load the exported OpenVINO model
ov_model = YOLO("/mnt/sda1/FYP_2024/Helitha/CCTV/runs/detect/train/weights/best_int8_openvino_model/")

# Run inference with specified device, available devices: ["intel:gpu", "intel:npu", "intel:cpu"]
results = ov_model("/mnt/sda1/FYP_2024/Helitha/CCTV/UA-DETRAC/images/test/MVI_39031_img00322.jpg", device="intel:cpu")

# draw the boxes (BGR array)
boxed = results[0].plot()             

# show in notebook
plt.imshow(cv2.cvtColor(boxed, cv2.COLOR_BGR2RGB))
plt.axis("off")
plt.show()

Loading /mnt/sda1/FYP_2024/Helitha/CCTV/runs/detect/train/weights/best_int8_openvino_model for OpenVINO inference...
Using OpenVINO LATENCY mode for batch=1 inference...

image 1/1 /mnt/sda1/FYP_2024/Helitha/CCTV/UA-DETRAC/images/test/MVI_39031_img00322.jpg: 640x640 6 cars, 1 van, 67.4ms
Speed: 16.9ms preprocess, 67.4ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)


<Figure size 640x480 with 1 Axes>

## Evaluation on Test Dataset

In [21]:

model = ov_model
metrics = model.val(
    data="/mnt/sda1/FYP_2024/Helitha/CCTV/ua_detrac.yaml",
    imgsz=640,
    batch=16,
    device = 'intel:cpu',
    split = 'test',
    project = "/mnt/sda1/FYP_2024/Helitha/CCTV/runs/detect/openvino"
)
print(metrics)

Loading /mnt/sda1/FYP_2024/Helitha/CCTV/runs/detect/train/weights/best_int8_openvino_model for OpenVINO inference...
Using OpenVINO LATENCY mode for batch=1 inference...
Setting batch=1 input of shape (1, 3, 640, 640)
[34m[1mval: [0mFast image access ✅ (ping: 0.1±0.1 ms, read: 215.1±138.9 MB/s, size: 72.9 KB)


[34m[1mval: [0mScanning /mnt/sda1/FYP_2024/Helitha/CCTV/UA-DETRAC/labels/test.cache... 27998 images, 172 backgrounds, 0 corrupt: 100%|██████████| 28170/28170 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 28170/28170 [29:56<00:00, 15.68it/s]


                   all      28170     260676      0.709      0.699      0.696      0.476
                 truck       4299       4299      0.604      0.414      0.409      0.258
                   car      27595     209448      0.749      0.772      0.798      0.557
                   van       7743       9744      0.694      0.751      0.726      0.522
                   bus      15795      37185      0.791       0.86      0.851      0.565
Speed: 0.8ms preprocess, 54.7ms inference, 0.0ms loss, 0.7ms postprocess per image
Results saved to [1m/mnt/sda1/FYP_2024/Helitha/CCTV/runs/detect/openvino/val[0m
ultralytics.utils.metrics.DetMetrics object with attributes:

ap_class_index: array([0, 1, 2, 3])
box: ultralytics.utils.metrics.Metric object
confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x7f95c81dfd30>
curves: ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)']
curves_results: [[array([          0,    0.001001,    0.0

## Real Time Inference Benchmark (FPS)

In [22]:
# Load model and input video
video_path = "/mnt/sda1/FYP_2024/Helitha/CCTV/Traffic_Cut.mp4"
label_dir = "runs/detect/track/labels"

Real_Time_Inference(model= ov_model , video_path = video_path , label_dir= label_dir , device = "intel:cpu")
#Real_Time_Inference_FrameByFrame(ov_model, video_path)




inference results will accumulate in RAM unless `stream=True` is passed, causing potential out-of-memory
errors for large sources or long-running streams and videos. See https://docs.ultralytics.com/modes/predict/ for help.

Example:
    results = model(source=..., stream=True)  # generator of Results objects
    for r in results:
        boxes = r.boxes  # Boxes object for bbox outputs
        masks = r.masks  # Masks object for segment masks outputs
        probs = r.probs  # Class probabilities for classification outputs

video 1/1 (frame 1/1760) /mnt/sda1/FYP_2024/Helitha/CCTV/Traffic_Cut.mp4: 640x640 8 cars, 2 vans, 47.6ms
video 1/1 (frame 2/1760) /mnt/sda1/FYP_2024/Helitha/CCTV/Traffic_Cut.mp4: 640x640 6 cars, 2 vans, 48.0ms
video 1/1 (frame 3/1760) /mnt/sda1/FYP_2024/Helitha/CCTV/Traffic_Cut.mp4: 640x640 8 cars, 2 vans, 48.9ms
video 1/1 (frame 4/1760) /mnt/sda1/FYP_2024/Helitha/CCTV/Traffic_Cut.mp4: 640x640 1 truck, 7 cars, 2 vans, 48.9ms
video 1/1 (frame 5/1760) /mnt/sda1/FYP_