In [None]:
import cv2
import time
import numpy as np
import tensorflow as tf

# CONFIG
TFLITE_MODEL = "/home/saber/GitHub/road_anomaly_detection/runs/detect/yolov8s_rdd2022_2class7/weights/best_int8.tflite"
VIDEO_PATH = "/home/saber/GitHub/road_anomaly_detection/data/videos/3695999-hd_1920_1080_24fps.mp4"
IMG_SIZE = 640
NUM_WARMUP = 20        # frames (do not count)
MAX_FRAMES = 300       # cap for faster testing

print("Loading TFLite INT8 model...")

interpreter = tf.lite.Interpreter(model_path=TFLITE_MODEL)
interpreter.allocate_tensors()

input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Quantization params
input_scale, input_zero_point = input_details[0]["quantization"]

print("Input dtype:", input_details[0]["dtype"])
print("Input quantization:", input_scale, input_zero_point)

cap = cv2.VideoCapture(VIDEO_PATH)
assert cap.isOpened(), "Could not open video"

frame_count = 0
timings = []

print("\n Running INT8 inference benchmark...")

while True:
    ret, frame = cap.read()
    if not ret:
        break

    frame_count += 1
    if frame_count > MAX_FRAMES:
        break

    # Preprocess
    img = cv2.resize(frame, (IMG_SIZE, IMG_SIZE))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = img.astype(np.float32) / 255.0

    # Quantize input
    img = img / input_scale + input_zero_point
    img = np.clip(img, 0, 255).astype(np.uint8)
    img = np.expand_dims(img, axis=0)

    interpreter.set_tensor(input_details[0]["index"], img)

    start = time.perf_counter()
    interpreter.invoke()
    end = time.perf_counter()

    if frame_count > NUM_WARMUP:
        timings.append(end - start)

cap.release()

# RESULTS
total_frames = len(timings)
avg_time = sum(timings) / total_frames
avg_fps = 1.0 / avg_time

print("\n========== RESULTS ==========")
print(f"Frames measured : {total_frames}")
print(f"Avg latency     : {avg_time*1000:.2f} ms")
print(f"Avg FPS         : {avg_fps:.2f}")
print("================================")


2026-02-07 10:39:19.300372: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2026-02-07 10:39:19.308246: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1770440959.317449   26668 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1770440959.320196   26668 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1770440959.327124   26668 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

▶ Loading TFLite INT8 model...
Input dtype: <class 'numpy.uint8'>
Input quantization: 0.003921568859368563 0

▶ Running INT8 inference benchmark...


    TF 2.20. Please use the LiteRT interpreter from the ai_edge_litert package.
    See the [migration guide](https://ai.google.dev/edge/litert/migration)
    for details.
    
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.



Frames measured : 280
Avg latency     : 74.70 ms
Avg FPS         : 13.39


In [None]:
import cv2
import time
import numpy as np
import tensorflow as tf

# CONFIG
TFLITE_MODEL = "/home/saber/GitHub/road_anomaly_detection/runs/detect/yolov8s_rdd2022_2class7/weights/best_int8.tflite"
VIDEO_PATH = "/home/saber/GitHub/road_anomaly_detection/data/videos/3695999-hd_1920_1080_24fps.mp4"

MODEL_IMG_SIZE = 640          # YOLO input
VIDEO_DECODE_WIDTH = 960      # ⬅️ LOWER than 1920
VIDEO_DECODE_HEIGHT = 540

NUM_WARMUP = 20
MAX_FRAMES = 300

print("▶ Loading TFLite INT8 model...")

interpreter = tf.lite.Interpreter(model_path=TFLITE_MODEL)
interpreter.allocate_tensors()

input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

input_scale, input_zero_point = input_details[0]["quantization"]

print("Input dtype:", input_details[0]["dtype"])
print("Input quantization:", input_scale, input_zero_point)

# Video capture with reduced resolution
cap = cv2.VideoCapture(VIDEO_PATH)
assert cap.isOpened(), "Could not open video"

cap.set(cv2.CAP_PROP_FRAME_WIDTH, VIDEO_DECODE_WIDTH)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, VIDEO_DECODE_HEIGHT)

frame_count = 0
timings = []

print("\n▶ Running INT8 inference benchmark (downscaled decode)...")

while True:
    ret, frame = cap.read()
    if not ret:
        break

    frame_count += 1
    if frame_count > MAX_FRAMES:
        break

    # Preprocess
    img = cv2.resize(frame, (MODEL_IMG_SIZE, MODEL_IMG_SIZE))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = img.astype(np.float32) / 255.0

    # Quantize
    img = img / input_scale + input_zero_point
    img = np.clip(img, 0, 255).astype(np.uint8)
    img = np.expand_dims(img, axis=0)

    interpreter.set_tensor(input_details[0]["index"], img)

    start = time.perf_counter()
    interpreter.invoke()
    end = time.perf_counter()

    if frame_count > NUM_WARMUP:
        timings.append(end - start)

cap.release()

# RESULTS
total_frames = len(timings)
avg_time = sum(timings) / total_frames
avg_fps = 1.0 / avg_time

print("\n========== RESULTS ==========")
print(f"Frames measured : {total_frames}")
print(f"Avg latency     : {avg_time*1000:.2f} ms")
print(f"Avg FPS         : {avg_fps:.2f}")
print("================================")


▶ Loading TFLite INT8 model...
Input dtype: <class 'numpy.uint8'>
Input quantization: 0.003921568859368563 0

▶ Running INT8 inference benchmark (downscaled decode)...

Frames measured : 280
Avg latency     : 75.17 ms
Avg FPS         : 13.30


In [1]:
import cv2
import time
import numpy as np
import tensorflow as tf

# ================= CONFIG =================
TFLITE_MODEL = "/home/saber/GitHub/road_anomaly_detection/runs/detect/yolov8s_rdd2022_2class7/weights/best_int8.tflite"
VIDEO_PATH = "/home/saber/GitHub/road_anomaly_detection/data/videos/3695999-hd_1920_1080_24fps.mp4"
OUTPUT_VIDEO = "../../../runs/tflite/output_predictions.mp4"

MODEL_IMG_SIZE = 640
VIDEO_DECODE_WIDTH = 960
VIDEO_DECODE_HEIGHT = 540

CONF_THRESH = 0.25
NUM_WARMUP = 20
MAX_FRAMES = 300

CLASS_NAMES = {
    0: "Road Defect",
    1: "Pothole"
}

# ================= LOAD MODEL =================
print("▶ Loading TFLite INT8 model...")

interpreter = tf.lite.Interpreter(model_path=TFLITE_MODEL)
interpreter.allocate_tensors()

input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

input_scale, input_zero_point = input_details[0]["quantization"]

# ================= VIDEO SETUP =================
cap = cv2.VideoCapture(VIDEO_PATH)
assert cap.isOpened(), "Could not open video"

cap.set(cv2.CAP_PROP_FRAME_WIDTH, VIDEO_DECODE_WIDTH)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, VIDEO_DECODE_HEIGHT)

fps = cap.get(cv2.CAP_PROP_FPS)

writer = cv2.VideoWriter(
    OUTPUT_VIDEO,
    cv2.VideoWriter_fourcc(*"mp4v"),
    fps,
    (VIDEO_DECODE_WIDTH, VIDEO_DECODE_HEIGHT)
)

frame_count = 0
timings = []

print("\n▶ Running inference + saving output video...")

# ================= INFERENCE LOOP =================
while True:
    ret, frame = cap.read()
    if not ret:
        break

    frame_count += 1
    if frame_count > MAX_FRAMES:
        break

    orig_h, orig_w = frame.shape[:2]

    # ---------- PREPROCESS ----------
    img = cv2.resize(frame, (MODEL_IMG_SIZE, MODEL_IMG_SIZE))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = img.astype(np.float32) / 255.0

    img = img / input_scale + input_zero_point
    img = np.clip(img, 0, 255).astype(np.uint8)
    img = np.expand_dims(img, axis=0)

    interpreter.set_tensor(input_details[0]["index"], img)

    start = time.perf_counter()
    interpreter.invoke()
    end = time.perf_counter()

    if frame_count > NUM_WARMUP:
        timings.append(end - start)

    # ---------- POSTPROCESS ----------
    output = interpreter.get_tensor(output_details[0]["index"])[0]
    output = output.transpose(1, 0)   # (8400, 6)


    for det in output:
        x1, y1, x2, y2, score, cls = det

        if score < CONF_THRESH:
            continue

        cls = int(cls)

        # Scale boxes back to original frame
        x1 = int(x1 * orig_w / MODEL_IMG_SIZE)
        x2 = int(x2 * orig_w / MODEL_IMG_SIZE)
        y1 = int(y1 * orig_h / MODEL_IMG_SIZE)
        y2 = int(y2 * orig_h / MODEL_IMG_SIZE)

        label = f"{CLASS_NAMES.get(cls, 'Unknown')} {score:.2f}"

        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.putText(
            frame,
            label,
            (x1, max(20, y1 - 5)),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.5,
            (0, 255, 0),
            2
        )

    writer.write(frame)

cap.release()
writer.release()

# ================= RESULTS =================
avg_time = sum(timings) / len(timings)
avg_fps = 1.0 / avg_time

print("\n========== RESULTS ==========")
print(f"Frames measured : {len(timings)}")
print(f"Avg latency     : {avg_time*1000:.2f} ms")
print(f"Avg FPS         : {avg_fps:.2f}")
print(f"Saved video     : {OUTPUT_VIDEO}")
print("================================")


2026-02-10 14:11:31.277940: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2026-02-10 14:11:31.413463: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1770712891.480986    8365 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1770712891.500653    8365 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1770712891.623689    8365 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

▶ Loading TFLite INT8 model...

▶ Running inference + saving output video...


    TF 2.20. Please use the LiteRT interpreter from the ai_edge_litert package.
    See the [migration guide](https://ai.google.dev/edge/litert/migration)
    for details.
    
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.



Frames measured : 280
Avg latency     : 181.14 ms
Avg FPS         : 5.52
Saved video     : ../../../runs/tflite/output_predictions.mp4


In [2]:
cap = cv2.VideoCapture(VIDEO_PATH)
assert cap.isOpened(), "Could not open video"

ret, test_frame = cap.read()
assert ret, "Could not read first frame"

FRAME_H, FRAME_W = test_frame.shape[:2]

cap.set(cv2.CAP_PROP_POS_FRAMES, 0)  # rewind video

True

In [3]:
writer = cv2.VideoWriter(
    OUTPUT_VIDEO.replace(".mp4", ".avi"),
    cv2.VideoWriter_fourcc(*"XVID"),
    fps,
    (FRAME_W, FRAME_H)
)

assert writer.isOpened(), "❌ VideoWriter failed to open"

In [2]:
print("Output details:", output_details)

Output details: [{'name': 'PartitionedCall:0', 'index': 419, 'shape': array([   1,    6, 8400], dtype=int32), 'shape_signature': array([   1,    6, 8400], dtype=int32), 'dtype': <class 'numpy.uint8'>, 'quantization': (2.5373830795288086, 0), 'quantization_parameters': {'scales': array([2.537383], dtype=float32), 'zero_points': array([0], dtype=int32), 'quantized_dimension': 0}, 'sparsity_parameters': {}}]


In [3]:
print("Output shape:", output.shape)
print("One detection vector length:", output[0].shape)

Output shape: (6, 8400)
One detection vector length: (8400,)


In [4]:
import cv2
import time
import numpy as np
import tensorflow as tf
import os

# ================= CONFIG =================
TFLITE_MODEL = "/home/saber/GitHub/road_anomaly_detection/runs/detect/yolov8s_rdd2022_2class7/weights/best_int8.tflite"
VIDEO_PATH = "/home/saber/GitHub/road_anomaly_detection/data/videos/3695999-hd_1920_1080_24fps.mp4"
OUTPUT_VIDEO = "../../../runs/tflite/output_predictions.avi"

MODEL_IMG_SIZE = 640
CONF_THRESH = 0.25
NUM_WARMUP = 20
MAX_FRAMES = 300

CLASS_NAMES = {
    0: "Road Defect",
    1: "Pothole"
}

# ================= LOAD MODEL =================
print("▶ Loading TFLite INT8 model...")

interpreter = tf.lite.Interpreter(model_path=TFLITE_MODEL)
interpreter.allocate_tensors()

input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

input_scale, input_zero_point = input_details[0]["quantization"]

# ================= VIDEO SETUP =================
cap = cv2.VideoCapture(VIDEO_PATH)
assert cap.isOpened(), "❌ Could not open input video"

# Read first frame to get REAL resolution
ret, first_frame = cap.read()
assert ret, "❌ Could not read first frame"

FRAME_H, FRAME_W = first_frame.shape[:2]
fps = cap.get(cv2.CAP_PROP_FPS)

# Reset video to first frame
cap.set(cv2.CAP_PROP_POS_FRAMES, 0)

# Create output directory if needed
os.makedirs(os.path.dirname(OUTPUT_VIDEO), exist_ok=True)

# Safe codec for Linux / Raspberry Pi
writer = cv2.VideoWriter(
    OUTPUT_VIDEO,
    cv2.VideoWriter_fourcc(*"XVID"),
    fps,
    (FRAME_W, FRAME_H)
)

assert writer.isOpened(), "❌ VideoWriter failed to open"

print(f"▶ Saving output video: {OUTPUT_VIDEO}")
print(f"▶ Resolution: {FRAME_W}x{FRAME_H}, FPS: {fps:.2f}")

# ================= INFERENCE LOOP =================
frame_count = 0
timings = []

while True:
    ret, frame = cap.read()
    if not ret:
        break

    frame_count += 1
    if frame_count > MAX_FRAMES:
        break

    orig_h, orig_w = frame.shape[:2]

    # ---------- PREPROCESS ----------
    img = cv2.resize(frame, (MODEL_IMG_SIZE, MODEL_IMG_SIZE))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = img.astype(np.float32) / 255.0

    # Quantize to INT8
    img = img / input_scale + input_zero_point
    img = np.clip(img, 0, 255).astype(np.uint8)
    img = np.expand_dims(img, axis=0)

    interpreter.set_tensor(input_details[0]["index"], img)

    start = time.perf_counter()
    interpreter.invoke()
    end = time.perf_counter()

    if frame_count > NUM_WARMUP:
        timings.append(end - start)

    # ---------- POSTPROCESS ----------
    output = interpreter.get_tensor(output_details[0]["index"])[0]
    output = output.transpose(1, 0)  # (8400, 6)

    for det in output:
        cx, cy, w, h, conf, cls = det

        conf = conf / 255.0  # INT8 → float
        if conf < CONF_THRESH:
            continue

        cls = int(cls)

        # Convert center → corner
        x1 = int((cx - w / 2) * orig_w / MODEL_IMG_SIZE)
        y1 = int((cy - h / 2) * orig_h / MODEL_IMG_SIZE)
        x2 = int((cx + w / 2) * orig_w / MODEL_IMG_SIZE)
        y2 = int((cy + h / 2) * orig_h / MODEL_IMG_SIZE)

        # Clamp to image bounds
        x1 = max(0, min(x1, orig_w - 1))
        y1 = max(0, min(y1, orig_h - 1))
        x2 = max(0, min(x2, orig_w - 1))
        y2 = max(0, min(y2, orig_h - 1))

        label = f"{CLASS_NAMES.get(cls, 'Unknown')} {conf:.2f}"

        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.putText(
            frame,
            label,
            (x1, max(20, y1 - 5)),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.5,
            (0, 255, 0),
            2
        )

    # ---------- WRITE FRAME ----------
    writer.write(frame)

# ================= CLEANUP =================
cap.release()
writer.release()

# ================= RESULTS =================
if timings:
    avg_time = sum(timings) / len(timings)
    avg_fps = 1.0 / avg_time
else:
    avg_fps = 0.0

print("\n========== RESULTS ==========")
print(f"Frames processed : {frame_count}")
print(f"Avg FPS (model)  : {avg_fps:.2f}")
print(f"Output file     : {OUTPUT_VIDEO}")
print(f"File size       : {os.path.getsize(OUTPUT_VIDEO) / 1e6:.2f} MB")
print("================================")


▶ Loading TFLite INT8 model...
▶ Saving output video: ../../../runs/tflite/output_predictions.avi
▶ Resolution: 1920x1080, FPS: 24.00

Frames processed : 301
Avg FPS (model)  : 4.47
Output file     : ../../../runs/tflite/output_predictions.avi
File size       : 40.93 MB
