In [None]:
import cv2
import time
import numpy as np
import tensorflow as tf

# CONFIG
# NOTE(review): these are absolute, machine-specific paths; consider deriving
# them from a configurable project root so the notebook runs elsewhere.
TFLITE_MODEL = "/home/saber/GitHub/road_anomaly_detection/runs/detect/yolov8s_rdd2022_2class7/weights/best_int8.tflite"
VIDEO_PATH = "/home/saber/GitHub/road_anomaly_detection/data/videos/3695999-hd_1920_1080_24fps.mp4"
IMG_SIZE = 640         # frames are resized to IMG_SIZE x IMG_SIZE before inference
NUM_WARMUP = 20        # frames (do not count)
MAX_FRAMES = 300       # cap for faster testing

print("Loading TFLite INT8 model...")

interpreter = tf.lite.Interpreter(model_path=TFLITE_MODEL)
interpreter.allocate_tensors()

input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Quantization params
input_scale, input_zero_point = input_details[0]["quantization"]
input_dtype = input_details[0]["dtype"]
input_index = input_details[0]["index"]

print("Input dtype:", input_dtype)
print("Input quantization:", input_scale, input_zero_point)

# Integer inputs are quantized with the model's own scale/zero-point and
# clipped to the representable range of the model's input dtype (not a
# hard-coded uint8 range). Float inputs are fed the normalised image as-is.
quantize_input = np.issubdtype(input_dtype, np.integer)
if quantize_input:
    if not input_scale > 0:
        # Dividing by a zero scale would turn every pixel into inf/nan.
        raise ValueError(
            f"Input tensor has integer dtype {input_dtype} but quantization "
            f"scale {input_scale}; cannot quantize the input."
        )
    input_range = np.iinfo(input_dtype)

cap = cv2.VideoCapture(VIDEO_PATH)
assert cap.isOpened(), "Could not open video"

frame_count = 0
timings = []  # seconds spent in interpreter.invoke(), warm-up frames excluded

print("\n Running INT8 inference benchmark...")

try:
    # Checking the cap in the loop condition avoids decoding one extra frame
    # that would only be thrown away.
    while frame_count < MAX_FRAMES:
        ret, frame = cap.read()
        if not ret:
            break

        frame_count += 1

        # Preprocess
        img = cv2.resize(frame, (IMG_SIZE, IMG_SIZE))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = img.astype(np.float32) / 255.0

        # Quantize input
        if quantize_input:
            # Round before the integer cast: astype() truncates, so a value
            # such as 254.99998 (what pixel 255 becomes with a scale slightly
            # above 1/255) would otherwise be stored as 254.
            img = np.rint(img / input_scale + input_zero_point)
            img = np.clip(img, input_range.min, input_range.max)
        img = np.expand_dims(img.astype(input_dtype), axis=0)

        interpreter.set_tensor(input_index, img)

        # Only the invoke() call is timed; decoding and preprocessing are not.
        start = time.perf_counter()
        interpreter.invoke()
        end = time.perf_counter()

        if frame_count > NUM_WARMUP:
            timings.append(end - start)
finally:
    # Release the capture even if preprocessing or inference raised.
    cap.release()

# RESULTS
total_frames = len(timings)
if total_frames == 0:
    # Fail with an explanation instead of a bare ZeroDivisionError below.
    raise RuntimeError(
        f"No frames were measured: {frame_count} frame(s) were processed, "
        f"none beyond the {NUM_WARMUP}-frame warm-up."
    )
avg_time = sum(timings) / total_frames
avg_fps = 1.0 / avg_time

print("\n========== RESULTS ==========")
print(f"Frames measured : {total_frames}")
print(f"Avg latency     : {avg_time*1000:.2f} ms")
print(f"Avg FPS         : {avg_fps:.2f}")
print("================================")


2026-02-07 10:39:19.300372: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2026-02-07 10:39:19.308246: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1770440959.317449   26668 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1770440959.320196   26668 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1770440959.327124   26668 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

▶ Loading TFLite INT8 model...
Input dtype: <class 'numpy.uint8'>
Input quantization: 0.003921568859368563 0

▶ Running INT8 inference benchmark...


    TF 2.20. Please use the LiteRT interpreter from the ai_edge_litert package.
    See the [migration guide](https://ai.google.dev/edge/litert/migration)
    for details.
    
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.



Frames measured : 280
Avg latency     : 74.70 ms
Avg FPS         : 13.39


In [None]:
import cv2
import time
import numpy as np
import tensorflow as tf

# CONFIG
# NOTE(review): these are absolute, machine-specific paths; consider deriving
# them from a configurable project root so the notebook runs elsewhere.
TFLITE_MODEL = "/home/saber/GitHub/road_anomaly_detection/runs/detect/yolov8s_rdd2022_2class7/weights/best_int8.tflite"
VIDEO_PATH = "/home/saber/GitHub/road_anomaly_detection/data/videos/3695999-hd_1920_1080_24fps.mp4"

MODEL_IMG_SIZE = 640          # YOLO input
VIDEO_DECODE_WIDTH = 960      # lower than 1920
VIDEO_DECODE_HEIGHT = 540

NUM_WARMUP = 20
MAX_FRAMES = 300

print("▶ Loading TFLite INT8 model...")

interpreter = tf.lite.Interpreter(model_path=TFLITE_MODEL)
interpreter.allocate_tensors()

input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

input_scale, input_zero_point = input_details[0]["quantization"]
input_dtype = input_details[0]["dtype"]
input_index = input_details[0]["index"]

print("Input dtype:", input_dtype)
print("Input quantization:", input_scale, input_zero_point)

# Integer inputs are quantized with the model's own scale/zero-point and
# clipped to the representable range of the model's input dtype (not a
# hard-coded uint8 range). Float inputs are fed the normalised image as-is.
quantize_input = np.issubdtype(input_dtype, np.integer)
if quantize_input:
    if not input_scale > 0:
        # Dividing by a zero scale would turn every pixel into inf/nan.
        raise ValueError(
            f"Input tensor has integer dtype {input_dtype} but quantization "
            f"scale {input_scale}; cannot quantize the input."
        )
    input_range = np.iinfo(input_dtype)

# Video capture with reduced resolution
cap = cv2.VideoCapture(VIDEO_PATH)
assert cap.isOpened(), "Could not open video"

# These calls only *request* a frame size from the capture backend. For
# file-backed captures the request is typically ignored, so the loop below
# checks the actual frame shape and downsizes in software when needed.
cap.set(cv2.CAP_PROP_FRAME_WIDTH, VIDEO_DECODE_WIDTH)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, VIDEO_DECODE_HEIGHT)
decode_size = (VIDEO_DECODE_WIDTH, VIDEO_DECODE_HEIGHT)  # (width, height)

frame_count = 0
timings = []  # seconds spent in interpreter.invoke(), warm-up frames excluded

print("\n▶ Running INT8 inference benchmark (downscaled decode)...")

try:
    # Checking the cap in the loop condition avoids decoding one extra frame
    # that would only be thrown away.
    while frame_count < MAX_FRAMES:
        ret, frame = cap.read()
        if not ret:
            break

        frame_count += 1

        # Enforce the reduced resolution if the backend did not honour it.
        # frame.shape is (height, width, channels).
        if (frame.shape[1], frame.shape[0]) != decode_size:
            frame = cv2.resize(frame, decode_size, interpolation=cv2.INTER_AREA)

        # Preprocess
        img = cv2.resize(frame, (MODEL_IMG_SIZE, MODEL_IMG_SIZE))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = img.astype(np.float32) / 255.0

        # Quantize
        if quantize_input:
            # Round before the integer cast: astype() truncates, so a value
            # such as 254.99998 (what pixel 255 becomes with a scale slightly
            # above 1/255) would otherwise be stored as 254.
            img = np.rint(img / input_scale + input_zero_point)
            img = np.clip(img, input_range.min, input_range.max)
        img = np.expand_dims(img.astype(input_dtype), axis=0)

        interpreter.set_tensor(input_index, img)

        # Only the invoke() call is timed. The model always receives a
        # MODEL_IMG_SIZE x MODEL_IMG_SIZE tensor, so this figure is not
        # expected to change with the decode resolution; decoding and
        # preprocessing cost is outside the timed region.
        start = time.perf_counter()
        interpreter.invoke()
        end = time.perf_counter()

        if frame_count > NUM_WARMUP:
            timings.append(end - start)
finally:
    # Release the capture even if preprocessing or inference raised.
    cap.release()

# RESULTS
total_frames = len(timings)
if total_frames == 0:
    # Fail with an explanation instead of a bare ZeroDivisionError below.
    raise RuntimeError(
        f"No frames were measured: {frame_count} frame(s) were processed, "
        f"none beyond the {NUM_WARMUP}-frame warm-up."
    )
avg_time = sum(timings) / total_frames
avg_fps = 1.0 / avg_time

print("\n========== RESULTS ==========")
print(f"Frames measured : {total_frames}")
print(f"Avg latency     : {avg_time*1000:.2f} ms")
print(f"Avg FPS         : {avg_fps:.2f}")
print("================================")


▶ Loading TFLite INT8 model...
Input dtype: <class 'numpy.uint8'>
Input quantization: 0.003921568859368563 0

▶ Running INT8 inference benchmark (downscaled decode)...

Frames measured : 280
Avg latency     : 75.17 ms
Avg FPS         : 13.30
