# 03 — TFLite Benchmark on Apple Silicon M1

Measure inference latency and throughput of the Siamese tracker TFLite model
across different quantisations (FP32, FP16, INT8).

In [None]:
import sys, time
from pathlib import Path

# ROOT is the parent of the current working directory; it is put at the front
# of sys.path (once) so imports can be resolved relative to it.
ROOT = Path.cwd().parent
_root_entry = str(ROOT)
if _root_entry not in sys.path:
    sys.path.insert(0, _root_entry)

import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

## 1. Helper: benchmark a TFLite model

In [None]:
def benchmark_tflite(model_path: Path, n_frames: int = 500, n_warmup: int = 5):
    """Time random-input inferences of a TFLite model and summarise latency.

    The interpreter is created with ``num_threads=4``. Before every call to
    ``invoke()`` each input tensor is refilled with fresh random data; only
    the ``invoke()`` call itself is timed.

    Args:
        model_path: Path to the ``.tflite`` file to load.
        n_frames: Number of timed ``invoke()`` calls; must be at least 1.
        n_warmup: Number of untimed ``invoke()`` calls executed first, so
            that any first-call overhead does not skew the statistics
            (mirrors the warm-up used for the Keras baseline).

    Returns:
        A dict with ``mean_ms``, ``std_ms``, ``median_ms`` and ``p95_ms``
        (latency statistics in milliseconds), ``fps`` (1000 / mean latency)
        and ``latencies`` (NumPy array of the per-call latencies in ms).

    Raises:
        ValueError: If ``n_frames`` is smaller than 1.
    """
    # Validate first: statistics over zero samples would only yield NaNs.
    if n_frames < 1:
        raise ValueError(f'n_frames must be >= 1, got {n_frames}')

    interp = tf.lite.Interpreter(model_path=str(model_path), num_threads=4)
    interp.allocate_tensors()
    inp_det = interp.get_input_details()

    def _random_input(detail):
        """Return random data matching one input tensor's shape and dtype."""
        dtype = np.dtype(detail['dtype'])
        shape = tuple(detail['shape'])
        if np.issubdtype(dtype, np.integer):
            # Integer inputs (e.g. fully quantised models): sample the whole
            # representable range instead of casting floats from [0, 1).
            info = np.iinfo(dtype)
            return np.random.randint(info.min, info.max + 1, size=shape, dtype=dtype)
        return np.random.rand(*shape).astype(dtype)

    def _feed_random_inputs():
        """Write fresh random data into every input tensor."""
        for detail in inp_det:
            interp.set_tensor(detail['index'], _random_input(detail))

    for _ in range(n_warmup):
        _feed_random_inputs()
        interp.invoke()

    latencies = []
    for _ in range(n_frames):
        _feed_random_inputs()  # input generation stays outside the timed region
        t0 = time.perf_counter()
        interp.invoke()
        latencies.append((time.perf_counter() - t0) * 1000)

    latencies = np.array(latencies)
    mean_ms = latencies.mean()
    return {
        'mean_ms': mean_ms,
        'std_ms': latencies.std(),
        'median_ms': np.median(latencies),
        'p95_ms': np.percentile(latencies, 95),
        'fps': 1000.0 / mean_ms,
        'latencies': latencies,
    }

print('benchmark_tflite() ready')

## 2. Benchmark TFLite Model

In [None]:
# Benchmark the exported model when the file is present; otherwise `stats`
# stays None, which the later `if stats:` checks treat as "skip".
tflite_path = ROOT / 'models' / 'siamese_tracker.tflite'
stats = benchmark_tflite(tflite_path, n_frames=500) if tflite_path.exists() else None

if stats is None:
    print('TFLite model not found — run `make export` first.')
else:
    print(
        f"Mean latency : {stats['mean_ms']:.2f} ± {stats['std_ms']:.2f} ms",
        f"Median       : {stats['median_ms']:.2f} ms",
        f"P95          : {stats['p95_ms']:.2f} ms",
        f"Throughput   : {stats['fps']:.1f} FPS",
        sep='\n',
    )

## 3. Latency Distribution

In [None]:
# Histogram of the per-call TFLite latencies with the mean and 95th
# percentile marked; skipped when no benchmark result is available.
if stats:
    latencies = stats['latencies']
    fig, ax = plt.subplots(figsize=(10, 4))
    ax.hist(latencies, bins=50, edgecolor='black', alpha=0.7)
    for key, colour, tag in (('mean_ms', 'red', 'mean'), ('p95_ms', 'orange', 'p95')):
        ax.axvline(stats[key], color=colour, linestyle='--', label=f"{tag}={stats[key]:.2f} ms")
    ax.set_xlabel('Inference Latency (ms)')
    ax.set_ylabel('Count')
    # Take the frame count from the data so the title cannot drift from the
    # number of samples that were actually measured.
    ax.set_title(f"TFLite Inference Latency Distribution ({len(latencies)} frames, M1)")
    ax.legend()
    fig.tight_layout()
    plt.show()

## 4. TFLite vs Native TF Comparison
Compare TFLite with the original Keras model to quantify the speed-up.

In [None]:
from src.training.siamese_model import build_siamese_tracker_model

# Baseline: time `predict()` of the Keras model on one fixed pair of random
# inputs (presumably the template and search crops; shapes as hard-coded here).
keras_model = build_siamese_tracker_model()
n = 100  # number of timed predict() calls
t_inp = np.random.rand(1, 127, 127, 3).astype(np.float32)
s_inp = np.random.rand(1, 255, 255, 3).astype(np.float32)
keras_inputs = [t_inp, s_inp]

# Untimed warm-up calls before measuring.
for _ in range(5):
    keras_model.predict(keras_inputs, verbose=0)

keras_times = []
for _ in range(n):
    started = time.perf_counter()
    keras_model.predict(keras_inputs, verbose=0)
    elapsed_s = time.perf_counter() - started
    keras_times.append(elapsed_s * 1000)  # seconds -> milliseconds

keras_mean = np.mean(keras_times)
print(f'Keras  : {keras_mean:.2f} ms/frame  ({1000/keras_mean:.1f} FPS)')

# The TFLite comparison is only printed when the earlier benchmark produced stats.
if stats:
    ratio = keras_mean / stats['mean_ms']
    print(f'TFLite : {stats["mean_ms"]:.2f} ms/frame  ({stats["fps"]:.1f} FPS)')
    print(f'Speed-up: {ratio:.2f}x')

## 5. Summary Table

In [None]:
import pandas as pd

# One row per benchmarked format; the TFLite row is added only when the
# TFLite benchmark produced results.
rows = [
    {'Format': 'Keras (FP32)', 'Mean (ms)': f'{keras_mean:.2f}', 'FPS': f'{1000/keras_mean:.1f}'},
]
if stats:
    rows += [
        {'Format': 'TFLite (FP16)', 'Mean (ms)': f"{stats['mean_ms']:.2f}", 'FPS': f"{stats['fps']:.1f}"},
    ]

df = pd.DataFrame(rows)
df