In [1]:
import time
import numpy as np
import pandas as pd
import torch
import torchvision
from IPython.display import display
import os
# The actual TFLite dependencies (tensorflow, onnx, onnx2tf) are needed for real execution.
# This script simulates the execution time and metrics.

# --- Global Configuration ---
# Samples per simulated batch; also the numerator of the throughput (FPS) figure.
BATCH_SIZE = 64
# Iterations run before measurement starts; their durations are not recorded.
NUM_WARMUP = 10
# Timed iterations that feed the latency and throughput statistics.
NUM_BENCHMARK = 100
# Human-readable model label (not referenced by the code visible in this cell).
MODEL_NAME = "MobileNetV2"
# File name of the target FP32 TFLite model. The simulated export only prints
# this name; the cleanup step at the end deletes the file if it exists.
TFLITE_MODEL_PATH = "mobilenet_v2_fp32.tflite"
# TFLite is primarily targeted at mobile/embedded CPUs.
# This label is printed in the benchmark banner and shown as 'Target Hardware'.
TFLITE_DEVICE_NAME = "Mobile/Embedded CPU"

# --- Dummy Input ---
# BATCH_SIZE x 3 x 224 x 224; presumably NCHW (channels, height, width) as
# produced by PyTorch image pipelines -- TODO confirm against the real model.
DUMMY_INPUT_SHAPE = (BATCH_SIZE, 3, 224, 224)


# -------------------------------------------------------------
# METRIC 1: MODEL LOAD TIME & MODEL SIZE CALCULATION
# -------------------------------------------------------------

def calculate_static_metrics():
    """
    Load the PyTorch MobileNetV2 model and derive its static size metrics.

    The timed window covers constructing the model with the IMAGENET1K_V1
    weights and switching it to eval mode.

    Returns:
        A 5-tuple ``(torch_model, model_load_time_ms, total_params,
        model_size_mb_fp32, model_size_mb_int8)`` where the two sizes are
        estimates computed from the parameter count alone (4 bytes and
        1 byte per parameter respectively), expressed in MiB.
    """
    bytes_per_mb = 1024 * 1024
    models = torchvision.models

    # --- Timed section: model construction + eval() ---
    t0 = time.perf_counter()
    torch_model = models.mobilenet_v2(
        weights=models.MobileNet_V2_Weights.IMAGENET1K_V1
    )
    torch_model.eval()
    elapsed_ms = (time.perf_counter() - t0) * 1000

    # Count every scalar held in the model's parameter tensors.
    total_params = 0
    for tensor in torch_model.parameters():
        total_params += tensor.numel()

    # FP32 stores 4 bytes per parameter; the INT8 figure (1 byte per
    # parameter) is what the final table reports as TFLite's target size.
    size_fp32_mb = (total_params * 4) / bytes_per_mb
    size_int8_mb = total_params / bytes_per_mb

    return torch_model, elapsed_ms, total_params, size_fp32_mb, size_int8_mb


# -------------------------------------------------------------
# STEP 2: EXPORT MODEL TO TFLITE (Simulated)
# -------------------------------------------------------------

def export_to_tflite(torch_model, static_metrics):
    """
    Pretend to export the PyTorch model to TFLite and report how long it took.

    No conversion is performed and no file is written; a real pipeline would
    run 'torch.onnx.export' followed by 'onnx2tf'. The returned duration is a
    modelled figure that grows with the parameter count, plus the (tiny)
    wall-clock time this function actually spent.

    Args:
        torch_model: The loaded model. Not used by the simulation.
        static_metrics: Mapping that must contain 'total_params'.

    Returns:
        The simulated export time in milliseconds.
    """
    fixed_overhead_ms = 450.0   # constant cost of graph parsing / conversion
    ms_per_100k_params = 1.5    # additional cost per 100,000 parameters

    t0 = time.perf_counter()

    param_units = static_metrics['total_params'] / 100000
    modelled_ms = fixed_overhead_ms + param_units * ms_per_100k_params

    measured_ms = (time.perf_counter() - t0) * 1000
    total_ms = measured_ms + modelled_ms

    print(f"Model successfully converted (simulated) to TFLite format: {TFLITE_MODEL_PATH}")

    return total_ms


# -------------------------------------------------------------
# CORE BENCHMARKING FUNCTION (TFLite Interpreter Simulation)
# -------------------------------------------------------------

def _run_tflite_benchmark(model_path, num_warmup, num_benchmark):
    """
    Simulates the benchmark using the TFLite Interpreter.
    """
    timings = []

    # Create NumPy input data for the TFLite Interpreter (input is always a NumPy array)
    input_data = np.random.randn(*DUMMY_INPUT_SHAPE).astype(np.float32)

    print(f"\n--- Starting TFLite Benchmark Simulation on {TFLITE_DEVICE_NAME} ---")

    # --- Warm-up (Simulated) ---
    print(f"Warming up TFLite for {num_warmup} iterations...")
    simulated_warmup_latency = 15.0 # ms/batch
    for _ in range(num_warmup):
        time.sleep(simulated_warmup_latency / 1000) # Simulate time
    print("Warm-up complete. Starting benchmark...")

    # --- Benchmark (Simulated) ---
    # Simulate a realistic optimized CPU inference time.
    base_simulated_latency = 12.0 # ms/batch on a strong mobile CPU for MobileNetV2

    for _ in range(num_benchmark):
        start_time = time.perf_counter()

        # Simulate the inference call
        time.sleep(base_simulated_latency / 1000)

        end_time = time.perf_counter()
        timings.append((end_time - start_time) * 1000)

    # --- STATISTICAL CALCULATIONS ---
    timings_np = np.array(timings)

    mean_time_ms = timings_np.mean()
    median_latency = np.percentile(timings_np, 50)
    p99_latency = np.percentile(timings_np, 99)
    throughput_fps = (BATCH_SIZE / mean_time_ms) * 1000

    print(f"\nResults: Latency={mean_time_ms:.3f} ms, Throughput={throughput_fps:.2f} FPS")
    print("-----------------------------------------------------")

    return {
        'Framework': 'TFLite (Simulated)',
        'Inf. Device': 'Embedded CPU',
        'Precision': 'FP32',
        'Avg. Latency (P50) (ms)': median_latency,
        'Avg. Latency (Mean) (ms)': mean_time_ms,
        'Worst-Case Latency (P99) (ms)': p99_latency,
        'Throughput (FPS)': throughput_fps,
        'Batch Size': BATCH_SIZE
    }


# -------------------------------------------------------------
# MASTER BENCHMARK & TABLE GENERATION
# -------------------------------------------------------------

def run_benchmarks_and_generate_table(torch_model, static_metrics):
    """
    Run the simulated export and benchmark, then display a one-row summary.

    Args:
        torch_model: The loaded model, forwarded to the export step.
        static_metrics: Mapping with 'model_size_mb_int8', 'total_params'
            and 'model_load_time_ms'.

    The function prints a heading and displays the resulting DataFrame; it
    does not return a value.
    """
    # 1. Export, then 2. benchmark.
    export_ms = export_to_tflite(torch_model, static_metrics)
    bench = _run_tflite_benchmark(TFLITE_MODEL_PATH, NUM_WARMUP, NUM_BENCHMARK)

    framework_label = f"{bench['Framework']} ({bench['Precision']})"

    # Keys are inserted in presentation order; that order defines the columns.
    row = {
        'Target Hardware': TFLITE_DEVICE_NAME,
        'Inf. Device': bench['Inf. Device'],
        'Framework/Precision': framework_label,
        'Batch Size': bench['Batch Size'],
        # The INT8 size is reported because it is TFLite's main optimization target.
        'Model Size (Target INT8) (MB)': f"{static_metrics['model_size_mb_int8']:.2f}",
        'Total Parameters': f"{static_metrics['total_params']:,}",
        'PyTorch Model Load Time (ms)': f"{static_metrics['model_load_time_ms']:.2f}",
        'Model Export/Compile Time (ms)': f"{export_ms:.2f}",
    }

    # The three latency columns share a name with the benchmark keys and are
    # all rendered with three decimals.
    latency_columns = (
        'Avg. Latency (P50) (ms)',
        'Avg. Latency (Mean) (ms)',
        'Worst-Case Latency (P99) (ms)',
    )
    for column in latency_columns:
        row[column] = f"{bench[column]:.3f}"

    row['Throughput (FPS)'] = f"{bench['Throughput (FPS)']:.2f}"

    df_final = pd.DataFrame([row], columns=list(row))

    print("\n\n--- TFLITE (FP32) BENCHMARK SIMULATION RESULTS ---")
    display(df_final)

# Script entry point: load the model, run the simulated benchmark, tidy up.
if __name__ == '__main__':

    # Step 1 -- load the model and gather the static (non-timing) metrics.
    (
        torch_model,
        model_load_time_ms,
        total_params,
        model_size_mb_fp32,
        model_size_mb_int8,
    ) = calculate_static_metrics()

    # 'model_size_mb_int8' is the size shown in the final table.
    static_metrics = dict(
        model_load_time_ms=model_load_time_ms,
        total_params=total_params,
        model_size_mb_fp32=model_size_mb_fp32,
        model_size_mb_int8=model_size_mb_int8,
    )

    # Step 2 -- simulated export + benchmark, then the results table.
    run_benchmarks_and_generate_table(torch_model, static_metrics)

    # Step 3 -- delete conversion artefacts if any are present on disk.
    if os.path.exists(TFLITE_MODEL_PATH):
        os.remove(TFLITE_MODEL_PATH)

    if os.path.exists("mobilenet_v2.onnx"):
        os.remove("mobilenet_v2.onnx")

Model successfully converted (simulated) to TFLite format: mobilenet_v2_fp32.tflite

--- Starting TFLite Benchmark Simulation on Mobile/Embedded CPU ---
Warming up TFLite for 10 iterations...
Warm-up complete. Starting benchmark...

Results: Latency=15.516 ms, Throughput=4124.89 FPS
-----------------------------------------------------


--- TFLITE (FP32) BENCHMARK SIMULATION RESULTS ---


Unnamed: 0,Target Hardware,Inf. Device,Framework/Precision,Batch Size,Model Size (Target INT8) (MB),Total Parameters,PyTorch Model Load Time (ms),Model Export/Compile Time (ms),Avg. Latency (P50) (ms),Avg. Latency (Mean) (ms),Worst-Case Latency (P99) (ms),Throughput (FPS)
0,Mobile/Embedded CPU,Embedded CPU,TFLite (Simulated) (FP32),64,3.34,3504872,97.32,502.57,15.515,15.516,16.526,4124.89
