In [1]:
import time
import os
import numpy as np
import tensorflow as tf
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split
from pathlib import Path

In [2]:

def get_model_size(model_path):
    """
    Calculate the size of the TFLite model in MB.
    Args:
        model_path (str): Path to the TFLite model file.
    Returns:
        float: Size of the model in MB.
    """
    total_size = os.path.getsize(model_path)  # Directly get size of the TFLite file
    return total_size / (1024 * 1024)  # Convert to MB

def measure_inference_time_tflite(model_path, X_test, batch_size, trials=50):
    """
    Benchmark the inference time of a TFLite model.
    Args:
        model_path (str): Path to the TFLite model file.
        X_test (numpy array): Test dataset (images).
        batch_size (int): Batch size for inference.
        trials (int): Number of trials to run.
    Returns:
        tuple: Average inference time and throughput (images per second).
    """
    # Load TFLite model
    interpreter = tf.lite.Interpreter(model_path=model_path)
    interpreter.allocate_tensors()

    input_details = interpreter.get_input_details()  # Input details for the model
    output_details = interpreter.get_output_details()  # Output details for the model

    inference_times = []
    num_batches = len(X_test) // batch_size

    # Warm-up phase (Run a few trials to initialize the model)
    print(f"Running {10} warm-up trials to initialize the model...")
    for _ in range(10):
        for j in range(num_batches):
            batch_start = j * batch_size
            batch_end = (j + 1) * batch_size
            batch_images = X_test[batch_start:batch_end]

            # Preprocess the input batch to match TFLite input format
            interpreter.set_tensor(input_details[0]['index'], batch_images)
            interpreter.invoke()  # Run inference (no need to handle outputs here)

    print(f"Running {trials} inference trials on {len(X_test)} test images...")
    for i in range(trials):
        start_time = time.perf_counter()

        # Loop over the batches of X_test
        for j in range(num_batches):
            batch_start = j * batch_size
            batch_end = (j + 1) * batch_size
            batch_images = X_test[batch_start:batch_end]

            # Preprocess the input batch to match TFLite input format
            interpreter.set_tensor(input_details[0]['index'], batch_images)

            # Run inference
            interpreter.invoke()

        end_time = time.perf_counter()
        inference_time = end_time - start_time
        inference_times.append(inference_time)

        if i % 10 == 0:
            avg_inference = np.mean(inference_times)  # Average inference time per trial
            print(f"Step {i}: average inference time = {avg_inference:.6f} seconds")

    # Compute throughput (images per second)
    total_time = sum(inference_times)
    throughput = (trials * len(X_test)) / total_time
    return np.mean(inference_times), throughput

def measure_metrics_tflite(model_path, X_test, Y_test, batch_size):
    """
    Measure classification metrics for a TFLite model.
    Args:
        model_path (str): Path to the TFLite model file.
        X_test (numpy array): Test dataset (images).
        Y_test (numpy array): Ground truth labels.
        batch_size (int): Batch size for inference.
    Returns:
        dict: Dictionary of calculated metrics (accuracy, precision, recall, F1 score).
    """
    # Load TFLite model
    interpreter = tf.lite.Interpreter(model_path=model_path)
    interpreter.allocate_tensors()

    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()

    num_batches = len(X_test) // batch_size
    all_predicted_classes = []

    # Loop over the test dataset in batches
    for j in range(num_batches):
        batch_start = j * batch_size
        batch_end = (j + 1) * batch_size
        batch_images = X_test[batch_start:batch_end]

        # Preprocess the input batch to match TFLite input format
        interpreter.set_tensor(input_details[0]['index'], batch_images)

        # Run inference
        interpreter.invoke()

        # Extract logits and apply sigmoid for binary classification
        prediction_logits = interpreter.get_tensor(output_details[0]['index'])
        probabilities = tf.nn.sigmoid(prediction_logits).numpy()
        predicted_classes = (probabilities > 0.5).astype(int)

        # Collect predictions
        all_predicted_classes.extend(predicted_classes)

    # Handle any remaining images that don't fit evenly in batches
    remaining_samples = len(X_test) % batch_size
    if remaining_samples > 0:
        batch_images = X_test[-remaining_samples:]
        interpreter.set_tensor(input_details[0]['index'], batch_images)
        interpreter.invoke()
        prediction_logits = interpreter.get_tensor(output_details[0]['index'])
        probabilities = tf.nn.sigmoid(prediction_logits).numpy()
        predicted_classes = (probabilities > 0.5).astype(int)
        all_predicted_classes.extend(predicted_classes)

    # Flatten predictions and labels to ensure they are 1D arrays
    all_predicted_classes = np.array(all_predicted_classes).flatten()
    Y_test = np.array(Y_test).flatten()

    # Ensure the number of predictions matches the number of ground truth labels
    if len(all_predicted_classes) != len(Y_test):
        raise ValueError(f"Number of predicted classes ({len(all_predicted_classes)}) "
                         f"does not match the number of ground truth labels ({len(Y_test)}).")

    # Calculate metrics
    accuracy = accuracy_score(Y_test, all_predicted_classes)
    precision = precision_score(Y_test, all_predicted_classes, average="binary")
    recall = recall_score(Y_test, all_predicted_classes, average="binary")
    f1 = f1_score(Y_test, all_predicted_classes, average="binary")

    # Output metrics
    metrics_dict = {
        "accuracy": accuracy,
        "precision": precision,
        "recall": recall,
        "f1_score": f1,
    }

    print(f"Metrics: {metrics_dict}")
    return metrics_dict


In [None]:
data_root = Path("../data/Original_Images")    # points to the folder containing the images that will be used for training

# hyperparameters
batch_size = 1         # size of the batch that will be fed to model
img_height = 224        # input image height
img_width = 224         # input image width
test_size = 0.14

# Load dataset without splitting
dataset = tf.keras.utils.image_dataset_from_directory(
    data_root,                                  # loads images from the data_root directory
    image_size=(img_height, img_width),         # resizes all images to (224, 224) pixels
    batch_size=batch_size,                      # set the batch size
    shuffle=False,                                # shufle data when loaded
    seed = 32
)

# normalization_layer = layers.Rescaling(1./255)
# dataset = dataset.map(lambda x, y: (normalization_layer(x), y))

image_batches, labels = [], []
for image_batch, label_batch in dataset:
    image_batches.append(image_batch)
    labels.append(label_batch)

image_batches = np.concatenate(image_batches) # Flatten batches to get all images
labels = np.concatenate(labels)               # Flatten batches to get all labels  
print(f"Total Images: {image_batches.shape[0]} \nTotal Labels: {labels.shape[0]}")

# Split the data into test subset for benchmarking
_, X_test, _, Y_test = train_test_split(image_batches, labels, test_size=test_size, random_state=42)

# Normalize the data
X_test = X_test / 255.0

Found 228 files belonging to 2 classes.
Total Images: 228 
Total Labels: 228


In [13]:
# Load the TFLite model
model_path = "../converted_models/TFLITE/fp16_quantized_model.tflite"
# interpreter = tf.lite.Interpreter(model_path=model_path)

# # Allocate tensors (prepares the model for inference)
# interpreter.allocate_tensors()

# # Get input and output details (to know the tensor indices and shapes)
# input_details = interpreter.get_input_details()
# output_details = interpreter.get_output_details()

print(get_model_size(model_path))
print(measure_inference_time_tflite(model_path, X_test, batch_size))
print(measure_metrics_tflite(model_path, X_test, Y_test, batch_size))

4.259204864501953
Running 10 warm-up trials to initialize the model...


ValueError: Cannot set tensor: Dimension mismatch. Got 32 but expected 1 for dimension 0 of input 0.