<a href="https://colab.research.google.com/github/AhmedFarrukh/DeepLearning-EdgeComputing/blob/main/notebooks/MeasuringInferenceTimes.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In this notebook, seven popular convolutional neural networks are converted to TensorFlow Lite, both with and without quantization, and the resulting change in their inference times is measured.

In [1]:
import tensorflow as tf
from PIL import Image
import numpy as np
import os
import sys
import time
import numpy as np
import pathlib

In [2]:
# Architectures to convert and benchmark. Each entry is looked up by name on
# tf.keras.applications, so it must match the constructor's name exactly.
modelNames = [
    "MobileNet",
    "ResNet50",
    "ResNet101",
    "InceptionV3",
    "VGG16",
    "VGG19",
    "ResNet152",
]

In [None]:
# Output directory for the converted models. Created once up front: it does
# not depend on the model, so there is no need to redo it on every iteration.
tflite_models_dir = pathlib.Path("/tmp/tflite_models/")
tflite_models_dir.mkdir(exist_ok=True, parents=True)

for modelName in modelNames:
  # Drop the global Keras state left over from the previous architecture so
  # memory use does not keep growing while several large models are built in
  # a row. Done at the start of the iteration so the last model stays usable.
  tf.keras.backend.clear_session()

  # Resolve the constructor by name and load the ImageNet-pretrained weights.
  model_class = getattr(tf.keras.applications, modelName)
  model = model_class(weights='imagenet')

  # Baseline conversion: float model, no optimizations.
  converter = tf.lite.TFLiteConverter.from_keras_model(model)
  tflite_model = converter.convert()

  # Second conversion with the default optimizations enabled (quantized model).
  converter = tf.lite.TFLiteConverter.from_keras_model(model)
  converter.optimizations = [tf.lite.Optimize.DEFAULT]
  tflite_model_quant = converter.convert()

  # Save the unquantized/float model:
  tflite_model_file = tflite_models_dir/(modelName+".tflite")
  tflite_model_file.write_bytes(tflite_model)
  # Save the quantized model:
  tflite_model_quant_file = tflite_models_dir/(modelName+"_quant.tflite")
  tflite_model_quant_file.write_bytes(tflite_model_quant)


Next, download the TensorFlow Lite benchmark binary and make it executable.

In [None]:
!mkdir /tmp/benchmark
!wget https://storage.googleapis.com/tensorflow-nightly-public/prod/tensorflow/release/lite/tools/nightly/latest/linux_x86-64_benchmark_model -P /tmp/benchmark
!chmod +x /tmp/benchmark/linux_x86-64_benchmark_model
!touch /tmp/benchmark/results

Next, define a function that parses the output of the benchmark.

In [None]:
import re

def parse_benchmark_output(output):
    """Extract timing and memory figures from benchmark log text.

    The text is scanned line by line. A ``INFO: Graph: [<path>]`` line starts
    a new entry, keyed by the file name of ``<path>`` up to its first dot;
    the metric lines that follow are attached to that entry. Metric lines
    seen before any graph line are ignored.

    Args:
        output: The benchmark's log output as a single string.

    Returns:
        A dict mapping each model name to a dict that may contain:
          * ``'Init Time (ms)'``: float
          * ``'Inference Timings (us)'``: dict with ``'Init'`` (int),
            ``'First Inference'`` (int), ``'Warmup (avg)'`` (float) and
            ``'Inference (avg)'`` (float)
          * ``'Memory Footprint (MB)'``: dict with ``'Init'`` and
            ``'Overall'`` (floats)
        A key is present only if the corresponding line was found.
    """
    # One pattern for every real-valued field: plain integers, decimals and
    # scientific notation (e.g. "42", "12.5", "1.5e+06") are all accepted, so
    # no separate integer-only fallback pattern is needed.
    number = r'(\d+(?:\.\d+)?(?:[eE][+-]?\d+)?)'
    # Memory figures are deltas, so additionally allow a leading minus sign.
    signed_number = r'(-?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?)'

    # Regular expressions to match the required information
    model_name_pattern = re.compile(r'INFO: Graph: \[(.*)\]')
    init_time_pattern = re.compile(r'INFO: Initialized session in ' + number + r'ms')
    inference_pattern = re.compile(
        r'INFO: Inference timings in us: Init: (\d+), First inference: (\d+), '
        r'Warmup \(avg\): ' + number + r', Inference \(avg\): ' + number
    )
    memory_pattern = re.compile(
        r'INFO: Memory footprint delta from the start of the tool \(MB\): '
        r'init=' + signed_number + r' overall=' + signed_number
    )

    results = {}
    # Name of the model whose metrics are currently being collected.
    current_model = None

    for line in output.splitlines():
        # A graph line switches to a new model entry.
        model_match = model_name_pattern.search(line)
        if model_match:
            # Keep only the file name, without directory or extension.
            current_model = model_match.group(1).split('/')[-1].split('.')[0]
            results[current_model] = {}
            continue

        # Nothing to attach metrics to until a graph line has been seen.
        if not current_model:
            continue

        init_match = init_time_pattern.search(line)
        if init_match:
            results[current_model]['Init Time (ms)'] = float(init_match.group(1))
            continue

        inference_match = inference_pattern.search(line)
        if inference_match:
            results[current_model]['Inference Timings (us)'] = {
                'Init': int(inference_match.group(1)),
                'First Inference': int(inference_match.group(2)),
                'Warmup (avg)': float(inference_match.group(3)),
                'Inference (avg)': float(inference_match.group(4))
            }
            continue

        memory_match = memory_pattern.search(line)
        if memory_match:
            results[current_model]['Memory Footprint (MB)'] = {
                'Init': float(memory_match.group(1)),
                'Overall': float(memory_match.group(2))
            }

    return results


Finally, run the benchmark several times for each model and report the mean and standard deviation of each metric.

In [None]:
import subprocess
from collections import defaultdict
from statistics import mean
from statistics import stdev

# Metric name -> list of (mean, stdev) tuples, index-aligned with allModels.
results = defaultdict(list)

numModels = len(modelNames)
# Every float model is immediately followed by its quantized counterpart.
allModels = [variant for name in modelNames for variant in (name, name + "_quant")]

# Number of benchmark runs per model; stdev() needs at least two samples.
n = 5

# Maps each key of `results` to the key path of that value inside the
# per-model dict returned by parse_benchmark_output.
metric_paths = {
    "Init Time": ("Init Time (ms)",),
    "Init Inference": ("Inference Timings (us)", "Init"),
    "First Inference": ("Inference Timings (us)", "First Inference"),
    "Warmup Inference": ("Inference Timings (us)", "Warmup (avg)"),
    "Avg Inference": ("Inference Timings (us)", "Inference (avg)"),
    "Memory Init": ("Memory Footprint (MB)", "Init"),
    "Memory Overall": ("Memory Footprint (MB)", "Overall"),
}

for modelName in allModels:
  # Raw per-run samples for this model, keyed like `results`.
  samples = defaultdict(list)
  for _ in range(n):
    # Pass an argument list rather than a shell string: no shell is spawned
    # and the command contains no stray line-continuation whitespace.
    raw_output = subprocess.check_output([
        "/tmp/benchmark/linux_x86-64_benchmark_model",
        "--graph=/tmp/tflite_models/" + modelName + ".tflite",
        "--num_threads=1",
    ])
    parsed = parse_benchmark_output(raw_output.decode('utf-8'))[modelName]
    for metric, path in metric_paths.items():
      # Walk the key path down into the nested parsed dict.
      value = parsed
      for part in path:
        value = value[part]
      samples[metric].append(value)

  # Collapse the n runs into one (mean, stdev) pair per metric.
  for metric in metric_paths:
    results[metric].append((mean(samples[metric]), stdev(samples[metric])))

In [None]:
# Print every model's name followed by its (mean, stdev) pair for each metric.
for idx, name in enumerate(allModels):
  print(name)
  for metric, per_model in results.items():
    print(metric, ": ", per_model[idx])