## Inference Latency

In [None]:
import tensorflow as tf
import numpy as np
import time
import os
import psutil
from PIL import Image

# Benchmark configuration: the sample image, the saved Keras model, and the
# size the image is resized to before being fed to the model.
image_path = "image.png"
model_path = "mobilenetv2Model.keras"
image_size = (64, 64)

# Load the image as RGB. The context manager closes the underlying file once
# convert() has produced an in-memory copy of the pixel data.
with Image.open(image_path) as source_image:
    image = source_image.convert('RGB')
image = image.resize(image_size)
# NOTE(review): pixel values are passed as float32 without any rescaling --
# confirm the saved model performs its own input preprocessing.
image_array = np.array(image).astype(np.float32)
# Add a leading batch axis so the single image forms a batch of one.
input_data = np.expand_dims(image_array, axis=0)

model = tf.keras.models.load_model(model_path)
input_shape = model.input_shape
print("input shape: ", input_shape)
model.summary()

# One untimed call so that one-off setup cost on the first predict() does not
# end up in the latency samples below.
print("WARMING UP")
_ = model.predict(input_data)
print("__________________________________________")

repeats = 10
latencies = []
for _ in range(repeats):
    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring short intervals; time.time() can jump if the system clock is
    # adjusted and may have coarse resolution.
    start = time.perf_counter()
    # verbose=0 keeps progress-bar rendering out of the timed region so the
    # sample reflects the predict() call rather than console output.
    _ = model.predict(input_data, verbose=0)
    end = time.perf_counter()
    latencies.append((end - start) * 1000)  # seconds -> milliseconds
avg_latency = np.mean(latencies)
print(f"\nInference Latency: {avg_latency:.2f} ms")

# Resident set size of this Python process, reported in decimal megabytes
# (10**6 bytes) -- note this differs from the binary unit used for the model
# file size below.
process = psutil.Process(os.getpid())
mem_mb = process.memory_info().rss / 1e6
print(f"Memory Usage (RAM): {mem_mb:.2f} MB")

# On-disk size of the saved model file in binary megabytes (1024 * 1024 bytes).
if os.path.exists(model_path):
    keras_size = os.path.getsize(model_path) / (1024 * 1024)
    print(f"Model Size Binary (Keras): {keras_size:.2f} MB")

input shape:  (None, 64, 64, 3)


WARMING UP
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 440ms/step
__________________________________________
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step

Inference Latency: 35.24 ms
Memory Usage (RAM): 341.72 MB
Model Size Binary (Keras): 33.76 MB
Model Size Decimal (Keras): 35.40 MB
