# Benchmarking TFLite modelu: velikost a čas inference

Tento skript:

- vytvoří jednoduchý neuronový model,
- vytrénuje ho na syntetických datech,
- exportuje do .tflite,
- změří jeho velikost (v kB),
- změří průměrný čas inference (v ms).

In [1]:
# 🛠️ INSTALACE (pouze pokud nemáš):
# pip install tensorflow numpy

import tensorflow as tf
import numpy as np
import time
import os

# 1. Definice a trénink jednoduchého modelu
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(10,)),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

X_train = np.random.rand(1000, 10)
y_train = np.random.randint(0, 2, size=(1000, 1))

model.fit(X_train, y_train, epochs=5, batch_size=32, verbose=0)

# 2. Export modelu do .tflite
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()
with open("model_quant.tflite", "wb") as f:
    f.write(tflite_model)

# 3. Měření velikosti modelu
model_size_kb = os.path.getsize("model_quant.tflite") / 1024
print(f"Velikost TFLite modelu: {model_size_kb:.2f} KB")

# 4. Načtení modelu a měření průměrného inference času
interpreter = tf.lite.Interpreter(model_path="model_quant.tflite")
interpreter.allocate_tensors()

input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
input_data = np.random.rand(1, 10).astype(np.float32)

start = time.time()
for _ in range(100):
    interpreter.set_tensor(input_details[0]['index'], input_data)
    interpreter.invoke()
    output = interpreter.get_tensor(output_details[0]['index'])
end = time.time()

avg_inf_time = (end - start) / 100 * 1000
print(f"Průměrný čas inference: {avg_inf_time:.2f} ms")

2025-04-23 18:38:44.517506: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


INFO:tensorflow:Assets written to: /var/folders/6f/m_ls96295sl5v0q5fc8hxjx40000gn/T/tmpphzu8w16/assets
Velikost TFLite modelu: 3.11 KB
Průměrný čas inference: 0.00 ms


2025-04-23 18:38:44.893307: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:362] Ignored output_format.
2025-04-23 18:38:44.893317: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:365] Ignored drop_control_dependency.
2025-04-23 18:38:44.893603: I tensorflow/cc/saved_model/reader.cc:45] Reading SavedModel from: /var/folders/6f/m_ls96295sl5v0q5fc8hxjx40000gn/T/tmpphzu8w16
2025-04-23 18:38:44.894027: I tensorflow/cc/saved_model/reader.cc:89] Reading meta graph with tags { serve }
2025-04-23 18:38:44.894031: I tensorflow/cc/saved_model/reader.cc:130] Reading SavedModel debug info (if present) from: /var/folders/6f/m_ls96295sl5v0q5fc8hxjx40000gn/T/tmpphzu8w16
2025-04-23 18:38:44.895008: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:354] MLIR V1 optimization pass is not enabled
2025-04-23 18:38:44.895254: I tensorflow/cc/saved_model/loader.cc:229] Restoring SavedModel bundle.
2025-04-23 18:38:44.906659: I tensorflow/cc/saved_model/loader.