In [16]:
import tensorflow as tf
import os

# =====================
# PATH
# =====================
MODEL_PATH = "baseline_mobilenetv3_final.keras"
OUTPUT_TFLITE = "baseline_mobilenetv3_fp16.tflite"

# =====================
# LOAD MODEL
# =====================
model = tf.keras.models.load_model(
    MODEL_PATH,
    compile=False
)

print("‚úÖ Baseline FP32 model loaded")


‚úÖ Baseline FP32 model loaded


In [17]:
# =====================
# FP16 QUANTIZATION
# =====================
converter = tf.lite.TFLiteConverter.from_keras_model(model)

converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_types = [tf.float16]

tflite_fp16 = converter.convert()

# =====================
# SAVE MODEL
# =====================
with open(OUTPUT_TFLITE, "wb") as f:
    f.write(tflite_fp16)

print("‚úÖ FP16 TFLite model saved:", OUTPUT_TFLITE)
print("üì¶ Model size (MB):", round(os.path.getsize(OUTPUT_TFLITE) / (1024**2), 2))


INFO:tensorflow:Assets written to: C:\Users\adity\AppData\Local\Temp\tmpmhrvnmh6\assets


INFO:tensorflow:Assets written to: C:\Users\adity\AppData\Local\Temp\tmpmhrvnmh6\assets


Saved artifact at 'C:\Users\adity\AppData\Local\Temp\tmpmhrvnmh6'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32, name='input_layer_1')
Output Type:
  TensorSpec(shape=(None, 10), dtype=tf.float32, name=None)
Captures:
  1635750515664: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1635656597776: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1635656598352: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1635656598928: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1635656598736: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1635656600464: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1635656600848: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1635656601232: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1635656601040: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1635656599504: TensorSpec(shape=(), dtype=tf.resource, name=None)
  163

In [18]:
# =====================
# DISABLE XNNPACK (WAJIB DI WINDOWS)
# =====================
os.environ["TF_LITE_DISABLE_XNNPACK"] = "1"

# =====================
# LOAD INTERPRETER
# =====================
interpreter = tf.lite.Interpreter(
    model_path=OUTPUT_TFLITE
)
interpreter.allocate_tensors()

input_details  = interpreter.get_input_details()
output_details = interpreter.get_output_details()

print("‚úÖ FP16 Interpreter ready")
print("Input :", input_details[0]["shape"], input_details[0]["dtype"])
print("Output:", output_details[0]["shape"], output_details[0]["dtype"])


‚úÖ FP16 Interpreter ready
Input : [  1 224 224   3] <class 'numpy.float32'>
Output: [ 1 10] <class 'numpy.float32'>


    TF 2.20. Please use the LiteRT interpreter from the ai_edge_litert package.
    See the [migration guide](https://ai.google.dev/edge/litert/migration)
    for details.
    


In [20]:
import os
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.mobilenet_v3 import preprocess_input

# =====================
# CONFIG
# =====================
IMG_SIZE = 224
BATCH_SIZE = 16
TEST_DIR = r"C:\Users\adity\Downloads\dataset_split_final\test"

# =====================
# TEST GENERATOR (SAMA DENGAN TRAINING)
# =====================
test_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input
)

test_gen = test_datagen.flow_from_directory(
    TEST_DIR,
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode="categorical",
    shuffle=False
)

print("‚úÖ test_gen ready")
print("Total test samples:", test_gen.samples)


Found 1000 images belonging to 10 classes.
‚úÖ test_gen ready
Total test samples: 1000


In [23]:
import time
import numpy as np

correct = 0
total = 0
times = []

for i in range(test_gen.samples):
    x_batch, y_batch = next(test_gen)
    x = x_batch[0:1]
    y = y_batch[0]

    start = time.time()
    interpreter.set_tensor(input_details[0]["index"], x.astype(np.float32))
    interpreter.invoke()
    output = interpreter.get_tensor(output_details[0]["index"])
    end = time.time()

    pred = output.argmax(axis=1)[0]
    true = y.argmax()

    correct += int(pred == true)
    total += 1
    times.append(end - start)

accuracy = correct / total
avg_time = sum(times) / len(times)
fps = 1 / avg_time

print(f"‚úÖ TFLite FP16 Accuracy : {accuracy*100:.2f}%")
print(f"‚è± Avg inference / img  : {avg_time:.6f} sec")
print(f"üöÄ FPS                 : {fps:.2f}")


‚úÖ TFLite FP16 Accuracy : 98.40%
‚è± Avg inference / img  : 0.002301 sec
üöÄ FPS                 : 434.55
