## Lab09.2 Model Compression

Select free T4 GPU in Runtime settings

## Download data

In [1]:
!pip install -q gdown
!gdown https://drive.google.com/uc?id=12WhCCpKTWpeBztLegcoYx2gMo2KbaxDG

Downloading...
From (original): https://drive.google.com/uc?id=12WhCCpKTWpeBztLegcoYx2gMo2KbaxDG
From (redirected): https://drive.google.com/uc?id=12WhCCpKTWpeBztLegcoYx2gMo2KbaxDG&confirm=t&uuid=ddd0b742-7162-44c7-8d2a-d7ceabc71cc1
To: /home/kast/m-eng-robotics/embeded-systems/thd-mro-em-labs/lab-9/dogs-vs-cats.zip
100%|████████████████████████████████████████| 852M/852M [00:58<00:00, 14.7MB/s]


In [5]:
import zipfile

# Unpack the downloaded archive first, then the nested train.zip it contains.
# Order matters: the second archive only exists after the first extraction.
for archive_path, target_dir in (
    ('dogs-vs-cats.zip', 'data'),
    ('data/train.zip', 'data/'),
):
    with zipfile.ZipFile(archive_path, 'r') as archive:
        archive.extractall(target_dir)

In [6]:
import os
import shutil
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt

# Paths
base_dir = 'data/train'
train_dir = 'data/train_split'
val_dir = 'data/val_split'

# Create one sub-directory per class inside each split directory
for split_dir in (train_dir, val_dir):
    for class_name in ('dogs', 'cats'):
        os.makedirs(os.path.join(split_dir, class_name), exist_ok=True)


def move_to_class_dirs(files, split_dir):
    """Move each file from base_dir into split_dir/dogs or split_dir/cats.

    The class is chosen by substring match on the file name ('dog' is checked
    before 'cat'). Files matching neither are left in base_dir.
    """
    for file in files:
        if 'dog' in file:
            class_name = 'dogs'
        elif 'cat' in file:
            class_name = 'cats'
        else:
            continue
        shutil.move(os.path.join(base_dir, file), os.path.join(split_dir, class_name, file))


# Split data
# os.listdir returns entries in arbitrary order, so sort first; otherwise
# random_state=42 does not produce the same split on every run/machine.
filenames = sorted(os.listdir(base_dir))

if filenames:
    train_files, val_files = train_test_split(filenames, test_size=0.2, random_state=42)
    move_to_class_dirs(train_files, train_dir)
    move_to_class_dirs(val_files, val_dir)
else:
    # The files were already moved by an earlier run of this cell; calling
    # train_test_split on an empty list would raise, so skip instead.
    print(f"No files left in {base_dir}; assuming the split was already done.")

## Create data generators

In [7]:
# Both splits use the same preprocessing: pixel values are multiplied by 1/255.
train_datagen = ImageDataGenerator(rescale=1./255)
val_datagen = ImageDataGenerator(rescale=1./255)

# Options shared by both directory iterators: images resized to 150x150,
# batches of 32, one binary label per image.
flow_options = dict(
    target_size=(150, 150),
    batch_size=32,
    class_mode='binary',
)

# Load data from directories
train_generator = train_datagen.flow_from_directory(train_dir, **flow_options)
val_generator = val_datagen.flow_from_directory(val_dir, **flow_options)

Found 20000 images belonging to 2 classes.
Found 5000 images belonging to 2 classes.


## Create basic CNN classification model

In [None]:
from tensorflow.keras import models, layers

# Build the model: three Conv2D + MaxPooling2D stages with 32, 64 and 128
# filters, then a dense head ending in a single sigmoid unit.
layer_stack = [layers.Input(shape=(150, 150, 3))]

for n_filters in (32, 64, 128):
    layer_stack.append(layers.Conv2D(n_filters, (3, 3), activation='relu'))
    layer_stack.append(layers.MaxPooling2D((2, 2)))

layer_stack.extend([
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

model_basic = models.Sequential(layer_stack)

# Compile the model for binary classification
model_basic.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

## Train basic model

In [10]:
# Train for 10 epochs on the training generator; the validation generator
# supplies the val_accuracy / val_loss values reported for each epoch.
history_model_basic = model_basic.fit(
    x=train_generator,
    validation_data=val_generator,
    epochs=10,
)

Epoch 1/10


  self._warn_if_super_not_called()


[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m242s[0m 377ms/step - accuracy: 0.6033 - loss: 0.6631 - val_accuracy: 0.7508 - val_loss: 0.5133
Epoch 2/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m207s[0m 331ms/step - accuracy: 0.7674 - loss: 0.4824 - val_accuracy: 0.8030 - val_loss: 0.4245
Epoch 3/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m213s[0m 340ms/step - accuracy: 0.8206 - loss: 0.3898 - val_accuracy: 0.8294 - val_loss: 0.3795
Epoch 4/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m212s[0m 339ms/step - accuracy: 0.8620 - loss: 0.3149 - val_accuracy: 0.8268 - val_loss: 0.4146
Epoch 5/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m212s[0m 338ms/step - accuracy: 0.9095 - loss: 0.2154 - val_accuracy: 0.8428 - val_loss: 0.4161
Epoch 6/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m215s[0m 343ms/step - accuracy: 0.9519 - loss: 0.1224 - val_accuracy: 0.8414 - val_loss: 0.5162
Epoch 7/10
[1m

# Tasks

## Save model and compress

Refer to the provided lab instructions document 'Lab09.2 Model Compression'

- Save the full model to disk (*.keras format).
- Convert the model to TFLite using the following methods:
    - Default conversion
    - Weight quantization (weights to INT8)
    - Float16 quantization (weights to FP16)
    - Integer quantization (weights and activations to INT8, use a representative dataset for calibration)
- Compare the file sizes of the different models.



In [None]:
import os
# Save the model as keras
# The output directory must exist before model.save() is called.
keras_model_path = "models/model_basic.keras"
os.makedirs("models", exist_ok=True)
model_basic.save(keras_model_path)


In [18]:
# Report the on-disk size of the saved Keras model in megabytes
# (1 MB is taken as 1024 * 1024 bytes here).
bytes_per_mb = 1024 * 1024
file_size_mb = os.path.getsize("models/model_basic.keras") / bytes_per_mb
print(f"Model file size: {file_size_mb:.2f} MB")

Model file size: 55.30 MB


In [19]:
# Load the model
# Read the saved .keras file back from disk. The reloaded `model` (not the
# in-memory `model_basic`) is what the TFLite converters below are built from.

import tensorflow as tf

model = tf.keras.models.load_model("models/model_basic.keras")

## Default TFLite

In [21]:
# Plain conversion: no optimization flags are set on the converter.
default_tflite_path = "models/model_basic_default.tflite"

converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

with open(default_tflite_path, "wb") as tflite_file:
    tflite_file.write(tflite_model)

INFO:tensorflow:Assets written to: /tmp/tmpknajm63v/assets


INFO:tensorflow:Assets written to: /tmp/tmpknajm63v/assets


Saved artifact at '/tmp/tmpknajm63v'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 150, 150, 3), dtype=tf.float32, name='input_layer_1')
Output Type:
  TensorSpec(shape=(None, 1), dtype=tf.float32, name=None)
Captures:
  135307042878944: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135308579776368: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135306911166208: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135306911168848: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135306911165328: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135306913141984: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135306913143040: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135306913076096: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135306913078560: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135306913076624: TensorSpec(shape=(), dtype=tf.resource, name=None)


W0000 00:00:1750348309.776299  932618 tf_tfl_flatbuffer_helpers.cc:365] Ignored output_format.
W0000 00:00:1750348309.776564  932618 tf_tfl_flatbuffer_helpers.cc:368] Ignored drop_control_dependency.
2025-06-19 17:51:49.778863: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: /tmp/tmpknajm63v
2025-06-19 17:51:49.779708: I tensorflow/cc/saved_model/reader.cc:52] Reading meta graph with tags { serve }
2025-06-19 17:51:49.779724: I tensorflow/cc/saved_model/reader.cc:147] Reading SavedModel debug info (if present) from: /tmp/tmpknajm63v
2025-06-19 17:51:49.792050: I tensorflow/cc/saved_model/loader.cc:236] Restoring SavedModel bundle.
2025-06-19 17:51:49.925457: I tensorflow/cc/saved_model/loader.cc:220] Running initialization op on SavedModel bundle at path: /tmp/tmpknajm63v
2025-06-19 17:51:49.947616: I tensorflow/cc/saved_model/loader.cc:471] SavedModel load for tags { serve }; Status: success: OK. Took 168522 microseconds.


## Weight Quantization
8-bit integer (INT8) precision for the weights

In [22]:
# Build a fresh converter instead of mutating the one from the default
# conversion cell. Otherwise re-running this cell after the Float16 cell would
# still carry supported_types=[tf.float16] and write an FP16 model under the
# weight-quantization file name.
converter = tf.lite.TFLiteConverter.from_keras_model(model)

# Optimize.DEFAULT with no representative dataset quantizes the weights only.
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_model = converter.convert()

with open("models/model_basic_weight_quant.tflite", "wb") as f:
    f.write(tflite_model)

INFO:tensorflow:Assets written to: /tmp/tmpi4yfcfn_/assets


INFO:tensorflow:Assets written to: /tmp/tmpi4yfcfn_/assets


Saved artifact at '/tmp/tmpi4yfcfn_'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 150, 150, 3), dtype=tf.float32, name='input_layer_1')
Output Type:
  TensorSpec(shape=(None, 1), dtype=tf.float32, name=None)
Captures:
  135307042878944: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135308579776368: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135306911166208: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135306911168848: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135306911165328: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135306913141984: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135306913143040: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135306913076096: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135306913078560: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135306913076624: TensorSpec(shape=(), dtype=tf.resource, name=None)


W0000 00:00:1750348380.405488  932618 tf_tfl_flatbuffer_helpers.cc:365] Ignored output_format.
W0000 00:00:1750348380.405699  932618 tf_tfl_flatbuffer_helpers.cc:368] Ignored drop_control_dependency.
2025-06-19 17:53:00.408415: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: /tmp/tmpi4yfcfn_
2025-06-19 17:53:00.410793: I tensorflow/cc/saved_model/reader.cc:52] Reading meta graph with tags { serve }
2025-06-19 17:53:00.410821: I tensorflow/cc/saved_model/reader.cc:147] Reading SavedModel debug info (if present) from: /tmp/tmpi4yfcfn_
2025-06-19 17:53:00.428950: I tensorflow/cc/saved_model/loader.cc:236] Restoring SavedModel bundle.
2025-06-19 17:53:00.570109: I tensorflow/cc/saved_model/loader.cc:220] Running initialization op on SavedModel bundle at path: /tmp/tmpi4yfcfn_
2025-06-19 17:53:00.587111: I tensorflow/cc/saved_model/loader.cc:471] SavedModel load for tags { serve }; Status: success: OK. Took 179045 microseconds.


## Float16 quantization
16-bit floating-point (FP16) precision for the weights

In [23]:
# Build a fresh converter so this cell does not rely on state left behind by
# the weight-quantization cell. Float16 quantization needs BOTH settings:
# optimizations must be enabled, and float16 must be listed as a supported type.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_types = [tf.float16]
tflite_fp16 = converter.convert()

with open("models/model_basic_fp16.tflite", "wb") as f:
    f.write(tflite_fp16)

INFO:tensorflow:Assets written to: /tmp/tmp0bhq9igf/assets


INFO:tensorflow:Assets written to: /tmp/tmp0bhq9igf/assets


Saved artifact at '/tmp/tmp0bhq9igf'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 150, 150, 3), dtype=tf.float32, name='input_layer_1')
Output Type:
  TensorSpec(shape=(None, 1), dtype=tf.float32, name=None)
Captures:
  135307042878944: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135308579776368: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135306911166208: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135306911168848: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135306911165328: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135306913141984: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135306913143040: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135306913076096: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135306913078560: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135306913076624: TensorSpec(shape=(), dtype=tf.resource, name=None)


W0000 00:00:1750348418.638366  932618 tf_tfl_flatbuffer_helpers.cc:365] Ignored output_format.
W0000 00:00:1750348418.638767  932618 tf_tfl_flatbuffer_helpers.cc:368] Ignored drop_control_dependency.
2025-06-19 17:53:38.640739: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: /tmp/tmp0bhq9igf
2025-06-19 17:53:38.642583: I tensorflow/cc/saved_model/reader.cc:52] Reading meta graph with tags { serve }
2025-06-19 17:53:38.642610: I tensorflow/cc/saved_model/reader.cc:147] Reading SavedModel debug info (if present) from: /tmp/tmp0bhq9igf
2025-06-19 17:53:38.659512: I tensorflow/cc/saved_model/loader.cc:236] Restoring SavedModel bundle.
2025-06-19 17:53:38.831594: I tensorflow/cc/saved_model/loader.cc:220] Running initialization op on SavedModel bundle at path: /tmp/tmp0bhq9igf
2025-06-19 17:53:38.857026: I tensorflow/cc/saved_model/loader.cc:471] SavedModel load for tags { serve }; Status: success: OK. Took 215941 microseconds.


## Integer quantization
8-bit integer (INT8) precision for both weights and activations

In [26]:
def representative_data_gen():
    """Yield 100 batches from train_generator, cast to float32, for calibration.

    Each yielded item is a one-element list holding the image batch; the
    labels returned by the generator are discarded.
    """
    for _batch_idx in range(100):
        images, _labels = next(train_generator)
        yield [images.astype("float32")]


# Fresh converter configured for integer quantization of weights and activations
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.representative_dataset = representative_data_gen
converter.optimizations = [tf.lite.Optimize.DEFAULT]

# Restrict the converter to INT8 builtin ops and make the model's own
# input and output tensors int8 as well.
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.int8
converter.inference_output_type = tf.int8

tflite_model_int8 = converter.convert()

int8_tflite_path = "models/model_basic_int8.tflite"
with open(int8_tflite_path, "wb") as tflite_file:
    tflite_file.write(tflite_model_int8)



INFO:tensorflow:Assets written to: /tmp/tmp90d6q39d/assets


INFO:tensorflow:Assets written to: /tmp/tmp90d6q39d/assets


Saved artifact at '/tmp/tmp90d6q39d'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 150, 150, 3), dtype=tf.float32, name='input_layer_1')
Output Type:
  TensorSpec(shape=(None, 1), dtype=tf.float32, name=None)
Captures:
  135307042878944: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135308579776368: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135306911166208: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135306911168848: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135306911165328: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135306913141984: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135306913143040: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135306913076096: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135306913078560: TensorSpec(shape=(), dtype=tf.resource, name=None)
  135306913076624: TensorSpec(shape=(), dtype=tf.resource, name=None)


W0000 00:00:1750348636.585376  932618 tf_tfl_flatbuffer_helpers.cc:365] Ignored output_format.
W0000 00:00:1750348636.585741  932618 tf_tfl_flatbuffer_helpers.cc:368] Ignored drop_control_dependency.
2025-06-19 17:57:16.589688: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: /tmp/tmp90d6q39d
2025-06-19 17:57:16.592066: I tensorflow/cc/saved_model/reader.cc:52] Reading meta graph with tags { serve }
2025-06-19 17:57:16.592181: I tensorflow/cc/saved_model/reader.cc:147] Reading SavedModel debug info (if present) from: /tmp/tmp90d6q39d
2025-06-19 17:57:16.613807: I tensorflow/cc/saved_model/loader.cc:236] Restoring SavedModel bundle.
2025-06-19 17:57:16.943988: I tensorflow/cc/saved_model/loader.cc:220] Running initialization op on SavedModel bundle at path: /tmp/tmp90d6q39d
2025-06-19 17:57:17.003337: I tensorflow/cc/saved_model/loader.cc:471] SavedModel load for tags { serve }; Status: success: OK. Took 413881 microseconds.
fully_quantize: 0, inference_type: 6, input_

## Compare file sizes

In [27]:
# Directory where all models are stored
model_dir = "models"

# The saved Keras model followed by the four TFLite variants
model_files = [
    "model_basic.keras",
    "model_basic_default.tflite",
    "model_basic_weight_quant.tflite",
    "model_basic_fp16.tflite",
    "model_basic_int8.tflite"
]

# Print a fixed-width table of file sizes in KB and MB.
# Files that do not exist are listed as 'Not Found' rather than raising.
header = f"{'Model File':<40} {'Size (KB)':>10} {'Size (MB)':>10}"
print(header)
print("-" * 60)
for model_name in model_files:
    model_path = os.path.join(model_dir, model_name)
    if not os.path.exists(model_path):
        print(f"{model_name:<40} {'Not Found':>10}")
        continue
    size_kb = os.path.getsize(model_path) / 1024
    size_mb = size_kb / 1024
    print(f"{model_name:<40} {size_kb:10.2f} {size_mb:10.2f}")


Model File                                Size (KB)  Size (MB)
------------------------------------------------------------
model_basic.keras                          56628.02      55.30
model_basic_default.tflite                 18865.16      18.42
model_basic_weight_quant.tflite             4726.91       4.62
model_basic_fp16.tflite                     9435.71       9.21
model_basic_int8.tflite                     4729.12       4.62
