## IoT for Eco-Friendly Tourism - Summer School
### Valencia, Spain - 2025



### ML-based model compression 




### Imports

In [None]:
import os
import numpy as np
from numpy import array
import matplotlib.pyplot as plt
import seaborn as sn
import pandas as pd

## Tensorflow + Keras libraries
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Sequential
from tensorflow.keras.models import *
from tensorflow.keras.layers import *
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import SGD, Adam
import tensorflow_model_optimization as tfmot
from tensorflow_model_optimization.python.core.sparsity.keras import prune, pruning_callbacks, pruning_schedule
from tensorflow_model_optimization.sparsity.keras import strip_pruning

## Quantization
from qkeras import *

## Datasets
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.datasets import cifar100
from tensorflow.keras.datasets import mnist
from tensorflow.keras.datasets import fashion_mnist

from distillationClassKeras import *
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

### GPU configuration

In [None]:
# GPU
# NOTE(review): TensorFlow is already imported by the imports cell, so this
# flag may be set too late to take effect — confirm.
os.environ['TF_XLA_FLAGS'] = '--tf_xla_enable_xla_devices'

# Query the devices once (stable tf.config API) and reuse the result;
# `tf` comes from the imports cell, so it is not re-imported here.
gpus = tf.config.list_physical_devices('GPU')
print("GPUs: ", len(gpus))

if gpus:
    try:
        # Let TensorFlow grow GPU memory on demand instead of reserving it all up front
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        # Memory growth has to be configured before the GPUs are initialized
        print(e)

#### Dataset loading

In [None]:
# Load the MNIST dataset (train / test splits of images and integer labels)
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

# Normalization: divide the pixel values by 255.0
x_train = x_train / 255.0
x_test = x_test / 255.0

In [None]:
# Flatten every image into a 28 * 28 = 784 element vector, as expected by the dense models
x_train, x_test = (
    images.reshape(-1, 28 * 28) for images in (x_train, x_test)
)

In [None]:

# One-hot encode the integer class labels (10 digit classes).
# The ndim guard keeps this cell idempotent: running it a second time would
# otherwise one-hot encode the already encoded labels and corrupt their shape.
if y_train.ndim == 1:
    y_train = to_categorical(y_train, num_classes=10, dtype=int)
if y_test.ndim == 1:
    y_test = to_categorical(y_test, num_classes=10, dtype=int)

### Student model definition

In [None]:

def build_student():
    """Build the quantized student MLP (784 -> 8 -> 7 -> 10) from QKeras layers.

    Every dense layer uses the same kernel/bias quantizer specs and the
    'lecun_uniform' initializer; a Dropout(0.1) precedes the output layer and
    the model ends with a softmax activation.

    Returns:
        The uncompiled ``Sequential`` student model.
    """
    # Fixed point precision, expressed as QKeras quantizer spec strings
    kernel_spec = "quantized_bits(8, 4,alpha=1)"
    bias_spec = "quantized_bits(8, 4 ,alpha=1)"
    activation_spec = 'quantized_bits(8, 2)'

    def quantized_dense(units, name, **extra):
        # All dense layers share the quantizers and the initializer.
        return QDense(units, name=name,
                      kernel_quantizer=kernel_spec, bias_quantizer=bias_spec,
                      kernel_initializer='lecun_uniform',
                      **extra)

    # NOTE(review): the activation layers are named 'relu_*' but use
    # quantized_bits (a quantizer) rather than quantized_relu — confirm
    # this is intended.
    student_layers = [
        quantized_dense(8, 'fc1_input', input_shape=(28 * 28,)),
        QActivation(activation=activation_spec, name='relu_input'),

        quantized_dense(7, 'fc1'),
        QActivation(activation=activation_spec, name='relu1'),

        # Optional extra hidden block (disabled):
        # Dropout(0.2),
        # quantized_dense(10, 'fc2'),
        # QActivation(activation=activation_spec, name='relu2'),

        Dropout(0.1),

        # Optional extra hidden block (disabled):
        # quantized_dense(5, 'fc3'),
        # QActivation(activation=activation_spec, name='relu3'),

        quantized_dense(10, 'output'),
        Activation(activation='softmax', name='softmax'),
    ]
    return Sequential(student_layers)



### Teacher model

In [None]:

def build_teacher():
    """Build the unquantized teacher MLP (784 -> 256 -> 64 -> 10).

    Two ReLU dense layers, a Dropout(0.2) and a 10-way softmax output.

    Returns:
        The uncompiled ``keras.Sequential`` teacher model.
    """
    teacher_layers = [
        Dense(256, activation="relu", input_shape=(28 * 28,)),
        Dense(64, activation="relu"),
        Dropout(0.2),
        Dense(10, activation="softmax"),
    ]
    return keras.Sequential(teacher_layers)




### Teacher model: compile and fit

In [None]:
# Instantiate the teacher network and print its layer-by-layer summary
teacher = build_teacher()
teacher.summary()


In [None]:
# Training hyper-parameters of the teacher
lr = 0.001
epochs = 16
batch_size = 128
loss = "categorical_crossentropy"   # labels are one-hot encoded
metrics = ["accuracy"]
op = Adam(lr)

teacher.compile(loss=loss, optimizer=op, metrics=metrics)

# The test split is used as validation data during training
h = teacher.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(x_test, y_test),
)

In [None]:
# Teacher accuracy per epoch: training vs. validation
fig, ax = plt.subplots(figsize=(15, 3))
ax.plot(h.history['accuracy'], label='Train Accuracy')
ax.plot(h.history['val_accuracy'], label='Validation Accuracy')
ax.set(xlabel='Epochs', ylabel='Accuracy', title='Accuracy over epochs')
ax.legend()
plt.show()

In [None]:
# Teacher loss per epoch: training vs. validation
fig, ax = plt.subplots(figsize=(15, 3))
ax.plot(h.history['loss'], label='Train Loss')
ax.plot(h.history['val_loss'], label='Validation Loss')
ax.set(xlabel='Epochs', ylabel='Loss', title='Loss over epochs')
ax.legend()
plt.show()


### Teacher confusion matrix

In [None]:
# Class probabilities predicted by the teacher; the predicted class is the most probable one
y_pred_probs = teacher.predict(x_test)
y_pred = y_pred_probs.argmax(axis=1)

# y_test is one-hot encoded, so recover the integer class indices the same way
y_true = y_test.argmax(axis=1)

# Rows are true classes, columns are predicted classes
cm = confusion_matrix(y_true, y_pred)
disp = ConfusionMatrixDisplay(confusion_matrix=cm)
disp.plot(cmap='Purples')
plt.show()


### Build student model

In [None]:
# Instantiate the quantized student; it is wrapped for pruning and distilled in the cells below
modelQ = build_student()

### Pruning strategy

In [None]:
# Target fraction of weights to prune
final_sparsity = 0.2

# Sparsity ramps from 0 up to final_sparsity between training steps 0 and 3000
sparsity_schedule = tfmot.sparsity.keras.PolynomialDecay(
    initial_sparsity=0,
    final_sparsity=final_sparsity,
    begin_step=0,
    end_step=3000,
)
pruning_params = {'pruning_schedule': sparsity_schedule}

# Wrap the quantized student so that low-magnitude weights are pruned during training
model_QPKD = tfmot.sparsity.keras.prune_low_magnitude(modelQ, **pruning_params)

### Knowledge distillation implementation

In [None]:
# The sparse loss/metric below take integer class ids, so undo the one-hot encoding
train_labels = np.argmax(y_train, axis=1)

# Pruned + quantized student learns from the trained teacher
distilledMLP = Distiller(student=model_QPKD, teacher=teacher)

adam = Adam(0.001)
distilledMLP.compile(
    optimizer=adam,
    metrics=[keras.metrics.SparseCategoricalAccuracy()],
    # build_student() ends with a softmax layer, so the student outputs
    # probabilities, not logits: from_logits must be False, otherwise the
    # loss would apply a second softmax on top of the model's own.
    student_loss_fn=keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    distillation_loss_fn=keras.losses.KLDivergence(),
    alpha=0.1,
    temperature=9,
)
# NOTE(review): Distiller lives in distillationClassKeras (not shown here). If it
# applies a temperature softmax to the raw model outputs, it expects logits from
# both teacher and student, while both models here end in softmax — confirm.

history = distilledMLP.fit(
    x_train,
    train_labels,
    validation_split=0.2,
    batch_size=64,
    epochs=32,
    # Required by tfmot: advances the pruning step on every batch
    callbacks=[tfmot.sparsity.keras.UpdatePruningStep()],
)

In [None]:
# Distilled student accuracy per epoch: training vs. validation
fig, ax = plt.subplots(figsize=(15, 3))
ax.plot(history.history['sparse_categorical_accuracy'], label='Train Accuracy')
ax.plot(history.history['val_sparse_categorical_accuracy'], label='Validation Accuracy')
ax.set(xlabel='Epochs', ylabel='Accuracy', title='Accuracy over epochs')
ax.legend()
plt.show()

In [None]:
# Distilled student loss per epoch: training vs. validation
fig, ax = plt.subplots(figsize=(15, 3))
ax.plot(history.history['student_loss'], label='Train Loss')
ax.plot(history.history['val_student_loss'], label='Validation Loss')
ax.set(xlabel='Epochs', ylabel='Loss', title='Loss over epochs')
ax.legend()
plt.show()


#### Student confusion matrix

In [None]:
# Class probabilities predicted by the distilled student; the predicted class is the most probable one
y_pred_probs = distilledMLP.student.predict(x_test)
y_pred = y_pred_probs.argmax(axis=1)

# y_test is one-hot encoded, so recover the integer class indices the same way
y_true = y_test.argmax(axis=1)

# Rows are true classes, columns are predicted classes
cm = confusion_matrix(y_true, y_pred)
disp = ConfusionMatrixDisplay(confusion_matrix=cm)
disp.plot(cmap='Purples')
plt.show()


In [None]:
# Export the distilled student to disk
model_path = "models/mnistKD.h5"

# Create the output directory up front so the save does not depend on it already existing
os.makedirs(os.path.dirname(model_path), exist_ok=True)

# The student is still wrapped by prune_low_magnitude; strip the pruning wrappers
# so the saved file contains only the plain (sparse) model layers.
student_export = tfmot.sparsity.keras.strip_pruning(distilledMLP.student)
student_export.save(model_path)

---
##### Romina Soledad Molina, Ph.D. - MLab/STI ICTP, Trieste, Italy