In [None]:
!unzip '/content/drive/MyDrive/dataset/klimb_llm_optimization_challenge.zip'

In [21]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
from tensorflow.keras import layers

In [3]:
# Path of train and test data
train_dir = "/content/klimb_llm_optimization_challenge/seg_train"
test_dir = "/content/klimb_llm_optimization_challenge/seg_test"

# Data configs
batch_size = 32
img_height = 150
img_width = 150


In [4]:
# Load train data
train_ds = tf.keras.utils.image_dataset_from_directory(
	train_dir,
  validation_split=0.2,
  subset="training",
  seed=123,
  image_size=(img_height, img_width),
  batch_size=batch_size)


Found 14034 files belonging to 6 classes.
Using 11228 files for training.


In [5]:
# Load test data
test_ds = tf.keras.utils.image_dataset_from_directory(
  test_dir,
  validation_split=0.2,
  subset="validation",
  seed=123,
  image_size=(img_height, img_width),
  batch_size=batch_size)

Found 3000 files belonging to 6 classes.
Using 600 files for validation.


In [56]:
# Training the MASTER Model - using Transfer Learning
# Here we are using ImageNet pre-trained model weights
base_model = keras.applications.ResNet152(
		weights='imagenet',  # Load weights pre-trained on ImageNet.
		input_shape=(img_height, img_width, 3),
		include_top=False)  # Do not include the ImageNet classifier at the top.
base_model.trainable = False
inputs = keras.Input(shape=(img_height, img_width, 3))
# We make sure that the base_model is running in inference mode here,
# by passing `training=False`. This is important for fine-tuning.
x = base_model(inputs, training=False)
# Convert features of shape `base_model.output_shape[1:]` to vectors
x = keras.layers.GlobalAveragePooling2D()(x)
# A Dense classifier with a single unit (binary classification)
# x = layers.Flatten()(x)
outputs = layers.Dense(6)(x)
model = keras.Model(inputs, outputs)
model.summary()
model.compile(
		optimizer=keras.optimizers.Adam(),
		loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
		metrics=[keras.metrics.SparseCategoricalAccuracy()],
)

Model: "model_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_27 (InputLayer)       [(None, 150, 150, 3)]     0         
                                                                 
 resnet152 (Functional)      (None, 5, 5, 2048)        58370944  
                                                                 
 global_average_pooling2d_5  (None, 2048)              0         
  (GlobalAveragePooling2D)                                       
                                                                 
 dense_16 (Dense)            (None, 6)                 12294     
                                                                 
Total params: 58383238 (222.71 MB)
Trainable params: 12294 (48.02 KB)
Non-trainable params: 58370944 (222.67 MB)
_________________________________________________________________


In [67]:
epochs = 20
model.fit(train_ds, epochs=epochs)

# Generate results on test data
results = model.evaluate(test_ds)
print(f"Test accuracy with trained teacher model:{results[1]*100 :.2f} %")

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Test accuracy with trained teacher model:90.67 %


In [68]:
### saving Teacher Model
model = model  # Get model (Sequential, Functional Model, or Model subclass)
model.save('/content/teacher_model.keras')  # The file needs to end with the .keras extension

In [None]:
# It can be used to reconstruct the model identically.
# reconstructed_model = keras.models.load_model("teacher_model.keras")

In [59]:
### Student Model
# Create the student
student = keras.Sequential(
    [
        keras.Input(shape=(150, 150, 3)),
        layers.Conv2D(16, (3, 3), strides=(2, 2), padding="same"),
        layers.LeakyReLU(alpha=0.2),
        layers.MaxPooling2D(pool_size=(2, 2), strides=(1, 1), padding="same"),
        layers.Dropout(0.10),
        layers.Conv2D(16, (3, 3), strides=(2, 2), padding="same"),
        layers.Flatten(),
        layers.Dense(6),
    ],
    name="student",
)

In [53]:
student.summary()

Model: "student"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_25 (Conv2D)          (None, 75, 75, 16)        448       
                                                                 
 leaky_re_lu_13 (LeakyReLU)  (None, 75, 75, 16)        0         
                                                                 
 max_pooling2d_13 (MaxPooli  (None, 75, 75, 16)        0         
 ng2D)                                                           
                                                                 
 dropout_12 (Dropout)        (None, 75, 75, 16)        0         
                                                                 
 conv2d_26 (Conv2D)          (None, 38, 38, 16)        2320      
                                                                 
 flatten_11 (Flatten)        (None, 23104)             0         
                                                           

In [64]:
# Clone student for later comparison
student_scratch = keras.models.clone_model(student)

In [40]:
class Distiller(keras.Model):
    def __init__(self, student, teacher):
        super().__init__()
        self.teacher = teacher
        self.student = student

    def compile(
        self,
        optimizer,
        metrics,
        student_loss_fn,
        distillation_loss_fn,
        alpha=0.1,
        temperature=3,
    ):

        super().compile(optimizer=optimizer, metrics=metrics)
        self.student_loss_fn = student_loss_fn
        self.distillation_loss_fn = distillation_loss_fn
        self.alpha = alpha
        self.temperature = temperature

    def compute_loss(
        self, x=None, y=None, y_pred=None, sample_weight=None, allow_empty=False
    ):
        teacher_pred = self.teacher(x, training=False)
        student_loss = self.student_loss_fn(y, y_pred)

        distillation_loss = self.distillation_loss_fn(
            tf.nn.softmax(teacher_pred / self.temperature, axis=1),
            tf.nn.softmax(y_pred / self.temperature, axis=1),
        ) * (self.temperature**2)

        loss = self.alpha * student_loss + (1 - self.alpha) * distillation_loss
        return loss

    def call(self, x):
        return self.student(x)

In [None]:
# Initialize and compile distiller
distiller = Distiller(student=student, teacher=model)
distiller.compile(
    optimizer=keras.optimizers.Adam(),
    metrics=[keras.metrics.SparseCategoricalAccuracy()],
    student_loss_fn=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    distillation_loss_fn=keras.losses.KLDivergence(),
    alpha=0.1,
    temperature=3,
)

# Distill teacher to student
distiller.fit(train_ds, epochs=2)

Epoch 1/2

In [72]:
# Evaluate distilled model on test dataset
distiller.evaluate(test_ds)



0.1483333259820938

In [66]:
# Train student model for comparison
student_scratch.compile(
    optimizer=keras.optimizers.Adam(),
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=[keras.metrics.SparseCategoricalAccuracy()],
)

# Train and evaluate student trained from scratch.
student_scratch.fit(train_ds, epochs=8)
student_scratch.evaluate(test_ds)

Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


[11.44959545135498, 0.6016666889190674]

In [70]:
### Metrics Comparison

In [None]:
"""
1. **Model Size Ratio:** Ratio = (222.71 * 1024 KB) / 552.34 KB = 413x reduction in size
2. **Parameter Ratio:** Ratio = 58383238 / 141398 = 412 times reduction in parameters
3. **Accuracy:** Both of master and student model should provide accurate responses.
4. **Latency:** Time taken from feeding an input (single image) to receiving an output (prediction). -> latency is reduced since parameters and size are less
5. **End-to-End Functionality:** The entire pipeline, from the model building to the final prediction from both master and student models should be operational without any errors.
"""