In [1]:
import tensorflow as tf

TEACHER_PATH = "baseline_mobilenetv3large_final.keras"

teacher = tf.keras.models.load_model(
    TEACHER_PATH,
    compile=False
)
teacher.trainable = False

print("✅ Teacher model loaded & frozen")


✅ Teacher model loaded & frozen


In [2]:
import os
from tensorflow.keras.preprocessing.image import ImageDataGenerator

IMG_SIZE = 224
BATCH_SIZE = 16

DATASET_DIR = r"C:\Users\adity\Downloads\dataset_split_final"

train_dir = os.path.join(DATASET_DIR, "train")
val_dir   = os.path.join(DATASET_DIR, "val")

# ⚠️ TANPA RESCALE (konsisten dengan training sebelumnya)
datagen = ImageDataGenerator()

train_gen = datagen.flow_from_directory(
    train_dir,
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode="categorical",
    shuffle=True
)

val_gen = datagen.flow_from_directory(
    val_dir,
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode="categorical",
    shuffle=False
)

print("✅ train_gen & val_gen siap untuk KD")


Found 9000 images belonging to 10 classes.
Found 1000 images belonging to 10 classes.
✅ train_gen & val_gen siap untuk KD


In [3]:
IMG_SIZE = 224
NUM_CLASSES = train_gen.num_classes

base_student = tf.keras.applications.MobileNetV3Small(
    input_shape=(IMG_SIZE, IMG_SIZE, 3),
    include_top=False,
    weights="imagenet"
)

base_student.trainable = True

x = base_student.output
x = tf.keras.layers.GlobalAveragePooling2D()(x)
x = tf.keras.layers.Dense(128, activation="relu")(x)
outputs = tf.keras.layers.Dense(NUM_CLASSES)(x)  # logits

student = tf.keras.Model(
    inputs=base_student.input,
    outputs=outputs
)

print("✅ Student model built")


✅ Student model built


In [4]:
class Distiller(tf.keras.Model):
    def __init__(self, student, teacher, temperature=4.0, alpha=0.5):
        super().__init__()
        self.student = student
        self.teacher = teacher
        self.temperature = temperature
        self.alpha = alpha

        self.student_loss_fn = tf.keras.losses.CategoricalCrossentropy(
            from_logits=True
        )
        self.distill_loss_fn = tf.keras.losses.KLDivergence()
        self.metric = tf.keras.metrics.CategoricalAccuracy()

    def compile(self, optimizer):
        super().compile()
        self.optimizer = optimizer

    def train_step(self, data):
        x, y_true = data

        teacher_logits = self.teacher(x, training=False)

        with tf.GradientTape() as tape:
            student_logits = self.student(x, training=True)

            # Hard loss
            student_loss = self.student_loss_fn(y_true, student_logits)

            # Soft loss
            soft_teacher = tf.nn.softmax(
                teacher_logits / self.temperature, axis=1
            )
            soft_student = tf.nn.softmax(
                student_logits / self.temperature, axis=1
            )

            distill_loss = self.distill_loss_fn(
                soft_teacher, soft_student
            ) * (self.temperature ** 2)

            total_loss = (
                self.alpha * student_loss +
                (1 - self.alpha) * distill_loss
            )

        grads = tape.gradient(total_loss, self.student.trainable_variables)
        self.optimizer.apply_gradients(
            zip(grads, self.student.trainable_variables)
        )

        self.metric.update_state(y_true, tf.nn.softmax(student_logits))

        return {
            "loss": total_loss,
            "student_loss": student_loss,
            "distill_loss": distill_loss,
            "accuracy": self.metric.result()
        }

    def test_step(self, data):
        x, y = data
        logits = self.student(x, training=False)
        loss = self.student_loss_fn(y, logits)
        self.metric.update_state(y, tf.nn.softmax(logits))
        return {"loss": loss, "accuracy": self.metric.result()}


In [5]:
distiller = Distiller(
    student=student,
    teacher=teacher,
    temperature=3.0,
    alpha=0.7
)

distiller.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4)
)

print("✅ Distiller compiled")


✅ Distiller compiled


In [6]:
history_kd = distiller.fit(
    train_gen,
    validation_data=val_gen,
    epochs=30,
    callbacks=[
        tf.keras.callbacks.EarlyStopping(
            monitor="val_loss",
            patience=5,
            restore_best_weights=True
        )
    ]
)


Epoch 1/30


  self._warn_if_super_not_called()


[1m563/563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m268s[0m 436ms/step - accuracy: 0.6130 - distill_loss: 0.5413 - loss: 0.7734 - student_loss: 0.8729 - val_accuracy: 0.6150 - val_loss: 1.1055
Epoch 2/30
[1m563/563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m245s[0m 435ms/step - accuracy: 0.9487 - distill_loss: 0.6223 - loss: 0.4779 - student_loss: 0.4160 - val_accuracy: 0.8850 - val_loss: 0.5038
Epoch 3/30
[1m563/563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m242s[0m 430ms/step - accuracy: 0.9734 - distill_loss: 0.6037 - loss: 0.4215 - student_loss: 0.3435 - val_accuracy: 0.9450 - val_loss: 0.4327
Epoch 4/30
[1m563/563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m243s[0m 432ms/step - accuracy: 0.9829 - distill_loss: 0.5899 - loss: 0.3898 - student_loss: 0.3040 - val_accuracy: 0.9770 - val_loss: 0.3489
Epoch 5/30
[1m563/563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m241s[0m 428ms/step - accuracy: 0.9904 - distill_loss: 0.5771 - loss: 0.3716 - student_loss: 

In [8]:
student.save("student_kd3_mobilenetv3small.keras")
print("✅ KD Student model saved")


✅ KD Student model saved
