In [22]:

# IMPORTS

import tensorflow as tf
import numpy as np
from tensorflow import keras
from keras import layers, models
import matplotlib.pyplot as plt
from keras.datasets import cifar10
from keras.utils import to_categorical
from keras import metrics, losses
from keras.models import load_model
import urllib.request
import os
import tarfile
from PIL import Image
import seaborn as sns
import glob

In [23]:
# Load both teacher and student models from their saved HDF5 files.
try:
    teacher_model = load_model('teacher_model.h5')
    scratch_student = load_model('student_model.h5')
except Exception as e:
    # Every later cell needs both models. Report the problem, then re-raise so
    # the notebook stops here instead of failing later with NameErrors.
    print(f"Error loading models: {str(e)}")
    raise

# Compile teacher model
teacher_model.compile(optimizer='adam',
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])

print("Models loaded and compiled successfully!")



Models loaded and compiled successfully!


In [24]:
# Load STL-10 dataset for model extraction attack
IMAGE_SIZE = (32, 32)  # every image is resized to this (width, height)


def load_image_folder(folder, size=IMAGE_SIZE):
    """Read every image file in ``folder`` into a single array.

    Args:
        folder: Directory searched (non-recursively) with the pattern ``*.*``.
        size: ``(width, height)`` each image is resized to.

    Returns:
        Array of shape ``(num_images, height, width, 3)`` holding the RGB
        pixels of each image, in sorted-path order.

    Raises:
        FileNotFoundError: If the pattern matches no files in ``folder``.
    """
    # Sorted so the sample order (and therefore any validation_split taken
    # from it) is the same on every run and platform.
    image_paths = sorted(glob.glob(os.path.join(folder, '*.*')))
    if not image_paths:
        raise FileNotFoundError(f"No image files found in '{folder}'")

    arrays = []
    for img_path in image_paths:
        # The context manager releases the file handle once the pixels are copied.
        with Image.open(img_path) as img:
            arrays.append(np.array(img.convert('RGB').resize(size)))
    return np.array(arrays)


print("Loading STL-10 training images...")
train_path = 'STL-10/train_images'
X_train = load_image_folder(train_path)

# Load test images
print("Loading STL-10 test images...")
test_path = 'STL-10/test_images'
X_test = load_image_folder(test_path)

# Normalize the data to floats in [0, 1]
X_train = X_train.astype('float32')/255.0
X_test = X_test.astype('float32')/255.0

# Placeholder label arrays (one zero per image).
# NOTE(review): these are not referenced again in the cells visible here; the
# labels actually used come from the teacher's predictions in a later cell.
y_train = np.zeros(len(X_train))
y_test = np.zeros(len(X_test))

Loading STL-10 training images...
Loading STL-10 test images...


In [None]:
# Baseline (no knowledge distillation) student: configure the optimizer, the
# loss, and the metrics that fit/evaluate will report for it.
scratch_student.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy', 'precision', 'recall'],
)


In [None]:
# Generate training labels from teacher model predictions
print("Generating training labels from teacher model...")
teacher_train_probs = teacher_model.predict(X_train)
teacher_test_probs = teacher_model.predict(X_test)

# Turn the teacher's per-class scores into one-hot "hard" labels (its argmax class).
# The student is compiled with precision/recall, which expect binary targets;
# feeding raw probabilities as y_true makes nearly every entry count as a positive.
num_classes = teacher_train_probs.shape[1]
train_labels = to_categorical(np.argmax(teacher_train_probs, axis=1), num_classes=num_classes)
test_labels = to_categorical(np.argmax(teacher_test_probs, axis=1), num_classes=num_classes)

# First, let us try to see what if we directly train the student model without using knowledge distillation
scratch_student.fit(X_train, train_labels, epochs=7, batch_size=32)

In [None]:
# We evaluate student model for its loss and accuracy, if the student model is trained without using knowledge distillation
print("\nEvaluating student model performance without knowledge distillation:")
# evaluate() returns the loss followed by one value per compiled metric
# (accuracy, precision, recall), so collect the trailing values instead of
# unpacking into exactly two names, which raises ValueError.
test_loss, test_accuracy, *other_test_metrics = scratch_student.evaluate(X_test, test_labels)
print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_accuracy:.4f}")

# Get predictions for confusion matrix
y_pred = scratch_student.predict(X_test)
y_true = np.argmax(test_labels, axis=1)
y_pred_classes = np.argmax(y_pred, axis=1)

# Plot confusion matrix. num_classes fixes the matrix size to the label width,
# so classes missing from both y_true and the predictions still get a row/column.
cm = tf.math.confusion_matrix(y_true, y_pred_classes, num_classes=test_labels.shape[1])
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
ax.set_title('Confusion Matrix - Student Model (Without KD)')
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
plt.show()

In [20]:
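# Now let us try using knowledge distillation.
# Knowledge distillation class: alpha weights the student loss against the provided labels and
# (1 - alpha) weights the distillation loss against the teacher, so lower alpha to make the student lean more on the teacher.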

class Distiller(keras.Model):
    """Trains a student model to match both the given labels and a teacher model.

    Calling the distiller forwards the input to the student. The loss combines
    the student's loss against the provided targets with a distillation loss
    between temperature-scaled teacher and student outputs.
    """

    def __init__(self, student, teacher):
        """Store the two models and freeze the teacher.

        Args:
            student: Model whose weights are optimized.
            teacher: Model providing the targets for the distillation loss.
                Its ``trainable`` flag is set to ``False`` here (a side effect
                on the object passed in).
        """
        super().__init__()
        self.teacher = teacher
        self.student = student
        # The teacher is a tracked sub-model, so its weights would otherwise be
        # part of this model's trainable weights and be updated by fit().
        self.teacher.trainable = False

    def compile(
        self,
        optimizer,
        metrics,
        student_loss_fn,
        distillation_loss_fn,
        alpha=0.2,
        temperature=3,
    ):
        """Configure the distiller.

        Args:
            optimizer: Keras optimizer for the student weights
            metrics: Keras metrics for evaluation
            student_loss_fn: Loss function of difference between student
                predictions and ground-truth
            distillation_loss_fn: Loss function of difference between soft
                student predictions and soft teacher predictions
            alpha: weight to student_loss_fn and 1-alpha to distillation_loss_fn
            temperature: Temperature for softening probability distributions.
                Larger temperature gives softer distributions.
        """
        super().compile(optimizer=optimizer, metrics=metrics)
        self.student_loss_fn = student_loss_fn
        self.distillation_loss_fn = distillation_loss_fn
        self.alpha = alpha
        self.temperature = temperature

    def compute_loss(
        self, x=None, y=None, y_pred=None, sample_weight=None, allow_empty=False
    ):
        """Return the weighted sum of the student loss and the distillation loss.

        Args:
            x: Input batch; passed to the teacher to obtain its outputs.
            y: Targets for the student loss.
            y_pred: Student outputs for ``x``.
            sample_weight: Accepted for signature compatibility; not used.
            allow_empty: Accepted for signature compatibility; not used.

        Returns:
            ``alpha * student_loss + (1 - alpha) * distillation_loss``.
        """
        # The teacher only supplies targets: run it in inference mode and block
        # gradients so the loss cannot pull the teacher towards the student.
        teacher_pred = tf.stop_gradient(self.teacher(x, training=False))
        student_loss = self.student_loss_fn(y, y_pred)

        # NOTE(review): softmax is applied to both outputs, which presumes they
        # are logits. Confirm that neither model already ends in a softmax layer.
        distillation_loss = self.distillation_loss_fn(
            tf.nn.softmax(teacher_pred / self.temperature, axis=1),
            tf.nn.softmax(y_pred / self.temperature, axis=1),
        ) * (self.temperature**2)  # scaled by T^2, as in the original formula

        loss = self.alpha * student_loss + (1 - self.alpha) * distillation_loss
        return loss

    def call(self, x):
        """Forward pass: the distiller's output is the student's output."""
        return self.student(x)

In [21]:
# Initialize the distiller
# Train the student model using knowledge distillation
print("\nInitializing knowledge distillation training...")

# Start from a freshly loaded copy of the saved student. `scratch_student` has
# already been fitted by the no-KD baseline cell, so distilling into it would
# continue that training and the with/without-KD comparison would not be
# like-for-like.
kd_student = load_model('student_model.h5')
distiller = Distiller(student=kd_student, teacher=teacher_model)

# Compiling the Distiller with enhanced metrics
distiller.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    metrics=[
        metrics.CategoricalAccuracy(),
        metrics.Precision(),
        metrics.Recall()
    ],
    student_loss_fn=losses.CategoricalCrossentropy(),
    distillation_loss_fn=losses.CategoricalCrossentropy(),
    alpha=0.2,  # Weight for student loss
    temperature=1,  # Temperature; 1 leaves the distributions unsoftened
)

# Fitting the student model with knowledge distillation
print("\nTraining student model with knowledge distillation...")
history = distiller.fit(
    X_train,
    train_labels,
    epochs=7,
    batch_size=32,
    validation_split=0.2,
    callbacks=[
        # Stop once val_loss has not improved for 2 epochs and roll back to the best weights.
        keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=2,
            restore_best_weights=True
        )
    ]
)

# Plot training history: accuracy on the left, loss on the right
fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(12, 4))

acc_ax.plot(history.history['categorical_accuracy'], label='Training Accuracy')
acc_ax.plot(history.history['val_categorical_accuracy'], label='Validation Accuracy')
acc_ax.set_title('Model Accuracy During Training')
acc_ax.set_xlabel('Epoch')
acc_ax.set_ylabel('Accuracy')
acc_ax.legend()

loss_ax.plot(history.history['loss'], label='Training Loss')
loss_ax.plot(history.history['val_loss'], label='Validation Loss')
loss_ax.set_title('Model Loss During Training')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.legend()

fig.tight_layout()
plt.show()


Initializing knowledge distillation training...


NameError: name 'student_model' is not defined

In [None]:
# Evaluate the distilled student: loss plus the three metrics the distiller
# was compiled with, then a confusion matrix and a comparison with the baseline.
print("\nEvaluating student model performance with knowledge distillation:")
kd_scores = distiller.evaluate(X_test, test_labels)
test_loss, test_accuracy, test_precision, test_recall = kd_scores
print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_accuracy:.4f}")
print(f"Test Precision: {test_precision:.4f}")
print(f"Test Recall: {test_recall:.4f}")

# Class indices for the confusion matrix (argmax over the class axis)
y_true = np.argmax(test_labels, axis=1)
y_pred = distiller.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)

# Confusion matrix heatmap
cm = tf.math.confusion_matrix(y_true, y_pred_classes)
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
ax.set_title('Confusion Matrix - Student Model (With Knowledge Distillation)')
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
plt.show()

# Compare with baseline performance.
# NOTE(review): the baseline accuracy is a hard-coded figure, not the value
# measured by the no-KD evaluation cell in this run. Confirm it is current.
baseline_accuracy = 0.5433
print("\nPerformance Comparison:")
print("Without Knowledge Distillation:")
print(f"Accuracy: {baseline_accuracy:.4f}")  # Your baseline accuracy
print("\nWith Knowledge Distillation:")
print(f"Accuracy: {test_accuracy:.4f}")
print(f"Improvement: {(test_accuracy - baseline_accuracy)*100:.2f}%")