In [1]:
# Install the required libraries
!pip install opendatasets --upgrade --quiet
!pip install kaggle --quiet

# Import Kaggle and OpenDatasets to download datasets from Kaggle
import opendatasets as od

# Download the dataset from Kaggle
dataset_url = 'https://www.kaggle.com/datasets/abdulhasibuddin/uc-merced-land-use-dataset'
od.download(dataset_url)


Please provide your Kaggle credentials to download this dataset. Learn more: http://bit.ly/kaggle-creds
Your Kaggle username: akshayanand2002
Your Kaggle Key: ··········
Dataset URL: https://www.kaggle.com/datasets/abdulhasibuddin/uc-merced-land-use-dataset
Downloading uc-merced-land-use-dataset.zip to ./uc-merced-land-use-dataset


100%|██████████| 317M/317M [00:03<00:00, 92.4MB/s]





In [2]:
import os
import random
import cv2
import numpy as np
from keras.preprocessing.image import img_to_array
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from keras.callbacks import ModelCheckpoint
from keras.optimizers import SGD
from keras.layers import Input, Dense, Activation, Dropout, GlobalAveragePooling2D, BatchNormalization, ZeroPadding2D, AveragePooling2D, MaxPooling2D, Conv2D
from keras.models import Model
import keras.backend as K


In [3]:
# Set the path to the image folder
data_path = 'uc-merced-land-use-dataset/UCMerced_LandUse/Images'

# Initialize image data and labels
image_data = []
labels = []

# Load the image dataset
for root, dirs, files in os.walk(data_path):
    for file in files:
        if file.endswith(".tif"):
            # Read the image
            image = cv2.imread(os.path.join(root, file))
            image = cv2.resize(image, (128, 128))
            image = img_to_array(image)
            image_data.append(image)

            # Use the folder name as the label
            label = root.split(os.path.sep)[-1]
            labels.append(label)

# Convert to numpy arrays and normalize the images
image_data = np.array(image_data, dtype="float") / 255.0
labels = np.array(labels)

# Binarize the labels
lb = LabelBinarizer()
labels = lb.fit_transform(labels)

# Split the dataset into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(image_data, labels, test_size=0.2, random_state=42)

print(f'Training samples: {len(x_train)}, Testing samples: {len(x_test)}')


Training samples: 1680, Testing samples: 420


In [4]:
def student_model(img_rows, img_cols, color_type=1, num_classes=None):
    nb_dense_block = 3  # Fewer dense blocks than the teacher model
    growth_rate = 16  # Smaller growth rate
    nb_filter = 32  # Fewer filters
    return densenet121_model(img_rows=img_rows, img_cols=img_cols, color_type=color_type,
                             nb_dense_block=nb_dense_block, growth_rate=growth_rate,
                             nb_filter=nb_filter, num_classes=num_classes)

'''
def distillation_loss(y_true, y_pred, teacher_pred, temperature=3.0, alpha=0.1):
    """
    Compute the distillation loss combining both:
    - Soft target loss (KL divergence)
    - Hard target loss (standard cross-entropy)
    """
    soft_labels = K.softmax(teacher_pred / temperature)
    soft_student = K.softmax(y_pred / temperature)

    # KL divergence for the soft labels
    distillation_loss = K.mean(K.sum(soft_labels * K.log(soft_labels / (soft_student + 1e-6)), axis=-1))

    # Standard cross-entropy loss
    standard_loss = K.categorical_crossentropy(y_true, y_pred)

    # Weighted sum of the losses
    return alpha * distillation_loss + (1. - alpha) * standard_loss
'''

'\ndef distillation_loss(y_true, y_pred, teacher_pred, temperature=3.0, alpha=0.1):\n    """\n    Compute the distillation loss combining both:\n    - Soft target loss (KL divergence)\n    - Hard target loss (standard cross-entropy)\n    """\n    soft_labels = K.softmax(teacher_pred / temperature)\n    soft_student = K.softmax(y_pred / temperature)\n\n    # KL divergence for the soft labels\n    distillation_loss = K.mean(K.sum(soft_labels * K.log(soft_labels / (soft_student + 1e-6)), axis=-1))\n\n    # Standard cross-entropy loss\n    standard_loss = K.categorical_crossentropy(y_true, y_pred)\n\n    # Weighted sum of the losses\n    return alpha * distillation_loss + (1. - alpha) * standard_loss\n'

In [5]:
# Cell 2: Define DenseNet-121 model and helper functions
from keras.layers import Concatenate, Conv2D, Activation, BatchNormalization, Dropout, ZeroPadding2D, AveragePooling2D, GlobalAveragePooling2D, Dense, MaxPooling2D, Input
from keras.models import Model
from keras.optimizers import SGD
def densenet121_model(img_rows, img_cols, color_type=1, nb_dense_block=4, growth_rate=32, nb_filter=64, reduction=0.5, dropout_rate=0.0, weight_decay=1e-4, num_classes=None):
    global concat_axis
    img_input = Input(shape=(img_rows, img_cols, color_type), name='data')
    concat_axis = 3

    nb_filter = 64
    nb_layers = [6, 12, 24, 16]

    x = Conv2D(nb_filter, (7, 7), strides=(2, 2), name='conv1', use_bias=False)(img_input)
    x = BatchNormalization(axis=concat_axis)(x)
    x = Activation('relu')(x)
    x = MaxPooling2D((3, 3), strides=(2, 2))(x)

    for block_idx in range(nb_dense_block - 1):
        stage = block_idx + 2
        x, nb_filter = dense_block(x, stage, nb_layers[block_idx], nb_filter, growth_rate, dropout_rate=dropout_rate)
        x = transition_block(x, stage, nb_filter, dropout_rate=dropout_rate)
        nb_filter = int(nb_filter)

    final_stage = stage + 1
    x, nb_filter = dense_block(x, final_stage, nb_layers[-1], nb_filter, growth_rate, dropout_rate=dropout_rate)

    x = BatchNormalization(axis=concat_axis)(x)
    x = Activation('relu')(x)

    x_fc = GlobalAveragePooling2D()(x)
    x_fc = Dense(1000)(x_fc)
    x_fc = Activation('softmax')(x_fc)

    model = Model(img_input, x_fc)

    x_newfc = GlobalAveragePooling2D()(x)
    x_newfc = Dense(num_classes)(x_newfc)
    x_newfc = Activation('softmax')(x_newfc)

    model = Model(img_input, x_newfc)

    sgd = SGD(learning_rate=1e-2, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])

    return model

def conv_block(x, stage, branch, nb_filter, dropout_rate=None):
    inter_channel = nb_filter * 4
    x = BatchNormalization(axis=concat_axis)(x)
    x = Activation('relu')(x)
    x = Conv2D(inter_channel, (1, 1), use_bias=False)(x)

    if dropout_rate:
        x = Dropout(dropout_rate)(x)

    x = BatchNormalization(axis=concat_axis)(x)
    x = Activation('relu')(x)
    x = ZeroPadding2D((1, 1))(x)
    x = Conv2D(nb_filter, (3, 3), use_bias=False)(x)

    if dropout_rate:
        x = Dropout(dropout_rate)(x)

    return x

def transition_block(x, stage, nb_filter, dropout_rate=None):
    x = BatchNormalization(axis=concat_axis)(x)
    x = Activation('relu')(x)
    x = Conv2D(int(nb_filter), (1, 1), use_bias=False)(x)

    if dropout_rate:
        x = Dropout(dropout_rate)(x)

    x = AveragePooling2D((2, 2), strides=(2, 2))(x)
    return x

def dense_block(x, stage, nb_layers, nb_filter, growth_rate, dropout_rate=None, grow_nb_filters=True):
    concat_feat = x

    for i in range(nb_layers):
        branch = i + 1
        x = conv_block(concat_feat, stage, branch, growth_rate, dropout_rate)
        concat_feat = Concatenate(axis=concat_axis)([concat_feat, x])

        if grow_nb_filters:
            nb_filter += growth_rate

    return concat_feat, nb_filter


In [10]:
'''
import tensorflow as tf
# Build the teacher and student models
teacher = densenet121_model(img_rows=128, img_cols=128, color_type=3, num_classes=21)  # 21 classes in UC-Merced
student = student_model(img_rows=128, img_cols=128, color_type=3, num_classes=21)

teacher.compile(optimizer=SGD(learning_rate=1e-2, momentum=0.9), loss='categorical_crossentropy', metrics=['accuracy'])

# # Teacher model should be pre-trained (for simplicity, using it as untrained here)
# for layer in teacher.layers:
#     layer.trainable = False

# # Compile the student model with the distillation loss
# student.compile(optimizer=SGD(learning_rate=1e-3, momentum=0.9),
#                 loss=lambda y_true, y_pred: distillation_loss(y_true, y_pred, teacher.predict(x_train)),
#                 metrics=['accuracy'])

# # Train the student model
# student.fit(x_train, y_train,
#             batch_size=16,
#             epochs=5,
#             validation_data=(x_test, y_test),
#             shuffle=True)
# Precompute teacher's predictions for the training set
teacher_preds = teacher.predict(x_train)

# Define distillation loss function
# def distillation_loss(y_true, y_pred, teacher_preds, temperature=3):
#     y_true = tf.keras.activations.softmax(y_true / temperature)
#     y_pred = tf.keras.activations.softmax(y_pred / temperature)
#     teacher_preds = tf.keras.activations.softmax(teacher_preds / temperature)

#     # Cross-entropy between the student predictions and the teacher predictions
#     return tf.keras.losses.categorical_crossentropy(teacher_preds, y_pred)


'''
# def distillation_loss(y_true, y_pred, teacher_preds, temperature=3):
#     # Get the current batch size
#     batch_size = tf.shape(y_pred)[0]

#     # Use tf.gather to select the relevant teacher predictions for the current batch
#     teacher_batch_preds = tf.gather(teacher_preds, tf.range(batch_size))

#     y_true = tf.keras.activations.softmax(y_true / temperature)
#     y_pred = tf.keras.activations.softmax(y_pred / temperature)
#     teacher_batch_preds = tf.keras.activations.softmax(teacher_batch_preds / temperature)

#     # Cross-entropy between the student predictions and the teacher predictions
#     return tf.keras.losses.categorical_crossentropy(teacher_batch_preds, y_pred)
'''
def distillation_loss(y_true, y_pred, teacher_preds, temperature=3.0, alpha=0.1):
    """
    Compute the distillation loss combining both:
    - Soft target loss (KL divergence)
    - Hard target loss (standard cross-entropy)
    """
    # Get the current batch size
    batch_size = tf.shape(y_pred)[0]

    # Use tf.gather to select the relevant teacher predictions for the current batch
    teacher_batch_preds = tf.gather(teacher_preds, tf.range(batch_size))

    # Softmax for soft labels and student predictions with temperature scaling
    y_true = tf.keras.activations.softmax(y_true / temperature)
    y_pred = tf.keras.activations.softmax(y_pred / temperature)
    teacher_batch_preds = tf.keras.activations.softmax(teacher_batch_preds / temperature)

    # Cross-entropy between the student predictions and the teacher predictions (soft targets)
    soft_loss = tf.keras.losses.categorical_crossentropy(teacher_batch_preds, y_pred)

    # Standard cross-entropy loss with hard targets
    hard_loss = tf.keras.losses.categorical_crossentropy(y_true, y_pred)

    # Weighted sum of the distillation loss and the hard target loss
    return alpha * soft_loss + (1. - alpha) * hard_loss


# Compile the student model with the distillation loss (using precomputed teacher predictions)
student.compile(optimizer=SGD(learning_rate=1e-3, momentum=0.9),
                loss=lambda y_true, y_pred: distillation_loss(y_true, y_pred, teacher_preds),
                metrics=['accuracy'])

# Train the student model
student.fit(x_train, y_train,
            batch_size=16,
            epochs=5,
            validation_data=(x_test, y_test))

'''
# Import required libraries
import tensorflow as tf
from tensorflow.keras.optimizers import SGD

# Assuming densenet121_model and student_model functions are already defined
# Replace with appropriate model definitions if necessary

# Build the teacher and student models
teacher = densenet121_model(img_rows=128, img_cols=128, color_type=3, num_classes=21)  # 21 classes in UC-Merced
student = student_model(img_rows=128, img_cols=128, color_type=3, num_classes=21)

# Compile the teacher model (pre-training the teacher model if necessary)
teacher.compile(optimizer=SGD(learning_rate=1e-2, momentum=0.9), loss='categorical_crossentropy', metrics=['accuracy'])

# Train teacher model (pretrain if needed)
# Note: If teacher model is already pre-trained, you can load weights here instead of training
teacher.fit(x_train, y_train,
            batch_size=16,
            epochs=5,  # Adjust epochs as needed for pre-training
            validation_data=(x_test, y_test))

# Precompute teacher's predictions for the training set
teacher_preds = teacher.predict(x_train)

# Define distillation loss function
def distillation_loss(y_true, y_pred, teacher_preds, temperature=3.0, alpha=0.4):
    """
    Compute the distillation loss combining both:
    - Soft target loss (KL divergence)
    - Hard target loss (standard cross-entropy)
    """
    # Get the current batch size
    batch_size = tf.shape(y_pred)[0]

    # Use tf.gather to select the relevant teacher predictions for the current batch
    teacher_batch_preds = tf.gather(teacher_preds, tf.range(batch_size))

    # Softmax for soft labels and student predictions with temperature scaling
    y_true = tf.keras.activations.softmax(y_true / temperature)
    y_pred = tf.keras.activations.softmax(y_pred / temperature)
    teacher_batch_preds = tf.keras.activations.softmax(teacher_batch_preds / temperature)

    # Cross-entropy between the student predictions and the teacher predictions (soft targets)
    soft_loss = tf.keras.losses.categorical_crossentropy(teacher_batch_preds, y_pred)

    # Standard cross-entropy loss with hard targets
    hard_loss = tf.keras.losses.categorical_crossentropy(y_true, y_pred)

    # Weighted sum of the distillation loss and the hard target loss
    return alpha * soft_loss + (1. - alpha) * hard_loss

# Compile the student model with the distillation loss (using precomputed teacher predictions)
student.compile(optimizer=SGD(learning_rate=1e-3, momentum=0.9),
                loss=lambda y_true, y_pred: distillation_loss(y_true, y_pred, teacher_preds),
                metrics=['accuracy'])

# Train the student model
student.fit(x_train, y_train,
            batch_size=16,
            epochs=5,
            validation_data=(x_test, y_test))





Epoch 1/5
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m549s[0m 5s/step - accuracy: 0.2159 - loss: 3.0263 - val_accuracy: 0.0762 - val_loss: 3.5808
Epoch 2/5
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m491s[0m 5s/step - accuracy: 0.3036 - loss: 2.7292 - val_accuracy: 0.1929 - val_loss: 3.6921
Epoch 3/5
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m491s[0m 5s/step - accuracy: 0.4190 - loss: 2.0500 - val_accuracy: 0.1738 - val_loss: 5.0872
Epoch 4/5
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m493s[0m 5s/step - accuracy: 0.4974 - loss: 1.6953 - val_accuracy: 0.0738 - val_loss: 17.4521
Epoch 5/5
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m476s[0m 5s/step - accuracy: 0.5974 - loss: 1.3525 - val_accuracy: 0.2024 - val_loss: 6.2045
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m102s[0m 2s/step
Epoch 1/5
[1m105/105[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m198s[0m 2s/step - accuracy: 0.0372 - loss: 3.

<keras.src.callbacks.history.History at 0x7cbbddc36020>

In [11]:
 # Evaluate the trained student model
score = student.evaluate(x_test, y_test, verbose=0)
print(f'Test accuracy: {score[1] * 100:.2f}%')


Test accuracy: 5.95%


In [12]:
teacher_score = teacher.evaluate(x_test, y_test, verbose=0)
print(f'Teacher test accuracy: {teacher_score[1] * 100:.2f}%')

Teacher test accuracy: 20.24%


In [13]:
# Cell: Evaluate and print additional metrics for teacher and student models
from sklearn.metrics import classification_report
import time

def evaluate_model(model, x_test, y_test):
    # Predict the labels
    start_time = time.time()
    y_pred_probs = model.predict(x_test)
    inference_time = time.time() - start_time

    # Get the predicted classes
    y_pred = np.argmax(y_pred_probs, axis=1)
    y_true = np.argmax(y_test, axis=1)

    # Calculate precision, recall, F1 score
    report = classification_report(y_true, y_pred, target_names=lb.classes_)

    # Print the metrics
    print(report)
    print(f"Inference Time: {inference_time:.4f} seconds")
    return y_pred_probs

print("=== Teacher Model Evaluation ===")
teacher_pred_probs = evaluate_model(teacher, x_test, y_test)

print("\n=== Student Model Evaluation ===")
student_pred_probs = evaluate_model(student, x_test, y_test)


=== Teacher Model Evaluation ===
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 2s/step


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


                   precision    recall  f1-score   support

     agricultural       0.00      0.00      0.00        17
         airplane       0.00      0.00      0.00        17
  baseballdiamond       1.00      0.05      0.09        21
            beach       1.00      0.05      0.10        20
        buildings       0.23      0.29      0.26        17
        chaparral       0.00      0.00      0.00        16
 denseresidential       1.00      0.06      0.11        18
           forest       0.84      0.67      0.74        24
          freeway       1.00      0.06      0.11        18
       golfcourse       0.00      0.00      0.00        27
           harbor       0.77      0.45      0.57        22
     intersection       0.12      0.35      0.18        23
mediumresidential       0.00      0.00      0.00        26
   mobilehomepark       0.00      0.00      0.00        18
         overpass       0.08      0.95      0.15        20
       parkinglot       0.64      0.45      0.53       

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
