In [2]:
# Install the required libraries
!pip install opendatasets --upgrade --quiet
!pip install kaggle --quiet
import opendatasets as od

# Download the dataset from Kaggle
dataset_url = 'https://www.kaggle.com/datasets/nilesh789/eurosat-rgb'
od.download(dataset_url)

# Set the path to the image folder
data_path = '/content/eurosat-rgb/2750'

Skipping, found downloaded files in "./eurosat-rgb" (use force=True to force download)


In [3]:
import os
import random
import cv2
import numpy as np
from keras.preprocessing.image import img_to_array
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from keras.callbacks import ModelCheckpoint
from keras.optimizers import SGD
from keras.layers import Input, Dense, Activation, Dropout, GlobalAveragePooling2D, BatchNormalization, ZeroPadding2D, AveragePooling2D, MaxPooling2D, Conv2D
from keras.models import Model
import keras.backend as K

In [4]:
data_path = '/content/eurosat-rgb/2750'
# Initialize image data and labels
image_data = []
labels = []
import os
import cv2
import numpy as np
from keras.preprocessing.image import img_to_array
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
# Load the image dataset
for root, dirs, files in os.walk(data_path):
    for file in files:
        if file.endswith(".jpg"):
            # Read the image
            image = cv2.imread(os.path.join(root, file))
            # image = cv2.resize(image, (128, 128))
            image = cv2.resize(image, (64, 64))
            image = img_to_array(image)
            image_data.append(image)

            # Use the folder name as the label
            label = root.split(os.path.sep)[-1]
            labels.append(label)

# Convert to numpy arrays and normalize the images
image_data = np.array(image_data, dtype="float") / 255.0
labels = np.array(labels)

# Binarize the labels
lb = LabelBinarizer()
labels = lb.fit_transform(labels)

# Split the dataset into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(image_data, labels, test_size=0.2, random_state=42)

print(f'Training samples: {len(x_train)}, Testing samples: {len(x_test)}')

Training samples: 21600, Testing samples: 5400


In [5]:
def student_model(img_rows, img_cols, color_type=1, num_classes=None):
    nb_dense_block = 3  # Fewer dense blocks than the teacher model
    growth_rate = 16  # Smaller growth rate
    nb_filter = 32  # Fewer filters
    return densenet121_model(img_rows=img_rows, img_cols=img_cols, color_type=color_type,
                             nb_dense_block=nb_dense_block, growth_rate=growth_rate,
                             nb_filter=nb_filter, num_classes=num_classes)

def distillation_loss(y_true, y_pred, teacher_pred, temperature=3.0, alpha=0.1):
    """
    Compute the distillation loss combining both:
    - Soft target loss (KL divergence)
    - Hard target loss (standard cross-entropy)
    """
    soft_labels = K.softmax(teacher_pred / temperature)
    soft_student = K.softmax(y_pred / temperature)

    # KL divergence for the soft labels
    distillation_loss = K.mean(K.sum(soft_labels * K.log(soft_labels / (soft_student + 1e-6)), axis=-1))

    # Standard cross-entropy loss
    standard_loss = K.categorical_crossentropy(y_true, y_pred)

    # Weighted sum of the losses
    return alpha * distillation_loss + (1. - alpha) * standard_loss


In [6]:
# Cell 2: Define DenseNet-121 model and helper functions
from keras.layers import Concatenate, Conv2D, Activation, BatchNormalization, Dropout, ZeroPadding2D, AveragePooling2D, GlobalAveragePooling2D, Dense, MaxPooling2D, Input
from keras.models import Model
from keras.optimizers import SGD
def densenet121_model(img_rows, img_cols, color_type=1, nb_dense_block=4, growth_rate=32, nb_filter=64, reduction=0.5, dropout_rate=0.0, weight_decay=1e-4, num_classes=None):
    global concat_axis
    img_input = Input(shape=(img_rows, img_cols, color_type), name='data')
    concat_axis = 3

    nb_filter = 64
    nb_layers = [6, 12, 24, 16]

    x = Conv2D(nb_filter, (7, 7), strides=(2, 2), name='conv1', use_bias=False)(img_input)
    x = BatchNormalization(axis=concat_axis)(x)
    x = Activation('relu')(x)
    x = MaxPooling2D((3, 3), strides=(2, 2))(x)

    for block_idx in range(nb_dense_block - 1):
        stage = block_idx + 2
        x, nb_filter = dense_block(x, stage, nb_layers[block_idx], nb_filter, growth_rate, dropout_rate=dropout_rate)
        x = transition_block(x, stage, nb_filter, dropout_rate=dropout_rate)
        nb_filter = int(nb_filter)

    final_stage = stage + 1
    x, nb_filter = dense_block(x, final_stage, nb_layers[-1], nb_filter, growth_rate, dropout_rate=dropout_rate)

    x = BatchNormalization(axis=concat_axis)(x)
    x = Activation('relu')(x)

    x_fc = GlobalAveragePooling2D()(x)
    x_fc = Dense(1000)(x_fc)
    x_fc = Activation('softmax')(x_fc)

    model = Model(img_input, x_fc)

    x_newfc = GlobalAveragePooling2D()(x)
    x_newfc = Dense(num_classes)(x_newfc)
    x_newfc = Activation('softmax')(x_newfc)

    model = Model(img_input, x_newfc)

    sgd = SGD(learning_rate=1e-2, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])

    return model

def conv_block(x, stage, branch, nb_filter, dropout_rate=None):
    inter_channel = nb_filter * 4
    x = BatchNormalization(axis=concat_axis)(x)
    x = Activation('relu')(x)
    x = Conv2D(inter_channel, (1, 1), use_bias=False)(x)

    if dropout_rate:
        x = Dropout(dropout_rate)(x)

    x = BatchNormalization(axis=concat_axis)(x)
    x = Activation('relu')(x)
    x = ZeroPadding2D((1, 1))(x)
    x = Conv2D(nb_filter, (3, 3), use_bias=False)(x)

    if dropout_rate:
        x = Dropout(dropout_rate)(x)

    return x

def transition_block(x, stage, nb_filter, dropout_rate=None):
    x = BatchNormalization(axis=concat_axis)(x)
    x = Activation('relu')(x)
    x = Conv2D(int(nb_filter), (1, 1), use_bias=False)(x)

    if dropout_rate:
        x = Dropout(dropout_rate)(x)

    x = AveragePooling2D((2, 2), strides=(2, 2))(x)
    return x

def dense_block(x, stage, nb_layers, nb_filter, growth_rate, dropout_rate=None, grow_nb_filters=True):
    concat_feat = x

    for i in range(nb_layers):
        branch = i + 1
        x = conv_block(concat_feat, stage, branch, growth_rate, dropout_rate)
        concat_feat = Concatenate(axis=concat_axis)([concat_feat, x])

        if grow_nb_filters:
            nb_filter += growth_rate

    return concat_feat, nb_filter


In [9]:
import tensorflow as tf
# Build the teacher and student models
teacher = densenet121_model(img_rows=64, img_cols=64, color_type=3, num_classes=10)
student = student_model(img_rows=64, img_cols=64, color_type=3, num_classes=10)

# # Teacher model should be pre-trained (for simplicity, using it as untrained here)
# for layer in teacher.layers:
#     layer.trainable = False

# # Compile the student model with the distillation loss
# student.compile(optimizer=SGD(learning_rate=1e-3, momentum=0.9),
#                 loss=lambda y_true, y_pred: distillation_loss(y_true, y_pred, teacher.predict(x_train)),
#                 metrics=['accuracy'])

# # Train the student model
# student.fit(x_train, y_train,
#             batch_size=16,
#             epochs=5,
#             validation_data=(x_test, y_test),
#             shuffle=True)
# Precompute teacher's predictions for the training set
teacher_preds = teacher.predict(x_train)

# Define distillation loss function
# def distillation_loss(y_true, y_pred, teacher_preds, temperature=3):
#     y_true = tf.keras.activations.softmax(y_true / temperature)
#     y_pred = tf.keras.activations.softmax(y_pred / temperature)
#     teacher_preds = tf.keras.activations.softmax(teacher_preds / temperature)

#     # Cross-entropy between the student predictions and the teacher predictions
#     return tf.keras.losses.categorical_crossentropy(teacher_preds, y_pred)

def distillation_loss(y_true, y_pred, teacher_preds, temperature=3):
    # Get the current batch size
    batch_size = tf.shape(y_pred)[0]

    # Use tf.gather to select the relevant teacher predictions for the current batch
    teacher_batch_preds = tf.gather(teacher_preds, tf.range(batch_size))

    y_true = tf.keras.activations.softmax(y_true / temperature)
    y_pred = tf.keras.activations.softmax(y_pred / temperature)
    teacher_batch_preds = tf.keras.activations.softmax(teacher_batch_preds / temperature)

    # Cross-entropy between the student predictions and the teacher predictions
    return tf.keras.losses.categorical_crossentropy(teacher_batch_preds, y_pred)


# Compile the student model with the distillation loss (using precomputed teacher predictions)
student.compile(optimizer=SGD(learning_rate=1e-3, momentum=0.9),
                loss=lambda y_true, y_pred: distillation_loss(y_true, y_pred, teacher_preds),
                metrics=['accuracy'])

# Train the student model
student.fit(x_train, y_train,
            batch_size=16,
            epochs=5,
            validation_data=(x_test, y_test))



[1m675/675[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m242s[0m 353ms/step
Epoch 1/5
[1m1350/1350[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m477s[0m 332ms/step - accuracy: 0.0840 - loss: 2.3029 - val_accuracy: 0.0833 - val_loss: 2.3028
Epoch 2/5
[1m1350/1350[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m531s[0m 354ms/step - accuracy: 0.0817 - loss: 2.3027 - val_accuracy: 0.0789 - val_loss: 2.3027
Epoch 3/5
[1m1350/1350[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m487s[0m 343ms/step - accuracy: 0.0796 - loss: 2.3027 - val_accuracy: 0.0783 - val_loss: 2.3027
Epoch 4/5
[1m1350/1350[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m502s[0m 343ms/step - accuracy: 0.0841 - loss: 2.3027 - val_accuracy: 0.0961 - val_loss: 2.3027
Epoch 5/5
[1m1350/1350[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m502s[0m 343ms/step - accuracy: 0.0865 - loss: 2.3027 - val_accuracy: 0.1043 - val_loss: 2.3027


<keras.src.callbacks.history.History at 0x7ee5efcb5c90>

In [10]:
 # Evaluate the trained student model
score = student.evaluate(x_test, y_test, verbose=0)
print(f'Test accuracy: {score[1] * 100:.2f}%')

Test accuracy: 10.43%


In [11]:
teacher_score = teacher.evaluate(x_test, y_test, verbose=0)
print(f'Teacher test accuracy: {teacher_score[1] * 100:.2f}%')

Teacher test accuracy: 12.26%


In [12]:
# Cell: Evaluate and print additional metrics for teacher and student models
from sklearn.metrics import classification_report
import time

def evaluate_model(model, x_test, y_test):
    # Predict the labels
    start_time = time.time()
    y_pred_probs = model.predict(x_test)
    inference_time = time.time() - start_time

    # Get the predicted classes
    y_pred = np.argmax(y_pred_probs, axis=1)
    y_true = np.argmax(y_test, axis=1)

    # Calculate precision, recall, F1 score
    report = classification_report(y_true, y_pred, target_names=lb.classes_)

    # Print the metrics
    print(report)
    print(f"Inference Time: {inference_time:.4f} seconds")
    return y_pred_probs

print("=== Teacher Model Evaluation ===")
teacher_pred_probs = evaluate_model(teacher, x_test, y_test)

print("\n=== Student Model Evaluation ===")
student_pred_probs = evaluate_model(student, x_test, y_test)


=== Teacher Model Evaluation ===
[1m169/169[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 369ms/step
                      precision    recall  f1-score   support

          AnnualCrop       0.00      0.00      0.00       572
              Forest       0.00      0.00      0.00       610
HerbaceousVegetation       0.00      0.00      0.00       599
             Highway       0.00      0.00      0.00       486
          Industrial       0.00      0.00      0.00       482
             Pasture       0.33      0.26      0.29       425
       PermanentCrop       0.00      0.00      0.00       505
         Residential       0.13      0.80      0.22       632
               River       0.04      0.09      0.06       507
             SeaLake       0.00      0.00      0.00       582

            accuracy                           0.12      5400
           macro avg       0.05      0.12      0.06      5400
        weighted avg       0.04      0.12      0.05      5400

Inference Time: 63

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[1m169/169[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 141ms/step
                      precision    recall  f1-score   support

          AnnualCrop       0.05      0.06      0.06       572
              Forest       0.27      0.13      0.17       610
HerbaceousVegetation       0.02      0.00      0.00       599
             Highway       0.05      0.01      0.02       486
          Industrial       0.12      0.13      0.12       482
             Pasture       0.06      0.07      0.06       425
       PermanentCrop       0.11      0.01      0.02       505
         Residential       0.09      0.08      0.08       632
               River       0.09      0.36      0.15       507
             SeaLake       0.18      0.20      0.19       582

            accuracy                           0.10      5400
           macro avg       0.10      0.10      0.09      5400
        weighted avg       0.11      0.10      0.09      5400

Inference Time: 41.4499 seconds


In [None]:
\