In [None]:
# Install the required libraries
!pip install opendatasets --upgrade --quiet
!pip install kaggle --quiet

# Import Kaggle and OpenDatasets to download datasets from Kaggle
import opendatasets as od

# Download the dataset from Kaggle
dataset_url = 'https://www.kaggle.com/datasets/abdulhasibuddin/uc-merced-land-use-dataset'
od.download(dataset_url)


Please provide your Kaggle credentials to download this dataset. Learn more: http://bit.ly/kaggle-creds
Your Kaggle username: akshayanand2002
Your Kaggle Key: ··········
Dataset URL: https://www.kaggle.com/datasets/abdulhasibuddin/uc-merced-land-use-dataset
Downloading uc-merced-land-use-dataset.zip to ./uc-merced-land-use-dataset


100%|██████████| 317M/317M [00:11<00:00, 28.0MB/s]





In [None]:
import os
import random
import cv2
import numpy as np
from keras.preprocessing.image import img_to_array
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from keras.callbacks import ModelCheckpoint
from keras.optimizers import SGD
from keras.layers import Input, Dense, Activation, Dropout, GlobalAveragePooling2D, BatchNormalization, ZeroPadding2D, AveragePooling2D, MaxPooling2D, Conv2D
from keras.models import Model
import keras.backend as K


In [None]:
# Set the path to the image folder
data_path = 'uc-merced-land-use-dataset/UCMerced_LandUse/Images'

# Initialize image data and labels
image_data = []
labels = []

# Load the image dataset
for root, dirs, files in os.walk(data_path):
    for file in files:
        if file.endswith(".tif"):
            # Read the image
            image = cv2.imread(os.path.join(root, file))
            image = cv2.resize(image, (128, 128))
            image = img_to_array(image)
            image_data.append(image)

            # Use the folder name as the label
            label = root.split(os.path.sep)[-1]
            labels.append(label)

# Convert to numpy arrays and normalize the images
image_data = np.array(image_data, dtype="float") / 255.0
labels = np.array(labels)

# Binarize the labels
lb = LabelBinarizer()
labels = lb.fit_transform(labels)

# Split the dataset into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(image_data, labels, test_size=0.2, random_state=42)

print(f'Training samples: {len(x_train)}, Testing samples: {len(x_test)}')


Training samples: 1680, Testing samples: 420


In [None]:
def student_model(img_rows, img_cols, color_type=1, num_classes=None):
    nb_dense_block = 3  # Fewer dense blocks than the teacher model
    growth_rate = 32  # Smaller growth rate
    nb_filter = 32  # Fewer filters
    return densenet121_model(img_rows=img_rows, img_cols=img_cols, color_type=color_type,
                             nb_dense_block=nb_dense_block, growth_rate=growth_rate,
                             nb_filter=nb_filter, num_classes=num_classes)

'\ndef distillation_loss(y_true, y_pred, teacher_pred, temperature=3.0, alpha=0.1):\n    """\n    Compute the distillation loss combining both:\n    - Soft target loss (KL divergence)\n    - Hard target loss (standard cross-entropy)\n    """\n    soft_labels = K.softmax(teacher_pred / temperature)\n    soft_student = K.softmax(y_pred / temperature)\n\n    # KL divergence for the soft labels\n    distillation_loss = K.mean(K.sum(soft_labels * K.log(soft_labels / (soft_student + 1e-6)), axis=-1))\n\n    # Standard cross-entropy loss\n    standard_loss = K.categorical_crossentropy(y_true, y_pred)\n\n    # Weighted sum of the losses\n    return alpha * distillation_loss + (1. - alpha) * standard_loss\n'

In [None]:
from keras.layers import Concatenate, Conv2D, Activation, BatchNormalization, Dropout, ZeroPadding2D, AveragePooling2D, GlobalAveragePooling2D, Dense, MaxPooling2D, Input
from keras.models import Model
from keras.optimizers import SGD
def densenet121_model(img_rows, img_cols, color_type=1, nb_dense_block=4, growth_rate=32, nb_filter=64, reduction=0.5, dropout_rate=0.0, weight_decay=1e-4, num_classes=None):
    global concat_axis
    img_input = Input(shape=(img_rows, img_cols, color_type), name='data')
    concat_axis = 3

    nb_filter = 64
    nb_layers = [6, 12, 24, 16]

    x = Conv2D(nb_filter, (7, 7), strides=(2, 2), name='conv1', use_bias=False)(img_input)
    x = BatchNormalization(axis=concat_axis)(x)
    x = Activation('relu')(x)
    x = MaxPooling2D((3, 3), strides=(2, 2))(x)

    for block_idx in range(nb_dense_block - 1):
        stage = block_idx + 2
        x, nb_filter = dense_block(x, stage, nb_layers[block_idx], nb_filter, growth_rate, dropout_rate=dropout_rate)
        x = transition_block(x, stage, nb_filter, dropout_rate=dropout_rate)
        nb_filter = int(nb_filter)

    final_stage = stage + 1
    x, nb_filter = dense_block(x, final_stage, nb_layers[-1], nb_filter, growth_rate, dropout_rate=dropout_rate)

    x = BatchNormalization(axis=concat_axis)(x)
    x = Activation('relu')(x)

    x_fc = GlobalAveragePooling2D()(x)
    x_fc = Dense(1000)(x_fc)
    x_fc = Activation('softmax')(x_fc)

    model = Model(img_input, x_fc)

    x_newfc = GlobalAveragePooling2D()(x)
    x_newfc = Dense(num_classes)(x_newfc)
    x_newfc = Activation('softmax')(x_newfc)

    model = Model(img_input, x_newfc)

    sgd = SGD(learning_rate=1e-2, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])

    return model

def conv_block(x, stage, branch, nb_filter, dropout_rate=None):
    inter_channel = nb_filter * 4
    x = BatchNormalization(axis=concat_axis)(x)
    x = Activation('relu')(x)
    x = Conv2D(inter_channel, (1, 1), use_bias=False)(x)

    if dropout_rate:
        x = Dropout(dropout_rate)(x)

    x = BatchNormalization(axis=concat_axis)(x)
    x = Activation('relu')(x)
    x = ZeroPadding2D((1, 1))(x)
    x = Conv2D(nb_filter, (3, 3), use_bias=False)(x)

    if dropout_rate:
        x = Dropout(dropout_rate)(x)

    return x

def transition_block(x, stage, nb_filter, dropout_rate=None):
    x = BatchNormalization(axis=concat_axis)(x)
    x = Activation('relu')(x)
    x = Conv2D(int(nb_filter), (1, 1), use_bias=False)(x)

    if dropout_rate:
        x = Dropout(dropout_rate)(x)

    x = AveragePooling2D((2, 2), strides=(2, 2))(x)
    return x

def dense_block(x, stage, nb_layers, nb_filter, growth_rate, dropout_rate=None, grow_nb_filters=True):
    concat_feat = x

    for i in range(nb_layers):
        branch = i + 1
        x = conv_block(concat_feat, stage, branch, growth_rate, dropout_rate)
        concat_feat = Concatenate(axis=concat_axis)([concat_feat, x])

        if grow_nb_filters:
            nb_filter += growth_rate

    return concat_feat, nb_filter


In [None]:
import tensorflow as tf
from tensorflow.keras.optimizers import SGD

# Assuming densenet121_model and student_model functions are already defined
# Replace with appropriate model definitions if necessary

# Build the teacher and student models
teacher = densenet121_model(img_rows=128, img_cols=128, color_type=3, num_classes=21)
student = student_model(img_rows=128, img_cols=128, color_type=3, num_classes=21)

# Compile the teacher model (pre-training the teacher model if necessary)
teacher.compile(optimizer=SGD(learning_rate=1e-2, momentum=0.9), loss='categorical_crossentropy', metrics=['accuracy'])

# Train teacher model (pretrain if needed)
# Note: If teacher model is already pre-trained, you can load weights here instead of training
teacher.fit(x_train, y_train,
            batch_size=32,
            epochs=5,  # Adjust epochs as needed for pre-training
            validation_data=(x_test, y_test))

# Precompute teacher's predictions for the training set
teacher_preds = teacher.predict(x_train)

# Define distillation loss function
def distillation_loss(y_true, y_pred, teacher_preds, temperature=3.0, alpha=0.1):
    """
    Compute the distillation loss combining both:
    - Soft target loss (KL divergence)
    - Hard target loss (standard cross-entropy)
    """
    # Get the current batch size
    batch_size = tf.shape(y_pred)[0]

    # Use tf.gather to select the relevant teacher predictions for the current batch
    teacher_batch_preds = tf.gather(teacher_preds, tf.range(batch_size))

    # Softmax for soft labels and student predictions with temperature scaling
    y_true = tf.keras.activations.softmax(y_true / temperature)
    y_pred = tf.keras.activations.softmax(y_pred / temperature)
    teacher_batch_preds = tf.keras.activations.softmax(teacher_batch_preds / temperature)

    # Cross-entropy between the student predictions and the teacher predictions (soft targets)
    soft_loss = tf.keras.losses.categorical_crossentropy(teacher_batch_preds, y_pred)

    # Standard cross-entropy loss with hard targets
    hard_loss = tf.keras.losses.categorical_crossentropy(y_true, y_pred)

    # Weighted sum of the distillation loss and the hard target loss
    return alpha * soft_loss + (1. - alpha) * hard_loss

# Compile the student model with the distillation loss (using precomputed teacher predictions)
student.compile(optimizer=SGD(learning_rate=1e-3, momentum=0.9),
                loss=lambda y_true, y_pred: distillation_loss(y_true, y_pred, teacher_preds),
                metrics=['accuracy'])

# Train the student model
student.fit(x_train, y_train,
            batch_size=32,
            epochs=15,
            validation_data=(x_test, y_test))





Epoch 1/5
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m530s[0m 9s/step - accuracy: 0.2119 - loss: 2.7510 - val_accuracy: 0.0667 - val_loss: 3.8740
Epoch 2/5
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m513s[0m 9s/step - accuracy: 0.4523 - loss: 1.9220 - val_accuracy: 0.1357 - val_loss: 3.3072
Epoch 3/5
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m483s[0m 9s/step - accuracy: 0.4932 - loss: 1.6031 - val_accuracy: 0.1190 - val_loss: 3.6098
Epoch 4/5
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m504s[0m 9s/step - accuracy: 0.5838 - loss: 1.3487 - val_accuracy: 0.2333 - val_loss: 2.8818
Epoch 5/5
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m477s[0m 9s/step - accuracy: 0.6919 - loss: 0.9659 - val_accuracy: 0.3548 - val_loss: 2.3230
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m104s[0m 2s/step
Epoch 1/5
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m404s[0m 7s/step - accuracy: 0.0298 - loss: 3.0446 - val_ac

<keras.src.callbacks.history.History at 0x77fffdc79180>

In [None]:
 # Evaluate the trained student model
score = student.evaluate(x_test, y_test, verbose=0)
print(f'Test accuracy: {score[1] * 100:.2f}%')


Test accuracy: 6.19%


In [None]:
teacher_score = teacher.evaluate(x_test, y_test, verbose=0)
print(f'Teacher test accuracy: {teacher_score[1] * 100:.2f}%')

Teacher test accuracy: 35.48%


In [None]:
# Cell: Evaluate and print additional metrics for teacher and student models
from sklearn.metrics import classification_report
import time

def evaluate_model(model, x_test, y_test):
    # Predict the labels
    start_time = time.time()
    y_pred_probs = model.predict(x_test)
    inference_time = time.time() - start_time

    # Get the predicted classes
    y_pred = np.argmax(y_pred_probs, axis=1)
    y_true = np.argmax(y_test, axis=1)

    # Calculate precision, recall, F1 score
    report = classification_report(y_true, y_pred, target_names=lb.classes_)

    # Print the metrics
    print(report)
    print(f"Inference Time: {inference_time:.4f} seconds")
    return y_pred_probs

print("=== Teacher Model Evaluation ===")
teacher_pred_probs = evaluate_model(teacher, x_test, y_test)

print("\n=== Student Model Evaluation ===")
student_pred_probs = evaluate_model(student, x_test, y_test)


=== Teacher Model Evaluation ===
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 2s/step
                   precision    recall  f1-score   support

     agricultural       0.00      0.00      0.00        18
         airplane       1.00      0.25      0.40        20
  baseballdiamond       0.71      0.19      0.30        26
            beach       1.00      0.36      0.53        22
        buildings       1.00      0.19      0.32        21
        chaparral       0.54      0.93      0.68        27
 denseresidential       0.29      0.90      0.43        20
           forest       0.31      1.00      0.48        17
          freeway       0.36      0.43      0.39        23
       golfcourse       0.31      0.22      0.26        18
           harbor       0.86      0.71      0.77        17
     intersection       0.00      0.00      0.00        14
mediumresidential       0.20      0.70      0.31        23
   mobilehomepark       1.00      0.13      0.24        15
        

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 2s/step
                   precision    recall  f1-score   support

     agricultural       0.00      0.00      0.00        18
         airplane       0.00      0.00      0.00        20
  baseballdiamond       0.00      0.00      0.00        26
            beach       0.00      0.00      0.00        22
        buildings       0.00      0.00      0.00        21
        chaparral       0.00      0.00      0.00        27
 denseresidential       0.00      0.00      0.00        20
           forest       0.00      0.00      0.00        17
          freeway       0.00      0.00      0.00        23
       golfcourse       0.00      0.00      0.00        18
           harbor       0.00      0.00      0.00        17
     intersection       0.00      0.00      0.00        14
mediumresidential       0.00      0.00      0.00        23
   mobilehomepark       0.00      0.00      0.00        15
         overpass       0.00      0.00   

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
# Install the required libraries
!pip install opendatasets --upgrade --quiet
!pip install kaggle --quiet

# Import Kaggle and OpenDatasets to download datasets from Kaggle
import opendatasets as od

# Download the dataset from Kaggle
dataset_url = 'https://www.kaggle.com/datasets/abdulhasibuddin/uc-merced-land-use-dataset'
od.download(dataset_url)

import os
import random
import cv2
import numpy as np
from keras.preprocessing.image import img_to_array
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from keras.callbacks import ModelCheckpoint, LearningRateScheduler
from keras.optimizers import SGD
from keras.layers import Input, Dense, Activation, Dropout, GlobalAveragePooling2D, BatchNormalization, ZeroPadding2D, AveragePooling2D, MaxPooling2D, Conv2D
from keras.models import Model
import keras.backend as K

# Set the path to the image folder
data_path = 'uc-merced-land-use-dataset/UCMerced_LandUse/Images'

# Initialize image data and labels
image_data = []
labels = []

# Load the image dataset
for root, dirs, files in os.walk(data_path):
    for file in files:
        if file.endswith(".tif"):
            # Read the image
            image = cv2.imread(os.path.join(root, file))
            image = cv2.resize(image, (128, 128))
            image = img_to_array(image)
            image_data.append(image)

            # Use the folder name as the label
            label = root.split(os.path.sep)[-1]
            labels.append(label)

# Convert to numpy arrays and normalize the images
image_data = np.array(image_data, dtype="float") / 255.0
labels = np.array(labels)

# Binarize the labels
lb = LabelBinarizer()
labels = lb.fit_transform(labels)

# Split the dataset into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(image_data, labels, test_size=0.2, random_state=42)

print(f'Training samples: {len(x_train)}, Testing samples: {len(x_test)}')

def student_model(img_rows, img_cols, color_type=1, num_classes=None):
    nb_dense_block = 3  # Fewer dense blocks than the teacher model
    growth_rate = 32  # Smaller growth rate
    nb_filter = 32  # Fewer filters
    return densenet121_model(img_rows=img_rows, img_cols=img_cols, color_type=color_type,
                             nb_dense_block=nb_dense_block, growth_rate=growth_rate,
                             nb_filter=nb_filter, num_classes=num_classes)

from keras.layers import Concatenate, Conv2D, Activation, BatchNormalization, Dropout, ZeroPadding2D, AveragePooling2D, GlobalAveragePooling2D, Dense, MaxPooling2D, Input
from keras.models import Model
from keras.optimizers import SGD
def densenet121_model(img_rows, img_cols, color_type=1, nb_dense_block=4, growth_rate=32, nb_filter=64, reduction=0.5, dropout_rate=0.0, weight_decay=1e-4, num_classes=None):
    global concat_axis
    img_input = Input(shape=(img_rows, img_cols, color_type), name='data')
    concat_axis = 3

    nb_filter = 64
    nb_layers = [6, 12, 24, 16]

    x = Conv2D(nb_filter, (7, 7), strides=(2, 2), name='conv1', use_bias=False)(img_input)
    x = BatchNormalization(axis=concat_axis)(x)
    x = Activation('relu')(x)
    x = MaxPooling2D((3, 3), strides=(2, 2))(x)

    for block_idx in range(nb_dense_block - 1):
        stage = block_idx + 2
        x, nb_filter = dense_block(x, stage, nb_layers[block_idx], nb_filter, growth_rate, dropout_rate=dropout_rate)
        x = transition_block(x, stage, nb_filter, dropout_rate=dropout_rate)
        nb_filter = int(nb_filter)

    final_stage = stage + 1
    x, nb_filter = dense_block(x, final_stage, nb_layers[-1], nb_filter, growth_rate, dropout_rate=dropout_rate)

    x = BatchNormalization(axis=concat_axis)(x)
    x = Activation('relu')(x)

    x_fc = GlobalAveragePooling2D()(x)
    x_fc = Dense(1000)(x_fc)
    x_fc = Activation('softmax')(x_fc)

    model = Model(img_input, x_fc)

    x_newfc = GlobalAveragePooling2D()(x)
    x_newfc = Dense(num_classes)(x_newfc)
    x_newfc = Activation('softmax')(x_newfc)

    model = Model(img_input, x_newfc)

    sgd = SGD(learning_rate=1e-2, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])

    return model

def conv_block(x, stage, branch, nb_filter, dropout_rate=None):
    inter_channel = nb_filter * 4
    x = BatchNormalization(axis=concat_axis)(x)
    x = Activation('relu')(x)
    x = Conv2D(inter_channel, (1, 1), use_bias=False)(x)

    if dropout_rate:
        x = Dropout(dropout_rate)(x)

    x = BatchNormalization(axis=concat_axis)(x)
    x = Activation('relu')(x)
    x = ZeroPadding2D((1, 1))(x)
    x = Conv2D(nb_filter, (3, 3), use_bias=False)(x)

    if dropout_rate:
        x = Dropout(dropout_rate)(x)

    return x

def transition_block(x, stage, nb_filter, dropout_rate=None):
    x = BatchNormalization(axis=concat_axis)(x)
    x = Activation('relu')(x)
    x = Conv2D(int(nb_filter), (1, 1), use_bias=False)(x)

    if dropout_rate:
        x = Dropout(dropout_rate)(x)

    x = AveragePooling2D((2, 2), strides=(2, 2))(x)
    return x

def dense_block(x, stage, nb_layers, nb_filter, growth_rate, dropout_rate=None, grow_nb_filters=True):
    concat_feat = x

    for i in range(nb_layers):
        branch = i + 1
        x = conv_block(concat_feat, stage, branch, growth_rate, dropout_rate)
        concat_feat = Concatenate(axis=concat_axis)([concat_feat, x])

        if grow_nb_filters:
            nb_filter += growth_rate

    return concat_feat, nb_filter

import tensorflow as tf
from tensorflow.keras.optimizers import SGD

# Build the teacher and student models
teacher = densenet121_model(img_rows=128, img_cols=128, color_type=3, num_classes=21)
student = student_model(img_rows=128, img_cols=128, color_type=3, num_classes=21)

# Define a learning rate scheduler function
def lr_scheduler(epoch, lr):
    if epoch < 10:
        return lr
    else:
        return lr * tf.math.exp(-0.1)

# Compile the teacher model (pre-training the teacher model if necessary)
teacher.compile(optimizer=SGD(learning_rate=1e-2, momentum=0.9), loss='categorical_crossentropy', metrics=['accuracy'])

# Train teacher model with the learning rate scheduler
teacher.fit(x_train, y_train,
            batch_size=32,
            epochs=5,
            validation_data=(x_test, y_test),
            callbacks=[LearningRateScheduler(lr_scheduler)])

# Precompute teacher's predictions for the training set
teacher_preds = teacher.predict(x_train)

# Define distillation loss function
def distillation_loss(y_true, y_pred, teacher_preds, temperature=3.0, alpha=0.1):
    batch_size = tf.shape(y_pred)[0]
    teacher_batch_preds = tf.gather(teacher_preds, tf.range(batch_size))

    y_true = tf.keras.activations.softmax(y_true / temperature)
    y_pred = tf.keras.activations.softmax(y_pred / temperature)
    teacher_batch_preds = tf.keras.activations.softmax(teacher_batch_preds / temperature)

    soft_loss = tf.keras.losses.categorical_crossentropy(teacher_batch_preds, y_pred)
    hard_loss = tf.keras.losses.categorical_crossentropy(y_true, y_pred)

    return alpha * soft_loss + (1. - alpha) * hard_loss

# Compile the student model with the distillation loss
student.compile(optimizer=SGD(learning_rate=1e-3, momentum=0.9),
                loss=lambda y_true, y_pred: distillation_loss(y_true, y_pred, teacher_preds),
                metrics=['accuracy'])

# Train the student model with learning rate scheduler
student.fit(x_train, y_train,
            batch_size=32,
            epochs=5,
            validation_data=(x_test, y_test),
            callbacks=[LearningRateScheduler(lr_scheduler)])

# Evaluate the trained student model
score = student.evaluate(x_test, y_test, verbose=0)
print(f'Test accuracy: {score[1] * 100:.2f}%')

teacher_score = teacher.evaluate(x_test, y_test, verbose=0)
print(f'Teacher test accuracy: {teacher_score[1] * 100:.2f}%')

# Cell: Evaluate and print additional metrics for teacher and student models
from sklearn.metrics import classification_report
import time

def evaluate_model(model, x_test, y_test):
    # Predict the labels
    start_time = time.time()
    y_pred_probs = model.predict(x_test)
    inference_time = time.time() - start_time

    # Get the predicted classes
    y_pred = np.argmax(y_pred_probs, axis=1)
    y_true = np.argmax(y_test, axis=1)

    # Calculate precision, recall, F1 score
    report = classification_report(y_true, y_pred, target_names=lb.classes_)

    # Print the metrics
    print(report)
    print(f"Inference Time: {inference_time:.4f} seconds")
    return y_pred_probs

print("=== Teacher Model Evaluation ===")
teacher_pred_probs = evaluate_model(teacher, x_test, y_test)

print("\n=== Student Model Evaluation ===")
student_pred_probs = evaluate_model(student, x_test, y_test)


Please provide your Kaggle credentials to download this dataset. Learn more: http://bit.ly/kaggle-creds
Your Kaggle username: akshayanand2002
Your Kaggle Key: ··········
Dataset URL: https://www.kaggle.com/datasets/abdulhasibuddin/uc-merced-land-use-dataset
Downloading uc-merced-land-use-dataset.zip to ./uc-merced-land-use-dataset


100%|██████████| 317M/317M [00:06<00:00, 52.7MB/s]



Training samples: 1680, Testing samples: 420




Epoch 1/5
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m448s[0m 7s/step - accuracy: 0.2288 - loss: 2.6923 - val_accuracy: 0.0619 - val_loss: 3.3667 - learning_rate: 0.0100
Epoch 2/5
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m457s[0m 8s/step - accuracy: 0.3921 - loss: 1.9700 - val_accuracy: 0.1071 - val_loss: 4.2233 - learning_rate: 0.0100
Epoch 3/5
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m443s[0m 8s/step - accuracy: 0.5068 - loss: 1.6341 - val_accuracy: 0.1571 - val_loss: 3.3799 - learning_rate: 0.0100
Epoch 4/5
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m445s[0m 8s/step - accuracy: 0.5985 - loss: 1.2398 - val_accuracy: 0.2095 - val_loss: 2.9695 - learning_rate: 0.0100
Epoch 5/5
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m418s[0m 7s/step - accuracy: 0.6999 - loss: 0.9592 - val_accuracy: 0.2119 - val_loss: 3.1578 - learning_rate: 0.0100
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m88s[0m 2s/step
Epoch 1

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 1s/step
                   precision    recall  f1-score   support

     agricultural       0.00      0.00      0.00        21
         airplane       0.00      0.00      0.00        18
  baseballdiamond       0.00      0.00      0.00        14
            beach       0.00      0.00      0.00        17
        buildings       0.00      0.00      0.00        17
        chaparral       0.00      0.00      0.00        20
 denseresidential       0.00      0.00      0.00        20
           forest       0.00      0.00      0.00        20
          freeway       0.00      0.00      0.00        18
       golfcourse       0.00      0.00      0.00        18
           harbor       0.00      0.00      0.00        22
     intersection       0.05      1.00      0.10        23
mediumresidential       0.00      0.00      0.00        27
   mobilehomepark       0.00      0.00      0.00        17
         overpass       0.00      0.00   

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
# Compile the student model with the distillation loss
student.compile(optimizer=SGD(learning_rate=1e-3, momentum=0.9),
                loss=lambda y_true, y_pred: distillation_loss(y_true, y_pred, teacher_preds),
                metrics=['accuracy'])

# Train the student model with learning rate scheduler
student.fit(x_train, y_train,
            batch_size=32,
            epochs=15,
            validation_data=(x_test, y_test),
            callbacks=[LearningRateScheduler(lr_scheduler)])

# Evaluate the trained student model
score = student.evaluate(x_test, y_test, verbose=0)
print(f'Test accuracy: {score[1] * 100:.2f}%')

teacher_score = teacher.evaluate(x_test, y_test, verbose=0)
print(f'Teacher test accuracy: {teacher_score[1] * 100:.2f}%')

# Cell: Evaluate and print additional metrics for teacher and student models
from sklearn.metrics import classification_report
import time

def evaluate_model(model, x_test, y_test):
    # Predict the labels
    start_time = time.time()
    y_pred_probs = model.predict(x_test)
    inference_time = time.time() - start_time

    # Get the predicted classes
    y_pred = np.argmax(y_pred_probs, axis=1)
    y_true = np.argmax(y_test, axis=1)

    # Calculate precision, recall, F1 score
    report = classification_report(y_true, y_pred, target_names=lb.classes_)

    # Print the metrics
    print(report)
    print(f"Inference Time: {inference_time:.4f} seconds")
    return y_pred_probs

print("=== Teacher Model Evaluation ===")
teacher_pred_probs = evaluate_model(teacher, x_test, y_test)

print("\n=== Student Model Evaluation ===")
student_pred_probs = evaluate_model(student, x_test, y_test)


Epoch 1/15
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m376s[0m 6s/step - accuracy: 0.0677 - loss: 3.0446 - val_accuracy: 0.0905 - val_loss: 3.0445 - learning_rate: 0.0010
Epoch 2/15
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m313s[0m 6s/step - accuracy: 0.0840 - loss: 3.0446 - val_accuracy: 0.0810 - val_loss: 3.0445 - learning_rate: 0.0010
Epoch 3/15
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m321s[0m 6s/step - accuracy: 0.0800 - loss: 3.0446 - val_accuracy: 0.0405 - val_loss: 3.0445 - learning_rate: 0.0010
Epoch 4/15
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m322s[0m 6s/step - accuracy: 0.0698 - loss: 3.0446 - val_accuracy: 0.0333 - val_loss: 3.0445 - learning_rate: 0.0010
Epoch 5/15
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m310s[0m 6s/step - accuracy: 0.0733 - loss: 3.0446 - val_accuracy: 0.0381 - val_loss: 3.0445 - learning_rate: 0.0010
Epoch 6/15
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m324s[0

ValueError: The output of the `schedule` function should be a float. Got: 0.0009048374486155808

In [None]:
# Define a learning rate scheduler function
def lr_scheduler(epoch, lr):
    if epoch < 10:
        return lr
    else:
        return float(lr * tf.math.exp(-0.1))

# Compile the student model with the distillation loss
student.compile(optimizer=SGD(learning_rate=1e-3, momentum=0.9),
                loss=lambda y_true, y_pred: distillation_loss(y_true, y_pred, teacher_preds),
                metrics=['accuracy'])

# Train the student model with learning rate scheduler
student.fit(x_train, y_train,
            batch_size=32,
            epochs=15,
            validation_data=(x_test, y_test),
            callbacks=[LearningRateScheduler(lr_scheduler)])

# Evaluate the trained student model
score = student.evaluate(x_test, y_test, verbose=0)
print(f'Test accuracy: {score[1] * 100:.2f}%')

teacher_score = teacher.evaluate(x_test, y_test, verbose=0)
print(f'Teacher test accuracy: {teacher_score[1] * 100:.2f}%')

# Evaluate and print additional metrics for teacher and student models
from sklearn.metrics import classification_report
import time

def evaluate_model(model, x_test, y_test):
    # Predict the labels
    start_time = time.time()
    y_pred_probs = model.predict(x_test)
    inference_time = time.time() - start_time

    # Get the predicted classes
    y_pred = np.argmax(y_pred_probs, axis=1)
    y_true = np.argmax(y_test, axis=1)

    # Calculate precision, recall, F1 score
    report = classification_report(y_true, y_pred, target_names=lb.classes_)

    # Print the metrics
    print(report)
    print(f"Inference Time: {inference_time:.4f} seconds")
    return y_pred_probs

print("=== Teacher Model Evaluation ===")
teacher_pred_probs = evaluate_model(teacher, x_test, y_test)

print("\n=== Student Model Evaluation ===")
student_pred_probs = evaluate_model(student, x_test, y_test)


In [None]:
# Install the required libraries
!pip install opendatasets --upgrade --quiet
!pip install kaggle --quiet

# Import Kaggle and OpenDatasets to download datasets from Kaggle
import opendatasets as od

# Download the dataset from Kaggle
dataset_url = 'https://www.kaggle.com/datasets/abdulhasibuddin/uc-merced-land-use-dataset'
od.download(dataset_url)

import os
import random
import cv2
import numpy as np
from keras.preprocessing.image import img_to_array
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from keras.callbacks import ModelCheckpoint, LearningRateScheduler
from keras.optimizers import SGD
from keras.layers import Input, Dense, Activation, Dropout, GlobalAveragePooling2D, BatchNormalization, ZeroPadding2D, AveragePooling2D, MaxPooling2D, Conv2D
from keras.models import Model
import keras.backend as K

# Set the path to the image folder
data_path = 'uc-merced-land-use-dataset/UCMerced_LandUse/Images'

# Initialize image data and labels
image_data = []
labels = []

# Load the image dataset
for root, dirs, files in os.walk(data_path):
    for file in files:
        if file.endswith(".tif"):
            # Read the image
            image = cv2.imread(os.path.join(root, file))
            image = cv2.resize(image, (128, 128))
            image = img_to_array(image)
            image_data.append(image)

            # Use the folder name as the label
            label = root.split(os.path.sep)[-1]
            labels.append(label)

# Convert to numpy arrays and normalize the images
image_data = np.array(image_data, dtype="float") / 255.0
labels = np.array(labels)

# Binarize the labels
lb = LabelBinarizer()
labels = lb.fit_transform(labels)

# Split the dataset into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(image_data, labels, test_size=0.2, random_state=42)

print(f'Training samples: {len(x_train)}, Testing samples: {len(x_test)}')

def student_model(img_rows, img_cols, color_type=1, num_classes=None):
    nb_dense_block = 3  # Fewer dense blocks than the teacher model
    growth_rate = 32  # Smaller growth rate
    nb_filter = 32  # Fewer filters
    return densenet121_model(img_rows=img_rows, img_cols=img_cols, color_type=color_type,
                             nb_dense_block=nb_dense_block, growth_rate=growth_rate,
                             nb_filter=nb_filter, num_classes=num_classes)

from keras.layers import Concatenate, Conv2D, Activation, BatchNormalization, Dropout, ZeroPadding2D, AveragePooling2D, GlobalAveragePooling2D, Dense, MaxPooling2D, Input
from keras.models import Model
from keras.optimizers import SGD
def densenet121_model(img_rows, img_cols, color_type=1, nb_dense_block=4, growth_rate=32, nb_filter=64, reduction=0.5, dropout_rate=0.0, weight_decay=1e-4, num_classes=None):
    global concat_axis
    img_input = Input(shape=(img_rows, img_cols, color_type), name='data')
    concat_axis = 3

    nb_filter = 64
    nb_layers = [6, 12, 24, 16]

    x = Conv2D(nb_filter, (7, 7), strides=(2, 2), name='conv1', use_bias=False)(img_input)
    x = BatchNormalization(axis=concat_axis)(x)
    x = Activation('relu')(x)
    x = MaxPooling2D((3, 3), strides=(2, 2))(x)

    for block_idx in range(nb_dense_block - 1):
        stage = block_idx + 2
        x, nb_filter = dense_block(x, stage, nb_layers[block_idx], nb_filter, growth_rate, dropout_rate=dropout_rate)
        x = transition_block(x, stage, nb_filter, dropout_rate=dropout_rate)
        nb_filter = int(nb_filter)

    final_stage = stage + 1
    x, nb_filter = dense_block(x, final_stage, nb_layers[-1], nb_filter, growth_rate, dropout_rate=dropout_rate)

    x = BatchNormalization(axis=concat_axis)(x)
    x = Activation('relu')(x)

    x_fc = GlobalAveragePooling2D()(x)
    x_fc = Dense(1000)(x_fc)
    x_fc = Activation('softmax')(x_fc)

    model = Model(img_input, x_fc)

    x_newfc = GlobalAveragePooling2D()(x)
    x_newfc = Dense(num_classes)(x_newfc)
    x_newfc = Activation('softmax')(x_newfc)

    model = Model(img_input, x_newfc)

    sgd = SGD(learning_rate=1e-2, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])

    return model

def conv_block(x, stage, branch, nb_filter, dropout_rate=None):
    inter_channel = nb_filter * 4
    x = BatchNormalization(axis=concat_axis)(x)
    x = Activation('relu')(x)
    x = Conv2D(inter_channel, (1, 1), use_bias=False)(x)

    if dropout_rate:
        x = Dropout(dropout_rate)(x)

    x = BatchNormalization(axis=concat_axis)(x)
    x = Activation('relu')(x)
    x = ZeroPadding2D((1, 1))(x)
    x = Conv2D(nb_filter, (3, 3), use_bias=False)(x)

    if dropout_rate:
        x = Dropout(dropout_rate)(x)

    return x

def transition_block(x, stage, nb_filter, dropout_rate=None):
    x = BatchNormalization(axis=concat_axis)(x)
    x = Activation('relu')(x)
    x = Conv2D(int(nb_filter), (1, 1), use_bias=False)(x)

    if dropout_rate:
        x = Dropout(dropout_rate)(x)

    x = AveragePooling2D((2, 2), strides=(2, 2))(x)
    return x

def dense_block(x, stage, nb_layers, nb_filter, growth_rate, dropout_rate=None, grow_nb_filters=True):
    concat_feat = x

    for i in range(nb_layers):
        branch = i + 1
        x = conv_block(concat_feat, stage, branch, growth_rate, dropout_rate)
        concat_feat = Concatenate(axis=concat_axis)([concat_feat, x])

        if grow_nb_filters:
            nb_filter += growth_rate

    return concat_feat, nb_filter

import tensorflow as tf
from tensorflow.keras.optimizers import SGD

# Build the teacher and student models
teacher = densenet121_model(img_rows=128, img_cols=128, color_type=3, num_classes=21)
student = student_model(img_rows=128, img_cols=128, color_type=3, num_classes=21)

# Define a learning rate scheduler function
def lr_scheduler(epoch, lr):
    if epoch < 10:
        return lr
    else:
        return float(lr * tf.math.exp(-0.1))

# Compile the teacher model (pre-training the teacher model if necessary)
teacher.compile(optimizer=SGD(learning_rate=1e-2, momentum=0.9), loss='categorical_crossentropy', metrics=['accuracy'])

# Train teacher model with the learning rate scheduler
teacher.fit(x_train, y_train,
            batch_size=32,
            epochs=8,
            validation_data=(x_test, y_test),
            callbacks=[LearningRateScheduler(lr_scheduler)])

# Precompute teacher's predictions for the training set
teacher_preds = teacher.predict(x_train)

# Define distillation loss function
def distillation_loss(y_true, y_pred, teacher_preds, temperature=3.0, alpha=0.1):
    batch_size = tf.shape(y_pred)[0]
    teacher_batch_preds = tf.gather(teacher_preds, tf.range(batch_size))

    y_true = tf.keras.activations.softmax(y_true / temperature)
    y_pred = tf.keras.activations.softmax(y_pred / temperature)
    teacher_batch_preds = tf.keras.activations.softmax(teacher_batch_preds / temperature)

    soft_loss = tf.keras.losses.categorical_crossentropy(teacher_batch_preds, y_pred)
    hard_loss = tf.keras.losses.categorical_crossentropy(y_true, y_pred)

    return alpha * soft_loss + (1. - alpha) * hard_loss

# Define a learning rate scheduler function
def lr_scheduler(epoch, lr):
    if epoch < 10:
        return lr
    else:
        return float(lr * tf.math.exp(-0.1))

# Compile the student model with the distillation loss
student.compile(optimizer=SGD(learning_rate=1e-3, momentum=0.9),
                loss=lambda y_true, y_pred: distillation_loss(y_true, y_pred, teacher_preds),
                metrics=['accuracy'])

# Train the student model with learning rate scheduler
student.fit(x_train, y_train,
            batch_size=32,
            epochs=20,
            validation_data=(x_test, y_test),
            callbacks=[LearningRateScheduler(lr_scheduler)])

# Evaluate the trained student model
score = student.evaluate(x_test, y_test, verbose=0)
print(f'Test accuracy: {score[1] * 100:.2f}%')

teacher_score = teacher.evaluate(x_test, y_test, verbose=0)
print(f'Teacher test accuracy: {teacher_score[1] * 100:.2f}%')

# Evaluate and print additional metrics for teacher and student models
from sklearn.metrics import classification_report
import time

def evaluate_model(model, x_test, y_test):
    # Predict the labels
    start_time = time.time()
    y_pred_probs = model.predict(x_test)
    inference_time = time.time() - start_time

    # Get the predicted classes
    y_pred = np.argmax(y_pred_probs, axis=1)
    y_true = np.argmax(y_test, axis=1)

    # Calculate precision, recall, F1 score
    report = classification_report(y_true, y_pred, target_names=lb.classes_)

    # Print the metrics
    print(report)
    print(f"Inference Time: {inference_time:.4f} seconds")
    return y_pred_probs

print("=== Teacher Model Evaluation ===")
teacher_pred_probs = evaluate_model(teacher, x_test, y_test)

print("\n=== Student Model Evaluation ===")
student_pred_probs = evaluate_model(student, x_test, y_test)


Skipping, found downloaded files in "./uc-merced-land-use-dataset" (use force=True to force download)
Training samples: 1680, Testing samples: 420




Epoch 1/8
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m521s[0m 9s/step - accuracy: 0.2279 - loss: 2.7244 - val_accuracy: 0.0452 - val_loss: 3.1722 - learning_rate: 0.0100
Epoch 2/8
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m490s[0m 9s/step - accuracy: 0.4114 - loss: 1.9415 - val_accuracy: 0.0786 - val_loss: 3.1818 - learning_rate: 0.0100
Epoch 3/8
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m500s[0m 9s/step - accuracy: 0.5598 - loss: 1.4859 - val_accuracy: 0.2262 - val_loss: 2.8257 - learning_rate: 0.0100
Epoch 4/8
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m480s[0m 8s/step - accuracy: 0.6022 - loss: 1.2802 - val_accuracy: 0.1214 - val_loss: 4.7133 - learning_rate: 0.0100
Epoch 5/8
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m464s[0m 9s/step - accuracy: 0.6731 - loss: 1.0531 - val_accuracy: 0.2976 - val_loss: 2.5896 - learning_rate: 0.0100
Epoch 6/8
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m501s[0m 9s/s

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


                   precision    recall  f1-score   support

     agricultural       0.00      0.00      0.00        17
         airplane       0.75      0.15      0.25        20
  baseballdiamond       0.45      0.23      0.30        22
            beach       0.90      0.35      0.50        26
        buildings       0.41      0.50      0.45        18
        chaparral       1.00      0.74      0.85        23
 denseresidential       0.58      0.83      0.68        18
           forest       0.83      0.71      0.77        21
          freeway       0.35      0.35      0.35        20
       golfcourse       0.00      0.00      0.00        26
           harbor       1.00      0.71      0.83        17
     intersection       0.35      0.38      0.36        24
mediumresidential       0.33      0.13      0.19        15
   mobilehomepark       0.38      0.94      0.55        16
         overpass       1.00      0.30      0.46        20
       parkinglot       1.00      0.43      0.60       

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
